---
title: Title
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/99_tutorial.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
from webrefine.query import WarcFileQuery, WaybackQuery, CommonCrawlQuery
from datetime import datetime
{% endraw %} {% raw %}
# Directory handed to the query below as its cache_location.
cache_path = '../data/cache'
{% endraw %} {% raw %}
# Build a Common Crawl query for the URL pattern 'skeptric.com/*', caching under cache_path.
# NOTE(review): `start` presumably restricts results to captures on or after 2021-10-01 — confirm against webrefine.
cc = CommonCrawlQuery('skeptric.com/*', start=datetime(2021,10,1), cache_location=cache_path)
{% endraw %} {% raw %}
%%time
# Run the query; `results` is reused by the following cells.
results = cc.query()
CPU times: user 479 ms, sys: 0 ns, total: 479 ms
Wall time: 17.5 s
{% endraw %} {% raw %}
# Number of results the query returned.
len(results)
278
{% endraw %} {% raw %}
# Write the content of the second result to disk so it can be opened in a browser.
# Opened in binary mode: presumably `.content` is raw bytes — confirm against webrefine.
with open('../data/test.html', 'wb') as f:
    f.write(results[1].content)

# FileLink is provided by the IPython.display module (not by the top-level
# IPython package, and there is no `IPython.displ` module).
from IPython.display import FileLink
FileLink('../data/test.html')
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
/tmp/ipykernel_2581/3204914761.py in <module>
      2     f.write(results[1].content)
      3 
----> 4 from IPython import FileLink
      5 FileLink('../data/test.html')

ImportError: cannot import name 'FileLink' from 'IPython' (/home/edward/.virtualenvs/webrefine/lib/python3.9/site-packages/IPython/__init__.py)
{% endraw %} {% raw %}
# Show the URL of each of the first 50 results.
[r.url for r in results[:50]]
['https://skeptric.com/',
 'https://skeptric.com/4-analyst-competencies/',
 'https://skeptric.com/4am-rule/',
 'https://skeptric.com/all-of-statistics/',
 'https://skeptric.com/atomic-writer/',
 'https://skeptric.com/beautiful-soup-tips/',
 'https://skeptric.com/bernoulli-mixing/',
 'https://skeptric.com/beta-distribution/',
 'https://skeptric.com/binary-rms/',
 'https://skeptric.com/binomial-power/',
 'https://skeptric.com/bridging-bipartite-graph/',
 'https://skeptric.com/calculate-centroid-on-sphere/',
 'https://skeptric.com/cartesian-product/',
 'https://skeptric.com/centroid-spherical-polygon/',
 'https://skeptric.com/chompjs/',
 'https://skeptric.com/cloudrun/',
 'https://skeptric.com/cluster-exploration/',
 'https://skeptric.com/collecting-training-data-whatcar/',
 'https://skeptric.com/common-substring/',
 'https://skeptric.com/community-detection/',
 'https://skeptric.com/complex-analysis/',
 'https://skeptric.com/composition-over-inheritence/',
 'https://skeptric.com/considering-vscode/',
 'https://skeptric.com/constant-models/',
 'https://skeptric.com/constrained-gradient-descent/',
 'https://skeptric.com/contact-tracing/',
 'https://skeptric.com/data-models/',
 'https://skeptric.com/data-science-reputation/',
 'https://skeptric.com/data-tests-sql/',
 'https://skeptric.com/dataflow-chasing/',
 'https://skeptric.com/decorating-pandas-tables/',
 'https://skeptric.com/demjson/',
 'https://skeptric.com/descriptive-to-predictive/',
 'https://skeptric.com/distribution-between-mean-median/',
 'https://skeptric.com/dnn-block/',
 'https://skeptric.com/drive-metrics/',
 'https://skeptric.com/dumbjump/',
 'https://skeptric.com/emacs-buffering/',
 'https://skeptric.com/embed-behaviour/',
 'https://skeptric.com/embeddings/',
 'https://skeptric.com/evil-yank-pop/',
 'https://skeptric.com/exact-duplicates/',
 'https://skeptric.com/extracting-links-from-html/',
 'https://skeptric.com/finite-groups/',
 'https://skeptric.com/flatten-object-python/',
 'https://skeptric.com/g-naf/',
 'https://skeptric.com/glassbox-ml/',
 'https://skeptric.com/haar/',
 'https://skeptric.com/html-nlp/',
 'https://skeptric.com/html2text-doubleemph/']
{% endraw %}