# %% [markdown]
# # Detect changes of a page fragment across archived captures
#
# This notebook:
#
# 1. searches the web archive for captures of one URL inside a time window,
# 2. requests a checksum of one HTML fragment for every capture,
# 3. prints each capture at which that checksum differs from the previous one,
# 4. stores those captures in a watchlist and prints the watchlist code.

# %%
import getpass
import json
import os

import requests

from webarchiv import WebarchivSession

# %% [markdown]
# ## Configuration
#
# Everything a reader might want to tune lives in this cell.

# %%
# Page whose archived captures are compared.
url = "http://sport.orf.at/l/stories/2003717/"

# Search window passed to `wayback_search`.
# NOTE(review): these look like YYYYMMDDhhmmss timestamps - confirm against the API.
SEARCH_FROM = "20110101000000"
SEARCH_TO = "20120401000000"

# Fragment whose checksum is compared between captures.
# NOTE(review): presumably a CSS selector - confirm against
# `WebarchivSession.fragment_checksum_html`.
FRAGMENT_SELECTOR = ".odd td"

# Fourth positional argument of `fragment_checksum_html`.
# NOTE(review): its meaning is not visible in this notebook - confirm before changing.
FRAGMENT_CHECKSUM_ARG = 3

# %% [markdown]
# ## Create the session
#
# Create a `WebarchivSession` object, which provides convenience methods for
# accessing the archive with your API key.
#
# The key is read from the `WEBARCHIV_API_KEY` environment variable; if that is
# not set you are prompted for it. Never commit a key to the notebook. Any key
# that was committed in an earlier revision should be treated as leaked and
# replaced.

# %%
apikey = os.environ.get("WEBARCHIV_API_KEY") or getpass.getpass("Webarchiv API key: ")
w = WebarchivSession(apikey)

# %% [markdown]
# ## Search for captures

# %%
response = w.wayback_search(url, SEARCH_FROM, SEARCH_TO)

# Stop here on failure: every later cell needs the hit list from this response.
if response.status_code != 200:
    raise RuntimeError("Error {}".format(response.status_code))

hits = response.json()['hits']

# %% [markdown]
# ## Find the captures at which the fragment changed

# %%
lastchecksum = ''
captures = []

for hit in hits:
    capturedate = hit['c']

    # Parse the body once and check the return code before reading the checksum.
    fragment = w.fragment_checksum_html(
        url, capturedate, FRAGMENT_SELECTOR, FRAGMENT_CHECKSUM_ARG
    ).json()
    if fragment['returncode'] != 0:
        # Skip this capture; `lastchecksum` keeps the last successful value.
        continue

    checksum = fragment['checksum']
    if checksum != lastchecksum:
        print("http://wayback/web/" + capturedate + "/" + url)
        captures.append({"url": url, "timestamp": capturedate})

    lastchecksum = checksum

# %% [markdown]
# ## Store the changed captures in a watchlist

# %%
if captures:
    # Use a separate name so the search `response` is not overwritten and the
    # cells above and below stay re-runnable in any order.
    watchlist_response = w.create_watchlist(captures)
    print(
        "A watchlist with all captures mentioned above was generated. "
        "The code for this watchlist is {}. ".format(watchlist_response.json())
    )

print("end")