{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a `WebarchivSession` object, which provides convenience methods for easy access to the Webarchiv API with your API key. The key is read from the `WEBARCHIV_API_KEY` environment variable; if that variable is not set, you are prompted for the key."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "from webarchiv import WebarchivSession\n",
    "\n",
    "# Never hardcode credentials in a notebook: take the key from the environment\n",
    "# and fall back to an interactive prompt that does not echo the input.\n",
    "apikey = os.environ.get('WEBARCHIV_API_KEY') or getpass('Webarchiv API key: ')\n",
    "w = WebarchivSession(apikey)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Anzahl der Versionen von http://www.bmi.gv.at/cms/bmi_minister/ : 1815\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20090924173355/http://www.bmi.gv.at/cms/bmi_minister/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20110806220856/http://www.bmi.gv.at/cms/bmi_minister/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20160421194828/http://www.bmi.gv.at/cms/bmi_minister/\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Anzahl der Versionen von https://www.bmi.gv.at/101/start.aspx : 815\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20171218174329/https://www.bmi.gv.at/101/start.aspx\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20190522183605/https://www.bmi.gv.at/101/start.aspx\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20190603200736/https://www.bmi.gv.at/101/start.aspx\n"
     ]
    }
   ],
   "source": [
    "urls = [\n",
    "    \"http://www.bmi.gv.at/cms/bmi_minister/\",\n",
    "    \"https://www.bmi.gv.at/101/start.aspx\",\n",
    "]\n",
    "\n",
    "# Captures at which the checked fragment differs from the previously seen one;\n",
    "# each entry is a dict with the keys \"url\" and \"timestamp\".\n",
    "modifieddates = []\n",
    "\n",
    "for url in urls:\n",
    "    response = w.wayback_search(url)\n",
    "    if response.status_code != 200:\n",
    "        # Raise instead of calling exit(): exit() would end the kernel session.\n",
    "        raise RuntimeError(\"Something went wrong ...\")\n",
    "\n",
    "    search_result = response.json()  # parse the response body only once\n",
    "    print(\"Anzahl der Versionen von \" + url + \" : \", search_result['total'])\n",
    "\n",
    "    # Reset for every URL so that the first usable capture of each URL is always\n",
    "    # recorded, independent of the checksum the previous URL ended on.\n",
    "    lastchecksum = None\n",
    "\n",
    "    for capture in search_result['hits']:\n",
    "        capturedate = capture['c']\n",
    "        resp = w.fragment_checksum_html(url, capturedate, \"h2\", 1)\n",
    "        fragment = resp.json()\n",
    "\n",
    "        # Skip captures for which a non-zero return code is reported; the\n",
    "        # checksum is only read after this check.\n",
    "        if fragment['returncode'] != 0:\n",
    "            continue\n",
    "\n",
    "        checksum = fragment['checksum']\n",
    "        if checksum != lastchecksum:\n",
    "            print(\"http://wayback/web/\" + capturedate + \"/\" + url)\n",
    "            modifieddates.append({\"url\": url, \"timestamp\": capturedate})\n",
    "            lastchecksum = checksum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Webarchiv Merklistencode: LA\n"
     ]
    }
   ],
   "source": [
    "# Only create a watchlist when at least one modified capture was found.\n",
    "if modifieddates:\n",
    "    response = w.create_watchlist(modifieddates)\n",
    "    print(\"Webarchiv Merklistencode: \" + response.json())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}