diff --git a/Innenminister2009-2019.ipynb b/Innenminister2009-2019.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..3520b4016d9aa94fce9e912205d90ce2e583a404
--- /dev/null
+++ b/Innenminister2009-2019.ipynb
@@ -0,0 +1,169 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a WebarchivSession object with convenience methods for easy access. The API key is read from the `WEBARCHIV_API_KEY` environment variable so it is never stored in the notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from webarchiv import WebarchivSession\n",
+    "\n",
+    "# Read the API key from the environment so the secret is never committed with the notebook.\n",
+    "apikey = os.environ.get(\"WEBARCHIV_API_KEY\")\n",
+    "if not apikey:\n",
+    "    raise RuntimeError(\"Set the WEBARCHIV_API_KEY environment variable to your Webarchiv API key.\")\n",
+    "\n",
+    "w = WebarchivSession(apikey)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Anzahl der Versionen von http://www.bmi.gv.at/cms/bmi_minister/ : 1815\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20090924173355/http://www.bmi.gv.at/cms/bmi_minister/\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20110806220856/http://www.bmi.gv.at/cms/bmi_minister/\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20160421194828/http://www.bmi.gv.at/cms/bmi_minister/\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Anzahl der Versionen von https://www.bmi.gv.at/101/start.aspx : 815\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20171218174329/https://www.bmi.gv.at/101/start.aspx\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20190522183605/https://www.bmi.gv.at/101/start.aspx\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20190603200736/https://www.bmi.gv.at/101/start.aspx\n"
+     ]
+    }
+   ],
+   "source": [
+    "urls = [\n",
+    "    \"http://www.bmi.gv.at/cms/bmi_minister/\",\n",
+    "    \"https://www.bmi.gv.at/101/start.aspx\",\n",
+    "]\n",
+    "\n",
+    "# One {\"url\", \"timestamp\"} entry per capture whose fragment checksum changed.\n",
+    "modifieddates = []\n",
+    "\n",
+    "for url in urls:\n",
+    "    response = w.wayback_search(url)\n",
+    "    if response.status_code != 200:\n",
+    "        # Raise instead of calling exit(1), which would shut down the notebook kernel.\n",
+    "        raise RuntimeError(\"Something went wrong ... (HTTP \" + str(response.status_code) + \" for \" + url + \")\")\n",
+    "\n",
+    "    result = response.json()\n",
+    "    print(\"Anzahl der Versionen von \" + url + \" : \", result['total'])\n",
+    "\n",
+    "    # Reset per URL so the first valid capture of every URL is recorded\n",
+    "    # instead of being compared with the last checksum of the previous URL.\n",
+    "    lastchecksum = ''\n",
+    "\n",
+    "    for capture in result['hits']:\n",
+    "        capturedate = capture['c']\n",
+    "        fragment = w.fragment_checksum_html(url, capturedate, \"h2\", 1).json()\n",
+    "\n",
+    "        # Check the return code before touching the checksum of a failed capture.\n",
+    "        if fragment['returncode'] != 0:\n",
+    "            continue\n",
+    "\n",
+    "        checksum = fragment['checksum']\n",
+    "        if checksum != lastchecksum:\n",
+    "            print(\"http://wayback/web/\" + capturedate + \"/\" + url)\n",
+    "            modifieddates.append({\"url\": url, \"timestamp\": capturedate})\n",
+    "            lastchecksum = checksum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Webarchiv Merklistencode: LA\n"
+     ]
+    }
+   ],
+   "source": [
+    "if modifieddates:\n",
+    "    response = w.create_watchlist(modifieddates)\n",
+    "    # Pass the code as a separate argument so printing works for any JSON type.\n",
+    "    print(\"Webarchiv Merklistencode:\", response.json())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Nationalrat2015.ipynb b/Nationalrat2015.ipynb
new file mode 100644
index
0000000000000000000000000000000000000000..588f5bb677eca17eaf1abb1b83479692c5d27826
--- /dev/null
+++ b/Nationalrat2015.ipynb
@@ -0,0 +1,246 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a WebarchivSession object with convenience methods for easy access. The API key is read from the `WEBARCHIV_API_KEY` environment variable so it is never stored in the notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from webarchiv import WebarchivSession\n",
+    "\n",
+    "# Read the API key from the environment so the secret is never committed with the notebook.\n",
+    "apikey = os.environ.get(\"WEBARCHIV_API_KEY\")\n",
+    "if not apikey:\n",
+    "    raise RuntimeError(\"Set the WEBARCHIV_API_KEY environment variable to your Webarchiv API key.\")\n",
+    "\n",
+    "w = WebarchivSession(apikey)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = \"http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\"\n",
+    "response = w.wayback_search(url, \"20150101000000\", \"20151231235959\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if response.status_code != 200:\n",
+    "    # Raise instead of calling exit(1), which would shut down the notebook kernel.\n",
+    "    raise RuntimeError(\"Something went wrong ... (HTTP \" + str(response.status_code) + \")\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Anzahl der Versionen: 343\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Anzahl der Versionen: \", response.json()['total'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150101220053/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150301233010/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150609061707/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150617213901/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150709235648/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150717223402/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150804221022/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150825120602/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150828000644/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150902014359/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20150924004200/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20151015041353/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20151016015450/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20151112044118/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20151216144759/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "http://wayback/web/20151217154540/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
+     ]
+    }
+   ],
+   "source": [
+    "# One {\"url\", \"timestamp\"} entry per capture whose checksum changed.\n",
+    "modifieddates = []\n",
+    "lastchecksum = ''\n",
+    "\n",
+    "for capture in response.json()['hits']:\n",
+    "    capturedate = capture['c']\n",
+    "    fragment = w.fragment_checksum_binary(url, capturedate).json()\n",
+    "\n",
+    "    # Check the return code before touching the checksum of a failed capture.\n",
+    "    if fragment['returncode'] != 0:\n",
+    "        continue\n",
+    "\n",
+    "    checksum = fragment['checksum']\n",
+    "    if checksum != lastchecksum:\n",
+    "        print(\"http://wayback/web/\" + capturedate + \"/\" + url)\n",
+    "        modifieddates.append({\"url\": url, \"timestamp\": capturedate})\n",
+    "        lastchecksum = checksum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Webarchiv Merklistencode: al\n"
+     ]
+    }
+   ],
+   "source": [
+    "if modifieddates:\n",
+    "    response = w.create_watchlist(modifieddates)\n",
+    "    # Pass the code as a separate argument so printing works for any JSON type.\n",
+    "    print(\"Webarchiv Merklistencode:\", response.json())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}