Commit a9907fda authored by onbpre's avatar onbpre

fragment checksumming samples

parent 95174dbb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a `WebarchivSession` object. It provides convenience methods for easy access using your API key."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"from webarchiv import WebarchivSession\n",
"\n",
"# Never store the API key in the notebook itself: take it from the\n",
"# WEBARCHIV_API_KEY environment variable, or prompt for it when unset.\n",
"# NOTE(review): a key that was ever committed in this cell should be rotated.\n",
"apikey = os.environ.get('WEBARCHIV_API_KEY') or getpass.getpass('Webarchiv API key: ')\n",
"\n",
"# Session object used by every following cell.\n",
"w = WebarchivSession(apikey)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Anzahl der Versionen von http://www.bmi.gv.at/cms/bmi_minister/ : 1815\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20090924173355/http://www.bmi.gv.at/cms/bmi_minister/\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20110806220856/http://www.bmi.gv.at/cms/bmi_minister/\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20160421194828/http://www.bmi.gv.at/cms/bmi_minister/\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Anzahl der Versionen von https://www.bmi.gv.at/101/start.aspx : 815\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20171218174329/https://www.bmi.gv.at/101/start.aspx\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20190522183605/https://www.bmi.gv.at/101/start.aspx\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20190603200736/https://www.bmi.gv.at/101/start.aspx\n"
]
}
],
"source": [
"urls = [\n",
"    \"http://www.bmi.gv.at/cms/bmi_minister/\",\n",
"    \"https://www.bmi.gv.at/101/start.aspx\",\n",
"]\n",
"\n",
"# One {url, timestamp} entry per capture whose fragment checksum differs from\n",
"# the previously accepted capture of the same URL.\n",
"modifieddates = []\n",
"\n",
"for url in urls:\n",
"    response = w.wayback_search(url)\n",
"    if response.status_code != 200:\n",
"        # Raise instead of calling exit(): exit() would shut the kernel down\n",
"        # and discard everything computed so far.\n",
"        raise RuntimeError(\"Something went wrong ...\")\n",
"\n",
"    # Parse the search result once and reuse it.\n",
"    search_result = response.json()\n",
"    print(\"Anzahl der Versionen von \" + url + \" : \", search_result['total'])\n",
"\n",
"    # Reset per URL so the first usable capture of this URL is not compared\n",
"    # against the last checksum of the previous URL.\n",
"    lastchecksum = ''\n",
"\n",
"    for capture in search_result['hits']:\n",
"        capturedate = capture['c']\n",
"        # NOTE(review): the arguments 'h2' and 1 presumably select the HTML\n",
"        # fragment to checksum - confirm against the webarchiv client.\n",
"        resp = w.fragment_checksum_html(url, capturedate, \"h2\", 1)\n",
"        fragment = resp.json()\n",
"\n",
"        # Skip captures with a non-zero return code before reading the checksum.\n",
"        if fragment['returncode'] != 0:\n",
"            continue\n",
"\n",
"        checksum = fragment['checksum']\n",
"        if checksum != lastchecksum:\n",
"            modification = {\"url\": url, \"timestamp\": capturedate}\n",
"            print(\"http://wayback/web/\" + capturedate + \"/\" + url)\n",
"            modifieddates.append(modification)\n",
"            lastchecksum = checksum"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Webarchiv Merklistencode: LA\n"
]
}
],
"source": [
"# Only create a watchlist when at least one modified capture was collected.\n",
"if modifieddates:\n",
"    response = w.create_watchlist(modifieddates)\n",
"    # str() keeps the print from raising TypeError should the JSON payload\n",
"    # ever be something other than a plain string.\n",
"    print(\"Webarchiv Merklistencode: \" + str(response.json()))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a `WebarchivSession` object. It provides convenience methods for easy access using your API key."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"from webarchiv import WebarchivSession\n",
"\n",
"# Never store the API key in the notebook itself: take it from the\n",
"# WEBARCHIV_API_KEY environment variable, or prompt for it when unset.\n",
"# NOTE(review): a key that was ever committed in this cell should be rotated.\n",
"apikey = os.environ.get('WEBARCHIV_API_KEY') or getpass.getpass('Webarchiv API key: ')\n",
"\n",
"# Session object used by every following cell.\n",
"w = WebarchivSession(apikey)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Document whose archived captures are searched.\n",
"url = \"http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\"\n",
"\n",
"# NOTE(review): the two 14-digit strings are presumably the lower and upper\n",
"# timestamp bounds of the search - confirm against the webarchiv client.\n",
"response = w.wayback_search(\n",
"    url,\n",
"    \"20150101000000\",\n",
"    \"20151231235959\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Stop here if the search request failed. Raising (instead of calling exit())\n",
"# halts a full run at this cell while keeping the kernel and its variables alive.\n",
"if response.status_code != 200:\n",
"    raise RuntimeError(\"Something went wrong ...\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Anzahl der Versionen: 343\n"
]
}
],
"source": [
"# Report the 'total' field of the search result.\n",
"print(\n",
"    \"Anzahl der Versionen: \",\n",
"    response.json()['total'],\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150101220053/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150301233010/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150609061707/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150617213901/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150709235648/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150717223402/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150804221022/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150825120602/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150828000644/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150902014359/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20150924004200/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20151015041353/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20151016015450/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20151112044118/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20151216144759/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://wayback/web/20151217154540/http://www.parlament.gv.at/WWER/SITZPLAN/sitzplan2Nr.pdf\n"
]
}
],
"source": [
"# One {url, timestamp} entry per capture whose checksum differs from the\n",
"# previously accepted capture.\n",
"modifieddates = []\n",
"lastchecksum = ''\n",
"\n",
"for capture in response.json()['hits']:\n",
"    capturedate = capture['c']\n",
"    resp = w.fragment_checksum_binary(url, capturedate)\n",
"    # Parse the checksum response once and reuse it.\n",
"    fragment = resp.json()\n",
"\n",
"    # Skip captures with a non-zero return code before reading the checksum.\n",
"    if fragment['returncode'] != 0:\n",
"        continue\n",
"\n",
"    checksum = fragment['checksum']\n",
"    if checksum != lastchecksum:\n",
"        modification = {\"url\": url, \"timestamp\": capturedate}\n",
"        print(\"http://wayback/web/\" + capturedate + \"/\" + url)\n",
"        modifieddates.append(modification)\n",
"        lastchecksum = checksum\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Webarchiv Merklistencode: al\n"
]
}
],
"source": [
"# Only create a watchlist when at least one modified capture was collected.\n",
"if modifieddates:\n",
"    response = w.create_watchlist(modifieddates)\n",
"    # str() keeps the print from raising TypeError should the JSON payload\n",
"    # ever be something other than a plain string.\n",
"    print(\"Webarchiv Merklistencode: \" + str(response.json()))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment