diff --git a/4 - Webarchive.ipynb b/4 - Webarchive.ipynb index 9be661866cd6b8be96a7065c235d793e0f2cede9..ecfa976d442e4cffd4271756d7bf176a2d3a17b3 100644 --- a/4 - Webarchive.ipynb +++ b/4 - Webarchive.ipynb @@ -463,6 +463,30 @@ "source": [ "* Make Andreas happy :)" ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Tracking" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "We'd love to properly count the unique visitors in the Webarchive backend, so we kindly ask you to **opt in to tracking by instantiating `webarchiv.WebarchivSession` with the parameter `allow_tracking=True`**.\n", + "\n", + "This sends your SHA256-hashed MAC address as a fingerprint to the server on authentication. It is only ever used to count unique users.\n", + "\n", + "If you leave `allow_tracking` at the default value `False`, an empty string is sent as fingerprint." + ] } ], "metadata": { diff --git a/4.2 - Webarchive - Full Text Search.ipynb b/4.2 - Webarchive - Full Text Search.ipynb index 38b93c6a5891bddb89c89b9ce773d128fc6d8cec..c019bfe77381947b66c8928cbd55526b3a059ecb 100644 --- a/4.2 - Webarchive - Full Text Search.ipynb +++ b/4.2 - Webarchive - Full Text Search.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "source": [ "# 4.2 - Webarchive - Full Text Search\n", "\n", @@ -11,7 +15,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, "source": [ "Let's start a WebarchivSession" ] @@ -19,20 +27,40 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [], "source": [ "import webarchiv\n", "\n", "API_KEY = 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ'\n", "\n", - "s = webarchiv.WebarchivSession(API_KEY)" + "# If you want to allow 
us to count you as a unique visitor, please set allow_tracking to True\n", + "s = webarchiv.WebarchivSession(API_KEY, allow_tracking=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We want to find the first occurrence of the term *Westbalkanroute* in the webarchive." ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 2, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [], "source": [ "r = s.fulltext_search('Westbalkanroute')" @@ -40,8 +68,12 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [ { "data": { @@ -52,7 +84,7 @@ " 'returncode': 0,\n", " 'total': 10,\n", " 'type': 1,\n", - " 'took': 9716,\n", + " 'took': 1299,\n", " 'version': '0.1.0',\n", " 'searchstring': 'westbalkanroute',\n", " 'grouping': '',\n", @@ -199,7 +231,7 @@ " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]}]}" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -208,15 +240,1185 @@ "r.json()" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Let's use jsonpath to make collecting values from the result JSON easier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "from jsonpath_ng import parse\n", + "\n", + "def jp(http_response, query_string):\n", + " compiled = parse(query_string)\n", + " json = http_response.json()\n", + " return [match.value for match in compiled.find(json)]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_domains = lambda r: jp(r, '$.hits[*].domain')\n", + "number_hits = lambda r: jp(r, '$.hits[*].total')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[60, 40, 25, 22, 18, 11, 10, 9, 6, 4]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "number_hits(r)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['diepresse.com',\n", + " 'bmeia.gv.at',\n", + " 'spoe.at',\n", + " 'derstandard.at',\n", + " 'wienerzeitung.at',\n", + " 'bmi.gv.at',\n", + " 'orf.at',\n", + " 'vol.at',\n", + " 'wordpress.com',\n", + " 'politikeronline.at']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_domains(r)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We get one hit per domain. If we want to find the *first* occurrence, we have to follow the rabbit down the hole:\n", + "\n", + "The last search gave us all domains. We need to get all URLs for each of these domains, and then all capture dates for every URL, and select the first one." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Steps:\n", + "\n", + "* Get all domains (already done) `s.fulltext_search()`\n", + "* Get all URLs `s.fulltext_search_within_domain()`\n", + "* Get all captures (dates) `s.fulltext_search_within_url()`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Let's execute the single steps with a single domain and a single URL respectively, then automate the search over all domains and URLs.\n", + "\n", + "We'll use the first returned domain `diepresse.com`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "presse_r = s.fulltext_search_within_domain('Westbalkanroute', domain='diepresse.com')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'https://webarchiv.onb.ac.at/contexts/fulltextsearchresult.jsonld',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 3,\n", + " 'type': 1,\n", + " 'took': 1785,\n", + " 'version': '0.1.0',\n", + " 'searchstring': 'Westbalkanroute',\n", + " 'grouping': 'diepresse.com',\n", + " 'subtype': 2,\n", + " 'hits': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523182542/http://diepresse.com',\n", + " 'id': '3ef624abfe4a831fcf92dd084b60e221',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': 'http://diepresse.com',\n", + " 'total': 52,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523182542',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 
'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170210180036/http://www.diepresse.com',\n", + " 'id': '5bc09002b8151a0bc1b72507ecb2b885',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': 'http://www.diepresse.com',\n", + " 'total': 7,\n", + " 'value': 'http://www.diepresse.com',\n", + " 'timestamp': '20170210180036',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151026192026/http://diepresse.com/home/politik/eu/index.do',\n", + " 'id': '6f8f23cc122d81337a2b7fcaa49c15c6',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': 'http://diepresse.com/home/politik/eu/index.do',\n", + " 'total': 1,\n", + " 'value': 'http://diepresse.com/home/politik/eu/index.do',\n", + " 'timestamp': '20151026192026',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]}]}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "presse_r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We'll use the first returned URL ('seed') `http://diepresse.com`" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "diepresse_r = s.fulltext_search_within_url('Westbalkanroute', 
url='http://diepresse.com', pagesize=52)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'https://webarchiv.onb.ac.at/contexts/fulltextsearchresult.jsonld',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 52,\n", + " 'type': 1,\n", + " 'took': 1103,\n", + " 'version': '0.1.0',\n", + " 'searchstring': 'Westbalkanroute',\n", + " 'grouping': 'http://diepresse.com',\n", + " 'subtype': 3,\n", + " 'hits': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151026182143/http://diepresse.com',\n", + " 'id': 'cd5b004aa4e87d9b45e07c1a25315460',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151026182143',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151027031242/http://diepresse.com',\n", + " 'id': '917ca1de057bfb18524756f9b11b3a9f',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151027031242',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151119183028/http://diepresse.com',\n", + 
" 'id': '59090f47ac0c8f4e0a5859669c36388c',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151119183028',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151216144418/http://diepresse.com',\n", + " 'id': 'a82320bb154076295fe3b3f9a1609471',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151216144418',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20160923170917/http://diepresse.com',\n", + " 'id': '8a9e74c0bb1b36d4ea11f3d51bf2cf68',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20160923170917',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170210180036/http://diepresse.com',\n", + " 'id': 'a1c510c19cb96e6717053a565de8dba8',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + 
" 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170210180036',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170210185606/http://diepresse.com',\n", + " 'id': '35f47bb670dd41f734fa277051b4a4e4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170210185606',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523170736/http://diepresse.com',\n", + " 'id': '8a92dab60ace3d913d205277c2bc4bda',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523170736',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523181411/http://diepresse.com',\n", + " 'id': '875d3d08b9c767b9d06725ee4bffad5c',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': 
'20170523181411',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523182542/http://diepresse.com',\n", + " 'id': '3ef624abfe4a831fcf92dd084b60e221',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523182542',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523183123/http://diepresse.com',\n", + " 'id': '5776e809c36ddca29f8bd89c489358f7',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523183123',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002155240/http://diepresse.com',\n", + " 'id': 'e4db42e444f4bfa11866da8992fbb21e',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002155240',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 
'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002160616/http://diepresse.com',\n", + " 'id': 'eb6a1c5fe8de15093ee335a0bac8852e',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002160616',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002174537/http://diepresse.com',\n", + " 'id': 'b333cd6e2abc8f3b65018b3ea168af5d',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002174537',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002175606/http://diepresse.com',\n", + " 'id': '2333bc82d1e28535ef9c5dff404bd762',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002175606',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der 
Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002175625/http://diepresse.com',\n", + " 'id': 'd1d802cb6cee93908a50987d058d3ab4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002175625',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002184049/http://diepresse.com',\n", + " 'id': '63083fe3d6d8890219b8688460f97fef',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002184049',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002185104/http://diepresse.com',\n", + " 'id': 'c89c9a0b2c2a0cf41d6b7034074072e6',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002185104',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 
'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002212111/http://diepresse.com',\n", + " 'id': 'e5ffd9af786f8a17f4898e70eab5b7ff',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002212111',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003033941/http://diepresse.com',\n", + " 'id': '6c8961059555319d141faf1807ad8ef5',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003033941',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003070039/http://diepresse.com',\n", + " 'id': 'e8fd4b360fe9aeb1c2851a83a10af824',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003070039',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 
'http://wayback/web/20171003084602/http://diepresse.com',\n", + " 'id': '6981685e20c142e9f675125b313bb078',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003084602',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003092853/http://diepresse.com',\n", + " 'id': 'abce25d94557ab05352f8a710dc2e010',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003092853',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003115811/http://diepresse.com',\n", + " 'id': '3e2db855ee3e8e2be700440df8ae6867',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003115811',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003123112/http://diepresse.com',\n", + " 'id': 
'65c58a98824b3ab983c50ac3e0214d5a',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003123112',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003132945/http://diepresse.com',\n", + " 'id': '3008c7cba008bf78f97f953308972473',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003132945',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003164701/http://diepresse.com',\n", + " 'id': '62fb975397349f5b35a82e431e06bafc',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003164701',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003165723/http://diepresse.com',\n", + " 'id': '49b97ab42aa2fe1b740321ed6e975b47',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 
'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003165723',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003180924/http://diepresse.com',\n", + " 'id': '80a6a6008d6232b43c1d7a37fc6eb829',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003180924',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003181543/http://diepresse.com',\n", + " 'id': '26bfdfb84b42aa6bdcfa812a4a8db23b',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003181543',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003181627/http://diepresse.com',\n", + " 'id': '5e17e7e5707a9cbe7f300ce8f4cfe027',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': 
'20171003181627',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004164632/http://diepresse.com',\n", + " 'id': '5c6a8f435652a50a5642be28a1ca2493',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004164632',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004165546/http://diepresse.com',\n", + " 'id': '7d37634d0fe34bdf7f314c862599461d',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004165546',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004165548/http://diepresse.com',\n", + " 'id': 'f05eb43b37217a355a4639222523da56',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004165548',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 
'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004181003/http://diepresse.com',\n", + " 'id': '41cfb3dc400932d0679787c6cff87a55',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004181003',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004181807/http://diepresse.com',\n", + " 'id': '71ca86ca0c4b2c51ab893181bae8e4e4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004181807',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004190613/http://diepresse.com',\n", + " 'id': 'f1771c5c8c96ffab6ef5d5de4e011c39',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004190613',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der 
Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171005002251/http://diepresse.com',\n", + " 'id': 'b9353c4ac3c18d87ca4be9c29c113dcd',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171005002251',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171005013456/http://diepresse.com',\n", + " 'id': 'ff5f72c8a1f5e54814d5c3aabc1ae9a2',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171005013456',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013152553/http://diepresse.com',\n", + " 'id': '6a2c998d606c326d571e2954e316677f',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013152553',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 
'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013173505/http://diepresse.com',\n", + " 'id': '15e06c8aef7eff6b240292884a23b3e7',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013173505',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013174308/http://diepresse.com',\n", + " 'id': '0f87d81c5794388bac78b09a71c22995',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 1,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013174308',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013175058/http://diepresse.com',\n", + " 'id': '32d0f3336d26c92b056f3d5bf1b80e5c',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013175058',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 
'http://wayback/web/20171013183341/http://diepresse.com',\n", + " 'id': '084a00afa920b3c934a82e0e28dda120',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 1,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013183341',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013184235/http://diepresse.com',\n", + " 'id': '110c1c117ea56390ce9d5b517abe6281',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013184235',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013184257/http://diepresse.com',\n", + " 'id': 'baedfd42ad79096539548f47c38767da',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013184257',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013190102/http://diepresse.com',\n", + " 'id': 
'659c2f61ea7d6362c70c75d44d1e84a4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013190102',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014174350/http://diepresse.com',\n", + " 'id': '9677f6b32b361b495147682c729f2c10',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014174350',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014174853/http://diepresse.com',\n", + " 'id': '198aabe1082aa0c7840b80f3fb1ef19a',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014174853',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014175103/http://diepresse.com',\n", + " 'id': '7e5e8b94739546b63ebf0c82a92ae175',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 
'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014175103',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014175508/http://diepresse.com',\n", + " 'id': '5ef3e90835730b74f4d23e200c0cc691',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014175508',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014175545/http://diepresse.com',\n", + " 'id': '29cd0c0df2d41b3e40bdca6962fdd349',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014175545',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]}],\n", + " 'page': 1,\n", + " 'pagesize': 52}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diepresse_r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Ok, in `timestamp` are all the capture dates for this URL.\n", + "\n", 
# Accessors over a search response, built on the notebook's `jp` helper
# (presumably a JSONPath evaluator defined in an earlier cell -- confirm there).
def all_domains(r):
    """Return the `domain` field of the hits in response *r*."""
    return jp(r, '$.hits[*].domain')


def all_urls(r):
    """Return the `value` field (the URL) of the hits in response *r*."""
    return jp(r, '$.hits[*].value')


def all_captures(r):
    """Return the `timestamp` field (capture date) of the hits in response *r*."""
    return jp(r, '$.hits[*].timestamp')


def number_hits(r):
    """Return the `total` field of the hits in response *r*."""
    return jp(r, '$.hits[*].total')


def search_deep_for_first_captures(query_string, session=None):
    """
    Find the earliest capture matching *query_string*, overall and per URL.

    Runs a fulltext search, then drills down into every domain of the result,
    every URL within that domain, and finally all captures of that URL.

    :param query_string: String to search for
    :param session: Session object providing `fulltext_search`,
                    `fulltext_search_within_domain` and
                    `fulltext_search_within_url`. Defaults to the
                    notebook-global session `s`.
    :return: Tuple `(earliest, first_hit_for_url)` where `first_hit_for_url`
             maps each URL to its smallest capture timestamp and `earliest`
             is the smallest of those values. `(None, {})` if nothing matched.
    """
    session = s if session is None else session
    first_hit_for_url = {}

    ft_r = session.fulltext_search(query_string)
    # The session methods return None after displaying an HTTPError.
    if ft_r is None:
        return None, first_hit_for_url

    # dict.fromkeys de-duplicates while keeping first-seen order, so a domain
    # appearing in several hits is only queried once.
    for domain in dict.fromkeys(all_domains(ft_r)):
        if not domain:
            # Hits may carry `domain: None`; there is nothing to drill into.
            continue
        ft_dom_r = session.fulltext_search_within_domain(query_string, domain=domain)
        if ft_dom_r is None:
            continue
        # Each URL's hit total is used as page size so that a single request
        # returns all of its captures.
        for url, pagesize in zip(all_urls(ft_dom_r), number_hits(ft_dom_r)):
            ft_url_r = session.fulltext_search_within_url(
                query_string, url=url, pagesize=pagesize)
            if ft_url_r is None:
                continue
            captures = all_captures(ft_url_r)
            if captures:
                # Timestamps are fixed-width YYYYMMDDHHmmss strings, so the
                # lexicographic minimum is also the chronological one.
                first_hit_for_url[url] = min(captures)

    earliest = min(first_hit_for_url.values(), default=None)
    return earliest, first_hit_for_url
def fulltext_search_within_domain(self, query_string, domain, from_=None, to_=None):
    """
    Start a fulltext seed search query in the Webarchive.

    :param query_string: String to search for
    :param domain: Search only within this domain name
    :param from_: Optional earliest date bound for the search
                  in the format YYYYMM.
    :param to_: Optional latest date bound for the search
                in the format YYYYMM.
    :return: HTTP Response object as returned by waitForResponse(),
             or None if the request raised an HTTPError (the error is
             displayed, not re-raised).
    """
    # The domain restriction is sent as request parameter 'g'.
    params = {'q': query_string, 'g': domain}
    if from_:
        params['from'] = from_
    if to_:
        params['to'] = to_

    try:
        response = self._get(op='/search/fulltext/seed', params=params)
        return self.waitForResponse(response)

    except HTTPError as e:
        self._display_http_error(e)
        return None


def fulltext_search_within_url(self, query_string, url, pagesize=10, from_=None, to_=None):
    """
    Start a fulltext capture search query in the Webarchive.

    :param query_string: String to search for
    :param url: Search only captures starting at this exact web address
    :param pagesize: Value sent as the 'pagesize' request parameter.
                     Default value is 10.
    :param from_: Optional earliest date bound for the search
                  in the format YYYYMM.
    :param to_: Optional latest date bound for the search
                in the format YYYYMM.
    :return: HTTP Response object as returned by waitForResponse(),
             or None if the request raised an HTTPError (the error is
             displayed, not re-raised).
    """
    # The URL restriction is sent as request parameter 'g'.
    params = {'q': query_string, 'g': url, 'pagesize': pagesize}
    if from_:
        params['from'] = from_
    if to_:
        params['to'] = to_

    try:
        response = self._get(op='/search/fulltext/capture', params=params)
        return self.waitForResponse(response)

    except HTTPError as e:
        self._display_http_error(e)
        return None