From 01be7ceae0a7ba90537f9dc64682f1a06e8dfbc7 Mon Sep 17 00:00:00 2001 From: Stefan Karner Date: Thu, 2 May 2019 16:00:06 +0200 Subject: [PATCH] Add Webarchive 4.2; add user tracking for webarchive --- 4 - Webarchive.ipynb | 24 + 4.2 - Webarchive - Full Text Search.ipynb | 1226 ++++++++++++++++++++- webarchiv.py | 58 +- 3 files changed, 1290 insertions(+), 18 deletions(-) diff --git a/4 - Webarchive.ipynb b/4 - Webarchive.ipynb index 9be6618..ecfa976 100644 --- a/4 - Webarchive.ipynb +++ b/4 - Webarchive.ipynb @@ -463,6 +463,30 @@ "source": [ "* Make Andreas happy :)" ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Tracking" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "We'd love to properly count the unique visitors in the Webarchive backend, so we kindly ask you to **opt in to tracking by instantiating `webarchiv.WebarchivSession` with the parameter `allow_tracking=True`**.\n", + "\n", + "This sends your SHA256-hashed MAC address as a fingerprint to the server on authentication. It is only ever used to count unique users.\n", + "\n", + "If you leave `allow_tracking` at the default value `False`, an empty string is sent as fingerprint." 
+ ] } ], "metadata": { diff --git a/4.2 - Webarchive - Full Text Search.ipynb b/4.2 - Webarchive - Full Text Search.ipynb index 38b93c6..c019bfe 100644 --- a/4.2 - Webarchive - Full Text Search.ipynb +++ b/4.2 - Webarchive - Full Text Search.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "source": [ "# 4.2 - Webarchive - Full Text Search\n", "\n", @@ -11,7 +15,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, "source": [ "Let's start a WebarchivSession" ] @@ -19,20 +27,40 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [], "source": [ "import webarchiv\n", "\n", "API_KEY = 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ'\n", "\n", - "s = webarchiv.WebarchivSession(API_KEY)" + "# If you want to allow us to count you as a unique visitor, please set allow_tracking to True\n", + "s = webarchiv.WebarchivSession(API_KEY, allow_tracking=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We want to find the first occurrence of the term *Westbalkanroute* in the webarchive." 
] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 2, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [], "source": [ "r = s.fulltext_search('Westbalkanroute')" @@ -40,8 +68,12 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [ { "data": { @@ -52,7 +84,7 @@ " 'returncode': 0,\n", " 'total': 10,\n", " 'type': 1,\n", - " 'took': 9716,\n", + " 'took': 1299,\n", " 'version': '0.1.0',\n", " 'searchstring': 'westbalkanroute',\n", " 'grouping': '',\n", @@ -199,7 +231,7 @@ " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]}]}" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -208,15 +240,1185 @@ "r.json()" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Let's use jsonpath to make collecting values from the result JSON easier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "from jsonpath_ng import parse\n", + "\n", + "def jp(http_response, query_string):\n", + " compiled = parse(query_string)\n", + " json = http_response.json()\n", + " return [match.value for match in compiled.find(json)]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "all_domains = lambda r: jp(r, '$.hits[*].domain')\n", + "number_hits = lambda r: jp(r, '$.hits[*].total')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[60, 40, 25, 22, 18, 11, 10, 9, 6, 4]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "number_hits(r)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['diepresse.com',\n", + " 'bmeia.gv.at',\n", + " 'spoe.at',\n", + " 'derstandard.at',\n", + " 'wienerzeitung.at',\n", + " 'bmi.gv.at',\n", + " 'orf.at',\n", + " 'vol.at',\n", + " 'wordpress.com',\n", + " 'politikeronline.at']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_domains(r)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We get one hit per domain. If we want to find the *first* occurrence, we have to follow the rabbit down the hole:\n", + "\n", + "The last search gave us all domains. We need to get all URLs for each of these domains, and then all capture dates for every URL, and select the first one." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Steps:\n", + "\n", + "* Get all domains (already done) `s.fulltext_search()`\n", + "* Get all URLs `s.fulltext_search_within_domain()`\n", + "* Get all captures (dates) `s.fulltext_search_within_url()`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Let's execute the single steps with a single domain and a single URL respectively, then automate the search over all domains and URLs.\n", + "\n", + "We'll use the first returned domain `diepresse.com`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "presse_r = s.fulltext_search_within_domain('Westbalkanroute', domain='diepresse.com')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'https://webarchiv.onb.ac.at/contexts/fulltextsearchresult.jsonld',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 3,\n", + " 'type': 1,\n", + " 'took': 1785,\n", + " 'version': '0.1.0',\n", + " 'searchstring': 'Westbalkanroute',\n", + " 'grouping': 'diepresse.com',\n", + " 'subtype': 2,\n", + " 'hits': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523182542/http://diepresse.com',\n", + " 'id': '3ef624abfe4a831fcf92dd084b60e221',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': 'http://diepresse.com',\n", + " 'total': 52,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523182542',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 
'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170210180036/http://www.diepresse.com',\n", + " 'id': '5bc09002b8151a0bc1b72507ecb2b885',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': 'http://www.diepresse.com',\n", + " 'total': 7,\n", + " 'value': 'http://www.diepresse.com',\n", + " 'timestamp': '20170210180036',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151026192026/http://diepresse.com/home/politik/eu/index.do',\n", + " 'id': '6f8f23cc122d81337a2b7fcaa49c15c6',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': 'http://diepresse.com/home/politik/eu/index.do',\n", + " 'total': 1,\n", + " 'value': 'http://diepresse.com/home/politik/eu/index.do',\n", + " 'timestamp': '20151026192026',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]}]}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "presse_r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We'll use the first returned URL ('seed') `http://diepresse.com`" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "diepresse_r = s.fulltext_search_within_url('Westbalkanroute', 
url='http://diepresse.com', pagesize=52)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'https://webarchiv.onb.ac.at/contexts/fulltextsearchresult.jsonld',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 52,\n", + " 'type': 1,\n", + " 'took': 1103,\n", + " 'version': '0.1.0',\n", + " 'searchstring': 'Westbalkanroute',\n", + " 'grouping': 'http://diepresse.com',\n", + " 'subtype': 3,\n", + " 'hits': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151026182143/http://diepresse.com',\n", + " 'id': 'cd5b004aa4e87d9b45e07c1a25315460',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151026182143',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151027031242/http://diepresse.com',\n", + " 'id': '917ca1de057bfb18524756f9b11b3a9f',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151027031242',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151119183028/http://diepresse.com',\n", + 
" 'id': '59090f47ac0c8f4e0a5859669c36388c',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151119183028',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20151216144418/http://diepresse.com',\n", + " 'id': 'a82320bb154076295fe3b3f9a1609471',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20151216144418',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20160923170917/http://diepresse.com',\n", + " 'id': '8a9e74c0bb1b36d4ea11f3d51bf2cf68',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20160923170917',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170210180036/http://diepresse.com',\n", + " 'id': 'a1c510c19cb96e6717053a565de8dba8',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + 
" 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170210180036',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170210185606/http://diepresse.com',\n", + " 'id': '35f47bb670dd41f734fa277051b4a4e4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170210185606',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523170736/http://diepresse.com',\n", + " 'id': '8a92dab60ace3d913d205277c2bc4bda',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523170736',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523181411/http://diepresse.com',\n", + " 'id': '875d3d08b9c767b9d06725ee4bffad5c',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': 
'20170523181411',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523182542/http://diepresse.com',\n", + " 'id': '3ef624abfe4a831fcf92dd084b60e221',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523182542',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20170523183123/http://diepresse.com',\n", + " 'id': '5776e809c36ddca29f8bd89c489358f7',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20170523183123',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002155240/http://diepresse.com',\n", + " 'id': 'e4db42e444f4bfa11866da8992fbb21e',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002155240',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 
'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002160616/http://diepresse.com',\n", + " 'id': 'eb6a1c5fe8de15093ee335a0bac8852e',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002160616',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002174537/http://diepresse.com',\n", + " 'id': 'b333cd6e2abc8f3b65018b3ea168af5d',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002174537',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002175606/http://diepresse.com',\n", + " 'id': '2333bc82d1e28535ef9c5dff404bd762',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002175606',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der 
Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002175625/http://diepresse.com',\n", + " 'id': 'd1d802cb6cee93908a50987d058d3ab4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002175625',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002184049/http://diepresse.com',\n", + " 'id': '63083fe3d6d8890219b8688460f97fef',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002184049',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002185104/http://diepresse.com',\n", + " 'id': 'c89c9a0b2c2a0cf41d6b7034074072e6',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002185104',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 
'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171002212111/http://diepresse.com',\n", + " 'id': 'e5ffd9af786f8a17f4898e70eab5b7ff',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171002212111',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003033941/http://diepresse.com',\n", + " 'id': '6c8961059555319d141faf1807ad8ef5',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003033941',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003070039/http://diepresse.com',\n", + " 'id': 'e8fd4b360fe9aeb1c2851a83a10af824',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003070039',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 
'http://wayback/web/20171003084602/http://diepresse.com',\n", + " 'id': '6981685e20c142e9f675125b313bb078',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003084602',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003092853/http://diepresse.com',\n", + " 'id': 'abce25d94557ab05352f8a710dc2e010',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003092853',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003115811/http://diepresse.com',\n", + " 'id': '3e2db855ee3e8e2be700440df8ae6867',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003115811',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003123112/http://diepresse.com',\n", + " 'id': 
'65c58a98824b3ab983c50ac3e0214d5a',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003123112',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003132945/http://diepresse.com',\n", + " 'id': '3008c7cba008bf78f97f953308972473',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003132945',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003164701/http://diepresse.com',\n", + " 'id': '62fb975397349f5b35a82e431e06bafc',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003164701',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003165723/http://diepresse.com',\n", + " 'id': '49b97ab42aa2fe1b740321ed6e975b47',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 
'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003165723',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003180924/http://diepresse.com',\n", + " 'id': '80a6a6008d6232b43c1d7a37fc6eb829',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003180924',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003181543/http://diepresse.com',\n", + " 'id': '26bfdfb84b42aa6bdcfa812a4a8db23b',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171003181543',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171003181627/http://diepresse.com',\n", + " 'id': '5e17e7e5707a9cbe7f300ce8f4cfe027',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': 
'20171003181627',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004164632/http://diepresse.com',\n", + " 'id': '5c6a8f435652a50a5642be28a1ca2493',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004164632',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004165546/http://diepresse.com',\n", + " 'id': '7d37634d0fe34bdf7f314c862599461d',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004165546',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004165548/http://diepresse.com',\n", + " 'id': 'f05eb43b37217a355a4639222523da56',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004165548',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 
'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004181003/http://diepresse.com',\n", + " 'id': '41cfb3dc400932d0679787c6cff87a55',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004181003',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004181807/http://diepresse.com',\n", + " 'id': '71ca86ca0c4b2c51ab893181bae8e4e4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004181807',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171004190613/http://diepresse.com',\n", + " 'id': 'f1771c5c8c96ffab6ef5d5de4e011c39',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171004190613',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der 
Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171005002251/http://diepresse.com',\n", + " 'id': 'b9353c4ac3c18d87ca4be9c29c113dcd',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171005002251',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171005013456/http://diepresse.com',\n", + " 'id': 'ff5f72c8a1f5e54814d5c3aabc1ae9a2',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171005013456',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013152553/http://diepresse.com',\n", + " 'id': '6a2c998d606c326d571e2954e316677f',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013152553',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 
'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013173505/http://diepresse.com',\n", + " 'id': '15e06c8aef7eff6b240292884a23b3e7',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013173505',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013174308/http://diepresse.com',\n", + " 'id': '0f87d81c5794388bac78b09a71c22995',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 1,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013174308',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013175058/http://diepresse.com',\n", + " 'id': '32d0f3336d26c92b056f3d5bf1b80e5c',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013175058',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 
'http://wayback/web/20171013183341/http://diepresse.com',\n", + " 'id': '084a00afa920b3c934a82e0e28dda120',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 1,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013183341',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013184235/http://diepresse.com',\n", + " 'id': '110c1c117ea56390ce9d5b517abe6281',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013184235',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013184257/http://diepresse.com',\n", + " 'id': 'baedfd42ad79096539548f47c38767da',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013184257',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171013190102/http://diepresse.com',\n", + " 'id': 
'659c2f61ea7d6362c70c75d44d1e84a4',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171013190102',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014174350/http://diepresse.com',\n", + " 'id': '9677f6b32b361b495147682c729f2c10',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014174350',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014174853/http://diepresse.com',\n", + " 'id': '198aabe1082aa0c7840b80f3fb1ef19a',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014174853',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014175103/http://diepresse.com',\n", + " 'id': '7e5e8b94739546b63ebf0c82a92ae175',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 
'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014175103',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014175508/http://diepresse.com',\n", + " 'id': '5ef3e90835730b74f4d23e200c0cc691',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014175508',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]},\n", + " {'@context': 'https://webarchiv.onb.ac.at/contexts/eshit.jsonld',\n", + " '@id': 'http://wayback/web/20171014175545/http://diepresse.com',\n", + " 'id': '29cd0c0df2d41b3e40bdca6962fdd349',\n", + " 'snapshotavailable': 0,\n", + " 'svgavailable': 0,\n", + " 'domain': None,\n", + " 'total': 0,\n", + " 'value': 'http://diepresse.com',\n", + " 'timestamp': '20171014175545',\n", + " 'restriction': 1,\n", + " 'locked': 1,\n", + " 'highlights': [{'@context': 'https://webarchiv.onb.ac.at/contexts/eshighlight.jsonld',\n", + " 'typ': '',\n", + " 'highlightedtext': 'Der Inhalt ist gesperrt und kann nicht angezeigt werden'}]}],\n", + " 'page': 1,\n", + " 'pagesize': 52}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diepresse_r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Ok, in `timestamp` are all the capture dates for this URL.\n", + "\n", 
+ "The date format is `YYYYMMDDHHmmss`; the timezone is still unknown (**TODO**).\n", + "\n", + "Collect them all:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "all_captures = lambda r: jp(r, '$.hits[*].timestamp')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20151026182143',\n", + " '20151027031242',\n", + " '20151119183028',\n", + " '20151216144418',\n", + " '20160923170917']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cap = all_captures(diepresse_r)\n", + "cap[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Because the format is fixed-width, they sort neatly alphabetically, so the alphabetically smallest timestamp is the earliest capture:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'20151026182143'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "min(cap)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Batch Processing" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "all_domains = lambda r: jp(r, '$.hits[*].domain')\n", + "all_urls = lambda r: jp(r, '$.hits[*].value')\n", + "all_captures = lambda r: jp(r, '$.hits[*].timestamp')\n", + "number_hits = lambda r: jp(r, '$.hits[*].total')\n", + "\n", + "def search_deep_for_first_captures(query_string):\n", + " ft_r = s.fulltext_search(query_string)\n", + " first_hit_for_url = {}\n", + " for domain in all_domains(ft_r):\n", + " ft_dom_r = s.fulltext_search_within_domain(query_string, domain=domain)\n", + " urls = all_urls(ft_dom_r)\n", + " hits = number_hits(ft_dom_r)\n", + " for url, 
pagesize in zip(urls, hits):\n", + " ft_url_r = s.fulltext_search_within_url(query_string, url=url, pagesize=pagesize)\n", + " captures = all_captures(ft_url_r)\n", + " min_capture = min(captures)\n", + " first_hit_for_url[url] = min_capture\n", + " return min(first_hit_for_url.values()), first_hit_for_url" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('20150911190447',\n", + " {'http://diepresse.com': '20151026182143',\n", + " 'http://www.diepresse.com': '20151026182143',\n", + " 'http://diepresse.com/home/politik/eu/index.do': '20151026192026',\n", + " 'http://www.bmeia.gv.at': '20151010185208',\n", + " 'http://www.vorarlberg.spoe.at/index.php': '20151022181252',\n", + " 'http://derstandard.at': '20150911190447',\n", + " 'http://www.derstandard.at': '20150911190447',\n", + " 'http://www.wienerzeitung.at': '20151028214142',\n", + " 'http://www.wienerzeitung.at/': '20151028214142',\n", + " 'http://www.wienerzeitung.at/nachrichten/oesterreich/politik/': '20160313183203',\n", + " 'http://www.bmi.gv.at': '20170110140316',\n", + " 'http://orf.at/': '20160313195759',\n", + " 'http://steiermark.orf.at': '20170304194626',\n", + " 'http://www.orf.at': '20160313195759',\n", + " 'http://news.orf.at': '20160313195751',\n", + " 'http://news.orf.at/': '20160313195751',\n", + " 'http://salzburg.orf.at': '20160418181246',\n", + " 'http://salzburg.orf.at/': '20160418181246',\n", + " 'http://www.vol.at/news/politik': '20151026182823',\n", + " 'http://www.vol.at': '20160320181655',\n", + " 'http://www.vol.at/': '20160320181655',\n", + " 'http://jachwe.wordpress.com': '20170220181340',\n", + " 'https://jachwe.wordpress.com': '20170220181340',\n", + " 'http://www.politikeronline.at': '20151115182507'})" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"search_deep_for_first_captures('Westbalkanroute')" + ] } ], "metadata": { + "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", diff --git a/webarchiv.py b/webarchiv.py index af8caf7..a2fbf53 100644 --- a/webarchiv.py +++ b/webarchiv.py @@ -129,14 +129,13 @@ class WebarchivSession: def fulltext_search(self, query_string, from_=None, to_=None): """ Start a fulltext search query in the Webarchive. - The current status of running queries can be read via status_open_queries(). :param query_string: String to search for :param from_: Optional earliest date bound for the search in the format YYYYMM. :param to_: Optional latest date bound for the search in the format YYYYMM. - :return: None + :return: HTTP Response object """ params = {'q': query_string} if from_: @@ -152,17 +151,66 @@ class WebarchivSession: self._display_http_error(e) print('Query for "{}" not added'.format(query_string)) + def fulltext_search_within_domain(self, query_string, domain, from_=None, to_=None): + """ + Start a fulltext seed search query in the Webarchive. + + :param query_string: String to search for + :param domain: Search only within this domain name + :param from_: Optional earliest date bound for the search + in the format YYYYMM. + :param to_: Optional latest date bound for the search + in the format YYYYMM. + :return: HTTP Response object + """ + params = {'q': query_string, 'g': domain} + if from_: + params['from'] = from_ + if to_: + params['to'] = to_ + + try: + response = self._get(op='/search/fulltext/seed', params=params) + return self.waitForResponse(response) + + except HTTPError as e: + self._display_http_error(e) + + def fulltext_search_within_url(self, query_string, url, pagesize=10, from_=None, to_=None): + """ + Start a fulltext capture search query in the Webarchive. 
+ + :param query_string: String to search for + :param url: Search only captures starting at this exact web address + :param from_: Optional earliest date bound for the search + in the format YYYYMM. + :param to_: Optional latest date bound for the search + in the format YYYYMM. + :return: HTTP Response object + """ + params = {'q': query_string, 'g': url, 'pagesize': pagesize} + if from_: + params['from'] = from_ + if to_: + params['to'] = to_ + + try: + response = self._get(op='/search/fulltext/capture', params=params) + return self.waitForResponse(response) + + except HTTPError as e: + self._display_http_error(e) + def wayback_search(self, query_string, from_=None, to_=None): """ Start a wayback search query in the Webarchive. - The current status of running queries can be read via status_open_queries(). :param query_string: String to search for :param from_: Optional earliest date bound for the search in the format YYYYMM. :param to_: Optional latest date bound for the search in the format YYYYMM. - :return: None + :return: HTTP Response object """ params = {'q': query_string} if from_: @@ -213,7 +261,6 @@ class WebarchivSession: def domain_name_search(self, query_string, page_=1, pagesize_=100): """ Start a domain name search in the Webarchive. - The current status of running queries can be read via status_open_queries(). :param query_string: String to search for :param page_: The page number parameter works with the page size parameter to control the offset of the records returned in the results. Default value is 1 @@ -237,7 +284,6 @@ class WebarchivSession: def histogram_search(self, query_string, interval_=3, from_=None, to_=None): """ Start a domain name search in the Webarchive. - The current status of running queries can be read via status_open_queries(). :param query_string: String to search for :param page_: The page number parameter works with the page size parameter to control the offset of the records returned in the results. 
Default value is 1 -- GitLab