diff --git a/4.1 - Webarchive - Interacting with the API.ipynb b/4.1 - Webarchive - Interacting with the API.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fbfe591274c7e96b332dee5b3521aaee51d7c886 --- /dev/null +++ b/4.1 - Webarchive - Interacting with the API.ipynb @@ -0,0 +1,1351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# 4.1 - Webarchive - Interacting With The API\n", + "\n", + "*Tools for accessing the Webarchive API*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "* Variant 1: Exploring the API manually\n", + "* Variant 2: Generate Code from Swagger JSON\n", + "* Variant 3: Use Swagger JSON dynamically\n", + "* Variant 4: Use `webarchiv.py` from the ONB Labs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "The documentation is available at [https://webarchiv.onb.ac.at/api.html#](https://webarchiv.onb.ac.at/api.html#)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "API_KEY = 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Variant 1: Exploring the API manually\n", + "\n", + "Take a look at [https://webarchiv.onb.ac.at/api.html#/](https://webarchiv.onb.ac.at/api.html#/) and try it out."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "BASE_URL = 'https://webarchiv.onb.ac.at/api'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Let's take a look at `/welcome`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'http://schema.org/',\n", + " '@type': 'WebAPI',\n", + " 'name': 'Webarchive Austria Search API',\n", + " 'version': '0.1.0',\n", + " 'description': 'The Webarchive Austria Search API lets you find archived webpages by Fulltext or URL. The API uses standard schema.org types and is compliant with the JSON-LD specification.',\n", + " 'documentation': 'https://webarchiv.onb.ac.at/api.html',\n", + " 'provider': {'@type': 'Organization',\n", + " 'name': 'Austrian National Library',\n", + " 'contactPoint': [{'@type': 'ContactPoint',\n", + " 'name': 'Webarchive Austria',\n", + " 'url': 'https://webarchiv.onb.ac.at'}]},\n", + " 'versions': ['0.1.0'],\n", + " 'license': 'https://creativecommons.org/publicdomain/mark/1.0/',\n", + " 'transport': 'HTTP',\n", + " 'apiProtocol': 'JSON API',\n", + " 'webApiDefinitions': [{'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/authenticate',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'POST'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/search/domainname',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/search/domainname?q={q}&page={page}&pagesize={pagesize}&t={t}&apikey={apikey}',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", 
+ " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/search/fulltext',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/search/fulltext?q={q}&from={from}&to={to}&maxaggs={maxaggs}&t={t}&apikey={apikey}',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/search/fulltext/seed',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/search/fulltext/seed?q={q}&g={g}&from={from}&to={to}&t={t}&apikey={apikey}',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/search/fulltext/capture',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/search/fulltext/capture?q={q}&g={g}&from={from}&to={to}&page={page}&pagesize={pagesize}&t={t}&apikey={apikey}',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/search/wayback',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/search/wayback?q={q}&from={from}&to={to}&t={t}&apikey={apikey}',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/status/fulltext',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/status/fulltext?requestid={requestid}&t={t}&apikey={apikey}',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/status/wayback',\n", + " 'urlTemplate': 'https://webarchiv.onb.ac.at/api/status/wayback?requestid={requestid}&t={t}&apikey={apikey}',\n", + " 
'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'GET'},\n", + " {'@type': 'EntryPoint',\n", + " 'url': 'https://webarchiv.onb.ac.at/api/status/kill',\n", + " 'encodingType': 'application/json',\n", + " 'contentType': 'application/ld+json',\n", + " 'httpMethod': 'DELETE'}]}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = requests.get(f'{BASE_URL}/welcome')\n", + "r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We need a fingerprint and a valid API key.\n", + "A key has been generated for PyDays19." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('00b5b6ec-ca35-4345-b1e2-82d6dd99c05a', 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import uuid\n", + "\n", + "FINGERPRINT = str(uuid.uuid4())\n", + "API_KEY = 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ'\n", + "\n", + "FINGERPRINT, API_KEY" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "We need to authenticate first in order to get a valid token." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "201" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auth_r = requests.post(f'{BASE_URL}/authentication', json={\n", + " 'apikey': API_KEY,\n", + " 'version': '0.1.0',\n", + " 'fingerprint': FINGERPRINT\n", + "})\n", + "auth_r.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'https://webarchiv.onb.ac.at/contexts/authenticate.jsonld',\n", + " 'apikey': 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ',\n", + " 'fingerprint': '00b5b6ec-ca35-4345-b1e2-82d6dd99c05a',\n", + " 'timestamp': 1555515593482,\n", + " 't': 'ff58c39dfde2639849c901388fbcf959132dea2d',\n", + " 'version': '0.1.0'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auth_r.json()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "token = auth_r.json()['t']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Now we can submit other requests, a search for example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search_r = requests.get(f'{BASE_URL}/search/domainname', params={\n", + " 'apikey': API_KEY,\n", + " 't': token,\n", + " 'q': 'wien'\n", + "})\n", + "search_r.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hits': [{'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wieno.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien1.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiener.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-wien.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiengut.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienmed.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienwin.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiental.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wieners.wien'}],\n", + " 'searchstring': 'wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/domainnamesearchresult.jsonld',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 35101,\n", + " 'type': 1,\n", + " 'took': 427,\n", 
+ " 'version': '0.1.0'}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search_r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Variant 2: Generate Code from Swagger JSON\n", + "\n", + "We use the online generator at [https://generator.swagger.io/](https://generator.swagger.io/)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "import io\n", + "import zipfile\n", + "import shutil\n", + "\n", + "def generate_swagger_client():\n", + "    \"\"\"Generate the `swagger_client` package for the Webarchive API.\n", + "\n", + "    Posts the URL of the API's Swagger JSON to generator.swagger.io,\n", + "    downloads the resulting ZIP and leaves the generated package in\n", + "    `./swagger_client`. An existing `./swagger_client` is replaced, so\n", + "    this function can be re-run safely.\n", + "\n", + "    Raises `requests.HTTPError` if the generator or the download responds\n", + "    with an error status.\n", + "    \"\"\"\n", + "    # Generate Python Client\n", + "    generated_r = requests.post('https://generator.swagger.io/api/gen/clients/python', json={\n", + "        'swaggerUrl': 'https://webarchiv.onb.ac.at/api/swagger.json'\n", + "    })\n", + "    generated_r.raise_for_status()\n", + "    link = generated_r.json()['link']\n", + "    # Download ZIP with generated client\n", + "    zip_r = requests.get(link)\n", + "    zip_r.raise_for_status()\n", + "    try:\n", + "        # Open and extract; the context manager closes the archive again\n", + "        with zipfile.ZipFile(io.BytesIO(zip_r.content)) as zip_file:\n", + "            zip_file.extractall()\n", + "        # shutil.move() would nest the package inside an existing directory,\n", + "        # so drop a previously generated client first\n", + "        shutil.rmtree('swagger_client', ignore_errors=True)\n", + "        # Move package to working directory\n", + "        shutil.move('python-client/swagger_client', 'swagger_client')\n", + "    finally:\n", + "        # Clean up the extracted scratch directory even if a step failed\n", + "        shutil.rmtree('python-client', ignore_errors=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "import swagger_client" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Set base URL" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "" + } + }, + "outputs": [], + "source": [ + "client = swagger_client.ApiClient()\n", + "client.configuration.host = 'https://webarchiv.onb.ac.at/api'" + ] + }, + { +
"cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Authenticate" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'apikey': 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ',\n", + " 'fingerprint': '635fbeae-50d5-4df7-8372-7bc93bcbec74',\n", + " 't': 'b831ef03103dd7bb74838e0678e7d2bf2aaef809',\n", + " 'timestamp': 1555515615761,\n", + " 'version': '0.1.0'}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auth_obj = swagger_client.Authenticate(apikey=API_KEY, fingerprint=str(uuid.uuid4()))\n", + "aa = swagger_client.AuthenticationApi(client)\n", + "auth_r = aa.authenticate(body=auth_obj)\n", + "auth_r" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "token = auth_r.t" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Search for domain name" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "search_api = swagger_client.SearchApi(client)\n", + "search_r = search_api.search_domainname(q='wien', t=token, apikey=API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hits': [{'value': 'wieno.wien'},\n", + " {'value': 'wien.wien'},\n", + " {'value': 'wien1.wien'},\n", + " {'value': 'wiener.wien'},\n", + " {'value': 'wien-wien.at'},\n", + " {'value': 'wiengut.wien'},\n", + " {'value': 'wienmed.wien'},\n", + " {'value': 'wienwin.wien'},\n", + " {'value': 'wiental.wien'},\n", + " {'value': 'wieners.wien'}],\n", + " 'message': '',\n", + " 'requestid': '',\n", + " 'returncode': 0,\n", + " 'searchstring': 
'wien',\n", + " 'took': 615,\n", + " 'total': 35101,\n", + " 'type': 1,\n", + " 'version': '0.1.0'}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Variant 3: Use Swagger JSON dynamically\n", + "\n", + "Uses package [`pyswagger`](https://github.com/pyopenapi/pyswagger)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "from pyswagger import App\n", + "from pyswagger.contrib.client.requests import Client\n", + "from pyswagger.utils import jp_compose" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Create client and app" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "slideshow": { + "slide_type": "" + } + }, + "outputs": [], + "source": [ + "app = App.create(url='https://webarchiv.onb.ac.at/api/swagger.json')\n", + "client = Client()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Add missing support for JSON-LD" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "app.mime_codec.register('application/ld+json', app.mime_codec._codecs['application/json'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "List operations" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'welcome!##!welcome': ,\n", + " 'snapshot!##!getSnapshot': ,\n", + " 'search!##!searchhistogram': ,\n", + " 'search!##!searchcapturegroup': ,\n", + " 'search!##!searchdomaingroup': ,\n", + " 
'search!##!searchDomainname': ,\n", + " 'search!##!killSearchRequest': ,\n", + " 'search!##!getWaybackCalheatmapSearchRequestStatus': ,\n", + " 'search!##!getFulltextsearchRequestStatus': ,\n", + " 'search!##!searchWayback': ,\n", + " 'search!##!searchFulltext': ,\n", + " 'savepage!##!send': ,\n", + " 'authentication!##!authenticate': }" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "app.op" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Authenticate" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "slideshow": { + "slide_type": "" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "201" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = client.request(app.op['authenticate'](body={\n", + " 'apikey': API_KEY,\n", + " 'fingerprint': '1234'\n", + "}))\n", + "r.status" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'apikey': 'wGdLmWMlaM2V6j73V9zS0KHqBgfG67vJ',\n", + " 'fingerprint': '1234',\n", + " 'timestamp': 1555515632891,\n", + " 't': '7cf715f4487b1ace3eacf19bf3febda27f854819',\n", + " 'version': '0.1.0',\n", + " '@context': 'https://webarchiv.onb.ac.at/contexts/authenticate.jsonld'}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.data" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "token = r.data['t']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Search for domain name" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": 
{ + "text/plain": [ + "200" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = client.request(app.op['searchDomainname'](\n", + " apikey=API_KEY,\n", + " t=token,\n", + " q='wien'\n", + "))\n", + "r.status" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hits': [{'value': 'wieno.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wien.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wien1.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wiener.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wien-wien.at',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wiengut.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wienmed.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wienwin.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wiental.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'},\n", + " {'value': 'wieners.wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld'}],\n", + " 'searchstring': 'wien',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 35101,\n", + " 'type': 1,\n", + " 'took': 37,\n", + " 'version': '0.1.0',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/domainnamesearchresult.jsonld'}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + 
"slide_type": "slide" + } + }, + "source": [ + "## Variant 4: Use webarchiv.py from the ONB Labs\n", + "\n", + "`webarchiv.py` is part of this repository. It makes extensive use of `requests`.\n", + "\n", + "If you need the direct download link:\n", + "\n", + "[https://labs.onb.ac.at/gitlab/labs-team/webarchive-api/raw/master/webarchiv.py?inline=false](https://labs.onb.ac.at/gitlab/labs-team/webarchive-api/raw/master/webarchiv.py?inline=false)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "import webarchiv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Authentication is automatic" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "session = webarchiv.WebarchivSession(API_KEY)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Search for domain name" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = session.domain_name_search('wien')\n", + "r.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'hits': [{'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wieno.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 
'wien1.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiener.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-wien.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiengut.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienmed.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienwin.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiental.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wieners.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien24.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'h-m.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'f-u-c-k.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'b-z.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'h-d.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'm-k.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 's-v-h.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'v-i-p.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'a-z.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'i.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u-4.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 
'v-1.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'p-7.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'z-u-g.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u-d-o.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'f-w.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 's-k.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'h-i-p.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'akh-wien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'gkk-wien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'hno-wien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'seo-wien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienfoto.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wientaxi.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienwahl.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wieninfo.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiener-gkk.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienwert.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienview.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerin.wien'},\n", + " {'context': 
'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u1.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u5.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u2.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u6.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u4.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u3.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'a1.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienclean.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienergkk.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienliebe.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienguide.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienkarte.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienscout.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienhotel.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienfluss.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-haus.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-wahl.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'f2f.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'e4b.wien'},\n", + " 
{'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'a2z.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'b2b.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-2.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-6.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-7.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'm2m.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'm4j.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'c-sk.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-3.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-9.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'b-it.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'se-a.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-1.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-4.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-8.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'c2b.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'h-a-c-wien.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': '24-7.wien'},\n", + " {'context': 
'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'e-wien.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-5.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'i2c.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'u-wien.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wien-x.at'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienbibliothek.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiener-biene.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiener-madln.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienergebietskrankenkassegesundheitsverbund.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerjugendstil.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerlinien.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerphilharmoniker.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerstaedtischeversicherung.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienfuehrung.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wiendomain.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienergesundheitsverbund.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienernaschmarkt.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 
'value': 'wienersalon.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerwein.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerwirtschaft.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienerwohnen.wien'},\n", + " {'context': 'https://webarchiv.onb.ac.at/contexts/dnhit.jsonld',\n", + " 'value': 'wienflughafentaxi.wien'}],\n", + " 'searchstring': 'wien',\n", + " 'context': 'https://webarchiv.onb.ac.at/contexts/domainnamesearchresult.jsonld',\n", + " 'requestid': '',\n", + " 'message': '',\n", + " 'returncode': 0,\n", + " 'total': 35101,\n", + " 'type': 1,\n", + " 'took': 138,\n", + " 'version': '0.1.0'}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Available access methods for `WebarchivSession`" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on WebarchivSession in module webarchiv object:\n", + "\n", + "class WebarchivSession(builtins.object)\n", + " | WebarchivSession(api_key)\n", + " | \n", + " | Methods defined here:\n", + " | \n", + " | __init__(self, api_key)\n", + " | Initialize self. 
See help(type(self)) for accurate signature.\n", + " | \n", + " | connect(self)\n", + " | Connect to the Webarchive API, request and save a token.\n", + " | \n", + " | domain_name_search(self, query_string, page_=1, pagesize_=100)\n", + " | Start a domain name search in the Webarchive.\n", + " | The current status of running queries can be read via status_open_queries().\n", + " | \n", + " | :param query_string: String to search for\n", + " | :param page_: The page number parameter works with the page size parameter to control the offset of the records returned in the results. Default value is 1\n", + " | :param pagesize_: The page size parameter works with the page number parameter to control the offset of the records returned in the results. It also controls how many results are returned with each request. Default value is 10\n", + " | :return: result as json\n", + " | \n", + " | fulltext_search(self, query_string, from_=None, to_=None)\n", + " | Start a fulltext search query in the Webarchive.\n", + " | The current status of running queries can be read via status_open_queries().\n", + " | \n", + " | :param query_string: String to search for\n", + " | :param from_: Optional earliest date bound for the search\n", + " | in the format YYYYMM.\n", + " | :param to_: Optional latest date bound for the search\n", + " | in the format YYYYMM.\n", + " | :return: None\n", + " | \n", + " | getSnapshotUrl(self, seed, capture, onlysvg)\n", + " | \n", + " | histogram_search(self, query_string, interval_=3, from_=None, to_=None)\n", + " | Start a domain name search in the Webarchive.\n", + " | The current status of running queries can be read via status_open_queries().\n", + " | \n", + " | :param query_string: String to search for\n", + " | :param page_: The page number parameter works with the page size parameter to control the offset of the records returned in the results. 
Default value is 1\n", + " | :param pagesize_: The page size parameter works with the page number parameter to control the offset of the records returned in the results. It also controls how many results are returned with each request. Default value is 10\n", + " | :return: result as json\n", + " | \n", + " | savePage(self, url)\n", + " | \n", + " | status_query(self, resp)\n", + " | this is the pollingrequest for the given typen of request\n", + " | \n", + " | :param response: String to search for\n", + " | :return: response\n", + " | \n", + " | waitForResponse(self, response)\n", + " | Polls until the server responds with a result\n", + " | \n", + " | :param response: String to search for\n", + " | :return: response\n", + " | \n", + " | wayback_search(self, query_string, from_=None, to_=None)\n", + " | Start a wayback search query in the Webarchive.\n", + " | The current status of running queries can be read via status_open_queries().\n", + " | \n", + " | :param query_string: String to search for\n", + " | :param from_: Optional earliest date bound for the search\n", + " | in the format YYYYMM.\n", + " | :param to_: Optional latest date bound for the search\n", + " | in the format YYYYMM.\n", + " | :return: None\n", + " | \n", + " | ----------------------------------------------------------------------\n", + " | Data descriptors defined here:\n", + " | \n", + " | __dict__\n", + " | dictionary for instance variables (if defined)\n", + " | \n", + " | __weakref__\n", + " | list of weak references to the object (if defined)\n", + " | \n", + " | api_path\n", + " | Protocol, domain and path prefix for the Webarchive API,\n", + " | with a single positional format string placeholder\n", + " | for the REST operation and parameters.\n", + " | \n", + " | base_url\n", + " | Protocol, domain and path prefix for the Webarchive API,\n", + " | with a single positional format string placeholder\n", + " | for the REST operation and parameters.\n", + " | \n", + " | version\n", + " 
| Current protocol version\n", + "\n" + ] + } + ], + "source": [ + "help(session)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "More samples using `webarchiv.py`:\n", + "\n", + "[https://labs.onb.ac.at/gitlab/labs-team/webarchive-api](https://labs.onb.ac.at/gitlab/labs-team/webarchive-api)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}