Commit b9ce798b authored by Andreas

new stuff

parent f679092a
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -2,7 +2,6 @@ import sys
import time
import requests
from requests import HTTPError
import datetime
_datetime_format_string = '%Y%m%d%H%M%S'
......@@ -40,10 +39,6 @@ class WebarchivSession:
# Set up a session object: keep the API key, start without a token and with
# empty dicts that track open and finished fulltext/wayback queries.
def __init__(self, api_key):
self.api_key = api_key
# No token yet; presumably obtained later by connect() -- TODO confirm.
self.token = None
# Open queries, iterated by status_open_queries() as
# (timestamp, query_string) -> response of the original search request.
self.open_fulltext_queries = {}
self.open_wayback_queries = {}
# Finished queries: (timestamp, query_string) -> decoded JSON body of the
# status response that reported HTTP 200.
self.finished_fulltext_queries = {}
self.finished_wayback_queries = {}
def connect(self):
"""
......@@ -101,11 +96,16 @@ class WebarchivSession:
return self._get(op=op, auto_connect=False, **kwargs)
else:
print('Forbidden. Invalid Token or ApiKey transmitted', file=sys.stderr)
return r
elif r.status_code == 400:
print('Bad request', file=sys.stderr)
return r
elif r.status_code == 410:
print('The requested API Version (via X-API-VERSION Header) is not available', file=sys.stderr)
return r
raise HTTPError(response=r)
def query_fulltext_search(self, query_string, from_=None, to_=None):
def fulltext_search(self, query_string, from_=None, to_=None):
"""
Start a fulltext search query in the Webarchive.
The current status of running queries can be read via status_open_queries().
......@@ -124,15 +124,14 @@ class WebarchivSession:
params['to'] = to_
try:
r = self._get(op='/search/fulltext', params=params)
self.open_fulltext_queries[(datetime.datetime.now().strftime(_datetime_format_string),
query_string)] = r
print('Query for "{}" added. Message:"{}"'.format(query_string, r.json()['message']))
response = self._get(op='/search/fulltext', params=params)
return self.waitForResponse(response)
except HTTPError as e:
self._display_http_error(e)
print('Query for "{}" not added'.format(query_string))
def query_wayback_search(self, query_string, from_=None, to_=None):
def wayback_search(self, query_string, from_=None, to_=None):
"""
Start a wayback search query in the Webarchive.
The current status of running queries can be read via status_open_queries().
......@@ -151,18 +150,26 @@ class WebarchivSession:
params['to'] = to_
try:
print('search for ', params)
r = self._get(op='/search/wayback', params=params)
return r
response = self._get(op='/search/wayback', params=params)
return self.waitForResponse(response)
except HTTPError as e:
self._display_http_error(e)
print('Error:'.format(query_string))
def waitForResponse(self, response, poll_interval=0.2):
    """
    Poll the status of a search request until it has finished.

    As long as the response is neither finished (HTTP 200) nor an error,
    the method sleeps for ``poll_interval`` seconds and asks
    status_query() for a fresh response.

    Error responses (status code >= 400) are returned immediately, both for
    the initial response and for responses received while polling. They do
    not describe a running request, so polling them again could never
    succeed.

    :param response: response of a search request or of a previous status request
    :param poll_interval: seconds to wait between two status requests
    :return: the first response with status code 200 or with an error status
    """
    while response.status_code != 200:
        if response.status_code >= 400:
            # Terminal error: hand it back to the caller instead of polling.
            return response
        time.sleep(poll_interval)
        response = self.status_query(response)
    return response
def status_query(self, resp):
requestid = resp.json()['requestid'];
type = resp.json()['type']
print('call status for "{}"', requestid)
if type == 1:
r = self._get(op='/search/status/fulltext', params={'requestid': requestid})
elif type == 2:
......@@ -171,42 +178,15 @@ class WebarchivSession:
return r
def status_open_queries(self):
    """
    Request the current status of running queries from the Webarchive.

    Every open fulltext and wayback query is looked up by the 'requestid'
    of its stored response. Finished queries (HTTP 200) are moved from
    'open_*' to 'finished_*' queues, where the decoded JSON body of the
    status response is stored. Queries that are still running (HTTP 202)
    stay in the 'open_*' queue. Any other status is reported on stderr and
    the query is left in the 'open_*' queue.

    Exceptions raised by _get() are not caught here.

    :return: None
    """
    queues = (
        ('/search/status/fulltext',
         self.open_fulltext_queries, self.finished_fulltext_queries),
        ('/search/status/wayback',
         self.open_wayback_queries, self.finished_wayback_queries),
    )
    for op, open_queries, finished_queries in queues:
        # Iterate over a snapshot: finished entries are deleted while looping.
        for key, old_response in list(open_queries.items()):
            _timestamp, query_string = key
            requestid = old_response.json()['requestid']
            r = self._get(op=op, params={'requestid': requestid})
            if r.status_code == 200:
                finished_queries[key] = r.json()
                print('Query for "{}" done'.format(query_string))
                del open_queries[key]
            elif r.status_code == 202:
                print('Query for "{}" is still running'.format(query_string))
            else:
                # Do not drop unexpected answers silently.
                print('Query for "{}" returned unexpected status {}'.format(
                    query_string, r.status_code), file=sys.stderr)
# Script entry point: small manual smoke test of the session against the
# Webarchive API.
if __name__ == '__main__':
# noinspection SpellCheckingInspection
# NOTE(review): the API key is hard-coded in the source; consider reading it
# from the environment or a config file instead of committing it.
w = WebarchivSession('Zz2tQls7fuaocX2pjrfc2npojqbGwXL2')
# Start a wayback search and poll its status once per second until the
# API answers with HTTP 200; the status code is printed on each step.
response = w.query_wayback_search("http://www.onb.ac.at")
print(response.status_code)
while response.status_code != 200:
print(response.status_code)
time.sleep(1)
response = w.status_query(response)
# Print the 'total' field of the JSON body.
print(response.json()['total'])
# response = w.wayback_search("http://www.onb.ac.at")
#response = w.wayback_search("http://frauenhetz.jetzt")
# Run a fulltext search and report either the 'total' field or the status
# code of the returned response.
response = w.fulltext_search("Nationalbibliothek Prunksaal Schwarzenegger")
# response = w.wayback_search("x")
if response.status_code == 200:
print(response.json()['total'], " Captures")
else:
print("Error ", response.status_code)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment