diff --git a/webarchiv.py b/webarchiv.py index 1c1efb0298077320e49f4831fe95617b0d2413c0..82cb31bd2f5c8869b88ca8beb314780a33555836 100644 --- a/webarchiv.py +++ b/webarchiv.py @@ -1,4 +1,5 @@ import sys +import sys import time import requests import hashlib @@ -22,6 +23,7 @@ SEARCHMODE_WEBARCHIV = 1 SEARCHMODE_INTERNETARCHIVE = 2 SEARCHMODE_WEBARCHIV_INTERNETARCHIVE = 3 + class SessionTimeoutError(Exception): pass @@ -32,7 +34,7 @@ class WebarchivSession: """ Current protocol version """ - return '0.1.0' + return '0.2.0' @property def api_path(self): @@ -93,7 +95,8 @@ class WebarchivSession: }}'''.format(api_key=self.api_key, version=self.version, fingerprint=fingerprint), headers={ 'content-type': 'application/json', - 'accept': 'application/ld+json' + 'accept': 'application/ld+json', + 'X-API-VERSION': self.version } ) if r.status_code == 201: @@ -101,17 +104,6 @@ class WebarchivSession: else: raise HTTPError(response=r) - def _add_api_key_and_token(self, params_dict: dict): - """ - Add the saved api key and token to a given dictionary. - - :param params_dict: A dictionary that's probably used - as a 'params' keyword parameter for calling requests.get(). - :return: The same dictionary extended by 'apikey' and 't' keys. - """ - params_dict['apikey'] = self.api_key - params_dict['t'] = self.token - return params_dict def _display_http_error(self, e: HTTPError): print(self._error_template.format(status_code=e.response.status_code, @@ -130,31 +122,43 @@ class WebarchivSession: print('The requested API Version (via X-API-VERSION Header) is not available', file=sys.stderr) return r - def _get(self, op, auto_connect=True, **kwargs, ): - kwargs['params'] = self._add_api_key_and_token(kwargs.pop('params', {})) + def _get(self, op, auto_connect=True, **kwargs): r = requests.get(self.base_url.format(op), **kwargs) if r.ok: return r elif r.status_code == 403 and auto_connect: self.connect() - return self._get(op=op, auto_connect=False, **kwargs) + kwargs['headers'] = { + 'Authorization': 'Bearer ' + self.token + } + return self._get(op, False, **kwargs) else: + kwargs['headers'] = { + 'Authorization': 'Bearer ' + self.token + } return self._handle_response_errors(r) def _post(self, op, auto_connect=True, json: dict = None, **kwargs): if not json: json = {} - kwargs['json'] = self._add_api_key_and_token(json) - kwargs['headers'] = { - 'content-type': 'application/json', - 'accept': 'application/ld+json' - } - r = requests.post(self.base_url.format(op), **kwargs) - if r.ok: + if not self.token: + kwargs['headers'] = { + 'content-type': 'application/json', + 'accept': 'application/ld+json', + } + else: + kwargs['headers'] = { + 'content-type': 'application/json', + 'accept': 'application/ld+json', + 'Authorization': 'Bearer ' + self.token + } + + r = requests.post(self.base_url.format(op), json=json, **kwargs) + if r.status_code == 201: return r elif r.status_code == 403 and auto_connect: self.connect() - return self._post(op=op, auto_connect=False, **kwargs) + return self._post(op, False, json, **kwargs) else: return self._handle_response_errors(r) @@ -278,7 +282,6 @@ class WebarchivSession: this is the polling request for the given type of request """ j = resp.json() - context = j['context'] requestid = j['requestid'] type_ = resp.json()['type'] if type_ == 1: @@ -339,7 +342,7 @@ class WebarchivSession: print('Error:'.format(query_string)) def get_snapshot_url(self, seed, capture, onlysvg): - return self.api_path + 'snapshot?capture=' + capture + '&t=' + self.token + '&apikey=' + self.api_key + '&onlysvg=' + onlysvg + '&seed=' + seed + return self.api_path + '/snapshot?capture=' + capture + '&t=' + self.token + '&onlysvg=' + onlysvg + '&seed=' + seed @staticmethod def result_contains_seeds(response): @@ -373,6 +376,7 @@ class WebarchivSession: "occurrence": occurrence, "extractortype": EXTRACTOR_HTML }) + response = self.status_query(response) return self.wait_for_response(response) except HTTPError as e: @@ -443,4 +447,3 @@ class WebarchivSession: return response except HTTPError as e: self._display_http_error(e) -