diff --git a/webarchiv.py b/webarchiv.py index ee597e88baed6c0c1df63f6519aeaa7ddcb59428..4b355e6752469ed9362baf17d0ad7aaef0864c3d 100644 --- a/webarchiv.py +++ b/webarchiv.py @@ -7,6 +7,11 @@ from requests import HTTPError _datetime_format_string = '%Y%m%d%H%M%S' +EXTRACTOR_TEXT = 1 +EXTRACTOR_CSS = 2 +EXTRACTOR_BINARY = 3 + + class SessionTimeoutError(Exception): pass @@ -102,27 +107,39 @@ class WebarchivSession: print(self._error_template.format(status_code=e.response.status_code, response_text=e.response.text), file=sys.stderr) - + + def _handle_response_errors(self, r): + if r.status_code == 403: + print('Forbidden. Invalid Token or ApiKey transmitted', file=sys.stderr) + return r + elif r.status_code == 400: + print('Bad request', file=sys.stderr) + return r + elif r.status_code == 410: + print('The requested API Version (via X-API-VERSION Header) is not available', file=sys.stderr) + return r + def _get(self, op, auto_connect=True, **kwargs, ): kwargs['params'] = self._add_api_key_and_token(kwargs.pop('params', {})) - # kwargs = self._add_api_key_and_token(kwargs) r = requests.get(self.base_url.format(op), **kwargs) if r.ok: return r + elif r.status_code == 403 and auto_connect: + self.connect() + return self._get(op=op, auto_connect=False, **kwargs) else: - if r.status_code == 403: - if auto_connect: - self.connect() - return self._get(op=op, auto_connect=False, **kwargs) - else: - print('Forbidden. Invalid Token or ApiKey transmitted', file=sys.stderr) - return r - elif r.status_code == 400: - print('Bad request', file=sys.stderr) - return r - elif r.status_code == 410: - print('The requested API Version (via X-API-VERSION Header) is not available', file=sys.stderr) + return self._handle_response_errors(r) + + def _post(self, op, auto_connect=True, **kwargs): + kwargs['json'] = self._add_api_key_and_token(kwargs.pop('json', {})) + r = requests.post(self.base_url.format(op), **kwargs) + if r.ok: return r + elif r.status_code == 403 and auto_connect: + self.connect() + return self._post(op=op, auto_connect=False, **kwargs) + else: + return self._handle_response_errors(r) def fulltext_search(self, query_string, from_=None, to_=None): """ @@ -227,9 +244,6 @@ class WebarchivSession: def waitForResponse(self, response): """ Polls until the server responds with a result - - :param response: String to search for - :return: response """ if response.status_code == 400: return response @@ -242,12 +256,11 @@ class WebarchivSession: def status_query(self, resp): """ - this is the pollingrequest for the given typen of request - - :param resp: String to search for - :return: response + this is the polling request for the given type of request """ - requestid = resp.json()['requestid'] + j = resp.json() + context = j['context'] + requestid = j['requestid'] type_ = resp.json()['type'] if type_ == 1: r = self._get(op='/search/status/fulltext', params={'requestid': requestid})