Commit 8dc2b37d authored by Stefan Karner

Refactor fragmentChecksumHtml

parent d0b9e975
......@@ -8,7 +8,7 @@ _datetime_format_string = '%Y%m%d%H%M%S'
EXTRACTOR_TEXT = 1
EXTRACTOR_CSS = 2
EXTRACTOR_HTML = 2
EXTRACTOR_BINARY = 3
......@@ -130,8 +130,14 @@ class WebarchivSession:
else:
return self._handle_response_errors(r)
def _post(self, op, auto_connect=True, **kwargs):
kwargs['json'] = self._add_api_key_and_token(kwargs.pop('json', {}))
def _post(self, op, auto_connect=True, json: dict = None, **kwargs):
if not json:
json = {}
kwargs['json'] = self._add_api_key_and_token(json)
kwargs['headers'] = {
'content-type': 'application/json',
'accept': 'application/ld+json'
}
r = requests.post(self.base_url.format(op), **kwargs)
if r.ok:
return r
......@@ -141,29 +147,6 @@ class WebarchivSession:
else:
return self._handle_response_errors(r)
def _post(self, op, jsondata, auto_connect=True):
r = requests.post(self.base_url.format(op), jsondata,
headers={
'content-type': 'application/json',
'accept': 'application/ld+json'
})
if r.ok:
return r
else:
if r.status_code == 403:
if auto_connect:
self.connect()
return self._post(op=op, jsondata=jsondata, auto_connect=False)
else:
print('Forbidden. Invalid Token or ApiKey transmitted', file=sys.stderr)
return r
elif r.status_code == 400:
print('Bad request', file=sys.stderr)
return r
elif r.status_code == 410:
print('The requested API Version (via X-API-VERSION Header) is not available', file=sys.stderr)
return r
def fulltext_search(self, query_string, from_=None, to_=None):
"""
Start a fulltext search query in the Webarchive.
......@@ -376,17 +359,13 @@ class WebarchivSession:
def fragmentChecksumHtml(self, seed, capture, selector, occurrence):
try:
response = self._post(op='/fragment/checksum/html', jsondata='''{{
"apikey": "{api_key}",
"t": "{token}",
"seed": "{seed}",
"capture": "{capture}",
"selector": "{selector}",
"occurrence": "{occurrence}",
"extractortype": 2
}}'''.format(api_key=self.api_key, token=self.token, seed=seed, capture=capture,
selector=selector, occurrence=occurrence))
response = self._post(op='/fragment/checksum/html', json={
"seed": seed,
"capture": capture,
"selector": selector,
"occurrence": occurrence,
"extractortype": EXTRACTOR_HTML
})
response = self.status_query(response)
return self.waitForResponse(response)
except HTTPError as e:
......@@ -414,7 +393,7 @@ if __name__ == '__main__':
for capture in response.json()['hits']:
capturedate = capture['c']
resp = w.fragmentChecksumHtml(url, capturedate, ".odd td", 3);
resp = w.fragmentChecksumHtml(url, capturedate, ".odd td", 3)
checksum = resp.json()['checksum']
returncode = resp.json()['returncode']
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment