diff --git a/travelogues_extraction/getrecords/session.py b/travelogues_extraction/getrecords/session.py index 5a24bd8d13a5461d3d12a280fd86fefbee95b377..73e066d944a7d2cce39e9577167ab25c3a909ff8 100644 --- a/travelogues_extraction/getrecords/session.py +++ b/travelogues_extraction/getrecords/session.py @@ -1,6 +1,8 @@ +import asyncio from dataclasses import dataclass import typing +import httpcore from lxml import etree as lxmletree import httpx @@ -16,6 +18,7 @@ class RecordRetriever: log: list all_ok: bool + sleep: float record_xpath = lxmletree.XPath( r'/srw:searchRetrieveResponse/srw:records/srw:record[1]/srw:recordData/marc:record[1]', @@ -27,6 +30,7 @@ class RecordRetriever: self.log = [] self.all_ok = True # so far self.session = session + self.sleep = 0.01 @dataclass class Record: @@ -44,21 +48,60 @@ class RecordRetriever: async def get_record_from_ac_number(self, ac_number: str) -> typing.Optional[Record]: - # todo timeout error - response = await self.session.get(url=self.url, params={ - 'startRecord': 1, - 'maximumRecords': 1, - 'query': f'alma.local_control_field_009={ac_number}', - 'version': '1.2', - 'operation': 'searchRetrieve', - 'recordSchema': 'marcxml', - }, - timeout=5 - ) + + try: + await asyncio.sleep(self.sleep) + response = await self.session.get(url=self.url, params={ + 'startRecord': 1, + 'maximumRecords': 1, + 'query': f'alma.local_control_field_009={ac_number}', + 'version': '1.2', + 'operation': 'searchRetrieve', + 'recordSchema': 'marcxml', + }, timeout=60) + + except httpx.ConnectTimeout as timeout: + self.log.append({ + 'error': 'time_out', + 'message': str(timeout), + 'ac_number': ac_number, + }) + return + + except Exception as error: + self.sleep *= 1.3 + try: + await asyncio.sleep(self.sleep) + response = await self.session.get(url=self.url, params={ + 'startRecord': 1, + 'maximumRecords': 1, + 'query': f'alma.local_control_field_009={ac_number}', + 'version': '1.2', + 'operation': 'searchRetrieve', + 'recordSchema': 'marcxml', + }, timeout=60) + + except httpx.ConnectTimeout as timeout: + self.log.append({ + 'error': 'time_out', + 'message': str(timeout), + 'ac_number': ac_number, + }) + return + except Exception as error: + self.sleep *= 1.3 + self.log.append({ + 'error': 'time_out', + 'message': str(error), + 'ac_number': ac_number, + }) + return + sub_log = { 'ac_number': ac_number, 'status_code': response.status_code, + 'url': response.url.full_path, } if response.status_code != 200: