Newer
Older
import asyncio
import typing
from dataclasses import dataclass

import httpx
from lxml import etree as lxmletree

from travelogues_extraction.dataextractors.namespaces import namespaces
class RecordRetriever:
    """Retrieve MARC-XML records for a list of AC numbers over SRU.

    Every AC number is looked up with one ``searchRetrieve`` request against
    ``url``.  Failures are never raised to the caller: a dict describing the
    problem is appended to ``log`` and the AC number is skipped.  ``all_ok``
    is cleared whenever a response arrives but cannot be turned into a record
    (non-200 status, unparsable XML, or no matching record).
    """

    # AC numbers to look up, in the order they will be requested.
    ac_list: typing.List[str]
    # Shared HTTP client used for every request; owned by the caller.
    session: httpx.AsyncClient
    # SRU endpoint that is queried.
    url = 'https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB'
    # One dict per failed lookup.
    log: list
    # False as soon as a received response could not be turned into a record.
    all_ok: bool
    # Seconds to wait before each request; multiplied by 1.3 after every
    # request that fails with something other than a connect timeout.
    # NOTE(review): the visible code never initialised this attribute, which
    # made the first lookup fail with AttributeError.  0.5 is an assumed
    # default - confirm the intended value.
    sleep: float = 0.5
    # Number of request attempts per AC number (the first try plus one retry).
    _max_attempts = 2
    # Selects the first MARC record of the first SRU record in a response.
    record_xpath = lxmletree.XPath(
        r'/srw:searchRetrieveResponse/srw:records/srw:record[1]/srw:recordData/marc:record[1]',
        namespaces=namespaces
    )

    def __init__(self, ac_numbers: typing.List[str], session: httpx.AsyncClient):
        """Store the AC numbers and HTTP session and reset the bookkeeping.

        Args:
            ac_numbers: AC numbers to retrieve.
            session: Async HTTP client used for all requests.
        """
        self.ac_list = ac_numbers
        self.log = []
        self.all_ok = True  # so far
        self.session = session

    @dataclass
    class Record:
        """An AC number together with the MARC record element found for it."""

        ac_number: str
        lxmlelement: lxmletree._Element

    async def generate_records(self) -> typing.AsyncGenerator[Record, None]:
        """Yield a ``Record`` for every AC number that could be retrieved.

        AC numbers are processed one after another in list order; those whose
        lookup failed (see ``log``) are skipped silently.
        """
        for ac_number in self.ac_list:
            record = await self.get_record_from_ac_number(ac_number)
            if record is not None:
                yield record

    async def get_record_from_ac_number(self, ac_number: str) -> typing.Optional[Record]:
        """Fetch and parse the record for a single AC number.

        Args:
            ac_number: The AC number to query for.

        Returns:
            The matching ``Record``, or ``None`` if the request failed, the
            server did not answer with status 200, the body was not parsable
            XML, or the response contained no record.  Every ``None`` result
            is accompanied by a new entry in ``log``.
        """
        response = await self._fetch_response(ac_number)
        if response is None:
            # NOTE(review): request failures are logged but leave ``all_ok``
            # untouched, unlike the failure paths below - confirm intended.
            return None

        sub_log = {
            'ac_number': ac_number,
            'status_code': response.status_code,
        }

        if response.status_code != 200:
            sub_log['response_error_message'] = response.text
            return self._fail(sub_log)

        try:
            xml = lxmletree.fromstring(response.content)
        except Exception as e:
            sub_log['lxml_error'] = e
            sub_log['content'] = response.content
            return self._fail(sub_log)

        records: typing.List[lxmletree._Element] = self.record_xpath(xml)
        if not records:
            sub_log['no_records_found'] = True
            return self._fail(sub_log)

        return self.Record(ac_number=ac_number, lxmlelement=records[0])

    async def _fetch_response(self, ac_number: str) -> typing.Optional[httpx.Response]:
        """Request *ac_number*, retrying once after a non-timeout failure.

        A connect timeout is logged and gives up immediately.  Any other
        exception lengthens ``self.sleep`` by a factor of 1.3; after the last
        attempt it is logged as well.  Returns ``None`` when no response was
        obtained.
        """
        last_attempt = self._max_attempts - 1
        for attempt in range(self._max_attempts):
            try:
                return await self._request(ac_number)
            except httpx.ConnectTimeout as timeout:
                self._log_request_failure(ac_number, timeout)
                return None
            except Exception as error:
                self.sleep *= 1.3
                if attempt == last_attempt:
                    self._log_request_failure(ac_number, error)
        return None

    async def _request(self, ac_number: str) -> httpx.Response:
        """Wait ``self.sleep`` seconds, then send one SRU query for *ac_number*."""
        await asyncio.sleep(self.sleep)
        return await self.session.get(url=self.url, params={
            'startRecord': 1,
            'maximumRecords': 1,
            'query': f'alma.local_control_field_009={ac_number}',
            'version': '1.2',
            'operation': 'searchRetrieve',
            'recordSchema': 'marcxml',
        }, timeout=60)

    def _log_request_failure(self, ac_number: str, error: Exception) -> None:
        """Append a log entry for a request that produced no response."""
        # The 'time_out' label is used for every kind of request failure,
        # exactly as before, so existing log consumers keep working.
        self.log.append({
            'error': 'time_out',
            'message': str(error),
            'ac_number': ac_number,
        })

    def _fail(self, sub_log: dict) -> None:
        """Record *sub_log*, clear ``all_ok`` and return ``None``."""
        self.log.append(sub_log)
        self.all_ok = False
        return None