Skip to content
Snippets Groups Projects
Commit 735e2204 authored by philip.roeggla's avatar philip.roeggla
Browse files

abstract parent class for parent books

parent 839b5d9e
Branches
No related tags found
No related merge requests found
from abc import ABC, abstractmethod
import asyncio
from dataclasses import dataclass
import re as regex
import typing
import httpcore
import httpx
from pandas import DataFrame
......@@ -126,6 +128,7 @@ class AbstractMultifield(AbstractDataExtractor):
class AbstractParentAsSecondCast(AbstractDataExtractor):
column: str
sleep: float
@classmethod
def get_columns_names_I_work_on(cls) -> list:
......@@ -137,6 +140,7 @@ class AbstractParentAsSecondCast(AbstractDataExtractor):
super().__init__(*args, **kwargs)
self.parend_titles = {}
self.log = []
self.sleep = 0.01
@abstractmethod
async def _write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
......@@ -167,6 +171,32 @@ class AbstractParentAsSecondCast(AbstractDataExtractor):
# else: get the data
try:
await asyncio.sleep(self.sleep)
parent_response = await session.get(
url='https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB', params={
'startRecord': 1,
'maximumRecords': 1,
'query': f'alma.local_control_field_009={ac_parent}',
'version': '1.2',
'operation': 'searchRetrieve',
'recordSchema': 'marcxml',
}, timeout=60
)
except httpx.ConnectTimeout as timeout:
self.log.append({
'error': 'time_out',
'message': str(timeout),
'ac_child': record.ac_number,
'ac_parent': ac_parent,
})
return
except Exception as error:
self.sleep *= 1.3
try:
await asyncio.sleep(self.sleep)
parent_response = await session.get(
url='https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB', params={
'startRecord': 1,
......@@ -175,16 +205,37 @@ class AbstractParentAsSecondCast(AbstractDataExtractor):
'version': '1.2',
'operation': 'searchRetrieve',
'recordSchema': 'marcxml',
}, timeout=5
}, timeout=60
)
if parent_response.status_code != '200':
except httpx.ConnectTimeout as timeout:
self.log.append({
'error': 'time_out',
'message': str(timeout),
'ac_child': record.ac_number,
'ac_parent': ac_parent,
})
return
except Exception as error:
self.sleep *= 1.3
self.log.append({
'error': 'time_out',
'message': str(error),
'ac_child': record.ac_number,
'ac_parent': ac_parent,
})
return
if parent_response.status_code != 200:
self.log.append({
'status_code': parent_response.status_code,
'url': parent_response.url,
'url': parent_response.url.full_path,
'message': parent_response.text,
'ac_child': record.ac_number,
'ac_parent': ac_parent,
})
return
try:
xml = lxmletree.fromstring(parent_response.content)
......
......@@ -11,6 +11,9 @@ from travelogues_extraction.dataextractors.abstract import AbstractParentAsSecon
class Werktitel(AbstractParentAsSecondCast):
def __init__(self, *args, **kwargs):
AbstractParentAsSecondCast.__init__(self, *args, **kwargs)
column: str = 'Werktitel'
log: List[Dict[str, Any]]
......@@ -69,6 +72,9 @@ class Werktitel(AbstractParentAsSecondCast):
class Schlagworte(AbstractParentAsSecondCast):
def __init__(self, *args, **kwargs):
AbstractParentAsSecondCast.__init__(self, *args, **kwargs)
column: str = 'Schlagworte'
join_string = AbstractXpathJoinDirectlyToColumn.join_string # too lazy for multiple inheritance
parent_ac_xpath = lxmletree.XPath(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment