diff --git a/sru/onbsru.py b/sru/onbsru.py index 335b38bdbfe43366ee90f56f413d2c59fb9d7325..8ec7bf5f17dd34c18fd1c18857f2f216bfb65097 100644 --- a/sru/onbsru.py +++ b/sru/onbsru.py @@ -14,37 +14,42 @@ class OnbRecordRetriever(almasru.RecordRetriever): xml_format: str = 'marcxml'): super().__init__(subdomain, inst_code, xml_format) + self.namespaces = { + 'srw': 'http://www.loc.gov/zing/srw/', + 'marc': 'http://www.loc.gov/MARC21/slim' + } + # Create alias for querying AC-numbers def by_ac_num(self, ac_num: str): return self.by_marc_009(ac_num) - def get_pids_for_ac(self, ac_num: str) -> list: + def get_pids(self, xml: Element) -> list: """ Extract all PIDs from XML based on MARC 856 4 2. PIDs are identifiers migrated from a legacy system. - :param ac_num: ID used in Austrian Library Network + :param xml: MARC xml as xml.etree.ElementTree Element :return: List of identifiers. """ - namespaces = { - 'srw': 'http://www.loc.gov/zing/srw/', - 'marc': 'http://www.loc.gov/MARC21/slim' - } - - marc_xml = self.by_ac_num(ac_num) - datafields = marc_xml.findall("marc:datafield", namespaces) + datafields = xml.findall("marc:datafield", self.namespaces) pids = [] for datafield in datafields: if datafield.attrib == {'tag': '856', 'ind1': '4', 'ind2': '2'}: - marc_856_u = datafield.find("marc:subfield[@code='u']", namespaces) + + marc_856_u = self._get_subfield(datafield, 'u') marc_856_url = marc_856_u.text - marc_856_x = datafield.find("marc:subfield[@code='x']", namespaces) + + marc_856_x = self._get_subfield(datafield, 'x') marc_856_note = marc_856_x.text + if 'ONB-DTL' in marc_856_note: pids.append(self.pid_re.findall(marc_856_url)[0]) - marc_856_url = None return pids + + def _get_subfield(self, element, subfield): + xpath = "marc:subfield[@code='"+subfield+"']" + return element.find(xpath, self.namespaces) \ No newline at end of file