"""Search/Retrieval via URL for Ex Libris' Alma This module is intended for searching the library software system Alma for single or multiple records. The search for single records must be done with a unique ID (e. g. Barcode) and will return exactly one xml.etree.ElementTree Element. The search for multiple records will return an iterable containing such Elements. When querying for multiple records, keep in mind that Ex Libris limits the retrieval of records to 10k. To use this module you will need to know at least the following information: * The subdomain of the Alma installation * The institution code of the institution you want to query * Which format you want returned Possible formats are documented here: https://developers.exlibrisgroup.com/alma/integrations/SRU/ The Library of Congress has general information on SRU: http://www.loc.gov/standards/sru/ """ from urllib import request, parse from xml.etree.ElementTree import fromstring, Element class SruException(Exception): """Base Exception for all SRU related operations.""" class NoRecord(SruException): """Return Exception if SRU query returns zero records.""" class NotUnique(SruException): """Return Exception for query of unique records that returns more than one record.""" class TooManyRecords(SruException): """Return Exception if more than 10k records were found by the query. Alma SRU does not support requesting more than 10k records.""" class SruDiagnostics(SruException): """Return the error that was provided by Alma SRU.""" class RecordRetriever: """Class to initialize searchRetrieve-Requests. Some standard-methods for fetching single records with a unique identifier: """ def __init__(self, subdomain: str, inst_code: str, xml_format: str): """ :param subdomain: Subdomain (preferably CNAME) for the Alma Instance :param inst_code: Alma Institution Code :param xml_format: Format of the returned xml """ self.url_base = ('https://' f'{subdomain}.alma.exlibrisgroup.com/' f'view/sru/{inst_code}?') self.url_query = {'version': '1.2', 'operation': 'searchRetrieve', 'recordSchema': xml_format} self.namespaces = { 'srw': 'http://www.loc.gov/zing/srw/', 'diag': 'http://www.loc.gov/zing/srw/diagnostic/', 'marc': 'http://www.loc.gov/MARC21/slim' } def get_unique(self, alma_query: str) -> Element: """Return one MARC XML record for a query by unique ID. :param alma_query: URI-parameter query for SRU request :return: XML as etree-Element :rtype Element :raises NoRecord: if no record was returned by SRU :raises Multirecord: if > 1 record was returned for uniq ID :raises SruDiagnostics: if SRU returns an error """ first_page = self._get_first_page(alma_query) num_recs = self._get_num_records(first_page) self._raise_error_unique(num_recs) xml = self._extract_marc_xml(first_page) return xml[0] def get_multiple(self, alma_query: str) -> list: """Return list of MARC XML records for a specific query. :param alma_query: URI-parameter query for SRU request :return: XML as list of etree-Elements :rtype list :raises NoRecord: if no record was returned by SRU :raises SruDiagnostics: if SRU returns an error """ first_page = self._get_first_page(alma_query) num_recs = self._get_num_records(first_page) xml_list = self._get_xml_list(alma_query, num_recs) return xml_list def _get_first_page(self, alma_query): sru_url = self._create_url(alma_query, 1) sru_xml = self._response_to_xml(sru_url) return sru_xml def _get_num_records(self, first_page): num_recs_element = first_page.find("srw:numberOfRecords", self.namespaces) self._raise_error_sru(first_page) self._raise_error_no_match(num_recs_element) self._raise_error_too_many_recs(num_recs_element) num_recs = int(num_recs_element.text) return num_recs def _get_xml_list(self, alma_query: str, num_recs: int): xml_list = [] for xml in self._gen_xml(alma_query, num_recs): xml_list.extend(self._extract_marc_xml(xml)) return xml_list def _gen_xml(self, alma_query: str, num_recs: int): for startrecord in range(1, num_recs+1, 10): sru_url = self._create_url(alma_query, startrecord) sru_xml = self._response_to_xml(sru_url) self._raise_error_sru(sru_xml) yield sru_xml def _create_url(self, alma_query: str, startrecord: int) -> str: self.url_query['startRecord'] = str(startrecord) self.url_query['maximumRecords'] = '10' self.url_query['query'] = alma_query url_query_string = parse.urlencode(self.url_query) sru_url = self.url_base + url_query_string return sru_url @staticmethod def _response_to_xml(url: str) -> Element: response = request.urlopen(url).read() xml = fromstring(response) return xml @staticmethod def _raise_error_unique(num_recs: int): if num_recs > 1: raise NotUnique('More than one record found while exactly one was expected.') @staticmethod def _raise_error_no_match(num_recs_element: Element): if (num_recs_element is not None and num_recs_element.text == '0'): raise NoRecord('No matching records found.') @staticmethod def _raise_error_too_many_recs(num_recs_element: Element): if int(num_recs_element.text) > 10000: raise TooManyRecords('SRU searches are limited to 10.000 records.') def _raise_error_sru(self, sru_xml: Element): if sru_xml.find("srw:diagnostics", self.namespaces): xpath = "srw:diagnostics/diag:diagnostic/diag:message" sru_message = sru_xml.find(xpath, self.namespaces).text raise SruDiagnostics(sru_message) def _extract_marc_xml(self, sru_xml): record_xpath = 'srw:records/srw:record//marc:record' marc_xml = sru_xml.findall(record_xpath, self.namespaces) return marc_xml def by_marc_009(self, marc_009: str) -> Element: """Return record by MARC 009 (control number). :param marc_009: MARC bibliographic record control number. """ alma_query = f'alma.local_control_field_009={marc_009}' return self.get_unique(alma_query) def by_barcode(self, barcode: str) -> Element: """Return record by barcode. param barcode: Barcode of a specific item.""" alma_query = f'alma.barcode={barcode}' return self.get_unique(alma_query) def by_mms_id(self, mms_id: str) -> Element: """Return record by MMS-ID. param mmsid: MMS-ID of a specific item.""" alma_query = f'alma.mms_id={mms_id}' return self.get_unique(alma_query)