In [1]:
from urllib.parse import quote

import pandas as pd
import requests
from lxml import etree

To get the MMS ID for a given barcode, fetch the record via SRU at `https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB?version=1.2&query=alma.barcode=<barcode>&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc` and extract the MMS ID with the XPath `/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordIdentifier/text()`.

Metadata in Dublin Core can be obtained via SRU.

Linked Data from Alma (the library management system) can be retrieved in the following formats:

* BIBFRAME via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bf/entity/instance/<mms id>`
* JSON-LD via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bibs/<mms id>.jsonld`
* RDA/RDF via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/rda/entity/manifestation/<mms id>.rdf`

For a Network Zone MMS ID the institution code is 43ACC_NETWORK and for the Institution MMS ID it is 43ACC_ONB.

The XPath `/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource` selects the URLs of the viewer. We split each URL at the + sign (URL-encoded as `%2B`) in order to extract the barcode.

In [2]:
def getMMS_ID(barcode, timeout=30):
    """Return the MMS ID of the record that carries the given barcode.

    Sends an SRU ``searchRetrieve`` request (Dublin Core schema, at most one
    record) to the 43ACC_ONB endpoint and reads the ``srw:recordIdentifier``
    of the first record in the response.

    Args:
        barcode: Barcode without its leading ``+Z``; that prefix is prepended
            here (URL-encoded as ``%2BZ``). The value is passed literally and
            percent-encoded before it is placed in the query string.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The text of the first ``srw:recordIdentifier`` in the response.

    Raises:
        IndexError: If the response contains no record identifier.
        requests.exceptions.RequestException: If the request fails or times out.
    """
    url = (
        'https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB'
        '?version=1.2&query=alma.barcode=%2BZ' + quote(str(barcode), safe='')
        + '&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc'
    )
    # Without a timeout a stalled connection would block the notebook forever.
    cont = requests.get(url, timeout=timeout).content
    e = etree.XML(cont)
    namespaces = {
        'srw': 'http://www.loc.gov/zing/srw/'
    }
    result = e.xpath('/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordIdentifier/text()', namespaces=namespaces)
    if not result:
        # Same exception type as the former bare result[0], but with a usable message.
        raise IndexError('no record found for barcode ' + repr(barcode))
    return result[0]

def getYear(barcode, timeout=30):
    """Return publication date and place of the record with the given barcode.

    Despite its name this returns two values. It requests the record as
    MARCXML via SRU from the 43ACC_ONB endpoint and reads datafield 264:
    subfield ``c`` for the year and subfield ``a`` for the place.

    Args:
        barcode: Barcode without its leading ``+Z``; that prefix is prepended
            here (URL-encoded as ``%2BZ``). The value is passed literally and
            percent-encoded before it is placed in the query string.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list ``[year, place]``. Each element joins all matching subfield
        values with ``'; '`` and is ``''`` when nothing matched.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    url = (
        'https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB'
        '?version=1.2&query=alma.barcode=%2BZ' + quote(str(barcode), safe='')
        + '&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=marcxml'
    )
    # Without a timeout a stalled connection would block the notebook forever.
    cont = requests.get(url, timeout=timeout).content
    e = etree.XML(cont)
    namespaces = {
        'srw': 'http://www.loc.gov/zing/srw/',
        'marc21': 'http://www.loc.gov/MARC21/slim'
    }
    xpath = '/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/marc21:record/marc21:datafield[@tag=\'264\']/marc21:subfield[@code=\'{}\']/text()'
    # str.join on an empty list already yields '', so no extra guard is needed.
    year = "; ".join(e.xpath(xpath.format('c'), namespaces=namespaces))
    place = "; ".join(e.xpath(xpath.format('a'), namespaces=namespaces))
    return [year, place]

def _fetch_dc_fields(index, value, timeout=30):
    """Fetch title, contributor and date of one record as Dublin Core via SRU.

    Shared implementation of ``getDCDataMMS`` and ``getDCData``; the two only
    differ in the SRU index that is searched.

    Args:
        index: SRU index name placed left of ``=`` in the query
            (``'alma.mms_id'`` or ``'alma.barcode'``).
        value: Search value; converted with ``str`` and percent-encoded.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list ``[title, contributor, date]``. Each element joins all matching
        ``dc:`` element texts with ``'; '`` and is ``''`` when nothing matched.
    """
    url = (
        'https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB'
        '?version=1.2&query=' + index + '=' + quote(str(value), safe='')
        + '&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc'
    )
    # Without a timeout a stalled connection would block the notebook forever.
    root = etree.XML(requests.get(url, timeout=timeout).content)
    namespaces = {
        'srw': 'http://www.loc.gov/zing/srw/',
        'srw_dc': 'info:srw/schema/1/dc-schema',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }
    xpath = '/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/srw_dc:dc/dc:{}/text()'
    # str.join on an empty list already yields '', so no extra guard is needed.
    fields = [
        "; ".join(root.xpath(xpath.format(element), namespaces=namespaces))
        for element in ('title', 'contributor', 'date')
    ]
    # Printed as before so long batch runs still show per-record progress.
    print(fields)
    return fields


def getDCDataMMS(mms_id, timeout=30):
    """Return ``[title, contributor, date]`` for the record with this MMS ID.

    Queries the ``alma.mms_id`` index of the 43ACC_ONB SRU endpoint and prints
    the result list before returning it.

    Args:
        mms_id: MMS ID to look up (string or integer).
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A three-element list of strings; an element is ``''`` when the record
        has no such Dublin Core field or no record was found.
    """
    return _fetch_dc_fields('alma.mms_id', mms_id, timeout)


def getDCData(barcode, timeout=30):
    """Return ``[title, contributor, date]`` for the record with this barcode.

    Queries the ``alma.barcode`` index of the 43ACC_ONB SRU endpoint and prints
    the result list before returning it. Unlike ``getMMS_ID`` no prefix is
    added to the barcode.

    Args:
        barcode: Barcode value to search for, passed literally (it is
            percent-encoded here).
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A three-element list of strings; an element is ``''`` when the record
        has no such Dublin Core field or no record was found.
    """
    return _fetch_dc_fields('alma.barcode', barcode, timeout)

def getLinksAndBarcodes(local_mms_id, timeout=30):
    """Return the viewer links of a record and the barcodes embedded in them.

    Loads the BIBFRAME instance description for the MMS ID from the 43ACC_ONB
    linked-data endpoint and collects every electronic locator URL of its
    items. A barcode is the part of a URL that follows the first ``%2B``
    (up to the next ``%2B``, if any), with a literal ``+`` put in front.
    URLs without ``%2B`` contribute no barcode.

    Args:
        local_mms_id: MMS ID of the record (string or integer).
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list ``[links, barcodes]`` of two strings, each a ``', '``-joined
        enumeration (``''`` when nothing was found).

    Side effects:
        Prints ``<mms id>: <barcodes>`` as progress output.
    """
    url = ('https://open-na.hosted.exlibrisgroup.com/alma/43ACC_ONB/bf/entity/instance/'
           + quote(str(local_mms_id), safe=''))
    # Without a timeout a stalled connection would block the notebook forever.
    cont = requests.get(url, timeout=timeout).content
    e = etree.XML(cont)
    namespaces = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'bf': 'http://id.loc.gov/ontologies/bibframe/',
        'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
        'bflc': 'http://id.loc.gov/ontologies/bflc/'
    }
    links = e.xpath('/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource', namespaces=namespaces)
    barcodes = []
    for link in links:
        # Split once and reuse the pieces instead of splitting the link twice.
        parts = link.split('%2B')
        if len(parts) >= 2:
            barcodes.append('+' + parts[1])
    barcodesJoined = ", ".join(barcodes)
    print(str(local_mms_id) + ': ' + barcodesJoined)
    linksJoined = ", ".join(links)
    return [linksJoined, barcodesJoined]

def getCatalogLink(local_mms_id, timeout=30):
    """Build the ``http://data.onb.ac.at/rec/`` link for a record.

    Loads the BIBFRAME instance description for the MMS ID from the 43ACC_ONB
    linked-data endpoint. The identifier of the work this record is part of
    (``bf:Work/bf:partOf/bf:Work``) is preferred; if there is none, the local
    identifier of the instance itself is used.

    Args:
        local_mms_id: MMS ID of the record (string or integer).
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        ``'http://data.onb.ac.at/rec/'`` followed by the first identifier found.

    Raises:
        IndexError: If neither XPath yields an identifier.
        requests.exceptions.RequestException: If the request fails or times out.

    Side effects:
        Prints the MMS ID as progress output.
    """
    print(local_mms_id)
    url = ('https://open-na.hosted.exlibrisgroup.com/alma/43ACC_ONB/bf/entity/instance/'
           + quote(str(local_mms_id), safe=''))
    # Without a timeout a stalled connection would block the notebook forever.
    cont = requests.get(url, timeout=timeout).content
    e = etree.XML(cont)
    namespaces = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'bf': 'http://id.loc.gov/ontologies/bibframe/',
        'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
        'bflc': 'http://id.loc.gov/ontologies/bflc/'
    }
    result = e.xpath('/rdf:RDF/bf:Work/bf:partOf/bf:Work/bf:identifiedBy/bf:Identifier/rdf:value/text()', namespaces=namespaces)
    if not result:
        # Fallback: the instance's own local identifier.
        result = e.xpath('/rdf:RDF/bf:Instance/bf:identifiedBy/bf:Local/rdf:value/text()', namespaces=namespaces)
    if not result:
        # Same exception type as the former bare result[0], but with a usable message.
        raise IndexError('no identifier found in BIBFRAME data for MMS ID ' + repr(local_mms_id))
    return 'http://data.onb.ac.at/rec/' + result[0]

In [3]:
# Spot check: fetch the Dublin Core fields for a single MMS ID.
getDCDataMMS('990048102650603338')

['Sammlung der besten Reisebeschreibungen', '', '1784']


['Sammlung der besten Reisebeschreibungen', '', '1784']

In [4]:
# Spot check: fetch the Dublin Core fields for a single barcode and show them.
dc = getDCData('178966306')
print(dc)

['Vorstellung der vornehmsten Völkerschaften der Welt nach ihrer Abstammung, Ausbreitung und Sprachen', 'Breitenbauch, Georg-August vonaut', '1786']
['Vorstellung der vornehmsten Völkerschaften der Welt nach ihrer Abstammung, Ausbreitung und Sprachen', 'Breitenbauch, Georg-August vonaut', '1786']


In [5]:
# Load the list of barcodes to enrich.
df = pd.read_excel('exampleBarcodes.xlsx')
# Preview a few random rows. The seed is fixed so the preview is identical on
# every run, and the sample size is capped so short sheets do not raise.
df_sample = df.sample(min(3, len(df)), random_state=42).copy()
df_sample

Unnamed: 0,Identifier
0,Z164418102
2,Z97787406
3,Z198357107


In [6]:
# Look up title, contributor and date for every row and store them in three new
# columns. The first character of each identifier (the leading 'Z' in the
# sample shown above) is dropped before the barcode is passed to getDCData.
df[['Titel', 'Autor', 'Erscheinungsjahr']] = df.apply(
    lambda record: getDCData(str(record['Identifier'])[1:]),
    axis=1,
    result_type='expand',
)

['Sammlung der besten Reisebeschreibungen', '', '1784']
['Sammlung der besten Reisebeschreibungen', '', '1784']
['Geschichte der Reisen die seit Cook an der Nordwest- und Nordost-Küste von Amerika und in dem nördlichsten Amerika selbst von Meares,  Dixon, Portlock, Coxe, Long u. a. m. unternommen worden sind Mit vielen Karten und Kupfern', 'Forster, Georg1754-1794(DE-588)118534416edtO:H; Dixon, George1755-1800(DE-588)130525294ctbO:H; Long, JohnctbO:H; Meares, John1756-1809(DE-588)121248275ctbO:H; Portlock, Nathaniel1748-1817(DE-588)133848531ctbO:H; Portlock, NathanielautO:800O:H; Forster, [Johann] GeorgautAdamO:806O:H; Vossische BuchhandlungBerlin(DE-588)64386-5pblO:H', '1792']
['Sammlung der besten Reisebeschreibungen', 'Traßler, Joseph Georg1759-1816(DE-588)129262358prt', '1784']
['', '', '']
['Sammlung der besten Reisebeschreibungen', '', '1784']
['Beschreibung der äussern und innern Merkwürdigkeiten der Königlichen Schlösser in Berlin, Charlottenburg, Schönhausen in und bey Potsdam

In [7]:
# Write the enriched table to a new workbook. 'strings_to_urls': False stops
# XlsxWriter from turning URL-like strings into hyperlinks.
# XlsxWriter constructor options must be passed through engine_kwargs; the
# former `options=` keyword of pd.ExcelWriter is no longer accepted by current
# pandas. The context manager closes the workbook even if to_excel raises.
with pd.ExcelWriter(
    r'exampleBarcodes_extended.xlsx',
    engine='xlsxwriter',
    engine_kwargs={'options': {'strings_to_urls': False}},
) as writer:
    df.to_excel(writer)