diff --git a/ABOExamplesFromALMAextended.xlsx b/ABOExamplesFromALMAextended.xlsx index 4f153a4bf645473e029e82761daff0072a75846a..89e59793ff30d86534a5d155173f6b8fe6ded946 100644 Binary files a/ABOExamplesFromALMAextended.xlsx and b/ABOExamplesFromALMAextended.xlsx differ diff --git a/SRU.ipynb b/SRU.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1445705a11f16627b34b25124b26f6e3d861b415 --- /dev/null +++ b/SRU.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lxml import etree\n", + "import requests\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get the MMS ID for a given barcode, get the metadata via `https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB?version=1.2&query=alma.barcode=<barcode>&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc` and extract the MMS ID with `/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordIdentifier/text()`.\n", + "\n", + "Metadata in Dublin Core can be obtained via SRU.\n", + "\n", + "Linked Data from ALMA (library management system) can be retrieved in \n", + "\n", + "* BIBFRAME via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bf/entity/instance/<MMS ID>`\n", + "* JSON-LD via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bibs/<MMS ID>.jsonld`\n", + "* RDA/RDF via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/rda/entity/manifestation/<MMS ID>.rdf`\n", + "\n", + "For a Network Zone MMS ID the institution code is 43ACC_NETWORK and for the Institution MMS ID it is 43ACC_ONB.\n", + "\n", + "The following xpath `/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource` selects URLs of the Viewer. We use the + sign (URL encoded %2B) to split the URL in order to extract the Barcode." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def getMMS_ID(barcode):\n", + " cont=requests.get('https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB?version=1.2&query=alma.barcode=%2BZ' + barcode + '&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc').content\n", + " e = etree.XML(cont)\n", + " namespaces = {\n", + " 'srw': 'http://www.loc.gov/zing/srw/'\n", + " }\n", + " result = e.xpath('/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordIdentifier/text()', namespaces=namespaces)\n", + " return result[0]\n", + "\n", + "def getDCData(barcode):\n", + " cont=requests.get('https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB?version=1.2&query=alma.barcode=%2BZ' + barcode + '&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc').content\n", + " e = etree.XML(cont)\n", + " namespaces = {\n", + " 'srw': 'http://www.loc.gov/zing/srw/',\n", + " 'srw_dc': 'info:srw/schema/1/dc-schema',\n", + " 'dc': 'http://purl.org/dc/elements/1.1/'\n", + " }\n", + " xpath = '/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/srw_dc:dc/dc:{}/text()'\n", + " \n", + " titleResult = e.xpath(xpath.format('title'), namespaces=namespaces)\n", + " title = \"; \".join(titleResult) if titleResult else ''\n", + " \n", + " contributorResult = e.xpath(xpath.format('contributor'), namespaces=namespaces)\n", + " contributor = \"; \".join(contributorResult) if contributorResult else ''\n", + " \n", + " dateResult = e.xpath(xpath.format('date'), namespaces=namespaces)\n", + " date = \"; \".join(dateResult) if dateResult else ''\n", + " print([title, contributor, date])\n", + " return [title, contributor, date]\n", + "\n", + "def getLinksAndBarcodes(local_mms_id):\n", + " cont=requests.get('https://open-na.hosted.exlibrisgroup.com/alma/43ACC_ONB/bf/entity/instance/' + local_mms_id).content\n", + " e = etree.XML(cont)\n", + " namespaces = {\n", + " 'rdf': 
'http://www.w3.org/1999/02/22-rdf-syntax-ns#',\n", + " 'bf': 'http://id.loc.gov/ontologies/bibframe/',\n", + " 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',\n", + " 'bflc': 'http://id.loc.gov/ontologies/bflc/'\n", + " }\n", + " result = e.xpath('/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource', namespaces=namespaces)\n", + " barcodes = []\n", + " for link in result:\n", + " splits = link.split('%2B')\n", + " if len(splits) >= 2:\n", + " barcodes.append('+' + link.split('%2B')[1])\n", + " print(local_mms_id + ': ' + \", \".join(barcodes))\n", + " linksJoined = \", \".join(result)\n", + " barcodesJoined = \", \".join(barcodes)\n", + " #returns a list with URLs and Barcodes\n", + " return [linksJoined, barcodesJoined]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Vorstellung der vornehmsten Völkerschaften der Welt nach ihrer Abstammung, Ausbreitung und Sprachen', 'Breitenbauch, Georg-August vonaut', '1786']\n", + "['Vorstellung der vornehmsten Völkerschaften der Welt nach ihrer Abstammung, Ausbreitung und Sprachen', 'Breitenbauch, Georg-August vonaut', '1786']\n" + ] + } + ], + "source": [ + "dc = getDCData(str(178966306))\n", + "print(dc)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Identifier
5Z98085705
8Z6987270X
2Z97787406
\n", + "
" + ], + "text/plain": [ + " Identifier\n", + "5 Z98085705\n", + "8 Z6987270X\n", + "2 Z97787406" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_excel('exampleBarcodes.xlsx')\n", + "df_sample = df.sample(3).copy()\n", + "df_sample" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Sammlung der besten Reisebeschreibungen', '', '1784']\n", + "['Sammlung der besten Reisebeschreibungen', '', '1784']\n", + "['Geschichte der Reisen die seit Cook an der Nordwest- und Nordost-Küste von Amerika und in dem nördlichsten Amerika selbst von Meares, Dixon, Portlock, Coxe, Long u. a. m. unternommen worden sind Mit vielen Karten und Kupfern', 'Forster, Georg1754-1794(DE-588)118534416edtO:H; Dixon, George1755-1800(DE-588)130525294ctbO:H; Long, JohnctbO:H; Meares, John1756-1809(DE-588)121248275ctbO:H; Portlock, Nathaniel1748-1817(DE-588)133848531ctbO:H; Portlock, NathanielautO:800O:H; Forster, [Johann] GeorgautAdamO:806O:H; Vossische BuchhandlungBerlin(DE-588)64386-5pblO:H', '1792']\n", + "['Sammlung der besten Reisebeschreibungen', 'Traßler, Joseph Georg1759-1816(DE-588)129262358prt', '1784']\n", + "['Sammlung der besten Reisebeschreibungen', '', '1784']\n", + "['Sammlung der besten Reisebeschreibungen', '', '1784']\n", + "['Beschreibung der äussern und innern Merkwürdigkeiten der Königlichen Schlösser in Berlin, Charlottenburg, Schönhausen in und bey Potsdam', 'Rumpf, Friedrichaut', '1794']\n", + "[\"Neues Elementarwerk für die niedern Klassen lateinischer Schulen und Gymnasien nach einem zusammenhängenden und auf die Lesung klassischer Autoren in den obern Klassen, wie auch auf die übrigen Vorerkenntnisse künftiger Studirenden gründlich vorbereitenden Plane; <> Elementargeographie; M. J. E. Fabri's Elementargeographie; Fabri's Elementargeographie; M. J. E. 
Fabri's Elementargeographie; zweiten Zweiter\", 'Fabri, Johann Ernst1755-1825(DE-588)11536028Xaut; Gebauer, Johann Jakobpbl', '1790']\n", + "[\"Neues Elementarwerk für die niedern Klassen lateinischer Schulen und Gymnasien nach einem zusammenhängenden und auf die Lesung klassischer Autoren in den obern Klassen, wie auch auf die übrigen Vorerkenntnisse künftiger Studirenden gründlich vorbereitenden Plane; <> Elementargeographie; M. J. E. Fabri's Elementargeographie; Fabri's Elementargeographie; M. J. E. Fabri's Elementargeographie; zweiten Kursus\", 'Fabri, Johann Ernst1755-1825(DE-588)11536028Xaut; Gebauer, Johann Jakobpbl', '1790']\n" + ] + } + ], + "source": [ + "df[['Titel', 'Autor', 'Erscheinungsjahr']] = df.apply(lambda row: getDCData(str(row['Identifier'])[1:]), axis=1, result_type='expand')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdentifierTitelAutorErscheinungsjahr
0Z164418102Sammlung der besten Reisebeschreibungen1784
1Z98085808Sammlung der besten Reisebeschreibungen1784
2Z97787406Geschichte der Reisen die seit Cook an der Nor...Forster, Georg1754-1794(DE-588)118534416edtO:H...1792
3Z198357107Sammlung der besten ReisebeschreibungenTraßler, Joseph Georg1759-1816(DE-588)12926235...1784
4Z16441800XSammlung der besten Reisebeschreibungen1784
5Z98085705Sammlung der besten Reisebeschreibungen1784
6Z167356806Beschreibung der äussern und innern Merkwürdig...Rumpf, Friedrichaut1794
7Z69872607Neues Elementarwerk für die niedern Klassen la...Fabri, Johann Ernst1755-1825(DE-588)11536028Xa...1790
8Z6987270XNeues Elementarwerk für die niedern Klassen la...Fabri, Johann Ernst1755-1825(DE-588)11536028Xa...1790
\n", + "
" + ], + "text/plain": [ + " Identifier Titel \\\n", + "0 Z164418102 Sammlung der besten Reisebeschreibungen \n", + "1 Z98085808 Sammlung der besten Reisebeschreibungen \n", + "2 Z97787406 Geschichte der Reisen die seit Cook an der Nor... \n", + "3 Z198357107 Sammlung der besten Reisebeschreibungen \n", + "4 Z16441800X Sammlung der besten Reisebeschreibungen \n", + "5 Z98085705 Sammlung der besten Reisebeschreibungen \n", + "6 Z167356806 Beschreibung der äussern und innern Merkwürdig... \n", + "7 Z69872607 Neues Elementarwerk für die niedern Klassen la... \n", + "8 Z6987270X Neues Elementarwerk für die niedern Klassen la... \n", + "\n", + " Autor Erscheinungsjahr \n", + "0 1784 \n", + "1 1784 \n", + "2 Forster, Georg1754-1794(DE-588)118534416edtO:H... 1792 \n", + "3 Traßler, Joseph Georg1759-1816(DE-588)12926235... 1784 \n", + "4 1784 \n", + "5 1784 \n", + "6 Rumpf, Friedrichaut 1794 \n", + "7 Fabri, Johann Ernst1755-1825(DE-588)11536028Xa... 1790 \n", + "8 Fabri, Johann Ernst1755-1825(DE-588)11536028Xa... 
1790 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/classifier_predicted_travelogueD18.xlsx b/classifier_predicted_travelogueD18.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4973823e53fcfb53f5390db96ce611d5d2b8a475 Binary files /dev/null and b/classifier_predicted_travelogueD18.xlsx differ diff --git a/exampleBarcodes.xlsx b/exampleBarcodes.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..79e28f2ef4c5d87f3bbf0e872b1dfd4bb10f9157 Binary files /dev/null and b/exampleBarcodes.xlsx differ