diff --git a/ViewerlinksFromBibframe.ipynb b/ViewerlinksFromBibframe.ipynb index 8893f8b275f2318574913e3b471e90af6b571698..2daba8e473af19591255602a3bc3e1c4b13e36b3 100644 --- a/ViewerlinksFromBibframe.ipynb +++ b/ViewerlinksFromBibframe.ipynb @@ -3,9 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from lxml import etree\n", @@ -13,6 +11,21 @@ "import pandas as pd" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Linked Data from ALMA (library management system) can be retrieved in \n", + "\n", + "* BIBFRAME via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bf/entity/instance/<MMS ID>`\n", + "* JSON-LD via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bibs/<MMS ID>.jsonld`\n", + "* RDA/RDF via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/rda/entity/manifestation/<MMS ID>.rdf`\n", + "\n", + "For a Network Zone MMS ID the institution code is 43ACC_NETWORK and for the Institution MMS ID it is 43ACC_ONB.\n", + "\n", + "The following xpath `/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource` selects URLs of the Viewer. We use the + sign (URL encoded %2B) to split the URL in order to extract the Barcode." 
+ ] + }, { "cell_type": "code", "execution_count": 2, @@ -37,21 +50,32 @@ " print (local_mms_id + ': ' + \", \".join(barcodes))\n", " linksJoined = \", \".join(result)\n", " barcodesJoined = \", \".join(barcodes)\n", + " #returns a list with URLs and Barcodes\n", " return [linksJoined, barcodesJoined]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "export lists from ALMA as Excel file and read it into a pandas DataFrame (the column MMS-ID contains Institution MMS IDs)" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "NaN 990032334150603338\nNaN 990035648370603338\nNaN 990031644540603338\nName: MMS-ID, dtype: int64" + "NaN 990032334150603338\n", + "NaN 990035648370603338\n", + "NaN 990043237990603338\n", + "Name: MMS-ID, dtype: int64" ] }, - "execution_count": 11, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -62,30 +86,25 @@ "df_sample['MMS-ID']" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "add additional columns to the dataframe with ViewerLinks and Barcodes" + ] + }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "990032334150603338: +Z227525900, +Z172047601\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "990035648370603338: +Z219890307, +Z255756803\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "990031644540603338: +Z136879807, +Z172905502\n" + "990032334150603338: +Z227525900, +Z172047601\n", + "990035648370603338: +Z219890307, +Z255756803\n", + "990043237990603338: +Z172048009, +Z207476305\n" ] } ], @@ -93,9 +112,16 @@ "df_sample[['Viewerlinks','Barcodes']] = df_sample.apply(lambda row: getLinksAndBarcodes(str(row['MMS-ID'])), axis=1, result_type='expand')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "write the extended dataframe into an Excel file again" + ] + }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -114,23 +140,23 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.6.7" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/ViewerlinksFromBibframeWithSRU.ipynb b/ViewerlinksFromBibframeWithSRU.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9f4cec3769cae04d69c92c456b98501e2c47ef9f --- /dev/null +++ b/ViewerlinksFromBibframeWithSRU.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from lxml import etree\n", + "import requests\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get the MMS ID for a given barcode, get the metadata via `https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB?version=1.2&query=alma.barcode=<barcode>&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc` and extract the MMS ID with `/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordIdentifier/text()`.\n", + "\n", + "Linked Data from ALMA (library management system) can be retrieved in \n", + "\n", + "* BIBFRAME via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bf/entity/instance/<MMS ID>`\n", + "* JSON-LD via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/bibs/<MMS ID>.jsonld`\n", + "* RDA/RDF via `https://open-na.hosted.exlibrisgroup.com/alma/<institution code>/rda/entity/manifestation/<MMS ID>.rdf`\n", + "\n", + "For a 
# Institution codes: a Network Zone MMS ID uses 43ACC_NETWORK, an Institution
# MMS ID uses 43ACC_ONB.
#
# The XPath
#   /rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource
# selects the URLs of the Viewer. Each URL is split at the URL-encoded + sign
# (%2B) in order to extract the barcode.


def getMMS_ID(barcode, timeout=30):
    """Return the MMS ID of the record that an item barcode belongs to.

    Sends an SRU ``searchRetrieve`` request (``alma.barcode`` index,
    ``recordSchema=dc``, at most one record) to the 43ACC_ONB Alma endpoint
    and reads the ``srw:recordIdentifier`` of the returned record.

    Args:
        barcode: Barcode without its leading ``+Z``; the prefix is added in
            URL-encoded form (``%2BZ``) when the query is built. Non-string
            values are converted with ``str``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text of the first ``srw:recordIdentifier`` element.

    Raises:
        IndexError: If the response contains no record for the barcode.
        requests.HTTPError: If the server answers with an error status.
    """
    barcode = str(barcode)
    url = (
        'https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB'
        '?version=1.2&query=alma.barcode=%2BZ' + barcode
        + '&startRecord=0&maximumRecords=1&operation=searchRetrieve&recordSchema=dc'
    )
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the XML parser.
    response.raise_for_status()
    root = etree.XML(response.content)
    namespaces = {
        'srw': 'http://www.loc.gov/zing/srw/'
    }
    identifiers = root.xpath(
        '/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordIdentifier/text()',
        namespaces=namespaces,
    )
    if not identifiers:
        # Same exception type the bare ``result[0]`` used to raise, but with a
        # message that says which barcode could not be resolved.
        raise IndexError('no record found for barcode +Z' + barcode)
    return identifiers[0]


def getLinksAndBarcodes(local_mms_id, timeout=30):
    """Fetch the Viewer links of a record and the barcodes contained in them.

    Downloads the BIBFRAME instance description of ``local_mms_id`` from the
    43ACC_ONB institution, collects every ``bflc:locator`` resource URL found
    under ``bf:electronicLocator`` and derives a barcode from each URL that
    contains a URL-encoded plus sign (``%2B``).

    Side effect: prints ``"<mms id>: <barcodes>"`` as a progress line.

    Args:
        local_mms_id: Institution MMS ID; non-string values are converted
            with ``str``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A two-element list ``[links, barcodes]``; each element is a single
        string with the individual values joined by ``", "``. Both strings
        are empty when the record has no matching locator.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    local_mms_id = str(local_mms_id)
    response = requests.get(
        'https://open-na.hosted.exlibrisgroup.com/alma/43ACC_ONB/bf/entity/instance/' + local_mms_id,
        timeout=timeout,
    )
    response.raise_for_status()
    root = etree.XML(response.content)
    namespaces = {
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'bf': 'http://id.loc.gov/ontologies/bibframe/',
        'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
        'bflc': 'http://id.loc.gov/ontologies/bflc/'
    }
    links = root.xpath(
        '/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource',
        namespaces=namespaces,
    )
    barcodes = []
    for link in links:
        # The barcode is the segment after the first %2B; links without a
        # %2B carry no barcode and are skipped. Split once and reuse.
        parts = link.split('%2B')
        if len(parts) >= 2:
            barcodes.append('+' + parts[1])
    print(local_mms_id + ': ' + ", ".join(barcodes))
    # Returns a list with URLs and barcodes, each joined into one string.
    return [", ".join(links), ", ".join(barcodes)]


# Example usage (performs live HTTP requests):
#     mms_id = getMMS_ID(str(255756803))
#     getLinksAndBarcodes(str(mms_id))
# which printed "990035648370603338: +Z219890307, +Z255756803" and returned
#     ['http://data.onb.ac.at/ABO/%2BZ219890307, http://data.onb.ac.at/ABO/%2BZ255756803',
#      '+Z219890307, +Z255756803']