Skip to content
ViewerlinksFromBibframe.ipynb 3.41 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from lxml import etree\n",
    "import requests\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getLinksAndBarcodes(local_mms_id):\n",
    "    cont=requests.get('https://open-na.hosted.exlibrisgroup.com/alma/43ACC_ONB/bf/entity/instance/' + local_mms_id).content\n",
    "    e = etree.XML(cont)\n",
    "    namespaces = {\n",
    "        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',\n",
    "        'bf': 'http://id.loc.gov/ontologies/bibframe/',\n",
    "        'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',\n",
    "        'bflc': 'http://id.loc.gov/ontologies/bflc/'\n",
    "    }\n",
    "    result = e.xpath('/rdf:RDF/bf:Instance/bf:hasItem/bf:Item/bf:electronicLocator/rdfs:Resource/bflc:locator/@rdf:resource', namespaces=namespaces)\n",
    "    barcodes = []\n",
    "    for link in result:\n",
    "        splits = link.split('%2B')\n",
    "        if len(splits) >= 2:\n",
    "            barcodes.append('+' + link.split('%2B')[1])\n",
    "    print (local_mms_id + ': ' + \", \".join(barcodes))\n",
    "    linksJoined = \", \".join(result)\n",
    "    barcodesJoined = \", \".join(barcodes)\n",
    "    return [linksJoined, barcodesJoined]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NaN    990032334150603338\nNaN    990035648370603338\nNaN    990031644540603338\nName: MMS-ID, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_excel('ABOExamplesFromALMA.xlsx')\n",
    "df_sample = df.sample(3).copy()\n",
    "df_sample['MMS-ID']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "990032334150603338: +Z227525900, +Z172047601\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "990035648370603338: +Z219890307, +Z255756803\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "990031644540603338: +Z136879807, +Z172905502\n"
     ]
    }
   ],
   "source": [
    "df_sample[['Viewerlinks','Barcodes']] = df_sample.apply(lambda row: getLinksAndBarcodes(str(row['MMS-ID'])), axis=1, result_type='expand')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "writer = pd.ExcelWriter(r'ABOExamplesFromALMAextended.xlsx', engine='xlsxwriter',options={'strings_to_urls': False})\n",
    "df_sample.to_excel(writer)\n",
    "writer.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}