Skip to content
SACHA Statistik Collection.ipynb 5.25 KiB
Newer Older
Georg Petz's avatar
Georg Petz committed
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
    "from SPARQLWrapper import SPARQLWrapper, JSON\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set the SPARQL-Endpoint:\n",
    "* https://lod.onb.ac.at/sparql/anno for ANNO\n",
    "* https://lod.onb.ac.at/sparql/akon for AKON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "anno_lod_endpoint = \"https://lod.onb.ac.at/sparql/anno\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Methods to query the endpoint and build the dataframe:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_sparql_result(service, query):\n",
    "    sparql = SPARQLWrapper(service)\n",
    "    sparql.setQuery(query)\n",
    "    sparql.setReturnFormat(JSON)\n",
    "    return sparql.query()\n",
    "\n",
    "def get_sparql_dataframe(service, query):\n",
    "    result = get_sparql_result(service, query)\n",
    "\n",
    "    processed_results = result.convert()\n",
    "    cols = processed_results['head']['vars']\n",
    "\n",
    "    out = []\n",
    "    for row in processed_results['results']['bindings']:\n",
    "        item = []\n",
    "        for c in cols:\n",
    "            item.append(row.get(c, {}).get('value'))\n",
    "        out.append(item)\n",
    "\n",
    "    return pd.DataFrame(out, columns=cols)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select all newspapers and periodicals with subjectheading Statistik:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = '''\n",
    "PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
    "PREFIX edm: <http://www.europeana.eu/schemas/edm/>\n",
    "PREFIX dcterms: <http://purl.org/dc/terms/>\n",
    "SELECT ?title ?subjectURI ?manifest \n",
    "WHERE {?subjectURI dc:subject <http://d-nb.info/gnd/4056995-0> .\n",
    "       ?subjectURI dc:title ?title .\n",
    "       ?subjectURI edm:isShownBy ?firstpage .\n",
    "       ?subjectURI edm:rights <http://creativecommons.org/publicdomain/mark/1.0/> .\n",
    "       ?firstpage dcterms:isReferencedBy ?manifest\n",
    "}'''"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get list of IIIF Manifests URLs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['http://iiif.onb.ac.at/presentation/ANNO/stm1875ag0001/manifest',\n",
       " 'http://iiif.onb.ac.at/presentation/ANNO/stm1876ag0001/manifest',\n",
       " 'http://iiif.onb.ac.at/presentation/ANNO/stm1877ag0001/manifest']"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = get_sparql_dataframe(anno_lod_endpoint, query)\n",
    "manifests = list(df['manifest'])\n",
    "manifests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Function to create a SACHA Collection (https://iiif.onb.ac.at/api#_collectionspostjsonprocessor):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_collection(description, list_of_manifest_ids_or_ids):\n",
    "    j = {\n",
    "        \"description\": description,\n",
    "        \"elements\": list_of_manifest_ids_or_ids\n",
    "    }\n",
    "    creation_link = 'https://iiif.onb.ac.at/presentation/collection'\n",
    "    result = requests.post(creation_link, json=j)\n",
    "    if result.status_code == 201:\n",
    "        print('SUCCESS: Create collection {}'.format(result.json()['url']))\n",
    "    elif result.status_code == 400:\n",
    "        print('ERROR: Request error creating collection')\n",
    "        print(result.text)\n",
    "    elif result.status_code == 500:\n",
    "        print('ERROR: Server error creating collection')\n",
    "        print(result.text)\n",
    "    else:\n",
    "        print('ERROR: General error creating collection, HTTP status = {}'.format(result.status_code))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create the SACHA Collection:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SUCCESS: Create collection https://iiif.onb.ac.at/presentation/collection/8hIOHDd7hW\n"
     ]
    }
   ],
   "source": [
    "create_collection(\"newspaper with subject heading Statistik\", manifests)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}