def get_sparql_result(service, query):
    """Send ``query`` to the SPARQL endpoint ``service`` and return the raw result.

    The wrapper is configured to request JSON. The object returned is whatever
    ``SPARQLWrapper.query()`` yields; the caller still has to call ``convert()``
    on it to obtain the parsed response.
    """
    client = SPARQLWrapper(service)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response


def get_sparql_dataframe(service, query):
    """Run ``query`` against ``service`` and return the bindings as a DataFrame.

    The frame has one column per variable listed under ``head.vars`` of the
    converted response and one row per entry in ``results.bindings``. A
    variable that is absent from a binding yields ``None`` for that cell.
    """
    payload = get_sparql_result(service, query).convert()
    variables = payload['head']['vars']

    # Each binding maps a variable name to a dict; only its 'value' is kept.
    rows = [
        [binding.get(name, {}).get('value') for name in variables]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(rows, columns=variables)
def create_collection(description, list_of_manifest_ids_or_ids, timeout=30):
    """Create a SACHA collection and report the outcome.

    POSTs ``{"description": ..., "elements": ...}`` as JSON to
    ``https://iiif.onb.ac.at/presentation/collection`` and prints a
    SUCCESS/ERROR line chosen by the HTTP status code of the response.

    Args:
        description: Value sent as the ``description`` field of the request.
        list_of_manifest_ids_or_ids: Value sent as the ``elements`` field
            (the notebook passes a list of IIIF manifest URLs).
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without it an unresponsive server would block the kernel
            indefinitely.

    Returns:
        The ``url`` field of the JSON response when the server answers
        with status 201, otherwise ``None``. Returning the URL lets later
        cells use the new collection instead of copying it from the output.

    Raises:
        Any exception ``requests.post`` raises (e.g. on connection failure
        or when ``timeout`` expires) is propagated unchanged.
    """
    payload = {
        "description": description,
        "elements": list_of_manifest_ids_or_ids
    }
    creation_link = 'https://iiif.onb.ac.at/presentation/collection'
    result = requests.post(creation_link, json=payload, timeout=timeout)

    if result.status_code == 201:
        collection_url = result.json()['url']
        print('SUCCESS: Create collection {}'.format(collection_url))
        return collection_url

    if result.status_code == 400:
        print('ERROR: Request error creating collection')
        print(result.text)
    elif result.status_code == 500:
        print('ERROR: Server error creating collection')
        print(result.text)
    else:
        print('ERROR: General error creating collection, HTTP status = {}'.format(result.status_code))
    return None