From 3e026bf882060550ad33b88773878d410ecc8597 Mon Sep 17 00:00:00 2001 From: Georg Petz Date: Mon, 4 Jul 2022 16:37:53 +0200 Subject: [PATCH] final rehearsal --- 1 - Overview.ipynb | 8 +- 2 - Metadata and Catalogue.ipynb | 552 +++++++++++++-------------- 2.1 - SRU & LOD.ipynb | 4 +- 2.3 - SPARQL.ipynb | 2 +- 3 - Images and Text.ipynb | 8 +- 3.3 - Text - Download OCR Text.ipynb | 360 ++++++++--------- requirements.txt | 1 + 7 files changed, 451 insertions(+), 484 deletions(-) diff --git a/1 - Overview.ipynb b/1 - Overview.ipynb index ff12299..2eed30c 100644 --- a/1 - Overview.ipynb +++ b/1 - Overview.ipynb @@ -106,11 +106,7 @@ } }, "source": [ - "# 1 - Overview\n", - "\n", - "[https://labs.onb.ac.at](https://labs.onb.ac.at)\n", - "\n", - "https://labs.onb.ac.at/gitlab/labs-team/pydays19/-/tree/UnlocktheLibraries" + "# 1 - Overview" ] }, { @@ -316,7 +312,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/2 - Metadata and Catalogue.ipynb b/2 - Metadata and Catalogue.ipynb index ce68470..8db06ed 100644 --- a/2 - Metadata and Catalogue.ipynb +++ b/2 - Metadata and Catalogue.ipynb @@ -656,15 +656,15 @@ " \n", " \n", " 990030217420603338\n", - " 22288570940003338\n", " 43ACC_ONB\n", " ZALT\n", - " State Hall at Josefsplatz\n", + " State Hall closed due to renovation\n", " 80.J.58\n", " available\n", " 1\n", " 0\n", " PRUNK\n", + " 8\n", " 1\n", " Department of Manuscripts and Rare Books (ALT)\n", " \n", @@ -690,7 +690,7 @@ " \n", " \n", " true\n", - " 2022-06-22T10:35:42+0200\n", + " 2022-07-04T15:50:06+0200\n", " \n", "\n", "\n" @@ -903,50 +903,6 @@ " * http://www.w3.org/TR/sparql11-protocol/" ] }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - }, - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "* Retrieve and manipulate data stored in RDF\n", - "* Explore data by querying unknown relationships\n", - "* Perform complex joins of disparate databases in a single, simple query\n", - "* Transform RDF data from one vocabulary to another" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "pycharm": { - "name": "#%% md\n" - }, - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "* SPARQL Standards\n", - "\n", - " * SPARQL Query\n", - " * Declarative query language for RDF data\n", - " * http://www.w3.org/TR/rdf-sparql-query/\n", - " * SPARQL Algebra\n", - " * Defines the semantics of a SPARQL query execution\n", - " * http://www.w3.org/2001/sw/DataAccess/rq23/rq24-algebra.html\n", - " * SPARQL Update\n", - " * Declarative manipulation language for RDF data\n", - " * http://www.w3.org/TR/sparql11-update/\n", - " * SPARQL Protocol\n", - " * Standard for communication between SPARQL services and clients\n", - " * http://www.w3.org/TR/sparql11-protocol/" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1143,33 +1099,33 @@ " \n", " \n", " 0\n", - " https://d-nb.info/gnd/118540238\n", - " https://d-nb.info/standards/elementset/gnd#var...\n", - " N8b86883c82bc477d9e7d7fa44bdddac5\n", + " N5efb69221ee348b4996a1c5103086413\n", + " https://d-nb.info/standards/elementset/gnd#sur...\n", + " Ǧūta\n", " \n", " \n", " 1\n", - " https://d-nb.info/gnd/118540238\n", - " https://d-nb.info/standards/elementset/gnd#var...\n", - " N88de5f3df2a8441493655d05943fcc69\n", + " N64140a9af9494b67838bc499a434fb6c\n", + " https://d-nb.info/standards/elementset/gnd#for...\n", + " Y. W.\n", " \n", " \n", " 2\n", " https://d-nb.info/gnd/118540238\n", - " https://d-nb.info/standards/elementset/gnd#var...\n", - " Nf11a189bea6440fd8d1fac51702eda8f\n", + " https://d-nb.info/standards/elementset/agrelon...\n", + " https://d-nb.info/gnd/118628011\n", " \n", " \n", " 3\n", - " Nf71b85e3a7b448dc91fe3e58b98f674b\n", - " https://d-nb.info/standards/elementset/gnd#per...\n", - " Ǧītah\n", + " Nca003c50f6a54f498e337f79449933a8\n", + " https://d-nb.info/standards/elementset/gnd#prefix\n", + " fūn\n", " \n", " \n", " 4\n", - " Ne661943e24434827b7044cd61c40384f\n", - " https://d-nb.info/standards/elementset/gnd#sur...\n", - " Göte\n", + " https://d-nb.info/gnd/118540238\n", + " https://d-nb.info/standards/elementset/gnd#var...\n", + " Nc86ab93ed6354f5a9ac07cd76f6a6820\n", " \n", " \n", " ...\n", @@ -1179,33 +1135,33 @@ " \n", " \n", " 706\n", - " N750f4ea68dfa4d6ba34bc4e8177bfff9\n", - " https://d-nb.info/standards/elementset/gnd#sur...\n", - " Gete\n", + " https://d-nb.info/gnd/118540238\n", + " https://d-nb.info/standards/elementset/gnd#var...\n", + " Gêtê, Y. W.\n", " \n", " \n", " 707\n", - " N265c000c69ec439ab46db8b1b21e33a9\n", - " https://d-nb.info/standards/elementset/gnd#per...\n", - " Ǧīta\n", + " https://d-nb.info/gnd/118540238\n", + " https://d-nb.info/standards/elementset/gnd#var...\n", + " Goethe, Wolfgango\n", " \n", " \n", " 708\n", - " Na62bbb74b64c48dab867e632df1dfac0\n", - " https://d-nb.info/standards/elementset/gnd#sur...\n", - " Gyot'e\n", + " Na4a100fa5c23448f8f64bda9eb91d83a\n", + " https://d-nb.info/standards/elementset/gnd#for...\n", + " Yūhān Fūlfġānḡ\n", " \n", " \n", " 709\n", - " Na5d63b6f7566466692314a110065cc9a\n", + " Nfcf843b6ac57482395b8e41f042ef21a\n", " https://d-nb.info/standards/elementset/gnd#for...\n", - " Johann Volfgang\n", + " G. L.\n", " \n", " \n", " 710\n", " https://d-nb.info/gnd/118540238\n", " https://d-nb.info/standards/elementset/gnd#var...\n", - " Hëte, &Euml;han Vol'fhanh\n", + " Ge de\n", " \n", " \n", "\n", @@ -1214,43 +1170,43 @@ ], "text/plain": [ " 0 \\\n", - "0 https://d-nb.info/gnd/118540238 \n", - "1 https://d-nb.info/gnd/118540238 \n", + "0 N5efb69221ee348b4996a1c5103086413 \n", + "1 N64140a9af9494b67838bc499a434fb6c \n", "2 https://d-nb.info/gnd/118540238 \n", - "3 Nf71b85e3a7b448dc91fe3e58b98f674b \n", - "4 Ne661943e24434827b7044cd61c40384f \n", + "3 Nca003c50f6a54f498e337f79449933a8 \n", + "4 https://d-nb.info/gnd/118540238 \n", ".. ... \n", - "706 N750f4ea68dfa4d6ba34bc4e8177bfff9 \n", - "707 N265c000c69ec439ab46db8b1b21e33a9 \n", - "708 Na62bbb74b64c48dab867e632df1dfac0 \n", - "709 Na5d63b6f7566466692314a110065cc9a \n", + "706 https://d-nb.info/gnd/118540238 \n", + "707 https://d-nb.info/gnd/118540238 \n", + "708 Na4a100fa5c23448f8f64bda9eb91d83a \n", + "709 Nfcf843b6ac57482395b8e41f042ef21a \n", "710 https://d-nb.info/gnd/118540238 \n", "\n", " 1 \\\n", - "0 https://d-nb.info/standards/elementset/gnd#var... \n", - "1 https://d-nb.info/standards/elementset/gnd#var... \n", - "2 https://d-nb.info/standards/elementset/gnd#var... \n", - "3 https://d-nb.info/standards/elementset/gnd#per... \n", - "4 https://d-nb.info/standards/elementset/gnd#sur... \n", + "0 https://d-nb.info/standards/elementset/gnd#sur... \n", + "1 https://d-nb.info/standards/elementset/gnd#for... \n", + "2 https://d-nb.info/standards/elementset/agrelon... \n", + "3 https://d-nb.info/standards/elementset/gnd#prefix \n", + "4 https://d-nb.info/standards/elementset/gnd#var... \n", ".. ... \n", - "706 https://d-nb.info/standards/elementset/gnd#sur... \n", - "707 https://d-nb.info/standards/elementset/gnd#per... \n", - "708 https://d-nb.info/standards/elementset/gnd#sur... \n", + "706 https://d-nb.info/standards/elementset/gnd#var... \n", + "707 https://d-nb.info/standards/elementset/gnd#var... \n", + "708 https://d-nb.info/standards/elementset/gnd#for... \n", "709 https://d-nb.info/standards/elementset/gnd#for... \n", "710 https://d-nb.info/standards/elementset/gnd#var... \n", "\n", " 2 \n", - "0 N8b86883c82bc477d9e7d7fa44bdddac5 \n", - "1 N88de5f3df2a8441493655d05943fcc69 \n", - "2 Nf11a189bea6440fd8d1fac51702eda8f \n", - "3 Ǧītah \n", - "4 Göte \n", + "0 Ǧūta \n", + "1 Y. W. \n", + "2 https://d-nb.info/gnd/118628011 \n", + "3 fūn \n", + "4 Nc86ab93ed6354f5a9ac07cd76f6a6820 \n", ".. ... \n", - "706 Gete \n", - "707 Ǧīta \n", - "708 Gyot'e \n", - "709 Johann Volfgang \n", - "710 Hëte, Ëhan Vol'fhanh \n", + "706 Gêtê, Y. W. \n", + "707 Goethe, Wolfgango \n", + "708 Yūhān Fūlfġānḡ \n", + "709 G. L. \n", + "710 Ge de \n", "\n", "[711 rows x 3 columns]" ] @@ -1323,38 +1279,38 @@ " \n", " 0\n", " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#old...\n", - " (DE-588c)4038243-6\n", + " https://d-nb.info/standards/elementset/gnd#gnd...\n", + " https://d-nb.info/standards/vocab/gnd/gnd-sc#2...\n", " \n", " \n", " 1\n", - " https://d-nb.info/gnd/4038243-6/about\n", - " http://purl.org/dc/terms/license\n", - " http://creativecommons.org/publicdomain/zero/1.0/\n", + " https://d-nb.info/gnd/4038243-6\n", + " https://d-nb.info/standards/elementset/gnd#gnd...\n", + " 4038243-6\n", " \n", " \n", " 2\n", " https://d-nb.info/gnd/4038243-6\n", - " http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", - " https://d-nb.info/standards/elementset/gnd#Sub...\n", + " https://d-nb.info/standards/elementset/gnd#rel...\n", + " http://dewey.info/class/610/\n", " \n", " \n", " 3\n", " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#var...\n", - " Heilkunst\n", + " http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", + " https://d-nb.info/standards/elementset/gnd#Sub...\n", " \n", " \n", " 4\n", - " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#rel...\n", - " http://dewey.info/class/610/\n", + " https://d-nb.info/gnd/4038243-6/about\n", + " http://purl.org/dc/terms/license\n", + " http://creativecommons.org/publicdomain/zero/1.0/\n", " \n", " \n", " 5\n", + " https://d-nb.info/gnd/4038243-6\n", + " http://www.w3.org/2007/05/powder-s#describedby\n", " https://d-nb.info/gnd/4038243-6/about\n", - " http://purl.org/dc/terms/modified\n", - " 2022-04-15T15:15:00\n", " \n", " \n", " 6\n", @@ -1365,32 +1321,32 @@ " \n", " 7\n", " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#var...\n", - " Humanmedizin\n", + " https://d-nb.info/standards/elementset/gnd#pre...\n", + " Medizin\n", " \n", " \n", " 8\n", " https://d-nb.info/gnd/4038243-6\n", - " http://www.w3.org/2007/05/powder-s#describedby\n", - " https://d-nb.info/gnd/4038243-6/about\n", + " https://d-nb.info/standards/elementset/gnd#old...\n", + " (DE-588c)4038243-6\n", " \n", " \n", " 9\n", " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#pre...\n", - " Medizin\n", + " https://d-nb.info/standards/elementset/gnd#var...\n", + " Humanmedizin\n", " \n", " \n", " 10\n", " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#gnd...\n", - " 4038243-6\n", + " https://d-nb.info/standards/elementset/gnd#var...\n", + " Heilkunst\n", " \n", " \n", " 11\n", - " https://d-nb.info/gnd/4038243-6\n", - " https://d-nb.info/standards/elementset/gnd#gnd...\n", - " https://d-nb.info/standards/vocab/gnd/gnd-sc#2...\n", + " https://d-nb.info/gnd/4038243-6/about\n", + " http://purl.org/dc/terms/modified\n", + " 2022-04-15T15:15:00\n", " \n", " \n", "\n", @@ -1399,45 +1355,45 @@ "text/plain": [ " 0 \\\n", "0 https://d-nb.info/gnd/4038243-6 \n", - "1 https://d-nb.info/gnd/4038243-6/about \n", + "1 https://d-nb.info/gnd/4038243-6 \n", "2 https://d-nb.info/gnd/4038243-6 \n", "3 https://d-nb.info/gnd/4038243-6 \n", - "4 https://d-nb.info/gnd/4038243-6 \n", - "5 https://d-nb.info/gnd/4038243-6/about \n", + "4 https://d-nb.info/gnd/4038243-6/about \n", + "5 https://d-nb.info/gnd/4038243-6 \n", "6 https://d-nb.info/gnd/4038243-6 \n", "7 https://d-nb.info/gnd/4038243-6 \n", "8 https://d-nb.info/gnd/4038243-6 \n", "9 https://d-nb.info/gnd/4038243-6 \n", "10 https://d-nb.info/gnd/4038243-6 \n", - "11 https://d-nb.info/gnd/4038243-6 \n", + "11 https://d-nb.info/gnd/4038243-6/about \n", "\n", " 1 \\\n", - "0 https://d-nb.info/standards/elementset/gnd#old... \n", - "1 http://purl.org/dc/terms/license \n", - "2 http://www.w3.org/1999/02/22-rdf-syntax-ns#type \n", - "3 https://d-nb.info/standards/elementset/gnd#var... \n", - "4 https://d-nb.info/standards/elementset/gnd#rel... \n", - "5 http://purl.org/dc/terms/modified \n", + "0 https://d-nb.info/standards/elementset/gnd#gnd... \n", + "1 https://d-nb.info/standards/elementset/gnd#gnd... \n", + "2 https://d-nb.info/standards/elementset/gnd#rel... \n", + "3 http://www.w3.org/1999/02/22-rdf-syntax-ns#type \n", + "4 http://purl.org/dc/terms/license \n", + "5 http://www.w3.org/2007/05/powder-s#describedby \n", "6 http://www.w3.org/2002/07/owl#sameAs \n", - "7 https://d-nb.info/standards/elementset/gnd#var... \n", - "8 http://www.w3.org/2007/05/powder-s#describedby \n", - "9 https://d-nb.info/standards/elementset/gnd#pre... \n", - "10 https://d-nb.info/standards/elementset/gnd#gnd... \n", - "11 https://d-nb.info/standards/elementset/gnd#gnd... \n", + "7 https://d-nb.info/standards/elementset/gnd#pre... \n", + "8 https://d-nb.info/standards/elementset/gnd#old... \n", + "9 https://d-nb.info/standards/elementset/gnd#var... \n", + "10 https://d-nb.info/standards/elementset/gnd#var... \n", + "11 http://purl.org/dc/terms/modified \n", "\n", " 2 \n", - "0 (DE-588c)4038243-6 \n", - "1 http://creativecommons.org/publicdomain/zero/1.0/ \n", - "2 https://d-nb.info/standards/elementset/gnd#Sub... \n", - "3 Heilkunst \n", - "4 http://dewey.info/class/610/ \n", - "5 2022-04-15T15:15:00 \n", + "0 https://d-nb.info/standards/vocab/gnd/gnd-sc#2... \n", + "1 4038243-6 \n", + "2 http://dewey.info/class/610/ \n", + "3 https://d-nb.info/standards/elementset/gnd#Sub... \n", + "4 http://creativecommons.org/publicdomain/zero/1.0/ \n", + "5 https://d-nb.info/gnd/4038243-6/about \n", "6 http://www.wikidata.org/entity/Q11190 \n", - "7 Humanmedizin \n", - "8 https://d-nb.info/gnd/4038243-6/about \n", - "9 Medizin \n", - "10 4038243-6 \n", - "11 https://d-nb.info/standards/vocab/gnd/gnd-sc#2... " + "7 Medizin \n", + "8 (DE-588c)4038243-6 \n", + "9 Humanmedizin \n", + "10 Heilkunst \n", + "11 2022-04-15T15:15:00 " ] }, "execution_count": 6, @@ -1498,27 +1454,27 @@ " \n", " \n", " 0\n", - " https://d-nb.info/standards/elementset/gnd#old...\n", + " https://d-nb.info/standards/elementset/gnd#gnd...\n", " \n", " \n", " 1\n", - " http://purl.org/dc/terms/license\n", + " https://d-nb.info/standards/elementset/gnd#gnd...\n", " \n", " \n", " 2\n", - " http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", + " https://d-nb.info/standards/elementset/gnd#rel...\n", " \n", " \n", " 3\n", - " https://d-nb.info/standards/elementset/gnd#var...\n", + " http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", " \n", " \n", " 4\n", - " https://d-nb.info/standards/elementset/gnd#rel...\n", + " http://purl.org/dc/terms/license\n", " \n", " \n", " 5\n", - " http://purl.org/dc/terms/modified\n", + " http://www.w3.org/2007/05/powder-s#describedby\n", " \n", " \n", " 6\n", @@ -1526,19 +1482,19 @@ " \n", " \n", " 7\n", - " http://www.w3.org/2007/05/powder-s#describedby\n", + " https://d-nb.info/standards/elementset/gnd#pre...\n", " \n", " \n", " 8\n", - " https://d-nb.info/standards/elementset/gnd#pre...\n", + " https://d-nb.info/standards/elementset/gnd#old...\n", " \n", " \n", " 9\n", - " https://d-nb.info/standards/elementset/gnd#gnd...\n", + " https://d-nb.info/standards/elementset/gnd#var...\n", " \n", " \n", " 10\n", - " https://d-nb.info/standards/elementset/gnd#gnd...\n", + " http://purl.org/dc/terms/modified\n", " \n", " \n", "\n", @@ -1546,17 +1502,17 @@ ], "text/plain": [ " 0\n", - "0 https://d-nb.info/standards/elementset/gnd#old...\n", - "1 http://purl.org/dc/terms/license\n", - "2 http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", - "3 https://d-nb.info/standards/elementset/gnd#var...\n", - "4 https://d-nb.info/standards/elementset/gnd#rel...\n", - "5 http://purl.org/dc/terms/modified\n", + "0 https://d-nb.info/standards/elementset/gnd#gnd...\n", + "1 https://d-nb.info/standards/elementset/gnd#gnd...\n", + "2 https://d-nb.info/standards/elementset/gnd#rel...\n", + "3 http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", + "4 http://purl.org/dc/terms/license\n", + "5 http://www.w3.org/2007/05/powder-s#describedby\n", "6 http://www.w3.org/2002/07/owl#sameAs\n", - "7 http://www.w3.org/2007/05/powder-s#describedby\n", - "8 https://d-nb.info/standards/elementset/gnd#pre...\n", - "9 https://d-nb.info/standards/elementset/gnd#gnd...\n", - "10 https://d-nb.info/standards/elementset/gnd#gnd..." + "7 https://d-nb.info/standards/elementset/gnd#pre...\n", + "8 https://d-nb.info/standards/elementset/gnd#old...\n", + "9 https://d-nb.info/standards/elementset/gnd#var...\n", + "10 http://purl.org/dc/terms/modified" ] }, "execution_count": 7, @@ -1612,17 +1568,26 @@ " \n", " \n", " \n", + " 0\n", " \n", " \n", " \n", + " \n", + " 0\n", + " Humanmedizin\n", + " \n", + " \n", + " 1\n", + " Heilkunst\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" + " 0\n", + "0 Humanmedizin\n", + "1 Heilkunst" ] }, "execution_count": 8, @@ -1635,11 +1600,9 @@ "g=Graph()\n", "g.parse(medicine_rdf)\n", "properties = g.query('''\n", - " PREFIX gndo: \n", - " SELECT ?o \n", + " SELECT ?o\n", " WHERE {\n", " ?s gndo:variantNameForTheSubjectHeading ?o .\n", - " #?s gndo:preferredNameForTheSubjectHeading ?o .\n", " }\n", "''')\n", "df_medicine = pd.DataFrame(properties)\n", @@ -1691,177 +1654,177 @@ " \n", " \n", " 1\n", - " https://d-nb.info/standards/vocab/gnd/gnd-sc#1...\n", + " https://d-nb.info/standards/vocab/gnd/geograph...\n", " 1\n", " \n", " \n", " 2\n", - " https://d-nb.info/gnd/4046517-2\n", + " https://d-nb.info/gnd/1243925787\n", " 1\n", " \n", " \n", " 3\n", - " https://d-nb.info/gnd/4077258-5\n", + " http://creativecommons.org/publicdomain/zero/1.0/\n", " 1\n", " \n", " \n", " 4\n", - " Kreisky, Bruno\n", + " https://d-nb.info/gnd/1243925787\n", " 1\n", " \n", " \n", " 5\n", - " https://d-nb.info/standards/elementset/gnd#Dif...\n", + " https://d-nb.info/gnd/118566512/about\n", " 1\n", " \n", " \n", " 6\n", - " http://viaf.org/viaf/31998484\n", + " http://dbpedia.org/resource/Bruno_Kreisky\n", " 1\n", " \n", " \n", " 7\n", - " https://de.wikipedia.org/wiki/Bruno_Kreisky\n", + " http://id.loc.gov/rwo/agents/n50043948\n", " 1\n", " \n", " \n", " 8\n", - " http://dbpedia.org/resource/Bruno_Kreisky\n", + " Bruno\n", " 1\n", " \n", " \n", " 9\n", - " https://d-nb.info/gnd/118566512/about\n", + " https://d-nb.info/gnd/4077258-5\n", " 1\n", " \n", " \n", " 10\n", - " https://d-nb.info/standards/vocab/gnd/geograph...\n", + " https://d-nb.info/gnd/4046517-2\n", " 1\n", " \n", " \n", " 11\n", - " Bundeskanzler 1970-1983\n", + " Kreisky\n", " 1\n", " \n", " \n", " 12\n", - " http://www.filmportal.de/person/5B113A52F8F14A...\n", + " (DE-588a)118566512\n", " 1\n", " \n", " \n", " 13\n", - " (DE-588a)118566512\n", + " http://www.filmportal.de/person/5B113A52F8F14A...\n", " 1\n", " \n", " \n", " 14\n", - " https://d-nb.info/standards/vocab/gnd/geograph...\n", + " Politiker <SPÖ>\n", " 1\n", " \n", " \n", " 15\n", - " (DE-588)1243925787\n", + " https://d-nb.info/standards/elementset/gnd#Dif...\n", " 1\n", " \n", " \n", " 16\n", - " Bundeskanzler <Österreich>\n", + " https://d-nb.info/standards/vocab/gnd/geograph...\n", " 1\n", " \n", " \n", " 17\n", - " Politiker <SPÖ>\n", + " http://www.wikidata.org/entity/Q44517\n", " 1\n", " \n", " \n", " 18\n", - " Politiker, Oesterreich\n", + " https://d-nb.info/standards/vocab/gnd/gender#male\n", " 1\n", " \n", " \n", " 19\n", - " https://d-nb.info/gnd/1243925787\n", + " (DE-588c)4032993-8\n", " 1\n", " \n", " \n", " 20\n", - " (DE-588c)4032993-8\n", + " https://d-nb.info/gnd/2029382-3\n", " 1\n", " \n", " \n", " 21\n", - " N62cf47e5c82e4e76be8bd9fc96aa1bf9\n", + " Politiker, Oesterreich\n", " 1\n", " \n", " \n", " 22\n", - " http://isni.org/isni/0000000112608767\n", + " Kreisky, Bruno\n", " 1\n", " \n", " \n", " 23\n", - " https://d-nb.info/gnd/121036073\n", + " 1990-07-29\n", " 1\n", " \n", " \n", " 24\n", - " https://d-nb.info/gnd/2029382-3\n", + " 118566512\n", " 1\n", " \n", " \n", " 25\n", - " 118566512\n", + " 2021-12-15T00:31:22\n", " 1\n", " \n", " \n", " 26\n", - " Bruno\n", + " https://de.wikipedia.org/wiki/Bruno_Kreisky\n", " 1\n", " \n", " \n", " 27\n", - " https://d-nb.info/standards/vocab/gnd/gender#male\n", + " http://isni.org/isni/0000000112608767\n", " 1\n", " \n", " \n", " 28\n", - " http://id.loc.gov/rwo/agents/n50043948\n", + " https://d-nb.info/gnd/121036073\n", " 1\n", " \n", " \n", " 29\n", - " 1911-01-22\n", + " https://d-nb.info/standards/vocab/gnd/gnd-sc#1...\n", " 1\n", " \n", " \n", " 30\n", - " 2021-12-15T00:31:22\n", + " 1911-01-22\n", " 1\n", " \n", " \n", " 31\n", - " http://creativecommons.org/publicdomain/zero/1.0/\n", + " (DE-588)1243925787\n", " 1\n", " \n", " \n", " 32\n", - " http://www.wikidata.org/entity/Q44517\n", + " N2b6f2eae924147b6ae0a96cfbdd2dfba\n", " 1\n", " \n", " \n", " 33\n", - " 1990-07-29\n", + " http://viaf.org/viaf/31998484\n", " 1\n", " \n", " \n", " 34\n", - " https://d-nb.info/gnd/1243925787\n", + " Bundeskanzler <Österreich>\n", " 1\n", " \n", " \n", " 35\n", - " Kreisky\n", + " Bundeskanzler 1970-1983\n", " 1\n", " \n", " \n", @@ -1871,41 +1834,41 @@ "text/plain": [ " 0 1\n", "0 https://d-nb.info/gnd/4066009-6 2\n", - "1 https://d-nb.info/standards/vocab/gnd/gnd-sc#1... 1\n", - "2 https://d-nb.info/gnd/4046517-2 1\n", - "3 https://d-nb.info/gnd/4077258-5 1\n", - "4 Kreisky, Bruno 1\n", - "5 https://d-nb.info/standards/elementset/gnd#Dif... 1\n", - "6 http://viaf.org/viaf/31998484 1\n", - "7 https://de.wikipedia.org/wiki/Bruno_Kreisky 1\n", - "8 http://dbpedia.org/resource/Bruno_Kreisky 1\n", - "9 https://d-nb.info/gnd/118566512/about 1\n", - "10 https://d-nb.info/standards/vocab/gnd/geograph... 1\n", - "11 Bundeskanzler 1970-1983 1\n", - "12 http://www.filmportal.de/person/5B113A52F8F14A... 1\n", - "13 (DE-588a)118566512 1\n", - "14 https://d-nb.info/standards/vocab/gnd/geograph... 1\n", - "15 (DE-588)1243925787 1\n", - "16 Bundeskanzler 1\n", - "17 Politiker 1\n", - "18 Politiker, Oesterreich 1\n", - "19 https://d-nb.info/gnd/1243925787 1\n", - "20 (DE-588c)4032993-8 1\n", - "21 N62cf47e5c82e4e76be8bd9fc96aa1bf9 1\n", - "22 http://isni.org/isni/0000000112608767 1\n", - "23 https://d-nb.info/gnd/121036073 1\n", - "24 https://d-nb.info/gnd/2029382-3 1\n", - "25 118566512 1\n", - "26 Bruno 1\n", - "27 https://d-nb.info/standards/vocab/gnd/gender#male 1\n", - "28 http://id.loc.gov/rwo/agents/n50043948 1\n", - "29 1911-01-22 1\n", - "30 2021-12-15T00:31:22 1\n", - "31 http://creativecommons.org/publicdomain/zero/1.0/ 1\n", - "32 http://www.wikidata.org/entity/Q44517 1\n", - "33 1990-07-29 1\n", - "34 https://d-nb.info/gnd/1243925787 1\n", - "35 Kreisky 1" + "1 https://d-nb.info/standards/vocab/gnd/geograph... 1\n", + "2 https://d-nb.info/gnd/1243925787 1\n", + "3 http://creativecommons.org/publicdomain/zero/1.0/ 1\n", + "4 https://d-nb.info/gnd/1243925787 1\n", + "5 https://d-nb.info/gnd/118566512/about 1\n", + "6 http://dbpedia.org/resource/Bruno_Kreisky 1\n", + "7 http://id.loc.gov/rwo/agents/n50043948 1\n", + "8 Bruno 1\n", + "9 https://d-nb.info/gnd/4077258-5 1\n", + "10 https://d-nb.info/gnd/4046517-2 1\n", + "11 Kreisky 1\n", + "12 (DE-588a)118566512 1\n", + "13 http://www.filmportal.de/person/5B113A52F8F14A... 1\n", + "14 Politiker 1\n", + "15 https://d-nb.info/standards/elementset/gnd#Dif... 1\n", + "16 https://d-nb.info/standards/vocab/gnd/geograph... 1\n", + "17 http://www.wikidata.org/entity/Q44517 1\n", + "18 https://d-nb.info/standards/vocab/gnd/gender#male 1\n", + "19 (DE-588c)4032993-8 1\n", + "20 https://d-nb.info/gnd/2029382-3 1\n", + "21 Politiker, Oesterreich 1\n", + "22 Kreisky, Bruno 1\n", + "23 1990-07-29 1\n", + "24 118566512 1\n", + "25 2021-12-15T00:31:22 1\n", + "26 https://de.wikipedia.org/wiki/Bruno_Kreisky 1\n", + "27 http://isni.org/isni/0000000112608767 1\n", + "28 https://d-nb.info/gnd/121036073 1\n", + "29 https://d-nb.info/standards/vocab/gnd/gnd-sc#1... 1\n", + "30 1911-01-22 1\n", + "31 (DE-588)1243925787 1\n", + "32 N2b6f2eae924147b6ae0a96cfbdd2dfba 1\n", + "33 http://viaf.org/viaf/31998484 1\n", + "34 Bundeskanzler 1\n", + "35 Bundeskanzler 1970-1983 1" ] }, "execution_count": 9, @@ -1981,102 +1944,102 @@ " \n", " \n", " 3\n", - " https://d-nb.info/standards/elementset/gnd#bio...\n", + " https://d-nb.info/standards/elementset/gnd#pro...\n", " 2\n", " \n", " \n", " 4\n", - " https://d-nb.info/standards/elementset/gnd#pro...\n", + " https://d-nb.info/standards/elementset/gnd#bio...\n", " 2\n", " \n", " \n", " 5\n", - " https://d-nb.info/standards/elementset/gnd#gnd...\n", + " https://d-nb.info/standards/elementset/dnb#dep...\n", " 1\n", " \n", " \n", " 6\n", - " https://d-nb.info/standards/elementset/gnd#pro...\n", + " http://purl.org/dc/terms/license\n", " 1\n", " \n", " \n", " 7\n", - " https://d-nb.info/standards/elementset/gnd#pla...\n", + " https://d-nb.info/standards/elementset/gnd#for...\n", " 1\n", " \n", " \n", " 8\n", - " https://d-nb.info/standards/elementset/gnd#pre...\n", + " http://www.w3.org/2007/05/powder-s#describedby\n", " 1\n", " \n", " \n", " 9\n", - " http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", + " https://d-nb.info/standards/elementset/gnd#pla...\n", " 1\n", " \n", " \n", " 10\n", - " http://xmlns.com/foaf/0.1/page\n", + " https://d-nb.info/standards/elementset/gnd#pro...\n", " 1\n", " \n", " \n", " 11\n", - " https://d-nb.info/standards/elementset/gnd#sur...\n", + " https://d-nb.info/standards/elementset/gnd#pla...\n", " 1\n", " \n", " \n", " 12\n", - " http://www.w3.org/2007/05/powder-s#describedby\n", + " http://www.w3.org/1999/02/22-rdf-syntax-ns#type\n", " 1\n", " \n", " \n", " 13\n", - " https://d-nb.info/standards/elementset/gnd#fam...\n", + " https://d-nb.info/standards/elementset/gnd#pla...\n", " 1\n", " \n", " \n", " 14\n", - " https://d-nb.info/standards/elementset/gnd#aff...\n", + " https://d-nb.info/standards/elementset/gnd#gender\n", " 1\n", " \n", " \n", " 15\n", - " https://d-nb.info/standards/elementset/gnd#pre...\n", + " https://d-nb.info/standards/elementset/gnd#aff...\n", " 1\n", " \n", " \n", " 16\n", - " https://d-nb.info/standards/elementset/gnd#gnd...\n", + " https://d-nb.info/standards/elementset/gnd#pre...\n", " 1\n", " \n", " \n", " 17\n", - " https://d-nb.info/standards/elementset/gnd#gender\n", + " https://d-nb.info/standards/elementset/gnd#dat...\n", " 1\n", " \n", " \n", " 18\n", - " https://d-nb.info/standards/elementset/gnd#pla...\n", + " https://d-nb.info/standards/elementset/gnd#gnd...\n", " 1\n", " \n", " \n", " 19\n", - " https://d-nb.info/standards/elementset/gnd#for...\n", + " http://purl.org/dc/terms/modified\n", " 1\n", " \n", " \n", " 20\n", - " https://d-nb.info/standards/elementset/gnd#dat...\n", + " http://xmlns.com/foaf/0.1/page\n", " 1\n", " \n", " \n", " 21\n", - " http://purl.org/dc/terms/modified\n", + " https://d-nb.info/standards/elementset/gnd#fam...\n", " 1\n", " \n", " \n", " 22\n", - " http://purl.org/dc/terms/license\n", + " https://d-nb.info/standards/elementset/gnd#gnd...\n", " 1\n", " \n", " \n", @@ -2086,12 +2049,12 @@ " \n", " \n", " 24\n", - " https://d-nb.info/standards/elementset/dnb#dep...\n", + " https://d-nb.info/standards/elementset/gnd#pre...\n", " 1\n", " \n", " \n", " 25\n", - " https://d-nb.info/standards/elementset/gnd#pla...\n", + " https://d-nb.info/standards/elementset/gnd#sur...\n", " 1\n", " \n", " \n", @@ -2103,29 +2066,29 @@ "0 http://www.w3.org/2002/07/owl#sameAs 7\n", "1 https://d-nb.info/standards/elementset/gnd#old... 3\n", "2 https://d-nb.info/standards/elementset/gnd#geo... 2\n", - "3 https://d-nb.info/standards/elementset/gnd#bio... 2\n", - "4 https://d-nb.info/standards/elementset/gnd#pro... 2\n", - "5 https://d-nb.info/standards/elementset/gnd#gnd... 1\n", - "6 https://d-nb.info/standards/elementset/gnd#pro... 1\n", - "7 https://d-nb.info/standards/elementset/gnd#pla... 1\n", - "8 https://d-nb.info/standards/elementset/gnd#pre... 1\n", - "9 http://www.w3.org/1999/02/22-rdf-syntax-ns#type 1\n", - "10 http://xmlns.com/foaf/0.1/page 1\n", - "11 https://d-nb.info/standards/elementset/gnd#sur... 1\n", - "12 http://www.w3.org/2007/05/powder-s#describedby 1\n", - "13 https://d-nb.info/standards/elementset/gnd#fam... 1\n", - "14 https://d-nb.info/standards/elementset/gnd#aff... 1\n", - "15 https://d-nb.info/standards/elementset/gnd#pre... 1\n", - "16 https://d-nb.info/standards/elementset/gnd#gnd... 1\n", - "17 https://d-nb.info/standards/elementset/gnd#gender 1\n", - "18 https://d-nb.info/standards/elementset/gnd#pla... 1\n", - "19 https://d-nb.info/standards/elementset/gnd#for... 1\n", - "20 https://d-nb.info/standards/elementset/gnd#dat... 1\n", - "21 http://purl.org/dc/terms/modified 1\n", - "22 http://purl.org/dc/terms/license 1\n", + "3 https://d-nb.info/standards/elementset/gnd#pro... 2\n", + "4 https://d-nb.info/standards/elementset/gnd#bio... 2\n", + "5 https://d-nb.info/standards/elementset/dnb#dep... 1\n", + "6 http://purl.org/dc/terms/license 1\n", + "7 https://d-nb.info/standards/elementset/gnd#for... 1\n", + "8 http://www.w3.org/2007/05/powder-s#describedby 1\n", + "9 https://d-nb.info/standards/elementset/gnd#pla... 1\n", + "10 https://d-nb.info/standards/elementset/gnd#pro... 1\n", + "11 https://d-nb.info/standards/elementset/gnd#pla... 1\n", + "12 http://www.w3.org/1999/02/22-rdf-syntax-ns#type 1\n", + "13 https://d-nb.info/standards/elementset/gnd#pla... 1\n", + "14 https://d-nb.info/standards/elementset/gnd#gender 1\n", + "15 https://d-nb.info/standards/elementset/gnd#aff... 1\n", + "16 https://d-nb.info/standards/elementset/gnd#pre... 1\n", + "17 https://d-nb.info/standards/elementset/gnd#dat... 1\n", + "18 https://d-nb.info/standards/elementset/gnd#gnd... 1\n", + "19 http://purl.org/dc/terms/modified 1\n", + "20 http://xmlns.com/foaf/0.1/page 1\n", + "21 https://d-nb.info/standards/elementset/gnd#fam... 1\n", + "22 https://d-nb.info/standards/elementset/gnd#gnd... 1\n", "23 https://d-nb.info/standards/elementset/gnd#dat... 1\n", - "24 https://d-nb.info/standards/elementset/dnb#dep... 1\n", - "25 https://d-nb.info/standards/elementset/gnd#pla... 1" + "24 https://d-nb.info/standards/elementset/gnd#pre... 1\n", + "25 https://d-nb.info/standards/elementset/gnd#sur... 1" ] }, "execution_count": 10, @@ -2245,6 +2208,13 @@ "df_kreisky = pd.DataFrame(properties)\n", "df_kreisky" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -2264,7 +2234,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/2.1 - SRU & LOD.ipynb b/2.1 - SRU & LOD.ipynb index e641351..a7ce107 100644 --- a/2.1 - SRU & LOD.ipynb +++ b/2.1 - SRU & LOD.ipynb @@ -134,7 +134,7 @@ "output_type": "stream", "text": [ "https://open-na.hosted.exlibrisgroup.com/alma/43ACC_ONB/bf/entity/instance/990030217420603338\n", - "b'\\n\\n \\n \\n \\n \\n DLC marc2bibframe2 v1.8.0-SNAPSHOT\\n 2022-06-21T11:35:48+00:00\\n \\n \\n \\n \\n new\\n \\n \\n \\n \\n isbd\\n \\n \\n \\n \\n 990098715250203331\\n \\n \\n DLC\\n \\n \\n \\n \\n 2018-01-23T08:43:00\\n 2000-01-01\\n \\n \\n ONB\\n \\n \\n \\n \\n \\n \\n \\n AT-OBV\\n \\n \\n \\n \\n pi\\n \\n \\n \\n \\n \\n \\n \\n text\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n \\n \\n \\n \\n \\n Drama\\n Drama\\n \\n \\n bellobv\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n Goedsche, Friedrich Wilhelm\\n 7102 $aGoedsche, Friedrich Wilhelm$4pbl\\n Goedsche, Friedrich Wilhelm\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n http://data.onb.ac.at/imgk/AZ00308934SZ00220134SZ00628562\\n \\n \\n Zettel\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n http://data.onb.ac.at/ABO/%2BZ182067107\\n \\n \\n Volltext\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n http://data.onb.ac.at/ABO/%2BZ199052304\\n \\n \\n Volltext\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n computer\\n \\n \\n \\n \\n \\n online resource\\n \\n \\n \\n \\n \\n 1814/####\\n \\n \\n \\n \\n AC09865194\\n \\n \\n \\n \\n \\n \\n \\n AC09865194\\n \\n \\n \\n \\n 009871525ACC01\\n \\n \\n \\n \\n \\n \\n \\n OBVAC09865194\\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n \\n [Meißen]\\n \\n \\n \\n \\n [Gödsche]\\n \\n \\n 1814\\n \\n \\n \\n \\n 32 S.\\n \\n \\n \\n \\n\\n'\n" + "b'\\n\\n \\n \\n \\n \\n DLC marc2bibframe2 v1.8.0-SNAPSHOT\\n 2022-07-04T13:51:20+00:00\\n \\n \\n \\n \\n new\\n \\n \\n \\n \\n isbd\\n \\n \\n \\n \\n 990098715250203331\\n \\n \\n DLC\\n \\n \\n \\n \\n 2018-01-23T08:43:00\\n 2000-01-01\\n \\n \\n ONB\\n \\n \\n \\n \\n \\n \\n \\n AT-OBV\\n \\n \\n \\n \\n pi\\n \\n \\n \\n \\n \\n \\n \\n text\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n \\n \\n \\n \\n \\n Drama\\n Drama\\n \\n \\n bellobv\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n Goedsche, Friedrich Wilhelm\\n 7102 $aGoedsche, Friedrich Wilhelm$4pbl\\n Goedsche, Friedrich Wilhelm\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n http://data.onb.ac.at/imgk/AZ00308934SZ00220134SZ00628562\\n \\n \\n Zettel\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n http://data.onb.ac.at/ABO/%2BZ182067107\\n \\n \\n Volltext\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n http://data.onb.ac.at/ABO/%2BZ199052304\\n \\n \\n Volltext\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n \\n computer\\n \\n \\n \\n \\n \\n online resource\\n \\n \\n \\n \\n \\n 1814/####\\n \\n \\n \\n \\n AC09865194\\n \\n \\n \\n \\n \\n \\n \\n AC09865194\\n \\n \\n \\n \\n 009871525ACC01\\n \\n \\n \\n \\n \\n \\n \\n OBVAC09865194\\n \\n \\n \\n \\n \\n \\n \\n <<Die>> Flucht über den Rhein odar Das unverhoffte Wiedersehen\\n Ein erlustirend historisch-rührendes Familiengemälde mit Erscheinungen und vollstimmigen Chören von Baschkiren und Cosaken, und allen Batterien der Deutschen\\n \\n \\n \\n \\n \\n \\n \\n [Meißen]\\n \\n \\n \\n \\n [Gödsche]\\n \\n \\n 1814\\n \\n \\n \\n \\n 32 S.\\n \\n \\n \\n \\n\\n'\n" ] } ], @@ -226,7 +226,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/2.3 - SPARQL.ipynb b/2.3 - SPARQL.ipynb index 2e65987..0183a3b 100644 --- a/2.3 - SPARQL.ipynb +++ b/2.3 - SPARQL.ipynb @@ -463,7 +463,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/3 - Images and Text.ipynb b/3 - Images and Text.ipynb index d7127a5..33920a4 100644 --- a/3 - Images and Text.ipynb +++ b/3 - Images and Text.ipynb @@ -287,7 +287,8 @@ " * OCR (Optical Character Recognition) data representation format\n", " * XML Schema\n", " * [https://github.com/altoxml](https://github.com/altoxml)\n", - " * [https://www.loc.gov/standards/alto/](https://www.loc.gov/standards/alto/)" + " * [https://www.loc.gov/standards/alto/](https://www.loc.gov/standards/alto/)\n", + " * e.g. https://iiif.onb.ac.at/presentation/ANNO/wrz17030808/resource/00000010.xml" ] }, { @@ -329,8 +330,7 @@ "source": [ "* hOCR\n", " * alternative to ALTO\n", - " * based on XHTML\n", - " * not used in the ONB Labs" + " * based on XHTML" ] } ], @@ -351,7 +351,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/3.3 - Text - Download OCR Text.ipynb b/3.3 - Text - Download OCR Text.ipynb index d458396..a80b4ff 100644 --- a/3.3 - Text - Download OCR Text.ipynb +++ b/3.3 - Text - Download OCR Text.ipynb @@ -123,224 +123,224 @@ " \n", " \n", " \n", - " 61892\n", - " fdb18500707\n", - " fdb\n", - " 1850\n", - " 18500707\n", - " Fremden-Blatt\n", + " 146039\n", + " etg18140311\n", + " etg\n", + " 1814\n", + " 18140311\n", + " Hellenikos telegraphos\n", " NaN\n", - " Reise\n", + " Tageszeitung\n", " Wien\n", - " de\n", + " el\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2014-07-21 10:57:07\n", + " 2003-10-23 17:34:04\n", " 0\n", - " 1850-07-07\n", + " 1814-03-11\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=fdb...\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=etg...\n", " 1\n", " 4\n", " \n", " \n", - " 227880\n", - " wtz18550130\n", - " wtz\n", - " 1855\n", - " 18550130\n", - " Theaterzettel der beiden k.k. Hoftheater und d...\n", + " 210148\n", + " vvb18671029\n", + " vvb\n", + " 1867\n", + " 18671029\n", + " Vorarlberger Volksblatt\n", " NaN\n", - " Kultur; Kunst; Theater; Musik\n", - " Wien\n", + " Tageszeitung\n", + " Bregenz\n", " de\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2014-03-03 14:34:58\n", + " 2010-12-20 21:31:28\n", " 0\n", - " 1855-01-30\n", + " 1867-10-29\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=wtz...\n", - " 0\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=vvb...\n", " 1\n", + " 4\n", " \n", " \n", - " 140105\n", - " alf18540819\n", - " alf\n", - " 1854\n", - " 18540819\n", - " Allgemeine land- und forstwirthschaftliche Zei...\n", + " 237952\n", + " wtz17960804\n", + " wtz\n", + " 1796\n", + " 17960804\n", + " Theaterzettel der beiden k.k. Hoftheater und d...\n", " NaN\n", - " Landwirtschaft\n", + " Kultur; Kunst; Theater; Musik\n", " Wien\n", " de\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2004-12-15 15:27:06\n", + " 2020-05-28 08:28:00\n", " 0\n", - " 1854-08-19\n", + " 1796-08-04\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=alf...\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=wtz...\n", + " 1\n", " 1\n", - " 8\n", " \n", " \n", - " 4828\n", - " fug15961030\n", - " fug\n", - " 1596\n", - " 15961030\n", - " Fugger - Zeitungen\n", + " 72359\n", + " fkz18730125\n", + " fkz\n", + " 1873\n", + " 18730125\n", + " Feldkircher Zeitung\n", " NaN\n", " Tageszeitung\n", - " o.O.\n", + " Feldkirch\n", " de\n", " newspaper\n", - " brz\n", + " zeitungen\n", " anno\n", - " 2013-06-27 13:28:36\n", - " 1\n", - " 1596-10-30\n", - " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=fug...\n", + " 2012-07-24 09:47:15\n", " 0\n", - " 3\n", + " 1873-01-25\n", + " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=fkz...\n", + " 1\n", + " 4\n", " \n", " \n", - " 226377\n", - " wtz18500416\n", - " wtz\n", - " 1850\n", - " 18500416\n", - " Theaterzettel der beiden k.k. Hoftheater und d...\n", + " 75047\n", + " ode18571013\n", + " ode\n", + " 1857\n", + " 18571013\n", + " Ost-Deutsche Post\n", " NaN\n", - " Kultur; Kunst; Theater; Musik\n", + " Tageszeitung\n", " Wien\n", " de\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2010-12-21 02:44:49\n", + " 2018-08-29 08:46:08\n", " 0\n", - " 1850-04-16\n", + " 1857-10-13\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=wtz...\n", - " 0\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=ode...\n", " 1\n", + " 4\n", " \n", " \n", - " 1888\n", - " fug15830226\n", - " fug\n", - " 1583\n", - " 15830226\n", - " Fugger - Zeitungen\n", + " 123381\n", + " vtl18701116\n", + " vtl\n", + " 1870\n", + " 18701116\n", + " Das Vaterland\n", " NaN\n", " Tageszeitung\n", - " o.O.\n", + " Wien\n", " de\n", " newspaper\n", - " brz\n", + " zeitungen\n", " anno\n", - " 2013-06-27 13:28:14\n", - " 1\n", - " 1583-02-26\n", - " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=fug...\n", + " 2010-12-17 20:19:06\n", " 0\n", - " 9\n", + " 1870-11-16\n", + " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=vtl...\n", + " 1\n", + " 2\n", " \n", " \n", - " 248861\n", - " wrz18131201\n", - " wrz\n", - " 1813\n", - " 18131201\n", - " Wiener Zeitung\n", + " 146745\n", + " etg18221119\n", + " etg\n", + " 1822\n", + " 18221119\n", + " Hellenikos telegraphos\n", " NaN\n", " Tageszeitung\n", " Wien\n", - " de\n", + " el\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2010-12-28 12:06:27\n", + " 2003-10-23 17:36:09\n", " 0\n", - " 1813-12-01\n", + " 1822-11-19\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=wrz...\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=etg...\n", " 1\n", - " 10\n", + " 6\n", " \n", " \n", - " 217639\n", - " wtz18211105\n", - " wtz\n", - " 1821\n", - " 18211105\n", - " Theaterzettel der beiden k.k. Hoftheater und d...\n", + " 205203\n", + " tpt18800108\n", + " tpt\n", + " 1880\n", + " 18800108\n", + " (Linzer) Tages-Post\n", " NaN\n", - " Kultur; Kunst; Theater; Musik\n", - " Wien\n", + " Tageszeitung\n", + " Linz\n", " de\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2010-12-21 02:35:26\n", + " 2011-12-14 10:05:52\n", " 0\n", - " 1821-11-05\n", + " 1880-01-08\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=wtz...\n", - " 0\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=tpt...\n", " 1\n", + " 6\n", " \n", " \n", - " 161824\n", - " ibn18690929\n", - " ibn\n", - " 1869\n", - " 18690929\n", - " Innsbrucker Nachrichten\n", + " 111021\n", + " ptb18771119\n", + " ptb\n", + " 1877\n", + " 18771119\n", + " Prager Tagblatt\n", " NaN\n", " Tageszeitung\n", - " Innsbruck\n", + " Praha (Prag)\n", " de\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2004-02-05 08:58:40\n", + " 2010-12-14 13:16:10\n", " 0\n", - " 1869-09-29\n", + " 1877-11-19\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=ibn...\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=ptb...\n", " 1\n", - " 8\n", + " 6\n", " \n", " \n", - " 43054\n", - " bor17760803\n", - " bor\n", - " 1776\n", - " 17760803\n", - " Amtliches Cursblatt der Wiener Börse\n", + " 167555\n", + " kfz18220804\n", + " kfz\n", + " 1822\n", + " 18220804\n", + " Klagenfurter Zeitung\n", " NaN\n", - " Wirtschaft\n", - " Wien\n", + " Tageszeitung\n", + " Klagenfurt\n", " de\n", " newspaper\n", " zeitungen\n", " anno\n", - " 2013-04-23 16:28:58\n", + " 2003-11-25 14:48:06\n", " 0\n", - " 1776-08-03\n", + " 1822-08-04\n", " http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...\n", - " http://anno.onb.ac.at/cgi-content/anno?aid=bor...\n", - " 0\n", + " http://anno.onb.ac.at/cgi-content/anno?aid=kfz...\n", " 1\n", + " 16\n", " \n", " \n", "\n", @@ -348,76 +348,76 @@ ], "text/plain": [ " manifest_id aid year day \\\n", - "61892 fdb18500707 fdb 1850 18500707 \n", - "227880 wtz18550130 wtz 1855 18550130 \n", - "140105 alf18540819 alf 1854 18540819 \n", - "4828 fug15961030 fug 1596 15961030 \n", - "226377 wtz18500416 wtz 1850 18500416 \n", - "1888 fug15830226 fug 1583 15830226 \n", - "248861 wrz18131201 wrz 1813 18131201 \n", - "217639 wtz18211105 wtz 1821 18211105 \n", - "161824 ibn18690929 ibn 1869 18690929 \n", - "43054 bor17760803 bor 1776 17760803 \n", + "146039 etg18140311 etg 1814 18140311 \n", + "210148 vvb18671029 vvb 1867 18671029 \n", + "237952 wtz17960804 wtz 1796 17960804 \n", + "72359 fkz18730125 fkz 1873 18730125 \n", + "75047 ode18571013 ode 1857 18571013 \n", + "123381 vtl18701116 vtl 1870 18701116 \n", + "146745 etg18221119 etg 1822 18221119 \n", + "205203 tpt18800108 tpt 1880 18800108 \n", + "111021 ptb18771119 ptb 1877 18771119 \n", + "167555 kfz18220804 kfz 1822 18220804 \n", "\n", " dc_title dc_title_additional \\\n", - "61892 Fremden-Blatt NaN \n", - "227880 Theaterzettel der beiden k.k. Hoftheater und d... NaN \n", - "140105 Allgemeine land- und forstwirthschaftliche Zei... NaN \n", - "4828 Fugger - Zeitungen NaN \n", - "226377 Theaterzettel der beiden k.k. Hoftheater und d... NaN \n", - "1888 Fugger - Zeitungen NaN \n", - "248861 Wiener Zeitung NaN \n", - "217639 Theaterzettel der beiden k.k. Hoftheater und d... NaN \n", - "161824 Innsbrucker Nachrichten NaN \n", - "43054 Amtliches Cursblatt der Wiener Börse NaN \n", + "146039 Hellenikos telegraphos NaN \n", + "210148 Vorarlberger Volksblatt NaN \n", + "237952 Theaterzettel der beiden k.k. Hoftheater und d... NaN \n", + "72359 Feldkircher Zeitung NaN \n", + "75047 Ost-Deutsche Post NaN \n", + "123381 Das Vaterland NaN \n", + "146745 Hellenikos telegraphos NaN \n", + "205203 (Linzer) Tages-Post NaN \n", + "111021 Prager Tagblatt NaN \n", + "167555 Klagenfurter Zeitung NaN \n", "\n", " subjects place_of_publications languages \\\n", - "61892 Reise Wien de \n", - "227880 Kultur; Kunst; Theater; Musik Wien de \n", - "140105 Landwirtschaft Wien de \n", - "4828 Tageszeitung o.O. de \n", - "226377 Kultur; Kunst; Theater; Musik Wien de \n", - "1888 Tageszeitung o.O. de \n", - "248861 Tageszeitung Wien de \n", - "217639 Kultur; Kunst; Theater; Musik Wien de \n", - "161824 Tageszeitung Innsbruck de \n", - "43054 Wirtschaft Wien de \n", + "146039 Tageszeitung Wien el \n", + "210148 Tageszeitung Bregenz de \n", + "237952 Kultur; Kunst; Theater; Musik Wien de \n", + "72359 Tageszeitung Feldkirch de \n", + "75047 Tageszeitung Wien de \n", + "123381 Tageszeitung Wien de \n", + "146745 Tageszeitung Wien el \n", + "205203 Tageszeitung Linz de \n", + "111021 Tageszeitung Praha (Prag) de \n", + "167555 Tageszeitung Klagenfurt de \n", "\n", " dc_type meta_type ini_type modification_datetime longer_page_id \\\n", - "61892 newspaper zeitungen anno 2014-07-21 10:57:07 0 \n", - "227880 newspaper zeitungen anno 2014-03-03 14:34:58 0 \n", - "140105 newspaper zeitungen anno 2004-12-15 15:27:06 0 \n", - "4828 newspaper brz anno 2013-06-27 13:28:36 1 \n", - "226377 newspaper zeitungen anno 2010-12-21 02:44:49 0 \n", - "1888 newspaper brz anno 2013-06-27 13:28:14 1 \n", - "248861 newspaper zeitungen anno 2010-12-28 12:06:27 0 \n", - "217639 newspaper zeitungen anno 2010-12-21 02:35:26 0 \n", - "161824 newspaper zeitungen anno 2004-02-05 08:58:40 0 \n", - "43054 newspaper zeitungen anno 2013-04-23 16:28:58 0 \n", + "146039 newspaper zeitungen anno 2003-10-23 17:34:04 0 \n", + "210148 newspaper zeitungen anno 2010-12-20 21:31:28 0 \n", + "237952 newspaper zeitungen anno 2020-05-28 08:28:00 0 \n", + "72359 newspaper zeitungen anno 2012-07-24 09:47:15 0 \n", + "75047 newspaper zeitungen anno 2018-08-29 08:46:08 0 \n", + "123381 newspaper zeitungen anno 2010-12-17 20:19:06 0 \n", + "146745 newspaper zeitungen anno 2003-10-23 17:36:09 0 \n", + "205203 newspaper zeitungen anno 2011-12-14 10:05:52 0 \n", + "111021 newspaper zeitungen anno 2010-12-14 13:16:10 0 \n", + "167555 newspaper zeitungen anno 2003-11-25 14:48:06 0 \n", "\n", " dc_date link_pdf \\\n", - "61892 1850-07-07 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "227880 1855-01-30 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "140105 1854-08-19 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "4828 1596-10-30 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "226377 1850-04-16 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "1888 1583-02-26 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "248861 1813-12-01 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "217639 1821-11-05 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "161824 1869-09-29 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", - "43054 1776-08-03 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "146039 1814-03-11 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "210148 1867-10-29 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "237952 1796-08-04 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "72359 1873-01-25 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "75047 1857-10-13 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "123381 1870-11-16 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "146745 1822-11-19 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "205203 1880-01-08 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "111021 1877-11-19 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", + "167555 1822-08-04 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n", "\n", " link_old has_ocr page_count \n", - "61892 http://anno.onb.ac.at/cgi-content/anno?aid=fdb... 1 4 \n", - "227880 http://anno.onb.ac.at/cgi-content/anno?aid=wtz... 0 1 \n", - "140105 http://anno.onb.ac.at/cgi-content/anno?aid=alf... 1 8 \n", - "4828 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 3 \n", - "226377 http://anno.onb.ac.at/cgi-content/anno?aid=wtz... 0 1 \n", - "1888 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 9 \n", - "248861 http://anno.onb.ac.at/cgi-content/anno?aid=wrz... 1 10 \n", - "217639 http://anno.onb.ac.at/cgi-content/anno?aid=wtz... 0 1 \n", - "161824 http://anno.onb.ac.at/cgi-content/anno?aid=ibn... 1 8 \n", - "43054 http://anno.onb.ac.at/cgi-content/anno?aid=bor... 0 1 " + "146039 http://anno.onb.ac.at/cgi-content/anno?aid=etg... 1 4 \n", + "210148 http://anno.onb.ac.at/cgi-content/anno?aid=vvb... 1 4 \n", + "237952 http://anno.onb.ac.at/cgi-content/anno?aid=wtz... 1 1 \n", + "72359 http://anno.onb.ac.at/cgi-content/anno?aid=fkz... 1 4 \n", + "75047 http://anno.onb.ac.at/cgi-content/anno?aid=ode... 1 4 \n", + "123381 http://anno.onb.ac.at/cgi-content/anno?aid=vtl... 1 2 \n", + "146745 http://anno.onb.ac.at/cgi-content/anno?aid=etg... 1 6 \n", + "205203 http://anno.onb.ac.at/cgi-content/anno?aid=tpt... 1 6 \n", + "111021 http://anno.onb.ac.at/cgi-content/anno?aid=ptb... 1 6 \n", + "167555 http://anno.onb.ac.at/cgi-content/anno?aid=kfz... 1 16 " ] }, "execution_count": 2, @@ -1597,7 +1597,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index acaeaae..fc70920 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ pyswagger pandas jsonpath_ng SPARQLWrapper +RISE -- GitLab