From 2d381f0f18eaf1f3a95d9e5c3de6f4c34ed9a998 Mon Sep 17 00:00:00 2001 From: smayer <simon.mayer@onb.ac.at> Date: Fri, 25 Oct 2024 16:21:35 +0000 Subject: [PATCH] Update notebooks --- Notebooks/Completing_BE_data.ipynb | 497 ----------------------------- Notebooks/String_matching.ipynb | 289 ++++++++++++----- Notebooks/XML_Aufbereitung.ipynb | 355 +++++++++++++++------ 3 files changed, 466 insertions(+), 675 deletions(-) diff --git a/Notebooks/Completing_BE_data.ipynb b/Notebooks/Completing_BE_data.ipynb index 54daadf..30e35af 100644 --- a/Notebooks/Completing_BE_data.ipynb +++ b/Notebooks/Completing_BE_data.ipynb @@ -987,503 +987,6 @@ "source": [ "BE_with_Ink_df.to_excel('../Daten/Vorhersagen/WIP_final_BE_4.xlsx')" ] - }, - { - "cell_type": "code", - "execution_count": 278, - "id": "bd73ed76-756a-4a6e-b8f6-183ebfe33ae4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>Signatur</th>\n", - " <th>Barcode</th>\n", - " <th>Titel</th>\n", - " <th>Autor</th>\n", - " <th>Mitwirkender</th>\n", - " <th>Anfang Veröffentlichungsdatum</th>\n", - " <th>Ende Veröffentlichungsdatum</th>\n", - " <th>Veröffentlichungsdatum</th>\n", - " <th>Veröffentlichungsort</th>\n", - " <th>Veröffentlichungsort (normiert)</th>\n", - " <th>...</th>\n", - " <th>Wissensunterklasse</th>\n", - " <th>Formatangabe</th>\n", - " <th>hs. Katalogseite Handschrift</th>\n", - " <th>hs. Katalogeintrag ID</th>\n", - " <th>hs. Katalogeintrag</th>\n", - " <th>hs. Katalog Image URL</th>\n", - " <th>dup_title</th>\n", - " <th>copy_from</th>\n", - " <th>Einfache Klassifizierung</th>\n", - " <th>Komplexe Klassifizierung</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>23376</th>\n", - " <td>*28.A.79.(Vol.1)</td>\n", - " <td>Z222907107</td>\n", - " <td>Histoire des ouvrages des scavans</td>\n", - " <td>Basnage de Beauval, Henri</td>\n", - " <td>NaN</td>\n", - " <td>1687.0</td>\n", - " <td>1709.0</td>\n", - " <td>1687-1709</td>\n", - " <td>Rotterdam</td>\n", - " <td>Rotterdam</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>True</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23383</th>\n", - " <td>*28.A.79.(Vol.10)</td>\n", - " <td>Z222908100</td>\n", - " <td>Histoire des ouvrages des scavans</td>\n", - " <td>Basnage de Beauval, Henri</td>\n", - " <td>NaN</td>\n", - " <td>1687.0</td>\n", - " <td>1709.0</td>\n", - " <td>1687-1709</td>\n", - " <td>Rotterdam</td>\n", - " <td>Rotterdam</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>True</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23384</th>\n", - " <td>*28.A.79.(Vol.12)</td>\n", - " <td>Z222908306</td>\n", - " <td>Histoire des ouvrages des scavans</td>\n", - " <td>Basnage de Beauval, Henri</td>\n", - " <td>NaN</td>\n", - " <td>1687.0</td>\n", - " <td>1709.0</td>\n", - " <td>1687-1709</td>\n", - " <td>Rotterdam</td>\n", - " <td>Rotterdam</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>True</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23385</th>\n", - " <td>*28.A.79.(Vol.13)</td>\n", - " <td>Z222908409</td>\n", - " <td>Histoire des ouvrages des scavans</td>\n", - " <td>Basnage de Beauval, Henri</td>\n", - " <td>NaN</td>\n", - " <td>1687.0</td>\n", - " <td>1709.0</td>\n", - " <td>1687-1709</td>\n", - " <td>Rotterdam</td>\n", - " <td>Rotterdam</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>True</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23386</th>\n", - " <td>*28.A.79.(Vol.14)</td>\n", - " <td>Z222908501</td>\n", - " <td>Histoire des ouvrages des scavans</td>\n", - " <td>Basnage de Beauval, Henri</td>\n", - " <td>NaN</td>\n", - " <td>1687.0</td>\n", - " <td>1709.0</td>\n", - " <td>1687-1709</td>\n", - " <td>Rotterdam</td>\n", - " <td>Rotterdam</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>True</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23636</th>\n", - " <td>Ink 9.F.4</td>\n", - " <td>NaN</td>\n", - " <td>Opera</td>\n", - " <td>Sallustius Crispus, Gaius</td>\n", - " <td>NaN</td>\n", - " <td>1481.0</td>\n", - " <td>NaN</td>\n", - " <td>23 Dec. 1481</td>\n", - " <td>Venice</td>\n", - " <td>Venedig</td>\n", - " <td>...</td>\n", - " <td>Historia Romana Sæculorum aliquot, præsertim Imperatorum temporibus</td>\n", - " <td>Folio</td>\n", - " <td>825</td>\n", - " <td>14.377_437_08</td>\n", - " <td>1447.........Ejusdem Historia Eadem. Venetiis. 1481.¬ Baptista de Torris. n. 2217. LIII. R. 12.</td>\n", - " <td>https://iiif.onb.ac.at/images/DOD/51184/00000437.jp2/full/full/0/native.jpg</td>\n", - " <td>True</td>\n", - " <td>-3.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22640</th>\n", - " <td>Ink 9.F.5</td>\n", - " <td>1460328-10</td>\n", - " <td>Biblia ; Interpretationes Hebraicorum nominum</td>\n", - " <td>NaN</td>\n", - " <td>Wild, Leonhard</td>\n", - " <td>1481.0</td>\n", - " <td>NaN</td>\n", - " <td>1481</td>\n", - " <td>Venedig</td>\n", - " <td>Venedig</td>\n", - " <td>...</td>\n", - " <td>Textus & Versiones Sacræ Scripturæ</td>\n", - " <td>Folio</td>\n", - " <td>2</td>\n", - " <td>14.376_026_00</td>\n", - " <td>9. Biblia Sacra Latina. Venetiis. 1481. Leonard Wild de Ratisbonâ n. 2302. III. D. 11.</td>\n", - " <td>https://iiif.onb.ac.at/images/DOD/51202/00000026.jp2/full/full/0/native.jpg</td>\n", - " <td>True</td>\n", - " <td>-3.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22650</th>\n", - " <td>SA.71.E.58</td>\n", - " <td>Z252861302</td>\n", - " <td>Dialogue sur la musique des anciens</td>\n", - " <td>Chateauneuf, Francois abbe de</td>\n", - " <td>NaN</td>\n", - " <td>1725.0</td>\n", - " <td>NaN</td>\n", - " <td>1725</td>\n", - " <td>Paris</td>\n", - " <td>Paris</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>False</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22651</th>\n", - " <td>SA.71.F.74</td>\n", - " <td>Z252867808</td>\n", - " <td>Friderici Adolfi Lampe De Cymbalis Veterum Libri Tres</td>\n", - " <td>Ember, Paul</td>\n", - " <td>Hase, Cornelius <<von>>; Röell, Herman Alexander</td>\n", - " <td>1703.0</td>\n", - " <td>NaN</td>\n", - " <td>1703</td>\n", - " <td>Trajecti Ad Rhenum</td>\n", - " <td>Utrecht</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>False</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>23439</th>\n", - " <td>SA.73.B.48</td>\n", - " <td>Z25920770X</td>\n", - " <td>Claudii Ptolomaei harmonicorum libri tres. Ex Codd. Mss. Undecim, nunc primum graece editus. Johannes Wallis ... recensuit, ed. (etc.)</td>\n", - " <td>Ptolemaeus, Claudius</td>\n", - " <td>Wallis, Johannes</td>\n", - " <td>1682.0</td>\n", - " <td>NaN</td>\n", - " <td>1682</td>\n", - " <td>Oxford</td>\n", - " <td>Oxford</td>\n", - " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>False</td>\n", - " <td>-1.0</td>\n", - " <td>True</td>\n", - " <td>2</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>22874 rows × 37 columns</p>\n", - "</div>" - ], - "text/plain": [ - " Signatur Barcode \n", - "23376 *28.A.79.(Vol.1) Z222907107 \\\n", - "23383 *28.A.79.(Vol.10) Z222908100 \n", - "23384 *28.A.79.(Vol.12) Z222908306 \n", - "23385 *28.A.79.(Vol.13) Z222908409 \n", - "23386 *28.A.79.(Vol.14) Z222908501 \n", - "... ... ... \n", - "23636 Ink 9.F.4 NaN \n", - "22640 Ink 9.F.5 1460328-10 \n", - "22650 SA.71.E.58 Z252861302 \n", - "22651 SA.71.F.74 Z252867808 \n", - "23439 SA.73.B.48 Z25920770X \n", - "\n", - " Titel \n", - "23376 Histoire des ouvrages des scavans \\\n", - "23383 Histoire des ouvrages des scavans \n", - "23384 Histoire des ouvrages des scavans \n", - "23385 Histoire des ouvrages des scavans \n", - "23386 Histoire des ouvrages des scavans \n", - "... ... \n", - "23636 Opera \n", - "22640 Biblia ; Interpretationes Hebraicorum nominum \n", - "22650 Dialogue sur la musique des anciens \n", - "22651 Friderici Adolfi Lampe De Cymbalis Veterum Libri Tres \n", - "23439 Claudii Ptolomaei harmonicorum libri tres. Ex Codd. Mss. Undecim, nunc primum graece editus. Johannes Wallis ... recensuit, ed. (etc.) \n", - "\n", - " Autor \n", - "23376 Basnage de Beauval, Henri \\\n", - "23383 Basnage de Beauval, Henri \n", - "23384 Basnage de Beauval, Henri \n", - "23385 Basnage de Beauval, Henri \n", - "23386 Basnage de Beauval, Henri \n", - "... ... \n", - "23636 Sallustius Crispus, Gaius \n", - "22640 NaN \n", - "22650 Chateauneuf, Francois abbe de \n", - "22651 Ember, Paul \n", - "23439 Ptolemaeus, Claudius \n", - "\n", - " Mitwirkender \n", - "23376 NaN \\\n", - "23383 NaN \n", - "23384 NaN \n", - "23385 NaN \n", - "23386 NaN \n", - "... ... \n", - "23636 NaN \n", - "22640 Wild, Leonhard \n", - "22650 NaN \n", - "22651 Hase, Cornelius <<von>>; Röell, Herman Alexander \n", - "23439 Wallis, Johannes \n", - "\n", - " Anfang Veröffentlichungsdatum Ende Veröffentlichungsdatum \n", - "23376 1687.0 1709.0 \\\n", - "23383 1687.0 1709.0 \n", - "23384 1687.0 1709.0 \n", - "23385 1687.0 1709.0 \n", - "23386 1687.0 1709.0 \n", - "... ... ... \n", - "23636 1481.0 NaN \n", - "22640 1481.0 NaN \n", - "22650 1725.0 NaN \n", - "22651 1703.0 NaN \n", - "23439 1682.0 NaN \n", - "\n", - " Veröffentlichungsdatum Veröffentlichungsort \n", - "23376 1687-1709 Rotterdam \\\n", - "23383 1687-1709 Rotterdam \n", - "23384 1687-1709 Rotterdam \n", - "23385 1687-1709 Rotterdam \n", - "23386 1687-1709 Rotterdam \n", - "... ... ... \n", - "23636 23 Dec. 1481 Venice \n", - "22640 1481 Venedig \n", - "22650 1725 Paris \n", - "22651 1703 Trajecti Ad Rhenum \n", - "23439 1682 Oxford \n", - "\n", - " Veröffentlichungsort (normiert) ... \n", - "23376 Rotterdam ... \\\n", - "23383 Rotterdam ... \n", - "23384 Rotterdam ... \n", - "23385 Rotterdam ... \n", - "23386 Rotterdam ... \n", - "... ... ... \n", - "23636 Venedig ... \n", - "22640 Venedig ... \n", - "22650 Paris ... \n", - "22651 Utrecht ... \n", - "23439 Oxford ... \n", - "\n", - " Wissensunterklasse \n", - "23376 NaN \\\n", - "23383 NaN \n", - "23384 NaN \n", - "23385 NaN \n", - "23386 NaN \n", - "... ... \n", - "23636 Historia Romana Sæculorum aliquot, præsertim Imperatorum temporibus \n", - "22640 Textus & Versiones Sacræ Scripturæ \n", - "22650 NaN \n", - "22651 NaN \n", - "23439 NaN \n", - "\n", - " Formatangabe hs. Katalogseite Handschrift hs. Katalogeintrag ID \n", - "23376 NaN NaN NaN \\\n", - "23383 NaN NaN NaN \n", - "23384 NaN NaN NaN \n", - "23385 NaN NaN NaN \n", - "23386 NaN NaN NaN \n", - "... ... ... ... \n", - "23636 Folio 825 14.377_437_08 \n", - "22640 Folio 2 14.376_026_00 \n", - "22650 NaN NaN NaN \n", - "22651 NaN NaN NaN \n", - "23439 NaN NaN NaN \n", - "\n", - " hs. Katalogeintrag \n", - "23376 NaN \\\n", - "23383 NaN \n", - "23384 NaN \n", - "23385 NaN \n", - "23386 NaN \n", - "... ... \n", - "23636 1447.........Ejusdem Historia Eadem. Venetiis. 1481.¬ Baptista de Torris. n. 2217. LIII. R. 12. \n", - "22640 9. Biblia Sacra Latina. Venetiis. 1481. Leonard Wild de Ratisbonâ n. 2302. III. D. 11. \n", - "22650 NaN \n", - "22651 NaN \n", - "23439 NaN \n", - "\n", - " hs. Katalog Image URL \n", - "23376 NaN \\\n", - "23383 NaN \n", - "23384 NaN \n", - "23385 NaN \n", - "23386 NaN \n", - "... ... \n", - "23636 https://iiif.onb.ac.at/images/DOD/51184/00000437.jp2/full/full/0/native.jpg \n", - "22640 https://iiif.onb.ac.at/images/DOD/51202/00000026.jp2/full/full/0/native.jpg \n", - "22650 NaN \n", - "22651 NaN \n", - "23439 NaN \n", - "\n", - " dup_title copy_from Einfache Klassifizierung Komplexe Klassifizierung \n", - "23376 True -1.0 True 2 \n", - "23383 True -1.0 True 2 \n", - "23384 True -1.0 True 2 \n", - "23385 True -1.0 True 2 \n", - "23386 True -1.0 True 2 \n", - "... ... ... ... ... \n", - "23636 True -3.0 True 2 \n", - "22640 True -3.0 True 2 \n", - "22650 False -1.0 True 2 \n", - "22651 False -1.0 True 2 \n", - "23439 False -1.0 True 2 \n", - "\n", - "[22874 rows x 37 columns]" - ] - }, - "execution_count": 278, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "BE_with_Ink_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "51e2acf9-1a2b-4503-8423-c091a5244d9b", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/Notebooks/String_matching.ipynb b/Notebooks/String_matching.ipynb index 90e4af2..00198ed 100644 --- a/Notebooks/String_matching.ipynb +++ b/Notebooks/String_matching.ipynb @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 150, "id": "c0f4a42a-7e21-41e8-833c-2dd2f9d1985e", "metadata": { "tags": [] @@ -125,7 +125,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2\n" + "9\n" ] }, { @@ -162,56 +162,175 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>8398</th>\n", + " <th>3144</th>\n", + " <td>14.377</td>\n", + " <td>65</td>\n", + " <td>Poëtica</td>\n", + " <td>Poëtæ Latini Recentiores, cum Germanicis</td>\n", + " <td>Octavo und kleiner</td>\n", + " <td>473</td>\n", + " <td>14.377_065_01</td>\n", + " <td>Pontani (I. Ioviani) Poëmata. 8.° Venet. 1505. Aldus. n. 755.</td>\n", + " <td>Pontani I Ioviani Poemata 8° Venet 1505 Aldus n 755</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4347</th>\n", + " <td>14.377</td>\n", + " <td>185</td>\n", + " <td>Philologia</td>\n", + " <td>Operum Latinorum varij Argumenti Collectiones</td>\n", + " <td>Folio</td>\n", + " <td>581</td>\n", + " <td>14.377_185_06</td>\n", + " <td>1021 Pontani (I. Ioviani) opera varij Argumenti; Ven. 1501. Bernardinus Vercellensis. n. 2216.</td>\n", + " <td>Pontani I Ioviani opera varij Argumenti Ven 1501 Bernardinus Vercellensis n 2216</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5892</th>\n", + " <td>14.377</td>\n", + " <td>455</td>\n", + " <td>Historia Antiqua</td>\n", + " <td>Historia Imperii Orientalis, Seu Bysantina</td>\n", + " <td>Folio</td>\n", + " <td>843</td>\n", + " <td>14.377_455_01</td>\n", + " <td>1472 Cantacuzeni Ex-Imperatoris (Ioan.) Historiarum Libri IV. græcè ac latinè, Iacobo Pontano interpréte cum ipsius & Ioan. Gretseri notis. 3 Vol. Parisiis. 1645. è Typograph. Regiâ. n. 1421.</td>\n", + " <td>Cantacuzeni Ex-Imperatoris Ioan Historiarum Libri IV graece ac latine Iacobo Pontano interprete cum ipsius & Ioan Gretseri notis 3 Vol Parisiis 1645 e Typograph Regia n 1421</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5895</th>\n", + " <td>14.377</td>\n", + " <td>455</td>\n", + " <td>Historia Antiqua</td>\n", + " <td>Historia Imperii Orientalis, Seu Bysantina</td>\n", + " <td>Folio</td>\n", + " <td>843</td>\n", + " <td>14.377_455_04</td>\n", + " <td>1475 Theophilacti Simocattæ Historiarum Libri VIII. gr. & Latinè Iac. Pontano interprete, cum Fabrotti emendationibus ac Augmento. Paris. 1647. è Typograph. Reg. accedit S. Nycephori Patriarchæ Constantinop. Breviarium¬ historicum græcè ac latine Dionys. Petavio interprete cum notis. Paris. 1648. è Typograph. Regiâ. n. 1424.</td>\n", + " <td>Theophilacti Simocattae Historiarum Libri VIII gr & Latine Iac Pontano interprete cum Fabrotti emendationibus ac Augmento Paris 1647 e Typograph Reg accedit S Nycephori Patriarchae Constantinop Breviarium historicum graece ac latine Dionys Petavio interprete cum notis Paris 1648 e Typograph Regia n 1424</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6019</th>\n", " <td>14.378</td>\n", - " <td>438</td>\n", - " <td>Paralipomena Historica</td>\n", - " <td>De Re Litterariâ & Bibliographicâ, cum Catalogis Bibliothecarum</td>\n", + " <td>30</td>\n", + " <td>Historia Nova Europæ</td>\n", + " <td>Canonum collectores & Canonistæ; Epistolæ decretales & Bullæ; necnon de Hierarchiâ & Rebus Ecclesiasticis</td>\n", " <td>Quarto</td>\n", - " <td>1270</td>\n", - " <td>14.378_438_00</td>\n", - " <td>Le Iournal des Savans commencé en 1665 par M.r de Hédouville, & continué d'année en année jusques & compris 172 par des Anonymes. 64 Vol. Paris. 1665. & années Suivantes. n. 2144 Plus les deux derniers vol. del A. 1733, et 1734 n. 2144.</td>\n", - " <td>Le Iournal des Savans commence en 1665 par Mr de Hedouville & continue d'annee en annee jusques & compris 172 par des Anonymes 64 Vol Paris 1665 & annees Suivantes n 2144 Plus les deux derniers vol del A 1733 et 1734 n 2144</td>\n", + " <td>870</td>\n", + " <td>14.378_030_03</td>\n", + " <td>Discorso di Fabio Pontano Sopra l'Antichità di Foligno. in Perugia. 1618. Mario Naccarini. n. 493.</td>\n", + " <td>Discorso di Fabio Pontano Sopra l'Antichita di Foligno in Perugia 1618 Mario Naccarini n 493</td>\n", " </tr>\n", " <tr>\n", - " <th>8446</th>\n", + " <th>6089</th>\n", " <td>14.378</td>\n", - " <td>444</td>\n", - " <td>Paralipomena Historica</td>\n", - " <td>De Re Litterariâ & Bibliographicâ, cum Catalogis Bibliothecarum</td>\n", + " <td>43</td>\n", + " <td>Historia Nova Europæ</td>\n", + " <td>Historia Neapolitana generalis & Singularis Historia Sicula generalis & Singularis</td>\n", " <td>Octavo und kleiner</td>\n", - " <td>1276</td>\n", - " <td>14.378_444_00</td>\n", - " <td>Iournal Litteraire composé par un nombre de Savans 12.° 6 Vol. La Haye. 1713. & Suiv. Iohnson. n. 2252.</td>\n", - " <td>Iournal Litteraire compose par un nombre de Savans 12° 6 Vol La Haye 1713 & Suiv Iohnson n 2252</td>\n", + " <td>883</td>\n", + " <td>14.378_043_00</td>\n", + " <td>Historia Neapolitana, autoribus Pandulpho Collenutio & Ioanne Ioviano Pontano. Dordrechti. 1618. Berestout. n. 116.</td>\n", + " <td>Historia Neapolitana autoribus Pandulpho Collenutio & Ioanne Ioviano Pontano Dordrechti 1618 Berestout n 116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6990</th>\n", + " <td>14.378</td>\n", + " <td>190</td>\n", + " <td>Historia Nova Europæ</td>\n", + " <td>Historia generalis & Singularis Belgij Foederati</td>\n", + " <td>Folio</td>\n", + " <td>1028</td>\n", + " <td>14.378_190_01</td>\n", + " <td>2000 Pontani (Ioan. Isaaci) Historiæ Gelricæ Libri XIV. ab anno ante Christum 57. ad Annum Chr. 1581. cum figuris. Hardervici Gelrorum. 1639. Iansson n. 1740.</td>\n", + " <td>Pontani Ioan Isaaci Historiae Gelricae Libri XIV ab anno ante Christum 57 ad Annum Chr 1581 cum figuris Hardervici Gelrorum 1639 Iansson n 1740</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7124</th>\n", + " <td>14.378</td>\n", + " <td>214</td>\n", + " <td>Historia Nova Europæ</td>\n", + " <td>Historia Hispanica generalis & aliquot temporum; Vitæ Regum; aliaque</td>\n", + " <td>Quarto</td>\n", + " <td>1052</td>\n", + " <td>14.378_214_05</td>\n", + " <td>Bracelli (Jac.) de Bello Hispanico Libri V. quibus accedunt I. Ioviani Pontani de Bello Neapolitano Libri VI. Haganoæ. 1530. I. Secerius n. 785.</td>\n", + " <td>Bracelli Jac de Bello Hispanico Libri V quibus accedunt I Ioviani Pontani de Bello Neapolitano Libri VI Haganoae 1530 I Secerius n 785</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7332</th>\n", + " <td>14.378</td>\n", + " <td>257</td>\n", + " <td>Historia Nova Europæ</td>\n", + " <td>Regionum Septentrionalium nempe Daniæ, Norwegiæ, Lapponiæ, Islandiæ, Sueciæ, ac Livoniæ Historia</td>\n", + " <td>Folio</td>\n", + " <td>1095</td>\n", + " <td>14.378_257_06</td>\n", + " <td>2125 Pontani (Isaaci) Rerum Danicarum Historia ab anno ante Christum III. ad Ann. Chr. 1448. cum Regni Chorographiâ. Hardervici Gelror. 1631. Nic. a Wiering. n. 1929.</td>\n", + " <td>Pontani Isaaci Rerum Danicarum Historia ab anno ante Christum III ad Ann Chr 1448 cum Regni Chorographia Hardervici Gelror 1631 Nic a Wiering n 1929</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " volume page number category \n", - "8398 14.378 438 Paralipomena Historica \\\n", - "8446 14.378 444 Paralipomena Historica \n", + " volume page number category \n", + "3144 14.377 65 Poëtica \\\n", + "4347 14.377 185 Philologia \n", + "5892 14.377 455 Historia Antiqua \n", + "5895 14.377 455 Historia Antiqua \n", + "6019 14.378 30 Historia Nova Europæ \n", + "6089 14.378 43 Historia Nova Europæ \n", + "6990 14.378 190 Historia Nova Europæ \n", + "7124 14.378 214 Historia Nova Europæ \n", + "7332 14.378 257 Historia Nova Europæ \n", "\n", - " subcategory \n", - "8398 De Re Litterariâ & Bibliographicâ, cum Catalogis Bibliothecarum \\\n", - "8446 De Re Litterariâ & Bibliographicâ, cum Catalogis Bibliothecarum \n", + " subcategory \n", + "3144 Poëtæ Latini Recentiores, cum Germanicis \\\n", + "4347 Operum Latinorum varij Argumenti Collectiones \n", + "5892 Historia Imperii Orientalis, Seu Bysantina \n", + "5895 Historia Imperii Orientalis, Seu Bysantina \n", + "6019 Canonum collectores & Canonistæ; Epistolæ decretales & Bullæ; necnon de Hierarchiâ & Rebus Ecclesiasticis \n", + "6089 Historia Neapolitana generalis & Singularis Historia Sicula generalis & Singularis \n", + "6990 Historia generalis & Singularis Belgij Foederati \n", + "7124 Historia Hispanica generalis & aliquot temporum; Vitæ Regum; aliaque \n", + "7332 Regionum Septentrionalium nempe Daniæ, Norwegiæ, Lapponiæ, Islandiæ, Sueciæ, ac Livoniæ Historia \n", "\n", " format handwritten page number entry_ID \n", - "8398 Quarto 1270 14.378_438_00 \\\n", - "8446 Octavo und kleiner 1276 14.378_444_00 \n", + "3144 Octavo und kleiner 473 14.377_065_01 \\\n", + "4347 Folio 581 14.377_185_06 \n", + "5892 Folio 843 14.377_455_01 \n", + "5895 Folio 843 14.377_455_04 \n", + "6019 Quarto 870 14.378_030_03 \n", + "6089 Octavo und kleiner 883 14.378_043_00 \n", + "6990 Folio 1028 14.378_190_01 \n", + "7124 Quarto 1052 14.378_214_05 \n", + "7332 Folio 1095 14.378_257_06 \n", "\n", - " entry \n", - "8398 Le Iournal des Savans commencé en 1665 par M.r de Hédouville, & continué d'année en année jusques & compris 172 par des Anonymes. 64 Vol. Paris. 1665. & années Suivantes. n. 2144 Plus les deux derniers vol. del A. 1733, et 1734 n. 2144. \\\n", - "8446 Iournal Litteraire composé par un nombre de Savans 12.° 6 Vol. La Haye. 1713. & Suiv. Iohnson. n. 2252. \n", + " entry \n", + "3144 Pontani (I. Ioviani) Poëmata. 8.° Venet. 1505. Aldus. n. 755. \\\n", + "4347 1021 Pontani (I. Ioviani) opera varij Argumenti; Ven. 1501. Bernardinus Vercellensis. n. 2216. \n", + "5892 1472 Cantacuzeni Ex-Imperatoris (Ioan.) Historiarum Libri IV. græcè ac latinè, Iacobo Pontano interpréte cum ipsius & Ioan. Gretseri notis. 3 Vol. Parisiis. 1645. è Typograph. Regiâ. n. 1421. \n", + "5895 1475 Theophilacti Simocattæ Historiarum Libri VIII. gr. & Latinè Iac. Pontano interprete, cum Fabrotti emendationibus ac Augmento. Paris. 1647. è Typograph. Reg. accedit S. Nycephori Patriarchæ Constantinop. Breviarium¬ historicum græcè ac latine Dionys. Petavio interprete cum notis. Paris. 1648. è Typograph. Regiâ. n. 1424. \n", + "6019 Discorso di Fabio Pontano Sopra l'Antichità di Foligno. in Perugia. 1618. Mario Naccarini. n. 493. \n", + "6089 Historia Neapolitana, autoribus Pandulpho Collenutio & Ioanne Ioviano Pontano. Dordrechti. 1618. Berestout. n. 116. \n", + "6990 2000 Pontani (Ioan. Isaaci) Historiæ Gelricæ Libri XIV. ab anno ante Christum 57. ad Annum Chr. 1581. cum figuris. Hardervici Gelrorum. 1639. Iansson n. 1740. \n", + "7124 Bracelli (Jac.) de Bello Hispanico Libri V. quibus accedunt I. Ioviani Pontani de Bello Neapolitano Libri VI. Haganoæ. 1530. I. Secerius n. 785. \n", + "7332 2125 Pontani (Isaaci) Rerum Danicarum Historia ab anno ante Christum III. ad Ann. Chr. 1448. cum Regni Chorographiâ. Hardervici Gelror. 1631. Nic. a Wiering. n. 1929. \n", "\n", - " cleaned entry \n", - "8398 Le Iournal des Savans commence en 1665 par Mr de Hedouville & continue d'annee en annee jusques & compris 172 par des Anonymes 64 Vol Paris 1665 & annees Suivantes n 2144 Plus les deux derniers vol del A 1733 et 1734 n 2144 \n", - "8446 Iournal Litteraire compose par un nombre de Savans 12° 6 Vol La Haye 1713 & Suiv Iohnson n 2252 " + " cleaned entry \n", + "3144 Pontani I Ioviani Poemata 8° Venet 1505 Aldus n 755 \n", + "4347 Pontani I Ioviani opera varij Argumenti Ven 1501 Bernardinus Vercellensis n 2216 \n", + "5892 Cantacuzeni Ex-Imperatoris Ioan Historiarum Libri IV graece ac latine Iacobo Pontano interprete cum ipsius & Ioan Gretseri notis 3 Vol Parisiis 1645 e Typograph Regia n 1421 \n", + "5895 Theophilacti Simocattae Historiarum Libri VIII gr & Latine Iac Pontano interprete cum Fabrotti emendationibus ac Augmento Paris 1647 e Typograph Reg accedit S Nycephori Patriarchae Constantinop Breviarium historicum graece ac latine Dionys Petavio interprete cum notis Paris 1648 e Typograph Regia n 1424 \n", + "6019 Discorso di Fabio Pontano Sopra l'Antichita di Foligno in Perugia 1618 Mario Naccarini n 493 \n", + "6089 Historia Neapolitana autoribus Pandulpho Collenutio & Ioanne Ioviano Pontano Dordrechti 1618 Berestout n 116 \n", + "6990 Pontani Ioan Isaaci Historiae Gelricae Libri XIV ab anno ante Christum 57 ad Annum Chr 1581 cum figuris Hardervici Gelrorum 1639 Iansson n 1740 \n", + "7124 Bracelli Jac de Bello Hispanico Libri V quibus accedunt I Ioviani Pontani de Bello Neapolitano Libri VI Haganoae 1530 I Secerius n 785 \n", + "7332 Pontani Isaaci Rerum Danicarum Historia ab anno ante Christum III ad Ann Chr 1448 cum Regni Chorographia Hardervici Gelror 1631 Nic a Wiering n 1929 " ] }, - "execution_count": 15, + "execution_count": 150, "metadata": {}, "output_type": "execute_result" } @@ -220,7 +339,7 @@ "def search_in_entry(df, string):\n", " return df[df['cleaned entry'].str.contains(string)]\n", "\n", - "info = search_in_entry(search_in_entry(entry_df, 'Savans'), 'Iournal')\n", + "info = search_in_entry(search_in_entry(entry_df, 'Pontan'), '')\n", "print(len(info))\n", "info" ] @@ -310,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 141, "id": "c4394718-cfd3-459e-8923-3ef255a41012", "metadata": { "tags": [] @@ -319,30 +438,30 @@ { "data": { "text/plain": [ - "volume 14.377\n", - "page number 28\n", - "category Poëtica\n", - "subcategory Poëtæ Græci\n", - "format Octavo und kleiner\n", - "handwritten page number 436\n", - "entry_ID 14.377_028_02\n", - "entry ..........Le Plutus & Les Nuéces du même trad. du grec par Mad.elle Anne le Fevre. 12.° Paris. 1684. Thierry. n. 1062.\n", - "cleaned entry Le Plutus & Les Nueces du meme trad du grec par Madelle Anne le Fevre 12° Paris 1684 Thierry n 1062\n", - "Name: 2857, dtype: object" + "volume 14.378\n", + "page number 239\n", + "category Historia Nova Europæ\n", + "subcategory Angliæ, Scotiæ, ac Hiberniæ Historia generalis & Singularis\n", + "format Folio\n", + "handwritten page number 1077\n", + "entry_ID 14.378_239_06\n", + "entry 2107 Boethij (Hectoris) Scotorum Historiæ a Gentis Orig. cum Ioan. Ferrerij continuatione. Paris. 1557. du Puys. n. 1917\n", + "cleaned entry Boethij Hectoris Scotorum Historiae a Gentis Orig cum Ioan Ferrerij continuatione Paris 1557 du Puys n 1917\n", + "Name: 7242, dtype: object" ] }, - "execution_count": 24, + "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "entry_df.loc[2857]" + "entry_df.loc[7242]" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 89, "id": "20facf5d-d609-498e-9907-7ebdffc09e15", "metadata": { "tags": [] @@ -352,55 +471,55 @@ "name": "stdout", "output_type": "stream", "text": [ - "BE.5.V.45\n", - "+Z175390902\n", - "Principes de philosophie ou preuves naturelles de l'existence de Dieu et de l'immortalite de l'ame\n", - "Genest, Charles-Claude\n", - "Paris\n", - "1716\n", - "B\n" + "BE.1.A.10\n", + "B1214205\n", + "Antiquites du Bosphore Cimmerien conservees au Musee Imperial de l'Ermitage = Drevnosti Bosfora Kimmerijskago\n", + "nan\n", + "St. Petersburg\n", + "nan\n", + "nan\n" ] }, { "data": { "text/plain": [ - "Signatur BE.5.V.45\n", - "Barcode +Z175390902\n", - "Titel Principes de philosophie ou preuves naturelles de l'existence de Dieu et de l'immortalite de l'ame\n", - "Autor Genest, Charles-Claude\n", - "Mitwirkender NaN\n", - "Anfang Veröffentlichungsdatum 1716.0\n", - "Ende Veröffentlichungsdatum NaN\n", - "Veröffentlichungsdatum 1716\n", - "Veröffentlichungsort Paris\n", - "Veröffentlichungsort (normiert) NaN\n", - "Sprache French\n", - "Schlagwörter Gedicht; Gottesbeweis; Seele; Unsterblichkeit; Belletristische Darstellung; Lyrik; Französisch\n", - "Schlagwörter (mit GND) Gottesbeweis$Ds--(DE-588)4021668-8;Belletristische Darstellung$Af;AT-OBV--ONB-AK;Seele$Ds--(DE-588)4054146-0;Unsterblichkeit$Ds--(DE-588)4061874-2;Belletristische Darstellung$Af;AT-OBV--ONB-AK;Lyrik$Ds--(DE-588)4036774-5;Französisch$Ds--(DE-588)4113615-9;AT-OBV--ONB-AK\n", - "Vorbesitzer NaN\n", - "Typ Gedicht--bellobv\n", - "Bemerkungen NaN\n", - "Gültiger Barcode Z175390902\n", - "Dateiname Z175390902_00000001.jpg\n", - "Wappenklassifizierung B\n", - "p_A 0.000108\n", - "p_B 0.99959\n", - "p_C 0.000184\n", - "p_N 0.000118\n", - "Farbklassifizierung red\n", - "p_blue 0.000051\n", - "p_red 0.999859\n", - "p_yellow 0.00009\n", - "Name: 14220, dtype: object" + "Signatur BE.1.A.10\n", + "Barcode B1214205\n", + "Titel Antiquites du Bosphore Cimmerien conservees au Musee Imperial de l'Ermitage = Drevnosti Bosfora Kimmerijskago\n", + "Autor NaN\n", + "Mitwirkender Gosudarstvennyj Ėrmitaž\n", + "Anfang Veröffentlichungsdatum NaN\n", + "Ende Veröffentlichungsdatum NaN\n", + "Veröffentlichungsdatum NaN\n", + "Veröffentlichungsort St. Petersburg\n", + "Veröffentlichungsort (normiert) NaN\n", + "Sprache unknown\n", + "Schlagwörter Sankt Petersburg; Museum; Eremitage; Straße von Kertsch; Antiquität\n", + "Schlagwörter (mit GND) Antiquität$Ds--(DE-588)4002325-4;Straße von Kertsch$Dg--(DE-588)4497588-0;Sankt Petersburg$Dg--(DE-588)4267026-3;Eremitage$Ds--(DE-588)4354208-6;AT-OBV--ONB-AK;Sankt Petersburg$Dg--(DE-588)4267026-3;Museum$Ds--(DE-588)4040795-0;Eremitage$Ds--(DE-588)4354208-6;Straße von Kertsch$Dg--(DE-588)4497588-0;Antiquität$Ds--(DE-588)4002325-4;AT-OBV--ONB-AK\n", + "Vorbesitzer NaN\n", + "Typ NaN\n", + "Bemerkungen NaN\n", + "Gültiger Barcode NaN\n", + "Dateiname NaN\n", + "Wappenklassifizierung NaN\n", + "p_A NaN\n", + "p_B NaN\n", + "p_C NaN\n", + "p_N NaN\n", + "Farbklassifizierung NaN\n", + "p_blue NaN\n", + "p_red NaN\n", + "p_yellow NaN\n", + "Name: 2, dtype: object" ] }, - "execution_count": 25, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "BE_entry = BE_df.loc[14220]\n", + "BE_entry = BE_df.loc[2]\n", "print(BE_entry['Signatur'])\n", "print(BE_entry['Barcode'])\n", "print(BE_entry['Titel'])\n", diff --git a/Notebooks/XML_Aufbereitung.ipynb b/Notebooks/XML_Aufbereitung.ipynb index eb317fa..d3a2c7e 100644 --- a/Notebooks/XML_Aufbereitung.ipynb +++ b/Notebooks/XML_Aufbereitung.ipynb @@ -2292,14 +2292,147 @@ }, { "cell_type": "code", - "execution_count": 230, + "execution_count": 269, + "id": "d73467a8-a7d2-44ad-8537-b281beaea2fa", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>AC Nummer</th>\n", + " <th>hs. Katalogeintrag ID</th>\n", + " <th>hs. Katalog Konfidenz</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>23376</th>\n", + " <td>AC09684749</td>\n", + " <td>14.378_443_03</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23383</th>\n", + " <td>AC09684749</td>\n", + " <td>14.378_443_03</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23384</th>\n", + " <td>AC09684749</td>\n", + " <td>14.378_443_03</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23385</th>\n", + " <td>AC09684749</td>\n", + " <td>14.378_443_03</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23386</th>\n", + " <td>AC09684749</td>\n", + " <td>14.378_443_03</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22693</th>\n", + " <td>AC10057410</td>\n", + " <td>14.376_423_10</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23282</th>\n", + " <td>AC10097073</td>\n", + " <td>14.377_059_01</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22650</th>\n", + " <td>AC09771765</td>\n", + " <td>14.376_341_05</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22651</th>\n", + " <td>AC09836828</td>\n", + " <td>14.378_367_03</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23439</th>\n", + " <td>AC10221894</td>\n", + " <td>14.376_340_01</td>\n", + " <td>sicher</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>801 rows × 3 columns</p>\n", + "</div>" + ], + "text/plain": [ + " AC Nummer hs. Katalogeintrag ID hs. Katalog Konfidenz\n", + "23376 AC09684749 14.378_443_03 sicher\n", + "23383 AC09684749 14.378_443_03 sicher\n", + "23384 AC09684749 14.378_443_03 sicher\n", + "23385 AC09684749 14.378_443_03 sicher\n", + "23386 AC09684749 14.378_443_03 sicher\n", + "... ... ... ...\n", + "22693 AC10057410 14.376_423_10 sicher\n", + "23282 AC10097073 14.377_059_01 sicher\n", + "22650 AC09771765 14.376_341_05 sicher\n", + "22651 AC09836828 14.378_367_03 sicher\n", + "23439 AC10221894 14.376_340_01 sicher\n", + "\n", + "[801 rows x 3 columns]" + ] + }, + "execution_count": 269, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matches_sig" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "3f61076e-8384-4080-9003-06b9ea774fe6", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "matches_sig = BE_df[['AC Nummer', 'hs. Katalogeintrag ID', 'hs. Katalog Konfidenz']].dropna(subset=['hs. Katalogeintrag ID'])\n", - "entry_df['AC number'] = -1\n", - "entry_df['AC cert'] = -1\n", + "matches_sig = matches_sig.loc[has_hw_matches['input_id']]\n", "\n", "for i, m in matches_sig.iterrows():\n", " ac_num = m['AC Nummer']\n", @@ -2318,7 +2451,7 @@ " for hs_i, hs_c in zip(hs_id, hs_cert):\n", " ind = entry_df[entry_df['entry_ID'] == hs_i].index.values[0]\n", " \n", - " if entry_df.at[ind, 'AC number'] == -1:\n", + " if (entry_df.at[ind, 'AC number'] == -1) or (entry_df.at[ind, 'AC number'] is None):\n", " entry_df.at[ind, 'AC number'] = [ac_num]\n", " entry_df.at[ind, 'AC cert'] = [hs_c]\n", " else:\n", @@ -2341,7 +2474,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 245, "id": "a21f6db7-19c0-4bf4-a185-d94c647750a2", "metadata": { "tags": [] @@ -2353,7 +2486,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 246, "id": "3aa2e989-255e-42d9-aa9f-986772b478cc", "metadata": { "tags": [] @@ -2363,9 +2496,109 @@ "entry_df = pd.read_excel('data/wip_BE_data/entry_df_WIP.xlsx', index_col=0)" ] }, + { + "cell_type": "markdown", + "id": "0e94b63a-228e-44ae-a8b4-9770ae5fba36", + "metadata": {}, + "source": [ + "## Finalize BE data" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "id": "e7223020-e040-4601-823d-19f219373e00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "no_BE_matches = pd.read_excel('../Daten/Katalogabgleich/Ausgefüllt/Ausgefüllt_no_BE.xlsx')\n", + "has_hw_matches = no_BE_matches.dropna(subset='control')" + ] + }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 248, + "id": "69ee2de7-a0e1-4f4c-b154-99931abb9765", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "826\n" + ] + } + ], + "source": [ + "def get_matches_from_entry(s):\n", + " certain_match = re.compile('\\d{1,4}\\Z|\\d{1,4},')\n", + " certs = certain_match.findall(s)\n", + " uncertain_match = re.compile('\\d{1,4}\\?')\n", + " uncerts = uncertain_match.findall(s)\n", + " clean_certs = [s.replace(',', '') for s in certs]\n", + " clean_uncerts = [s.replace('?', '') for s in uncerts]\n", + " return clean_certs, clean_uncerts\n", + "\n", + "dod_id = {\n", + " '14.376': 51202, \n", + " '14.377': 51184,\n", + " '14.378': 51219\n", + "}\n", + "\n", + "repl = 0\n", + "\n", + "for i, entry in has_hw_matches.iterrows():\n", + " BE_id = entry['input_id']\n", + " man_match = str(entry['control'])\n", + " certain, uncertain = get_matches_from_entry(man_match)\n", + " match_dict = {\n", + " 'hs. Katalog': 1,\n", + " 'hs. Katalog Konfidenz': [],\n", + " 'hs. Katalogband': [],\n", + " 'hs. Katalogseite Digitalisat': [],\n", + " 'Wissensklasse': [],\n", + " 'Wissensunterklasse': [],\n", + " 'Formatangabe': [],\n", + " 'hs. Katalogseite Handschrift': [],\n", + " 'hs. Katalogeintrag ID': [],\n", + " 'hs. Katalogeintrag': [],\n", + " 'hs. Katalog Image URL': []\n", + " }\n", + " for m in certain:\n", + " match_dict['hs. Katalog Konfidenz'] += ['sicher']\n", + " for m in uncertain:\n", + " match_dict['hs. Katalog Konfidenz'] += ['unsicher']\n", + " \n", + " all_matches = certain + uncertain\n", + " repl += len(all_matches)\n", + " \n", + " for m in all_matches:\n", + " hw_entry = entry_df.loc[int(m)]\n", + " match_dict['hs. Katalogband'] += [str(hw_entry['volume'])]\n", + " match_dict['hs. Katalogseite Digitalisat'] += [str(hw_entry['page number'])]\n", + " match_dict['Wissensklasse'] += [hw_entry['category']]\n", + " match_dict['Wissensunterklasse'] += [hw_entry['subcategory'] if not pd.isna(hw_entry['subcategory']) else '']\n", + " match_dict['Formatangabe'] += [hw_entry['format'] if not pd.isna(hw_entry['format']) else '']\n", + " match_dict['hs. Katalogseite Handschrift'] += [hw_entry['handwritten page number']]\n", + " match_dict['hs. Katalogeintrag ID'] += [hw_entry['entry_ID']]\n", + " match_dict['hs. Katalogeintrag'] += [hw_entry['entry']]\n", + " match_dict['hs. Katalog Image URL'] += [f\"https://iiif.onb.ac.at/images/DOD/{dod_id[str(hw_entry['volume'])]}/{hw_entry['page number']:08}.jp2/full/full/0/native.jpg\"]\n", + " \n", + " for key, val in match_dict.items():\n", + " if key != 'hs. Katalog':\n", + " val = ' | '.join(val)\n", + " BE_df.at[BE_id, key] = val\n", + "\n", + "print(repl)" + ] + }, + { + "cell_type": "code", + "execution_count": 253, "id": "0746de2c-3343-48b1-b921-4215f594ab79", "metadata": { "tags": [] @@ -2378,7 +2611,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 254, "id": "7a448905-e284-44b9-bdb1-38710466b341", "metadata": { "tags": [] @@ -2484,7 +2717,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 272, "id": "ae4d5368-bda2-4cdf-a170-f7e0eda103c3", "metadata": { "tags": [] @@ -2518,7 +2751,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 273, "id": "b73227d4-a0f1-4113-ad2c-093507aaf3a4", "metadata": { "tags": [] @@ -2563,7 +2796,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 274, "id": "ebb4894b-76db-4146-b62a-86c30f2f8609", "metadata": { "tags": [] @@ -2583,7 +2816,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 275, "id": "ba37d08d-f064-4ba0-aaf5-d33c9893bb1c", "metadata": { "tags": [] @@ -2620,7 +2853,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 276, "id": "629a49b3-75c7-4e3d-b3aa-5c6165c32e06", "metadata": { "tags": [] @@ -2643,7 +2876,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 277, "id": "ec90629a-8afb-4a65-b6bb-782830fd8bcf", "metadata": { "tags": [] @@ -2700,7 +2933,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 278, "id": "bd52a621-e46c-49d8-a5c5-a243ee85fba4", "metadata": { "tags": [] @@ -2725,7 +2958,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 279, "id": "a4a7bc48-ec82-4b67-b0dd-055d27dd008e", "metadata": { "tags": [] @@ -2740,7 +2973,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 280, "id": "710a147b-ea34-47ca-a381-5b477d8f12ab", "metadata": { "tags": [] @@ -2757,7 +2990,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 281, "id": "e7e81cbf-e473-4ae4-b846-2aea6b2e5c81", "metadata": { "tags": [] @@ -2782,7 +3015,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 282, "id": "de097669-9180-4510-a9ac-d215433a7fca", "metadata": { "tags": [] @@ -2805,7 +3038,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 283, "id": "4687407c-d205-4aab-8c86-fafa73b997b9", "metadata": { "tags": [] @@ -2833,7 +3066,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 284, "id": "32240f6b-1ecc-47d1-8f2d-0cd14f899b3d", "metadata": { "tags": [] @@ -2860,7 +3093,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 285, "id": "55b131a0-58aa-4eb1-ba47-92b3bb46ffaa", "metadata": { "tags": [] @@ -2896,76 +3129,12 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 286, "id": "a50d2147-4ee5-4d6b-a950-b4aca1f5e5ca", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "<listChange>\n", - "<change when=\"2022-12-12\" who=\"#AT\">\n", - " Import of facsimiles to Transkribus\n", - " </change>\n", - "<change from=\"2022-12-13\" to=\"2023-01-26\" who=\"#AT #SM #MK\">\n", - " Manual transcription of pages to be used as Ground truth for training process in Transkribus\n", - " </change>\n", - "<change from=\"2023-02-20\" to=\"2023-02-28\" who=\"#SM\">\n", - " Automated transcription of the first three volumes Cod. 14.376-14.378\n", - " </change>\n", - "<change from=\"2023-03-01\" to=\"2023-12-13\" who=\"#AT #SM #MK #GF #PE #TG #AR\">\n", - " Correction of automated transcripts in Transkribus, adding custom tags (add, corr, bleistift, roetel, continued, comment)\n", - " </change>\n", - "<change when=\"2023-12-18\" who=\"#SM\">\n", - " Export from Transkribus to TEI-XML\n", - " </change>\n", - "<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n", - "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n", - "</change></listChange>\n", - "<listChange>\n", - "<change when=\"2022-12-12\" who=\"#AT\">\n", - " Import of facsimiles to Transkribus\n", - " </change>\n", - "<change from=\"2022-12-13\" to=\"2023-01-26\" who=\"#AT #SM #MK\">\n", - " Manual transcription of pages to be used as Ground truth for training process in Transkribus\n", - " </change>\n", - "<change from=\"2023-02-20\" to=\"2023-02-28\" who=\"#SM\">\n", - " Automated transcription of the first three volumes Cod. 14.376-14.378\n", - " </change>\n", - "<change from=\"2023-03-01\" to=\"2023-12-13\" who=\"#AT #SM #MK #GF #PE #TG #AR\">\n", - " Correction of automated transcripts in Transkribus, adding custom tags (add, corr, bleistift, roetel, continued, comment)\n", - " </change>\n", - "<change when=\"2023-12-18\" who=\"#SM\">\n", - " Export from Transkribus to TEI-XML\n", - " </change>\n", - "<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n", - "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n", - "</change></listChange>\n", - "<listChange>\n", - "<change when=\"2022-12-12\" who=\"#AT\">\n", - " Import of facsimiles to Transkribus\n", - " </change>\n", - "<change from=\"2022-12-13\" to=\"2023-01-26\" who=\"#AT #SM #MK\">\n", - " Manual transcription of pages to be used as Ground truth for training process in Transkribus\n", - " </change>\n", - "<change from=\"2023-02-20\" to=\"2023-02-28\" who=\"#SM\">\n", - " Automated transcription of the first three volumes Cod. 14.376-14.378\n", - " </change>\n", - "<change from=\"2023-03-01\" to=\"2023-12-13\" who=\"#AT #SM #MK #GF #PE #TG #AR\">\n", - " Correction of automated transcripts in Transkribus, adding custom tags (add, corr, bleistift, roetel, continued, comment)\n", - " </change>\n", - "<change when=\"2023-12-18\" who=\"#SM\">\n", - " Export from Transkribus to TEI-XML\n", - " </change>\n", - "<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n", - "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n", - "</change></listChange>\n" - ] - } - ], + "outputs": [], "source": [ "for key in tei:\n", " change = bs('''<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n", @@ -2986,7 +3155,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 288, "id": "ee793bf1-5a43-4715-8b2b-b5780d6fd8b3", "metadata": { "tags": [] @@ -3011,7 +3180,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 289, "id": "092825b9-9755-4658-8fd5-2183926ed981", "metadata": { "tags": [] @@ -3069,7 +3238,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 290, "id": "63477b16-cda4-4eea-a27d-4ec0c5ceea6c", "metadata": { "tags": [] @@ -3144,7 +3313,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5850f6ad-c2af-40f5-b4f6-0e1a3329d6d2", + "id": "c43a4852-4c90-4ba3-bf14-70e1688c0d74", "metadata": {}, "outputs": [], "source": [] -- GitLab