diff --git a/Daten/Vorhersagen/Complete_BE.xlsx b/Daten/Vorhersagen/Complete_BE.xlsx index a559b5b4f3e0c8c6f9d07a7cc2c58f4e208502a3..bcb86b3127f0d6bcef2c0a4e84981b6d18f1675b 100644 Binary files a/Daten/Vorhersagen/Complete_BE.xlsx and b/Daten/Vorhersagen/Complete_BE.xlsx differ diff --git a/Notebooks/Remaining_barcodes_and_metadata.ipynb b/Notebooks/Remaining_barcodes_and_metadata.ipynb index 5ff5eb3e1dad5ba2f548725a6544abebdd762390..4fbe68ec9e6c5dd497be56663d93bbf30bb8c74c 100644 --- a/Notebooks/Remaining_barcodes_and_metadata.ipynb +++ b/Notebooks/Remaining_barcodes_and_metadata.ipynb @@ -11,7 +11,7 @@ { "data": { "text/plain": [ - "<contextlib.ExitStack at 0x7f790a6b5ba0>" + "<contextlib.ExitStack at 0x7f531bc16590>" ] }, "execution_count": 1, @@ -3378,6 +3378,296 @@ "final_supralibros.to_csv('data/predictions/supralibros_pred_final.csv', index=False)" ] }, + { + "cell_type": "code", + "execution_count": 4, + "id": "563e1571-ba77-49e1-a886-c5b4537d3231", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "final_supralibros = pd.read_csv('data/predictions/supralibros_pred_final.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "50313913-feb6-428e-af42-5c7f677269af", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>identifier</th>\n", + " <th>filename</th>\n", + " <th>prediction</th>\n", + " <th>man_prediction</th>\n", + " <th>p_A</th>\n", + " <th>p_B</th>\n", + " <th>p_C</th>\n", + " <th>p_N</th>\n", + " <th>color</th>\n", + " <th>man_color</th>\n", + " <th>p_blue</th>\n", + " <th>p_red</th>\n", + " <th>p_yellow</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Z10520809</td>\n", + " <td>Z10520809_00000004.jpg</td>\n", + " <td>C</td>\n", + " <td>N</td>\n", + " <td>0.150160</td>\n", + " <td>0.135397</td>\n", + " <td>0.362168</td>\n", + " <td>0.352275</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Z152357407</td>\n", + " <td>Z152357407_00000003.jpg</td>\n", + " <td>C</td>\n", + " <td>N</td>\n", + " <td>0.002158</td>\n", + " <td>0.001710</td>\n", + " <td>0.781663</td>\n", + " <td>0.214469</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Z152357705</td>\n", + " <td>Z152357705_00000003.jpg</td>\n", + " <td>C</td>\n", + " <td>N</td>\n", + " <td>0.022423</td>\n", + " <td>0.019239</td>\n", + " <td>0.582193</td>\n", + " <td>0.376145</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Z155078001</td>\n", + " <td>Z155078001_00000002.jpg</td>\n", + " <td>A</td>\n", + " <td>N</td>\n", + " <td>0.638692</td>\n", + " <td>0.013483</td>\n", + " <td>0.007171</td>\n", + " <td>0.340654</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Z155916602</td>\n", + " <td>Z155916602_00000001.jpg</td>\n", + " <td>C</td>\n", + " <td>N</td>\n", + " <td>0.002850</td>\n", + " <td>0.001018</td>\n", + " <td>0.934370</td>\n", + " <td>0.061763</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6210</th>\n", + " <td>Z43163301</td>\n", + " <td>Z43163301_00000001.jpg</td>\n", + " <td>B</td>\n", + " <td>B</td>\n", + " <td>0.000872</td>\n", + " <td>0.998139</td>\n", + " <td>0.000583</td>\n", + " <td>0.000406</td>\n", + " <td>red</td>\n", + " <td>red</td>\n", + " <td>0.000331</td>\n", + " <td>0.998473</td>\n", + " <td>0.001197</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6211</th>\n", + " <td>Z43165504</td>\n", + " <td>Z43165504_00000001.jpg</td>\n", + " <td>B</td>\n", + " <td>B</td>\n", + " <td>0.000670</td>\n", + " <td>0.998324</td>\n", + " <td>0.000560</td>\n", + " <td>0.000445</td>\n", + " <td>red</td>\n", + " <td>red</td>\n", + " <td>0.001188</td>\n", + " <td>0.996374</td>\n", + " <td>0.002438</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6212</th>\n", + " <td>Z43169509</td>\n", + " <td>Z43169509_00000001.jpg</td>\n", + " <td>A</td>\n", + " <td>A</td>\n", + " <td>0.995156</td>\n", + " <td>0.001711</td>\n", + " <td>0.002191</td>\n", + " <td>0.000941</td>\n", + " <td>red</td>\n", + " <td>red</td>\n", + " <td>0.000120</td>\n", + " <td>0.999710</td>\n", + " <td>0.000170</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6213</th>\n", + " <td>Z43173306</td>\n", + " <td>Z43173306_00000001.jpg</td>\n", + " <td>B</td>\n", + " <td>B</td>\n", + " <td>0.000573</td>\n", + " <td>0.998986</td>\n", + " <td>0.000175</td>\n", + " <td>0.000266</td>\n", + " <td>red</td>\n", + " <td>red</td>\n", + " <td>0.001646</td>\n", + " <td>0.997385</td>\n", + " <td>0.000969</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6214</th>\n", + " <td>Z95576003</td>\n", + " <td>Z95576003_00000001.jpg</td>\n", + " <td>A</td>\n", + " <td>A</td>\n", + " <td>0.997559</td>\n", + " <td>0.001285</td>\n", + " <td>0.000620</td>\n", + " <td>0.000536</td>\n", + " <td>yellow</td>\n", + " <td>yellow</td>\n", + " <td>0.000414</td>\n", + " <td>0.009923</td>\n", + " <td>0.989663</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6215 rows × 13 columns</p>\n", + "</div>" + ], + "text/plain": [ + " identifier filename prediction man_prediction p_A \n", + "0 Z10520809 Z10520809_00000004.jpg C N 0.150160 \\\n", + "1 Z152357407 Z152357407_00000003.jpg C N 0.002158 \n", + "2 Z152357705 Z152357705_00000003.jpg C N 0.022423 \n", + "3 Z155078001 Z155078001_00000002.jpg A N 0.638692 \n", + "4 Z155916602 Z155916602_00000001.jpg C N 0.002850 \n", + "... ... ... ... ... ... \n", + "6210 Z43163301 Z43163301_00000001.jpg B B 0.000872 \n", + "6211 Z43165504 Z43165504_00000001.jpg B B 0.000670 \n", + "6212 Z43169509 Z43169509_00000001.jpg A A 0.995156 \n", + "6213 Z43173306 Z43173306_00000001.jpg B B 0.000573 \n", + "6214 Z95576003 Z95576003_00000001.jpg A A 0.997559 \n", + "\n", + " p_B p_C p_N color man_color p_blue p_red \n", + "0 0.135397 0.362168 0.352275 NaN NaN NaN NaN \\\n", + "1 0.001710 0.781663 0.214469 NaN NaN NaN NaN \n", + "2 0.019239 0.582193 0.376145 NaN NaN NaN NaN \n", + "3 0.013483 0.007171 0.340654 NaN NaN NaN NaN \n", + "4 0.001018 0.934370 0.061763 NaN NaN NaN NaN \n", + "... ... ... ... ... ... ... ... \n", + "6210 0.998139 0.000583 0.000406 red red 0.000331 0.998473 \n", + "6211 0.998324 0.000560 0.000445 red red 0.001188 0.996374 \n", + "6212 0.001711 0.002191 0.000941 red red 0.000120 0.999710 \n", + "6213 0.998986 0.000175 0.000266 red red 0.001646 0.997385 \n", + "6214 0.001285 0.000620 0.000536 yellow yellow 0.000414 0.009923 \n", + "\n", + " p_yellow \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "6210 0.001197 \n", + "6211 0.002438 \n", + "6212 0.000170 \n", + "6213 0.000969 \n", + "6214 0.989663 \n", + "\n", + "[6215 rows x 13 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_supralibros" + ] + }, { "cell_type": "markdown", "id": "97c9d080-fef4-41a4-b45c-b75785017c48", @@ -3388,7 +3678,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 6, "id": "2c090fa7-f6be-474c-9e20-e92b75418183", "metadata": { "tags": [] @@ -3400,7 +3690,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 7, "id": "e8ea4fe0-dd29-4005-8503-3c3feee93d25", "metadata": { "tags": [] @@ -3412,7 +3702,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 8, "id": "1faa1f80-d284-4c6f-86a7-a1f230576ea3", "metadata": { "tags": [] @@ -3435,7 +3725,7 @@ " dtype='object')" ] }, - "execution_count": 115, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -3446,7 +3736,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 9, "id": "96ae8472-5308-4506-b69f-94650beb5d95", "metadata": { "tags": [] @@ -3458,7 +3748,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 10, "id": "63a33898-5fb0-47b0-8662-f8163131ca3a", "metadata": { "tags": [] @@ -3470,7 +3760,7 @@ }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 11, "id": "b0f03031-5d5f-483f-b7d2-770fdd9f563f", "metadata": { "tags": [] @@ -3479,20 +3769,20 @@ { "data": { "text/plain": [ - "571770 Z165045809\n", - "573009 Z16691330X\n", - "574380 Z16907760X\n", - "574405 Z169084305\n", - "575078 Z169788400\n", - "575506 Z170820001\n", - "577395 Z175251203\n", - "601298 Z251589102\n", - "601303 Z251589205\n", - "601970 Z252861302\n", + "61 Z165045809\n", + "63 Z16691330X\n", + "64 Z16907760X\n", + "65 Z169084305\n", + "66 Z169788400\n", + "67 Z170820001\n", + "68 Z175251203\n", + "70 Z251589102\n", + "71 Z251589205\n", + "72 Z252861302\n", "Name: identifier, dtype: object" ] }, - "execution_count": 226, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -3504,7 +3794,7 @@ }, { "cell_type": "code", - "execution_count": 213, + "execution_count": 12, "id": "bb150371-3990-4a72-8a3c-451478083d54", "metadata": { "tags": [] @@ -3543,7 +3833,7 @@ }, { "cell_type": "code", - "execution_count": 282, + "execution_count": 13, "id": "7b90d595-ad11-477a-b3a8-8662191ec55e", "metadata": { "tags": [] @@ -3552,11 +3842,11 @@ { "data": { "text/plain": [ - "610206 rep_131EA36A\n", + "87 rep_131EA36A\n", "Name: identifier, dtype: object" ] }, - "execution_count": 282, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -3567,7 +3857,7 @@ }, { "cell_type": "code", - "execution_count": 414, + "execution_count": 14, "id": "4a2f570d-fca7-49fb-8f84-c6a6a435a9f7", "metadata": { "tags": [] @@ -3683,7 +3973,7 @@ }, { "cell_type": "code", - "execution_count": 415, + "execution_count": 15, "id": "6f6d2e5e-c056-4d72-b890-7247fb42b7f1", "metadata": { "tags": [] @@ -3692,7 +3982,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3619fbbb5b6349cdbb02ca667bb19359", + "model_id": "64070ce7edf24a94b4f28255b3f6cbd7", "version_major": 2, "version_minor": 0 }, @@ -3713,7 +4003,7 @@ }, { "cell_type": "code", - "execution_count": 416, + "execution_count": 16, "id": "94f3695b-1e69-4501-bf7a-bdc392f34b9e", "metadata": { "tags": [] @@ -4128,7 +4418,7 @@ "[804 rows x 22 columns]" ] }, - "execution_count": 416, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -4142,7 +4432,7 @@ }, { "cell_type": "code", - "execution_count": 417, + "execution_count": 17, "id": "8f74b754-71ef-4386-b398-da991d47c44c", "metadata": { "tags": [] @@ -4464,7 +4754,7 @@ "[8 rows x 22 columns]" ] }, - "execution_count": 417, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -4475,7 +4765,7 @@ }, { "cell_type": "code", - "execution_count": 433, + "execution_count": 18, "id": "03f93d26-69ba-4576-a9d5-eab1658db033", "metadata": { "tags": [] @@ -4495,7 +4785,7 @@ "Name: Signatur, dtype: object" ] }, - "execution_count": 433, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -4514,7 +4804,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": 19, "id": "d9d94a3c-03c2-4ab3-a36e-71de47d95563", "metadata": { "tags": [] @@ -4527,7 +4817,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": 20, "id": "5b39cc07-2906-4841-ac30-9df293ab2fba", "metadata": { "tags": [] @@ -4550,7 +4840,7 @@ "Name: clean_signature, Length: 252, dtype: object" ] }, - "execution_count": 143, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -4561,7 +4851,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 21, "id": "a0f1db41-696c-4716-a006-cef7e4e1b2fb", "metadata": { "tags": [] @@ -4573,7 +4863,7 @@ }, { "cell_type": "code", - "execution_count": 307, + "execution_count": 22, "id": "44e1ca58-6466-43e0-b6f0-68513718477e", "metadata": { "tags": [] @@ -4585,7 +4875,7 @@ }, { "cell_type": "code", - "execution_count": 333, + "execution_count": 23, "id": "e9f62a92-5ccd-4b2e-916a-0bd3430b0867", "metadata": { "tags": [] @@ -4606,7 +4896,7 @@ }, { "cell_type": "code", - "execution_count": 353, + "execution_count": 24, "id": "af90c6f4-0aec-4b5d-80f6-03bdc3b38bd9", "metadata": { "tags": [] @@ -4654,7 +4944,7 @@ }, { "cell_type": "code", - "execution_count": 354, + "execution_count": 25, "id": "a1b09796-5790-48e4-a752-dbff8f074ee5", "metadata": { "tags": [] @@ -4663,7 +4953,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "72b341b99a294462addeb5a872149658", + "model_id": "c070bc33faef43b98bf05ab2f5426f56", "version_major": 2, "version_minor": 0 }, @@ -4684,7 +4974,7 @@ }, { "cell_type": "code", - "execution_count": 406, + "execution_count": 26, "id": "36f0635b-e495-43cc-b821-495d0f89080f", "metadata": { "tags": [] @@ -4696,7 +4986,7 @@ }, { "cell_type": "code", - "execution_count": 395, + "execution_count": 27, "id": "11c58f53-2958-4669-9210-949bcaba6bcf", "metadata": { "tags": [] @@ -4717,7 +5007,7 @@ "dtype: object" ] }, - "execution_count": 395, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -4728,7 +5018,7 @@ }, { "cell_type": "code", - "execution_count": 407, + "execution_count": 28, "id": "70262b42-b0a8-442f-8d42-74e3013c2d22", "metadata": { "tags": [] @@ -4752,7 +5042,7 @@ }, { "cell_type": "code", - "execution_count": 460, + "execution_count": 29, "id": "b7753144-b97d-4263-990c-997845300859", "metadata": { "tags": [] @@ -4960,7 +5250,7 @@ "[225 rows x 7 columns]" ] }, - "execution_count": 460, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -4974,7 +5264,7 @@ }, { "cell_type": "code", - "execution_count": 463, + "execution_count": 30, "id": "584cedc1-0eb7-45d6-9535-e79867f4249b", "metadata": { "tags": [] @@ -5182,7 +5472,7 @@ "[232 rows x 7 columns]" ] }, - "execution_count": 463, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -5201,7 +5491,7 @@ }, { "cell_type": "code", - "execution_count": 465, + "execution_count": 31, "id": "f048ed87-4541-4613-9683-227a820d7321", "metadata": { "tags": [] @@ -5523,7 +5813,7 @@ "[8 rows x 22 columns]" ] }, - "execution_count": 465, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -5534,7 +5824,7 @@ }, { "cell_type": "code", - "execution_count": 468, + "execution_count": 32, "id": "b02e6456-5bf7-4f14-8e53-04e71c701159", "metadata": { "tags": [] @@ -5694,15 +5984,15 @@ " <td>Coloniae</td>\n", " <td>Köln</td>\n", " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", + " <td>14.376</td>\n", + " <td>327.0</td>\n", + " <td>Medicina</td>\n", + " <td>Medicina, Chirurgia, Anatomia, Pharmacia & Chimia</td>\n", + " <td>Quarto</td>\n", + " <td>285</td>\n", + " <td>14.376_327_06</td>\n", + " <td>Nicandri Theriaca & Alexipharmaca cum Scholiis...</td>\n", + " <td>https://iiif.onb.ac.at/images/DOD/51202/000003...</td>\n", " <td>BE5N37</td>\n", " </tr>\n", " <tr>\n", @@ -5742,15 +6032,15 @@ " <td>Aresdorffij Basileae</td>\n", " <td>Basel</td>\n", " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", + " <td>14.376</td>\n", + " <td>99.0</td>\n", + " <td>Theologia</td>\n", + " <td>Theologi Scholastici</td>\n", + " <td>Octavo und kleiner</td>\n", + " <td>75</td>\n", + " <td>14.376_099_04</td>\n", + " <td>Castellionis (Seb.) dialogi IV. de Prædestinat...</td>\n", + " <td>https://iiif.onb.ac.at/images/DOD/51202/000000...</td>\n", " <td>BE8S34</td>\n", " </tr>\n", " <tr>\n", @@ -5766,15 +6056,15 @@ " <td>Rotterdam</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", + " <td>14.377</td>\n", + " <td>272.0</td>\n", + " <td>Philologia</td>\n", + " <td>Dialogi, & Colloquia</td>\n", + " <td>Octavo und kleiner</td>\n", + " <td>668</td>\n", + " <td>14.377_272_04</td>\n", + " <td>Entretiens de Maxime & de Themiste, ou reponse...</td>\n", + " <td>https://iiif.onb.ac.at/images/DOD/51184/000002...</td>\n", " <td>BE8S53</td>\n", " </tr>\n", " <tr>\n", @@ -5790,15 +6080,15 @@ " <td>Paris</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", + " <td>14.378</td>\n", + " <td>122.0</td>\n", + " <td>Historia Nova Europæ</td>\n", + " <td>Historia Franciæ Singularum vel aliquot tempor...</td>\n", + " <td>Octavo und kleiner</td>\n", + " <td>962</td>\n", + " <td>14.378_122_04</td>\n", + " <td>Traité de la Loy Salique, ou les origines & au...</td>\n", + " <td>https://iiif.onb.ac.at/images/DOD/51219/000001...</td>\n", " <td>BE9V41</td>\n", " </tr>\n", " </tbody>\n", @@ -5878,71 +6168,71 @@ "7325 Paris ... NaN \n", "7326 Paris ... NaN \n", "11419 NaN ... 14.378 \n", - "13416 Köln ... NaN \n", + "13416 Köln ... 14.376 \n", "19481 Paris ... NaN \n", - "19851 Basel ... NaN \n", - "19873 NaN ... NaN \n", - "21991 NaN ... NaN \n", + "19851 Basel ... 14.376 \n", + "19873 NaN ... 14.377 \n", + "21991 NaN ... 14.378 \n", "\n", " hs. Katalogseite Digitalisat Wissensklasse \n", "2668 105.0 Theologia \\\n", "7325 NaN NaN \n", "7326 NaN NaN \n", "11419 132.0 Historia Nova Europæ \n", - "13416 NaN NaN \n", + "13416 327.0 Medicina \n", "19481 NaN NaN \n", - "19851 NaN NaN \n", - "19873 NaN NaN \n", - "21991 NaN NaN \n", + "19851 99.0 Theologia \n", + "19873 272.0 Philologia \n", + "21991 122.0 Historia Nova Europæ \n", "\n", - " Wissensunterklasse Formatangabe \n", - "2668 Theologi Morales, Ascetici, & Parænetici Quarto \\\n", - "7325 NaN NaN \n", - "7326 NaN NaN \n", - "11419 Historia Provinciarum & Urbium Galliæ Folio \n", - "13416 NaN NaN \n", - "19481 NaN NaN \n", - "19851 NaN NaN \n", - "19873 NaN NaN \n", - "21991 NaN NaN \n", + " Wissensunterklasse Formatangabe \n", + "2668 Theologi Morales, Ascetici, & Parænetici Quarto \\\n", + "7325 NaN NaN \n", + "7326 NaN NaN \n", + "11419 Historia Provinciarum & Urbium Galliæ Folio \n", + "13416 Medicina, Chirurgia, Anatomia, Pharmacia & Chimia Quarto \n", + "19481 NaN NaN \n", + "19851 Theologi Scholastici Octavo und kleiner \n", + "19873 Dialogi, & Colloquia Octavo und kleiner \n", + "21991 Historia Franciæ Singularum vel aliquot tempor... Octavo und kleiner \n", "\n", " hs. Katalogseite Handschrift hs. Katalogeintrag ID \n", "2668 81 14.376_105_00 \\\n", "7325 NaN NaN \n", "7326 NaN NaN \n", "11419 972 14.378_132_07 \n", - "13416 NaN NaN \n", + "13416 285 14.376_327_06 \n", "19481 NaN NaN \n", - "19851 NaN NaN \n", - "19873 NaN NaN \n", - "21991 NaN NaN \n", + "19851 75 14.376_099_04 \n", + "19873 668 14.377_272_04 \n", + "21991 962 14.378_122_04 \n", "\n", " hs. Katalogeintrag \n", "2668 Clichtovei (Iodoci) de vitâ & moribus Sacerdot... \\\n", "7325 NaN \n", "7326 NaN \n", "11419 1759 Les grandes Chroniques de Bretagne depuis... \n", - "13416 NaN \n", + "13416 Nicandri Theriaca & Alexipharmaca cum Scholiis... \n", "19481 NaN \n", - "19851 NaN \n", - "19873 NaN \n", - "21991 NaN \n", + "19851 Castellionis (Seb.) dialogi IV. de Prædestinat... \n", + "19873 Entretiens de Maxime & de Themiste, ou reponse... \n", + "21991 Traité de la Loy Salique, ou les origines & au... \n", "\n", " hs. Katalog Image URL clean_signature \n", "2668 https://iiif.onb.ac.at/images/DOD/51202/000001... BE10O3 \n", "7325 NaN BE2E4(Vol1) \n", "7326 NaN BE2E4(Vol1) \n", "11419 https://iiif.onb.ac.at/images/DOD/51219/000001... BE4L38 \n", - "13416 NaN BE5N37 \n", + "13416 https://iiif.onb.ac.at/images/DOD/51202/000003... BE5N37 \n", "19481 NaN BE8P1920(Vol2) \n", - "19851 NaN BE8S34 \n", - "19873 NaN BE8S53 \n", - "21991 NaN BE9V41 \n", + "19851 https://iiif.onb.ac.at/images/DOD/51202/000000... BE8S34 \n", + "19873 https://iiif.onb.ac.at/images/DOD/51184/000002... BE8S53 \n", + "21991 https://iiif.onb.ac.at/images/DOD/51219/000001... BE9V41 \n", "\n", "[9 rows x 39 columns]" ] }, - "execution_count": 468, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -5953,7 +6243,7 @@ }, { "cell_type": "code", - "execution_count": 484, + "execution_count": 33, "id": "79ed2426-daa4-46df-b148-c4773234a853", "metadata": { "tags": [] @@ -5969,7 +6259,7 @@ " dtype='object')" ] }, - "execution_count": 484, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -5980,7 +6270,7 @@ }, { "cell_type": "code", - "execution_count": 485, + "execution_count": 34, "id": "07290446-3cde-417a-8ba0-6f10959ea50e", "metadata": { "tags": [] @@ -6003,7 +6293,7 @@ " dtype='object')" ] }, - "execution_count": 485, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -6014,7 +6304,7 @@ }, { "cell_type": "code", - "execution_count": 486, + "execution_count": 35, "id": "6f42f938-db2b-4854-800a-521c6845d545", "metadata": { "tags": [] @@ -6468,7 +6758,7 @@ "[23670 rows x 43 columns]" ] }, - "execution_count": 486, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -6482,7 +6772,7 @@ }, { "cell_type": "code", - "execution_count": 487, + "execution_count": 36, "id": "0c398ac9-cd34-49a2-b6d0-d1d057d33593", "metadata": { "tags": [] @@ -6936,7 +7226,7 @@ "[804 rows x 43 columns]" ] }, - "execution_count": 487, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -6952,7 +7242,7 @@ }, { "cell_type": "code", - "execution_count": 488, + "execution_count": 37, "id": "c4a51434-6356-4d5b-9bb4-7737dc26046a", "metadata": { "tags": [] @@ -6976,7 +7266,7 @@ " dtype='object')" ] }, - "execution_count": 488, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -6987,7 +7277,7 @@ }, { "cell_type": "code", - "execution_count": 493, + "execution_count": 38, "id": "d4d83256-5d0d-433f-b3fa-f7ea01ad4ca8", "metadata": { "tags": [] @@ -6999,19 +7289,19 @@ }, { "cell_type": "code", - "execution_count": 494, + "execution_count": 39, "id": "8f4eab54-4475-4b8e-a4f0-1397290d28f1", "metadata": { "tags": [] }, "outputs": [], "source": [ - "to_be_saved.to_excel('../Daten/Vorhersagen/WIP_complete_BE.xlsx', index=False)" + "to_be_saved.to_excel('../Daten/Vorhersagen/Complete_BE.xlsx', index=False)" ] }, { "cell_type": "code", - "execution_count": 495, + "execution_count": 40, "id": "332be4f7-e0e1-4a7c-9aac-a1f42c1df383", "metadata": { "tags": [] @@ -7107,7 +7397,7 @@ "[1 rows x 22 columns]" ] }, - "execution_count": 495, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -7118,7 +7408,7 @@ }, { "cell_type": "code", - "execution_count": 500, + "execution_count": 41, "id": "ce61797d-72f4-40ee-b69e-d65a463aff6f", "metadata": { "tags": [] @@ -7142,7 +7432,7 @@ " dtype='object')" ] }, - "execution_count": 500, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } diff --git a/Notebooks/String_matching.ipynb b/Notebooks/String_matching.ipynb index de64d01ac66adc779773a85a94bdd23fedcd44a8..52bed3cad30e0b49852d7b33d9a755290c83346f 100644 --- a/Notebooks/String_matching.ipynb +++ b/Notebooks/String_matching.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 176, "id": "29ca0dc8-cae7-4f12-bd60-fd74ea6ae5ac", "metadata": { "tags": [] @@ -1449,18 +1449,6 @@ "## Add new matching data to existing catalogue data" ] }, - { - "cell_type": "code", - "execution_count": 171, - "id": "c52f2098-221b-4912-841a-d54e25788143", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "BE_df = pd.read_excel('../Daten/Vorhersagen/WIP_complete_BE.xlsx', index_col=0)" - ] - }, { "cell_type": "code", "execution_count": 172, @@ -1526,7 +1514,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 174, "id": "b8d40359-5eba-49e6-b2a6-3bc37786dd4c", "metadata": { "tags": [] @@ -1565,15 +1553,14 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 175, "id": "4ed3c187-ab7a-419a-8530-386a69143b3d", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# BE_df.to_excel('../Daten/Vorhersagen/Katalogauszug, Vorhersagen und hs. Katalogverbindungen.xlsx', index=False)\n", - "BE_df.to_excel('../Daten/Vorhersagen/Complete_BE.xlsx', index=False)" + "BE_df.to_excel('../Daten/Vorhersagen/Katalogauszug, Vorhersagen und hs. Katalogverbindungen.xlsx', index=False)" ] }, {