diff --git a/Daten/Vorhersagen/BE_final.xlsx b/Daten/Vorhersagen/BE_final.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d9db3718bfecc713720c8615353b068e2a5eb420 Binary files /dev/null and b/Daten/Vorhersagen/BE_final.xlsx differ diff --git a/Daten/Vorhersagen/Complete_BE.xlsx b/Daten/Vorhersagen/Complete_BE.xlsx deleted file mode 100644 index 3b9acd1d3aaf0e3ac2b33f7fe96155a45ace5e38..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/Complete_BE.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/Complete_BE_Years.xlsx b/Daten/Vorhersagen/Complete_BE_Years.xlsx deleted file mode 100644 index c8878cbf852d9de05ae899f25836c6e04acd2113..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/Complete_BE_Years.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/Complete_BE_Years_Places.xlsx b/Daten/Vorhersagen/Complete_BE_Years_Places.xlsx deleted file mode 100644 index 53d6ad84f33f4ad36672f82cab2b9131433d1bf5..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/Complete_BE_Years_Places.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/WIP_complete_BE.xlsx b/Daten/Vorhersagen/WIP_complete_BE.xlsx deleted file mode 100644 index 2fccd99211ca209864bddc055f445ba5f50ad799..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/WIP_complete_BE.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/WIP_final_BE.xlsx b/Daten/Vorhersagen/WIP_final_BE.xlsx deleted file mode 100644 index b3e7112058821332c99f6a681a7d29eeaf5ddff7..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/WIP_final_BE.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/WIP_final_BE_2.xlsx b/Daten/Vorhersagen/WIP_final_BE_2.xlsx deleted file mode 100644 index e8c2c9b921daf437aeefea071a327fd37fbc9a2c..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/WIP_final_BE_2.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/WIP_final_BE_3.xlsx b/Daten/Vorhersagen/WIP_final_BE_3.xlsx deleted file mode 100644 index 1052487927814fa10aae0202113c456fd09eed02..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/WIP_final_BE_3.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/WIP_final_BE_4.xlsx b/Daten/Vorhersagen/WIP_final_BE_4.xlsx deleted file mode 100644 index 5f52703952a91115560bb9f3a53243529d2541b8..0000000000000000000000000000000000000000 Binary files a/Daten/Vorhersagen/WIP_final_BE_4.xlsx and /dev/null differ diff --git a/Daten/Vorhersagen/hw_entries_final.xlsx b/Daten/Vorhersagen/hw_entries_final.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b07c247e7079769424f0c292b1783a82d7b9382b Binary files /dev/null and b/Daten/Vorhersagen/hw_entries_final.xlsx differ diff --git a/Notebooks/XML_Aufbereitung.ipynb b/Notebooks/XML_Aufbereitung.ipynb index d3a2c7e13250cc9822f9756641a3fe90be643968..d49832591909af3da576b62201533ab4df1278c1 100644 --- a/Notebooks/XML_Aufbereitung.ipynb +++ b/Notebooks/XML_Aufbereitung.ipynb @@ -2596,6 +2596,287 @@ "print(repl)" ] }, + { + "cell_type": "code", + "execution_count": 295, + "id": "10e58748-acb8-48c7-b4c7-a1953f2a70a4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "final_BE_df = BE_df.drop(['dup_title', 'copy_from'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 306, + "id": "ee4e5056-d53d-4288-ba06-41af7e6cd3d4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "final_BE_df.to_excel('../Daten/Vorhersagen/BE_final.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 311, + "id": "32fc7d42-2093-4a54-9003-00b4a03e9558", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>volume</th>\n", + " <th>page number</th>\n", + " <th>category</th>\n", + " <th>subcategory</th>\n", + " <th>format</th>\n", + " <th>handwritten page number</th>\n", + " <th>entry_ID</th>\n", + " <th>entry</th>\n", + " <th>AC numbers</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>14.376</td>\n", + " <td>25</td>\n", + " <td>Theologia</td>\n", + " <td>Textus & Versiones Sacræ Scripturæ</td>\n", + " <td>Folio</td>\n", + " <td>1</td>\n", + " <td>14.376_025_00</td>\n", + " <td>1 Biblia Sacra Polyglotta curis Cardinalis Xim...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>14.376</td>\n", + " <td>25</td>\n", + " <td>Theologia</td>\n", + " <td>Textus & Versiones Sacræ Scripturæ</td>\n", + " <td>Folio</td>\n", + " <td>1</td>\n", + " <td>14.376_025_01</td>\n", + " <td>2 Biblia Sacra Polyglotta Philippi II. Regis C...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>14.376</td>\n", + " <td>25</td>\n", + " <td>Theologia</td>\n", + " <td>Textus & Versiones Sacræ Scripturæ</td>\n", + " <td>Folio</td>\n", + " <td>1</td>\n", + " <td>14.376_025_02</td>\n", + " <td>3 Biblia Sacra Polyglotta Studio & curâ Briani...</td>\n", + " <td>[(AC09762517, sicher)]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>14.376</td>\n", + " <td>25</td>\n", + " <td>Theologia</td>\n", + " <td>Textus & Versiones Sacræ Scripturæ</td>\n", + " <td>Folio</td>\n", + " <td>1</td>\n", + " <td>14.376_025_03</td>\n", + " <td>4 Biblia Sacra Latina Moguntina dicta, prima o...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>14.376</td>\n", + " <td>25</td>\n", + " <td>Theologia</td>\n", + " <td>Textus & Versiones Sacræ Scripturæ</td>\n", + " <td>Folio</td>\n", + " <td>1</td>\n", + " <td>14.376_025_04</td>\n", + " <td>5 Biblia Sacra Latina Moguntina, editio altera...</td>\n", + " <td>[(AC07081950, sicher)]</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9398</th>\n", + " <td>14.378</td>\n", + " <td>582</td>\n", + " <td>Imaginum Delineatarum Collectio</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1402</td>\n", + " <td>14.378_582_00</td>\n", + " <td>CCCXXXI Vn Portefeüilles contenant des Dessein...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9399</th>\n", + " <td>14.378</td>\n", + " <td>582</td>\n", + " <td>Imaginum Delineatarum Collectio</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1402</td>\n", + " <td>14.378_582_01</td>\n", + " <td>CCCXXXII Vn Recueil des Portraits peints en m...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9400</th>\n", + " <td>14.378</td>\n", + " <td>582</td>\n", + " <td>Imaginum Delineatarum Collectio</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1402</td>\n", + " <td>14.378_582_02</td>\n", + " <td>CCCXXXIII Dix Vol. de Plantes peintes en mini...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9401</th>\n", + " <td>14.378</td>\n", + " <td>582</td>\n", + " <td>Imaginum Delineatarum Collectio</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1402</td>\n", + " <td>14.378_582_03</td>\n", + " <td>CCCXXXIV Cinq Vol. d'Oiseaux peints en Miniat...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9402</th>\n", + " <td>14.378</td>\n", + " <td>582</td>\n", + " <td>Imaginum Delineatarum Collectio</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1402</td>\n", + " <td>14.378_582_04</td>\n", + " <td>CCCXXXV Divers Portraits, Ceremonies, Marches ...</td>\n", + " <td>-1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>9403 rows × 9 columns</p>\n", + "</div>" + ], + "text/plain": [ + " volume page number category \n", + "0 14.376 25 Theologia \\\n", + "1 14.376 25 Theologia \n", + "2 14.376 25 Theologia \n", + "3 14.376 25 Theologia \n", + "4 14.376 25 Theologia \n", + "... ... ... ... \n", + "9398 14.378 582 Imaginum Delineatarum Collectio \n", + "9399 14.378 582 Imaginum Delineatarum Collectio \n", + "9400 14.378 582 Imaginum Delineatarum Collectio \n", + "9401 14.378 582 Imaginum Delineatarum Collectio \n", + "9402 14.378 582 Imaginum Delineatarum Collectio \n", + "\n", + " subcategory format handwritten page number \n", + "0 Textus & Versiones Sacræ Scripturæ Folio 1 \\\n", + "1 Textus & Versiones Sacræ Scripturæ Folio 1 \n", + "2 Textus & Versiones Sacræ Scripturæ Folio 1 \n", + "3 Textus & Versiones Sacræ Scripturæ Folio 1 \n", + "4 Textus & Versiones Sacræ Scripturæ Folio 1 \n", + "... ... ... ... \n", + "9398 NaN NaN 1402 \n", + "9399 NaN NaN 1402 \n", + "9400 NaN NaN 1402 \n", + "9401 NaN NaN 1402 \n", + "9402 NaN NaN 1402 \n", + "\n", + " entry_ID entry \n", + "0 14.376_025_00 1 Biblia Sacra Polyglotta curis Cardinalis Xim... \\\n", + "1 14.376_025_01 2 Biblia Sacra Polyglotta Philippi II. Regis C... \n", + "2 14.376_025_02 3 Biblia Sacra Polyglotta Studio & curâ Briani... \n", + "3 14.376_025_03 4 Biblia Sacra Latina Moguntina dicta, prima o... \n", + "4 14.376_025_04 5 Biblia Sacra Latina Moguntina, editio altera... \n", + "... ... ... \n", + "9398 14.378_582_00 CCCXXXI Vn Portefeüilles contenant des Dessein... \n", + "9399 14.378_582_01 CCCXXXII Vn Recueil des Portraits peints en m... \n", + "9400 14.378_582_02 CCCXXXIII Dix Vol. de Plantes peintes en mini... \n", + "9401 14.378_582_03 CCCXXXIV Cinq Vol. d'Oiseaux peints en Miniat... \n", + "9402 14.378_582_04 CCCXXXV Divers Portraits, Ceremonies, Marches ... \n", + "\n", + " AC numbers \n", + "0 -1 \n", + "1 -1 \n", + "2 [(AC09762517, sicher)] \n", + "3 -1 \n", + "4 [(AC07081950, sicher)] \n", + "... ... \n", + "9398 -1 \n", + "9399 -1 \n", + "9400 -1 \n", + "9401 -1 \n", + "9402 -1 \n", + "\n", + "[9403 rows x 9 columns]" + ] + }, + "execution_count": 311, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_entry_df = entry_df.drop(['AC number', 'AC cert', 'AC comb', 'AC comb set len'], axis=1)\n", + "final_entry_df.rename(columns={'AC comb set': 'AC numbers'}, inplace=True)\n", + "final_entry_df" + ] + }, + { + "cell_type": "code", + "execution_count": 312, + "id": "83c0b002-6cf5-4677-8b84-a39d8815daa1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "final_entry_df.to_excel('../Daten/Vorhersagen/hw_entries_final.xlsx')" + ] + }, { "cell_type": "code", "execution_count": 253,