diff --git a/Notebooks/Completing_BE_data.ipynb b/Notebooks/Completing_BE_data.ipynb
index 8a66ccc2ff3e08ab034d3609bb7dd5330c2d2c35..54daadf3725cb558d781069660d5f638c57e55cf 100644
--- a/Notebooks/Completing_BE_data.ipynb
+++ b/Notebooks/Completing_BE_data.ipynb
@@ -925,7 +925,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 272,
+   "execution_count": 276,
    "id": "b9ee6630-7122-44d9-9d2f-c8113e5a8e97",
    "metadata": {
     "tags": []
@@ -940,7 +940,7 @@
        "Name: count, dtype: int64"
       ]
      },
-     "execution_count": 272,
+     "execution_count": 276,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -951,7 +951,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 273,
+   "execution_count": 277,
    "id": "175c1147-0063-45e4-8498-39ee390ea7a0",
    "metadata": {
     "tags": []
@@ -967,7 +967,7 @@
        "Name: count, dtype: int64"
       ]
      },
-     "execution_count": 273,
+     "execution_count": 277,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -990,68 +990,497 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 274,
-   "id": "dea71f92-ee6d-474c-b655-6936a86a4586",
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 278,
+   "id": "bd73ed76-756a-4a6e-b8f6-183ebfe33ae4",
+   "metadata": {},
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Signatur</th>\n",
+       "      <th>Barcode</th>\n",
+       "      <th>Titel</th>\n",
+       "      <th>Autor</th>\n",
+       "      <th>Mitwirkender</th>\n",
+       "      <th>Anfang Veröffentlichungsdatum</th>\n",
+       "      <th>Ende Veröffentlichungsdatum</th>\n",
+       "      <th>Veröffentlichungsdatum</th>\n",
+       "      <th>Veröffentlichungsort</th>\n",
+       "      <th>Veröffentlichungsort (normiert)</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Wissensunterklasse</th>\n",
+       "      <th>Formatangabe</th>\n",
+       "      <th>hs. Katalogseite Handschrift</th>\n",
+       "      <th>hs. Katalogeintrag ID</th>\n",
+       "      <th>hs. Katalogeintrag</th>\n",
+       "      <th>hs. Katalog Image URL</th>\n",
+       "      <th>dup_title</th>\n",
+       "      <th>copy_from</th>\n",
+       "      <th>Einfache Klassifizierung</th>\n",
+       "      <th>Komplexe Klassifizierung</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>23376</th>\n",
+       "      <td>*28.A.79.(Vol.1)</td>\n",
+       "      <td>Z222907107</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23383</th>\n",
+       "      <td>*28.A.79.(Vol.10)</td>\n",
+       "      <td>Z222908100</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23384</th>\n",
+       "      <td>*28.A.79.(Vol.12)</td>\n",
+       "      <td>Z222908306</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23385</th>\n",
+       "      <td>*28.A.79.(Vol.13)</td>\n",
+       "      <td>Z222908409</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23386</th>\n",
+       "      <td>*28.A.79.(Vol.14)</td>\n",
+       "      <td>Z222908501</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23636</th>\n",
+       "      <td>Ink 9.F.4</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Opera</td>\n",
+       "      <td>Sallustius Crispus, Gaius</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1481.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>23 Dec. 1481</td>\n",
+       "      <td>Venice</td>\n",
+       "      <td>Venedig</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Historia Romana Sæculorum aliquot, præsertim Imperatorum temporibus</td>\n",
+       "      <td>Folio</td>\n",
+       "      <td>825</td>\n",
+       "      <td>14.377_437_08</td>\n",
+       "      <td>1447.........Ejusdem Historia Eadem. Venetiis. 1481.¬ Baptista de Torris. n. 2217. LIII. R. 12.</td>\n",
+       "      <td>https://iiif.onb.ac.at/images/DOD/51184/00000437.jp2/full/full/0/native.jpg</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-3.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22640</th>\n",
+       "      <td>Ink 9.F.5</td>\n",
+       "      <td>1460328-10</td>\n",
+       "      <td>Biblia ; Interpretationes Hebraicorum nominum</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Wild, Leonhard</td>\n",
+       "      <td>1481.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1481</td>\n",
+       "      <td>Venedig</td>\n",
+       "      <td>Venedig</td>\n",
+       "      <td>...</td>\n",
+       "      <td>Textus &amp; Versiones Sacræ Scripturæ</td>\n",
+       "      <td>Folio</td>\n",
+       "      <td>2</td>\n",
+       "      <td>14.376_026_00</td>\n",
+       "      <td>9. Biblia Sacra Latina. Venetiis. 1481. Leonard Wild de Ratisbonâ n. 2302. III. D. 11.</td>\n",
+       "      <td>https://iiif.onb.ac.at/images/DOD/51202/00000026.jp2/full/full/0/native.jpg</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-3.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22650</th>\n",
+       "      <td>SA.71.E.58</td>\n",
+       "      <td>Z252861302</td>\n",
+       "      <td>Dialogue sur la musique des anciens</td>\n",
+       "      <td>Chateauneuf, Francois abbe de</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1725.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1725</td>\n",
+       "      <td>Paris</td>\n",
+       "      <td>Paris</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22651</th>\n",
+       "      <td>SA.71.F.74</td>\n",
+       "      <td>Z252867808</td>\n",
+       "      <td>Friderici Adolfi Lampe De Cymbalis Veterum Libri Tres</td>\n",
+       "      <td>Ember, Paul</td>\n",
+       "      <td>Hase, Cornelius &lt;&lt;von&gt;&gt;; Röell, Herman Alexander</td>\n",
+       "      <td>1703.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1703</td>\n",
+       "      <td>Trajecti Ad Rhenum</td>\n",
+       "      <td>Utrecht</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23439</th>\n",
+       "      <td>SA.73.B.48</td>\n",
+       "      <td>Z25920770X</td>\n",
+       "      <td>Claudii Ptolomaei harmonicorum libri tres. Ex Codd. Mss. Undecim, nunc primum graece editus. Johannes Wallis ... recensuit, ed. (etc.)</td>\n",
+       "      <td>Ptolemaeus, Claudius</td>\n",
+       "      <td>Wallis, Johannes</td>\n",
+       "      <td>1682.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1682</td>\n",
+       "      <td>Oxford</td>\n",
+       "      <td>Oxford</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>True</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>22874 rows × 37 columns</p>\n",
+       "</div>"
+      ],
       "text/plain": [
-       "Signatur                                                                                                                                                                                                                                                        BE.1.A.14\n",
-       "Barcode                                                                                                                                                                                                                                                           B998501\n",
-       "Titel                                                                                                                                                                                                                   Columna Cochlis M. Aurelio Antonio Augusto dicata\n",
-       "Autor                                                                                                                                                                                                                                            Bellori, Giovanni Pietro\n",
-       "Mitwirkender                                                                                                                                                                                                                                                          NaN\n",
-       "Anfang Veröffentlichungsdatum                                                                                                                                                                                                                                      1704.0\n",
-       "Ende Veröffentlichungsdatum                                                                                                                                                                                                                                           NaN\n",
-       "Veröffentlichungsdatum                                                                                                                                                                                                                                               1704\n",
-       "Veröffentlichungsort                                                                                                                                                                                                                                                 Roma\n",
-       "Veröffentlichungsort (normiert)                                                                                                                                                                                                                                       Rom\n",
-       "Sprache                                                                                                                                                                                                                                                           Italian\n",
-       "Dateiname                                                                                                                                                                                                                                                             NaN\n",
-       "Wappenklassifizierung                                                                                                                                                                                                                                                 NaN\n",
-       "p_A                                                                                                                                                                                                                                                                   NaN\n",
-       "p_B                                                                                                                                                                                                                                                                   NaN\n",
-       "p_C                                                                                                                                                                                                                                                                   NaN\n",
-       "p_N                                                                                                                                                                                                                                                                   NaN\n",
-       "Farbklassifizierung                                                                                                                                                                                                                                                   NaN\n",
-       "p_blue                                                                                                                                                                                                                                                                NaN\n",
-       "p_red                                                                                                                                                                                                                                                                 NaN\n",
-       "p_yellow                                                                                                                                                                                                                                                              NaN\n",
-       "IIIF Manifest                                                                                                                                                                                                                                                         NaN\n",
-       "hs. Katalog                                                                                                                                                                                                                                                           1.0\n",
-       "hs. Katalog Konfidenz                                                                                                                                                                                                                                     sicher | sicher\n",
-       "hs. Katalogband                                                                                                                                                                                                                                           14.378 | 14.378\n",
-       "hs. Katalogseite Digitalisat                                                                                                                                                                                                                                    371 | 560\n",
-       "Wissensklasse                                                                                                                                                                                                                    Paralipomena Historica | Imagines Incisæ\n",
-       "Wissensunterklasse                                                                                                               Antiquitatis Monumenta, seu Ædificia, Amphitheatri, Obelisci, Statuæ, Gemmæ, Lucernæ, Vasa, &c | Imaginum Romæ in æs incisarum Collectio\n",
-       "Formatangabe                                                                                                                                                                                                                                                     Folio | \n",
-       "hs. Katalogseite Handschrift                                                                                                                                                                                                                                  1203 | 1380\n",
-       "hs. Katalogeintrag ID                                                                                                                                                                                                                       14.378_371_01 | 14.378_560_03\n",
-       "hs. Katalogeintrag                 2342 Columna Cochlis seu Antoniana; vide Imagines Romæ | CLXXIX Columna Cochlis seu Antoniana a Petro S. Bartholo in æs incisa cum I. Petri Bellorij notis. in fol.° chartâ magnâ, formâ oblongâ. Romæ. 1704. Domin. de Rubeis n. 886.\n",
-       "hs. Katalog Image URL                                                                                           https://iiif.onb.ac.at/images/DOD/51219/00000371.jp2/full/full/0/native.jpg | https://iiif.onb.ac.at/images/DOD/51219/00000560.jp2/full/full/0/native.jpg\n",
-       "dup_title                                                                                                                                                                                                                                                           False\n",
-       "copy_from                                                                                                                                                                                                                                                            -1.0\n",
-       "Einfache Klassifizierung                                                                                                                                                                                                                                             True\n",
-       "Komplexe Klassifizierung                                                                                                                                                                                                                                                2\n",
-       "Name: 6, dtype: object"
+       "                Signatur     Barcode   \n",
+       "23376   *28.A.79.(Vol.1)  Z222907107  \\\n",
+       "23383  *28.A.79.(Vol.10)  Z222908100   \n",
+       "23384  *28.A.79.(Vol.12)  Z222908306   \n",
+       "23385  *28.A.79.(Vol.13)  Z222908409   \n",
+       "23386  *28.A.79.(Vol.14)  Z222908501   \n",
+       "...                  ...         ...   \n",
+       "23636          Ink 9.F.4         NaN   \n",
+       "22640          Ink 9.F.5  1460328-10   \n",
+       "22650         SA.71.E.58  Z252861302   \n",
+       "22651         SA.71.F.74  Z252867808   \n",
+       "23439         SA.73.B.48  Z25920770X   \n",
+       "\n",
+       "                                                                                                                                        Titel   \n",
+       "23376                                                                                                       Histoire des ouvrages des scavans  \\\n",
+       "23383                                                                                                       Histoire des ouvrages des scavans   \n",
+       "23384                                                                                                       Histoire des ouvrages des scavans   \n",
+       "23385                                                                                                       Histoire des ouvrages des scavans   \n",
+       "23386                                                                                                       Histoire des ouvrages des scavans   \n",
+       "...                                                                                                                                       ...   \n",
+       "23636                                                                                                                                   Opera   \n",
+       "22640                                                                                           Biblia ; Interpretationes Hebraicorum nominum   \n",
+       "22650                                                                                                     Dialogue sur la musique des anciens   \n",
+       "22651                                                                                   Friderici Adolfi Lampe De Cymbalis Veterum Libri Tres   \n",
+       "23439  Claudii Ptolomaei harmonicorum libri tres. Ex Codd. Mss. Undecim, nunc primum graece editus. Johannes Wallis ... recensuit, ed. (etc.)   \n",
+       "\n",
+       "                               Autor   \n",
+       "23376      Basnage de Beauval, Henri  \\\n",
+       "23383      Basnage de Beauval, Henri   \n",
+       "23384      Basnage de Beauval, Henri   \n",
+       "23385      Basnage de Beauval, Henri   \n",
+       "23386      Basnage de Beauval, Henri   \n",
+       "...                              ...   \n",
+       "23636      Sallustius Crispus, Gaius   \n",
+       "22640                            NaN   \n",
+       "22650  Chateauneuf, Francois abbe de   \n",
+       "22651                    Ember, Paul   \n",
+       "23439           Ptolemaeus, Claudius   \n",
+       "\n",
+       "                                           Mitwirkender   \n",
+       "23376                                               NaN  \\\n",
+       "23383                                               NaN   \n",
+       "23384                                               NaN   \n",
+       "23385                                               NaN   \n",
+       "23386                                               NaN   \n",
+       "...                                                 ...   \n",
+       "23636                                               NaN   \n",
+       "22640                                    Wild, Leonhard   \n",
+       "22650                                               NaN   \n",
+       "22651  Hase, Cornelius <<von>>; Röell, Herman Alexander   \n",
+       "23439                                  Wallis, Johannes   \n",
+       "\n",
+       "      Anfang Veröffentlichungsdatum Ende Veröffentlichungsdatum   \n",
+       "23376                        1687.0                      1709.0  \\\n",
+       "23383                        1687.0                      1709.0   \n",
+       "23384                        1687.0                      1709.0   \n",
+       "23385                        1687.0                      1709.0   \n",
+       "23386                        1687.0                      1709.0   \n",
+       "...                             ...                         ...   \n",
+       "23636                        1481.0                         NaN   \n",
+       "22640                        1481.0                         NaN   \n",
+       "22650                        1725.0                         NaN   \n",
+       "22651                        1703.0                         NaN   \n",
+       "23439                        1682.0                         NaN   \n",
+       "\n",
+       "      Veröffentlichungsdatum Veröffentlichungsort   \n",
+       "23376              1687-1709            Rotterdam  \\\n",
+       "23383              1687-1709            Rotterdam   \n",
+       "23384              1687-1709            Rotterdam   \n",
+       "23385              1687-1709            Rotterdam   \n",
+       "23386              1687-1709            Rotterdam   \n",
+       "...                      ...                  ...   \n",
+       "23636           23 Dec. 1481               Venice   \n",
+       "22640                   1481              Venedig   \n",
+       "22650                   1725                Paris   \n",
+       "22651                   1703   Trajecti Ad Rhenum   \n",
+       "23439                   1682               Oxford   \n",
+       "\n",
+       "      Veröffentlichungsort (normiert)  ...   \n",
+       "23376                       Rotterdam  ...  \\\n",
+       "23383                       Rotterdam  ...   \n",
+       "23384                       Rotterdam  ...   \n",
+       "23385                       Rotterdam  ...   \n",
+       "23386                       Rotterdam  ...   \n",
+       "...                               ...  ...   \n",
+       "23636                         Venedig  ...   \n",
+       "22640                         Venedig  ...   \n",
+       "22650                           Paris  ...   \n",
+       "22651                         Utrecht  ...   \n",
+       "23439                          Oxford  ...   \n",
+       "\n",
+       "                                                        Wissensunterklasse   \n",
+       "23376                                                                  NaN  \\\n",
+       "23383                                                                  NaN   \n",
+       "23384                                                                  NaN   \n",
+       "23385                                                                  NaN   \n",
+       "23386                                                                  NaN   \n",
+       "...                                                                    ...   \n",
+       "23636  Historia Romana Sæculorum aliquot, præsertim Imperatorum temporibus   \n",
+       "22640                                   Textus & Versiones Sacræ Scripturæ   \n",
+       "22650                                                                  NaN   \n",
+       "22651                                                                  NaN   \n",
+       "23439                                                                  NaN   \n",
+       "\n",
+       "      Formatangabe hs. Katalogseite Handschrift  hs. Katalogeintrag ID   \n",
+       "23376          NaN                          NaN                    NaN  \\\n",
+       "23383          NaN                          NaN                    NaN   \n",
+       "23384          NaN                          NaN                    NaN   \n",
+       "23385          NaN                          NaN                    NaN   \n",
+       "23386          NaN                          NaN                    NaN   \n",
+       "...            ...                          ...                    ...   \n",
+       "23636        Folio                          825          14.377_437_08   \n",
+       "22640        Folio                            2          14.376_026_00   \n",
+       "22650          NaN                          NaN                    NaN   \n",
+       "22651          NaN                          NaN                    NaN   \n",
+       "23439          NaN                          NaN                    NaN   \n",
+       "\n",
+       "                                                                                    hs. Katalogeintrag   \n",
+       "23376                                                                                              NaN  \\\n",
+       "23383                                                                                              NaN   \n",
+       "23384                                                                                              NaN   \n",
+       "23385                                                                                              NaN   \n",
+       "23386                                                                                              NaN   \n",
+       "...                                                                                                ...   \n",
+       "23636  1447.........Ejusdem Historia Eadem. Venetiis. 1481.¬ Baptista de Torris. n. 2217. LIII. R. 12.   \n",
+       "22640           9. Biblia Sacra Latina. Venetiis. 1481. Leonard Wild de Ratisbonâ n. 2302. III. D. 11.   \n",
+       "22650                                                                                              NaN   \n",
+       "22651                                                                                              NaN   \n",
+       "23439                                                                                              NaN   \n",
+       "\n",
+       "                                                             hs. Katalog Image URL   \n",
+       "23376                                                                          NaN  \\\n",
+       "23383                                                                          NaN   \n",
+       "23384                                                                          NaN   \n",
+       "23385                                                                          NaN   \n",
+       "23386                                                                          NaN   \n",
+       "...                                                                            ...   \n",
+       "23636  https://iiif.onb.ac.at/images/DOD/51184/00000437.jp2/full/full/0/native.jpg   \n",
+       "22640  https://iiif.onb.ac.at/images/DOD/51202/00000026.jp2/full/full/0/native.jpg   \n",
+       "22650                                                                          NaN   \n",
+       "22651                                                                          NaN   \n",
+       "23439                                                                          NaN   \n",
+       "\n",
+       "       dup_title copy_from  Einfache Klassifizierung  Komplexe Klassifizierung  \n",
+       "23376       True      -1.0                      True                         2  \n",
+       "23383       True      -1.0                      True                         2  \n",
+       "23384       True      -1.0                      True                         2  \n",
+       "23385       True      -1.0                      True                         2  \n",
+       "23386       True      -1.0                      True                         2  \n",
+       "...          ...       ...                       ...                       ...  \n",
+       "23636       True      -3.0                      True                         2  \n",
+       "22640       True      -3.0                      True                         2  \n",
+       "22650      False      -1.0                      True                         2  \n",
+       "22651      False      -1.0                      True                         2  \n",
+       "23439      False      -1.0                      True                         2  \n",
+       "\n",
+       "[22874 rows x 37 columns]"
       ]
      },
-     "execution_count": 274,
+     "execution_count": 278,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "BE_with_Ink_df.loc[6]"
+    "BE_with_Ink_df"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bd73ed76-756a-4a6e-b8f6-183ebfe33ae4",
+   "id": "51e2acf9-1a2b-4503-8423-c091a5244d9b",
    "metadata": {},
    "outputs": [],
    "source": []
diff --git a/Notebooks/String_matching.ipynb b/Notebooks/String_matching.ipynb
index 6b78eec15e66bad8db41f7445b067ef82889f0d8..bffe310dc69e2fcd762b30381f16e9435f5025b9 100644
--- a/Notebooks/String_matching.ipynb
+++ b/Notebooks/String_matching.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 1,
    "id": "7a3837ac-cced-4e01-bf57-265e40729692",
    "metadata": {
     "tags": []
@@ -24,7 +24,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 2,
    "id": "29ca0dc8-cae7-4f12-bd60-fd74ea6ae5ac",
    "metadata": {
     "tags": []
@@ -36,7 +36,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 3,
    "id": "c1e1c42a-962f-40bc-bb17-b62e8089feb7",
    "metadata": {
     "tags": []
@@ -48,7 +48,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 4,
    "id": "50d15898-4687-46b7-b7e0-528d7cf9aec0",
    "metadata": {
     "tags": []
@@ -70,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 5,
    "id": "990dfeee-1141-4acb-8a3d-a7af0573f5be",
    "metadata": {
     "tags": []
@@ -85,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 6,
    "id": "bcd301fe-cb80-4b1c-b65f-465fce5ed915",
    "metadata": {
     "tags": []
@@ -104,7 +104,7 @@
        "        0.0102726686745882]], dtype=object)"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -115,7 +115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 8,
    "id": "c0f4a42a-7e21-41e8-833c-2dd2f9d1985e",
    "metadata": {
     "tags": []
@@ -125,7 +125,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "6\n"
+      "3\n"
      ]
     },
     {
@@ -162,124 +162,68 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>192</th>\n",
+       "      <th>133</th>\n",
        "      <td>14.376</td>\n",
-       "      <td>58</td>\n",
+       "      <td>45</td>\n",
        "      <td>Theologia</td>\n",
-       "      <td>Concilia, &amp; quæ ad eamdem Rem pertinentia</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>34</td>\n",
-       "      <td>14.376_058_01</td>\n",
-       "      <td>64 Piccolomini (æneæ Sylvij) Fasciculus expetendarum ac fugiendarum rerum, Seu comment. de Concilij Basiliensis gestis; insunt præterea huic operi aliquot epistolæ, libelli, tractatus, &amp; opuscula quæ, si futurum Concilium celebrari contigerit, summopere tanquam¬ cognitu digna &amp; necessaria ad optimis expostulabunt. omnia ab Orthuio Gratio edita. Coloniæ. 1535. n. 168.</td>\n",
-       "      <td>Piccolomini aeneae Sylvij Fasciculus expetendarum ac fugiendarum rerum Seu comment de Concilij Basiliensis gestis insunt praeterea huic operi aliquot epistolae libelli tractatus &amp; opuscula quae si futurum Concilium celebrari contigerit summopere tanquam cognitu digna &amp; necessaria ad optimis expostulabunt omnia ab Orthuio Gratio edita Coloniae 1535 n 168</td>\n",
+       "      <td>Critici Sacri</td>\n",
+       "      <td>Quarto</td>\n",
+       "      <td>21</td>\n",
+       "      <td>14.376_045_00</td>\n",
+       "      <td>Goësij (Willhelmi) Pilatus judex; cui accedunt¬ Theologi cujusdam in Pilatum judicem Stricturæ, cum ejusdem Goësij notis &amp; animadversionibus.¬ Hagæ Comitis. 1677. Ioan. Tongerloo. n. 200.</td>\n",
+       "      <td>Goesij Willhelmi Pilatus judex cui accedunt Theologi cujusdam in Pilatum judicem Stricturae cum ejusdem Goesij notis &amp; animadversionibus Hagae Comitis 1677 Ioan Tongerloo n 200</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>193</th>\n",
+       "      <th>338</th>\n",
        "      <td>14.376</td>\n",
-       "      <td>58</td>\n",
+       "      <td>85</td>\n",
        "      <td>Theologia</td>\n",
-       "      <td>Concilia, &amp; quæ ad eamdem Rem pertinentia</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>34</td>\n",
-       "      <td>14.376_058_02</td>\n",
-       "      <td>65.......... Idem Fasciculus ab innumeris mendis expurgat. vna cum appendice, seu tomo 2.° scriptorum veterum qui Ecclesiæ Rom. abusus ac errores detegunt &amp; damnant, necessitatemque reformationis vrgent. opera &amp; Studio Edwardi Brown. 2 Vol. Lond. 1690. Rich. Chiswel. n. 169.</td>\n",
-       "      <td>Idem Fasciculus ab innumeris mendis expurgat vna cum appendice seu tomo 2° scriptorum veterum qui Ecclesiae Rom abusus ac errores detegunt &amp; damnant necessitatemque reformationis vrgent opera &amp; Studio Edwardi Brown 2 Vol Lond 1690 Rich Chiswel n 169</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4585</th>\n",
-       "      <td>14.377</td>\n",
-       "      <td>221</td>\n",
-       "      <td>Philologia</td>\n",
-       "      <td>Epistolographi Græci &amp; Latini</td>\n",
+       "      <td>Sanctissimi Patres Latini</td>\n",
        "      <td>Octavo und kleiner</td>\n",
-       "      <td>617</td>\n",
-       "      <td>14.377_221_00</td>\n",
-       "      <td>Fasciculus Latinarum Epistolarum Ludovici Molinæi cum interpretatione gallicâ. 12.° Eleutheropoli. 1676. n. 1589.</td>\n",
-       "      <td>Fasciculus Latinarum Epistolarum Ludovici Molinaei cum interpretatione gallica 12° Eleutheropoli 1676 n 1589</td>\n",
+       "      <td>61</td>\n",
+       "      <td>14.376_085_04</td>\n",
+       "      <td>S. Augustin de la veritable Religion et des moeurs de L'Eglise catholique trad. en Franç. (par M. Dubois) 8.° Paris. 1694. Louis Guerin. n. 200</td>\n",
+       "      <td>S Augustin de la veritable Religion et des moeurs de L'Eglise catholique trad en Franç par M Dubois 8° Paris 1694 Louis Guerin n 200</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5137</th>\n",
-       "      <td>14.377</td>\n",
-       "      <td>319</td>\n",
-       "      <td>Chronologia</td>\n",
-       "      <td>Chronologia Technica &amp; Historica</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>711</td>\n",
-       "      <td>14.377_319_00</td>\n",
-       "      <td>1170 Fasciculus Temporum. editio antiqua, sine Loco &amp; Venetijs 1480 XLVIII.R.19 anno. n. 2160. et S. A. n. 2324. XLVIII.R.18</td>\n",
-       "      <td>Fasciculus Temporum editio antiqua sine Loco &amp; Venetijs 1480 XLVIIIR19 anno n 2160 et S A n 2324 XLVIIIR18</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5179</th>\n",
-       "      <td>14.377</td>\n",
-       "      <td>327</td>\n",
-       "      <td>Chronologia</td>\n",
-       "      <td>Chronographi, Seu Chronica, &amp; Historiæ Universales</td>\n",
+       "      <th>930</th>\n",
+       "      <td>14.376</td>\n",
+       "      <td>180</td>\n",
+       "      <td>Iurisprudentia</td>\n",
+       "      <td>Ius Civile, Publicum, &amp; Municipale</td>\n",
        "      <td>Folio</td>\n",
-       "      <td>719</td>\n",
-       "      <td>14.377_327_03</td>\n",
-       "      <td>1192 Chronica qua dicitur Fasciculus temporum per quemdam Carthusiensem edita, nunc emendata cum additionibus ad hæc usque tempora. Venetiis. 1480. Erhardus Ratdolt.</td>\n",
-       "      <td>Chronica qua dicitur Fasciculus temporum per quemdam Carthusiensem edita nunc emendata cum additionibus ad haec usque tempora Venetiis 1480 Erhardus Ratdolt</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8487</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>448</td>\n",
-       "      <td>Paralipomena Historica</td>\n",
-       "      <td>De Academiis, Universitatibus, Scholis, Colleg. &amp;c</td>\n",
-       "      <td>Quarto</td>\n",
-       "      <td>1280</td>\n",
-       "      <td>14.378_448_02</td>\n",
-       "      <td>Omeisij (Magni Dan.) Academiæ Altdorfinæ Gloria, Sive Orationum Fasciculus, Vniversitatis Noricæ Ortus &amp; Progressus, omniumque ipsius Professorum Vitæ &amp; Scripta; accedunt I. Pauli Felwingeri additamenta quædam. Altdorfi. 1683. Meyerus. n. 1600. Fasti Consolari dell'Academia Fiorentina da Salvino Salvini. in Firenze. 1717. Tartini. n. 2151 Dissertationes Ioannis Melchioris Schwimmer de¬ Academicis omnium Facultatum Professoribus. Ienæ. 1671. Mullerus. n. 2152.</td>\n",
-       "      <td>Omeisij Magni Dan Academiae Altdorfinae Gloria Sive Orationum Fasciculus Vniversitatis Noricae Ortus &amp; Progressus omniumque ipsius Professorum Vitae &amp; Scripta accedunt I Pauli Felwingeri additamenta quaedam Altdorfi 1683 Meyerus n 1600 Fasti Consolari dell'Academia Fiorentina da Salvino Salvini in Firenze 1717 Tartini n 2151 Dissertationes Ioannis Melchioris Schwimmer de Academicis omnium Facultatum Professoribus Ienae 1671 Mullerus n 2152</td>\n",
+       "      <td>148</td>\n",
+       "      <td>14.376_180_00</td>\n",
+       "      <td>243 Sigonij (Car.) de antiquo Iure Populi Rom. Libri XI. nempè, de antiquo jure Civium Romanorum Libri II. de Iure antiquo Italiæ Libri III. de antiquo Iure¬ Provinciarum Libri III. ac de Iudiciis Libri III. Bononiæ 1574. Societas Typographorum. n. 200.</td>\n",
+       "      <td>Sigonij Car de antiquo Iure Populi Rom Libri XI nempe de antiquo jure Civium Romanorum Libri II de Iure antiquo Italiae Libri III de antiquo Iure Provinciarum Libri III ac de Iudiciis Libri III Bononiae 1574 Societas Typographorum n 200</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "      volume  page number                category   \n",
-       "192   14.376           58               Theologia  \\\n",
-       "193   14.376           58               Theologia   \n",
-       "4585  14.377          221              Philologia   \n",
-       "5137  14.377          319             Chronologia   \n",
-       "5179  14.377          327             Chronologia   \n",
-       "8487  14.378          448  Paralipomena Historica   \n",
+       "     volume  page number        category                         subcategory   \n",
+       "133  14.376           45       Theologia                       Critici Sacri  \\\n",
+       "338  14.376           85       Theologia           Sanctissimi Patres Latini   \n",
+       "930  14.376          180  Iurisprudentia  Ius Civile, Publicum, & Municipale   \n",
        "\n",
-       "                                             subcategory              format   \n",
-       "192            Concilia, & quæ ad eamdem Rem pertinentia               Folio  \\\n",
-       "193            Concilia, & quæ ad eamdem Rem pertinentia               Folio   \n",
-       "4585                       Epistolographi Græci & Latini  Octavo und kleiner   \n",
-       "5137                    Chronologia Technica & Historica               Folio   \n",
-       "5179  Chronographi, Seu Chronica, & Historiæ Universales               Folio   \n",
-       "8487  De Academiis, Universitatibus, Scholis, Colleg. &c              Quarto   \n",
-       "\n",
-       "     handwritten page number       entry_ID   \n",
-       "192                       34  14.376_058_01  \\\n",
-       "193                       34  14.376_058_02   \n",
-       "4585                     617  14.377_221_00   \n",
-       "5137                     711  14.377_319_00   \n",
-       "5179                     719  14.377_327_03   \n",
-       "8487                    1280  14.378_448_02   \n",
+       "                 format handwritten page number       entry_ID   \n",
+       "133              Quarto                      21  14.376_045_00  \\\n",
+       "338  Octavo und kleiner                      61  14.376_085_04   \n",
+       "930               Folio                     148  14.376_180_00   \n",
        "\n",
-       "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                entry   \n",
-       "192                                                                                                 64 Piccolomini (æneæ Sylvij) Fasciculus expetendarum ac fugiendarum rerum, Seu comment. de Concilij Basiliensis gestis; insunt præterea huic operi aliquot epistolæ, libelli, tractatus, & opuscula quæ, si futurum Concilium celebrari contigerit, summopere tanquam¬ cognitu digna & necessaria ad optimis expostulabunt. omnia ab Orthuio Gratio edita. Coloniæ. 1535. n. 168.  \\\n",
-       "193                                                                                                                                                                                               65.......... Idem Fasciculus ab innumeris mendis expurgat. vna cum appendice, seu tomo 2.° scriptorum veterum qui Ecclesiæ Rom. abusus ac errores detegunt & damnant, necessitatemque reformationis vrgent. opera & Studio Edwardi Brown. 2 Vol. Lond. 1690. Rich. Chiswel. n. 169.   \n",
-       "4585                                                                                                                                                                                                                                                                                                                                                                Fasciculus Latinarum Epistolarum Ludovici Molinæi cum interpretatione gallicâ. 12.° Eleutheropoli. 1676. n. 1589.   \n",
-       "5137                                                                                                                                                                                                                                                                                                                                                     1170 Fasciculus Temporum. editio antiqua, sine Loco & Venetijs 1480 XLVIII.R.19 anno. n. 2160. et S. A. n. 2324. XLVIII.R.18   \n",
-       "5179                                                                                                                                                                                                                                                                                                            1192 Chronica qua dicitur Fasciculus temporum per quemdam Carthusiensem edita, nunc emendata cum additionibus ad hæc usque tempora. Venetiis. 1480. Erhardus Ratdolt.   \n",
-       "8487  Omeisij (Magni Dan.) Academiæ Altdorfinæ Gloria, Sive Orationum Fasciculus, Vniversitatis Noricæ Ortus & Progressus, omniumque ipsius Professorum Vitæ & Scripta; accedunt I. Pauli Felwingeri additamenta quædam. Altdorfi. 1683. Meyerus. n. 1600. Fasti Consolari dell'Academia Fiorentina da Salvino Salvini. in Firenze. 1717. Tartini. n. 2151 Dissertationes Ioannis Melchioris Schwimmer de¬ Academicis omnium Facultatum Professoribus. Ienæ. 1671. Mullerus. n. 2152.   \n",
+       "                                                                                                                                                                                                                                                             entry   \n",
+       "133                                                                    Goësij (Willhelmi) Pilatus judex; cui accedunt¬ Theologi cujusdam in Pilatum judicem Stricturæ, cum ejusdem Goësij notis & animadversionibus.¬ Hagæ Comitis. 1677. Ioan. Tongerloo. n. 200.  \\\n",
+       "338                                                                                                                S. Augustin de la veritable Religion et des moeurs de L'Eglise catholique trad. en Franç. (par M. Dubois) 8.° Paris. 1694. Louis Guerin. n. 200   \n",
+       "930  243 Sigonij (Car.) de antiquo Iure Populi Rom. Libri XI. nempè, de antiquo jure Civium Romanorum Libri II. de Iure antiquo Italiæ Libri III. de antiquo Iure¬ Provinciarum Libri III. ac de Iudiciis Libri III. Bononiæ 1574. Societas Typographorum. n. 200.   \n",
        "\n",
-       "                                                                                                                                                                                                                                                                                                                                                                                                                                                    cleaned entry  \n",
-       "192                                                                                           Piccolomini aeneae Sylvij Fasciculus expetendarum ac fugiendarum rerum Seu comment de Concilij Basiliensis gestis insunt praeterea huic operi aliquot epistolae libelli tractatus & opuscula quae si futurum Concilium celebrari contigerit summopere tanquam cognitu digna & necessaria ad optimis expostulabunt omnia ab Orthuio Gratio edita Coloniae 1535 n 168  \n",
-       "193                                                                                                                                                                                                     Idem Fasciculus ab innumeris mendis expurgat vna cum appendice seu tomo 2° scriptorum veterum qui Ecclesiae Rom abusus ac errores detegunt & damnant necessitatemque reformationis vrgent opera & Studio Edwardi Brown 2 Vol Lond 1690 Rich Chiswel n 169  \n",
-       "4585                                                                                                                                                                                                                                                                                                                                                 Fasciculus Latinarum Epistolarum Ludovici Molinaei cum interpretatione gallica 12° Eleutheropoli 1676 n 1589  \n",
-       "5137                                                                                                                                                                                                                                                                                                                                                   Fasciculus Temporum editio antiqua sine Loco & Venetijs 1480 XLVIIIR19 anno n 2160 et S A n 2324 XLVIIIR18  \n",
-       "5179                                                                                                                                                                                                                                                                                                 Chronica qua dicitur Fasciculus temporum per quemdam Carthusiensem edita nunc emendata cum additionibus ad haec usque tempora Venetiis 1480 Erhardus Ratdolt  \n",
-       "8487  Omeisij Magni Dan Academiae Altdorfinae Gloria Sive Orationum Fasciculus Vniversitatis Noricae Ortus & Progressus omniumque ipsius Professorum Vitae & Scripta accedunt I Pauli Felwingeri additamenta quaedam Altdorfi 1683 Meyerus n 1600 Fasti Consolari dell'Academia Fiorentina da Salvino Salvini in Firenze 1717 Tartini n 2151 Dissertationes Ioannis Melchioris Schwimmer de Academicis omnium Facultatum Professoribus Ienae 1671 Mullerus n 2152  "
+       "                                                                                                                                                                                                                                    cleaned entry  \n",
+       "133                                                              Goesij Willhelmi Pilatus judex cui accedunt Theologi cujusdam in Pilatum judicem Stricturae cum ejusdem Goesij notis & animadversionibus Hagae Comitis 1677 Ioan Tongerloo n 200  \n",
+       "338                                                                                                          S Augustin de la veritable Religion et des moeurs de L'Eglise catholique trad en Franç par M Dubois 8° Paris 1694 Louis Guerin n 200  \n",
+       "930  Sigonij Car de antiquo Iure Populi Rom Libri XI nempe de antiquo jure Civium Romanorum Libri II de Iure antiquo Italiae Libri III de antiquo Iure Provinciarum Libri III ac de Iudiciis Libri III Bononiae 1574 Societas Typographorum n 200  "
       ]
      },
-     "execution_count": 23,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -288,7 +232,7 @@
     "def search_in_entry(df, string):\n",
     "    return df[df['cleaned entry'].str.contains(string)]\n",
     "\n",
-    "info = search_in_entry(search_in_entry(entry_df, ''), 'Fasciculus')\n",
+    "info = search_in_entry(search_in_entry(entry_df, ''), r'n 200\\Z')  # raw string: \\Z anchors at the end of the entry\n",
     "print(len(info))\n",
     "info"
    ]
diff --git a/Notebooks/XML_Aufbereitung.ipynb b/Notebooks/XML_Aufbereitung.ipynb
index 1f4ee4ac9296f64cf0198bf4d07d6bdbc72617c7..27a17b9af590f57be0f4fcdf1a83f53b5d838c90 100644
--- a/Notebooks/XML_Aufbereitung.ipynb
+++ b/Notebooks/XML_Aufbereitung.ipynb
@@ -2,27 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "8a7c8849-b1a3-4f88-b534-cec8b4c13f09",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
-   "source": [
-    "%pip install -r ../requirements.txt -q"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 380,
+   "execution_count": 1,
    "id": "5b24e324-6659-482d-8d82-39c1d604f0d3",
    "metadata": {
     "tags": []
@@ -1599,8 +1579,357 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 403,
-   "id": "4953438b-2426-4827-a6de-51c89e9e3e65",
+   "execution_count": 404,
+   "id": "3a779456-4810-4428-85d7-b0c5277717d8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "entry_df.to_excel('../Daten/Katalogabgleich/Einträge.xlsx')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98bf1e20-d09c-41d5-8bc3-725a851f6ab3",
+   "metadata": {},
+   "source": [
+    "# Add matches between the handwritten catalog and the modern catalog to the TEI_XMLs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 158,
+   "id": "0d8fa911-2986-4a4d-af5b-2adf3bddada3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "entry_df = pd.read_excel('../Daten/Katalogabgleich/Einträge.xlsx', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "id": "0a86cfbe-33b3-4a3a-b384-270b5228359a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Index labels treated as signature duplicates (the original note marks this as unsure).\n",
+    "to_drop = [2292, 13801, 18647]\n",
+    "\n",
+    "# The first spreadsheet column becomes the index; the listed rows are then removed.\n",
+    "BE_df = pd.read_excel('../Daten/Vorhersagen/WIP_final_BE_4.xlsx', index_col=0)\n",
+    "BE_df = BE_df.drop(index=to_drop)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d0c055e8-59c4-41c7-b0d7-f709c2734e5a",
+   "metadata": {},
+   "source": [
+    "## Download all available IIIF manifests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "id": "2e24b3fe-2908-4d88-8df1-6045815bd4ae",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0e6779a4cc604b988daac74fa308d6c3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/21356 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from tqdm.notebook import tqdm\n",
+    "\n",
+    "urls = BE_df[['IIIF Manifest', 'Barcode']].dropna(subset=['IIIF Manifest'])\n",
+    "\n",
+    "for i, url in tqdm(urls.iterrows(), total=len(urls)):\n",
+    "    resp = requests.get(url['IIIF Manifest'], timeout=30)  # fail instead of hanging on a stalled server\n",
+    "    if resp.ok:  # skip HTTP error pages so they are never stored as manifests\n",
+    "        with open('data/iiif_manifests/' + url['Barcode'] + '.json', 'wb') as fh:\n",
+    "            fh.write(resp.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "id": "fdff3c2f-0326-412f-8347-da864f306322",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "21356\n"
+     ]
+    }
+   ],
+   "source": [
+    "import glob\n",
+    "\n",
+    "mans = glob.glob('data/iiif_manifests/*.json')\n",
+    "\n",
+    "print(len(mans))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "id": "a0c928c7-8bc2-4e89-997b-34be5a9d2a54",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6d335c8424de4d169a44b9e1aeeb0079",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/22871 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "def get_AC_num(BE_row):\n",
+    "    \"\"\"Return the 'IDNR' metadata value of the row's cached IIIF manifest ('' if none).\"\"\"\n",
+    "    bc = BE_row['Barcode']\n",
+    "    try:\n",
+    "        # JSON is UTF-8 by spec, so do not depend on the platform's default encoding\n",
+    "        with open(f'data/iiif_manifests/{bc}.json', 'r', encoding='utf-8') as fh:\n",
+    "            metadata = json.load(fh).get('metadata', [])\n",
+    "    except FileNotFoundError:\n",
+    "        return ''  # no manifest was downloaded for this barcode\n",
+    "    ac = ''\n",
+    "    for dic in metadata:\n",
+    "        if dic['label'] == 'IDNR':\n",
+    "            ac = dic['value']  # no break: the last IDNR entry wins, as before\n",
+    "    return ac\n",
+    "\n",
+    "for i, BE_row in tqdm(BE_df.iterrows(), total=len(BE_df)):\n",
+    "    BE_df.at[i, 'AC Nummer'] = get_AC_num(BE_row)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "71d603b3-5ead-4ecd-9039-961325a60973",
+   "metadata": {},
+   "source": [
+    "## Obtain AC number from ASTOR repository via signature"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 210,
+   "id": "28fc3270-5d3e-4b20-a149-37d433afc197",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "BE_no_AC = BE_df[BE_df['AC Nummer'] == '']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 207,
+   "id": "7fd80561-9e5d-498c-b2f9-428e293fd3a2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "16d72df27a6548f6a148f3dd4158d2f2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1515 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()\n",
+    "ASTOR_API_KEY = os.getenv('ASTOR_API_KEY')  # loaded from the environment / .env file\n",
+    "\n",
+    "for i, BE_row in tqdm(BE_no_AC.iterrows(), total=len(BE_no_AC)):\n",
+    "    sig = BE_row['Signatur']\n",
+    "    # Raw strings avoid invalid-escape warnings; params= URL-encodes the whole query.\n",
+    "    sig_esc = sig.replace('.', r'\\.').replace('(', r'\\(').replace(')', r'\\)')\n",
+    "    astor_params = {'query': f'signature:{sig_esc}', 'from': 1, 'rows': 200, 'apikey': ASTOR_API_KEY}\n",
+    "    sig_sru = requests.get('https://astor.onb.ac.at/discovery/internal/search', params=astor_params, timeout=30).json()\n",
+    "    for doc in sig_sru.get('documents', []):\n",
+    "        if doc['signature'] == sig:\n",
+    "            BE_df.at[i, 'AC Nummer'] = doc['idnr']\n",
+    "            break"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "30500e79-5227-4c21-9825-365bf6992c2a",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Obtain AC number from catalogue via signature and SRU"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 211,
+   "id": "c293013f-b44d-44ec-9529-c40a2863d9f3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7fa10904058e425c9670c9d5f09b1713",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/67 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from lxml import etree\n",
+    "\n",
+    "# Namespace prefixes used by the XPath queries on the SRU/MARCXML response.\n",
+    "ns = {\n",
+    "    'srw': 'http://www.loc.gov/zing/srw/',\n",
+    "    'marc': 'http://www.loc.gov/MARC21/slim'\n",
+    "}\n",
+    "\n",
+    "# Tab-separated ISO 639-3 table; english_language_from_code reads its\n",
+    "# 'Id', 'Part2b' and 'Ref_Name' columns.\n",
+    "lang_data = pd.read_csv('data/iso-639-3.tab', sep='\\t')\n",
+    "\n",
+    "def english_language_from_code(lang_code):\n",
+    "    \"\"\"Return the 'Ref_Name' that lang_data lists for a language code.\n",
+    "\n",
+    "    The code is looked up in the 'Id' column first and in the 'Part2b'\n",
+    "    column second. The name of the first matching row is returned, or ''\n",
+    "    when neither column contains the code.\n",
+    "    \"\"\"\n",
+    "    for code_column in ('Id', 'Part2b'):\n",
+    "        hits = lang_data.loc[lang_data[code_column] == lang_code, 'Ref_Name']\n",
+    "        if len(hits):\n",
+    "            return hits.values[0]\n",
+    "    return ''\n",
+    "\n",
+    "def extract_catalog_data_from_signature(sig):\n",
+    "    \"\"\"Search the Alma SRU endpoint by accession number and collect MARC fields.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    sig : str\n",
+    "        Text placed inside the quoted search term of the SRU query. The\n",
+    "        caller in this notebook passes the signature already backslash-escaped.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    list of dict\n",
+    "        One dict per marc:record in the response. The keys are the labels of\n",
+    "        marc_paths; each value is the '; '-joined text of all matching\n",
+    "        elements ('' when nothing matches). 'Sprache' codes are translated\n",
+    "        with english_language_from_code.\n",
+    "    \"\"\"\n",
+    "    # Label -> XPath of the MARC field/subfield to extract (same for every record).\n",
+    "    marc_paths = {\n",
+    "        'Titel': './/marc:datafield[@tag=\"245\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Autor': './/marc:datafield[@tag=\"100\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Mitwirkender': './/marc:datafield[@tag=\"700\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Signatur': './/marc:datafield[@tag=\"AVA\"]/marc:subfield[@code=\"d\"]',\n",
+    "        'Veröffentlichungsdatum': './/marc:datafield[@tag=\"264\"]/marc:subfield[@code=\"c\"]',\n",
+    "        'Veröffentlichungsort': './/marc:datafield[@tag=\"264\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Sprache': './/marc:datafield[@tag=\"041\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'AC Nummer': './/marc:controlfield[@tag=\"009\"]'\n",
+    "    }\n",
+    "\n",
+    "    # Let requests build and percent-encode the query string; a timeout keeps\n",
+    "    # a stalled connection from blocking the calling loop indefinitely.\n",
+    "    sru_request = requests.get(\n",
+    "        'https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB',\n",
+    "        params={\n",
+    "            'version': '1.2',\n",
+    "            'query': f'alma.accessionNumber=\"{sig}\"',\n",
+    "            'operation': 'searchRetrieve',\n",
+    "        },\n",
+    "        timeout=30,\n",
+    "    )\n",
+    "    tree = etree.fromstring(sru_request.content)\n",
+    "\n",
+    "    metadata_lis = []\n",
+    "    for rec in tree.xpath('.//marc:record', namespaces=ns):\n",
+    "        metadata = {}\n",
+    "        for key, path in marc_paths.items():\n",
+    "            # Empty elements have text None, which str.join cannot handle.\n",
+    "            values = [elm.text for elm in rec.xpath(path, namespaces=ns) if elm.text]\n",
+    "            if key == 'Sprache':\n",
+    "                values = [english_language_from_code(val) for val in values]\n",
+    "            metadata[key] = '; '.join(values)\n",
+    "        metadata_lis.append(metadata)\n",
+    "    return metadata_lis\n",
+    "\n",
+    "# Characters that get a leading backslash inside the quoted SRU search term.\n",
+    "SRU_ESCAPES = str.maketrans({c: '\\\\' + c for c in '.()-,'})\n",
+    "\n",
+    "for i, BE_row in tqdm(BE_no_AC.iterrows(), total=len(BE_no_AC)):\n",
+    "    # BE_no_AC is a snapshot, so ask the live frame whether an earlier step\n",
+    "    # has already resolved this row.\n",
+    "    if BE_df.at[i, 'AC Nummer'] != '':\n",
+    "        continue\n",
+    "    sig = BE_row['Signatur']\n",
+    "    sig_esc = sig.translate(SRU_ESCAPES)\n",
+    "    for dic in extract_catalog_data_from_signature(sig_esc):\n",
+    "        # A record's 'Signatur' value is a '; '-joined list. Accept the record\n",
+    "        # only if one of its signatures is exactly the one searched for, and\n",
+    "        # stop at the first such record so later ones cannot overwrite it.\n",
+    "        if sig in dic['Signatur'].split('; '):\n",
+    "            BE_df.at[i, 'AC Nummer'] = dic['AC Nummer']\n",
+    "            break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "be259674-5257-43e4-ba19-cbd12b3bfc29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "useless_entries = [2074, 4200, 5976]\n",
+    "\n",
+    "# Re-assign instead of using inplace=True, and ignore labels that are already\n",
+    "# gone, so re-running this cell does not raise a KeyError.\n",
+    "BE_df = BE_df.drop(index=useless_entries, errors='ignore')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 228,
+   "id": "805140d6-8b72-40ac-b297-e0ff24bc74a5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Hand-assigned AC numbers, keyed by the BE_df row label they belong to.\n",
+    "manual_ac_numbers = {\n",
+    "    11481: 'AC10374727',\n",
+    "    12810: 'AC11979464',\n",
+    "    19399: 'AC10075950',\n",
+    "    19785: 'AC10103649',\n",
+    "    19958: 'AC10058904',\n",
+    "}\n",
+    "\n",
+    "for row_label, ac_number in manual_ac_numbers.items():\n",
+    "    BE_df.at[row_label, 'AC Nummer'] = ac_number"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 229,
+   "id": "4a8d583c-f5a2-4626-a133-c48f80f0a2fc",
    "metadata": {
     "tags": []
    },
@@ -1626,211 +1955,1056 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>volume</th>\n",
-       "      <th>page number</th>\n",
-       "      <th>category</th>\n",
-       "      <th>subcategory</th>\n",
-       "      <th>format</th>\n",
-       "      <th>handwritten page number</th>\n",
-       "      <th>entry_ID</th>\n",
-       "      <th>entry</th>\n",
+       "      <th>Signatur</th>\n",
+       "      <th>Barcode</th>\n",
+       "      <th>Titel</th>\n",
+       "      <th>Autor</th>\n",
+       "      <th>Mitwirkender</th>\n",
+       "      <th>Anfang Veröffentlichungsdatum</th>\n",
+       "      <th>Ende Veröffentlichungsdatum</th>\n",
+       "      <th>Veröffentlichungsdatum</th>\n",
+       "      <th>Veröffentlichungsort</th>\n",
+       "      <th>Veröffentlichungsort (normiert)</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Formatangabe</th>\n",
+       "      <th>hs. Katalogseite Handschrift</th>\n",
+       "      <th>hs. Katalogeintrag ID</th>\n",
+       "      <th>hs. Katalogeintrag</th>\n",
+       "      <th>hs. Katalog Image URL</th>\n",
+       "      <th>dup_title</th>\n",
+       "      <th>copy_from</th>\n",
+       "      <th>Einfache Klassifizierung</th>\n",
+       "      <th>Komplexe Klassifizierung</th>\n",
+       "      <th>AC Nummer</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>14.376</td>\n",
-       "      <td>25</td>\n",
-       "      <td>Theologia</td>\n",
-       "      <td>Textus &amp; Versiones Sacræ Scripturæ</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>1</td>\n",
-       "      <td>14.376_025_00</td>\n",
-       "      <td>1 Biblia Sacra Polyglotta curis Cardinalis Xim...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>14.376</td>\n",
-       "      <td>25</td>\n",
-       "      <td>Theologia</td>\n",
-       "      <td>Textus &amp; Versiones Sacræ Scripturæ</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>1</td>\n",
-       "      <td>14.376_025_01</td>\n",
-       "      <td>2 Biblia Sacra Polyglotta Philippi II. Regis C...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>14.376</td>\n",
-       "      <td>25</td>\n",
-       "      <td>Theologia</td>\n",
-       "      <td>Textus &amp; Versiones Sacræ Scripturæ</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>1</td>\n",
-       "      <td>14.376_025_02</td>\n",
-       "      <td>3 Biblia Sacra Polyglotta Studio &amp; curâ Briani...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>14.376</td>\n",
-       "      <td>25</td>\n",
-       "      <td>Theologia</td>\n",
-       "      <td>Textus &amp; Versiones Sacræ Scripturæ</td>\n",
-       "      <td>Folio</td>\n",
+       "      <th>657</th>\n",
+       "      <td>BE.1.N.75.(Teil.1)</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Prvi [Drugi] del Novoga Teslamenta [!], vatom ...</td>\n",
+       "      <td>Trubar, Primož 1508-1586 [Bearb.]</td>\n",
+       "      <td>Ungnad, Hans von; Maximilian II. Heiliges Römi...</td>\n",
+       "      <td>1563.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1562-1563</td>\n",
+       "      <td>V Tubingi [Urach]</td>\n",
+       "      <td>Tübingen</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
        "      <td>1</td>\n",
-       "      <td>14.376_025_03</td>\n",
-       "      <td>4 Biblia Sacra Latina Moguntina dicta, prima o...</td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>14.376</td>\n",
-       "      <td>25</td>\n",
-       "      <td>Theologia</td>\n",
-       "      <td>Textus &amp; Versiones Sacræ Scripturæ</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>1</td>\n",
-       "      <td>14.376_025_04</td>\n",
-       "      <td>5 Biblia Sacra Latina Moguntina, editio altera...</td>\n",
+       "      <th>12124</th>\n",
+       "      <td>BE.4.S.81</td>\n",
+       "      <td>B1572545</td>\n",
+       "      <td>Description des Monumens Musulmans du Cabinet ...</td>\n",
+       "      <td>Reinaud, Joseph Toussaint 1795-1867</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1828.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1828</td>\n",
+       "      <td>Paris</td>\n",
+       "      <td>Paris</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
+       "      <th>16795</th>\n",
+       "      <td>BE.7.E.7.(2)</td>\n",
+       "      <td>B1782971</td>\n",
+       "      <td>Appendix ad historiam literariam Gulielmi cave...</td>\n",
+       "      <td>Wharton, Henricus</td>\n",
+       "      <td>Gere, Robertus</td>\n",
+       "      <td>1743.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1743</td>\n",
+       "      <td>Oxonii</td>\n",
+       "      <td>Oxford</td>\n",
        "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9398</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>582</td>\n",
-       "      <td>Imaginum Delineatarum Collectio</td>\n",
-       "      <td></td>\n",
+       "      <th>17957</th>\n",
+       "      <td>BE.7.T.64</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Journal Universel, Ou Mémoires Pour servir à l...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1743.0</td>\n",
+       "      <td>1748.0</td>\n",
+       "      <td>1743-1748</td>\n",
+       "      <td>La Haye</td>\n",
+       "      <td>Den Haag</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
        "      <td></td>\n",
-       "      <td>1402</td>\n",
-       "      <td>14.378_582_00</td>\n",
-       "      <td>CCCXXXI Vn Portefeüilles contenant des Dessein...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9399</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>582</td>\n",
-       "      <td>Imaginum Delineatarum Collectio</td>\n",
-       "      <td></td>\n",
+       "      <th>18105</th>\n",
+       "      <td>BE.7.V.58.(Adl)</td>\n",
+       "      <td>B1380793</td>\n",
+       "      <td>Leben und letzte Stunden Christinä von Munk we...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1757.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1757</td>\n",
+       "      <td>Kopenhagen usw.</td>\n",
+       "      <td>Kopenhagen</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
        "      <td></td>\n",
-       "      <td>1402</td>\n",
-       "      <td>14.378_582_01</td>\n",
-       "      <td>CCCXXXII  Vn Recueil des Portraits peints en m...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9400</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>582</td>\n",
-       "      <td>Imaginum Delineatarum Collectio</td>\n",
-       "      <td></td>\n",
+       "      <th>19065</th>\n",
+       "      <td>BE.8.K.58.(Vol.Tab.,1)</td>\n",
+       "      <td>B1633235</td>\n",
+       "      <td>Geschichte der Griechischen Litteratur, von de...</td>\n",
+       "      <td>Schoell, Friedrich 1766-1833</td>\n",
+       "      <td>Schoell, Maximilien Samson Frederic; Pinder, M...</td>\n",
+       "      <td>1828.0</td>\n",
+       "      <td>1830.0</td>\n",
+       "      <td>1828-1830</td>\n",
+       "      <td>Berlin</td>\n",
+       "      <td>Berlin</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
        "      <td></td>\n",
-       "      <td>1402</td>\n",
-       "      <td>14.378_582_02</td>\n",
-       "      <td>CCCXXXIII  Dix Vol. de Plantes peintes en mini...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9401</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>582</td>\n",
-       "      <td>Imaginum Delineatarum Collectio</td>\n",
-       "      <td></td>\n",
+       "      <th>19066</th>\n",
+       "      <td>BE.8.K.58.(Vol.Tab.,2)</td>\n",
+       "      <td>3461960-50</td>\n",
+       "      <td>Geschichte der Griechischen Litteratur, von de...</td>\n",
+       "      <td>Schoell, Friedrich 1766-1833</td>\n",
+       "      <td>Schoell, Maximilien Samson Frederic; Pinder, M...</td>\n",
+       "      <td>1828.0</td>\n",
+       "      <td>1830.0</td>\n",
+       "      <td>1828-1830</td>\n",
+       "      <td>Berlin</td>\n",
+       "      <td>Berlin</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
        "      <td></td>\n",
-       "      <td>1402</td>\n",
-       "      <td>14.378_582_03</td>\n",
-       "      <td>CCCXXXIV  Cinq Vol. d'Oiseaux peints en Miniat...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9402</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>582</td>\n",
-       "      <td>Imaginum Delineatarum Collectio</td>\n",
-       "      <td></td>\n",
+       "      <th>20641</th>\n",
+       "      <td>BE.9.C.4.(Vol.2-5)</td>\n",
+       "      <td>B1657002</td>\n",
+       "      <td>Fauna Japonica Sive Descriptio animalium, quae...</td>\n",
+       "      <td>Siebold, Philipp Franz &lt;&lt;von&gt;&gt; 1796-1866</td>\n",
+       "      <td>Temminck, Coenraad Jacob; Schlegel, Hermann; H...</td>\n",
+       "      <td>1833.0</td>\n",
+       "      <td>1850.0</td>\n",
+       "      <td>1833-1850</td>\n",
+       "      <td>Lugduni Batavorum</td>\n",
+       "      <td>Leiden</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>False</td>\n",
+       "      <td>0</td>\n",
        "      <td></td>\n",
-       "      <td>1402</td>\n",
-       "      <td>14.378_582_04</td>\n",
-       "      <td>CCCXXXV Divers Portraits, Ceremonies, Marches ...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>9403 rows × 8 columns</p>\n",
+       "<p>8 rows × 38 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "      volume  page number                         category   \n",
-       "0     14.376           25                        Theologia  \\\n",
-       "1     14.376           25                        Theologia   \n",
-       "2     14.376           25                        Theologia   \n",
-       "3     14.376           25                        Theologia   \n",
-       "4     14.376           25                        Theologia   \n",
-       "...      ...          ...                              ...   \n",
-       "9398  14.378          582  Imaginum Delineatarum Collectio   \n",
-       "9399  14.378          582  Imaginum Delineatarum Collectio   \n",
-       "9400  14.378          582  Imaginum Delineatarum Collectio   \n",
-       "9401  14.378          582  Imaginum Delineatarum Collectio   \n",
-       "9402  14.378          582  Imaginum Delineatarum Collectio   \n",
+       "                     Signatur     Barcode   \n",
+       "657        BE.1.N.75.(Teil.1)         NaN  \\\n",
+       "12124               BE.4.S.81    B1572545   \n",
+       "16795            BE.7.E.7.(2)    B1782971   \n",
+       "17957               BE.7.T.64         NaN   \n",
+       "18105         BE.7.V.58.(Adl)    B1380793   \n",
+       "19065  BE.8.K.58.(Vol.Tab.,1)    B1633235   \n",
+       "19066  BE.8.K.58.(Vol.Tab.,2)  3461960-50   \n",
+       "20641      BE.9.C.4.(Vol.2-5)    B1657002   \n",
+       "\n",
+       "                                                   Titel   \n",
+       "657    Prvi [Drugi] del Novoga Teslamenta [!], vatom ...  \\\n",
+       "12124  Description des Monumens Musulmans du Cabinet ...   \n",
+       "16795  Appendix ad historiam literariam Gulielmi cave...   \n",
+       "17957  Journal Universel, Ou Mémoires Pour servir à l...   \n",
+       "18105  Leben und letzte Stunden Christinä von Munk we...   \n",
+       "19065  Geschichte der Griechischen Litteratur, von de...   \n",
+       "19066  Geschichte der Griechischen Litteratur, von de...   \n",
+       "20641  Fauna Japonica Sive Descriptio animalium, quae...   \n",
+       "\n",
+       "                                          Autor   \n",
+       "657           Trubar, Primož 1508-1586 [Bearb.]  \\\n",
+       "12124       Reinaud, Joseph Toussaint 1795-1867   \n",
+       "16795                         Wharton, Henricus   \n",
+       "17957                                       NaN   \n",
+       "18105                                       NaN   \n",
+       "19065              Schoell, Friedrich 1766-1833   \n",
+       "19066              Schoell, Friedrich 1766-1833   \n",
+       "20641  Siebold, Philipp Franz <<von>> 1796-1866   \n",
+       "\n",
+       "                                            Mitwirkender   \n",
+       "657    Ungnad, Hans von; Maximilian II. Heiliges Römi...  \\\n",
+       "12124                                                NaN   \n",
+       "16795                                     Gere, Robertus   \n",
+       "17957                                                NaN   \n",
+       "18105                                                NaN   \n",
+       "19065  Schoell, Maximilien Samson Frederic; Pinder, M...   \n",
+       "19066  Schoell, Maximilien Samson Frederic; Pinder, M...   \n",
+       "20641  Temminck, Coenraad Jacob; Schlegel, Hermann; H...   \n",
+       "\n",
+       "       Anfang Veröffentlichungsdatum  Ende Veröffentlichungsdatum   \n",
+       "657                           1563.0                          NaN  \\\n",
+       "12124                         1828.0                          NaN   \n",
+       "16795                         1743.0                          NaN   \n",
+       "17957                         1743.0                       1748.0   \n",
+       "18105                         1757.0                          NaN   \n",
+       "19065                         1828.0                       1830.0   \n",
+       "19066                         1828.0                       1830.0   \n",
+       "20641                         1833.0                       1850.0   \n",
+       "\n",
+       "      Veröffentlichungsdatum Veröffentlichungsort   \n",
+       "657                1562-1563    V Tubingi [Urach]  \\\n",
+       "12124                   1828                Paris   \n",
+       "16795                   1743               Oxonii   \n",
+       "17957              1743-1748              La Haye   \n",
+       "18105                   1757      Kopenhagen usw.   \n",
+       "19065              1828-1830               Berlin   \n",
+       "19066              1828-1830               Berlin   \n",
+       "20641              1833-1850    Lugduni Batavorum   \n",
        "\n",
-       "                             subcategory format handwritten page number   \n",
-       "0     Textus & Versiones Sacræ Scripturæ  Folio                       1  \\\n",
-       "1     Textus & Versiones Sacræ Scripturæ  Folio                       1   \n",
-       "2     Textus & Versiones Sacræ Scripturæ  Folio                       1   \n",
-       "3     Textus & Versiones Sacræ Scripturæ  Folio                       1   \n",
-       "4     Textus & Versiones Sacræ Scripturæ  Folio                       1   \n",
-       "...                                  ...    ...                     ...   \n",
-       "9398                                                               1402   \n",
-       "9399                                                               1402   \n",
-       "9400                                                               1402   \n",
-       "9401                                                               1402   \n",
-       "9402                                                               1402   \n",
+       "      Veröffentlichungsort (normiert)  ... Formatangabe   \n",
+       "657                          Tübingen  ...          NaN  \\\n",
+       "12124                           Paris  ...          NaN   \n",
+       "16795                          Oxford  ...          NaN   \n",
+       "17957                        Den Haag  ...          NaN   \n",
+       "18105                      Kopenhagen  ...          NaN   \n",
+       "19065                          Berlin  ...          NaN   \n",
+       "19066                          Berlin  ...          NaN   \n",
+       "20641                          Leiden  ...          NaN   \n",
        "\n",
-       "           entry_ID                                              entry  \n",
-       "0     14.376_025_00  1 Biblia Sacra Polyglotta curis Cardinalis Xim...  \n",
-       "1     14.376_025_01  2 Biblia Sacra Polyglotta Philippi II. Regis C...  \n",
-       "2     14.376_025_02  3 Biblia Sacra Polyglotta Studio & curâ Briani...  \n",
-       "3     14.376_025_03  4 Biblia Sacra Latina Moguntina dicta, prima o...  \n",
-       "4     14.376_025_04  5 Biblia Sacra Latina Moguntina, editio altera...  \n",
-       "...             ...                                                ...  \n",
-       "9398  14.378_582_00  CCCXXXI Vn Portefeüilles contenant des Dessein...  \n",
-       "9399  14.378_582_01  CCCXXXII  Vn Recueil des Portraits peints en m...  \n",
-       "9400  14.378_582_02  CCCXXXIII  Dix Vol. de Plantes peintes en mini...  \n",
-       "9401  14.378_582_03  CCCXXXIV  Cinq Vol. d'Oiseaux peints en Miniat...  \n",
-       "9402  14.378_582_04  CCCXXXV Divers Portraits, Ceremonies, Marches ...  \n",
+       "      hs. Katalogseite Handschrift hs. Katalogeintrag ID  hs. Katalogeintrag   \n",
+       "657                            NaN                   NaN                 NaN  \\\n",
+       "12124                          NaN                   NaN                 NaN   \n",
+       "16795                          NaN                   NaN                 NaN   \n",
+       "17957                          NaN                   NaN                 NaN   \n",
+       "18105                          NaN                   NaN                 NaN   \n",
+       "19065                          NaN                   NaN                 NaN   \n",
+       "19066                          NaN                   NaN                 NaN   \n",
+       "20641                          NaN                   NaN                 NaN   \n",
        "\n",
-       "[9403 rows x 8 columns]"
+       "       hs. Katalog Image URL  dup_title  copy_from Einfache Klassifizierung   \n",
+       "657                      NaN        1.0       -1.0                    False  \\\n",
+       "12124                    NaN        0.0       -1.0                    False   \n",
+       "16795                    NaN        0.0       -1.0                    False   \n",
+       "17957                    NaN        1.0       -1.0                    False   \n",
+       "18105                    NaN        0.0       -1.0                    False   \n",
+       "19065                    NaN        1.0       -1.0                    False   \n",
+       "19066                    NaN        1.0       -1.0                    False   \n",
+       "20641                    NaN        1.0       -1.0                    False   \n",
+       "\n",
+       "       Komplexe Klassifizierung  AC Nummer  \n",
+       "657                           1             \n",
+       "12124                         0             \n",
+       "16795                         0             \n",
+       "17957                         0             \n",
+       "18105                         0             \n",
+       "19065                         0             \n",
+       "19066                         0             \n",
+       "20641                         0             \n",
+       "\n",
+       "[8 rows x 38 columns]"
       ]
      },
-     "execution_count": 403,
+     "execution_count": 229,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "entry_df"
+    "BE_df[BE_df['AC Nummer'] == '']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 404,
-   "id": "3a779456-4810-4428-85d7-b0c5277717d8",
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 230,
+   "id": "3f61076e-8384-4080-9003-06b9ea774fe6",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "entry_df.to_excel('../Daten/Katalogabgleich/Einträge.xlsx')"
+    "# Copy the AC numbers (and match confidences) found for BE_df rows onto the\n",
+    "# catalogue entries they were matched with.\n",
+    "matches_sig = BE_df[['AC Nummer', 'hs. Katalogeintrag ID', 'hs. Katalog Konfidenz']].dropna(subset=['hs. Katalogeintrag ID'])\n",
+    "\n",
+    "# -1 marks an entry without any match. The columns are created with object\n",
+    "# dtype because matched cells hold Python lists, which an integer column\n",
+    "# cannot store without an implicit upcast.\n",
+    "entry_df['AC number'] = pd.Series(-1, index=entry_df.index, dtype=object)\n",
+    "entry_df['AC cert'] = pd.Series(-1, index=entry_df.index, dtype=object)\n",
+    "\n",
+    "# Map every entry_ID to the label of its first row once, instead of filtering\n",
+    "# the whole frame again for each match.\n",
+    "entry_label_by_id = {}\n",
+    "for entry_label, entry_id in entry_df['entry_ID'].items():\n",
+    "    entry_label_by_id.setdefault(entry_id, entry_label)\n",
+    "\n",
+    "for i, m in matches_sig.iterrows():\n",
+    "    ac_num = m['AC Nummer']\n",
+    "    if ac_num == '':\n",
+    "        continue\n",
+    "    hs_id = m['hs. Katalogeintrag ID']\n",
+    "    hs_cert = m['hs. Katalog Konfidenz']\n",
+    "\n",
+    "    # Several entry IDs (and their confidences) are separated by ' | '.\n",
+    "    if ' | ' in hs_id:\n",
+    "        hs_id = hs_id.split(' | ')\n",
+    "        hs_cert = hs_cert.split(' | ')\n",
+    "    else:\n",
+    "        hs_id = [hs_id]\n",
+    "        hs_cert = [hs_cert]\n",
+    "\n",
+    "    for hs_i, hs_c in zip(hs_id, hs_cert):\n",
+    "        # An ID that is missing from entry_df raises a KeyError naming that ID.\n",
+    "        ind = entry_label_by_id[hs_i]\n",
+    "        known_numbers = entry_df.at[ind, 'AC number']\n",
+    "\n",
+    "        if isinstance(known_numbers, list):\n",
+    "            # Entry already matched: extend both lists in parallel.\n",
+    "            entry_df.at[ind, 'AC number'] = known_numbers + [ac_num]\n",
+    "            entry_df.at[ind, 'AC cert'] = entry_df.at[ind, 'AC cert'] + [hs_c]\n",
+    "        else:\n",
+    "            entry_df.at[ind, 'AC number'] = [ac_num]\n",
+    "            entry_df.at[ind, 'AC cert'] = [hs_c]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "0a86cfbe-33b3-4a3a-b384-270b5228359a",
+   "execution_count": 248,
+   "id": "f83d1d13-5dee-4bf3-a06d-ed58b643adfa",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Checkpoint both work-in-progress frames as Excel files.\n",
+    "BE_df.to_excel('data/wip_BE_data/BE_df_WIP.xlsx')\n",
+    "entry_df.to_excel('data/wip_BE_data/entry_df_WIP.xlsx')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "a21f6db7-19c0-4bf4-a185-d94c647750a2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Reload the BE checkpoint; index_col=0 uses the first column as the index.\n",
+    "BE_df = pd.read_excel('data/wip_BE_data/BE_df_WIP.xlsx', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3aa2e989-255e-42d9-aa9f-986772b478cc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Reload the entry checkpoint; index_col=0 uses the first column as the index.\n",
+    "entry_df = pd.read_excel('data/wip_BE_data/entry_df_WIP.xlsx', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "0746de2c-3343-48b1-b921-4215f594ab79",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Manually set the match for entry 8510. The values are the string form of a\n",
+    "# list, i.e. the representation that read_list_from_str parses further below.\n",
+    "entry_df.at[8510, 'AC number'] = \"['AC07010383']\"\n",
+    "entry_df.at[8510, 'AC cert'] = \"['sicher']\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "7a448905-e284-44b9-bdb1-38710466b341",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-1\n",
+      "['AC09762517']\n"
+     ]
+    }
+   ],
+   "source": [
+    "def read_list_from_str(s):\n",
+    "    \"\"\"Parse the text form of a Python literal back into the object it denotes.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    s : str or int\n",
+    "        Either the integer -1 (returned unchanged) or a string holding a\n",
+    "        Python literal such as \"['AC09762517']\" or \"-1\".\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    object\n",
+    "        -1 when s is -1, otherwise the value the literal denotes.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    ValueError, SyntaxError\n",
+    "        If s is not a valid Python literal.\n",
+    "    \"\"\"\n",
+    "    # literal_eval only accepts literals, so arbitrary expressions read from\n",
+    "    # the spreadsheet are rejected instead of executed as eval() would do.\n",
+    "    import ast\n",
+    "\n",
+    "    if s == -1:\n",
+    "        return s\n",
+    "    return ast.literal_eval(s)\n",
+    "\n",
+    "print(read_list_from_str(\"-1\"))\n",
+    "print(read_list_from_str(\"['AC09762517']\"))\n",
+    "\n",
+    "# Convert both columns from their stored form back to Python objects.\n",
+    "for list_column in ('AC number', 'AC cert'):\n",
+    "    entry_df[list_column] = entry_df[list_column].apply(read_list_from_str)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "27eda982-6931-4c31-addb-912bbe743cdf",
+   "metadata": {},
+   "source": [
+    "## Add `<idno>` tags for n signatures using simple string replacement (ignoring nested occurrences)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 185,
+   "id": "02ccd717-a6e4-4754-b617-485b16b73ca7",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "tei_paths = [\n",
+    "    '../../digital-edition/Step1_Aufbereitung/Cod. 14.376_tei.xml',\n",
+    "    '../../digital-edition/Step1_Aufbereitung/Cod. 14.377_tei.xml',\n",
+    "    '../../digital-edition/Step1_Aufbereitung/Cod. 14.378_tei.xml',\n",
+    "]\n",
+    "\n",
+    "# Read every TEI file as a list of lines. f1..f3 and raw_files refer to the\n",
+    "# same list objects, so edits made through one name are visible via the other.\n",
+    "raw_files = []\n",
+    "for tei_path in tei_paths:\n",
+    "    with open(tei_path, 'r') as tei_xml_input:\n",
+    "        raw_files.append(tei_xml_input.readlines())\n",
+    "\n",
+    "f1, f2, f3 = raw_files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 186,
+   "id": "2792c546-aafc-4224-97a3-4f8db5fc585c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Compiled once, from a raw string: in a plain literal the backslash escapes\n",
+    "# of this pattern are invalid string escapes (SyntaxWarning on Python 3.12+).\n",
+    "n_re = re.compile(r' ([nN]\\.? ?\\d{1,4})')\n",
+    "\n",
+    "for file in raw_files:\n",
+    "    for i, line in enumerate(file):\n",
+    "        match = n_re.search(line)\n",
+    "        if match:\n",
+    "            # Only the first hit per line is looked up; str.replace then wraps\n",
+    "            # every occurrence of that exact text within the line.\n",
+    "            n_tag = f'<idno type=\"n_signature\">{match[1]}</idno>'\n",
+    "            file[i] = line.replace(match[1], n_tag)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 188,
+   "id": "c4baa4ed-65de-4c20-99a3-275fb49ed6e9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Write the patched lines of each volume back to the file they were read from.\n",
+    "step1_outputs = {\n",
+    "    '../../digital-edition/Step1_Aufbereitung/Cod. 14.376_tei.xml': f1,\n",
+    "    '../../digital-edition/Step1_Aufbereitung/Cod. 14.377_tei.xml': f2,\n",
+    "    '../../digital-edition/Step1_Aufbereitung/Cod. 14.378_tei.xml': f3,\n",
+    "}\n",
+    "\n",
+    "for out_path, patched_lines in step1_outputs.items():\n",
+    "    with open(out_path, 'w') as out:\n",
+    "        out.writelines(patched_lines)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ac728dd-4fe8-4d8d-9049-5891b556a19e",
+   "metadata": {},
+   "source": [
+    "## Parse files as XML"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "ae4d5368-bda2-4cdf-a170-f7e0eda103c3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "dod_ids = [51202, 51184, 51219]\n",
+    "cod_prefixes = {51202: '14.376', 51184: '14.377', 51219: '14.378'}\n",
+    "\n",
+    "step1_tei_paths = {\n",
+    "    51202: '../../digital-edition/Step1_Aufbereitung/Cod. 14.376_tei.xml',\n",
+    "    51184: '../../digital-edition/Step1_Aufbereitung/Cod. 14.377_tei.xml',\n",
+    "    51219: '../../digital-edition/Step1_Aufbereitung/Cod. 14.378_tei.xml',\n",
+    "}\n",
+    "\n",
+    "# One parsed document per volume, keyed by the IDs listed in dod_ids.\n",
+    "tei = {}\n",
+    "for dod_id, tei_path in step1_tei_paths.items():\n",
+    "    with open(tei_path, 'r') as tei_xml_input:\n",
+    "        content = tei_xml_input.read()\n",
+    "        tei[dod_id] = bs(content, \"lxml-xml\")\n",
+    "\n",
+    "# Per-volume names used by later cells.\n",
+    "tei_1, tei_2, tei_3 = tei[51202], tei[51184], tei[51219]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d59c6a3a-e06b-4beb-a273-9594f8473cbf",
+   "metadata": {},
+   "source": [
+    "## Add `<div type=\"knowledge_class\"></div>` tags and modify pagenumber tag"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "b73227d4-a0f1-4113-ad2c-093507aaf3a4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def _value_to_subtype(tag):\n",
+    "    \"\"\"Rename the `value` attribute of `tag` to `subtype`, in place.\"\"\"\n",
+    "    tag['subtype'] = tag['value']\n",
+    "    del tag['value']\n",
+    "\n",
+    "\n",
+    "for tei_doc in tei.values():\n",
+    "    for head in tei_doc.find_all(\"div\", type=\"head\"):\n",
+    "        cat = head.find(type=\"category\")\n",
+    "        subcat = head.find(type=\"subcategory\")\n",
+    "        if cat is not None:\n",
+    "            # Detach the class tags from the header and regroup them inside a\n",
+    "            # dedicated wrapper div; a subcategory is only moved with a category.\n",
+    "            _value_to_subtype(cat)\n",
+    "            class_tags = [cat.extract()]\n",
+    "            if subcat is not None:\n",
+    "                _value_to_subtype(subcat)\n",
+    "                class_tags.append(subcat.extract())\n",
+    "            wrapper = bs('''<div type=\"knowledge_class\"></div>''', \"lxml-xml\")\n",
+    "            wrapper.div.extend(class_tags)\n",
+    "            head.insert(1, wrapper)\n",
+    "\n",
+    "        form = head.find(type=\"format\")\n",
+    "        if form is not None:\n",
+    "            _value_to_subtype(form)\n",
+    "\n",
+    "        pnum = head.find(type=\"pagenumber\")\n",
+    "        if pnum is not None:\n",
+    "            pnum['type'] = 'pageNum'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "46340680-bc79-4bcb-a4ab-d17c9883ba86",
+   "metadata": {},
+   "source": [
+    "## Add links to modern catalog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "ebb4894b-76db-4146-b62a-86c30f2f8609",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def zip_without_m1(row):\n",
+    "    if row[0] == -1:\n",
+    "        return -1\n",
+    "    else:\n",
+    "        return list(zip(row[0], row[1]))\n",
+    "\n",
+    "entry_df['AC comb'] = entry_df[['AC number', 'AC cert']].apply(zip_without_m1, axis=1)\n",
+    "# Drop duplicate (number, cert) pairs. dict.fromkeys keeps first-seen order;\n",
+    "# list(set(...)) ordered the pairs differently from run to run (string hashing\n",
+    "# is randomized), so anything generated from this column changed order too.\n",
+    "entry_df['AC comb set'] = entry_df['AC comb'].apply(lambda x: list(dict.fromkeys(x)) if x != -1 else -1)\n",
+    "entry_df['AC comb set len'] = entry_df['AC comb set'].apply(lambda x: len(x) if x != -1 else -1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "ba37d08d-f064-4ba0-aaf5-d33c9893bb1c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "links = entry_df[entry_df['AC number'] != -1]\n",
+    "\n",
+    "# Page-break tags of each volume, keyed by the volume label used in entry_df.\n",
+    "p1 = tei_1.find_all('pb')\n",
+    "p2 = tei_2.find_all('pb')\n",
+    "p3 = tei_3.find_all('pb')\n",
+    "ps = {\"14.376\": p1, \"14.377\": p2, \"14.378\": p3}\n",
+    "\n",
+    "for _, row in links.iterrows():\n",
+    "    # Locate the entry: page break -> following <p> -> entry picked by the\n",
+    "    # last two characters of entry_ID.\n",
+    "    page_break = ps[str(row['volume'])][row['page number'] - 1]\n",
+    "    entry_pos = int(row['entry_ID'][-2:])\n",
+    "    entry = page_break.find_next('p').find_all(type='entry')[entry_pos]\n",
+    "    ref_lis = [\n",
+    "        bs(f'''<ref cert=\"{'high' if ref[1] == 'sicher' else 'low'}\" target=\"https://data.onb.ac.at/rec/{ref[0]}\" type=\"catalog\"/>''', \"lxml-xml\")\n",
+    "        for ref in row['AC comb set']\n",
+    "    ]\n",
+    "    entry.extend(ref_lis)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "deadc616-1dc6-4d7e-9392-5a92923026fc",
+   "metadata": {},
+   "source": [
+    "## Remove `choice`, `expan` and `abbr` tags"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "629a49b3-75c7-4e3d-b3aa-5c6165c32e06",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "for tei_doc in tei.values():\n",
+    "    # Collect all three tag kinds first, then strip the tags but keep their content.\n",
+    "    obsolete = []\n",
+    "    for tag_name in ('choice', 'expan', 'abbr'):\n",
+    "        obsolete.extend(tei_doc.find_all(tag_name))\n",
+    "    for obsolete_tag in obsolete:\n",
+    "        obsolete_tag.unwrap()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5c447ce-40c1-4f5b-b8f5-41d5d4bb5558",
+   "metadata": {},
+   "source": [
+    "## Convert `note` tags that contain an AC number into `ref` tags (unless a matching `ref` already exists)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "ec90629a-8afb-4a65-b6bb-782830fd8bcf",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "                                    Falsche Jahresangabe\n",
+      "                                \n",
+      "\n",
+      "                                        Guinee, altes Synonym für Pfund?\n",
+      "                                    \n",
+      "\n",
+      "                                Falsche Nummerierung (139 wäre richtig)\n",
+      "                            \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Compiled once, from a raw string: the backslash escape in the pattern is an\n",
+    "# invalid string escape in a plain literal, and the pattern never changes.\n",
+    "re_ac = re.compile(r'AC\\d{8}')\n",
+    "\n",
+    "for key in tei:\n",
+    "    for note in tei[key].find_all('note'):\n",
+    "        match_ac = re_ac.search(str(note.string))\n",
+    "        if not match_ac:\n",
+    "            # Notes without an AC number stay in the document; print them for review.\n",
+    "            print(note.string)\n",
+    "            continue\n",
+    "\n",
+    "        note_ac = match_ac[0]\n",
+    "        entry_div = note.find_previous('div', type='entry')\n",
+    "        # find_all() always returns a list, so no None check is needed.\n",
+    "        # .get() keeps a <ref> without a target from aborting the loop.\n",
+    "        known_acs = {ref.get('target', '').split('/')[-1] for ref in entry_div.find_all('ref')}\n",
+    "        if note_ac not in known_acs:\n",
+    "            new_ref_tag = bs(f'''<ref cert=\"high\" target=\"https://data.onb.ac.at/rec/{note_ac}\" type=\"catalog\"/>''', \"lxml-xml\")\n",
+    "            entry_div.append(new_ref_tag)\n",
+    "        # The AC number is now covered by a <ref>, so the note is removed.\n",
+    "        note.decompose()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aa8a809c-adb7-44c0-9c8b-4a762bbeace3",
+   "metadata": {},
+   "source": [
+    "## Add information to `<idno>` tags"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "bd52a621-e46c-49d8-a5c5-a243ee85fba4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Raw string: the backslash escape is an invalid string escape in a plain literal.\n",
+    "idno_num_re = re.compile(r'\\d{1,4}')\n",
+    "\n",
+    "for key in tei:\n",
+    "    idnos = tei[key].find_all('idno')\n",
+    "    for id_tag in idnos:\n",
+    "        num = idno_num_re.search(id_tag.string)[0]\n",
+    "        # The closest preceding element with type=\"format\" supplies the format part.\n",
+    "        form = id_tag.find_previous(type='format')['subtype']\n",
+    "        id_tag['subtype'] = f'{form}, {num}'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d15e8da2-da31-4068-8aae-320f5534c523",
+   "metadata": {},
+   "source": [
+    "## Mark `add` tags with `hand=\"other\"` and convert `roetel` and `bleistift` tags to `add` tags"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "a4a7bc48-ec82-4b67-b0dd-055d27dd008e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Every existing <add> is attributed to another hand.\n",
+    "for tei_doc in tei.values():\n",
+    "    for add_tag in tei_doc.find_all('add'):\n",
+    "        add_tag['hand'] = 'other'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "710a147b-ea34-47ca-a381-5b477d8f12ab",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# <bleistift> becomes <add hand=\"other\" rend=\"bleistift\">.\n",
+    "for tei_doc in tei.values():\n",
+    "    for pencil_tag in tei_doc.find_all('bleistift'):\n",
+    "        pencil_tag.name = 'add'\n",
+    "        pencil_tag.attrs.update({'hand': 'other', 'rend': 'bleistift'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "e7e81cbf-e473-4ae4-b846-2aea6b2e5c81",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# <roetel> becomes <add hand=\"other\" rend=\"roetel\">.\n",
+    "for tei_doc in tei.values():\n",
+    "    for chalk_tag in tei_doc.find_all('roetel'):\n",
+    "        chalk_tag.name = 'add'\n",
+    "        chalk_tag.attrs.update({'hand': 'other', 'rend': 'roetel'})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9ef5fcf1-74fe-4c55-95a8-a2a8ffe4e265",
+   "metadata": {},
+   "source": [
+    "## Remove `<continued/>` tags"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "de097669-9180-4510-a9ac-d215433a7fca",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Delete every <continued/> marker together with anything inside it.\n",
+    "for tei_doc in tei.values():\n",
+    "    for marker in tei_doc.find_all('continued'):\n",
+    "        marker.decompose()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8b699159-7ac2-4ced-9cb0-4a5e7e10df34",
+   "metadata": {},
+   "source": [
+    "## Combine nested `<add>` tags"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "4687407c-d205-4aab-8c86-fafa73b997b9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "for key in tei:\n",
+    "    for tag in tei[key].find_all('add'):\n",
+    "        # Work through a queue instead of iterating tag.children directly:\n",
+    "        # unwrap() edits tag.contents in place, so a live iterator can skip\n",
+    "        # siblings of the element that was just unwrapped.\n",
+    "        pending = list(tag.children)\n",
+    "        while pending:\n",
+    "            child = pending.pop(0)\n",
+    "            if child.name != 'add':\n",
+    "                continue\n",
+    "            if 'rend' not in tag.attrs and 'rend' in child.attrs:\n",
+    "                # Only the inner <add> has a rend attribute: drop the outer tag.\n",
+    "                tag.unwrap()\n",
+    "                break  # tag has left the tree, nothing more to merge into it\n",
+    "            if 'rend' in tag.attrs and 'rend' not in child.attrs:\n",
+    "                # Only the outer <add> has a rend attribute: drop the inner tag.\n",
+    "                # Its children become children of tag, so they are checked too.\n",
+    "                promoted = list(child.children)\n",
+    "                child.unwrap()\n",
+    "                pending = promoted + pending"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4a47880-5ecf-41e1-b2ea-624bfb301c99",
+   "metadata": {},
+   "source": [
+    "## Convert `<num>` tags to `<idno type=\"n_signature\">` tags"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "32240f6b-1ecc-47d1-8f2d-0cd14f899b3d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Raw string: the backslash escape is an invalid string escape in a plain literal.\n",
+    "num_tag_re = re.compile(r'\\d{1,4}')\n",
+    "\n",
+    "for key in tei:\n",
+    "    num_lis = tei[key].find_all('num')\n",
+    "    for tag in num_lis:\n",
+    "        num = num_tag_re.search(tag.string)[0]\n",
+    "        # The closest preceding element with type=\"format\" supplies the format part.\n",
+    "        form = tag.find_previous(type='format')['subtype']\n",
+    "        # Turn the <num> into an n-signature <idno>.\n",
+    "        tag.name = 'idno'\n",
+    "        tag['type'] = 'n_signature'\n",
+    "        tag['subtype'] = f'{form}, {num}'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4a34042b-f93a-4072-884f-66fef48d6a30",
+   "metadata": {},
+   "source": [
+    "## List all tags in the documents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "55b131a0-58aa-4eb1-ba47-92b3bb46ffaa",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "34\n",
+      "['TEI', 'ab', 'add', 'bibl', 'body', 'change', 'corr', 'div', 'encodingDesc', 'facsimile', 'fileDesc', 'graphic', 'hi', 'idno', 'l', 'listChange', 'note', 'p', 'pb', 'profileDesc', 'publicationStmt', 'publisher', 'ref', 'revisionDesc', 'sic', 'sourceDesc', 'surface', 'teiHeader', 'text', 'title', 'titleStmt', 'unclear', 'xenoData', 'zone']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Distinct tag names across all three documents.\n",
+    "all_tags = {\n",
+    "    tag.name\n",
+    "    for tei_doc in tei.values()\n",
+    "    for tag in tei_doc.find_all()\n",
+    "}\n",
+    "\n",
+    "print(len(all_tags))\n",
+    "print(sorted(all_tags))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "85b00fab-7faa-43c4-a294-b0e5febc8024",
+   "metadata": {},
+   "source": [
+    "## Add description of changes for second step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "a50d2147-4ee5-4d6b-a950-b4aca1f5e5ca",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<listChange>\n",
+      "<change when=\"2022-12-12\" who=\"#AT\">\n",
+      "                    Import of facsimiles to Transkribus\n",
+      "                </change>\n",
+      "<change from=\"2022-12-13\" to=\"2023-01-26\" who=\"#AT #SM #MK\">\n",
+      "                    Manual transcription of pages to be used as Ground truth for training process in Transkribus\n",
+      "                </change>\n",
+      "<change from=\"2023-02-20\" to=\"2023-02-28\" who=\"#SM\">\n",
+      "                    Automated transcription of the first three volumes Cod. 14.376-14.378\n",
+      "                </change>\n",
+      "<change from=\"2023-03-01\" to=\"2023-12-13\" who=\"#AT #SM #MK #GF #PE #TG #AR\">\n",
+      "                    Correction of automated transcripts in Transkribus, adding custom tags (add, corr, bleistift, roetel, continued, comment)\n",
+      "                </change>\n",
+      "<change when=\"2023-12-18\" who=\"#SM\">\n",
+      "                    Export from Transkribus to TEI-XML\n",
+      "                </change>\n",
+      "<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n",
+      "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n",
+      "</change></listChange>\n",
+      "<listChange>\n",
+      "<change when=\"2022-12-12\" who=\"#AT\">\n",
+      "                    Import of facsimiles to Transkribus\n",
+      "                </change>\n",
+      "<change from=\"2022-12-13\" to=\"2023-01-26\" who=\"#AT #SM #MK\">\n",
+      "                    Manual transcription of pages to be used as Ground truth for training process in Transkribus\n",
+      "                </change>\n",
+      "<change from=\"2023-02-20\" to=\"2023-02-28\" who=\"#SM\">\n",
+      "                    Automated transcription of the first three volumes Cod. 14.376-14.378\n",
+      "                </change>\n",
+      "<change from=\"2023-03-01\" to=\"2023-12-13\" who=\"#AT #SM #MK #GF #PE #TG #AR\">\n",
+      "                    Correction of automated transcripts in Transkribus, adding custom tags (add, corr, bleistift, roetel, continued, comment)\n",
+      "                </change>\n",
+      "<change when=\"2023-12-18\" who=\"#SM\">\n",
+      "                    Export from Transkribus to TEI-XML\n",
+      "                </change>\n",
+      "<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n",
+      "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n",
+      "</change></listChange>\n",
+      "<listChange>\n",
+      "<change when=\"2022-12-12\" who=\"#AT\">\n",
+      "                    Import of facsimiles to Transkribus\n",
+      "                </change>\n",
+      "<change from=\"2022-12-13\" to=\"2023-01-26\" who=\"#AT #SM #MK\">\n",
+      "                    Manual transcription of pages to be used as Ground truth for training process in Transkribus\n",
+      "                </change>\n",
+      "<change from=\"2023-02-20\" to=\"2023-02-28\" who=\"#SM\">\n",
+      "                    Automated transcription of the first three volumes Cod. 14.376-14.378\n",
+      "                </change>\n",
+      "<change from=\"2023-03-01\" to=\"2023-12-13\" who=\"#AT #SM #MK #GF #PE #TG #AR\">\n",
+      "                    Correction of automated transcripts in Transkribus, adding custom tags (add, corr, bleistift, roetel, continued, comment)\n",
+      "                </change>\n",
+      "<change when=\"2023-12-18\" who=\"#SM\">\n",
+      "                    Export from Transkribus to TEI-XML\n",
+      "                </change>\n",
+      "<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n",
+      "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n",
+      "</change></listChange>\n"
+     ]
+    }
+   ],
+   "source": [
+    "step2_change_xml = '''<change from=\"2024-01-18\" to=\"2024-10-10\" who=\"#SM\">\n",
+    "Layout analysis of pages to obtain entries. Create header for all pages with knowledge classes, page number, and format specification if applicable. Added matching data for entries connecting them to the modern catalog. Add tags for n signature.\n",
+    "</change>\n",
+    "'''\n",
+    "\n",
+    "for tei_doc in tei.values():\n",
+    "    # A fresh fragment is parsed for every document before it is appended.\n",
+    "    listChange = tei_doc.listChange\n",
+    "    listChange.append(bs(step2_change_xml, \"lxml-xml\"))\n",
+    "    print(listChange)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed89defa-7231-42e7-bab8-2b235eee5b24",
+   "metadata": {},
+   "source": [
+    "## Export TEI_XML files to Step 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "ee793bf1-5a43-4715-8b2b-b5780d6fd8b3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Target file of each parsed volume in the Step 2 folder.\n",
+    "step2_exports = {\n",
+    "    '../../digital-edition/Step2_Matching/Cod. 14.376_tei.xml': tei_1,\n",
+    "    '../../digital-edition/Step2_Matching/Cod. 14.377_tei.xml': tei_2,\n",
+    "    '../../digital-edition/Step2_Matching/Cod. 14.378_tei.xml': tei_3,\n",
+    "}\n",
+    "\n",
+    "for export_path, tei_doc in step2_exports.items():\n",
+    "    with open(export_path, 'w') as tei_xml_output:\n",
+    "        tei_xml_output.write(tei_doc.prettify(formatter='minimal'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7d6908c-6008-49f7-b854-3b3f66c34986",
    "metadata": {},
    "outputs": [],
    "source": []