diff --git a/Daten/Vorhersagen/WIP_final_BE_3.xlsx b/Daten/Vorhersagen/WIP_final_BE_3.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..1052487927814fa10aae0202113c456fd09eed02
Binary files /dev/null and b/Daten/Vorhersagen/WIP_final_BE_3.xlsx differ
diff --git a/Notebooks/Albertina_Bestaende.ipynb b/Notebooks/Albertina_Bestaende.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8c2d83d16eb51be34fce0f0a1d492a20b187638e
--- /dev/null
+++ b/Notebooks/Albertina_Bestaende.ipynb
@@ -0,0 +1,530 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "4234a6f9-d208-4e5c-ba1d-114470811b67",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import re\n",
+    "import numpy as np\n",
+    "from tqdm.notebook import tqdm\n",
+    "import matplotlib.pyplot as plt\n",
+    "import requests\n",
+    "import json\n",
+    "from lxml import etree\n",
+    "from IPython.display import display\n",
+    "\n",
+    "pd.set_option('display.max_colwidth', None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "f1e3a8e5-dee4-4ed1-ac72-5bf1dc2d868b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Row labels of the Albertina list entries whose shelfmark is an ÖNB signature.\n",
+    "onb_sig = [13, 15, 16, 17, 29, 43, 51, 52, 65, 81, 83]\n",
+    "\n",
+    "alb_df = pd.read_excel('../Daten/Katalog/BibliothecaEugeniana_StandortAlbertina.xlsx')\n",
+    "# Vectorised flag: unlike per-row .at writes, an unknown label cannot append a stray row.\n",
+    "alb_df['ÖNB Signatur'] = alb_df.index.isin(onb_sig)\n",
+    "\n",
+    "# Rows flagged above, i.e. the volumes to look up in the ÖNB catalogue.\n",
+    "onb_baende = alb_df[alb_df['ÖNB Signatur']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "9a59727f-3174-40b4-86fe-651509359c6e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "lang_data = pd.read_csv('data/iso-639-3.tab', sep='\\t')\n",
+    "\n",
+    "def english_language_from_code(lang_code):\n",
+    "    \"\"\"Return the 'Ref_Name' registered for a language code in lang_data.\n",
+    "\n",
+    "    The code is matched against the 'Id' column first and 'Part2b' second;\n",
+    "    '' is returned when neither column contains it.\n",
+    "    \"\"\"\n",
+    "    # Both lookups are evaluated up front; a hit on 'Id' takes precedence.\n",
+    "    by_id, by_part2b = (lang_data[lang_data[col] == lang_code] for col in ('Id', 'Part2b'))\n",
+    "    hits = by_id if len(by_id) else by_part2b\n",
+    "    return hits['Ref_Name'].values[0] if len(hits) else ''\n",
+    "\n",
+    "ns = {\n",
+    "    'srw': 'http://www.loc.gov/zing/srw/',\n",
+    "    'marc': 'http://www.loc.gov/MARC21/slim'\n",
+    "}\n",
+    "\n",
+    "def extract_catalog_data_from_signature(sig):\n",
+    "    \"\"\"Fetch the MARC records the ÖNB Alma SRU endpoint returns for `sig`, one dict per record.\n",
+    "\n",
+    "    Values are the '; '-joined subfield texts ('' if absent); raises a requests exception on HTTP errors or timeouts.\n",
+    "    \"\"\"\n",
+    "    marc_paths = {\n",
+    "        'Titel': './/marc:datafield[@tag=\"245\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Autor': './/marc:datafield[@tag=\"100\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Mitwirkender': './/marc:datafield[@tag=\"700\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Signatur': './/marc:datafield[@tag=\"AVA\"]/marc:subfield[@code=\"d\"]',\n",
+    "        'Veröffentlichungsdatum': './/marc:datafield[@tag=\"264\"]/marc:subfield[@code=\"c\"]',\n",
+    "        'Veröffentlichungsort': './/marc:datafield[@tag=\"264\"]/marc:subfield[@code=\"a\"]',\n",
+    "        'Sprache': './/marc:datafield[@tag=\"041\"]/marc:subfield[@code=\"a\"]'\n",
+    "    }\n",
+    "    # Let requests URL-encode the CQL query so shelfmarks containing spaces or '&' cannot break it.\n",
+    "    query = {'version': '1.2', 'query': f'alma.accessionNumber=\"{sig}\"', 'operation': 'searchRetrieve'}\n",
+    "    sru_request = requests.get('https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB', params=query, timeout=30)\n",
+    "    sru_request.raise_for_status()  # fail loudly instead of parsing an error page as MARCXML\n",
+    "    metadata_lis = []\n",
+    "    for rec in etree.fromstring(sru_request.content).xpath('.//marc:record', namespaces=ns):\n",
+    "        metadata = {}\n",
+    "        for key, path in marc_paths.items():\n",
+    "            values = [elm.text for elm in rec.xpath(path, namespaces=ns) if elm.text]  # empty subfields have text None\n",
+    "            if key == 'Sprache':\n",
+    "                values = [english_language_from_code(val) for val in values]\n",
+    "            metadata[key] = '; '.join(values)\n",
+    "        metadata_lis.append(metadata)\n",
+    "    return metadata_lis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "d336f2e0-87c7-40c4-ba62-30fae7d21ceb",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "onb_baende_metadata = list(map(extract_catalog_data_from_signature, onb_baende['Signatur (rot: ÖNB)']))  # one catalogue query per flagged row"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "id": "6fd96da5-7b09-426a-83ff-56ba41de5af3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def is_exact_signature(cand):\n",
+    "    \"\"\"Return True when `cand` equals one of the 'Signatur (rot: ÖNB)' values in onb_baende.\"\"\"\n",
+    "    return cand in onb_baende['Signatur (rot: ÖNB)'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "id": "384857bb-2720-4699-92b3-a26e0e21bf9e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# For every queried volume keep only the records whose 'Signatur' is an exact shelfmark match.\n",
+    "onb_md = [\n",
+    "    [record for record in records if is_exact_signature(record['Signatur'])]\n",
+    "    for records in onb_baende_metadata\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "id": "5632416d-ad96-4e1f-8563-11bec3025f60",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[{'Titel': 'Aigentliche wahrhaffte Delineatio vnnd Abbildung aller fürstl. Auufzug vnd Ritterspilen in der fürstlichen Hauptstadt Stuttgart. (Tübingen 1618).',\n",
+       "   'Autor': 'Hulsen, Esaias von',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '24.D.6',\n",
+       "   'Veröffentlichungsdatum': '',\n",
+       "   'Veröffentlichungsort': 'Tübingen',\n",
+       "   'Sprache': 'German'},\n",
+       "  {'Titel': 'Aigentliche wahrhaffte Delineatio vund Abbildung aller fürstlichen Auffzüg vnd Ritterspilen ... in der fürstlichen Hauptstatt Stuttgart (etc.)',\n",
+       "   'Autor': '',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '24.D.6',\n",
+       "   'Veröffentlichungsdatum': '1618',\n",
+       "   'Veröffentlichungsort': 'Tübingen',\n",
+       "   'Sprache': 'German'}],\n",
+       " [],\n",
+       " [],\n",
+       " [],\n",
+       " [{'Titel': \"Pompe funebre du tres-pieux et tres puissant prince Albert, archiduc d'Autriche (etc.) representee au naturel en tailles 12, dessinees par Jacques Francqvart et gravees par Corneille Galle. Avec unde diss. historique et morale d'Eryce Puteanus (etc.)\",\n",
+       "   'Autor': 'Francquart, Jacques',\n",
+       "   'Mitwirkender': 'Galle, Corneille; Puteanus, Erycius',\n",
+       "   'Signatur': '24.D.7',\n",
+       "   'Veröffentlichungsdatum': '1729',\n",
+       "   'Veröffentlichungsort': 'Bruxelles',\n",
+       "   'Sprache': 'French'}],\n",
+       " [{'Titel': 'Funera Caroli V. (tit. fict.)',\n",
+       "   'Autor': '',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '24.D.8',\n",
+       "   'Veröffentlichungsdatum': '1619',\n",
+       "   'Veröffentlichungsort': 'Hagae-Comit',\n",
+       "   'Sprache': ''}],\n",
+       " [{'Titel': 'Certamen equestre caeteraque solemnia. Holmiae Suecorum av 1672 celebrata cum Carolus XI. omnium cum applausu avati regnii regimen capescret. s. Das grosse Carrosel (Carrousel) und prächtige Ring-Rännen nebst dem, was sonsten fürtreffliches zu sehen war, alß König ... Carl der Elffte die Regierung seines Väterlichen Erb. Königreichs anno 1672 den 18. Dezembris in seiner kgl. Residenz zu Stockholm antratt.',\n",
+       "   'Autor': '',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '24.D.4',\n",
+       "   'Veröffentlichungsdatum': '1672',\n",
+       "   'Veröffentlichungsort': 'Stockholm',\n",
+       "   'Sprache': 'German'},\n",
+       "  {'Titel': 'Das große Carrosel (Carroussel) und prächtige Ring-Rännen nebst dem, was sonsten fürtreffliches zu sehen war, alß ... König ... Carl der Eylffte die Regierung ... antratt',\n",
+       "   'Autor': '',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '24.D.4',\n",
+       "   'Veröffentlichungsdatum': '1672',\n",
+       "   'Veröffentlichungsort': 'Stockholm',\n",
+       "   'Sprache': 'German'}],\n",
+       " [{'Titel': 'Aufzüge und Ritterspiele, so bey des duchlauchtigsten, hochgebornen Fürsten und Herrn Herrn Friedrich Wilhelms Hertzogen zu Sachsen ... gehalten worden auf S. H. durchl. Residentz Vestung zu Altenburg in Monat Junio 1654',\n",
+       "   'Autor': '',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '24.D.5',\n",
+       "   'Veröffentlichungsdatum': '1658',\n",
+       "   'Veröffentlichungsort': 'Schleßwig',\n",
+       "   'Sprache': 'German'}],\n",
+       " [],\n",
+       " [],\n",
+       " [{'Titel': \"Feste fatte sopra l'Arno in Fiorenza per le nozze del Sno Pre di Toscana l'anno 1608 disegnata da Giulio Parigi.\",\n",
+       "   'Autor': 'Parigi, Giulio',\n",
+       "   'Mitwirkender': '',\n",
+       "   'Signatur': '839505-E',\n",
+       "   'Veröffentlichungsdatum': '1608',\n",
+       "   'Veröffentlichungsort': 'Florenz',\n",
+       "   'Sprache': 'Italian'}]]"
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "onb_md"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "id": "0fac2293-33f2-41dc-b6da-fc17cb4c2209",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Standort</th>\n",
+       "      <th>Regal</th>\n",
+       "      <th>Systematik</th>\n",
+       "      <th>Signatur (rot: ÖNB)</th>\n",
+       "      <th>Kurztitel</th>\n",
+       "      <th>Anmerkungen</th>\n",
+       "      <th>ÖNB Signatur</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>24.D.6</td>\n",
+       "      <td>Festa eqvestria stvtgardiae celebrat</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>587972-F</td>\n",
+       "      <td>Architet di L. B. Alberti, Tom. I</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>587972-F</td>\n",
+       "      <td>Architetv di L. B. Alberti, Tom. II</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>587972-F</td>\n",
+       "      <td>Architet di L. B. Alberti, Tom. III</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>24.D.7</td>\n",
+       "      <td>Pompe funèbre du très-pieux et très-puissant Prince Albert</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>24.D.8</td>\n",
+       "      <td>Fvner Car. V. Imper</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>51</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>24.D.4</td>\n",
+       "      <td>Certam eqvest Caroli XI. holmiae</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>24.D.5</td>\n",
+       "      <td>Certam eqvestr in Saxonia</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>65</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>60.120-E</td>\n",
+       "      <td>Academie de le spee par G. Thibvat</td>\n",
+       "      <td>Etikett: Nicht ausheben! Umsignieren auf MF 3106</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>81</th>\n",
+       "      <td>UG5 Depot</td>\n",
+       "      <td>95.0</td>\n",
+       "      <td>Illustrierte Bücher Thulden</td>\n",
+       "      <td>392.023-E</td>\n",
+       "      <td>Travavx d'Vlisse par Theodore van Thvlden</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>83</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>839505-E</td>\n",
+       "      <td>Feste fatte sopral arno in fior</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Standort  Regal                   Systematik Signatur (rot: ÖNB)   \n",
+       "13  UG3 Depot    NaN                 Galeriewerke              24.D.6  \\\n",
+       "15  UG3 Depot    NaN                 Galeriewerke            587972-F   \n",
+       "16  UG3 Depot    NaN                 Galeriewerke            587972-F   \n",
+       "17  UG3 Depot    NaN                 Galeriewerke            587972-F   \n",
+       "29  UG3 Depot    NaN                 Galeriewerke              24.D.7   \n",
+       "43  UG3 Depot    NaN                 Galeriewerke              24.D.8   \n",
+       "51  UG3 Depot    NaN                 Galeriewerke              24.D.4   \n",
+       "52  UG3 Depot    NaN                 Galeriewerke              24.D.5   \n",
+       "65  UG3 Depot    NaN                 Galeriewerke            60.120-E   \n",
+       "81  UG5 Depot   95.0  Illustrierte Bücher Thulden           392.023-E   \n",
+       "83  UG3 Depot    NaN                 Galeriewerke            839505-E   \n",
+       "\n",
+       "                                                     Kurztitel   \n",
+       "13                        Festa eqvestria stvtgardiae celebrat  \\\n",
+       "15                           Architet di L. B. Alberti, Tom. I   \n",
+       "16                         Architetv di L. B. Alberti, Tom. II   \n",
+       "17                         Architet di L. B. Alberti, Tom. III   \n",
+       "29  Pompe funèbre du très-pieux et très-puissant Prince Albert   \n",
+       "43                                         Fvner Car. V. Imper   \n",
+       "51                            Certam eqvest Caroli XI. holmiae   \n",
+       "52                                   Certam eqvestr in Saxonia   \n",
+       "65                          Academie de le spee par G. Thibvat   \n",
+       "81                   Travavx d'Vlisse par Theodore van Thvlden   \n",
+       "83                             Feste fatte sopral arno in fior   \n",
+       "\n",
+       "                                         Anmerkungen  ÖNB Signatur  \n",
+       "13                                               NaN          True  \n",
+       "15                                               NaN          True  \n",
+       "16                                               NaN          True  \n",
+       "17                                               NaN          True  \n",
+       "29                                               NaN          True  \n",
+       "43                                               NaN          True  \n",
+       "51                                               NaN          True  \n",
+       "52                                               NaN          True  \n",
+       "65  Etikett: Nicht ausheben! Umsignieren auf MF 3106          True  \n",
+       "81                                               NaN          True  \n",
+       "83                                               NaN          True  "
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "onb_baende"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "id": "dac1e7bd-793c-4841-876c-09ce04c66e33",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Standort</th>\n",
+       "      <th>Regal</th>\n",
+       "      <th>Systematik</th>\n",
+       "      <th>Signatur (rot: ÖNB)</th>\n",
+       "      <th>Kurztitel</th>\n",
+       "      <th>Anmerkungen</th>\n",
+       "      <th>ÖNB Signatur</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>93</th>\n",
+       "      <td>UG3 Depot</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Galeriewerke</td>\n",
+       "      <td>K.S.E-201</td>\n",
+       "      <td>Bidloo Corpor Human Anatomia</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Standort  Regal    Systematik Signatur (rot: ÖNB)   \n",
+       "93  UG3 Depot    NaN  Galeriewerke           K.S.E-201  \\\n",
+       "\n",
+       "                       Kurztitel Anmerkungen  ÖNB Signatur  \n",
+       "93  Bidloo Corpor Human Anatomia         NaN         False  "
+      ]
+     },
+     "execution_count": 100,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "alb_df[alb_df['Kurztitel'].str.contains('Human', na=False)]  # na=False: rows without a Kurztitel count as non-matches instead of breaking the mask"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a869f088-af09-462e-ad10-0d201ee982a6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Notebooks/String_matching.ipynb b/Notebooks/String_matching.ipynb
index de8428e3b3d8e30a0b4454b01845a84ee1313966..b3aa06cc5cf5ed438de913e387d6adfc1849c6e9 100644
--- a/Notebooks/String_matching.ipynb
+++ b/Notebooks/String_matching.ipynb
@@ -115,7 +115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 1189,
    "id": "c0f4a42a-7e21-41e8-833c-2dd2f9d1985e",
    "metadata": {
     "tags": []
@@ -125,7 +125,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "7\n"
+      "1\n"
      ]
     },
     {
@@ -162,141 +162,36 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>3207</th>\n",
-       "      <td>14.377</td>\n",
-       "      <td>72</td>\n",
-       "      <td>Poëtica</td>\n",
-       "      <td>Poëtæ Gallici unà cum Dramaticis</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>480</td>\n",
-       "      <td>14.377_072_00</td>\n",
-       "      <td>912 Les oeuvres Poëtiques du Sr. Rousseau, vide Codd. Mss.</td>\n",
-       "      <td>Les oeuvres Poetiques du Sr Rousseau vide Codd Mss</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3253</th>\n",
+       "      <th>4652</th>\n",
        "      <td>14.377</td>\n",
-       "      <td>77</td>\n",
-       "      <td>Poëtica</td>\n",
-       "      <td>Poëtæ Gallici unà cum Dramaticis</td>\n",
-       "      <td>Quarto</td>\n",
-       "      <td>485</td>\n",
-       "      <td>14.377_077_02</td>\n",
-       "      <td>Les Oeuvres Poëtiques du S. Rousseau. 2 Vol. grand Pap. Londres. 1723. Tonson. n. 638. NB. V. inter illos in fol.</td>\n",
-       "      <td>Les Oeuvres Poetiques du S Rousseau 2 Vol grand Pap Londres 1723 Tonson n 638 NB V inter illos in fol</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3402</th>\n",
-       "      <td>14.377</td>\n",
-       "      <td>90</td>\n",
-       "      <td>Poëtica</td>\n",
-       "      <td>Poëtæ Gallici unà cum Dramaticis</td>\n",
+       "      <td>231</td>\n",
+       "      <td>Philologia</td>\n",
+       "      <td>Epistolographi Gallici, &amp; Italici</td>\n",
        "      <td>Octavo und kleiner</td>\n",
-       "      <td>494</td>\n",
-       "      <td>14.377_090_07</td>\n",
-       "      <td>Les Oeuvres Poëtiques du S. Rouseau, avec l'anti-Rousseau par (Gâcon ) 12.° 3 Vol. Rotterdam. 1712. n. 1231</td>\n",
-       "      <td>Les Oeuvres Poetiques du S Rouseau avec l'anti-Rousseau par Gacon  12° 3 Vol Rotterdam 1712 n 1231</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8930</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>513</td>\n",
-       "      <td>Codices Manuscripti</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Folio</td>\n",
-       "      <td>1341</td>\n",
-       "      <td>14.378_513_02</td>\n",
-       "      <td>149 Les Oeuvres du S. Rousseau. Mss.</td>\n",
-       "      <td>Les Oeuvres du S Rousseau Mss</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9014</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>522</td>\n",
-       "      <td>Codices Manuscripti</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Quarto</td>\n",
-       "      <td>1348</td>\n",
-       "      <td>14.378_522_02</td>\n",
-       "      <td>50 Les Epigrammes du S.r Rousseau approuvées par l'autheur. Mss. ce sont ? set aences  jusque au mois de Fevrier 1710. n. CCVI</td>\n",
-       "      <td>Les Epigrammes du Sr Rousseau approuvees par l'autheur Mss ce sont ? set aences  jusque au mois de Fevrier 1710 n CCVI</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9044</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>524</td>\n",
-       "      <td>Codices Manuscripti</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Quarto</td>\n",
-       "      <td>1348*</td>\n",
-       "      <td>14.378_524_07</td>\n",
-       "      <td>80. Odes sacrées ou cantiques de Pseaumes par le S. Rousseau, Mspt. n. CCVII.</td>\n",
-       "      <td>Odes sacrees ou cantiques de Pseaumes par le S Rousseau Mspt n CCVII</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9065</th>\n",
-       "      <td>14.378</td>\n",
-       "      <td>530</td>\n",
-       "      <td>Codices Manuscripti</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Octavo und kleiner</td>\n",
-       "      <td>1352</td>\n",
-       "      <td>14.378_530_02</td>\n",
-       "      <td>28 89. Rousseau Sr. Epigrammes approuvées par lui memne. chart: Sæc: XVIII.</td>\n",
-       "      <td>89 Rousseau Sr Epigrammes approuvees par lui memne chart Saec XVIII</td>\n",
+       "      <td>627</td>\n",
+       "      <td>14.377_231_03</td>\n",
+       "      <td>Lettres edifiantes &amp; curieuses des Missionaires Iesuites. 12.° 21 Vol. Paris. 1717. &amp; Ann. Suivantes n. 1622.</td>\n",
+       "      <td>Lettres edifiantes &amp; curieuses des Missionaires Iesuites 12° 21 Vol Paris 1717 &amp; Ann Suivantes n 1622</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "      volume  page number             category   \n",
-       "3207  14.377           72              Poëtica  \\\n",
-       "3253  14.377           77              Poëtica   \n",
-       "3402  14.377           90              Poëtica   \n",
-       "8930  14.378          513  Codices Manuscripti   \n",
-       "9014  14.378          522  Codices Manuscripti   \n",
-       "9044  14.378          524  Codices Manuscripti   \n",
-       "9065  14.378          530  Codices Manuscripti   \n",
-       "\n",
-       "                           subcategory              format   \n",
-       "3207  Poëtæ Gallici unà cum Dramaticis               Folio  \\\n",
-       "3253  Poëtæ Gallici unà cum Dramaticis              Quarto   \n",
-       "3402  Poëtæ Gallici unà cum Dramaticis  Octavo und kleiner   \n",
-       "8930                               NaN               Folio   \n",
-       "9014                               NaN              Quarto   \n",
-       "9044                               NaN              Quarto   \n",
-       "9065                               NaN  Octavo und kleiner   \n",
+       "      volume  page number    category                        subcategory   \n",
+       "4652  14.377          231  Philologia  Epistolographi Gallici, & Italici  \\\n",
        "\n",
-       "     handwritten page number       entry_ID   \n",
-       "3207                     480  14.377_072_00  \\\n",
-       "3253                     485  14.377_077_02   \n",
-       "3402                     494  14.377_090_07   \n",
-       "8930                    1341  14.378_513_02   \n",
-       "9014                    1348  14.378_522_02   \n",
-       "9044                   1348*  14.378_524_07   \n",
-       "9065                    1352  14.378_530_02   \n",
+       "                  format handwritten page number       entry_ID   \n",
+       "4652  Octavo und kleiner                     627  14.377_231_03  \\\n",
        "\n",
-       "                                                                                                                               entry   \n",
-       "3207                                                                      912 Les oeuvres Poëtiques du Sr. Rousseau, vide Codd. Mss.  \\\n",
-       "3253               Les Oeuvres Poëtiques du S. Rousseau. 2 Vol. grand Pap. Londres. 1723. Tonson. n. 638. NB. V. inter illos in fol.   \n",
-       "3402                     Les Oeuvres Poëtiques du S. Rouseau, avec l'anti-Rousseau par (Gâcon ) 12.° 3 Vol. Rotterdam. 1712. n. 1231   \n",
-       "8930                                                                                            149 Les Oeuvres du S. Rousseau. Mss.   \n",
-       "9014  50 Les Epigrammes du S.r Rousseau approuvées par l'autheur. Mss. ce sont ? set aences  jusque au mois de Fevrier 1710. n. CCVI   \n",
-       "9044                                                   80. Odes sacrées ou cantiques de Pseaumes par le S. Rousseau, Mspt. n. CCVII.   \n",
-       "9065                                                     28 89. Rousseau Sr. Epigrammes approuvées par lui memne. chart: Sæc: XVIII.   \n",
+       "                                                                                                              entry   \n",
+       "4652  Lettres edifiantes & curieuses des Missionaires Iesuites. 12.° 21 Vol. Paris. 1717. & Ann. Suivantes n. 1622.  \\\n",
        "\n",
-       "                                                                                                               cleaned entry  \n",
-       "3207                                                                      Les oeuvres Poetiques du Sr Rousseau vide Codd Mss  \n",
-       "3253                   Les Oeuvres Poetiques du S Rousseau 2 Vol grand Pap Londres 1723 Tonson n 638 NB V inter illos in fol  \n",
-       "3402                      Les Oeuvres Poetiques du S Rouseau avec l'anti-Rousseau par Gacon  12° 3 Vol Rotterdam 1712 n 1231  \n",
-       "8930                                                                                           Les Oeuvres du S Rousseau Mss  \n",
-       "9014  Les Epigrammes du Sr Rousseau approuvees par l'autheur Mss ce sont ? set aences  jusque au mois de Fevrier 1710 n CCVI  \n",
-       "9044                                                    Odes sacrees ou cantiques de Pseaumes par le S Rousseau Mspt n CCVII  \n",
-       "9065                                                     89 Rousseau Sr Epigrammes approuvees par lui memne chart Saec XVIII  "
+       "                                                                                              cleaned entry  \n",
+       "4652  Lettres edifiantes & curieuses des Missionaires Iesuites 12° 21 Vol Paris 1717 & Ann Suivantes n 1622  "
       ]
      },
-     "execution_count": 114,
+     "execution_count": 1189,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -305,7 +200,7 @@
     "def search_in_entry(df, string):\n",
     "    return df[df['cleaned entry'].str.contains(string)]\n",
     "\n",
-    "info = search_in_entry(search_in_entry(entry_df, 'Rousseau'), '')\n",
+    "info = search_in_entry(search_in_entry(entry_df, 'edifiantes'), '')\n",
     "print(len(info))\n",
     "info"
    ]
@@ -344,7 +239,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 1176,
    "id": "20facf5d-d609-498e-9907-7ebdffc09e15",
    "metadata": {
     "tags": []
@@ -354,55 +249,55 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "BE.1.M.2.(2)\n",
-      "B1762317\n",
-      "Appendix Ignatiana in qua continentur Ignatii epistolae germinae Ignatiique martyrium a Philone, Agathopode et aliis (etc.)\n",
-      "Usher, Jacobus\n",
-      "Londini\n",
-      "1647\n",
-      "nan\n"
+      "BE.5.V.45\n",
+      "+Z175390902\n",
+      "Principes de philosophie ou preuves naturelles de l'existence de Dieu et de l'immortalite de l'ame\n",
+      "Genest, Charles-Claude\n",
+      "Paris\n",
+      "1716\n",
+      "B\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "Signatur                                                                                                                                          BE.1.M.2.(2)\n",
-       "Barcode                                                                                                                                               B1762317\n",
-       "Titel                              Appendix Ignatiana in qua continentur Ignatii epistolae germinae Ignatiique martyrium a Philone, Agathopode et aliis (etc.)\n",
-       "Autor                                                                                                                                           Usher, Jacobus\n",
-       "Mitwirkender                                                                                                                                               NaN\n",
-       "Anfang Veröffentlichungsdatum                                                                                                                           1647.0\n",
-       "Ende Veröffentlichungsdatum                                                                                                                                NaN\n",
-       "Veröffentlichungsdatum                                                                                                                                    1647\n",
-       "Veröffentlichungsort                                                                                                                                   Londini\n",
-       "Veröffentlichungsort (normiert)                                                                                                                            NaN\n",
-       "Sprache                                                                                                                                                  Latin\n",
-       "Schlagwörter                                                                                                                       Ignatius--Antiochenus---110\n",
-       "Schlagwörter (mit GND)                                                                        Ignatius--Antiochenus---110$Dp--(DE-588)118555340;AT-OBV--ONB-AK\n",
-       "Vorbesitzer                                                                                                                                                NaN\n",
-       "Typ                                                                                                                                                        NaN\n",
-       "Bemerkungen                                                                                                                                                NaN\n",
-       "Gültiger Barcode                                                                                                                                           NaN\n",
-       "Dateiname                                                                                                                                                  NaN\n",
-       "Wappenklassifizierung                                                                                                                                      NaN\n",
-       "p_A                                                                                                                                                        NaN\n",
-       "p_B                                                                                                                                                        NaN\n",
-       "p_C                                                                                                                                                        NaN\n",
-       "p_N                                                                                                                                                        NaN\n",
-       "Farbklassifizierung                                                                                                                                        NaN\n",
-       "p_blue                                                                                                                                                     NaN\n",
-       "p_red                                                                                                                                                      NaN\n",
-       "p_yellow                                                                                                                                                   NaN\n",
-       "Name: 463, dtype: object"
+       "Signatur                                                                                                                                                                                                                                                                                               BE.5.V.45\n",
+       "Barcode                                                                                                                                                                                                                                                                                              +Z175390902\n",
+       "Titel                                                                                                                                                                                                         Principes de philosophie ou preuves naturelles de l'existence de Dieu et de l'immortalite de l'ame\n",
+       "Autor                                                                                                                                                                                                                                                                                     Genest, Charles-Claude\n",
+       "Mitwirkender                                                                                                                                                                                                                                                                                                 NaN\n",
+       "Anfang Veröffentlichungsdatum                                                                                                                                                                                                                                                                             1716.0\n",
+       "Ende Veröffentlichungsdatum                                                                                                                                                                                                                                                                                  NaN\n",
+       "Veröffentlichungsdatum                                                                                                                                                                                                                                                                                      1716\n",
+       "Veröffentlichungsort                                                                                                                                                                                                                                                                                       Paris\n",
+       "Veröffentlichungsort (normiert)                                                                                                                                                                                                                                                                              NaN\n",
+       "Sprache                                                                                                                                                                                                                                                                                                   French\n",
+       "Schlagwörter                                                                                                                                                                                                      Gedicht; Gottesbeweis; Seele; Unsterblichkeit; Belletristische Darstellung; Lyrik; Französisch\n",
+       "Schlagwörter (mit GND)             Gottesbeweis$Ds--(DE-588)4021668-8;Belletristische Darstellung$Af;AT-OBV--ONB-AK;Seele$Ds--(DE-588)4054146-0;Unsterblichkeit$Ds--(DE-588)4061874-2;Belletristische Darstellung$Af;AT-OBV--ONB-AK;Lyrik$Ds--(DE-588)4036774-5;Französisch$Ds--(DE-588)4113615-9;AT-OBV--ONB-AK\n",
+       "Vorbesitzer                                                                                                                                                                                                                                                                                                  NaN\n",
+       "Typ                                                                                                                                                                                                                                                                                             Gedicht--bellobv\n",
+       "Bemerkungen                                                                                                                                                                                                                                                                                                  NaN\n",
+       "Gültiger Barcode                                                                                                                                                                                                                                                                                      Z175390902\n",
+       "Dateiname                                                                                                                                                                                                                                                                                Z175390902_00000001.jpg\n",
+       "Wappenklassifizierung                                                                                                                                                                                                                                                                                          B\n",
+       "p_A                                                                                                                                                                                                                                                                                                     0.000108\n",
+       "p_B                                                                                                                                                                                                                                                                                                      0.99959\n",
+       "p_C                                                                                                                                                                                                                                                                                                     0.000184\n",
+       "p_N                                                                                                                                                                                                                                                                                                     0.000118\n",
+       "Farbklassifizierung                                                                                                                                                                                                                                                                                          red\n",
+       "p_blue                                                                                                                                                                                                                                                                                                  0.000051\n",
+       "p_red                                                                                                                                                                                                                                                                                                   0.999859\n",
+       "p_yellow                                                                                                                                                                                                                                                                                                 0.00009\n",
+       "Name: 14220, dtype: object"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 1176,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "BE_entry = BE_df.loc[463]\n",
+    "BE_entry = BE_df.loc[14220]\n",
     "print(BE_entry['Signatur'])\n",
     "print(BE_entry['Barcode'])\n",
     "print(BE_entry['Titel'])\n",
@@ -3087,8 +2982,647 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 156,
    "id": "a5507d98-a6be-4108-a70f-c09404f8de79",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Zwischenstand speichern\n",
+    "comp_BE_no_dup.to_excel('../Daten/Vorhersagen/WIP_final_BE_2.xlsx')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 724,
+   "id": "315500ea-0225-4b2c-b568-082f5250dd4e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "comp_BE_no_dup = pd.read_excel('../Daten/Vorhersagen/WIP_final_BE_2.xlsx', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 772,
+   "id": "ef48a633-fb96-4ae2-b024-9f8e6f7beb01",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Signatur', 'Barcode', 'Titel', 'Autor', 'Mitwirkender',\n",
+       "       'Anfang Veröffentlichungsdatum', 'Ende Veröffentlichungsdatum',\n",
+       "       'Veröffentlichungsdatum', 'Veröffentlichungsort',\n",
+       "       'Veröffentlichungsort (normiert)', 'Sprache', 'Dateiname',\n",
+       "       'Wappenklassifizierung', 'p_A', 'p_B', 'p_C', 'p_N',\n",
+       "       'Farbklassifizierung', 'p_blue', 'p_red', 'p_yellow', 'IIIF Manifest',\n",
+       "       'hs. Katalog', 'hs. Katalog Konfidenz', 'hs. Katalogband',\n",
+       "       'hs. Katalogseite Digitalisat', 'Wissensklasse', 'Wissensunterklasse',\n",
+       "       'Formatangabe', 'hs. Katalogseite Handschrift', 'hs. Katalogeintrag ID',\n",
+       "       'hs. Katalogeintrag', 'hs. Katalog Image URL', 'dup_title',\n",
+       "       'copy_from'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 772,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "comp_BE_no_dup.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 870,
+   "id": "ffc781e2-21f4-48c8-9e01-f57f677b11a6",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "22614    https://iiif.onb.ac.at/presentation/ABO/Z22196790X/manifest\n",
+       "897      https://iiif.onb.ac.at/presentation/ABO/Z165135008/manifest\n",
+       "898      https://iiif.onb.ac.at/presentation/ABO/Z165135100/manifest\n",
+       "900      https://iiif.onb.ac.at/presentation/ABO/Z165135203/manifest\n",
+       "901      https://iiif.onb.ac.at/presentation/ABO/Z165135306/manifest\n",
+       "                                    ...                             \n",
+       "22236    https://iiif.onb.ac.at/presentation/ABO/Z200809106/manifest\n",
+       "22237    https://iiif.onb.ac.at/presentation/ABO/Z200809209/manifest\n",
+       "22238    https://iiif.onb.ac.at/presentation/ABO/Z200809301/manifest\n",
+       "22239    https://iiif.onb.ac.at/presentation/ABO/Z200809404/manifest\n",
+       "22240    https://iiif.onb.ac.at/presentation/ABO/Z200809507/manifest\n",
+       "Name: IIIF Manifest, Length: 3364, dtype: object"
+      ]
+     },
+     "execution_count": 870,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# IIIF manifests (NaN dropped) of the rows whose 'hs. Katalogeintrag ID' belongs to an\n",
+    "# entry_df row with format 'Octavo und kleiner'.\n",
+    "comp_BE_no_dup.loc[\n",
+    "    comp_BE_no_dup['hs. Katalogeintrag ID'].isin(\n",
+    "        entry_df.loc[entry_df['format'] == 'Octavo und kleiner', 'entry_ID']\n",
+    "    ),\n",
+    "    'IIIF Manifest',\n",
+    "].dropna()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "67d13156-4cc6-4db4-92f3-41031203ce7c",
+   "metadata": {},
+   "source": [
+    "# Cluster im Regal ermitteln"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 882,
+   "id": "3c414e9c-021d-4172-87ce-32fb6a08c9a6",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "7950\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_4673/1024163498.py:6: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  has_hw_catalog['first_num'] = has_hw_catalog['control'].apply(lambda x: int(match.search(str(x))[0]))\n"
+     ]
+    }
+   ],
+   "source": [
+    "s = '7950, 9247'\n",
+    "\n",
+    "# Raw string so the regex escapes are not read as (invalid) Python string escapes.\n",
+    "# search() returns the leftmost match and the second alternative matches any run of\n",
+    "# 1-4 digits, so this extracts the first run of up to four digits of a value.\n",
+    "match = re.compile(r'\\d{1,4}\\Z|\\d{1,4}|\\d{1,4}\\?,')\n",
+    "print(match.search(s)[0])\n",
+    "\n",
+    "# Work on an explicit copy: has_hw_catalog is a slice of another frame, and adding a\n",
+    "# column to such a slice triggers pandas' SettingWithCopyWarning.\n",
+    "has_hw_catalog = has_hw_catalog.copy()\n",
+    "# NOTE: raises TypeError for a control value without any digit (search() returns None).\n",
+    "has_hw_catalog['first_num'] = has_hw_catalog['control'].apply(lambda x: int(match.search(str(x))[0]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 905,
+   "id": "cc5e0d88-a88e-4926-8f1a-65d0415ea3dc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "no_sort_m = has_hw_catalog['first_num'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1060,
+   "id": "acff6cc5-e585-47b2-a91f-f1191d47de04",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def get_longest_sequence(lis, max_gap=10, catalog=None, entries=None):\n",
+    "    \"\"\"Find runs of neighbouring values in lis, longest run first.\n",
+    "\n",
+    "    Two consecutive elements of lis are neighbours when their absolute\n",
+    "    difference is smaller than max_gap. A run is a maximal stretch of\n",
+    "    consecutive neighbour pairs; its length counts the pairs, so a run of\n",
+    "    length k spans k + 1 elements.\n",
+    "\n",
+    "    Args:\n",
+    "        lis: Sequence of row labels of entries. Position i in lis must\n",
+    "            correspond to row i (positional) of catalog.\n",
+    "        max_gap: Exclusive upper bound on the difference between neighbours.\n",
+    "        catalog: Frame with an 'input_id' column; defaults to the notebook\n",
+    "            variable has_hw_catalog.\n",
+    "        entries: Frame with an 'entry_ID' column, indexed by the values in\n",
+    "            lis; defaults to the notebook variable entry_df.\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with one row per distinct start position, ordered by\n",
+    "        descending run length, with the columns location, length,\n",
+    "        first_BE_location, last_location, last_BE_location, first_hw_id and\n",
+    "        last_hw_id. Rows of length 0 mark positions that follow a large gap\n",
+    "        and start no run.\n",
+    "    \"\"\"\n",
+    "    # Fall back to the notebook-level frames so existing calls keep working.\n",
+    "    if catalog is None:\n",
+    "        catalog = has_hw_catalog\n",
+    "    if entries is None:\n",
+    "        entries = entry_df\n",
+    "\n",
+    "    # Column-wise lookups avoid materialising a whole row for every access.\n",
+    "    input_ids = catalog['input_id']\n",
+    "    ids = [entries.loc[n, 'entry_ID'] for n in lis]\n",
+    "\n",
+    "    # seq_length[i] = (i, number of consecutive neighbour pairs ending with pair (i, i + 1))\n",
+    "    run = 0\n",
+    "    seq_length = []\n",
+    "    for i in range(len(lis) - 1):\n",
+    "        run = run + 1 if abs(lis[i + 1] - lis[i]) < max_gap else 0\n",
+    "        seq_length.append((i, run))\n",
+    "\n",
+    "    # sorted() is stable, so equally long runs keep their original order.\n",
+    "    sort_seq = sorted(seq_length, key=lambda x: x[1], reverse=True)\n",
+    "    # A run of `count` pairs that ends with pair `end` starts at element end - count + 1.\n",
+    "    places = pd.Series([end - count + 1 for end, count in sort_seq], name='location')\n",
+    "    # All prefixes of a run share its start; the first occurrence is the longest one.\n",
+    "    seq_df = pd.DataFrame(places.drop_duplicates())\n",
+    "    seq_df['length'] = [sort_seq[x][1] for x in seq_df.index]\n",
+    "    seq_df['first_BE_location'] = seq_df['location'].apply(lambda x: input_ids.iloc[x])\n",
+    "    seq_df['last_location'] = seq_df['location'] + seq_df['length']\n",
+    "    seq_df['last_BE_location'] = seq_df['last_location'].apply(lambda x: input_ids.iloc[x])\n",
+    "    seq_df['first_hw_id'] = seq_df['location'].apply(lambda x: ids[x])\n",
+    "    seq_df['last_hw_id'] = seq_df['last_location'].apply(lambda x: ids[x])\n",
+    "\n",
+    "    return seq_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1061,
+   "id": "bbb4d50a-72f8-4368-9966-0d0ea590c06f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Compute the sequences, then look up the 'Signatur' of the BE_df rows at both ends.\n",
+    "seq = get_longest_sequence(no_sort_m)\n",
+    "seq['erste Signatur'] = [BE_df.loc[x, 'Signatur'] for x in seq['first_BE_location']]\n",
+    "seq['letzte Signatur'] = [BE_df.loc[x, 'Signatur'] for x in seq['last_BE_location']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1089,
+   "id": "81ed055b-34f7-420f-8391-9ffc8f8efc89",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>location</th>\n",
+       "      <th>length</th>\n",
+       "      <th>first_BE_location</th>\n",
+       "      <th>last_location</th>\n",
+       "      <th>last_BE_location</th>\n",
+       "      <th>first_hw_id</th>\n",
+       "      <th>last_hw_id</th>\n",
+       "      <th>erste Signatur</th>\n",
+       "      <th>letzte Signatur</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>931</td>\n",
+       "      <td>24</td>\n",
+       "      <td>4261</td>\n",
+       "      <td>955</td>\n",
+       "      <td>4285</td>\n",
+       "      <td>14.377_079_01</td>\n",
+       "      <td>14.377_079_23</td>\n",
+       "      <td>BE.11.L.31</td>\n",
+       "      <td>BE.11.L.35.(4)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4116</td>\n",
+       "      <td>22</td>\n",
+       "      <td>15231</td>\n",
+       "      <td>4138</td>\n",
+       "      <td>15275</td>\n",
+       "      <td>14.378_211_03</td>\n",
+       "      <td>14.378_215_01</td>\n",
+       "      <td>BE.6.M.1-16.(Vol.1)</td>\n",
+       "      <td>BE.6.M.39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>907</td>\n",
+       "      <td>19</td>\n",
+       "      <td>4237</td>\n",
+       "      <td>926</td>\n",
+       "      <td>4256</td>\n",
+       "      <td>14.377_078_01</td>\n",
+       "      <td>14.377_078_19</td>\n",
+       "      <td>BE.11.L.18</td>\n",
+       "      <td>BE.11.L.22.(3)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>3524</td>\n",
+       "      <td>18</td>\n",
+       "      <td>13400</td>\n",
+       "      <td>3542</td>\n",
+       "      <td>13418</td>\n",
+       "      <td>14.376_325_03</td>\n",
+       "      <td>14.376_328_01</td>\n",
+       "      <td>BE.5.N.30</td>\n",
+       "      <td>BE.5.N.39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>2599</td>\n",
+       "      <td>17</td>\n",
+       "      <td>10388</td>\n",
+       "      <td>2616</td>\n",
+       "      <td>10405</td>\n",
+       "      <td>14.376_259_05</td>\n",
+       "      <td>14.376_262_01</td>\n",
+       "      <td>BE.3.W.14</td>\n",
+       "      <td>BE.3.W.29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>3674</td>\n",
+       "      <td>17</td>\n",
+       "      <td>13828</td>\n",
+       "      <td>3691</td>\n",
+       "      <td>13846</td>\n",
+       "      <td>14.376_435_04</td>\n",
+       "      <td>14.376_438_01</td>\n",
+       "      <td>BE.5.R.30</td>\n",
+       "      <td>BE.5.R.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>4240</td>\n",
+       "      <td>17</td>\n",
+       "      <td>15600</td>\n",
+       "      <td>4257</td>\n",
+       "      <td>15618</td>\n",
+       "      <td>14.377_080_00</td>\n",
+       "      <td>14.377_081_08</td>\n",
+       "      <td>BE.6.Q.21</td>\n",
+       "      <td>BE.6.Q.29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>1536</td>\n",
+       "      <td>16</td>\n",
+       "      <td>6440</td>\n",
+       "      <td>1552</td>\n",
+       "      <td>6456</td>\n",
+       "      <td>14.378_425_01</td>\n",
+       "      <td>14.378_426_06</td>\n",
+       "      <td>BE.12.Q.27</td>\n",
+       "      <td>BE.12.Q.41.(Adl)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>2508</td>\n",
+       "      <td>16</td>\n",
+       "      <td>10085</td>\n",
+       "      <td>2524</td>\n",
+       "      <td>10139</td>\n",
+       "      <td>14.378_109_02</td>\n",
+       "      <td>14.378_111_05</td>\n",
+       "      <td>BE.3.S.10-12.(Vol.1)</td>\n",
+       "      <td>BE.3.S.44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>701</td>\n",
+       "      <td>15</td>\n",
+       "      <td>3500</td>\n",
+       "      <td>716</td>\n",
+       "      <td>3515</td>\n",
+       "      <td>14.377_158_06</td>\n",
+       "      <td>14.377_161_06</td>\n",
+       "      <td>BE.10.X.39</td>\n",
+       "      <td>BE.10.X.54</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>1942</td>\n",
+       "      <td>15</td>\n",
+       "      <td>8153</td>\n",
+       "      <td>1957</td>\n",
+       "      <td>8177</td>\n",
+       "      <td>14.377_433_09</td>\n",
+       "      <td>14.377_435_04</td>\n",
+       "      <td>BE.2.R.27-32.(Vol.1)</td>\n",
+       "      <td>BE.2.R.48-53.(Vol.1)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>3094</td>\n",
+       "      <td>15</td>\n",
+       "      <td>12064</td>\n",
+       "      <td>3109</td>\n",
+       "      <td>12084</td>\n",
+       "      <td>14.376_385_02</td>\n",
+       "      <td>14.376_387_05</td>\n",
+       "      <td>BE.4.S.28</td>\n",
+       "      <td>BE.4.S.49</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>339</td>\n",
+       "      <td>14</td>\n",
+       "      <td>1455</td>\n",
+       "      <td>353</td>\n",
+       "      <td>1469</td>\n",
+       "      <td>14.376_110_02</td>\n",
+       "      <td>14.376_107_02</td>\n",
+       "      <td>BE.1.X.35</td>\n",
+       "      <td>BE.1.X.48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>52</th>\n",
+       "      <td>3226</td>\n",
+       "      <td>14</td>\n",
+       "      <td>12534</td>\n",
+       "      <td>3240</td>\n",
+       "      <td>12650</td>\n",
+       "      <td>14.377_164_00</td>\n",
+       "      <td>14.377_166_03</td>\n",
+       "      <td>BE.4.Y.1.(Vol.1)</td>\n",
+       "      <td>BE.4.Z.37</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>55</th>\n",
+       "      <td>3906</td>\n",
+       "      <td>14</td>\n",
+       "      <td>14472</td>\n",
+       "      <td>3920</td>\n",
+       "      <td>14490</td>\n",
+       "      <td>14.377_170_02</td>\n",
+       "      <td>14.377_171_04</td>\n",
+       "      <td>BE.5.Y.10</td>\n",
+       "      <td>BE.5.Y.29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>58</th>\n",
+       "      <td>4900</td>\n",
+       "      <td>14</td>\n",
+       "      <td>19260</td>\n",
+       "      <td>4914</td>\n",
+       "      <td>19275</td>\n",
+       "      <td>14.377_279_01</td>\n",
+       "      <td>14.377_283_01</td>\n",
+       "      <td>BE.8.N.15.(Vol.1)</td>\n",
+       "      <td>BE.8.N.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>59</th>\n",
+       "      <td>5360</td>\n",
+       "      <td>14</td>\n",
+       "      <td>20942</td>\n",
+       "      <td>5374</td>\n",
+       "      <td>20973</td>\n",
+       "      <td>14.377_349_02</td>\n",
+       "      <td>14.377_352_04</td>\n",
+       "      <td>BE.9.K.10.11.(Vol.1)</td>\n",
+       "      <td>BE.9.K.36.37.(Vol.1)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>69</th>\n",
+       "      <td>3188</td>\n",
+       "      <td>13</td>\n",
+       "      <td>12443</td>\n",
+       "      <td>3201</td>\n",
+       "      <td>12466</td>\n",
+       "      <td>14.377_120_05</td>\n",
+       "      <td>14.377_122_03</td>\n",
+       "      <td>BE.4.X.17</td>\n",
+       "      <td>BE.4.X.29.(Vol.1)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>82</th>\n",
+       "      <td>1108</td>\n",
+       "      <td>12</td>\n",
+       "      <td>4822</td>\n",
+       "      <td>1120</td>\n",
+       "      <td>4835</td>\n",
+       "      <td>14.378_480_06</td>\n",
+       "      <td>14.378_481_06</td>\n",
+       "      <td>BE.11.Q.10</td>\n",
+       "      <td>BE.11.Q.19</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>85</th>\n",
+       "      <td>2170</td>\n",
+       "      <td>12</td>\n",
+       "      <td>8764</td>\n",
+       "      <td>2182</td>\n",
+       "      <td>8777</td>\n",
+       "      <td>14.376_234_04</td>\n",
+       "      <td>14.376_238_01</td>\n",
+       "      <td>BE.2.X.52</td>\n",
+       "      <td>BE.2.X.65-68.(Vol.1)</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    location  length  first_BE_location  last_location  last_BE_location   \n",
+       "0        931      24               4261            955              4285  \\\n",
+       "3       4116      22              15231           4138             15275   \n",
+       "8        907      19               4237            926              4256   \n",
+       "13      3524      18              13400           3542             13418   \n",
+       "17      2599      17              10388           2616             10405   \n",
+       "19      3674      17              13828           3691             13846   \n",
+       "21      4240      17              15600           4257             15618   \n",
+       "24      1536      16               6440           1552              6456   \n",
+       "25      2508      16              10085           2524             10139   \n",
+       "31       701      15               3500            716              3515   \n",
+       "35      1942      15               8153           1957              8177   \n",
+       "38      3094      15              12064           3109             12084   \n",
+       "43       339      14               1455            353              1469   \n",
+       "52      3226      14              12534           3240             12650   \n",
+       "55      3906      14              14472           3920             14490   \n",
+       "58      4900      14              19260           4914             19275   \n",
+       "59      5360      14              20942           5374             20973   \n",
+       "69      3188      13              12443           3201             12466   \n",
+       "82      1108      12               4822           1120              4835   \n",
+       "85      2170      12               8764           2182              8777   \n",
+       "\n",
+       "      first_hw_id     last_hw_id        erste Signatur       letzte Signatur  \n",
+       "0   14.377_079_01  14.377_079_23            BE.11.L.31        BE.11.L.35.(4)  \n",
+       "3   14.378_211_03  14.378_215_01   BE.6.M.1-16.(Vol.1)             BE.6.M.39  \n",
+       "8   14.377_078_01  14.377_078_19            BE.11.L.18        BE.11.L.22.(3)  \n",
+       "13  14.376_325_03  14.376_328_01             BE.5.N.30             BE.5.N.39  \n",
+       "17  14.376_259_05  14.376_262_01             BE.3.W.14             BE.3.W.29  \n",
+       "19  14.376_435_04  14.376_438_01             BE.5.R.30             BE.5.R.48  \n",
+       "21  14.377_080_00  14.377_081_08             BE.6.Q.21             BE.6.Q.29  \n",
+       "24  14.378_425_01  14.378_426_06            BE.12.Q.27      BE.12.Q.41.(Adl)  \n",
+       "25  14.378_109_02  14.378_111_05  BE.3.S.10-12.(Vol.1)             BE.3.S.44  \n",
+       "31  14.377_158_06  14.377_161_06            BE.10.X.39            BE.10.X.54  \n",
+       "35  14.377_433_09  14.377_435_04  BE.2.R.27-32.(Vol.1)  BE.2.R.48-53.(Vol.1)  \n",
+       "38  14.376_385_02  14.376_387_05             BE.4.S.28             BE.4.S.49  \n",
+       "43  14.376_110_02  14.376_107_02             BE.1.X.35             BE.1.X.48  \n",
+       "52  14.377_164_00  14.377_166_03      BE.4.Y.1.(Vol.1)             BE.4.Z.37  \n",
+       "55  14.377_170_02  14.377_171_04             BE.5.Y.10             BE.5.Y.29  \n",
+       "58  14.377_279_01  14.377_283_01     BE.8.N.15.(Vol.1)             BE.8.N.25  \n",
+       "59  14.377_349_02  14.377_352_04  BE.9.K.10.11.(Vol.1)  BE.9.K.36.37.(Vol.1)  \n",
+       "69  14.377_120_05  14.377_122_03             BE.4.X.17     BE.4.X.29.(Vol.1)  \n",
+       "82  14.378_480_06  14.378_481_06            BE.11.Q.10            BE.11.Q.19  \n",
+       "85  14.376_234_04  14.376_238_01             BE.2.X.52  BE.2.X.65-68.(Vol.1)  "
+      ]
+     },
+     "execution_count": 1089,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "seq.head(20)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5d5632b9-5446-44ef-9590-0592641018c0",
+   "metadata": {},
+   "source": [
+    "# Nacharbeiten bei duplizierten Titeln"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1196,
+   "id": "dd3fd2d0-1558-4505-afe8-7af70c49cb0e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Post-processing for rows whose title occurs more than once.\n",
+    "\n",
+    "# Candidates: rows flagged `dup_title` with `hs. Katalog == 1` and `copy_from == -1`\n",
+    "# (presumably rows carrying their own, non-copied catalogue match -- TODO confirm).\n",
+    "dup_candidates = comp_BE_no_dup[comp_BE_no_dup['dup_title'] & (comp_BE_no_dup['hs. Katalog'] == 1) & (comp_BE_no_dup['copy_from'] == -1)]\n",
+    "low_sim = []\n",
+    "\n",
+    "for _, candidate in dup_candidates.iterrows():\n",
+    "    sig = candidate['Signatur']\n",
+    "    # Rows with the same title whose `copy_from` is set (i.e. not -1).\n",
+    "    dup_group = comp_BE_no_dup[(comp_BE_no_dup['Titel'] == candidate['Titel']) & (comp_BE_no_dup['copy_from'] != -1)]\n",
+    "    for _, dup in dup_group.iterrows():\n",
+    "        sim_score = fuzz.ratio(sig, dup['Signatur'])\n",
+    "        if 80 < sim_score < 90:  # shelfmark is similar\n",
+    "            # `dup` is the per-row Series yielded by iterrows; the two extra fields\n",
+    "            # only end up in `low_sim_df`.\n",
+    "            dup['sim_score'] = sim_score\n",
+    "            dup['other_sig'] = sig\n",
+    "            low_sim.append(dup)\n",
+    "\n",
+    "# Collected for manual inspection; not used further in this cell.\n",
+    "low_sim_df = pd.DataFrame(low_sim)\n",
+    "\n",
+    "# Catalogue volume (as string) -> id used in the DOD segment of the IIIF image URL.\n",
+    "dod_id = {\n",
+    "    '14.376': 51202,\n",
+    "    '14.377': 51184,\n",
+    "    '14.378': 51219,\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def _set_hw_match(df, idx, entry):\n",
+    "    \"\"\"Write the handwritten-catalogue fields of `entry` into row `idx` of `df`.\n",
+    "\n",
+    "    The row is marked as matched (`hs. Katalog` = 1) with confidence 'sicher'.\n",
+    "    `entry` is one row of `entry_df`; the image URL is built from the module-level\n",
+    "    `dod_id` mapping and the zero-padded page number. `df` is modified in place.\n",
+    "    \"\"\"\n",
+    "    df.at[idx, 'hs. Katalog'] = 1\n",
+    "    df.at[idx, 'hs. Katalog Konfidenz'] = 'sicher'\n",
+    "    df.at[idx, 'hs. Katalogband'] = entry['volume']\n",
+    "    df.at[idx, 'hs. Katalogseite Digitalisat'] = str(entry['page number'])\n",
+    "    df.at[idx, 'Wissensklasse'] = entry['category']\n",
+    "    df.at[idx, 'Wissensunterklasse'] = entry['subcategory']\n",
+    "    df.at[idx, 'Formatangabe'] = entry['format']\n",
+    "    df.at[idx, 'hs. Katalogseite Handschrift'] = entry['handwritten page number']\n",
+    "    df.at[idx, 'hs. Katalogeintrag ID'] = entry['entry_ID']\n",
+    "    df.at[idx, 'hs. Katalogeintrag'] = entry['entry']\n",
+    "    df.at[idx, 'hs. Katalog Image URL'] = f\"https://iiif.onb.ac.at/images/DOD/{dod_id[str(entry['volume'])]}/{entry['page number']:08}.jp2/full/full/0/native.jpg\"\n",
+    "\n",
+    "\n",
+    "def _clear_hw_match(df, idx):\n",
+    "    \"\"\"Remove the handwritten-catalogue match from row `idx` of `df` (in place).\n",
+    "\n",
+    "    Sets `hs. Katalog` to 0, blanks all catalogue-derived columns and resets\n",
+    "    `copy_from` to -1.\n",
+    "    \"\"\"\n",
+    "    df.at[idx, 'hs. Katalog'] = 0\n",
+    "    for column in (\n",
+    "        'hs. Katalog Konfidenz',\n",
+    "        'hs. Katalogband',\n",
+    "        'hs. Katalogseite Digitalisat',\n",
+    "        'Wissensklasse',\n",
+    "        'Wissensunterklasse',\n",
+    "        'Formatangabe',\n",
+    "        'hs. Katalogseite Handschrift',\n",
+    "        'hs. Katalogeintrag ID',\n",
+    "        'hs. Katalogeintrag',\n",
+    "        'hs. Katalog Image URL',\n",
+    "    ):\n",
+    "        df.at[idx, column] = ''\n",
+    "    df.at[idx, 'copy_from'] = -1\n",
+    "\n",
+    "\n",
+    "# Every row with this exact title gets the same catalogue entry (entry_df row 4652).\n",
+    "lettres_edifiantes = comp_BE_no_dup[comp_BE_no_dup['Titel'] == 'Lettres Edifiantes Et Curieuses, Ecrites Des Missions Etrangeres, par quelques Missionaires de la Compagnie de Jesus']\n",
+    "l_m = entry_df.loc[4652]\n",
+    "\n",
+    "for idx in lettres_edifiantes.index:\n",
+    "    _set_hw_match(comp_BE_no_dup, idx, l_m)\n",
+    "\n",
+    "# Manual corrections: row label in comp_BE_no_dup -> replacement entry_df label,\n",
+    "# or '-' when the existing match is to be removed without replacement.\n",
+    "delete_hw_match_for = [14220, 13616, 15587]\n",
+    "take_from_entry_df = [3411, '-', 3247]\n",
+    "\n",
+    "for num, entry_label in zip(delete_hw_match_for, take_from_entry_df):\n",
+    "    if entry_label != '-':\n",
+    "        _set_hw_match(comp_BE_no_dup, num, entry_df.loc[entry_label])\n",
+    "    else:\n",
+    "        _clear_hw_match(comp_BE_no_dup, num)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd067c70-ba6d-4d82-9489-3e9b332dad3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Export the post-processed table to the work-in-progress Excel file.\n",
+    "comp_BE_no_dup.to_excel(excel_writer='../Daten/Vorhersagen/WIP_final_BE_3.xlsx')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12fcd3ad-222e-4afe-baea-7be69b3ae5cc",
    "metadata": {},
    "outputs": [],
    "source": []