diff --git a/Daten/Vorhersagen/WIP_final_BE_4.xlsx b/Daten/Vorhersagen/WIP_final_BE_4.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..924f96a707e2989dc1a5073c0fc333ea351b3cb8
Binary files /dev/null and b/Daten/Vorhersagen/WIP_final_BE_4.xlsx differ
diff --git a/Notebooks/Completing_BE_data.ipynb b/Notebooks/Completing_BE_data.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..5258ed79b43b8bf322fddbf7435f0a8ccd817c3a
--- /dev/null
+++ b/Notebooks/Completing_BE_data.ipynb
@@ -0,0 +1,980 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "a910c4c5-3a61-462b-ac07-c9545fe7ae40",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import re\n",
+    "import numpy as np\n",
+    "from thefuzz import fuzz, process\n",
+    "from tqdm.notebook import tqdm\n",
+    "import matplotlib.pyplot as plt\n",
+    "import requests\n",
+    "import json\n",
+    "from IPython.display import display\n",
+    "\n",
+    "pd.set_option('display.max_colwidth', None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "83013484-2a55-4819-8b30-b2f8cbbe7981",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Load the Einträge.xlsx workbook into entry_df; its first column becomes the index.\n",
+    "entry_df = pd.read_excel('../Daten/Katalogabgleich/Einträge.xlsx', index_col=0)\n",
+    "\n",
+    "def prepare_string(string):\n",
+    "    \"\"\"Normalise a text so that it can be compared by fuzzy matching.\n",
+    "\n",
+    "    A fixed set of accented vowels is replaced by the plain letter and 'æ'\n",
+    "    by 'ae'. Afterwards the characters . , : ; ( ) ¬ are removed everywhere,\n",
+    "    as are the sequences '= ' and '# ' and a leading numbering (a run of the\n",
+    "    letters C, L, X, V, I or of digits followed by a space, digits followed\n",
+    "    by dots and a space, or leading dots).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    string : str\n",
+    "        The raw text.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    str\n",
+    "        The cleaned text.\n",
+    "    \"\"\"\n",
+    "    new = re.sub(r'[àáâãå]', 'a', string)\n",
+    "    new = re.sub(r'[èéêë]', 'e', new)\n",
+    "    new = re.sub(r'[ìíîï]', 'i', new)\n",
+    "    new = re.sub(r'[òóôõ]', 'o', new)\n",
+    "    new = re.sub(r'[ùúû]', 'u', new)\n",
+    "    new = re.sub(r'æ', 'ae', new)\n",
+    "    # Raw string: '\\d' and '\\.' are regex escapes, not valid Python string escapes.\n",
+    "    new = re.sub(r'[.,:;()¬]|^[CLXVI]+? |^\\d+? |^\\d+?\\.+? |^\\.+ ?|= |# ', '', new)\n",
+    "    return new\n",
+    "\n",
+    "# Store the cleaned form of every entry next to the original text.\n",
+    "entry_df['cleaned entry'] = entry_df['entry'].apply(prepare_string)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 142,
+   "id": "1279d6ea-48a2-4f65-9cfa-b1f92eac16f1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Load WIP_final_BE_3.xlsx into BE_df; its first column becomes the index.\n",
+    "BE_df = pd.read_excel('../Daten/Vorhersagen/WIP_final_BE_3.xlsx', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "id": "ddf5d11c-5f72-4bc8-ab8f-0a1e0f01e60d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Record 22871 has no metadata, so its fields are filled in by hand.\n",
+    "i = 22871\n",
+    "manual_metadata = {\n",
+    "    'Titel': 'De La coronica general de toda Espana y especialmente del Reyno de Valencia. etc',\n",
+    "    'Autor': 'Beuter, Pero-Anton',\n",
+    "    'Mitwirkender': '',\n",
+    "    # Years are written as numbers (not strings) so the year columns are not upcast to object.\n",
+    "    'Anfang Veröffentlichungsdatum': 1546,\n",
+    "    'Ende Veröffentlichungsdatum': 1551,\n",
+    "    'Veröffentlichungsdatum': '1546-1551',\n",
+    "    'Veröffentlichungsort': 'Valencia',\n",
+    "    'Veröffentlichungsort (normiert)': 'Valencia',\n",
+    "    'Sprache': 'Spanish',\n",
+    "}\n",
+    "for column, value in manual_metadata.items():\n",
+    "    BE_df.at[i, column] = value"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "317bb2ea-42b9-4b34-b13e-d25b4c66da2b",
+   "metadata": {},
+   "source": [
+    "# nicht-BE-Signaturen matchen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "e9c00ca9-c051-4e3a-93cf-133031ca9e7f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Keep only the rows whose Signatur contains neither 'BE' nor 'Ink'.\n",
+    "signatur_has_BE = BE_df['Signatur'].str.contains('BE')\n",
+    "signatur_has_Ink = BE_df['Signatur'].str.contains('Ink')\n",
+    "no_BE = BE_df[~(signatur_has_BE | signatur_has_Ink)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "ca242a1c-baf8-4183-a565-a3797d6f4747",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a7b5d48f5fcd4cbbbf56291a871746c5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/804 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "scorer = fuzz.token_set_ratio\n",
+    "keys = ['Autor', 'Mitwirkender', 'Titel', 'Veröffentlichungsort', 'Anfang Veröffentlichungsdatum']\n",
+    "\n",
+    "\n",
+    "def build_query(record):\n",
+    "    \"\"\"Join the non-missing fields listed in `keys` into one space-separated search string.\"\"\"\n",
+    "    parts = []\n",
+    "    for field in keys:\n",
+    "        value = record[field]\n",
+    "        if pd.isna(value):\n",
+    "            continue\n",
+    "        if field in ('Autor', 'Mitwirkender'):\n",
+    "            # For 'name, first name' keep the part before the comma, then only its first word.\n",
+    "            value = value.split(',')[0].split(' ')[0]\n",
+    "        elif field == 'Titel':\n",
+    "            value = prepare_string(value)\n",
+    "        elif field == 'Anfang Veröffentlichungsdatum':\n",
+    "            value = str(int(value))\n",
+    "        # 'Veröffentlichungsort' is used unchanged.\n",
+    "        parts.append(value + ' ')\n",
+    "    return ''.join(parts)\n",
+    "\n",
+    "\n",
+    "better_matches = []\n",
+    "for row_id, record in tqdm(no_BE.iterrows(), total=len(no_BE)):\n",
+    "    query = build_query(record)\n",
+    "    candidates = process.extract(query, entry_df['cleaned entry'], scorer=scorer, limit=5)\n",
+    "    # Flatten the first three items of every candidate into one row after the query.\n",
+    "    row_values = [query]\n",
+    "    for candidate in candidates:\n",
+    "        row_values.extend((candidate[0], candidate[1], candidate[2]))\n",
+    "    better_matches.append(row_values)\n",
+    "\n",
+    "matches_df = pd.DataFrame(\n",
+    "    better_matches,\n",
+    "    columns=[\n",
+    "        'input',\n",
+    "        'match_1', 'score_1', 'id_1',\n",
+    "        'match_2', 'score_2', 'id_2',\n",
+    "        'match_3', 'score_3', 'id_3',\n",
+    "        'match_4', 'score_4', 'id_4',\n",
+    "        'match_5', 'score_5', 'id_5',\n",
+    "    ],\n",
+    ")\n",
+    "matches_df['control'] = ''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "e9dd6e64-a45d-4f25-9ad1-624cfc5268fb",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Remove the five score columns and put the index of each no_BE row in second position as 'input_id'.\n",
+    "matches_df_no_score = matches_df.drop(columns=['score_1', 'score_2', 'score_3', 'score_4', 'score_5'])\n",
+    "matches_df_no_score.insert(loc=1, column='input_id', value=no_BE.index)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "b1f7e1a3-b886-496c-a9ca-ec484d196c24",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Write the candidate table (without its index) to no_BE_Kandidaten.xlsx.\n",
+    "matches_df_no_score.to_excel('../Daten/Katalogabgleich/Kandidaten/no_BE_Kandidaten.xlsx', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "a4639759-8344-452e-96ce-cfca485165a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "9591"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Count the rows with Wappenklassifizierung A, B or C, or with 'hs. Katalog' equal to 1.\n",
+    "wappen_ABC = BE_df['Wappenklassifizierung'].isin(['A', 'B', 'C'])\n",
+    "hs_katalog_is_1 = BE_df['hs. Katalog'] == 1\n",
+    "len(BE_df[wappen_ABC | hs_katalog_is_1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c964166c-dfad-4c55-bffd-0174e430e0db",
+   "metadata": {},
+   "source": [
+    "# Fehlende Inkunabeln in BE_df Tabelle eintragen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 184,
+   "id": "88ad2906-9c95-4033-b922-791360ec3d6c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "23670\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Hand-entered metadata, one dict per 'Ink ...' signature; turned into the DataFrame ink_add_df below.\n",
+    "# Only the entry for 'Ink 5.C.11' carries a 'Barcode'; no entry sets 'Ende Veröffentlichungsdatum'.\n",
+    "ink_corrections = [\n",
+    "    {'Signatur': 'Ink 12.F.7', 'Titel': 'Expositio super toto psalterio', 'Autor': 'Turrecremata, Johannes de', 'Mitwirkender': 'Udalricus Gallus', 'Veröffentlichungsdatum': '4. Oktober 1470', 'Anfang Veröffentlichungsdatum': 1470, 'Veröffentlichungsort': 'Rome', 'Veröffentlichungsort (normiert)': 'Rom', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 2.E.13', 'Titel': 'Psalterium latinum', 'Autor': '', 'Mitwirkender': 'Reyser, Georg', 'Veröffentlichungsdatum': 'nicht nach 1489', 'Anfang Veröffentlichungsdatum': 1489, 'Veröffentlichungsort': 'Würzburg', 'Veröffentlichungsort (normiert)': 'Würzburg', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 3.F.32', 'Titel': 'Erotemata', 'Autor': 'Chalcondylas, Demetrius', 'Mitwirkender': 'Uldericus Scinzenzeler', 'Veröffentlichungsdatum': 'um 1493', 'Anfang Veröffentlichungsdatum': 1493, 'Veröffentlichungsort': 'Milan', 'Veröffentlichungsort (normiert)': 'Mailand', 'Sprache': 'Ancient Greek (to 1453)'},\n",
+    "    {'Signatur': 'Ink 32-248', 'Titel': 'De animalibus', 'Autor': 'Avicenna', 'Mitwirkender': 'Scotus, Michael; Gregori, Giovanni de', 'Veröffentlichungsdatum': 'um 1500', 'Anfang Veröffentlichungsdatum': 1500, 'Veröffentlichungsort': 'Venedig', 'Veröffentlichungsort (normiert)': 'Venedig', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 4.F.8', 'Titel': 'Opera', 'Autor': 'Sallustius Crispus, Gaius', 'Mitwirkender': 'Vindelinus de Spira', 'Veröffentlichungsdatum': 'um 1470', 'Anfang Veröffentlichungsdatum': 1470, 'Veröffentlichungsort': 'Venice', 'Veröffentlichungsort (normiert)': 'Venedig', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 5.B.1', 'Titel': 'Historia naturalis', 'Autor': 'Plinius Secundus, Gaius', 'Mitwirkender': 'Jenson, Nicolaus', 'Veröffentlichungsdatum': '1472', 'Anfang Veröffentlichungsdatum': 1472, 'Veröffentlichungsort': 'Venice', 'Veröffentlichungsort (normiert)': 'Venedig', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 5.C.11', 'Barcode': 'dtl_5304244', 'Titel': 'Lancelot du Lac', 'Autor': '', 'Mitwirkender': 'Vérard, Antoine', 'Veröffentlichungsdatum': '1494', 'Anfang Veröffentlichungsdatum': 1494, 'Veröffentlichungsort': 'Paris', 'Veröffentlichungsort (normiert)': 'Paris', 'Sprache': 'French'},\n",
+    "    {'Signatur': 'Ink 5.E.16', 'Titel': 'Missale mixtum secundum regulam Beati Isidori, dictum Mozarabes', 'Autor': '', 'Mitwirkender': 'Hagembach, Peter', 'Veröffentlichungsdatum': '9. Jan. 1500', 'Anfang Veröffentlichungsdatum': 1500, 'Veröffentlichungsort': 'Toledo', 'Veröffentlichungsort (normiert)': 'Toledo', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 5.E.19', 'Titel': 'Commentarii', 'Autor': 'Caesar, Gaius Julius', 'Mitwirkender': 'Zarotus, Antonius', 'Veröffentlichungsdatum': '10. Feb. 1477', 'Anfang Veröffentlichungsdatum': 1477, 'Veröffentlichungsort': 'Milan', 'Veröffentlichungsort (normiert)': 'Mailand', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 5.E.24', 'Titel': 'Orthographia', 'Autor': 'Tortellius, Johannes', 'Mitwirkender': 'Pincius, Philippus', 'Veröffentlichungsdatum': '12. Apr. 1493', 'Anfang Veröffentlichungsdatum': 1493, 'Veröffentlichungsort': 'Venice', 'Veröffentlichungsort (normiert)': 'Venedig', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 5.E.25', 'Titel': 'Metamorphosis sive De asino aureo', 'Autor': 'Apuleius, Madaurensis', 'Mitwirkender': 'Faelli, Benedictus Hectoris', 'Veröffentlichungsdatum': '1. Aug. 1500', 'Anfang Veröffentlichungsdatum': 1500, 'Veröffentlichungsort': 'Bologna', 'Veröffentlichungsort (normiert)': 'Bologna', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 7.E.12', 'Titel': 'Scriptores rei rusticae', 'Autor': '', 'Mitwirkender': 'Beroaldus, Philippus; Benedictus Hectoris', 'Veröffentlichungsdatum': '19. Sept. 1494', 'Anfang Veröffentlichungsdatum': 1494, 'Veröffentlichungsort': 'Bologna', 'Veröffentlichungsort (normiert)': 'Bologna', 'Sprache': 'Latin'},\n",
+    "    {'Signatur': 'Ink 9.F.2', 'Titel': 'Liber de vita christi ac pontificum omnium', 'Autor': 'Platina, Bartholomaeus', 'Mitwirkender': 'Koberger, Anton', 'Veröffentlichungsdatum': '11. Aug. 1481', 'Anfang Veröffentlichungsdatum': 1481, 'Veröffentlichungsort': 'Nürnberg', 'Veröffentlichungsort (normiert)': 'Nürnberg', 'Sprache': 'Latin'}\n",
+    "]\n",
+    "\n",
+    "# One past the largest index value in BE_df; the new rows are numbered from here onwards.\n",
+    "max_BE_index = int(BE_df.index.max()) + 1\n",
+    "print(max_BE_index)\n",
+    "ink_add_df = pd.DataFrame(ink_corrections)\n",
+    "ink_add_df.index = range(max_BE_index, max_BE_index + len(ink_add_df))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 185,
+   "id": "9408275f-b159-454a-ab52-d77d86ef3ce9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Signatur</th>\n",
+       "      <th>Barcode</th>\n",
+       "      <th>Titel</th>\n",
+       "      <th>Autor</th>\n",
+       "      <th>Mitwirkender</th>\n",
+       "      <th>Anfang Veröffentlichungsdatum</th>\n",
+       "      <th>Ende Veröffentlichungsdatum</th>\n",
+       "      <th>Veröffentlichungsdatum</th>\n",
+       "      <th>Veröffentlichungsort</th>\n",
+       "      <th>Veröffentlichungsort (normiert)</th>\n",
+       "      <th>...</th>\n",
+       "      <th>hs. Katalogseite Digitalisat</th>\n",
+       "      <th>Wissensklasse</th>\n",
+       "      <th>Wissensunterklasse</th>\n",
+       "      <th>Formatangabe</th>\n",
+       "      <th>hs. Katalogseite Handschrift</th>\n",
+       "      <th>hs. Katalogeintrag ID</th>\n",
+       "      <th>hs. Katalogeintrag</th>\n",
+       "      <th>hs. Katalog Image URL</th>\n",
+       "      <th>dup_title</th>\n",
+       "      <th>copy_from</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>23376</th>\n",
+       "      <td>*28.A.79.(Vol.1)</td>\n",
+       "      <td>Z222907107</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23383</th>\n",
+       "      <td>*28.A.79.(Vol.10)</td>\n",
+       "      <td>Z222908100</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23384</th>\n",
+       "      <td>*28.A.79.(Vol.12)</td>\n",
+       "      <td>Z222908306</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23385</th>\n",
+       "      <td>*28.A.79.(Vol.13)</td>\n",
+       "      <td>Z222908409</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23386</th>\n",
+       "      <td>*28.A.79.(Vol.14)</td>\n",
+       "      <td>Z222908501</td>\n",
+       "      <td>Histoire des ouvrages des scavans</td>\n",
+       "      <td>Basnage de Beauval, Henri</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1687.0</td>\n",
+       "      <td>1709.0</td>\n",
+       "      <td>1687-1709</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>Rotterdam</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23636</th>\n",
+       "      <td>Ink 9.F.4</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Opera</td>\n",
+       "      <td>Sallustius Crispus, Gaius</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1481.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>23 Dec. 1481</td>\n",
+       "      <td>Venice</td>\n",
+       "      <td>Venedig</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22640</th>\n",
+       "      <td>Ink 9.F.5</td>\n",
+       "      <td>1460328-10</td>\n",
+       "      <td>Biblia ; Interpretationes Hebraicorum nominum</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Wild, Leonhard</td>\n",
+       "      <td>1481.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1481</td>\n",
+       "      <td>Venedig</td>\n",
+       "      <td>Venedig</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>True</td>\n",
+       "      <td>-3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22650</th>\n",
+       "      <td>SA.71.E.58</td>\n",
+       "      <td>Z252861302</td>\n",
+       "      <td>Dialogue sur la musique des anciens</td>\n",
+       "      <td>Chateauneuf, Francois abbe de</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1725.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1725</td>\n",
+       "      <td>Paris</td>\n",
+       "      <td>Paris</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22651</th>\n",
+       "      <td>SA.71.F.74</td>\n",
+       "      <td>Z252867808</td>\n",
+       "      <td>Friderici Adolfi Lampe De Cymbalis Veterum Libri Tres</td>\n",
+       "      <td>Ember, Paul</td>\n",
+       "      <td>Hase, Cornelius &lt;&lt;von&gt;&gt;; Röell, Herman Alexander</td>\n",
+       "      <td>1703.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1703</td>\n",
+       "      <td>Trajecti Ad Rhenum</td>\n",
+       "      <td>Utrecht</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23439</th>\n",
+       "      <td>SA.73.B.48</td>\n",
+       "      <td>Z25920770X</td>\n",
+       "      <td>Claudii Ptolomaei harmonicorum libri tres. Ex Codd. Mss. Undecim, nunc primum graece editus. Johannes Wallis ... recensuit, ed. (etc.)</td>\n",
+       "      <td>Ptolemaeus, Claudius</td>\n",
+       "      <td>Wallis, Johannes</td>\n",
+       "      <td>1682.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1682</td>\n",
+       "      <td>Oxford</td>\n",
+       "      <td>Oxford</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>False</td>\n",
+       "      <td>-1.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>22874 rows × 35 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                Signatur     Barcode   \n",
+       "23376   *28.A.79.(Vol.1)  Z222907107  \\\n",
+       "23383  *28.A.79.(Vol.10)  Z222908100   \n",
+       "23384  *28.A.79.(Vol.12)  Z222908306   \n",
+       "23385  *28.A.79.(Vol.13)  Z222908409   \n",
+       "23386  *28.A.79.(Vol.14)  Z222908501   \n",
+       "...                  ...         ...   \n",
+       "23636          Ink 9.F.4         NaN   \n",
+       "22640          Ink 9.F.5  1460328-10   \n",
+       "22650         SA.71.E.58  Z252861302   \n",
+       "22651         SA.71.F.74  Z252867808   \n",
+       "23439         SA.73.B.48  Z25920770X   \n",
+       "\n",
+       "                                                                                                                                        Titel   \n",
+       "23376                                                                                                       Histoire des ouvrages des scavans  \\\n",
+       "23383                                                                                                       Histoire des ouvrages des scavans   \n",
+       "23384                                                                                                       Histoire des ouvrages des scavans   \n",
+       "23385                                                                                                       Histoire des ouvrages des scavans   \n",
+       "23386                                                                                                       Histoire des ouvrages des scavans   \n",
+       "...                                                                                                                                       ...   \n",
+       "23636                                                                                                                                   Opera   \n",
+       "22640                                                                                           Biblia ; Interpretationes Hebraicorum nominum   \n",
+       "22650                                                                                                     Dialogue sur la musique des anciens   \n",
+       "22651                                                                                   Friderici Adolfi Lampe De Cymbalis Veterum Libri Tres   \n",
+       "23439  Claudii Ptolomaei harmonicorum libri tres. Ex Codd. Mss. Undecim, nunc primum graece editus. Johannes Wallis ... recensuit, ed. (etc.)   \n",
+       "\n",
+       "                               Autor   \n",
+       "23376      Basnage de Beauval, Henri  \\\n",
+       "23383      Basnage de Beauval, Henri   \n",
+       "23384      Basnage de Beauval, Henri   \n",
+       "23385      Basnage de Beauval, Henri   \n",
+       "23386      Basnage de Beauval, Henri   \n",
+       "...                              ...   \n",
+       "23636      Sallustius Crispus, Gaius   \n",
+       "22640                            NaN   \n",
+       "22650  Chateauneuf, Francois abbe de   \n",
+       "22651                    Ember, Paul   \n",
+       "23439           Ptolemaeus, Claudius   \n",
+       "\n",
+       "                                           Mitwirkender   \n",
+       "23376                                               NaN  \\\n",
+       "23383                                               NaN   \n",
+       "23384                                               NaN   \n",
+       "23385                                               NaN   \n",
+       "23386                                               NaN   \n",
+       "...                                                 ...   \n",
+       "23636                                               NaN   \n",
+       "22640                                    Wild, Leonhard   \n",
+       "22650                                               NaN   \n",
+       "22651  Hase, Cornelius <<von>>; Röell, Herman Alexander   \n",
+       "23439                                  Wallis, Johannes   \n",
+       "\n",
+       "      Anfang Veröffentlichungsdatum Ende Veröffentlichungsdatum   \n",
+       "23376                        1687.0                      1709.0  \\\n",
+       "23383                        1687.0                      1709.0   \n",
+       "23384                        1687.0                      1709.0   \n",
+       "23385                        1687.0                      1709.0   \n",
+       "23386                        1687.0                      1709.0   \n",
+       "...                             ...                         ...   \n",
+       "23636                        1481.0                         NaN   \n",
+       "22640                        1481.0                         NaN   \n",
+       "22650                        1725.0                         NaN   \n",
+       "22651                        1703.0                         NaN   \n",
+       "23439                        1682.0                         NaN   \n",
+       "\n",
+       "      Veröffentlichungsdatum Veröffentlichungsort   \n",
+       "23376              1687-1709            Rotterdam  \\\n",
+       "23383              1687-1709            Rotterdam   \n",
+       "23384              1687-1709            Rotterdam   \n",
+       "23385              1687-1709            Rotterdam   \n",
+       "23386              1687-1709            Rotterdam   \n",
+       "...                      ...                  ...   \n",
+       "23636           23 Dec. 1481               Venice   \n",
+       "22640                   1481              Venedig   \n",
+       "22650                   1725                Paris   \n",
+       "22651                   1703   Trajecti Ad Rhenum   \n",
+       "23439                   1682               Oxford   \n",
+       "\n",
+       "      Veröffentlichungsort (normiert)  ... hs. Katalogseite Digitalisat   \n",
+       "23376                       Rotterdam  ...                          NaN  \\\n",
+       "23383                       Rotterdam  ...                          NaN   \n",
+       "23384                       Rotterdam  ...                          NaN   \n",
+       "23385                       Rotterdam  ...                          NaN   \n",
+       "23386                       Rotterdam  ...                          NaN   \n",
+       "...                               ...  ...                          ...   \n",
+       "23636                         Venedig  ...                          NaN   \n",
+       "22640                         Venedig  ...                          NaN   \n",
+       "22650                           Paris  ...                          NaN   \n",
+       "22651                         Utrecht  ...                          NaN   \n",
+       "23439                          Oxford  ...                          NaN   \n",
+       "\n",
+       "      Wissensklasse Wissensunterklasse  Formatangabe   \n",
+       "23376           NaN                NaN           NaN  \\\n",
+       "23383           NaN                NaN           NaN   \n",
+       "23384           NaN                NaN           NaN   \n",
+       "23385           NaN                NaN           NaN   \n",
+       "23386           NaN                NaN           NaN   \n",
+       "...             ...                ...           ...   \n",
+       "23636           NaN                NaN           NaN   \n",
+       "22640           NaN                NaN           NaN   \n",
+       "22650           NaN                NaN           NaN   \n",
+       "22651           NaN                NaN           NaN   \n",
+       "23439           NaN                NaN           NaN   \n",
+       "\n",
+       "       hs. Katalogseite Handschrift  hs. Katalogeintrag ID   \n",
+       "23376                           NaN                    NaN  \\\n",
+       "23383                           NaN                    NaN   \n",
+       "23384                           NaN                    NaN   \n",
+       "23385                           NaN                    NaN   \n",
+       "23386                           NaN                    NaN   \n",
+       "...                             ...                    ...   \n",
+       "23636                           NaN                    NaN   \n",
+       "22640                           NaN                    NaN   \n",
+       "22650                           NaN                    NaN   \n",
+       "22651                           NaN                    NaN   \n",
+       "23439                           NaN                    NaN   \n",
+       "\n",
+       "       hs. Katalogeintrag hs. Katalog Image URL  dup_title  copy_from  \n",
+       "23376                 NaN                   NaN       True       -1.0  \n",
+       "23383                 NaN                   NaN       True       -1.0  \n",
+       "23384                 NaN                   NaN       True       -1.0  \n",
+       "23385                 NaN                   NaN       True       -1.0  \n",
+       "23386                 NaN                   NaN       True       -1.0  \n",
+       "...                   ...                   ...        ...        ...  \n",
+       "23636                 NaN                   NaN       True       -3.0  \n",
+       "22640                 NaN                   NaN       True       -3.0  \n",
+       "22650                 NaN                   NaN      False       -1.0  \n",
+       "22651                 NaN                   NaN      False       -1.0  \n",
+       "23439                 NaN                   NaN      False       -1.0  \n",
+       "\n",
+       "[22874 rows x 35 columns]"
+      ]
+     },
+     "execution_count": 185,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Stack both frames and order the rows by their 'Signatur' value in one expression.\n",
+    "BE_with_Ink_df = pd.concat([BE_df, ink_add_df]).sort_values(by='Signatur')\n",
+    "BE_with_Ink_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd4ecfac-f115-4f61-bb8c-557505db00e6",
+   "metadata": {},
+   "source": [
+    "# hs. Matches der Inkunabeln eintragen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 157,
+   "id": "f8f8d054-aa6d-4629-8cdc-ddfae03e812e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "ink_matches = pd.read_excel('../Daten/Katalog/Inkunabeln-Eugeniana.xlsx')\n",
+    "ink_matches_dropna = ink_matches.dropna(subset=['entry_ID'])  # only rows that carry an entry_ID"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 158,
+   "id": "41d1a6ef-6751-4efb-bb19-38edfbe6b254",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "entries = pd.read_excel('../Daten/Katalogabgleich/Einträge.xlsx', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 193,
+   "id": "51d82520-cf1f-4369-81ea-2f146be81b65",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "dod_id = {  # catalogue volume (as a string) -> ID used in the IIIF image URLs\n",
+    "    '14.376': 51202,\n",
+    "    '14.377': 51184,\n",
+    "    '14.378': 51219,\n",
+    "}\n",
+    "\n",
+    "# Row label in BE_with_Ink_df -> list of matched row labels in `entries`.\n",
+    "BE_ids_matches = {}\n",
+    "# Loop-invariant, so computed once: the target signatures without dots and spaces.\n",
+    "BE_sigs = BE_with_Ink_df['Signatur'].str.replace(r'\\.| ', '', regex=True)\n",
+    "\n",
+    "for ink_idx, row in ink_matches_dropna.iterrows():\n",
+    "    sig = row['Signatur'].replace('.', '').replace(' ', '')\n",
+    "    entry_index = entries[entries['entry_ID'] == row['entry_ID']].index\n",
+    "    BE_index = BE_sigs[BE_sigs == sig].index\n",
+    "    if len(BE_index) == 0:\n",
+    "        print('Signature', sig, 'not found')\n",
+    "    elif len(BE_index) > 1:\n",
+    "        print('More than one match for signature', sig)\n",
+    "    elif len(entry_index) == 0:  # used to raise IndexError; report it and carry on\n",
+    "        print('Entry ID', row['entry_ID'], 'not found for signature', sig)\n",
+    "    else:\n",
+    "        BE_ids_matches.setdefault(BE_index[0], []).append(entry_index[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 214,
+   "id": "2b30e44b-fcb1-4256-9213-a57d59b90a57",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys([23450, 23509, 23595, 23543, 23488, 23475, 23508, 23573, 23463, 23468, 23566, 23496, 23478, 23531, 23557, 23621, 23670, 23638, 22618, 23671, 23552, 23600, 23614, 23539, 23479, 23452, 23592, 23513, 23563, 23629, 23560, 23480, 23446, 23519, 23669, 23481, 23447, 23593, 23565, 23529, 23536, 23538, 23641, 23672, 23632, 23589, 23646, 23532, 23547, 23516, 23472, 22619, 23612, 23630, 23476, 23576, 23673, 22621, 23515, 23618, 22622, 23619, 23572, 23651, 23489, 23527, 23665, 23455, 23533, 23631, 23564, 23449, 23492, 23640, 23659, 23483, 23652, 23554, 23570, 23521, 23627, 22623, 23626, 23548, 23490, 23502, 23577, 23517, 23493, 23556, 23603, 23654, 23500, 23674, 23465, 23474, 22625, 23491, 23616, 23608, 23507, 23551, 23610, 23540, 23562, 23571, 23494, 23663, 23473, 23530, 23645, 23585, 23624, 23588, 23458, 23578, 22626, 22627, 23469, 22628, 23526, 23675, 22629, 23445, 23676, 22630, 23657, 23598, 23639, 22631, 23510, 23511, 23512, 23459, 23499, 23451, 23653, 23622, 23677, 23597, 23678, 23544, 23582, 23628, 23666, 23679, 23680, 22632, 23574, 23537, 23524, 23606, 23604, 23485, 23581, 23613, 23609, 23466, 23504, 23460, 23546, 23664, 23497, 23471, 23584, 23528, 23607, 23575, 23611, 23482, 23656, 23586, 23506, 23561, 23658, 22633, 23569, 23545, 23596, 22634, 23580, 23462, 23643, 23650, 23525, 23623, 23542, 23467, 23487, 22635, 23464, 23501, 22636, 23681, 23649, 23662, 23520, 23587, 23590, 23448, 23503, 23484, 23498, 23615, 23661, 23591, 23559, 23602, 22637, 23594, 23642, 23523, 23568, 23535, 23605, 23617, 23668, 23456, 23634, 23549, 22638, 23599, 23555, 23637, 23682, 22639, 23660, 23583, 23461, 23636, 22640])"
+      ]
+     },
+     "execution_count": 214,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "BE_ids_matches.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 216,
+   "id": "c8b7b940-ef03-49f0-8fc4-d174faee86fe",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Transfer the handwritten-catalogue data of every match into BE_with_Ink_df.\n",
+    "# A row with several matches gets one ' | '-separated value per column.\n",
+    "for BE_id, matches in BE_ids_matches.items():\n",
+    "    # `matches` holds row labels of `entries`; fetch each matched row once.\n",
+    "    hw_entries = [entries.loc[m] for m in matches]\n",
+    "    # Target column -> value(s) to write, in the order the columns are written.\n",
+    "    match_dict = {\n",
+    "        # Flag (not a list): the row has at least one catalogue match.\n",
+    "        'hs. Katalog': 1,\n",
+    "        # Every match recorded here is marked as certain ('sicher').\n",
+    "        'hs. Katalog Konfidenz': ['sicher' for _e in hw_entries],\n",
+    "        'hs. Katalogband': [str(e['volume']) for e in hw_entries],\n",
+    "        'hs. Katalogseite Digitalisat': [str(e['page number']) for e in hw_entries],\n",
+    "        'Wissensklasse': [e['category'] for e in hw_entries],\n",
+    "        # Missing subcategory / format values become empty strings.\n",
+    "        'Wissensunterklasse': ['' if pd.isna(e['subcategory']) else e['subcategory'] for e in hw_entries],\n",
+    "        'Formatangabe': ['' if pd.isna(e['format']) else e['format'] for e in hw_entries],\n",
+    "        'hs. Katalogseite Handschrift': [e['handwritten page number'] for e in hw_entries],\n",
+    "        'hs. Katalogeintrag ID': [e['entry_ID'] for e in hw_entries],\n",
+    "        'hs. Katalogeintrag': [e['entry'] for e in hw_entries],\n",
+    "        # dod_id is keyed by the volume as a string; the page number is\n",
+    "        # zero-padded to 8 characters in the image URL.\n",
+    "        'hs. Katalog Image URL': [\n",
+    "            f\"https://iiif.onb.ac.at/images/DOD/{dod_id[str(e['volume'])]}/{e['page number']:08}.jp2/full/full/0/native.jpg\"\n",
+    "            for e in hw_entries\n",
+    "        ],\n",
+    "    }\n",
+    "\n",
+    "    for key, val in match_dict.items():\n",
+    "        # Only the flag is written as-is; every list is joined first.\n",
+    "        BE_with_Ink_df.at[BE_id, key] = val if key == 'hs. Katalog' else ' | '.join(val)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3288bfdc-876f-4aa7-89ea-448584339183",
+   "metadata": {},
+   "source": [
+    "# simple und komplexe Klassifizierung der Eugeniana-Daten"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 244,
+   "id": "06fbcb36-61b6-44d9-bb5c-79df81e02f89",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "determined: 2 expected: 2\n",
+      "determined: 1 expected: 1\n",
+      "determined: 1 expected: 1\n",
+      "determined: 0 expected: 0\n"
+     ]
+    }
+   ],
+   "source": [
+    "def complex_classify(BE_row):\n",
+    "    \"\"\"Return 2 if 'Wappenklassifizierung' is A/B/C or 'hs. Katalog Konfidenz' is 'sicher'; 1 if it is 'unsicher' or 'Anfang Veröffentlichungsdatum' <= 1736; else 0.\"\"\"\n",
+    "    if BE_row['Wappenklassifizierung'] in ('A', 'B', 'C') or BE_row['hs. Katalog Konfidenz'] == 'sicher':\n",
+    "        return 2\n",
+    "    if BE_row['hs. Katalog Konfidenz'] == 'unsicher':\n",
+    "        return 1\n",
+    "    return 1 if BE_row['Anfang Veröffentlichungsdatum'] <= 1736 else 0\n",
+    "\n",
+    "sample_ids = [22871, 2954, 6695, 9396]  # spot check on rows of non_BE_df with a known class\n",
+    "expected_class = [2, 1, 1, 0]\n",
+    "for ind, expected in zip(sample_ids, expected_class):\n",
+    "    print('determined:', complex_classify(non_BE_df.loc[ind]), 'expected:', expected)\n",
+    "\n",
+    "BE_with_Ink_df['Einfache Klassifizierung'] = BE_with_Ink_df['Wappenklassifizierung'].isin(['A', 'B', 'C']) | (BE_with_Ink_df['hs. Katalog'] == 1)\n",
+    "BE_with_Ink_df['Komplexe Klassifizierung'] = BE_with_Ink_df.apply(complex_classify, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 256,
+   "id": "b9ee6630-7122-44d9-9d2f-c8113e5a8e97",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Einfache Klassifizierung\n",
+       "False    13069\n",
+       "True      9805\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 256,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "BE_with_Ink_df['Einfache Klassifizierung'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 255,
+   "id": "175c1147-0063-45e4-8498-39ee390ea7a0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Komplexe Klassifizierung\n",
+       "0    10131\n",
+       "2     9568\n",
+       "1     3175\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 255,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "BE_with_Ink_df['Komplexe Klassifizierung'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ecc028b-9b46-4bf8-ae24-2aa9c58366bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "BE_with_Ink_df.to_excel('../Daten/Vorhersagen/WIP_final_BE_4.xlsx')  # index is written too (default), matching the index_col=0 reads above"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Notebooks/Non_BE_matching.ipynb b/Notebooks/Non_BE_matching.ipynb
deleted file mode 100644
index d3e0d151be1294683383470eb20e4450188dffc1..0000000000000000000000000000000000000000
--- a/Notebooks/Non_BE_matching.ipynb
+++ /dev/null
@@ -1,207 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "a910c4c5-3a61-462b-ac07-c9545fe7ae40",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import re\n",
-    "import numpy as np\n",
-    "from thefuzz import fuzz, process\n",
-    "from tqdm.notebook import tqdm\n",
-    "import matplotlib.pyplot as plt\n",
-    "import requests\n",
-    "import json\n",
-    "from IPython.display import display\n",
-    "\n",
-    "pd.set_option('display.max_colwidth', None)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "83013484-2a55-4819-8b30-b2f8cbbe7981",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "entry_df = pd.read_excel('../Daten/Katalogabgleich/Einträge.xlsx', index_col=0)\n",
-    "\n",
-    "def prepare_string(string):\n",
-    "    new = re.sub(r'[àáâãå]', 'a', string)\n",
-    "    new = re.sub(r'[èéêë]', 'e', new)\n",
-    "    new = re.sub(r'[ìíîï]', 'i', new)\n",
-    "    new = re.sub(r'[òóôõ]', 'o', new)\n",
-    "    new = re.sub(r'[ùúû]', 'u', new)\n",
-    "    new = re.sub(r'æ', 'ae', new)\n",
-    "    new = re.sub('[.,:;()¬]|^[CLXVI]+? |^\\d+? |^\\d+?\\.+? |^\\.+ ?|= |# ', '', new)\n",
-    "    return new\n",
-    "\n",
-    "entry_df['cleaned entry'] = entry_df['entry'].apply(lambda x: prepare_string(x))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "1279d6ea-48a2-4f65-9cfa-b1f92eac16f1",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "non_BE_df = pd.read_excel('../Daten/Vorhersagen/WIP_final_BE_3.xlsx', index_col=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "id": "ddf5d11c-5f72-4bc8-ab8f-0a1e0f01e60d",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# fix 22871 without metadata\n",
-    "i = 22871\n",
-    "non_BE_df.at[i, 'Titel'] = 'De La coronica general de toda Espana y especialmente del Reyno de Valencia. etc'\n",
-    "non_BE_df.at[i, 'Autor'] = 'Beuter, Pero-Anton'\n",
-    "non_BE_df.at[i, 'Mitwirkender'] = ''\n",
-    "non_BE_df.at[i, 'Anfang Veröffentlichungsdatum'] = '1546'\n",
-    "non_BE_df.at[i, 'Ende Veröffentlichungsdatum'] = '1551'\n",
-    "non_BE_df.at[i, 'Veröffentlichungsdatum'] = '1546-1551'\n",
-    "non_BE_df.at[i, 'Veröffentlichungsort'] = 'Valencia'\n",
-    "non_BE_df.at[i, 'Veröffentlichungsort (normiert)'] = 'Valencia'\n",
-    "non_BE_df.at[i, 'Sprache'] = 'Spanish'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "id": "e9c00ca9-c051-4e3a-93cf-133031ca9e7f",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "no_BE = non_BE_df[~(non_BE_df['Signatur'].str.contains('BE') | non_BE_df['Signatur'].str.contains('Ink'))]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "id": "ca242a1c-baf8-4183-a565-a3797d6f4747",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a7b5d48f5fcd4cbbbf56291a871746c5",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/804 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "better_matches = []\n",
-    "scorer = fuzz.token_set_ratio\n",
-    "\n",
-    "for index, row in tqdm(no_BE.iterrows(), total=len(no_BE)):\n",
-    "    keys = ['Autor', 'Mitwirkender', 'Titel', 'Veröffentlichungsort', 'Anfang Veröffentlichungsdatum']\n",
-    "    comb_string = ''\n",
-    "    for key in keys:\n",
-    "        val = row[key]\n",
-    "        if not pd.isna(val):\n",
-    "            if key == 'Autor' or key == 'Mitwirkender':\n",
-    "                if ',' in val:  # falls name, vorname\n",
-    "                    val = val.split(',')[0]\n",
-    "                val = val.split(' ')[0]\n",
-    "            elif key == 'Titel':\n",
-    "                val = prepare_string(val)\n",
-    "            elif key == 'Anfang Veröffentlichungsdatum':\n",
-    "                val = str(int(val))\n",
-    "            else: # key == 'Veröffentlichungsort'\n",
-    "                pass\n",
-    "            comb_string += val + ' '\n",
-    "    \n",
-    "    matches_lis = process.extract(comb_string, entry_df['cleaned entry'], scorer=scorer, limit=5)\n",
-    "    flat_matches = []\n",
-    "    for match in matches_lis:\n",
-    "        flat_matches.append(match[0])\n",
-    "        flat_matches.append(match[1])\n",
-    "        flat_matches.append(match[2])\n",
-    "    better_matches.append([comb_string] + flat_matches)\n",
-    "\n",
-    "matches_df = pd.DataFrame(better_matches, columns=['input', 'match_1', 'score_1', 'id_1', 'match_2', 'score_2', 'id_2', 'match_3', 'score_3', 'id_3', 'match_4', 'score_4', 'id_4', 'match_5', 'score_5', 'id_5'])\n",
-    "matches_df['control'] = ''"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "id": "e9dd6e64-a45d-4f25-9ad1-624cfc5268fb",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "matches_df_no_score = matches_df.drop(['score_1', 'score_2', 'score_3', 'score_4', 'score_5'], axis=1)\n",
-    "matches_df_no_score.insert(1, 'input_id', no_BE.index)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "id": "b1f7e1a3-b886-496c-a9ca-ec484d196c24",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "matches_df_no_score.to_excel('../Daten/Katalogabgleich/Kandidaten/no_BE_Kandidaten.xlsx', index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a4639759-8344-452e-96ce-cfca485165a4",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}