diff --git a/Daten/Vorhersagen/BE_final.xlsx b/Daten/Vorhersagen/BE_final.xlsx index d9db3718bfecc713720c8615353b068e2a5eb420..ecd26b736be57c349ee73c16bab96c94d3fced88 100644 Binary files a/Daten/Vorhersagen/BE_final.xlsx and b/Daten/Vorhersagen/BE_final.xlsx differ diff --git a/Notebooks/Completing_BE_data.ipynb b/Notebooks/Completing_BE_data.ipynb index 30e35afcc8071ef8f5523a841ce6fdbb789610cf..0326f2b06974ec1e7b5a7df9af78658e5c7dca52 100644 --- a/Notebooks/Completing_BE_data.ipynb +++ b/Notebooks/Completing_BE_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 2, "id": "a910c4c5-3a61-462b-ac07-c9545fe7ae40", "metadata": { "tags": [] @@ -859,7 +859,19 @@ }, { "cell_type": "code", - "execution_count": 260, + "execution_count": 3, + "id": "7349ee90-3dcd-4dd4-815c-65219adb0866", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "final_BE_df = pd.read_excel('../Daten/Vorhersagen/BE_final.xlsx', index_col=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "927de1ce-4152-4983-ad07-17d45de2cf47", "metadata": { "tags": [] @@ -869,30 +881,32 @@ "data": { "text/plain": [ "hs. Katalog Konfidenz\n", - "sicher 8339\n", - "sicher | sicher 190\n", + "sicher 9103\n", + "sicher | sicher 215\n", "sicher | unsicher 83\n", - "unsicher 26\n", + "unsicher 29\n", "unsicher | unsicher 19\n", "sicher | sicher | sicher 3\n", "sicher | sicher | sicher | sicher 1\n", "Name: count, dtype: int64" ] }, - "execution_count": 260, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "BE_with_Ink_df['hs. Katalog Konfidenz'].value_counts()" + "final_BE_df['hs. Katalog Konfidenz'].value_counts()" ] }, { "cell_type": "code", - "execution_count": 271, + "execution_count": 7, "id": "06fbcb36-61b6-44d9-bb5c-79df81e02f89", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -917,15 +931,15 @@ "sample_ids = [22871, 2954, 6695, 9396]\n", "expected_class = [2, 1, 1, 0]\n", "for ind in sample_ids:\n", - " print('determined:', complex_classify(non_BE_df.loc[ind]), 'expected:', expected_class[sample_ids.index(ind)])\n", + " print('determined:', complex_classify(final_BE_df.loc[ind]), 'expected:', expected_class[sample_ids.index(ind)])\n", " \n", - "BE_with_Ink_df['Einfache Klassifizierung'] = BE_with_Ink_df['Wappenklassifizierung'].isin(['A', 'B', 'C']) | (BE_with_Ink_df['hs. Katalog'] == 1)\n", - "BE_with_Ink_df['Komplexe Klassifizierung'] = BE_with_Ink_df.apply(lambda x: complex_classify(x), axis=1)" + "final_BE_df['Einfache Klassifizierung'] = final_BE_df['Wappenklassifizierung'].isin(['A', 'B', 'C']) | (final_BE_df['hs. Katalog'] == 1)\n", + "final_BE_df['Komplexe Klassifizierung'] = final_BE_df.apply(lambda x: complex_classify(x), axis=1)" ] }, { "cell_type": "code", - "execution_count": 276, + "execution_count": 8, "id": "b9ee6630-7122-44d9-9d2f-c8113e5a8e97", "metadata": { "tags": [] @@ -935,24 +949,24 @@ "data": { "text/plain": [ "Einfache Klassifizierung\n", - "False 13069\n", + "False 13063\n", "True 9805\n", "Name: count, dtype: int64" ] }, - "execution_count": 276, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "BE_with_Ink_df['Einfache Klassifizierung'].value_counts()" + "final_BE_df['Einfache Klassifizierung'].value_counts()" ] }, { "cell_type": "code", - "execution_count": 277, - "id": "175c1147-0063-45e4-8498-39ee390ea7a0", + "execution_count": 9, + "id": "2515e963-e3b9-4ef7-adff-5d9b482b77bc", "metadata": { "tags": [] }, @@ -961,32 +975,40 @@ "data": { "text/plain": [ "Komplexe Klassifizierung\n", - "0 10114\n", + "0 10109\n", "2 9761\n", - "1 2999\n", + "1 2998\n", "Name: count, dtype: int64" ] }, - "execution_count": 277, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "BE_with_Ink_df['Komplexe Klassifizierung'].value_counts()" + "final_BE_df['Komplexe Klassifizierung'].value_counts()" ] }, { "cell_type": "code", - "execution_count": 275, + "execution_count": null, "id": "9ecc028b-9b46-4bf8-ae24-2aa9c58366bb", "metadata": { "tags": [] }, "outputs": [], "source": [ - "BE_with_Ink_df.to_excel('../Daten/Vorhersagen/WIP_final_BE_4.xlsx')" + "final_BE_df.to_excel('../Daten/Vorhersagen/BE_final.xlsx')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80f4037b-3055-4c44-b730-9943809b4092", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {