diff --git a/comparenew-and-old-output.ipynb b/comparenew-and-old-output.ipynb index 840a0c3e172b7f405b68a95856368a4597b73838..42820fb590c0ebdc36e0b4977b31bc58810f5b7f 100644 --- a/comparenew-and-old-output.ipynb +++ b/comparenew-and-old-output.ipynb @@ -22,7 +22,7 @@ "metadata": {}, "outputs": [], "source": [ - "old = pd.read_excel('./travelogues_extraction/script/old_output_for_comparision/TravelogueD18_script_output_2020-07-13-10 20 28_20200707.xlsx')" + "old = pd.read_excel('./travelogues_extraction/script/old_output_for_comparision/TravelogueD18_script_output_2020-07-13-10 20 28_20200707.xlsx', types=str)" ] }, { @@ -388,7 +388,7 @@ "metadata": {}, "outputs": [], "source": [ - "new = pd.read_excel('./travelogues_extraction/script/output/TravelogueD18_script_output_2020-07-17-17:48:57_20200707.xlsx', index_col=0)" + "new = pd.read_excel('./travelogues_extraction/script/output/TravelogueD18_script_output_2020-07-17-17:48:57_20200707.xlsx', index_col=0, dtype=str)" ] }, { @@ -444,7 +444,7 @@ " \n", " AC03114611\n", " AC03114611\n", - " 9.900045e+17\n", + " 990004456580603338\n", " http://data.onb.ac.at/ABO/%2BZ42124907\n", " Z42124907\n", " Keate, George ; http://d-nb.info/gnd/124956432\n", @@ -468,7 +468,7 @@ " \n", " AC03826205\n", " AC03826205\n", - " 9.900059e+17\n", + " 990005906350603338\n", " http://data.onb.ac.at/ABO/%2BZ59423102;\\nhttp:...\n", " Z59423102;\\nZ171040301\n", " Phillip, Arthur ; http://d-nb.info/gnd/118982796\n", @@ -492,7 +492,7 @@ " \n", " AC09792500\n", " AC09792500\n", - " 9.900295e+17\n", + " 990029473670603338\n", " http://data.onb.ac.at/ABO/%2BZ148207602\n", " Z148207602\n", " Cranz, David ; http://d-nb.info/gnd/116717548\n", @@ -516,7 +516,7 @@ " \n", " AC09836279\n", " AC09836279\n", - " 9.900299e+17\n", + " 990029921640603338\n", " http://data.onb.ac.at/ABO/%2BZ124931607\n", " Z124931607\n", " Ellis, Henry ; http://d-nb.info/gnd/119218666\n", @@ -540,7 +540,7 @@ " \n", " AC07705435\n", " AC07705435\n", - " 9.900165e+17\n", + " 990016481420603338\n", " http://data.onb.ac.at/ABO/%2BZ69824303\n", " Z69824303\n", " Le Gentil de la Galaisière, Guillaume J. ; htt...\n", @@ -567,12 +567,12 @@ "" ], "text/plain": [ - " Systemnummer MMS-ID \\\n", - "AC03114611 AC03114611 9.900045e+17 \n", - "AC03826205 AC03826205 9.900059e+17 \n", - "AC09792500 AC09792500 9.900295e+17 \n", - "AC09836279 AC09836279 9.900299e+17 \n", - "AC07705435 AC07705435 9.900165e+17 \n", + " Systemnummer MMS-ID \\\n", + "AC03114611 AC03114611 990004456580603338 \n", + "AC03826205 AC03826205 990005906350603338 \n", + "AC09792500 AC09792500 990029473670603338 \n", + "AC09836279 AC09836279 990029921640603338 \n", + "AC07705435 AC07705435 990016481420603338 \n", "\n", " Volltext \\\n", "AC03114611 http://data.onb.ac.at/ABO/%2BZ42124907 \n", @@ -817,7 +817,7 @@ "metadata": {}, "outputs": [], "source": [ - "new_worked = new[new.Systemnummer.isin(old.Systemnummer)]" + "new_worked = new[~new.Systemnummer.isna()]" ] }, { @@ -878,28 +878,6 @@ "cell_type": "code", "execution_count": 18, "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Can only compare identically-labeled DataFrame objects", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnew_worked\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mold_comparision\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/Documents/onb-homeoffice-local/TraveloguesExtraktion/venv/lib/python3.7/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mf\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 837\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_indexed_same\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 838\u001b[0m raise ValueError(\n\u001b[0;32m--> 839\u001b[0;31m \u001b[0;34m\"Can only compare identically-labeled DataFrame objects\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 840\u001b[0m )\n\u001b[1;32m 841\u001b[0m \u001b[0mnew_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdispatch_to_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr_rep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Can only compare identically-labeled DataFrame objects" - ] - } - ], - "source": [ - "new_worked == old_comparision" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, "outputs": [ { "data": { @@ -907,7 +885,7 @@ "47" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -918,7 +896,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -927,7 +905,7 @@ "(48,)" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -938,7 +916,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -947,7 +925,7 @@ "(48,)" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -958,7 +936,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -967,7 +945,7 @@ "Index(['Verfasser ; GND-ID'], dtype='object')" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -978,7 +956,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -987,7 +965,7 @@ "Index(['VerfasserGND ; GND-ID'], dtype='object')" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1005,7 +983,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -1014,7 +992,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1023,7 +1001,7 @@ "Index([], dtype='object')" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1034,7 +1012,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1043,7 +1021,7 @@ "Index([], dtype='object')" ] }, - "execution_count": 26, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1058,264 +1036,166 @@ "metadata": {}, "outputs": [], "source": [ - "comparision = new_worked == old_comparision" + "def compare(column: str, left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:\n", + " left = left[column].astype(str)\n", + " right = right[column].astype(str)\n", + " ok = left != right\n", + " left = left[ok]\n", + " right = right[ok]\n", + " left.name = 'old-' + column\n", + " right.name = 'new-' + column\n", + " df = pd.concat([left, right], axis=1)\n", + " return df.astype(str)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ - "not_the_same = {\n", - " column: comparision.shape[0] - n\n", - " for column, n\n", - " in comparision.sum().iteritems()\n", - " if comparision.shape[0] > n\n", - "}" + "old_comparision = old_comparision.astype(str)" ] }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'MMS-ID': 479,\n", - " 'Volltext': 44,\n", - " 'Barcode': 44,\n", - " 'Verfasser ; GND-ID': 754,\n", - " 'Werktitel': 640,\n", - " 'Reihentitel ; Bandzählung': 763,\n", - " 'Haupttitel ; Titelzusatz ; Verantwortlichkeitsangabe': 677,\n", - " 'Bandzählung ; Titel des Bandes': 474,\n", - " 'Ausgabe': 725,\n", - " 'Verlagsort': 3,\n", - " 'Verlagsort normiert ; GND-ID': 762,\n", - " 'Druckort normiert ; GND-ID': 763,\n", - " 'Verleger und Drucker': 120,\n", - " 'Verleger normiert ; GND-ID': 755,\n", - " 'Drucker ; GND-ID': 763,\n", - " 'Erscheinungsjahr': 8,\n", - " 'Kollation': 3,\n", - " 'Illustrationen': 292,\n", - " 'Anzahl Illustrationen': 366,\n", - " 'Anzahl Karten': 578,\n", - " 'Format': 2,\n", - " 'Anm. zu Illustrationen': 17,\n", - " 'Bibliografie': 22,\n", - " 'Anmerkungen': 316,\n", - " 'Anm. zu Kollation': 690,\n", - " 'Sprache': 2,\n", - " 'Originalsprache': 407,\n", - " 'Bemerkung zur Sprache': 382,\n", - " 'Standardnummer': 506,\n", - " 'Weitere Verfasser ; GND-ID': 757,\n", - " 'Herausgeber ; GND-ID': 749,\n", - " 'Übersetzer ; GND-ID': 751,\n", - " 'Beiträger ; GND-ID': 753,\n", - " 'Weitere Beteiligte ; GND-ID': 763,\n", - " 'Illustratoren ; GND-ID': 736,\n", - " 'Widmender ; GND-ID': 761,\n", - " 'Widmungsempfänger ; GND-ID': 752,\n", - " 'Art des Inhalts': 283,\n", - " 'Inhalt': 748,\n", - " 'Werke in Relation': 763,\n", - " 'Schlagworte': 754,\n", - " 'Marker': 574,\n", - " 'Zusammenstellung': 692,\n", - " 'Signatur': 763,\n", - " 'Standort': 249,\n", - " 'VD17': 500}" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "not_the_same" - ] - }, - { - "cell_type": "markdown", + "execution_count": 43, "metadata": {}, + "outputs": [], "source": [ - "# mms-id" + "for c in old_comparision.columns:\n", + " old_comparision[c] = old_comparision[c].str.replace('https:', 'http:')" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 44, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AC03114611 9.900045e+17\n", - "AC03826205 9.900059e+17\n", - "AC09792500 9.900295e+17\n", - "AC09836279 9.900299e+17\n", - "AC07705435 9.900165e+17\n", - "Name: MMS-ID, dtype: float64" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "new_worked['MMS-ID'].head()" + "comparision_by_columns = {\n", + " column: compare(column, old_comparision, new_worked)\n", + " for column in old_comparision.columns\n", + "}" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ - "new_worked['MMS-ID'] = new_worked['MMS-ID'].astype(str)" + "comparision_sizes = {\n", + " column: df.shape[0]\n", + " for column, df in comparision_by_columns.items()\n", + "}" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 46, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AC03114611 990004456580603136\n", - "AC03826205 990005906350603264\n", - "AC09792500 990029473670603264\n", - "AC09836279 990029921640603136\n", - "AC07705435 990016481420603136\n", - "Name: MMS-ID, dtype: object" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "new_worked['MMS-ID'].head()" + "not_the_same = {\n", + " c: s for c, s in comparision_sizes.items() if s > 0\n", + "}" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Systemnummer\n", - "AC03114611 990004456580603338\n", - "AC03826205 990005906350603338\n", - "AC09792500 990029473670603338\n", - "AC09836279 990029921640603338\n", - "AC07705435 990016481420603338\n", - "Name: MMS-ID, dtype: int64" + "{'Volltext': 2,\n", + " 'Verfasser ; GND-ID': 668,\n", + " 'Werktitel': 441,\n", + " 'Reihentitel ; Bandzählung': 82,\n", + " 'Haupttitel ; Titelzusatz ; Verantwortlichkeitsangabe': 677,\n", + " 'Bandzählung ; Titel des Bandes': 153,\n", + " 'Verlagsort': 3,\n", + " 'Verlagsort normiert ; GND-ID': 740,\n", + " 'Druckort normiert ; GND-ID': 330,\n", + " 'Verleger und Drucker': 66,\n", + " 'Verleger normiert ; GND-ID': 694,\n", + " 'Drucker ; GND-ID': 324,\n", + " 'Erscheinungsjahr': 8,\n", + " 'Kollation': 1,\n", + " 'Illustrationen': 9,\n", + " 'Anzahl Illustrationen': 411,\n", + " 'Anzahl Karten': 186,\n", + " 'Anm. zu Illustrationen': 12,\n", + " 'Bibliografie': 1,\n", + " 'Anmerkungen': 1,\n", + " 'Anm. zu Kollation': 6,\n", + " 'Sprache': 2,\n", + " 'Originalsprache': 29,\n", + " 'Bemerkung zur Sprache': 10,\n", + " 'Standardnummer': 6,\n", + " 'Weitere Verfasser ; GND-ID': 90,\n", + " 'Herausgeber ; GND-ID': 129,\n", + " 'Übersetzer ; GND-ID': 172,\n", + " 'Beiträger ; GND-ID': 57,\n", + " 'Weitere Beteiligte ; GND-ID': 17,\n", + " 'Illustratoren ; GND-ID': 202,\n", + " 'Widmender ; GND-ID': 70,\n", + " 'Widmungsempfänger ; GND-ID': 92,\n", + " 'Art des Inhalts': 136,\n", + " 'Inhalt': 12,\n", + " 'Werke in Relation': 98,\n", + " 'Schlagworte': 744,\n", + " 'Marker': 570,\n", + " 'Zusammenstellung': 1,\n", + " 'Signatur': 758,\n", + " 'Standort': 244,\n", + " 'VD17': 7}" ] }, - "execution_count": 36, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "old_comparision['MMS-ID'].head()" + "not_the_same" ] }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/phylogram/Documents/onb-homeoffice-local/TraveloguesExtraktion/venv/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " \"\"\"Entry point for launching an IPython kernel.\n" - ] - } - ], - "source": [ - "old_comparision['MMS-ID'] = old_comparision['MMS-ID'].astype(str)" - ] - }, - { - "cell_type": "markdown", + "execution_count": 56, "metadata": {}, + "outputs": [], "source": [ - "oida" + "ci = iter(not_the_same.keys())" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 168, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "Systemnummer\n", - "AC03114611 990004456580603338\n", - "AC03826205 990005906350603338\n", - "AC09792500 990029473670603338\n", - "AC09836279 990029921640603338\n", - "AC07705435 990016481420603338\n", - "Name: MMS-ID, dtype: object" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" + "ename": "StopIteration", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomparision_by_columns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mci\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mStopIteration\u001b[0m: " + ] } ], "source": [ - "old_comparision['MMS-ID'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "def compare(column: str) -> pd.DataFrame:\n", - " old_column = old_comparision[column]\n", - " new_column = new_worked[column]\n", - " old_diff = old_column[old_column != new_column]\n", - " new_diff = new_column[old_column != new_column]\n", - " return pd.DataFrame({\n", - " 'old_value': old_diff,\n", - " 'new_value': new_diff,\n", - " }, index=old_diff.index)" + "cd = comparision_by_columns[next(ci)]" ] }, { "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "mms_different = compare('MMS-ID')" - ] - }, - { - "cell_type": "code", - "execution_count": 41, + "execution_count": 166, "metadata": {}, "outputs": [ { @@ -1339,8 +1219,8 @@ " \n", " \n", " \n", - " old_value\n", - " new_value\n", + " old-VD17\n", + " new-VD17\n", " \n", " \n", " Systemnummer\n", @@ -1350,90 +1230,147 @@ " \n", " \n", " \n", - " AC03114611\n", - " 990004456580603338\n", - " 990004456580603136\n", - " \n", - " \n", - " AC03826205\n", - " 990005906350603338\n", - " 990005906350603264\n", - " \n", - " \n", - " AC09792500\n", - " 990029473670603338\n", - " 990029473670603264\n", - " \n", - " \n", - " AC09836279\n", - " 990029921640603338\n", - " 990029921640603136\n", - " \n", - " \n", - " AC07705435\n", - " 990016481420603338\n", - " 990016481420603136\n", + " AC13781336\n", + " VD18 90245792; vd18\n", + " VD18 90245792 ; vd18\n", " \n", " \n", - " ...\n", - " ...\n", - " ...\n", + " AC13690893\n", + " VD18 90266536; urn:nbn:de:bvb:12-bsb10469449-5\n", + " urn:nbn:de:bvb:12-bsb10469449-5 ; VD18 90266536\n", " \n", " \n", - " AC14487653\n", - " 990057677380603338\n", - " 990057677380603136\n", + " AC13690886\n", + " urn:nbn:de:bvb:12-bsb10469448-5; VD18 90266528\n", + " urn:nbn:de:bvb:12-bsb10469448-5 ; VD18 90266528\n", " \n", " \n", - " AC14494673\n", - " 990057713410603338\n", - " 990057713410603264\n", + " AC03125444\n", + " urn:nbn:at:at-moz:2-68173; VD18 10196951\n", + " VD18 10196951 ; urn:nbn:at:at-moz:2-68173\n", " \n", " \n", - " AC14494679\n", - " 990057713430603338\n", - " 990057713430603264\n", + " AC03125437\n", + " VD18 10196951; urn:nbn:at:at-moz:2-68134\n", + " VD18 10196951 ; urn:nbn:at:at-moz:2-68134\n", " \n", " \n", - " AC06335423\n", - " 990057719990603338\n", - " 990057719990603264\n", + " AC15034985\n", + " NDSLARCHBUE0139703; VD18 11290722-001; FSLHB00...\n", + " VD18 11290722-001 ; FSLHB000037286 ; NDSLARCHB...\n", " \n", " \n", - " AC14499956\n", - " 990057728090603338\n", - " 990057728090603264\n", + " AC08513325\n", + " VD18 1057364X; urn:nbn:de:bvb:12-bsb10302394-3\n", + " urn:nbn:de:bvb:12-bsb10302394-3 ; VD18 1057364X\n", " \n", " \n", "\n", - "

479 rows × 2 columns

\n", "" ], "text/plain": [ - " old_value new_value\n", - "Systemnummer \n", - "AC03114611 990004456580603338 990004456580603136\n", - "AC03826205 990005906350603338 990005906350603264\n", - "AC09792500 990029473670603338 990029473670603264\n", - "AC09836279 990029921640603338 990029921640603136\n", - "AC07705435 990016481420603338 990016481420603136\n", - "... ... ...\n", - "AC14487653 990057677380603338 990057677380603136\n", - "AC14494673 990057713410603338 990057713410603264\n", - "AC14494679 990057713430603338 990057713430603264\n", - "AC06335423 990057719990603338 990057719990603264\n", - "AC14499956 990057728090603338 990057728090603264\n", + " old-VD17 \\\n", + "Systemnummer \n", + "AC13781336 VD18 90245792; vd18 \n", + "AC13690893 VD18 90266536; urn:nbn:de:bvb:12-bsb10469449-5 \n", + "AC13690886 urn:nbn:de:bvb:12-bsb10469448-5; VD18 90266528 \n", + "AC03125444 urn:nbn:at:at-moz:2-68173; VD18 10196951 \n", + "AC03125437 VD18 10196951; urn:nbn:at:at-moz:2-68134 \n", + "AC15034985 NDSLARCHBUE0139703; VD18 11290722-001; FSLHB00... \n", + "AC08513325 VD18 1057364X; urn:nbn:de:bvb:12-bsb10302394-3 \n", "\n", - "[479 rows x 2 columns]" + " new-VD17 \n", + "Systemnummer \n", + "AC13781336 VD18 90245792 ; vd18 \n", + "AC13690893 urn:nbn:de:bvb:12-bsb10469449-5 ; VD18 90266536 \n", + "AC13690886 urn:nbn:de:bvb:12-bsb10469448-5 ; VD18 90266528 \n", + "AC03125444 VD18 10196951 ; urn:nbn:at:at-moz:2-68173 \n", + "AC03125437 VD18 10196951 ; urn:nbn:at:at-moz:2-68134 \n", + "AC15034985 VD18 11290722-001 ; FSLHB000037286 ; NDSLARCHB... \n", + "AC08513325 urn:nbn:de:bvb:12-bsb10302394-3 ; VD18 1057364X " ] }, - "execution_count": 41, + "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mms_different" + "cd" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 ~~~~~~~\n", + "VD18 90245792; vd18\n", + "-----------\n", + "VD18 90245792 ; vd18\n", + "-----------\n", + "xxxxxxxxxxxxxxxxx\n", + "1 ~~~~~~~\n", + "VD18 90266536; urn:nbn:de:bvb:12-bsb10469449-5\n", + "-----------\n", + "urn:nbn:de:bvb:12-bsb10469449-5 ; VD18 90266536\n", + "-----------\n", + "xxxxxxxxxxxxxxxxx\n", + "2 ~~~~~~~\n", + "urn:nbn:de:bvb:12-bsb10469448-5; VD18 90266528\n", + "-----------\n", + "urn:nbn:de:bvb:12-bsb10469448-5 ; VD18 90266528\n", + "-----------\n", + "xxxxxxxxxxxxxxxxx\n" + ] + } + ], + "source": [ + "for i in range(3):\n", + " print(i, '~~~~~~~')\n", + " for v in cd.iloc[i]:\n", + " print(v)\n", + " print('-----------')\n", + " print('xxxxxxxxxxxxxxxxx')" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Zweytes Abtheilung; Des Grafen Beniowski Reise aus Kamtschatka über Kanton nach Europa\n", + "Zweytes Abtheilung ; Des Grafen Beniowski Reise aus Kamtschatka über Kanton nach Europa\n" + ] + } + ], + "source": [ + "for v in c.iloc[0]:\n", + " print(v)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "diff = compare('MMS-ID', old_comparision, new_worked)" ] }, {