diff --git a/comparenew-and-old-output.ipynb b/comparenew-and-old-output.ipynb
index 840a0c3e172b7f405b68a95856368a4597b73838..42820fb590c0ebdc36e0b4977b31bc58810f5b7f 100644
--- a/comparenew-and-old-output.ipynb
+++ b/comparenew-and-old-output.ipynb
@@ -22,7 +22,7 @@
"metadata": {},
"outputs": [],
"source": [
- "old = pd.read_excel('./travelogues_extraction/script/old_output_for_comparision/TravelogueD18_script_output_2020-07-13-10 20 28_20200707.xlsx')"
+ "old = pd.read_excel('./travelogues_extraction/script/old_output_for_comparision/TravelogueD18_script_output_2020-07-13-10 20 28_20200707.xlsx', dtype=str)"
]
},
{
@@ -388,7 +388,7 @@
"metadata": {},
"outputs": [],
"source": [
- "new = pd.read_excel('./travelogues_extraction/script/output/TravelogueD18_script_output_2020-07-17-17:48:57_20200707.xlsx', index_col=0)"
+ "new = pd.read_excel('./travelogues_extraction/script/output/TravelogueD18_script_output_2020-07-17-17:48:57_20200707.xlsx', index_col=0, dtype=str)"
]
},
{
@@ -444,7 +444,7 @@
"
\n",
" AC03114611 | \n",
" AC03114611 | \n",
- " 9.900045e+17 | \n",
+ " 990004456580603338 | \n",
" http://data.onb.ac.at/ABO/%2BZ42124907 | \n",
" Z42124907 | \n",
" Keate, George ; http://d-nb.info/gnd/124956432 | \n",
@@ -468,7 +468,7 @@
"
\n",
" AC03826205 | \n",
" AC03826205 | \n",
- " 9.900059e+17 | \n",
+ " 990005906350603338 | \n",
" http://data.onb.ac.at/ABO/%2BZ59423102;\\nhttp:... | \n",
" Z59423102;\\nZ171040301 | \n",
" Phillip, Arthur ; http://d-nb.info/gnd/118982796 | \n",
@@ -492,7 +492,7 @@
"
\n",
" AC09792500 | \n",
" AC09792500 | \n",
- " 9.900295e+17 | \n",
+ " 990029473670603338 | \n",
" http://data.onb.ac.at/ABO/%2BZ148207602 | \n",
" Z148207602 | \n",
" Cranz, David ; http://d-nb.info/gnd/116717548 | \n",
@@ -516,7 +516,7 @@
"
\n",
" AC09836279 | \n",
" AC09836279 | \n",
- " 9.900299e+17 | \n",
+ " 990029921640603338 | \n",
" http://data.onb.ac.at/ABO/%2BZ124931607 | \n",
" Z124931607 | \n",
" Ellis, Henry ; http://d-nb.info/gnd/119218666 | \n",
@@ -540,7 +540,7 @@
"
\n",
" AC07705435 | \n",
" AC07705435 | \n",
- " 9.900165e+17 | \n",
+ " 990016481420603338 | \n",
" http://data.onb.ac.at/ABO/%2BZ69824303 | \n",
" Z69824303 | \n",
" Le Gentil de la Galaisière, Guillaume J. ; htt... | \n",
@@ -567,12 +567,12 @@
""
],
"text/plain": [
- " Systemnummer MMS-ID \\\n",
- "AC03114611 AC03114611 9.900045e+17 \n",
- "AC03826205 AC03826205 9.900059e+17 \n",
- "AC09792500 AC09792500 9.900295e+17 \n",
- "AC09836279 AC09836279 9.900299e+17 \n",
- "AC07705435 AC07705435 9.900165e+17 \n",
+ " Systemnummer MMS-ID \\\n",
+ "AC03114611 AC03114611 990004456580603338 \n",
+ "AC03826205 AC03826205 990005906350603338 \n",
+ "AC09792500 AC09792500 990029473670603338 \n",
+ "AC09836279 AC09836279 990029921640603338 \n",
+ "AC07705435 AC07705435 990016481420603338 \n",
"\n",
" Volltext \\\n",
"AC03114611 http://data.onb.ac.at/ABO/%2BZ42124907 \n",
@@ -817,7 +817,7 @@
"metadata": {},
"outputs": [],
"source": [
- "new_worked = new[new.Systemnummer.isin(old.Systemnummer)]"
+ "new_worked = new.loc[new['Systemnummer'].notna()]"
]
},
{
@@ -878,28 +878,6 @@
"cell_type": "code",
"execution_count": 18,
"metadata": {},
- "outputs": [
- {
- "ename": "ValueError",
- "evalue": "Can only compare identically-labeled DataFrame objects",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnew_worked\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mold_comparision\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m~/Documents/onb-homeoffice-local/TraveloguesExtraktion/venv/lib/python3.7/site-packages/pandas/core/ops/__init__.py\u001b[0m in \u001b[0;36mf\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 837\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_indexed_same\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 838\u001b[0m raise ValueError(\n\u001b[0;32m--> 839\u001b[0;31m \u001b[0;34m\"Can only compare identically-labeled DataFrame objects\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 840\u001b[0m )\n\u001b[1;32m 841\u001b[0m \u001b[0mnew_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdispatch_to_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr_rep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: Can only compare identically-labeled DataFrame objects"
- ]
- }
- ],
- "source": [
- "new_worked == old_comparision"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
"outputs": [
{
"data": {
@@ -907,7 +885,7 @@
"47"
]
},
- "execution_count": 19,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -918,7 +896,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -927,7 +905,7 @@
"(48,)"
]
},
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -938,7 +916,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -947,7 +925,7 @@
"(48,)"
]
},
- "execution_count": 21,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -958,7 +936,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -967,7 +945,7 @@
"Index(['Verfasser ; GND-ID'], dtype='object')"
]
},
- "execution_count": 22,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -978,7 +956,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -987,7 +965,7 @@
"Index(['VerfasserGND ; GND-ID'], dtype='object')"
]
},
- "execution_count": 23,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -1005,7 +983,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@@ -1014,7 +992,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -1023,7 +1001,7 @@
"Index([], dtype='object')"
]
},
- "execution_count": 25,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1034,7 +1012,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -1043,7 +1021,7 @@
"Index([], dtype='object')"
]
},
- "execution_count": 26,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -1058,264 +1036,166 @@
"metadata": {},
"outputs": [],
"source": [
- "comparision = new_worked == old_comparision"
+ "def compare(column: str, left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:\n",
+ "    \"\"\"Return the rows whose `column` value differs between `left` and `right`.\n",
+ "\n",
+ "    Values are compared as strings; the result has the columns 'old-<column>' (from left) and 'new-<column>' (from right).\n",
+ "    \"\"\"\n",
+ "    old_values = left[column].astype(str).rename('old-' + column)\n",
+ "    new_values = right[column].astype(str).rename('new-' + column)\n",
+ "    differs = old_values != new_values  # True where the two sides disagree\n",
+ "    mismatches = pd.concat([old_values[differs], new_values[differs]], axis=1)\n",
+ "    return mismatches.astype(str)"
]
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
- "not_the_same = {\n",
- " column: comparision.shape[0] - n\n",
- " for column, n\n",
- " in comparision.sum().iteritems()\n",
- " if comparision.shape[0] > n\n",
- "}"
+ "old_comparision = old_comparision.astype(str)"
]
},
{
"cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'MMS-ID': 479,\n",
- " 'Volltext': 44,\n",
- " 'Barcode': 44,\n",
- " 'Verfasser ; GND-ID': 754,\n",
- " 'Werktitel': 640,\n",
- " 'Reihentitel ; Bandzählung': 763,\n",
- " 'Haupttitel ; Titelzusatz ; Verantwortlichkeitsangabe': 677,\n",
- " 'Bandzählung ; Titel des Bandes': 474,\n",
- " 'Ausgabe': 725,\n",
- " 'Verlagsort': 3,\n",
- " 'Verlagsort normiert ; GND-ID': 762,\n",
- " 'Druckort normiert ; GND-ID': 763,\n",
- " 'Verleger und Drucker': 120,\n",
- " 'Verleger normiert ; GND-ID': 755,\n",
- " 'Drucker ; GND-ID': 763,\n",
- " 'Erscheinungsjahr': 8,\n",
- " 'Kollation': 3,\n",
- " 'Illustrationen': 292,\n",
- " 'Anzahl Illustrationen': 366,\n",
- " 'Anzahl Karten': 578,\n",
- " 'Format': 2,\n",
- " 'Anm. zu Illustrationen': 17,\n",
- " 'Bibliografie': 22,\n",
- " 'Anmerkungen': 316,\n",
- " 'Anm. zu Kollation': 690,\n",
- " 'Sprache': 2,\n",
- " 'Originalsprache': 407,\n",
- " 'Bemerkung zur Sprache': 382,\n",
- " 'Standardnummer': 506,\n",
- " 'Weitere Verfasser ; GND-ID': 757,\n",
- " 'Herausgeber ; GND-ID': 749,\n",
- " 'Übersetzer ; GND-ID': 751,\n",
- " 'Beiträger ; GND-ID': 753,\n",
- " 'Weitere Beteiligte ; GND-ID': 763,\n",
- " 'Illustratoren ; GND-ID': 736,\n",
- " 'Widmender ; GND-ID': 761,\n",
- " 'Widmungsempfänger ; GND-ID': 752,\n",
- " 'Art des Inhalts': 283,\n",
- " 'Inhalt': 748,\n",
- " 'Werke in Relation': 763,\n",
- " 'Schlagworte': 754,\n",
- " 'Marker': 574,\n",
- " 'Zusammenstellung': 692,\n",
- " 'Signatur': 763,\n",
- " 'Standort': 249,\n",
- " 'VD17': 500}"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "not_the_same"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 43,
"metadata": {},
+ "outputs": [],
"source": [
- "# mms-id"
+ "for c in old_comparision.columns:  # literal rewrite of 'https:' to 'http:' in every column\n",
+ "    old_comparision[c] = old_comparision[c].str.replace('https:', 'http:', regex=False)"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 44,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "AC03114611 9.900045e+17\n",
- "AC03826205 9.900059e+17\n",
- "AC09792500 9.900295e+17\n",
- "AC09836279 9.900299e+17\n",
- "AC07705435 9.900165e+17\n",
- "Name: MMS-ID, dtype: float64"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "new_worked['MMS-ID'].head()"
+ "comparision_by_columns = dict(\n",
+ "    (name, compare(name, old_comparision, new_worked))\n",
+ "    for name in old_comparision.columns\n",
+ ")"
]
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
- "new_worked['MMS-ID'] = new_worked['MMS-ID'].astype(str)"
+ "comparision_sizes = {\n",
+ "    name: len(frame)\n",
+ "    for name, frame in comparision_by_columns.items()\n",
+ "}"
]
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 46,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "AC03114611 990004456580603136\n",
- "AC03826205 990005906350603264\n",
- "AC09792500 990029473670603264\n",
- "AC09836279 990029921640603136\n",
- "AC07705435 990016481420603136\n",
- "Name: MMS-ID, dtype: object"
- ]
- },
- "execution_count": 35,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "new_worked['MMS-ID'].head()"
+ "not_the_same = dict(\n",
+ "    (name, count) for name, count in comparision_sizes.items() if count > 0\n",
+ ")"
]
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "Systemnummer\n",
- "AC03114611 990004456580603338\n",
- "AC03826205 990005906350603338\n",
- "AC09792500 990029473670603338\n",
- "AC09836279 990029921640603338\n",
- "AC07705435 990016481420603338\n",
- "Name: MMS-ID, dtype: int64"
+ "{'Volltext': 2,\n",
+ " 'Verfasser ; GND-ID': 668,\n",
+ " 'Werktitel': 441,\n",
+ " 'Reihentitel ; Bandzählung': 82,\n",
+ " 'Haupttitel ; Titelzusatz ; Verantwortlichkeitsangabe': 677,\n",
+ " 'Bandzählung ; Titel des Bandes': 153,\n",
+ " 'Verlagsort': 3,\n",
+ " 'Verlagsort normiert ; GND-ID': 740,\n",
+ " 'Druckort normiert ; GND-ID': 330,\n",
+ " 'Verleger und Drucker': 66,\n",
+ " 'Verleger normiert ; GND-ID': 694,\n",
+ " 'Drucker ; GND-ID': 324,\n",
+ " 'Erscheinungsjahr': 8,\n",
+ " 'Kollation': 1,\n",
+ " 'Illustrationen': 9,\n",
+ " 'Anzahl Illustrationen': 411,\n",
+ " 'Anzahl Karten': 186,\n",
+ " 'Anm. zu Illustrationen': 12,\n",
+ " 'Bibliografie': 1,\n",
+ " 'Anmerkungen': 1,\n",
+ " 'Anm. zu Kollation': 6,\n",
+ " 'Sprache': 2,\n",
+ " 'Originalsprache': 29,\n",
+ " 'Bemerkung zur Sprache': 10,\n",
+ " 'Standardnummer': 6,\n",
+ " 'Weitere Verfasser ; GND-ID': 90,\n",
+ " 'Herausgeber ; GND-ID': 129,\n",
+ " 'Übersetzer ; GND-ID': 172,\n",
+ " 'Beiträger ; GND-ID': 57,\n",
+ " 'Weitere Beteiligte ; GND-ID': 17,\n",
+ " 'Illustratoren ; GND-ID': 202,\n",
+ " 'Widmender ; GND-ID': 70,\n",
+ " 'Widmungsempfänger ; GND-ID': 92,\n",
+ " 'Art des Inhalts': 136,\n",
+ " 'Inhalt': 12,\n",
+ " 'Werke in Relation': 98,\n",
+ " 'Schlagworte': 744,\n",
+ " 'Marker': 570,\n",
+ " 'Zusammenstellung': 1,\n",
+ " 'Signatur': 758,\n",
+ " 'Standort': 244,\n",
+ " 'VD17': 7}"
]
},
- "execution_count": 36,
+ "execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "old_comparision['MMS-ID'].head()"
+ "not_the_same"
]
},
{
"cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/phylogram/Documents/onb-homeoffice-local/TraveloguesExtraktion/venv/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " \"\"\"Entry point for launching an IPython kernel.\n"
- ]
- }
- ],
- "source": [
- "old_comparision['MMS-ID'] = old_comparision['MMS-ID'].astype(str)"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 56,
"metadata": {},
+ "outputs": [],
"source": [
- "oida"
+ "ci = iter(not_the_same.keys())"
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 168,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "Systemnummer\n",
- "AC03114611 990004456580603338\n",
- "AC03826205 990005906350603338\n",
- "AC09792500 990029473670603338\n",
- "AC09836279 990029921640603338\n",
- "AC07705435 990016481420603338\n",
- "Name: MMS-ID, dtype: object"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "StopIteration",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomparision_by_columns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mci\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mStopIteration\u001b[0m: "
+ ]
}
],
"source": [
- "old_comparision['MMS-ID'].head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [],
- "source": [
- "def compare(column: str) -> pd.DataFrame:\n",
- " old_column = old_comparision[column]\n",
- " new_column = new_worked[column]\n",
- " old_diff = old_column[old_column != new_column]\n",
- " new_diff = new_column[old_column != new_column]\n",
- " return pd.DataFrame({\n",
- " 'old_value': old_diff,\n",
- " 'new_value': new_diff,\n",
- " }, index=old_diff.index)"
+ "cd = comparision_by_columns[next(ci)]"
]
},
{
"cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [],
- "source": [
- "mms_different = compare('MMS-ID')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
+ "execution_count": 166,
"metadata": {},
"outputs": [
{
@@ -1339,8 +1219,8 @@
" \n",
" \n",
" | \n",
- " old_value | \n",
- " new_value | \n",
+ " old-VD17 | \n",
+ " new-VD17 | \n",
"
\n",
" \n",
" Systemnummer | \n",
@@ -1350,90 +1230,147 @@
"
\n",
" \n",
" \n",
- " AC03114611 | \n",
- " 990004456580603338 | \n",
- " 990004456580603136 | \n",
- "
\n",
- " \n",
- " AC03826205 | \n",
- " 990005906350603338 | \n",
- " 990005906350603264 | \n",
- "
\n",
- " \n",
- " AC09792500 | \n",
- " 990029473670603338 | \n",
- " 990029473670603264 | \n",
- "
\n",
- " \n",
- " AC09836279 | \n",
- " 990029921640603338 | \n",
- " 990029921640603136 | \n",
- "
\n",
- " \n",
- " AC07705435 | \n",
- " 990016481420603338 | \n",
- " 990016481420603136 | \n",
+ " AC13781336 | \n",
+ " VD18 90245792; vd18 | \n",
+ " VD18 90245792 ; vd18 | \n",
"
\n",
" \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
+ " AC13690893 | \n",
+ " VD18 90266536; urn:nbn:de:bvb:12-bsb10469449-5 | \n",
+ " urn:nbn:de:bvb:12-bsb10469449-5 ; VD18 90266536 | \n",
"
\n",
" \n",
- " AC14487653 | \n",
- " 990057677380603338 | \n",
- " 990057677380603136 | \n",
+ " AC13690886 | \n",
+ " urn:nbn:de:bvb:12-bsb10469448-5; VD18 90266528 | \n",
+ " urn:nbn:de:bvb:12-bsb10469448-5 ; VD18 90266528 | \n",
"
\n",
" \n",
- " AC14494673 | \n",
- " 990057713410603338 | \n",
- " 990057713410603264 | \n",
+ " AC03125444 | \n",
+ " urn:nbn:at:at-moz:2-68173; VD18 10196951 | \n",
+ " VD18 10196951 ; urn:nbn:at:at-moz:2-68173 | \n",
"
\n",
" \n",
- " AC14494679 | \n",
- " 990057713430603338 | \n",
- " 990057713430603264 | \n",
+ " AC03125437 | \n",
+ " VD18 10196951; urn:nbn:at:at-moz:2-68134 | \n",
+ " VD18 10196951 ; urn:nbn:at:at-moz:2-68134 | \n",
"
\n",
" \n",
- " AC06335423 | \n",
- " 990057719990603338 | \n",
- " 990057719990603264 | \n",
+ " AC15034985 | \n",
+ " NDSLARCHBUE0139703; VD18 11290722-001; FSLHB00... | \n",
+ " VD18 11290722-001 ; FSLHB000037286 ; NDSLARCHB... | \n",
"
\n",
" \n",
- " AC14499956 | \n",
- " 990057728090603338 | \n",
- " 990057728090603264 | \n",
+ " AC08513325 | \n",
+ " VD18 1057364X; urn:nbn:de:bvb:12-bsb10302394-3 | \n",
+ " urn:nbn:de:bvb:12-bsb10302394-3 ; VD18 1057364X | \n",
"
\n",
" \n",
"\n",
- "479 rows × 2 columns
\n",
""
],
"text/plain": [
- " old_value new_value\n",
- "Systemnummer \n",
- "AC03114611 990004456580603338 990004456580603136\n",
- "AC03826205 990005906350603338 990005906350603264\n",
- "AC09792500 990029473670603338 990029473670603264\n",
- "AC09836279 990029921640603338 990029921640603136\n",
- "AC07705435 990016481420603338 990016481420603136\n",
- "... ... ...\n",
- "AC14487653 990057677380603338 990057677380603136\n",
- "AC14494673 990057713410603338 990057713410603264\n",
- "AC14494679 990057713430603338 990057713430603264\n",
- "AC06335423 990057719990603338 990057719990603264\n",
- "AC14499956 990057728090603338 990057728090603264\n",
+ " old-VD17 \\\n",
+ "Systemnummer \n",
+ "AC13781336 VD18 90245792; vd18 \n",
+ "AC13690893 VD18 90266536; urn:nbn:de:bvb:12-bsb10469449-5 \n",
+ "AC13690886 urn:nbn:de:bvb:12-bsb10469448-5; VD18 90266528 \n",
+ "AC03125444 urn:nbn:at:at-moz:2-68173; VD18 10196951 \n",
+ "AC03125437 VD18 10196951; urn:nbn:at:at-moz:2-68134 \n",
+ "AC15034985 NDSLARCHBUE0139703; VD18 11290722-001; FSLHB00... \n",
+ "AC08513325 VD18 1057364X; urn:nbn:de:bvb:12-bsb10302394-3 \n",
"\n",
- "[479 rows x 2 columns]"
+ " new-VD17 \n",
+ "Systemnummer \n",
+ "AC13781336 VD18 90245792 ; vd18 \n",
+ "AC13690893 urn:nbn:de:bvb:12-bsb10469449-5 ; VD18 90266536 \n",
+ "AC13690886 urn:nbn:de:bvb:12-bsb10469448-5 ; VD18 90266528 \n",
+ "AC03125444 VD18 10196951 ; urn:nbn:at:at-moz:2-68173 \n",
+ "AC03125437 VD18 10196951 ; urn:nbn:at:at-moz:2-68134 \n",
+ "AC15034985 VD18 11290722-001 ; FSLHB000037286 ; NDSLARCHB... \n",
+ "AC08513325 urn:nbn:de:bvb:12-bsb10302394-3 ; VD18 1057364X "
]
},
- "execution_count": 41,
+ "execution_count": 166,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "mms_different"
+ "cd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 167,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 ~~~~~~~\n",
+ "VD18 90245792; vd18\n",
+ "-----------\n",
+ "VD18 90245792 ; vd18\n",
+ "-----------\n",
+ "xxxxxxxxxxxxxxxxx\n",
+ "1 ~~~~~~~\n",
+ "VD18 90266536; urn:nbn:de:bvb:12-bsb10469449-5\n",
+ "-----------\n",
+ "urn:nbn:de:bvb:12-bsb10469449-5 ; VD18 90266536\n",
+ "-----------\n",
+ "xxxxxxxxxxxxxxxxx\n",
+ "2 ~~~~~~~\n",
+ "urn:nbn:de:bvb:12-bsb10469448-5; VD18 90266528\n",
+ "-----------\n",
+ "urn:nbn:de:bvb:12-bsb10469448-5 ; VD18 90266528\n",
+ "-----------\n",
+ "xxxxxxxxxxxxxxxxx\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i in range(min(3, len(cd))):  # cd can hold fewer than three differing rows\n",
+ "    print(i, '~~~~~~~')\n",
+ "    for v in cd.iloc[i]:\n",
+ "        print(v)\n",
+ "        print('-----------')\n",
+ "    print('xxxxxxxxxxxxxxxxx')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Zweytes Abtheilung; Des Grafen Beniowski Reise aus Kamtschatka über Kanton nach Europa\n",
+ "Zweytes Abtheilung ; Des Grafen Beniowski Reise aus Kamtschatka über Kanton nach Europa\n"
+ ]
+ }
+ ],
+ "source": [
+ "for v in cd.iloc[0]:  # cd is the comparison frame selected above\n",
+ "    print(v)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "diff = compare('MMS-ID', old_comparision, new_worked)"
]
},
{