diff --git a/Extract_Bibliographic_Info_From_Alma.ipynb b/Extract_Bibliographic_Info_From_Alma.ipynb
index ad5487c62d48e32c3b1b3e21a5c854a8a8cfe97d..0b3de211359835a35273c8c60cddaf2732f339e6 100644
--- a/Extract_Bibliographic_Info_From_Alma.ipynb
+++ b/Extract_Bibliographic_Info_From_Alma.ipynb
@@ -109,7 +109,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mapping = pd.read_csv('mapping.csv')\n",
+ "mapping = pd.read_csv('minimal_mapping.csv')\n",
"mapping = mapping.where((pd.notnull(mapping)), None)"
]
},
@@ -155,26 +155,26 @@
"
\n",
" \n",
" | 0 | \n",
- " 009 | \n",
+ " 100 1 _ $$a | \n",
" None | \n",
" None | \n",
- " Systemnummer | \n",
+ " Verfasser | \n",
"
\n",
" \n",
- " | 33 | \n",
- " AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j | \n",
+ " 23 | \n",
+ " 689 * * $$a | \n",
" None | \n",
- " None | \n",
- " Signatur | \n",
+ " ; | \n",
+ " Schlagworte | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " pattern selector collector label\n",
- "0 009 None None Systemnummer\n",
- "33 AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j None None Signatur"
+ " pattern selector collector label\n",
+ "0 100 1 _ $$a None None Verfasser\n",
+ "23 689 * * $$a None ; Schlagworte"
]
},
"execution_count": 5,
@@ -227,10 +227,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Column 'Signatur' with build_extractor parameters\n",
- "\tpattern 'AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j'\n",
+ "Column 'Schlagworte' with build_extractor parameters\n",
+ "\tpattern '689 * * $$a'\n",
"\tselector 'None'\n",
- "\tcollector_character 'None'\n",
+ "\tcollector_character ';'\n",
"\n"
]
}
@@ -290,7 +290,8 @@
"def uid_list_to_excel(uid_list, excel_file_name_stem):\n",
" data = [get_bibliographic_for_uid(uid) for uid in uid_list]\n",
" df = pd.DataFrame(data)\n",
- " df = post(df)\n",
+ " # Post-processing needs the 'Signatur' and 'Barcode' columns; re-enable the call below only when the mapping provides them\n",
+ " # df = post(df)\n",
" df.to_excel(f'Output/{excel_file_name_stem}_{now()}.xlsx', index=False)"
]
},
@@ -360,10 +361,17 @@
"def get_bibliographic_for_uid(uid):\n",
" try:\n",
" marc_xml = alma.by_mms_id(uid)\n",
- " parent_uid = find_parent_id_in_child_xml(marc_xml)\n",
- " if parent_uid:\n",
- " parent_xml = fetch_parent_xml(parent_uid)\n",
- " parent_title, parent_categories, parent_contents = inherit_from_parent(parent_xml)\n",
+ " try:\n",
+ " parent_uid = find_parent_id_in_child_xml(marc_xml)\n",
+ " if parent_uid:\n",
+ " try:\n",
+ " parent_xml = fetch_parent_xml(parent_uid)\n",
+ " except:\n",
+ " print(f\"XML of parent {parent_uid} for child {uid} not fetchable.\", file=sys.stderr)\n",
+ " else:\n",
+ " parent_title, parent_categories, parent_contents = inherit_from_parent(parent_xml)\n",
+ " except Exception as e:\n",
+ " print(f'Parent for child record \"{uid}\" not found: {str(e)}', file=sys.stderr)\n",
" except almasru.NoRecord:\n",
" print(f'No record for unique ID \"{uid}\" found.', file=sys.stderr)\n",
" d = OrderedDict()\n",
@@ -372,7 +380,7 @@
" d[\"Systemnummer\"] = uid\n",
" return d\n",
" except Exception as e:\n",
- " print(f'Exception encountered while fetching bibliographic data: {str(e)}', file=sys.stderr)\n",
+ " print(f'Exception encountered while fetching bibliographic data for {uid}: {str(e)}', file=sys.stderr)\n",
" else:\n",
" d = OrderedDict()\n",
" for column, extractor in column_extractors.items():\n",
@@ -517,13 +525,15 @@
"outputs": [],
"source": [
"uid_list = load_uid_list('Input/Alma_Export_en-US.xlsx')\n",
- "file_name_stem = 'Test'"
+ "file_name_stem = 'minimal_mapping'"
]
},
{
"cell_type": "code",
"execution_count": 18,
- "metadata": {},
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [],
"source": [
"uid_list_to_excel(uid_list, file_name_stem)"
@@ -553,7 +563,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.6.9"
}
},
"nbformat": 4,
diff --git a/Output/minimal_mapping_2019-11-19T162248.xlsx b/Output/minimal_mapping_2019-11-19T162248.xlsx
deleted file mode 100644
index b35f9131b43ce15585d419070ee6dc27b3d99ab3..0000000000000000000000000000000000000000
Binary files a/Output/minimal_mapping_2019-11-19T162248.xlsx and /dev/null differ
diff --git a/Output/minimal_mapping_2019-12-03T135751.xlsx b/Output/minimal_mapping_2019-12-03T135751.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..f94b0c45bb7232f0ddef97f883bda027ed2096a7
Binary files /dev/null and b/Output/minimal_mapping_2019-12-03T135751.xlsx differ