diff --git a/Extract_Bibliographic_Info_From_Alma.ipynb b/Extract_Bibliographic_Info_From_Alma.ipynb index ad5487c62d48e32c3b1b3e21a5c854a8a8cfe97d..0b3de211359835a35273c8c60cddaf2732f339e6 100644 --- a/Extract_Bibliographic_Info_From_Alma.ipynb +++ b/Extract_Bibliographic_Info_From_Alma.ipynb @@ -109,7 +109,7 @@ "metadata": {}, "outputs": [], "source": [ - "mapping = pd.read_csv('mapping.csv')\n", + "mapping = pd.read_csv('minimal_mapping.csv')\n", "mapping = mapping.where((pd.notnull(mapping)), None)" ] }, @@ -155,26 +155,26 @@ " \n", " \n", " 0\n", - " 009\n", + " 100 1 _ $$a\n", " None\n", " None\n", - " Systemnummer\n", + " Verfasser\n", " \n", " \n", - " 33\n", - " AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j\n", + " 23\n", + " 689 * * $$a\n", " None\n", - " None\n", - " Signatur\n", + " ;\n", + " Schlagworte\n", " \n", " \n", "\n", "" ], "text/plain": [ - " pattern selector collector label\n", - "0 009 None None Systemnummer\n", - "33 AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j None None Signatur" + " pattern selector collector label\n", + "0 100 1 _ $$a None None Verfasser\n", + "23 689 * * $$a None ; Schlagworte" ] }, "execution_count": 5, @@ -227,10 +227,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "Column 'Signatur' with build_extractor parameters\n", - "\tpattern 'AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j'\n", + "Column 'Schlagworte' with build_extractor parameters\n", + "\tpattern '689 * * $$a'\n", "\tselector 'None'\n", - "\tcollector_character 'None'\n", + "\tcollector_character ';'\n", "\n" ] } @@ -290,7 +290,8 @@ "def uid_list_to_excel(uid_list, excel_file_name_stem):\n", " data = [get_bibliographic_for_uid(uid) for uid in uid_list]\n", " df = pd.DataFrame(data)\n", - " df = post(df)\n", + " # Do post processing only if fields 'Signatur' and 'Barcode' are available\n", + " # df = post(df)\n", " df.to_excel(f'Output/{excel_file_name_stem}_{now()}.xlsx', index=False)" ] }, @@ -360,10 +361,17 @@ "def get_bibliographic_for_uid(uid):\n", " try:\n", " marc_xml = alma.by_mms_id(uid)\n", - " parent_uid = find_parent_id_in_child_xml(marc_xml)\n", - " if parent_uid:\n", - " parent_xml = fetch_parent_xml(parent_uid)\n", - " parent_title, parent_categories, parent_contents = inherit_from_parent(parent_xml)\n", + " try:\n", + " parent_uid = find_parent_id_in_child_xml(marc_xml)\n", + " if parent_uid:\n", + " try:\n", + " parent_xml = fetch_parent_xml(parent_uid)\n", + " except:\n", + " print(f\"XML of parent {parent_uid} for child {uid} not fetchable.\", file=sys.stderr)\n", + " else:\n", + " parent_title, parent_categories, parent_contents = inherit_from_parent(parent_xml)\n", + " except Exception as e:\n", + " print(f'No parent for child record \"{uid}\" not found: {str(e)}', file=sys.stderr)\n", " except almasru.NoRecord:\n", " print(f'No record for unique ID \"{uid}\" found.', file=sys.stderr)\n", " d = OrderedDict()\n", @@ -372,7 +380,7 @@ " d[\"Systemnummer\"] = uid\n", " return d\n", " except Exception as e:\n", - " print(f'Exception encountered while fetching bibliographic data: {str(e)}', file=sys.stderr)\n", + " print(f'Exception encountered while fetching bibliographic data for {uid}: {str(e)}', file=sys.stderr)\n", " else:\n", " d = OrderedDict()\n", " for column, extractor in column_extractors.items():\n", @@ -517,13 +525,15 @@ "outputs": [], "source": [ "uid_list = load_uid_list('Input/Alma_Export_en-US.xlsx')\n", - "file_name_stem = 'Test'" + "file_name_stem = 'minimal_mapping'" ] }, { "cell_type": "code", "execution_count": 18, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "uid_list_to_excel(uid_list, file_name_stem)" @@ -553,7 +563,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/Output/minimal_mapping_2019-11-19T162248.xlsx b/Output/minimal_mapping_2019-11-19T162248.xlsx deleted file mode 100644 index b35f9131b43ce15585d419070ee6dc27b3d99ab3..0000000000000000000000000000000000000000 Binary files a/Output/minimal_mapping_2019-11-19T162248.xlsx and /dev/null differ diff --git a/Output/minimal_mapping_2019-12-03T135751.xlsx b/Output/minimal_mapping_2019-12-03T135751.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f94b0c45bb7232f0ddef97f883bda027ed2096a7 Binary files /dev/null and b/Output/minimal_mapping_2019-12-03T135751.xlsx differ