diff --git a/Extract_Bibliographic_Info_From_Alma.ipynb b/Extract_Bibliographic_Info_From_Alma.ipynb index 3ed2eddda18e70c813fdc00caebd8d318e2b6576..49b1428a978aa406cb887cbc48c793a1146a8088 100644 --- a/Extract_Bibliographic_Info_From_Alma.ipynb +++ b/Extract_Bibliographic_Info_From_Alma.ipynb @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -105,11 +105,11 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "mapping = pd.read_csv('minimal_mapping.csv')\n", + "mapping = pd.read_csv('mapping.csv')\n", "mapping = mapping.where((pd.notnull(mapping)), None)" ] }, @@ -122,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -155,29 +155,29 @@ " \n", " \n", " 0\n", - " 100 1 _ $$a\n", + " 009\n", " None\n", " None\n", - " Verfasser\n", + " Systemnummer\n", " \n", " \n", - " 23\n", - " 689 * * $$a\n", + " 33\n", + " AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j\n", " None\n", - " ;\n", - " Schlagworte\n", + " None\n", + " Signatur\n", " \n", " \n", "\n", "" ], "text/plain": [ - " pattern selector collector label\n", - "0 100 1 _ $$a None None Verfasser\n", - "23 689 * * $$a None ; Schlagworte" + " pattern selector collector label\n", + "0 009 None None Systemnummer\n", + "33 AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j None None Signatur" ] }, - "execution_count": 23, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -202,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -220,17 +220,17 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Column 'Schlagworte' with build_extractor parameters\n", - "\tpattern '689 * * $$a'\n", + "Column 'Signatur' with build_extractor parameters\n", + "\tpattern 'AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j'\n", "\tselector 'None'\n", - "\tcollector_character ';'\n", + "\tcollector_character 'None'\n", "\n" ] } @@ -256,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -283,14 +283,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def uid_list_to_excel(uid_list, excel_file_name_stem):\n", " data = [get_bibliographic_for_uid(uid) for uid in uid_list]\n", " df = pd.DataFrame(data)\n", - " # df = post(df)\n", + " df = post(df)\n", " df.to_excel(f'Output/{excel_file_name_stem}_{now()}.xlsx', index=False)" ] }, @@ -303,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -353,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -393,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -419,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -440,7 +440,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -477,7 +477,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -512,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -522,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/Output/Test_2019-11-19T162840.xlsx b/Output/Test_2019-11-19T162840.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..faaf96cfe5a5eeec44ef8ac66c9ecffdee564072 Binary files /dev/null and b/Output/Test_2019-11-19T162840.xlsx differ diff --git a/mapping.csv b/mapping.csv index 44b9528c7d95a31ff3ee2b7ebe1fafbee452b4bd..165b57d1763da9b41b83ac09073529e816d58449 100644 --- a/mapping.csv +++ b/mapping.csv @@ -1,35 +1,35 @@ -MARC controlfield,MARC extra selector,Liste,MARC-XML controlfield,Label,Comment -009,,,"",Systemnummer,AC-Nummer -001,,,"",MMS-ID,MMS-ID -856 4 0 $$u,,,"",Volltext,Volltextlink aus 856 4 0 $$u -100 1 _ $$a ; 100 1 _ $$0,,," ; ",Verfasser ; GND-ID,Autor von Verfasserwerken. Bei Titelwerken leer. Als Trennzeichen Strichpunkt. Danach aus Subfield 0 GND-ID -240 1 0 $$a> 240 1 0 $$t >130 0 _ $$a > 130 0 _ $$t,,,"OR OR OR ",Werktitel,Bei Verfasserwerken 240 1 0Bei Titelwerken 130 0 -490 1 _ $$a ; 490 1 _ $$v,,," ; ",Reihentitel ; Bandzählung,Reihentitel in 490 1 _ ; Trennzeichen ; Bandzählung in Subfield $$v -245 0 0 $$a > 245 1 0 $$a ; 245 0 0 $$b > 245 1 0 $$b,,," OR ; OR ",Haupttitel ; Titelzusatz,Hauptitel aus Subfield $$a ; Trennzeichen ; Titelzusatz aus Subfield $$b -245 0 0 $$n > 245 1 0 $$n ; 245 0 0 $$p > 245 1 0 $$p,,," ; ",Bandzählung ; Titel des Bandes,Bandzählung aus $$n ; Trennzeichen ; Titel des Bandes aus $$p -250 0 0 $$a,,,"",Ausgabe,Ausgabebezeichnung -264 _ 1 $$a,,,"",Verlagsort, -751 _ _ $$a ;751 _ _ $$0,,;," ; ",Verlagsort normiert ; GND-ID,Wert aus 751 ; Trennzeichen ; Subfield 0 mit GND-ID ; Wiederholbar -264 _ 1 $$b > 264 _ 3 $$b,,," OR ",Verleger und Drucker, -700 1 _ $$a > 710 2 _ $$a;700 1 _ $$0 > 710 2 _ $$0,$$4 pbl > $$4 pbl;$$4 pbl > $$4 pbl,;,"&=pbl ; > &=pbl ; ",Verleger normiert ; GND-ID,"Unter den 700 1 _ denjenigen, der im Subfield 4 den Wert pbl hat. Wenn dort kein Wert dann aus 710 Ind1=2 die Körperschaft." -700 1 _ $$a ; 700 1 _ $$0,$$4 prt ; $$4 prt,,"&=prt",Drucker ; GND-ID,"Unter den 700 1 _ denjenigen, der im Subfield 4 den Wert prt hat." -264 _ 1 $$c>264 _ 3 $$c,,,"",Erscheinungsjahr, -300 _ _ $$a,,,"",Kollation, -300 _ _ $$b,,,"",Illustrationen, -300 _ _ $$c,,,"",Format, -500 _ _ $$a,,," ;;;",Allgemeine Anmerkungen, -041 1 _ $$a>041 _ _ $$a,,;,">",Sprache, -041 1 _ $$h,,,"",Originalsprache, -024 7 _ $$a,,,"",Standardnummer, -700 1 _ $$a ; 700 1 _ $$0,$$4 aut ; $$4 aut,;," ; ",Weitere Verfasser ; GND-ID, -700 1 _ $$a ; 700 1 _ $$0,$$4 edt ; $$4 edt,;,"&= edt ; ",Herausgeber ; GND-ID,"Alle weiteren 700 1 _ , die nicht prt oder pbl." -700 1 _ $$a ; 700 1 _ $$0,$$4 trl ; $$4 trl,;,"&= trl ; ",Übersetzer ; GND-ID, -700 1 _ $$a ; 700 1 _ $$0,$$4 ctb ; $$4 ctb,;,"&= ctb ; ",Beiträger ; GND-ID, -700 1 _ $$a ; 700 1 _ $$0 ; 700 1 _ $$a ; 700 1 _ $$0,$$4 oth ; $$4 oth ; $$4 com ; $$4 com,;,"&= oth OR com ; ",Weitere Beteiligte ; GND-ID, -700 1 _ $$a ; 700 1 _ $$0 ; 700 1 _ $$a ; 700 1 _ $$0,$$4 egr ; $$4 egr ; $$4 ill ; $$4 ill,;,"&=egr OR ill ; ",Illustratoren ; GND-ID, -700 1 _ $$a > 710 2 _ $$a;700 1 _ $$0 > 710 2 _ $$0,$$4 dte > $$4 dte;$$4 dte > $$4 dte,;,"&=dte ; ",Widmungsempfänger ; GND-ID, -655 _ 7 $$a,,;,"",Art des Inhalts, -505 8 0 $$a >505 8 0 $$t,,;,"OR",Inhalt,Enthält in der Regel längeren Text -700 1 2 $$a ;700 1 2 $$t > 730 0 2 $$t,,;," ; OR ",Enthaltenen Werke,Verfasser des Enthaltenen Werkes ; Werktitel -689 * * $$a,,;,"",Schlagworte,Alle Schlagworte aus 689 getrennt durch ';' -AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j,,,"&&",Signatur,"Signatur aus Subfield $$d, danach ohne Trennzeichen, nur mit Leerzeichen $$i, danach ohne Trennzeichen, nur mit Leerzeichen $$j" +"pattern","selector","collector","label" +"009",,,"Systemnummer" +"001",,,"MMS-ID" +"856 4 0 $$u",,,"Volltext" +"100 1 _ $$a ; 100 1 _ $$0",,,"Verfasser ; GND-ID" +"240 1 0 $$a> 240 1 0 $$t >130 0 _ $$a > 130 0 _ $$t",,,"Werktitel" +"490 1 _ $$a ; 490 1 _ $$v",,,"Reihentitel ; Bandzählung" +"245 0 0 $$a > 245 1 0 $$a ; 245 0 0 $$b > 245 1 0 $$b",,,"Haupttitel ; Titelzusatz" +"245 0 0 $$n > 245 1 0 $$n ; 245 0 0 $$p > 245 1 0 $$p",,,"Bandzählung ; Titel des Bandes" +"250 0 0 $$a",,,"Ausgabe" +"264 _ 1 $$a",,,"Verlagsort" +"751 _ _ $$a ;751 _ _ $$0",,";","Verlagsort normiert ; GND-ID" +"264 _ 1 $$b > 264 _ 3 $$b",,,"Verleger und Drucker" +"700 1 _ $$a > 710 2 _ $$a;700 1 _ $$0 > 710 2 _ $$0","$$4 pbl > $$4 pbl;$$4 pbl > $$4 pbl",";","Verleger normiert ; GND-ID" +"700 1 _ $$a ; 700 1 _ $$0","$$4 prt ; $$4 prt",,"Drucker ; GND-ID" +"264 _ 1 $$c>264 _ 3 $$c",,,"Erscheinungsjahr" +"300 _ _ $$a",,,"Kollation" +"300 _ _ $$b",,,"Illustrationen" +"300 _ _ $$c",,,"Format" +"500 _ _ $$a",,,"Allgemeine Anmerkungen" +"041 1 _ $$a>041 _ _ $$a",,";","Sprache" +"041 1 _ $$h",,,"Originalsprache" +"024 7 _ $$a",,,"Standardnummer" +"700 1 _ $$a ; 700 1 _ $$0","$$4 aut ; $$4 aut",";","Weitere Verfasser ; GND-ID" +"700 1 _ $$a ; 700 1 _ $$0","$$4 edt ; $$4 edt",";","Herausgeber ; GND-ID" +"700 1 _ $$a ; 700 1 _ $$0","$$4 trl ; $$4 trl",";","Übersetzer ; GND-ID" +"700 1 _ $$a ; 700 1 _ $$0","$$4 ctb ; $$4 ctb",";","Beiträger ; GND-ID" +"700 1 _ $$a ; 700 1 _ $$0 ; 700 1 _ $$a ; 700 1 _ $$0","$$4 oth ; $$4 oth ; $$4 com ; $$4 com",";","Weitere Beteiligte ; GND-ID" +"700 1 _ $$a ; 700 1 _ $$0 ; 700 1 _ $$a ; 700 1 _ $$0","$$4 egr ; $$4 egr ; $$4 ill ; $$4 ill",";","Illustratoren ; GND-ID" +"700 1 _ $$a > 710 2 _ $$a;700 1 _ $$0 > 710 2 _ $$0","$$4 dte > $$4 dte;$$4 dte > $$4 dte",";","Widmungsempfänger ; GND-ID" +"655 _ 7 $$a",,";","Art des Inhalts" +"505 8 0 $$a >505 8 0 $$t",,";","Inhalt" +"700 1 2 $$a ;700 1 2 $$t > 730 0 2 $$t",,";","Enthaltenen Werke" +"689 * * $$a",,";","Schlagworte" +"AVA _ _ $$d ; AVA _ _ $$i ; AVA _ _ $$j",,,"Signatur"