diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb index 865ad535a5f4b37f77ddcad65df14bedb92488dc..da12ebebac18aec7ceea5b61f3718036eaed3429 100644 --- a/txtDownloader.ipynb +++ b/txtDownloader.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -10,40 +10,76 @@ "from zipfile import ZipFile" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Enter the barcode of the item to download when prompted, e.g. +Z196807705" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+Z196807705\n" + ] + } + ], "source": [ - "url=\"https://iiif.onb.ac.at/presentation/ABO/+Z196807705/manifest/\"" + "barcode=input().strip() # drop stray whitespace so it cannot end up in the URL" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://iiif.onb.ac.at/presentation/ABO/+Z196807705/manifest/'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url=\"https://iiif.onb.ac.at/presentation/ABO/\"+barcode+\"/manifest/\"\n", + "url" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000001.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000002.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000003.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000004.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000005.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000006.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000007.txt\n", - 
"https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000008.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000009.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000010.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000011.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000012.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000013.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000014.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000015.txt\n", - "https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000016.txt\n" + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000001.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000002.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000003.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000004.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000005.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000006.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000007.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000008.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000009.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000010.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000011.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000012.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000013.txt\n", + "downloading 
https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000014.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000015.txt\n", + "downloading https://iiif.onb.ac.at/presentation/ABO/Z196807705/resource/00000016.txt\n" ] } ], @@ -52,17 +88,24 @@ "data = json.loads(response.read())\n", "\n", "i=0\n", - "zipObj = ZipFile('downloadTXT.zip', 'w')\n", + "zipObj = ZipFile(barcode + \".zip\", \"w\")\n", + "all_txt = open(\"all.txt\",\"wb\")\n", "\n", "for page in data[\"sequences\"][0][\"canvases\"]:\n", " txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n", " i=i+1\n", - " print(txt_url)\n", - " output_file = str(i)+\".txt\"\n", - " with urllib.request.urlopen(txt_url) as response, open(output_file, 'wb') as out_file:\n", + " print(\"downloading \" + txt_url)\n", + " output_file = str(i) + \".txt\"\n", + " with urllib.request.urlopen(txt_url) as response, open(output_file, \"wb\") as out_file:\n", " shutil.copyfileobj(response, out_file)\n", - " zipObj.write(os.path.abspath(output_file))\n", - " \n", + " zipObj.write(os.path.abspath(output_file),output_file)\n", + " txt_page = open(output_file, \"rb\")\n", + " shutil.copyfileobj(txt_page, all_txt)\n", + " txt_page.close() # release the page file before it is deleted\n", + " os.remove(output_file)\n", + "all_txt.close() # flush buffered text so the archived all.txt is complete\n", + "zipObj.write(\"all.txt\")\n", + "os.remove(\"all.txt\")\n", "zipObj.close()" ] },