From 55fb76e336a82665532645bd2a0f0043a9519b2c Mon Sep 17 00:00:00 2001 From: Georg Petz Date: Fri, 25 Jun 2021 09:10:27 +0200 Subject: [PATCH] update --- .gitignore | 2 ++ txtDownloader.ipynb | 43 +++++++++++++++---------------------------- 2 files changed, 17 insertions(+), 28 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8f38060 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ + +venv/ diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb index 7039e21..f010c36 100644 --- a/txtDownloader.ipynb +++ b/txtDownloader.ipynb @@ -15,50 +15,39 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "e.g.:+Z196807705" + "Insert a Barcode from the ABO project into the input box, e.g.:+Z196807705." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "+Z196807705\n" + "+Z196807705\n", + "URL of the IIIF Manifest: https://iiif.onb.ac.at/presentation/ABO/+Z196807705/manifest/\n" ] } ], "source": [ - "barcode=input() " + "barcode=input()\n", + "url=\"https://iiif.onb.ac.at/presentation/ABO/\"+barcode+\"/manifest/\"\n", + "print (\"URL of the IIIF Manifest: \" + url)" ] }, { - "cell_type": "code", - "execution_count": 3, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'https://iiif.onb.ac.at/presentation/ABO/+Z196807705/manifest/'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "url=\"https://iiif.onb.ac.at/presentation/ABO/\"+barcode+\"/manifest/\"\n", - "url" + "Click on {Barcode}.zip to actually download the zip to your computer." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -89,10 +78,10 @@ "+Z196807705.zip
" ], "text/plain": [ - "/home/georg/Projekte/sacha/sacha-txt-downloader/+Z196807705.zip" + "C:\\Projekte\\sacha-txt-downloader\\+Z196807705.zip" ] }, - "execution_count": 4, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -101,15 +90,13 @@ "response = urllib.request.urlopen(url)\n", "data = json.loads(response.read())\n", "\n", - "i=0\n", "zipObj = ZipFile(barcode + \".zip\", \"w\")\n", "all_txt = open(\"all.txt\",\"wb\")\n", "\n", "for page in data[\"sequences\"][0][\"canvases\"]:\n", " txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n", - " i=i+1\n", " print(\"downloading \" + txt_url)\n", - " output_file = str(i) + \".txt\"\n", + " output_file = str(txt_url.split('/')[-1])\n", " with urllib.request.urlopen(txt_url) as response, open(output_file, \"wb\") as out_file:\n", " shutil.copyfileobj(response, out_file)\n", " zipObj.write(os.path.abspath(output_file),output_file)\n", @@ -148,9 +135,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} -- GitLab