diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..8f380606104f82e87c1de7ec02cba63e2d2a11d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ + +venv/ diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb index 7039e217b6cc3f6481a4908105dd80f28f2120cb..f010c36a735425442205f2d32f46b4cec1e7895f 100644 --- a/txtDownloader.ipynb +++ b/txtDownloader.ipynb @@ -15,50 +15,39 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "e.g.:+Z196807705" + "Insert a Barcode from the ABO project into the input box, e.g.:+Z196807705." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "+Z196807705\n" + "+Z196807705\n", + "URL of the IIIF Manifest: https://iiif.onb.ac.at/presentation/ABO/+Z196807705/manifest/\n" ] } ], "source": [ - "barcode=input() " + "barcode=input()\n", + "url=\"https://iiif.onb.ac.at/presentation/ABO/\"+barcode+\"/manifest/\"\n", + "print (\"URL of the IIIF Manifest: \" + url)" ] }, { - "cell_type": "code", - "execution_count": 3, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'https://iiif.onb.ac.at/presentation/ABO/+Z196807705/manifest/'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "url=\"https://iiif.onb.ac.at/presentation/ABO/\"+barcode+\"/manifest/\"\n", - "url" + "Click on {Barcode}.zip to actually download the zip to your computer." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -89,10 +78,10 @@ "+Z196807705.zip
" ], "text/plain": [ - "/home/georg/Projekte/sacha/sacha-txt-downloader/+Z196807705.zip" + "C:\\Projekte\\sacha-txt-downloader\\+Z196807705.zip" ] }, - "execution_count": 4, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -101,15 +90,13 @@ "response = urllib.request.urlopen(url)\n", "data = json.loads(response.read())\n", "\n", - "i=0\n", "zipObj = ZipFile(barcode + \".zip\", \"w\")\n", "all_txt = open(\"all.txt\",\"wb\")\n", "\n", "for page in data[\"sequences\"][0][\"canvases\"]:\n", " txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n", - " i=i+1\n", " print(\"downloading \" + txt_url)\n", - " output_file = str(i) + \".txt\"\n", + " output_file = str(txt_url.split('/')[-1])\n", " with urllib.request.urlopen(txt_url) as response, open(output_file, \"wb\") as out_file:\n", " shutil.copyfileobj(response, out_file)\n", " zipObj.write(os.path.abspath(output_file),output_file)\n", @@ -148,9 +135,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +}