From 361264aef739a0c69708534b2741bd802415e8de Mon Sep 17 00:00:00 2001 From: Georg Petz Date: Wed, 7 Apr 2021 12:06:25 +0200 Subject: [PATCH] add zip --- txtDownloader.ipynb | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb index 9d60c5f..865ad53 100644 --- a/txtDownloader.ipynb +++ b/txtDownloader.ipynb @@ -2,16 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "import urllib.request, json, shutil" + "import urllib.request, json, shutil, os\n", + "from zipfile import ZipFile" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -51,6 +52,7 @@ "data = json.loads(response.read())\n", "\n", "i=0\n", + "zipObj = ZipFile('downloadTXT.zip', 'w')\n", "\n", "for page in data[\"sequences\"][0][\"canvases\"]:\n", " txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n", @@ -58,7 +60,10 @@ " print(txt_url)\n", " output_file = str(i)+\".txt\"\n", " with urllib.request.urlopen(txt_url) as response, open(output_file, 'wb') as out_file:\n", - " shutil.copyfileobj(response, out_file)" + " shutil.copyfileobj(response, out_file)\n", + " zipObj.write(os.path.abspath(output_file))\n", + " \n", + "zipObj.close()" ] }, { -- GitLab