diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb
index 9d60c5f692064b618c5e9c03953f27b363768c75..865ad535a5f4b37f77ddcad65df14bedb92488dc 100644
--- a/txtDownloader.ipynb
+++ b/txtDownloader.ipynb
@@ -2,16 +2,17 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import urllib.request, json, shutil"
+    "import urllib.request, json, shutil, os\n",
+    "from zipfile import ZipFile"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -20,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -51,6 +52,7 @@
     "data = json.loads(response.read())\n",
     "\n",
     "i=0\n",
+    "txt_files = []  # names of the page files written by the loop, zipped afterwards\n",
     "\n",
     "for page in data[\"sequences\"][0][\"canvases\"]:\n",
     "    txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n",
@@ -58,7 +60,16 @@
     "    print(txt_url)\n",
     "    output_file = str(i)+\".txt\"\n",
     "    with urllib.request.urlopen(txt_url) as response, open(output_file, 'wb') as out_file:\n",
-    "        shutil.copyfileobj(response, out_file)"
+    "        shutil.copyfileobj(response, out_file)\n",
+    "    txt_files.append(output_file)\n",
+    "\n",
+    "# Build the archive once the download loop has finished; the context manager\n",
+    "# finalises and closes the zip even if adding a member raises.\n",
+    "with ZipFile('downloadTXT.zip', 'w') as zipObj:\n",
+    "    for txt_file in txt_files:\n",
+    "        # Store each member under its bare file name so the archive does not\n",
+    "        # embed the absolute directory layout of the machine running the notebook.\n",
+    "        zipObj.write(txt_file, arcname=os.path.basename(txt_file))"
    ]
   },
   {