diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..343235841252bc9b2be18c2dcaa1fbd468e4279e --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Flabs.onb.ac.at%2Fgitlab%2Fgeorgp%2Fsacha-txt-downloader/HEAD?filepath=txtDownloader.ipynb) \ No newline at end of file diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb index 7a2b9916f74f39bb8361fc0efda522f242b20abe..9d60c5f692064b618c5e9c03953f27b363768c75 100644 --- a/txtDownloader.ipynb +++ b/txtDownloader.ipynb @@ -2,16 +2,16 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import urllib.request, json" + "import urllib.request, json, shutil" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -50,9 +50,15 @@ "response = urllib.request.urlopen(url)\n", "data = json.loads(response.read())\n", "\n", + "i=0\n", + "\n", "for page in data[\"sequences\"][0][\"canvases\"]:\n", " txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n", - " print(txt_url)" + " i=i+1\n", + " print(txt_url)\n", + " output_file = str(i)+\".txt\"\n", + " with urllib.request.urlopen(txt_url) as response, open(output_file, 'wb') as out_file:\n", + " shutil.copyfileobj(response, out_file)" ] }, { @@ -79,7 +85,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.8.5" } }, "nbformat": 4,