From b1ce712a157c6dee039a2c82bbc4e87e8f18569a Mon Sep 17 00:00:00 2001 From: Georg Petz Date: Tue, 6 Apr 2021 15:04:00 +0200 Subject: [PATCH] add mybinder --- README.md | 1 + txtDownloader.ipynb | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..3432358 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Flabs.onb.ac.at%2Fgitlab%2Fgeorgp%2Fsacha-txt-downloader/HEAD?filepath=txtDownloader.ipynb) \ No newline at end of file diff --git a/txtDownloader.ipynb b/txtDownloader.ipynb index 7a2b991..9d60c5f 100644 --- a/txtDownloader.ipynb +++ b/txtDownloader.ipynb @@ -2,16 +2,16 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import urllib.request, json" + "import urllib.request, json, shutil" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -50,9 +50,15 @@ "response = urllib.request.urlopen(url)\n", "data = json.loads(response.read())\n", "\n", + "i=0\n", + "\n", "for page in data[\"sequences\"][0][\"canvases\"]:\n", " txt_url = page[\"otherContent\"][0][\"resources\"][0][\"resource\"][\"@id\"]\n", - " print(txt_url)" + " i=i+1\n", + " print(txt_url)\n", + " output_file = str(i)+\".txt\"\n", + " with urllib.request.urlopen(txt_url) as response, open(output_file, 'wb') as out_file:\n", + " shutil.copyfileobj(response, out_file)" ] }, { @@ -79,7 +85,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.8.5" } }, "nbformat": 4, -- GitLab