From 12ce6eefb05286f4af64fd8e4afdc370619d1426 Mon Sep 17 00:00:00 2001 From: onb1259 <onb1259@onb.ac.at> Date: Wed, 15 Nov 2023 13:15:48 +0100 Subject: [PATCH] updated iiif_utils to newest version --- iiif_utils.py | 59 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/iiif_utils.py b/iiif_utils.py index b739cb4..70e7860 100644 --- a/iiif_utils.py +++ b/iiif_utils.py @@ -2,6 +2,7 @@ import requests import json import threading import os +from time import sleep def get_json_by_url(url): @@ -23,32 +24,66 @@ def get_imgurls_from_manifesturl(url): def get_imgurls_from_collectionurl(url): res = [] - i = 0 for manifest_url in get_manifesturls_from_collectionurl(url): tmp = get_imgurls_from_manifesturl(manifest_url) - print(i) - i += 1 for el in tmp: res.append(el) return res +def downlaod_image(img_url, img_path): + img_bytes = requests.get(img_url).content + with open(img_path, '+wb') as img_file: + img_file.write(img_bytes) + def download_images(img_urls, img_paths): for u, n in zip(img_urls, img_paths): - img_bytes = requests.get(u).content - with open(n, '+wb') as img_file: - img_file.write(img_bytes) + downlaod_image(u, n) -def download_images_multithreded(img_urls, img_paths, nb_threads=5): - chunk_idcs = list(range(0, len(img_urls), int(len(img_urls)/nb_threads))) + [len(img_urls)] +def download_images_multithreded(img_urls, img_paths, nb_threads=10): + chunk_idcs = list(range(0, len(img_urls), int(len(img_urls)/nb_threads) if int(len(img_urls)/nb_threads)!=0 else 1)) + [len(img_urls)] chunks_urls = [img_urls[i:j] for i,j in zip(chunk_idcs[:-1], chunk_idcs[1:])] chunks_paths = [img_paths[i:j] for i,j in zip(chunk_idcs[:-1], chunk_idcs[1:])] + i = 0 for urls, paths in zip(chunks_urls, chunks_paths): - threading.Thread(target=download_images, args=[urls, paths]).start() + threading.Thread(target=download_images, args=[urls, paths], name=f'DownloadThread-{i}').start() + i += 1 + # wait for all download threads to finish + while any(['DownloadThread' in e.name for e in threading.enumerate()]): + sleep(1) def create_paths_from_iiifurls(img_urls, base_path=''): - res = [el.split('images/')[1] for el in img_urls] - res = ['_'.join(el.split('/')[:-4]) for el in res] - res = [el+'.'+u.split('.')[-1] for el, u in zip(res, img_urls)] + res = [e.replace(':', '-').replace('/', '_') for e in img_urls] res = [os.path.join(base_path, el) for el in res] return res +def add_annotations(list_of_boundingboxes, manifest): + for i, _ in enumerate(manifest['sequences'][0]['canvases']): + id = manifest['sequences'][0]['canvases'][i]['images'][0]['@id'] + bounding_box = list_of_boundingboxes[i] + # if bounding box is None, skip + if not bounding_box: + continue + annotation = { + "@context": "http://iiif.io/api/presentation/2/context.json", + "@type": "sc:AnnotationList", + "@id": f"{id}/annotations", + "resources": [ + { + "@type": "oa:Annotation", + "motivation": "sc:painting", + "resource": { + "@type": "oa:SpecificResource", + "selector": { + "@type": "oa:FragmentSelector", + "value": f"xywh={bounding_box[0]},{bounding_box[1]},{bounding_box[0]+bounding_box[2]},{bounding_box[2]+bounding_box[3]}" + }, + "style": "rect", + "label": "Detected figure" + }, + "on": f"{id}" + } + ] + } + manifest['sequences'][0]['canvases'][i]['otherContent'].append(annotation) + return manifest + -- GitLab