From 12ce6eefb05286f4af64fd8e4afdc370619d1426 Mon Sep 17 00:00:00 2001
From: onb1259 <onb1259@onb.ac.at>
Date: Wed, 15 Nov 2023 13:15:48 +0100
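Subject: [PATCH] updated iiif_utils to newest version

Changes in this update:
* get_imgurls_from_collectionurl no longer prints a progress counter.
* The single-image download is extracted into its own helper, which
  download_images now calls.
* download_images_multithreded defaults to 10 threads, never uses a chunk
  step of 0, names its threads and waits until they have finished.
* create_paths_from_iiifurls builds the file name from the whole image URL
  (':' becomes '-', '/' becomes '_').
* New add_annotations() appends a bounding-box annotation list to each
  canvas of a manifest.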

---
 iiif_utils.py | 59 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/iiif_utils.py b/iiif_utils.py
index b739cb4..70e7860 100644
--- a/iiif_utils.py
+++ b/iiif_utils.py
@@ -2,6 +2,7 @@ import requests
 import json
 import threading
 import os
+from time import sleep
 
 
 def get_json_by_url(url):
@@ -23,32 +24,66 @@ def get_imgurls_from_manifesturl(url):
 
 def get_imgurls_from_collectionurl(url):
     res = []
-    i = 0
     for manifest_url in get_manifesturls_from_collectionurl(url):
         tmp = get_imgurls_from_manifesturl(manifest_url)
-        print(i)
-        i += 1
         for el in tmp:
             res.append(el)
     return res
 
+def downlaod_image(img_url, img_path):  # fetch one image and write it to img_path; NOTE(review): name is misspelled, and download_images calls it by this spelling
+    img_bytes = requests.get(img_url).content  # NOTE(review): no timeout and no HTTP status check, so an error response body is written as if it were the image
+    with open(img_path, '+wb') as img_file:  # '+wb': binary write mode that creates or truncates the file
+        img_file.write(img_bytes)
+
 def download_images(img_urls, img_paths):
     for u, n in zip(img_urls, img_paths):
-        img_bytes = requests.get(u).content
-        with open(n, '+wb') as img_file:
-            img_file.write(img_bytes)
+        downlaod_image(u, n)  # one download at a time; zip() stops at the shorter of the two lists
 
-def download_images_multithreded(img_urls, img_paths, nb_threads=5):
-    chunk_idcs = list(range(0, len(img_urls), int(len(img_urls)/nb_threads))) + [len(img_urls)]
+def download_images_multithreded(img_urls, img_paths, nb_threads=10):  # download in parallel threads and block until they are done
+    chunk_idcs = list(range(0, len(img_urls), int(len(img_urls)/nb_threads) if int(len(img_urls)/nb_threads)!=0 else 1)) + [len(img_urls)]  # step = floor(len/nb_threads), at least 1; NOTE(review): when len is not a multiple of nb_threads this gives more than nb_threads chunks
     chunks_urls = [img_urls[i:j] for i,j in zip(chunk_idcs[:-1], chunk_idcs[1:])]
     chunks_paths = [img_paths[i:j] for i,j in zip(chunk_idcs[:-1], chunk_idcs[1:])]
+    i = 0  # running index used only to give each thread a distinct name
     for urls, paths in zip(chunks_urls, chunks_paths):
-        threading.Thread(target=download_images, args=[urls, paths]).start()
+        threading.Thread(target=download_images, args=[urls, paths], name=f'DownloadThread-{i}').start()  # one thread per chunk
+        i += 1
+    # wait for all download threads to finish: poll once per second until no live thread name contains 'DownloadThread'
+    while any(['DownloadThread' in e.name for e in threading.enumerate()]):  # NOTE(review): matches by name, so it also waits on such threads started by other concurrent calls
+        sleep(1)
 
 def create_paths_from_iiifurls(img_urls, base_path=''):
-    res = [el.split('images/')[1] for el in img_urls]
-    res = ['_'.join(el.split('/')[:-4]) for el in res]
-    res = [el+'.'+u.split('.')[-1] for el, u in zip(res, img_urls)]
+    res = [e.replace(':', '-').replace('/', '_') for e in img_urls]  # turn the whole URL into one flat file name: ':' -> '-', '/' -> '_'
     res = [os.path.join(base_path, el) for el in res]
     return res
 
+def add_annotations(list_of_boundingboxes, manifest):  # append one annotation list per canvas; manifest is modified in place and also returned
+    for i, _ in enumerate(manifest['sequences'][0]['canvases']):  # only the canvases of the first sequence are visited
+        id = manifest['sequences'][0]['canvases'][i]['images'][0]['@id']  # '@id' of the canvas's first image; NOTE(review): shadows the builtin id()
+        bounding_box = list_of_boundingboxes[i]  # boxes are matched to canvases by position; IndexError if the list is shorter than the canvases
+        # skip canvases without a box (None or any other falsy value, e.g. an empty list)
+        if not bounding_box:
+            continue
+        annotation = {
+            "@context": "http://iiif.io/api/presentation/2/context.json",
+            "@type": "sc:AnnotationList",
+            "@id": f"{id}/annotations",
+            "resources": [
+            {
+                "@type": "oa:Annotation",
+                "motivation": "sc:painting",
+                "resource": {
+                    "@type": "oa:SpecificResource",
+                    "selector": {
+                    "@type": "oa:FragmentSelector",
+                    "value": f"xywh={bounding_box[0]},{bounding_box[1]},{bounding_box[0]+bounding_box[2]},{bounding_box[2]+bounding_box[3]}"  # NOTE(review): emits b0,b1,b0+b2,b2+b3; the last term mixes indices 2 and 3, and xywh is normally x,y,width,height. Confirm the box format
+                    },
+                    "style": "rect",
+                    "label": "Detected figure"
+               },
+                "on": f"{id}"
+            }
+            ]
+        }
+        manifest['sequences'][0]['canvases'][i]['otherContent'].append(annotation)  # NOTE(review): the canvas must already have an 'otherContent' list; a plain dict raises KeyError otherwise
+    return manifest
+
-- 
GitLab