From f01e549e6189f1d2db3f0d2ab7cee565ffef65de Mon Sep 17 00:00:00 2001 From: onb1259 <onb1259@onb.ac.at> Date: Mon, 13 Nov 2023 11:27:39 +0100 Subject: [PATCH] added functionality to add iiif annotations to manifest --- extract_figures.ipynb | 13 ++++++++++--- iiif_utils.py | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/extract_figures.ipynb b/extract_figures.ipynb index 13bdbd8..b305145 100644 --- a/extract_figures.ipynb +++ b/extract_figures.ipynb @@ -21,12 +21,13 @@ "outputs": [], "source": [ "import os, shutil\n", + "import json\n", "from zipapp import zipfile\n", "from PIL import Image\n", "from ultralytics import YOLO\n", "from IPython.display import display, FileLink, HTML\n", "\n", - "from iiif_utils import get_imgurls_from_manifesturl, create_paths_from_iiifurls, download_images_multithreded\n", + "from iiif_utils import get_imgurls_from_manifesturl, create_paths_from_iiifurls, download_images_multithreded, add_annotations, get_json_by_url\n", "\n", "# Url of iiif manifest\n", "input_manifest = 'https://iiif.onb.ac.at/presentation/ABO/+Z105572305/manifest'" @@ -69,17 +70,23 @@ "os.mkdir(RESULTS_DIR)\n", "model = YOLO('model_extract_figures.pt')\n", "counter = 0\n", - "for path in imgpaths:\n", + "list_of_bbs = [None] * len(imgpaths)\n", + "for j, path in enumerate(imgpaths):\n", " result = model.predict(path, verbose=False)\n", " img = Image.open(path)\n", " for i, bb in enumerate(result[0].boxes.xyxy.tolist()):\n", " #print(f'Figure detected in: {path}')\n", " counter += 1\n", " bb = [int(e) for e in bb]\n", + " list_of_bbs[j] = bb\n", " img_cropped = img.crop(bb)\n", " path_cropped = os.path.join(RESULTS_DIR, os.path.basename(path.rsplit('.', 1)[0] + '_crop' + str(i) + '.' + path.rsplit('.', 1)[1]))\n", " img_cropped.save(path_cropped)\n", - "print(f'Total number of extracted figures: {counter}')" + "print(f'Total number of extracted figures: {counter}')\n", + "\n", + "manifest_annotated = add_annotations(list_of_bbs, get_json_by_url(input_manifest))\n", + "with open(os.path.join(RESULTS_DIR, 'annotated_manifest.json'), 'w+') as f:\n", + " json.dump(manifest_annotated, f, indent=2)" ] }, { diff --git a/iiif_utils.py b/iiif_utils.py index 723c2e8..69418c9 100644 --- a/iiif_utils.py +++ b/iiif_utils.py @@ -56,14 +56,37 @@ def create_paths_from_iiifurls(img_urls, base_path=''): res = [os.path.join(base_path, el) for el in res] return res +def add_annotations(list_of_boundingboxes, manifest): + for i, _ in enumerate(manifest['sequences'][0]['canvases']): + id = manifest['sequences'][0]['canvases'][i]['images'][0]['@id'] + bounding_box = list_of_boundingboxes[i] + # if bounding box is None, skip + if not bounding_box: + continue + annotation = { + "@context": "http://iiif.io/api/presentation/2/context.json", + "@type": "sc:AnnotationList", + "@id": f"{id}/annotations", + "resources": [ + { + "@type": "oa:Annotation", + "motivation": "sc:painting", + "resource": { + "@type": "oa:Choice", + "default": { + "@type": "oa:SpecificResource", + "selector": { + "@type": "oa:FragmentSelector", + "value": f"xywh={bounding_box[0]},{bounding_box[1]},{bounding_box[0]+bounding_box[2]},{bounding_box[2]+bounding_box[3]}" + }, + "style": "rect", + "label": "Detected figure" + } + }, + "on": f"{id}" + } + ] + } + manifest['sequences'][0]['canvases'][i]['otherContent'].append(annotation) + return manifest -# collection_url = 'https://iiif.onb.ac.at/presentation/collection/labs_botanical_illustrations' -# urls = get_imgurls_from_collectionurl(collection_url) -# paths = create_paths_from_iiifurls(urls, 'downloads') -# download_images_multithreded(urls, paths) - -# collection_url = 'https://iiif.onb.ac.at/presentation/collection/apz_1841' -# manifest_url = 'https://iiif.onb.ac.at/presentation/ANNO/apz18411229/manifest/' -# urls = get_imgurls_from_manifesturl(manifest_url) -# paths = create_paths_from_iiifurls(urls, 'downloads') -# download_images_multithreded(urls, paths) -- GitLab