"print(f'Total number of downloaded pages: {len(os.listdir(IMAGES_DIR))}')"
]
]
},
},
{
{
...
@@ -96,7 +97,7 @@
...
@@ -96,7 +97,7 @@
"source": [
"source": [
"# Create a zip file with extracted figures and show them\n",
"# Create a zip file with extracted figures and show them\n",
"\n",
"\n",
"RESULT_ZIP = 'exatrcted_figures.zip'\n",
"RESULT_ZIP = 'extracted_figures.zip'\n",
"with zipfile.ZipFile(RESULT_ZIP, 'w') as myzip:\n",
"with zipfile.ZipFile(RESULT_ZIP, 'w') as myzip:\n",
" for f in os.listdir(RESULTS_DIR): \n",
" for f in os.listdir(RESULTS_DIR): \n",
" myzip.write(os.path.join(RESULTS_DIR, f))\n",
" myzip.write(os.path.join(RESULTS_DIR, f))\n",
...
...
%% Cell type:markdown id:82ab6471 tags:
%% Cell type:markdown id:82ab6471 tags:
# Extract figures
# Extract figures
This notebook uses a YOLOv8 model trained on five annotated ABO books (1700 pages) to extract figures from them.
This notebook uses a YOLOv8 model trained on five annotated ABO books (1700 pages) to extract figures from them.
After giving the IIIF manifest URL of the book from which you want to extract images, the book pages get downloaded, the figure recognition model gets applied, and the resulting bounding boxes get extracted and can be downloaded as a zip file.
After giving the IIIF manifest URL of the book from which you want to extract images, the book pages get downloaded, the figure recognition model gets applied, and the resulting bounding boxes get extracted and can be downloaded as a zip file.