diff --git a/3 - Images and Text.ipynb b/3 - Images and Text.ipynb index 0d9db2736e96643e37629af3dfdce7aec3a0a851..8e2895f63b7da5356c695370782eda7bbd4c4345 100644 --- a/3 - Images and Text.ipynb +++ b/3 - Images and Text.ipynb @@ -205,35 +205,30 @@ } }, "source": [ - "* The ONB Labs viewers use IIIF: [https://labs.onb.ac.at/en/dataset/akon/](https://labs.onb.ac.at/en/dataset/akon/)\n", - "\n", - "**TODO**: Available viewers, available data sources (europeana, ?), applications" + "* The ONB Labs viewers use IIIF: [https://labs.onb.ac.at/en/dataset/akon/](https://labs.onb.ac.at/en/dataset/akon/)\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "fragment" } }, - "source": [] + "source": [ + "* Awesome IIIF-related resources: [https://github.com/IIIF/awesome-iiif](https://github.com/IIIF/awesome-iiif)" + ] }, { "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "subslide" + "slide_type": "fragment" } }, - "source": [] + "source": [ + "* [https://showcase.iiif.io/](https://showcase.iiif.io/)" + ] }, { "cell_type": "markdown", @@ -269,6 +264,75 @@ " " ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Overview of OCR formats" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* ALTO (Analyzed Layout and Text Object)\n", + " * OCR (Optical Character Recognition) data representation format\n", + " * XML Schema\n", + " * [https://github.com/altoxml](https://github.com/altoxml)\n", + " * [https://www.loc.gov/standards/alto/](https://www.loc.gov/standards/alto/)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* 3 ALTO main elements\n", + " * `<Description>`\n", + " * metadata and general
settings (e.g. measurement units) about the ALTO file\n", + " * `<Styles>`\n", + " * text and paragraph styles\n", + " * `<Layout>`\n", + " * content information\n", + " * subdivided into `<Page>` elements" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "![ALTO page element](./media/alto.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "* hOCR\n", + " * alternative to ALTO\n", + " * based on XHTML\n", + " * not used in the ONB Labs" + ] + }, { "cell_type": "code", "execution_count": null, @@ -294,7 +358,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.7" } }, "nbformat": 4, diff --git a/media/alto.png b/media/alto.png new file mode 100644 index 0000000000000000000000000000000000000000..12480d2f78bb4bf2a3e89719786d8c36b6121abc Binary files /dev/null and b/media/alto.png differ