From 008f3147a5e5cfae5a38c79461661bc3e4ebd216 Mon Sep 17 00:00:00 2001 From: Stefan Karner Date: Wed, 24 Apr 2019 10:18:01 +0200 Subject: [PATCH] Add Notebooks, script and requirements.txt --- LOC Colors - Data Management.ipynb | 1509 ++++++++++++++++++++++++++++ LOC Colors - Production.ipynb | 787 +++++++++++++++ create_swatches.py | 166 +++ requirements.txt | 5 + 4 files changed, 2467 insertions(+) create mode 100644 LOC Colors - Data Management.ipynb create mode 100644 LOC Colors - Production.ipynb create mode 100644 create_swatches.py create mode 100644 requirements.txt diff --git a/LOC Colors - Data Management.ipynb b/LOC Colors - Data Management.ipynb new file mode 100644 index 0000000..c4ba067 --- /dev/null +++ b/LOC Colors - Data Management.ipynb @@ -0,0 +1,1509 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LOC Colors - Data Management" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Export data as minimal JSON files - only the essentials to create the swatches in the browser*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the created swatches for [https://labs.onb.ac.at/en/topic/akon-swatches/](https://labs.onb.ac.at/en/topic/akon-swatches/), I have to create a JSON file looking like this:\n", + "\n", + "```json\n", + "[[\"AK111_461\", [\"#f4e6cd\", \"#cac4b2\", \"#7e8077\", \"#3e4139\", \"#2f3431\", \"#000304\"], \"Nonza\", \"gelaufen 1903\"],\n", + "[\"AK111_072\", [\"#e2d7c1\", \"#a19c8f\", \"#504e42\", \"#494a44\", \"#010500\", \"#393c39\"], \"Kirchberg am Walde\", \"gelaufen 1914\"],\n", + "[\"AK111_077\", [\"#454234\", \"#3e3b1f\", \"#7f7e77\", \"#a9b8be\", \"#3b4347\", \"#425a6b\"], \"Kirchberg am Wechsel\", \"gelaufen 1913\"]]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first part (the id and the colors) are part of the created swatches, the second part (a name and an approximate date) are part of 
the metadata to download here: [https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/raw/master/akon_postcards_public_domain.csv.bz2?inline=false](https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/raw/master/akon_postcards_public_domain.csv.bz2?inline=false)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read created swatches" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('historic_postcards_color_swatches.csv.bz2', compression='bz2')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0akon_idimage_linkhex_colorshtml
2191421914AK036_405https://iiif.onb.ac.at/images/AKON/AK036_405/4...['#f2e3c1', '#e6dec6', '#8e8a7a', '#7b7864', '...<a href=\"https://iiif.onb.ac.at/images/AKON/AK...
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 akon_id \\\n", + "21914 21914 AK036_405 \n", + "\n", + " image_link \\\n", + "21914 https://iiif.onb.ac.at/images/AKON/AK036_405/4... \n", + "\n", + " hex_colors \\\n", + "21914 ['#f2e3c1', '#e6dec6', '#8e8a7a', '#7b7864', '... \n", + "\n", + " html \n", + "21914 \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idhex_colors
14980AK010_595['#cfbfa4', '#62583b', '#c8c7b6', '#4f5144', '...
7474AK084_243['#aca693', '#414028', '#444537', '#5c5f57', '...
23730AK043_578['#4e502c', '#444735', '#51554a', '#dae1d1', '...
30352AK085_096['#b5aa9d', '#f3e7d7', '#756d62', '#211a0f', '...
22389AK038_067['#f2e9cb', '#989384', '#545245', '#fcf7db', '...
\n", + "" + ], + "text/plain": [ + " akon_id hex_colors\n", + "14980 AK010_595 ['#cfbfa4', '#62583b', '#c8c7b6', '#4f5144', '...\n", + "7474 AK084_243 ['#aca693', '#414028', '#444537', '#5c5f57', '...\n", + "23730 AK043_578 ['#4e502c', '#444735', '#51554a', '#dae1d1', '...\n", + "30352 AK085_096 ['#b5aa9d', '#f3e7d7', '#756d62', '#211a0f', '...\n", + "22389 AK038_067 ['#f2e9cb', '#989384', '#545245', '#fcf7db', '..." + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id_and_hex_colors.sample(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parse Color Array" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to properly export the color array as a JSON array later, convert the data representation slightly." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "id_and_hex_colors['colors'] = id_and_hex_colors['hex_colors'].apply(\n", + " lambda c: json.loads(c.replace(\"'\", '\"'))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idhex_colorscolors
1291AK116_455['#f4ece0', '#d4d2ce', '#716d5c', '#65665e', '...[#f4ece0, #d4d2ce, #716d5c, #65665e, #b2b4bb, ...
\n", + "
" + ], + "text/plain": [ + " akon_id hex_colors \\\n", + "1291 AK116_455 ['#f4ece0', '#d4d2ce', '#716d5c', '#65665e', '... \n", + "\n", + " colors \n", + "1291 [#f4ece0, #d4d2ce, #716d5c, #65665e, #b2b4bb, ... " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id_and_hex_colors.sample()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Do you see the subtle difference? The entry in the colors column is now an array with strings _without_ the single quotes `'`." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "id_and_colors = id_and_hex_colors[['akon_id', 'colors']].copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add Metadata From Original Records" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next up: Combining the id and the color array with names and dates. Read the metadata dump directly from the link found on [https://labs.onb.ac.at/en/dataset/akon/](https://labs.onb.ac.at/en/dataset/akon/):" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (13) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " interactivity=interactivity, compiler=compiler, result=result)\n" + ] + } + ], + "source": [ + "original = pd.read_csv('https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/raw/master/akon_postcards_public_domain.csv.bz2?inline=false', compression='bz2')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idnamedate
16251AK016_087Neulengbachvor 1907
23864AK044_401Gloriette1907
33292AK085_517Milanovor 1905
13875AK008_070Schloß Schönbrunn1906
12223AK004_040Pürgg1909
4129AK125_097Altausseegelaufen 1901
23756AK044_080Radstädter Tauern1907
26479AK069_067Attersee1906
14251AK109_329Josefsthalvor 1907
28518AK063_083Gaußig1908
\n", + "
" + ], + "text/plain": [ + " akon_id name date\n", + "16251 AK016_087 Neulengbach vor 1907\n", + "23864 AK044_401 Gloriette 1907\n", + "33292 AK085_517 Milano vor 1905\n", + "13875 AK008_070 Schloß Schönbrunn 1906\n", + "12223 AK004_040 Pürgg 1909\n", + "4129 AK125_097 Altaussee gelaufen 1901\n", + "23756 AK044_080 Radstädter Tauern 1907\n", + "26479 AK069_067 Attersee 1906\n", + "14251 AK109_329 Josefsthal vor 1907\n", + "28518 AK063_083 Gaußig 1908" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "original[['akon_id', 'name', 'date']].sample(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are the columns needed." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "original_info = original[['akon_id', 'name', 'date']].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idnamedate
24871AK048_377Reichenau an der Rax1925
4191AK054_543Aflenz Kurortvor 1905
\n", + "
" + ], + "text/plain": [ + " akon_id name date\n", + "24871 AK048_377 Reichenau an der Rax 1925\n", + "4191 AK054_543 Aflenz Kurort vor 1905" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "original_info.sample(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas offers a handy function for merging two dataframes _not on the index_, but on a shared column:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "colors_and_info = pd.merge(id_and_colors, original_info, on='akon_id')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idcolorsnamedate
16356AK016_310[#5f5a57, #9c938c, #4d453f, #cabbab, #3c3b39, ...Kapellen1908
1117AK116_129[#dcc9ab, #cec0a7, #a8a290, #48473a, #6c6c62, ...Garstengelaufen 1902
\n", + "
" + ], + "text/plain": [ + " akon_id colors name \\\n", + "16356 AK016_310 [#5f5a57, #9c938c, #4d453f, #cabbab, #3c3b39, ... Kapellen \n", + "1117 AK116_129 [#dcc9ab, #cec0a7, #a8a290, #48473a, #6c6c62, ... Garsten \n", + "\n", + " date \n", + "16356 1908 \n", + "1117 gelaufen 1902 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "colors_and_info.sample(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's exactly what's needed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save to JSON-File" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pandas can export the data exactly in the target format:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "colors_and_info.to_json('historic_postcards__id_colors_name_date.json', orient='values')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract Subset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's almost all. For the preview on [https://labs.onb.ac.at/en/dataset/akon/](https://labs.onb.ac.at/en/dataset/akon/) I need a subset of 100 swatches and save them:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "id_and_colors_100 = colors_and_info.iloc[:100]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idcolorsnamedate
99AK111_207[#403e2c, #f9f8eb, #7e7f6b, #35362d, #32342f, ...Klosterneuburggelaufen 1908
34AK111_293[#e6d8bf, #c7c1b1, #4a4c3c, #aeafa5, #464740, ...Komotauvor 1905
86AK111_184[#f7edd6, #d1cbb8, #f6f0db, #2d2a1b, #9e9f93, ...Klausenburggelaufen 1904
3AK111_026[#e2cba6, #9e8e73, #574c39, #3c311a, #4c473b, ...Kierling1922
39AK111_072[#e5d9c2, #c0baac, #928e81, #4c493e, #484943, ...Kirchberg am Waldegelaufen 1914
\n", + "
" + ], + "text/plain": [ + " akon_id colors \\\n", + "99 AK111_207 [#403e2c, #f9f8eb, #7e7f6b, #35362d, #32342f, ... \n", + "34 AK111_293 [#e6d8bf, #c7c1b1, #4a4c3c, #aeafa5, #464740, ... \n", + "86 AK111_184 [#f7edd6, #d1cbb8, #f6f0db, #2d2a1b, #9e9f93, ... \n", + "3 AK111_026 [#e2cba6, #9e8e73, #574c39, #3c311a, #4c473b, ... \n", + "39 AK111_072 [#e5d9c2, #c0baac, #928e81, #4c493e, #484943, ... \n", + "\n", + " name date \n", + "99 Klosterneuburg gelaufen 1908 \n", + "34 Komotau vor 1905 \n", + "86 Klausenburg gelaufen 1904 \n", + "3 Kierling 1922 \n", + "39 Kirchberg am Walde gelaufen 1914 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id_and_colors_100.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "id_and_colors_100.to_json('historic_postcards__id_colors_name_date__100.json', orient='values')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And done!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below there's a fast-forward, compact version of what's been done above. No need to do all this again." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compact (with other data source)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (13) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " interactivity=interactivity, compiler=compiler, result=result)\n" + ] + } + ], + "source": [ + "colors_hsv_clip = pd.read_csv('akon_with_hsv_clip50_color_swatches.csv.bz2', compression='bz2')\n", + "raw_data = pd.read_csv('akon_postcards_public_domain_1925.csv.bz2', compression='bz2')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View Data Format" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0akon_idimage_linkhex_colorshtml
1191411914AK003_285https://iiif.onb.ac.at/images/AKON/AK003_285/2...['#050300', '#eee2c9', '#b6af9e', '#fdf7da', '...<a href=\"https://iiif.onb.ac.at/images/AKON/AK...
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 akon_id \\\n", + "11914 11914 AK003_285 \n", + "\n", + " image_link \\\n", + "11914 https://iiif.onb.ac.at/images/AKON/AK003_285/2... \n", + "\n", + " hex_colors \\\n", + "11914 ['#050300', '#eee2c9', '#b6af9e', '#fdf7da', '... \n", + "\n", + " html \n", + "11914
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0akon_ididaltitudebuildingcitycolorcommentmountainother...feature_classfeature_codegeoname_idlatitudelongitudenamecountry_idadmin_name_1admin_code_1geo
2343523435AK042_53325265434.0NaNFrohnleitenFalseNaNNaNNaN...PPPLA32779202.047.2666715.31667FrohnleitenATNaNNaN47.26667, 15.31667
\n", + "

1 rows × 30 columns

\n", + "" + ], + "text/plain": [ + " Unnamed: 0 akon_id id altitude building city color \\\n", + "23435 23435 AK042_533 25265 434.0 NaN Frohnleiten False \n", + "\n", + " comment mountain other ... feature_class feature_code \\\n", + "23435 NaN NaN NaN ... P PPLA3 \n", + "\n", + " geoname_id latitude longitude name country_id admin_name_1 \\\n", + "23435 2779202.0 47.26667 15.31667 Frohnleiten AT NaN \n", + "\n", + " admin_code_1 geo \n", + "23435 NaN 47.26667, 15.31667 \n", + "\n", + "[1 rows x 30 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.sample()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Combine Data" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "combined_data = pd.merge(colors_hsv_clip[['akon_id', 'hex_colors', 'image_link']],\n", + " raw_data[['akon_id', 'name', 'date']],\n", + " on='akon_id')" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idhex_colorsimage_linknamedate
23217AK041_595['#ada896', '#fcf6d5', '#767467', '#484739', '...https://iiif.onb.ac.at/images/AKON/AK041_595/5...Ötscher1909
\n", + "
" + ], + "text/plain": [ + " akon_id hex_colors \\\n", + "23217 AK041_595 ['#ada896', '#fcf6d5', '#767467', '#484739', '... \n", + "\n", + " image_link name date \n", + "23217 https://iiif.onb.ac.at/images/AKON/AK041_595/5... Ötscher 1909 " + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_data.sample()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Flatten hex_colors" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "combined_data['hex_colors_list'] = combined_data['hex_colors'].apply(lambda c: json.loads(c.replace(\"'\", '\"')))" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idhex_colorsimage_linknamedatehex_colors_list
15996AK014_589['#020100', '#fbfae8', '#88887e', '#64645a', '...https://iiif.onb.ac.at/images/AKON/AK014_589/5...Maria Taferl1909[#020100, #fbfae8, #88887e, #64645a, #4d4f49, ...
\n", + "
" + ], + "text/plain": [ + " akon_id hex_colors \\\n", + "15996 AK014_589 ['#020100', '#fbfae8', '#88887e', '#64645a', '... \n", + "\n", + " image_link name date \\\n", + "15996 https://iiif.onb.ac.at/images/AKON/AK014_589/5... Maria Taferl 1909 \n", + "\n", + " hex_colors_list \n", + "15996 [#020100, #fbfae8, #88887e, #64645a, #4d4f49, ... " + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_data.sample()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sanitize and Reorder" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "combined_data = combined_data.drop(columns=['hex_colors']).copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idimage_linknamedatehex_colors_list
19590AK028_177https://iiif.onb.ac.at/images/AKON/AK028_177/1...Frohnleiten1906[#020100, #a8a599, #7b7a6f, #fbf9e5, #4b4b40, ...
\n", + "
" + ], + "text/plain": [ + " akon_id image_link \\\n", + "19590 AK028_177 https://iiif.onb.ac.at/images/AKON/AK028_177/1... \n", + "\n", + " name date hex_colors_list \n", + "19590 Frohnleiten 1906 [#020100, #a8a599, #7b7a6f, #fbf9e5, #4b4b40, ... " + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_data.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "combined_data = combined_data.rename(columns={'hex_colors_list': 'hex_colors'})" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idimage_linknamedatehex_colors
33304AK087_042https://iiif.onb.ac.at/images/AKON/AK087_042/0...Abcoudevor 1905[#f8eacd, #aca391, #5b5747, #6e6a5e, #525148, ...
\n", + "
" + ], + "text/plain": [ + " akon_id image_link name \\\n", + "33304 AK087_042 https://iiif.onb.ac.at/images/AKON/AK087_042/0... Abcoude \n", + "\n", + " date hex_colors \n", + "33304 vor 1905 [#f8eacd, #aca391, #5b5747, #6e6a5e, #525148, ... " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_data.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
akon_idhex_colorsimage_linknamedate
25575AK031_287[#444626, #caccbc, #4a4d41, #48504f, #5b7073, ...https://iiif.onb.ac.at/images/AKON/AK031_287/2...Ebensee1907
\n", + "
" + ], + "text/plain": [ + " akon_id hex_colors \\\n", + "25575 AK031_287 [#444626, #caccbc, #4a4d41, #48504f, #5b7073, ... \n", + "\n", + " image_link name date \n", + "25575 https://iiif.onb.ac.at/images/AKON/AK031_287/2... Ebensee 1907 " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_data = combined_data[['akon_id', 'hex_colors', 'image_link', 'name', 'date']]\n", + "combined_data.sample()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample and Write" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "combined_data.iloc[:100].to_json('swatches_100.json', orient='values')" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "combined_data.to_json('swatches_all.json', orient='values')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Alternate Data Format Without Link" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "sans_link = combined_data[['akon_id', 'hex_colors', 'name', 'date']]" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "sans_link.iloc[:100].to_json('swatches_100_nolink.json', orient='values')" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "sans_link.to_json('swatches_all_nolink.json', orient='values')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 User Default", + "language": "python", + "name": "python_3_user_default" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/LOC Colors - Production.ipynb b/LOC Colors - Production.ipynb new file mode 100644 index 0000000..c70a5b8 --- /dev/null +++ b/LOC Colors - Production.ipynb @@ -0,0 +1,787 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LOC Colors - Production" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Calculate color swatches for historic postcards*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "from sys import exit\n", + "from io import BytesIO\n", + "from colorsys import rgb_to_hsv, hsv_to_rgb\n", + "from scipy.cluster.vq import kmeans\n", + "from numpy import array" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DEFAULT_NUM_COLORS = 6\n", + "# default minimum and maximum values are used to clamp the color values to a specific range\n", + "# originally this was set to 170 and 200, but I'm running with 0 and 256 in order to \n", + "# not clamp the values. This can also be set as a parameter. 
\n", + "DEFAULT_MINV = 0\n", + "DEFAULT_MAXV = 256\n", + "\n", + "THUMB_SIZE = (200, 200)\n", + "SCALE = 256.0\n", + "\n", + "def down_scale(x):\n", + " return x / SCALE\n", + "\n", + "def up_scale(x):\n", + " return int(x * SCALE)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The original code by Laura Wrubel uses the [RGB (red, green, blue) color space](https://en.wikipedia.org/wiki/RGB_color_space) for most color computations.\n", + "\n", + "We're using the [HSV (hue, saturation, value) color space](https://en.wikipedia.org/wiki/HSL_and_HSV) for clustering in the hope of getting prettier and more colorful results for our historic postcards.\n", + "\n", + "That necessitates modifying some utility functions:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def clamp_hsv(color, min_v, max_v):\n", + " \"\"\"\n", + " Clamps a color such that the value (lightness) is between min_v and max_v.\n", + " \"\"\"\n", + " # use down_scale to convert color to value between 0-1 as expected by rgb_hsv\n", + " h, s, v = [down_scale(c) for c in color]\n", + " # also convert the min_v and max_v to values between 0-1\n", + " min_v, max_v = map(down_scale, (min_v, max_v))\n", + " # get the maximum of the min value and the color's value (therefore bumping it up if needed)\n", + " # then get the minimum of that number and the max_v (bumping the value down if needed)\n", + " v = min(max(min_v, v), max_v)\n", + " # apply upscale to get the h, s, v(which has been clamped) back to 0-255, return as tuple\n", + " return tuple(map(up_scale, (h, s, v)))\n", + "\n", + "\n", + "def order_by_hue_hsv(colors):\n", + " \"\"\"\n", + " Orders colors by hue.\n", + " \"\"\"\n", + " hsvs = [list(map(down_scale, color)) for color in colors]\n", + " hsvs.sort(key=lambda t: t[0])\n", + " return [tuple(map(up_scale, hsv_to_rgb(*hsv))) for hsv in hsvs]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "All postcards are scanned in front of a black background, and many contain a lot of very dark colors. This function lets us experiment on removing all colors under a certain saturation or value threshold: colorless (grey-ish) and dark colors, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def clip_hsv(colors_hsv, min_s, min_v):\n", + " min_s = down_scale(min_s)\n", + " min_v = down_scale(min_v)\n", + " hsvs = [tuple(map(down_scale, color)) for color in colors_hsv]\n", + " hsvs = filter(lambda hsv: (hsv[1] >= min_s) and (hsv[2] >= min_v), hsvs)\n", + " return [tuple(map(up_scale, hsv)) for hsv in hsvs]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If a certain color appears more than once in the picture (when `count >= 1`), we add it more than once to the dataset. This way, large areas of a single color factor in heavily in the resulting clusters:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_colors(img, colorspace='HSV'):\n", + " \"\"\"\n", + " Returns a list of all the image's colors.\n", + " \"\"\"\n", + " w, h = img.size\n", + " # convert('RGB') converts the image's pixels info to RGB \n", + " # getcolors() returns an unsorted list of (count, pixel) values\n", + " # w * h ensures that maxcolors parameter is set so that each pixel could be unique\n", + " # there are three values returned in a list\n", + " # return [color for count, color in img.convert(colorspace).getcolors(w * h)]\n", + " return [single_color for count, color in img.convert(colorspace).getcolors(w * h) for single_color in [color] * count]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def hexify(rgb):\n", + " return \"#{0:02x}{1:02x}{2:02x}\".format(*rgb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For 
experimentation, allow scaling of the colorspace (effectively making clustering along scaled down axes more likely, and along scaled up axes less likely), clipping of pixels with low saturation and/or low value.\n", + "\n", + "The scaling is inverted after the clustering algorithm is executed." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def colorz(image_url, n=DEFAULT_NUM_COLORS, min_v=DEFAULT_MINV, max_v=DEFAULT_MAXV,\n", + " order_colors=True, coefficients=(1.0, 1.0, 1.0), clip_colors=False, min_clip_s=20, min_clip_v=20):\n", + " \"\"\"\n", + " Get the n most dominant colors of an image.\n", + " Clamps value to between min_v and max_v.\n", + "\n", + " Total number of colors returned is n, optionally ordered by hue.\n", + " Returns a list of hex color strings such as '#f4e6cd'.\n", + "\n", + " \"\"\"\n", + " try:\n", + " r = requests.get(image_url)\n", + " except ValueError:\n", + " print(\"{0} was not a valid URL.\".format(image_url))\n", + " exit(1)\n", + " img = Image.open(BytesIO(r.content))\n", + " img.thumbnail(THUMB_SIZE) # replace with a thumbnail with same aspect ratio, no larger than THUMB_SIZE\n", + "\n", + " obs = get_colors(img, 'HSV') # gets a list of RGB/HSV colors (e.g. 
(213, 191, 152)) for each pixel\n", + " # adjust the value of each color, if you've chosen to change minimum and maximum values\n", + " clamped = [clamp_hsv(color, min_v, max_v) for color in obs]\n", + " clipped = clip_hsv(clamped, min_clip_s, min_clip_v) if clip_colors else clamped\n", + " # turns the list of colors into a numpy array of floats, then applies scipy's k-means function\n", + " clusters, _ = kmeans(array(clipped).astype(float) * coefficients, n)\n", + " normalized_clusters = clusters / coefficients\n", + " colors = order_by_hue_hsv(normalized_clusters) if order_colors else normalized_clusters\n", + " \n", + " hex_colors = list(map(hexify, colors)) # turn RGB into hex colors for web\n", + " return hex_colors" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def draw_row_with_links(link_and_colors):\n", + " html = \"\"\n", + " url, colors = link_and_colors\n", + " for count, color in enumerate(colors):\n", + " square = ''.format(((count * 30)), 0, color)\n", + " html += square\n", + " full_html = '
{1}'.format(url, html, len(colors) * 30)\n", + " return full_html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at how different parameters affect how the swatches look." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll need an image link. Grab a link to a IIIF manifest from [https://labs.onb.ac.at/en/dataset/akon/](https://labs.onb.ac.at/en/dataset/akon/) or take the one provided down below." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@context': 'https://iiif.io/api/presentation/2/context.json',\n", + " '@id': 'https://iiif.onb.ac.at/presentation/AKON/AK115_479/manifest',\n", + " '@type': 'sc:Manifest',\n", + " 'label': 'Dresden',\n", + " 'metadata': [{'label': [{'@value': 'Id', '@language': 'en'},\n", + " {'@value': 'Id', '@language': 'ger'}],\n", + " 'value': 'AK115_479'},\n", + " {'label': [{'@value': 'Title', '@language': 'en'},\n", + " {'@value': 'Titel', '@language': 'ger'}],\n", + " 'value': 'Dresden'},\n", + " {'label': [{'@value': 'Place', '@language': 'en'},\n", + " {'@value': 'Ort', '@language': 'ger'}],\n", + " 'value': \"Dresden\"},\n", + " {'label': [{'@value': 'Year', '@language': 'en'},\n", + " {'@value': 'Jahr', '@language': 'ger'}],\n", + " 'value': '1906'},\n", + " {'label': [{'@value': 'Disseminator', '@language': 'en'},\n", + " {'@value': 'Anbieter', '@language': 'ger'}],\n", + " 'value': \"Ansichtskarten Online\"},\n", + " {'label': [{'@value': 'Physical Location', '@language': 'en'},\n", + " {'@value': 'Standort', '@language': 'ger'}],\n", + " 'value': 'Niederösterreichische Landesbibliothek 1672 - ÖNB'}],\n", + " 'description': 'Ministerium, Dampferlandeplatz',\n", 
+ " 'viewingDirection': 'left-to-right',\n", + " 'viewingHint': 'paged',\n", + " 'license': 'http://creativecommons.org/publicdomain/mark/1.0/',\n", + " 'attribution': [{'@value': 'Austrian National Library', '@language': 'en'},\n", + " {'@value': 'Österreichische Nationalbibliothek', '@language': 'ger'}],\n", + " 'logo': 'https://iiif.onb.ac.at/logo/',\n", + " 'seeAlso': [{'@id': 'http://data.onb.ac.at/AKON/AK115_479',\n", + " 'format': 'text/html'},\n", + " {'@id': 'http://data.onb.ac.at/AKON/AK115_479.rdf',\n", + " 'format': 'application/rdf+xml'}],\n", + " 'sequences': [{'@context': 'https://iiif.io/api/presentation/2/context.json',\n", + " '@id': 'https://iiif.onb.ac.at/presentation/AKON/AK115_479/sequence/normal',\n", + " '@type': 'sc:Sequence',\n", + " 'startCanvas': 'https://iiif.onb.ac.at/presentation/AKON/AK115_479/canvas/479',\n", + " 'canvases': [{'@context': 'https://iiif.io/api/presentation/2/context.json',\n", + " '@id': 'https://iiif.onb.ac.at/presentation/AKON/AK115_479/canvas/479',\n", + " '@type': 'sc:Canvas',\n", + " 'label': 'Dresden',\n", + " 'height': 1462,\n", + " 'width': 2200,\n", + " 'images': [{'@context': 'https://iiif.io/api/presentation/2/context.json',\n", + " '@id': 'https://iiif.onb.ac.at/presentation/AKON/AK115_479/annotation/479',\n", + " '@type': 'oa:Annotation',\n", + " 'motivation': 'sc:painting',\n", + " 'resource': {'@id': 'https://iiif.onb.ac.at/images/AKON/AK115_479/479/full/full/0/native.jpg',\n", + " '@type': 'dctypes:Image',\n", + " 'height': 1462,\n", + " 'width': 2200,\n", + " 'format': 'image/jpeg',\n", + " 'service': {'@context': 'https://iiif.io/api/image/2/context.json',\n", + " '@id': 'https://iiif.onb.ac.at/images/AKON/AK115_479/479',\n", + " 'profile': 'https://iiif.io/api/image/2/level2.json'}},\n", + " 'on': 'https://iiif.onb.ac.at/presentation/AKON/AK115_479/canvas/479'}]}]}]}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = 
requests.get('https://iiif.onb.ac.at/presentation/AKON/AK115_479/manifest/')\n", + "r.json()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The image link can be found under `sequences[*].canvases[*].images[*].resource.@id`" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "image_link = 'https://iiif.onb.ac.at/images/AKON/AK115_479/479/full/!200,200/0/native.jpg'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at it. For our calculation, we'll use a much smaller variant of the image. Using the [IIIF Image API](https://iiif.io/api/image/2.1/), we can request an image of a certain size. To do this, we'll substitute the second `full` parameter by `!200,200`, meaning the resulting image should fit inside a 200x200 square." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/jpeg": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import IPython.display as ipd\n", + "\n", + "im_r = requests.get(image_link)\n", + "ipd.display(ipd.Image(im_r.content))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create the color swatches..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['#beaea3', '#494440', '#211d11', '#313c3f', '#8e9da2', '#534a4e']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols1 = colorz(image_link)\n", + "cols1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "...and display them as well:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def display_colors(color_array, link):\n", + " html = draw_row_with_links((link, color_array))\n", + " ipd.display(ipd.HTML(html))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_colors(cols1, image_link)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['#c4aa9e', '#5b5651', '#242411', '#8a9597', '#435158', '#5d5559']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols2 = colorz(image_link, coefficients=[1.0, 2.0, 0.6])\n", + "cols2" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_colors(cols2, image_link)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['#c8aea2', '#4b4540', '#3d3422', '#4b5e62', '#829396', '#594f52']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols3 = colorz(image_link, 
coefficients=[1.0, 2.0, 0.6], clip_colors=True, min_clip_v=30)\n", + "cols3" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_colors(cols3, image_link)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['#c9ac9f', '#49413b', '#332c1c', '#8ba9b0', '#2f393d', '#5d6b72']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols5 = colorz(image_link, clip_colors=True, min_clip_s=30, min_clip_v=30)\n", + "cols5" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_colors(cols5, image_link)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is getting tedious.\n", + "\n", + "Let's define a function that computes swatches and then displays the original image and the swatches side by side:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "def colorize_and_display(image_link=image_link, **kwargs):\n", + " cols = colorz(image_link, **kwargs)\n", + " display_colors(cols, image_link)\n", + " ipd.display(ipd.Image(requests.get(image_link).content))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/jpeg": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "colorize_and_display(clip_colors=True, 
min_clip_s=50, min_clip_v=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD//gAXR2VuZXJhdGVkIGJ5IElJUEltYWdl/9sAQwADAgIDAgIDAwMDBAMDBAUIBQUEBAUKBwcGCAwKDAwLCgsLDQ4SEA0OEQ4LCxAWEBETFBUVFQwPFxgWFBgSFBUU/9sAQwEDBAQFBAUJBQUJFA0LDRQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQU/8AAEQgAhADIAwEiAAIRAQMRAf/EAB8AAAEFAQEBAQEBAAAAAAAAAAABAgMEBQYHCAkKC//EALUQAAIBAwMCBAMFBQQEAAABfQECAwAEEQUSITFBBhNRYQcicRQygZGhCCNCscEVUtHwJDNicoIJChYXGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+v/EAB8BAAMBAQEBAQEBAQEAAAAAAAABAgMEBQYHCAkKC//EALURAAIBAgQEAwQHBQQEAAECdwABAgMRBAUhMQYSQVEHYXETIjKBCBRCkaGxwQkjM1LwFWJy0QoWJDThJfEXGBkaJicoKSo1Njc4OTpDREVGR0hJSlNUVVZXWFlaY2RlZmdoaWpzdHV2d3h5eoKDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uLj5OXm5+jp6vLz9PX29/j5+v/aAAwDAQACEQMRAD8A+I/gb8B5PjHBq9x/bK6VFYPFGf8AR/NaQuHP95cY2+vevVW/YXjwCPGLgHru0wcf+Ra0v2ELI3Oh+LSCRi6txkf7j19TNpsjpENxKuC2PYd+ffFenQo05Q5pbng4rF14VnCD0R8jx/sJM8Zb/hMtuOmdN/8AttA/YSLsQvjLcD0P9mcEf9/favrP7Xa21yLSWeOKYqJDEWw3uc9P8+9WLfU9Lkgt3W8hMNwnmws5GGj/AL/0yBznrx9dPY0f6ZgsVjGr6/d/wD5H/wCGDn7+Mh7/APEt6fX97701f2Dbh2wni8Nzj/kG9f8AyLX2bMdLsZJVuLmK3dHMfluQuTgHAB9AR+dbGhQ2Gp2101pfWiMmd2JlBQKedxGcduR61y1pYWiryZ6GFhmOJklBad7HxTZ/8E8NXvWYReKxkAEZ0w5x/wB/aszf8E39eh358Tg7ccrphI6nv5vtX3vbaktlKjnV7WOCXeq8CNd4QglTyNwKk85GAPlyCSxvEUseZW1qxZi8cnyyfKUHl7gMsSPuy87iTv74GfG+v0n8END6WOUYlL36uvp/wD8/G/4J+X8RKyeL1TnGG0wjv/11/wA8Vcsf+CdGq36s0XiwEL3GmE/+1a+8ZNVW+0JEu9WsJr8SD/SSVAKh1YqMd9gYZx79RW9pXi3SIrFVt2tmiXGWDg7SQCOf+BDn3961eNotWUdTm/szGRbbneK7L/gHwba/8EvNeuYy6+Mogg6A6YQf1lFZ+o/8E2dV01wk3jJMeq6Z9f8Apr9P85r9BbLx1ZCMSrqNu0EjBIz5oIJ27sZHqpB/Gr0fivSb6dba5uLNmYfKDMuSORkc/wCyefY+l
RHFRvdrQ1ngKyjeMn9x+bT/APBPm7jYq3jLn1OlH/47/nNNg/4J66hOwC+MYhxn/kGnPt/y0r9FdU0zSrwyLYywOUAZ4I5QxAPqByKzIrdLdtwjMZHykEH8s+n+etehTqUaiukeDU+s0pcs5fgfn5L/AME99QR3z40j29VI00nP/kWoj/wT/vkj3HxnFnp/yDT+Z/e/5/Ovv+fToJHZto3NwQePzH61nPYKvzOrFQMlW6jJz6EDn/PWumNOm+hzuvXW0j4Ug/4J8atICU8YQZ6cWDf/AByrMP8AwTk1y5CmLxbA2Rkn+zmwPx8yvvXQ0htnkM0W6NVwCTk9ent0rpf+Eit7KyLbQ8g5KA989a8ytiqNOfIonr4fDYmrDnlP8Efn3Yf8ExtavyF/4TmyiYEbg+nP8v1/eVduv+CWevQW7Sx+O7S4IGfLXTWBI/GSvuiPxDZrMPsw2Ix3H5jxz9OldBY6mvllWzkjAB6Z9fp/jXP7duXkep9VtT3uz8xJv2ANSt3dJfGFurDqP7Obr/38pg/YE1FQN3jG3BzzjT2P/s9foJ4g0FbzUJJE+Tcx3EHj/P8An63rHwNbi0bcWZyM7QcZxz6/SvXboRipSPml9dnNwj08j86v+GCdQMZb/hMbc9CB/Z7f/HKWX9gW/RW/4rG23YzzYHnr/wBNPb9a/QG68HlG2RIQV4Ge/b/P/wCus2bwzNA7FY259B+ufyrSMaMtjCVXFx3/ACPzU+LH7KuqfCzwfeeIJdag1C3tjGJI1gaNvnkWMdSe7iivq79szTZLb4BeJS6kbTZhiQf+fuHp+v50Vy1IqMmo7Hp4apOpTvPc82/4J5WUd1onjVmJV1ubXJPZdj+v419nWfh9ZC7SKCvQKR26j8TXyR/wTZtzP4c8bHA2rfWxBPYmNvx9P85r7QgcLHFJFLgygeWwOC+Rn8eMn6A1yPHOlJw7HespjiI+2vqzIt/BttcBwbO1d5VCM+wFioyQpOOQM/8Ajxq3B4JsjZkvZwOFJVFkjXAzlTgEcDk8e5Hc50YZ2W4Egbac4y3T6Zx25+mRVo6pbwxDzCFOMDIyep4wPx9ufxrN5iuhUcna66GRc+FLG5jIntbaVySczW4kwcYJHHXGMn6VqaF4X0u1tnhSxtosgb0SFQhJxnIXg5wOfYelZb6tJPKjR2FxJEpz542rweO7Z9O1X9H1AXmoNbt8rjhBImOQT05ORjFZvEUsRpURrHC18Gr0ZmhLo+jGOKGW0tJIYshIjbrtVeDwCOASAfqPah9K8PyRvu0u0aYYbzGgTdn7uc49AR/jVmXSWEYV0DjbjIP+ffpWZ/ZyyMUYM2SAVLbmGQDg8n/J9K2pwwy1itTmqYvGtWk9Bg8MaHfKsDaTp3ljIQrbou3r04yM89KrP4f0u2823WwslRztdBAu1wCCN3GDzg/h24q//ZSguUeQseWUNx17jFRX2nNJGSI33DBMjd/wHXvWeJowrJKNkzTCYyrQu5ttFi30zTkWUfZrM7pDK4MSnLngscjBOAAep49qz207TZdQMi6bZPJGQDL9nTjbuC4wvbe2PTcemaSLTZnAQAjA3fMep5/XgdvSn2thLbzFiQ2cMM9CM/0roo0VTjyy1OPE4udaXNFtL1Oi0Sw0+2R/JsoYZ5Budo4ghYknJOAOSaWTSrdsSALknHzcnqeD71RN2iptMbFlAOUOPrz3qtLqss74VvL2cheTgZ/z+lEKUk3yscq1KcVzK7KWpsbCUj7q9AAeBzz1/wA+1VWvIZF2mGNiTkA9/bFTaqZrtjuBZh1Yk9M8/r7elVm0wsr5Kq3IwD09v5/r9a9KDSVpPU8mpGTleCdh0JimRgECkjkhgG/T86ZcadFLAoRAsu7I2jGOO45/rUcNhLC8mXLMOmeeOT9ev+e9SNBiNfmC5bG48jNZVaNKfvM3o4itS91GctsYlVnkVWOACQcZ9B7cjH405dVntEk3oWZCcEDp09v8/wAr0lsTk
s+4jI+fuevr6/zqNvnjZCxDN90Nzgeo56/556VySpUrXR2wxNe+qKS6tMDJO6gyN2LZznkn3HH16Vuab4lkZEUocdM9c47jB5HSsF7NJPMUOuT/AA85OCev5Z+oqO3s5oYgc4xgYI5/zz/Ookk42epvRk4z5lod/aawJkAMYxnHK5xz6/hViK8t4wNwJyOcnnAzXI6XcPMypv2tjg9B0zjj0q7eSNbovlkDjOR+f9P/ANdciTi7Jnsv2dSHM0eN/t4Wls/7MHi64iXBElhhiPW9g4/Kiua/bY1K4m/Z08WwtI3l+bZbl4A/4/ISB1orugmlq7nkT5L+4rI+MPgJ8bvFvwi0fUk8NNZlb+7Bkju7YyhmRBjowPRu1e06N+3b4709NMuNa8P6dfacspjNxbRSxeeVjKMBIXZS2HBwOcjng186fCvTo9RsLy3MSyyXF1HbL5qFkRWHLHBBJGQdvQ45FeqXHg7X5rG4tpFtZLCOGSRYpI52RgIZXC48zA4WMZwOCPasJ0YTbbRpCvVhZRlofbfhb49eD/GHhKy1mDWrTT47s+WYLyeOKaGTnKMu4nIIPTIOOCcjNy7+K/gmziIXxfoguHYKS+pQ53cdcN6HpnvX502Pg3TddRlZjAUAyHtpZiAQTjdnkcNjv19qkn+HQs7ZY49Q8pJNkeHtnRVJKnG4ZHGV6eo7VxPAx3Umdyx9S1nFH6cyX8Wk2Ety13GLYxFw7SABEA659OM8fWr3hu6tbqyiu7WaK5WaJZBLEwdXU8ghhwR0Ix/WvgLwp4q8Z698KdQ8INrZ/s2yWGQSwA/aEt3VwbbexX5OAcHGAAu45Arc8J/EL4weCJdL8K6Rf2Emio/2K0uNRtUkljCjBQZbJC7cKCce+MY8+dFxbTkrnoQxCkk+V2Z98JqDW87C4uCqcsHkkxtA6gkk9Oua+dvgf+0RafET4u+LbBo70afqhhvtLiuxApj8qJYpt+yRiC+Ito5ztzx3+avjB4h+KPxLmvLO81hNU0Bf30lqClpD7AxFiSBsyAc4Pucnhm+GV5p0WnXst08t0sVtfxWkUalskb12EMRkbe3PtnGdqUIRi+aWrMKzlOS5Y6I/WOW7tmTEY8vB+deCA3Oc/wCfSla9RlIIcN689eef1r5L/Z3+P1zDPq+h+M710txJ9qsdWuVkMjhljzCVCdBlmHQjLAjpn2PWPjj4F02FJJfEMC+YVQD7PM3J74CH2z25pxVVapNo19nhrWdk/M9Da/mXMascMNvOMDn6fQd+v5AWa8BmkBjJIGAOvt1/z715zZ/GbwVqyrJbeI7YsrANuSSPJ54+ZR3/AJe1Xx8U/CocGPXbIk8n95gjrwOfb9frWqrVf5X9zMHhqD2a/A7xdMiiV2uZkO5QcsM5/Lt/WoYoI0jbJ/eDkhf/AK9cZ/ws7wxPk/29aHaMHZIOnTJ9u1WoviV4ZZhjWLRscgeZ0OT/AJ6//X09vV7P8SFg6HdfgdkpjRQPlLA4LHIz79Pr/nqyZVLO6ngDaFA4H8/1Pp7muTX4meF4EAk1mzQ/3d/+eMf59WH4n+Fndyuv2BGepmAJyff/AD+VRzVHrZ/czsjCgly3X4HQXFus0hYp8w5BzzUEVsVZFcxkDgYyBx3wP5cVkf8ACxvCwZS3iDTtrMNu+6UBiTgdTzyePXNSyfEPwy2wHX9NUMSoP2lMFh17/T9Kaq1IrUX1ejLVWNhLUkqF2g7QB1BA/wA/1p404oPmIYgDJxkVgz/FHwvatbrJ4h00G5cor/aoyuQrN8zZwv3SMk8kqByRVW8+MHg23ZRL4t0WMnIU/bo+3XndjuPzqHUbVhewgux0senL8wZFCnjGAB39fenNpMcSFUCkf3ioyfbiufb4keH1kjA1i0KOoMbLIpVsnGAQcZ9uv5GpF+Iugm7NuNa07f5K3AJukwYyAVYHPQhgQe4NTGq47jdCMuxcFj9jVhHGVYgbiR1/z/nvUIeVIipRiB0BP3fb8
Kjt/iHoVzqF3bDVbBlto4XaSOcFcyMVCk9M/c7/AMYzjim3XxE8IxWJuZfEGlxxbC6s15GnAz0DNn1xxXSq6etjm+r20UtDwX9tOB1/Z18TOQ2fMst27Gc/a4hg/r/nNFSfto+IdK1T9mrxMbO7trgyPp7oYZFfIa7jK4I65CN+Cmiu2hPni2ebiKfsp8vkfBPw9l1ODQ7x9OhVpDcsDLLeLbquI07sy5PJ7966j/hNr20nMeoi5LNGqMtvfiRAQxywZXwdwyMDgYGOlefeGoLe40s+d5uRcvjy3CjGyP1U1uJY6eTjbckgZ5kU/wDslTKqoto5r2Ny28d3FtbTJC90HMY+aO5VASAQDgnnqMj2+uWzfEHUL6yNrOkrrG+6LzbtWG3gfNnvx/8AW6AYr6RpwZm3S57Dz1X+cdJY6NbXN0scEdzKXHCRzoSWyBgYQ/3un+Tj7fTcOY7Hw78V73w4t+Baw3K3YXDPdR7kKq47qeu8nnnp+O1pPxjK61pV5PbfZo7WSWQulxAxDOki7lBIHG5OD/tewrzoaPaOke+Kdpj0CXCoCMDGAYzxz61atPDlvcMVt9OuLhgu4r9sQ5HrgIDjGaxfsqju9zWFacVZM7a8+Kn2fVtTsLVxPpF27ot5NIvmrGDIIxtjBBIDY4zyTzjmp9Q+IMF5Bpy28k63VjZ28Tu9t8rMkQHXBJGQ59MD655C20BJC8Ufh5ZbiANJIBLKdqICX3qGyAMZJyMAHn0kubOa6+wrb6Baw5u0ijSKW4YNvyPLO6ZhyGzxg/nzLhTvaxvDFTW5oeKPHNnr+gafZaLeXEV5DLt+aMRptAIXDrkknuOnAx3FZ2ga1eWupR2+q3zalHBsJhZHkZMsr9CODgd+ea37j4e+ItOE0P8Awi8dl5JLSt9k3QwoCRvZ3yqrkH5mIHIweaq6VaBNWs7nULnSXgumd28n7O7MQjFiY4vmyQpxxyQAOcA7U3GCtG9jGrOVWXNJaltfF+pxak7WUIt7O9uDHbwxWpEkhycIpxljyOATnA+p3bzUddsR51xbajZ5kw73MAj2kjOMNgYx1JPH0qxo/wAI/E134bfxRYXMNnb6bIVjlt/Ojni8zEbYVU8wAhjxj7pJ6HFSaV8GPE+oTXl1Gl5fRyxNDM/2KdjICv3V85QcngbscZ4PWtY1JpPlT/r5mfsYyetx1hrXiK1i+0tpd3c27xtKssloyEovLFfuggA8+gPTmrdt8TZmuLNzpF8FdSVWJMhifXAJ4GOOMVoaH8PPiKGIl0nXZIJictcqkmQMleG3MDn2GBk8mvR9N+GXxDv57a5stHvtEmjt/skcsV7bRqkZwcMJQ7dQvK88cdMnRV6ito/6+YLCxtueW6Jq3iTTzql5rHh/xLdfaI82slvPJbRQMS5yQhAbkrx/sjtTPGvjmY2ejWd9oOp201vOstwJplc3EJ3AKTuySQSvOT09q+tvhx8OvG3huFxqfjG/mheFo0tI7iOYxljncJJIM7lOdpA7nduwMd/AniSxsoo4plvRHCkYkvJhvfHG59qqCx6nAGc9K1i9ee1mU0+XkvdH5wXHxP1y6u79oLrUowEOMsqmOMvkDIOD6DGPb30NM+It9e2SxWGkandSPGInlkCKd+OWzvxkkZxX1r8U/hp4v+IV/pV4bTQreOwdyQzkfaC7R/K+VOR8gAH+13rWPwthuJnab4deAME8hbQnIyfVPas5R9o9f6/EIvkWj/A+KvFGo6v4jhtEuPD+o2/kFtr+TkE9DnLY/wAM1zOpaPrasn+halBb7xtzCAuAf9lj2UevT2r7+m+DukI48v4f+ELeVGDK0CNGcjuCqjFc5r/gzTtH1nw9p9t4I8PfbNcuJ7eGZLiZChS2mnJbAycrER9cUJNLlshp6812fD1xcXupSzy251SSaZykCWqPJ845JAB3Y6846k1c0TRfEem6bqUDaRqrvd+V5Dm0lzGULFSpIyDkrgj0r6p8C/srax4T8
37Wum6nDI0jMPt9xau4ZiwJaJUOQccA44HHAx0V/wDAK7vbeaNFubAbdqi18T3zBSOm3zNw7DqD9MU3Hm3Q4u2vN+B8cXcXidGjt7u8vLUxMfKiuorgMjOmxiRsJywU5I689apXfibVNBsrawuNUhRop0uolmtpuSUZctuhO7OV+8COMivp3xh8DvEHg2ziutL8X6xYPd6na27r/aER3tPcxozcQKS/fJJ+73Gc+e/Gb4e67Hd6HNq3jZvE4SeaO3S4ubcfZ5DEX3MRGuAdi5GMkAgEHrNulvxJ5LptM8B8ZPrEui311ezC6juRG7Tujq8hMitu+ZRnkHn0H1FFdd44viPhPqNja6xY3UMbQ+daRef5hVZLdBIN0YG0MFGSRkycZxRWtNpoizW5hfBiz1SXSbmWzOlSQrelWiv0SSQNsj+ZULoSAM5wcfoa9ttvhjqF5ctAvinStMhHlKzS6JaZ5JByDK5PAGD/ABEnOMbj5r+zXpzXuj6pIjFHivFwwcqVzGoJGASe3FfRVlpAtI8oROzbQ0oPzEDHr9Dnj8PU5ObUm6OK0v4Y3dna3Ln4m6VcFrQo0dppRVoyyElkZIByMDBJ6Z4BIrF174aXWmrph0jXfEHi69X7QyhI4oQi7VLEvMu5hkL65ACgDdke76Vp0Fvp0QClspt3LgkjA5z3qOOO3stSjfzFULE0SeZ8vBIJ4Xr0HX29TTVO2wcyRwWmeDvF3xC8JWOk+K/Dtlbw3Mam8v4JIYbx3Xc6OwEWFywQYU9M+prjPC/7M2szXSxXukxqkaZnlvLx1TJdtvleU4JAAHU+/pX0XbeJLa3hIR89PmzkH9Pf9aVfGUSxIVbAZsZ7ev8AI1Spt9Be2jE860f9nDw9pNvDcT2am8xmSeDUbmFsmMqfuvjuR0HUjpxXM/EL4KWWqXyWmjXFlp1td3MVokc9obuRGZpMuskjDbhNuMfMMEbsV7Nc+KXkOd+Rzlm9Kzg9tcXNvOzITHIsn7xcbSO/4ZpqlboHtk+pzngH4A+I9I0uXT9Y8aXV1Yl9yW0Klo3jPBUiQtwRnj3PWujufgtpEMsZtY7a2jGMrBbRISRjBLbdx6DqTXYjX7dQIzJhmyBwe2M8fjU0t0hVmLKSTkoDyKfIuo1VfRmB4B+FPh7wCJ5tNtJmu5GYNdee3mKG2blByODsXoOdoz0FdrolpZ6YJXtbdYpJCHdwPmfPqT1796xn1BFWRj82eMjrnvii01dI1OE2/wCzg8fX9afJoR7R9TsLe8LuwUjb1BJ/+v6CoNP1PWo9a1I350hdDAjSwNtJMb0uADIZlYCMJ8w27ST69a51tQjt97tNjjG1n/i68j8qp+IPF0dnotu/2mL7W948ZjeTLIvlxk8ZyOoP40uSzH7XRnpg11FUN5gYNjPJ/P61HJ4lwgVGIBABVenr1614VD8VrJkkxe28+wjcY3LDqByV9yB9SB3q2nj5bzTp42ZCsodPlZ/Q7gO+eDnuMH0rb2Whh7fW1zvfiD4juLPw1+6uHhk/tHTE3xAM21tQt1YYKkcqzDp9MVtP4nRWYySqrk+uC3pwa8P8ffEFZrARkR+YdS059xyOl/A2B+VY3if41R6ZJJtZZHyWBjBAx6d/84pezS3F7bse9eKvihpvgrQLnVtWvILPTbdkRp5g4jUtIEUEqrEDLqOFOM54GSPGPEvx70zxB4v8MXGna74XVfD13LfvJca8iicSW9zblUO3J4lDdM9cgDBPinin4uav4q02+tlaC2gkVSiun+rZTuVs4yWyPoCB715F4L8B6JdGzn1TWfItYZy0mnyq/mGNWXhdqdX5AGR0ySODWE7L4TelJy+I/Q/wB8bNJ8faXFLbajp0twLb7VPa291JJJAGwCrbokAwWxj27cgO8c/F/T/AvhTVNcv3+0RWKoWWIjezMwRV9Mliv+NfLWmfEDR9B1yE+EtIfT7X7Ilm5ubgvuUSO5YEHAJ3KOOP3a4A74HxJ/aCl
S9uPDV5odtrllKIiwkbcGLKGXaMfeBPFKLjytvcJNudo6o1/Fn7TVp8VZrRriXxPY3lnOs9lY6TbW4tZJEYsjzu8zE7Sc5UDr3wMLo8llrmhWsvifxZ40sYDHNFd2FreyXsbxqw2FWDybmJZARtH3XGMYJ53wxpUl1Etv8A8InD4ejQ4kuNQ1k29weMENFCFJbGOCqjr07dNa6TcWMok0m0s7hdkjb77UruYYBB/wBWu4KenGT16+ufObKEt7HnH7Q/jJvEtjpsVkmrJptrbC0e41DTntXvGWTf5khOAzZYDgDhV44FFaHxe0q9uPhvf6he/wBhzTRvbOZ7JJGm+Z1H3ztAzuBIwQQBjFFXDYc7p6jP2YZQuh62rMFU3a5/79j/AAr3SG6kY7SXZVPIYng5HOOa+ef2drnydD1hSwXN0p+9jPyL7f5/n7BDqKAIFY89NrcdDzz9D1//AFd1Ne6eVVb53Y7r7ZK0JUxCROwxu57ce3+PWq324R/OsCRlh2A5HPJ4+tc+t9JJFguFQ9xgkD3rH1G9LucXDBzxx1P0x71bSMlKXc6ifVTcmUIcqByNpwP8+v60y0uJ7mN/LDA9iy4Jz64/z1rldNllViPMPQMdpPI68joK0Y3nMThZ2IBwVcAEEH0oVgd+51FtdzRx53sFHJBP+RnpT7Od7hw3IJOflHH09vSueN9cBFUTMSGwCqrz/wCO+tNs9R86WRIr2S4ljG94oNshRfUhVyB74x79MFhp2PQotSVCAu7kZ2jqB+f0/Sllv23MgfDBscj35zXnlvrv2mNZLeW5vUfIDxmNk4zzkDkcHkdeDTrO61jVrgw2gbzFJLMzKV4xwPlDZ+YcFR9aLBzdzuA15G05WXKhsoCoG3sQcduh9ck9qpTf2jc6ZJNBcahbsgzIw8hfKxn1U55AHpgk5yAK898J+JtS8Wa14j022uJbltOEe1NPBfcHJUghVVlKkBSCeDkduczRvDOo+JtSS1sbCfR7p742hk1K2UygpGWO8xtv3EuDhyW4JOdxJzlKyNqcHJ2sWF+M+ixaIjw2Or6vcwwqbgsJApG9UJAZmjyc7gvHHGRVfSPiHd+LfC+vW0FuNN1i3txfWyRov/HyFMKKylG4/wBZwGOCvQEEnzbxDfeJPAnibWfDPifTrN7uwMcWI45djqzq0W0KyExFfmB+VsYzzwca98b6vrKq8l2lmjcQxWcYREH90NtaRzuZzgv3B9jwSqVO6R6EKNNbps9S1jw4/ifTrK7stV8QeIdW+3obi9tkZIIkCxeWCA+Ad5Dbgg9NuQDWxph8R+C/CMVheW+qieRHkFxLbPcxl5CT+8mQfKwYkAEZyoyuOa8f8AeLZ/Cvitdb1iC41rVNNWNrKK6m3LBcKrK5k3HKldzkAfxgZ6V9a/D74oeI/id4R1HUbTTU0qWCSOGE3Vyojv5W3EpDghmUBVztbPzjsDnNSqxlpK5rKnRmrWseJ+K/EV+J9Jh1u1h0DTLya3ma7mmZnhRblc/uyOSuAxUn7rZ9a8zufEv264cQie9dAVRYxhQeAd24dMgsB1+bB6cfSHx98A+HZpvD954j102t5a/ZLKS3mGJVMlwpmcQnLlVQP1JJyvJIOfGfFnxW0vTbrXdH8F6Rbvps1ywsp7i1CNAu7jYoPzYCxbS+eVJxkmtm5y1kzCNOnHRIgRL6G0829tWt53AKxJGvOVB7uOcEnHX8Kv6b8PtQvre0v72KCw0+4xIj3JVCYycb1QOSeh6jnHtXQeEdDvIvDNt4k8T3xBdQYLaUgPdNgkFic5XJz+f40/EfiQz3U9zdXUIuZgqSr52SoyMLj+EcAcAHAxnBxSeisi4wT1eiLrN4V0aUAu+olOA20QJu544DE/mOore074gQ28Mr6dpVpZvs2vOljGJXHozbMn/PWvL5L+bUZoLPTtOur64lOI0t1aRnJViAqgZPKntnGOlek+E/2d/iD4s2LdQQ+F7EgFpLtg0wHPRVy
2enDFRUqDe5onGOxQufFKQXkf2xxG9wCUR9qjgZOB07+lRah8RYrSFxYabc6jPDBMrRQKAoJClmOOQoAJJx/XHtHhr9l3wd4c1qG41Sa+8S3SI/7u5UpBuJHJ2HPb7rMc88HFdj4ifwtaaDqehadZAedHJbTxaZbp8hZSGDMfkRsN0Yg9ODxkUErlc0mfKXxgfxNrfwy8YSajpLaLZ6Rc2UU8V0rLLI8rxOiDK4yoZSw4wCvriiu3/ad1/U9Q+Dmuwz2ttZxNc2sky27F2kcSQqGdiByQqdBn5RnjglaRMpXvqeIfAWQxaFqpBwDdgEEcH5F4r0saqsUwj3OuMAhx8w9/YdO9eUfBJimiang7c3OC4zn7i8Y9Px/wDr96b3b85kDKWznbtwP5ev510Rk0jyqkb1GdEdYAQruGFAHJA59P8AOKovqiSSPGsm8kDdyTj/APX71z13q5YEK2SCSFBB7/n71lnV5F+YhVJPViwA9z6/lV85monajVCSyCUF/wDe+ZeO4/8AretWopNU8pJIbY3NntfzHjYM6lSuAEAyR83J6D5c/eFeetq9y8YVJAvQZCjj35H9KuW8HheSCQat4x1jSo5C0k0b2fnR+ayMGwUQ8EhV5OcH/Z+YdS3Q2hS5t2dF4i8QppF+Ybq0vNRuXRP3USBlDZYncScAkegJOM4rkJviFObZI7fTrWF1mBCs/msrK4OMDAz8vIIBA/OrWpy/DtNNuXsfG2q6tqqKi2tvfWHl22FIyHIUEYG7GAc9AB36Hwr4Bt/EOl3DTXC2yiBJZBsMSglnUPFI+GU/u2BVkYHcMDcVIxnVl0R1U6EdnucDfePtblliWTWCiqclIGVABtxyUCsSTnO4/jWFea7LNcSlru42SqMxW0j/ALwgfxM7M345PWvolP2O7xbUR6bajVtT89oZItRumtoYYgr+XMSnJLMqfKA45PXBr2bUfgf4B8Daro94l/p/hfTLSTzpbS6ihuJdRIbIBklywXHBWPGc81zupNnTGlCKPirwVoHjDWbqUeFtH1O8kJ2MbRJZPLycEswIwegye3Fd18P/AIffEiPQP7a0a/8AtOkfb90a6a/2tpJiq8rHbludijL5CgE/PyK+ktY+JPw18P6lPqem299qmoOpj3QySR26LiMbFVmCop8pD8inkZ78+ea7+1o+jaTnQrHT9EsIj5UI0+ETFD2C8KnT/ZNTZ7stSitEcf8AFz9kf4j6h4gs1tnu/GQFpvm1KJQixvvIMaKTvb5VDbjnIfAHBzxvw/8ABfhv4c+IbC78Ta3azX1nqESPb2bCWWJg3B2EgEKy/MCykeueKqfFX4+eL/FWbGLV557GeM+bMZSI3OcFVJO3gDPy/wB70rynS5BZXCXEt4GKMGCwru5HbdwB+GaaYaM9U8X/ALS/jWC41zw9pmtx3mgmaW3tb6TSrO3v3gDkKfNjT5WwBypPHQjisXwv8UtbFhNb+d4h1Z3Iknt5tem8iYAYAkWFUl2YHQSr9etUV8TLPpFxpNrpFkkcxUCVog8vByDn8BwfX6V3fgj4P+L9Y029jbTHsvPMTQy3Z8lcAtklTgn+E8A/jTTXQl3ZzWs3t/rFrpwuYrayh81Ils7C0SJYsliANoJbHTLszHuTV7SU0/w8bgW0cGo6jLIwiErbzu5GFVec8Drzz9K9y0X4C2dsIh4svkuIbVlkQ2EpRGYKF+clAQODyPXqMV3XwtttEsLfXDp2lW+nmw1i50/ztpMj7NhyWPP8fQk1a7kqLPHdN+H3xK8afZobqL7BZLLlF1aWSNETbgbYsBuDkjKj3r0PQP2atItzbNrt/NrBjCfJEv2aJtpz0UljyezDpXd614k03RWEk98qSysRGuATN6hUHLY6nb074FcnP8SdTvGEen2nlRAY+03PzbzzysYI492bPse5exfLfc9E03SNC8FadK9jbWmk2iLmVgiQooHd246epNQ3nxFS2IXS7ZrySQDbNcExQ455H
G5jjngYIGSw7+Rz6jPPMlxd3U99c78iabBwRn7irhUx/sqM555qWLWHXEjEudvU4BAz6j1P4fjnKui1E7W+1e51iST7ZfPciQ5EEYMcK8/3Qcvx/fLewHWltEjKCMYhjQABFwqoOwx269OnNcmdXjhP7zBdeVBO4L2OPyqRNeRvPAceWjEuWyNv1Hbk/rU7lqy2OJ/afljPwo1jY25jLboQOQv7+M/5yaK5f9oPxSNa+Gt9DbIHtvNhLXLDAl/epjZjr05PTpjPYrSJhPc8u+D6xroN67ldovG4LYz+7THb3rq7u4CEnLs2STg5PPoM4/Sud+EGk3GoeCtRns7Wa5ktr2Rrgwru2R+VFgkenD84PQ9hxPc3sUkm4AN+ROcegrSDTWh51WLjPUfNcyyNs3EHAHOBkfrj+tVpGYDPmMzj/ZHI/CkEw3FRnYe5OOPTH/16rmWJQd/zDBHzcfTir0ISL2i2v9s3bQfb7LTYkXzDPqV0sEeM4wM8sfZQT+uOkuG8D+HdHu573WtS8S3ZiP7vRrMR20Z9558ZGcdEJ56Vxf2qFwMKvGMAjIJ9uuf51xeo67c3zXUITcrEfMDgYHrmofqbwS7Dta8WRX1881rpkdoC+RvbLdeM4AGfwre8KfFPxRok+dNvls12FCPKDgrzxhs57/nXFw6a0hbzHRB6L8x/Dt39a1rOyWzLPsMh4yrkkNyc5AwazudCaWx7LH8dPE8kUX2rX9S1C5jjFu1mbgxRSDABI2YOTgdec8jmuZ1zx9dPfpLB5MLMS06TMZJUIP3SxXOf+A5rkFtJ763kdyY1wfliG0AYB6DGP61v6V4QtvN37PMTBCCTJxkEg5HB/wD11K90Pi3M2TxhctdXNxDLNJuXaFP3E6ZA6/3evHU+tRJYXTwxiO0EcAYhDEvyjHH3uT6e30r0ay0eziiAl2qWQRsUY56dBn3rcsr+005gqx7VUZGRnn8aV2VGB5jYfCjX/EE7SSw/YoX4D3bYIx0O372Pwr0rwt8B9D0+RZtQvLm9lXAKQ4iG7nIznP54/Ctc+MVK7FOwE42k8f5zV3Tte811ijOMDPAyAAec+gAoujSx2Hh+w0rwralNH0qC1YcCdU3vnPTeefxwK6Gz8UXkjyqSYweUfqWHOQD/AJ+teZ3vjmwsUyZnu7jP3EICe7bhnI68jj+uNffES4vFdBObWEDaqW+VZh/tHqenQnHHTPXPnLSbPXtQ8X2GmPMk0jyS9ZY1G+UDHTGcLntuIrlJfGMsFrPFpVpDpFvNK08ggKmRmbGWJ+6pOADtBPHDHrXnseuxQWuFB+Zic7eB6/y6dqf/AG/FIm1XJB6rwOOaOdjUUdJDrqwzlljQu4xveQtI3OQGZiSe/c1N/bdxIGYArn0GfqOvTrXKRaxboBgkKOV4Pzfl/KpY/EUcaOqgMNvO719B/j70uZmljfj8QtNJi4dAmdyOQS2fU+vt1HOfQ1Dea8Y5WaJw2DtO7kr6nH+f0rAk1GGUEjjAxgn/AD/hz+b9Pvi7lYR5jg8vnOPU1Nwsat14ijtbYyPM+du7CgZIx+frVIarcanMv2gC2tFOFhkwHkP+2P12/nnpVK6nNzqHkt5giRRJI7jO5s/Kv04yR7Cqsl7JIrMu6O0IyzgZMi89D2GATnv29S7isZ/xi1NZvBWo2iHzGjaLzHUAKuJUwPrz07Z+lFZHjXcfhpfOyGObZBKwVQArtOhIH0zjj0PpyVtTd0YVFqeOWt5PalmgmkgLDBMblc/lTmv7pmObmY55OZDzRRWtjEaL+6bLG5myRyfMP+NILqc8maTJ5++aKKSGPW9uIiSlxKpPGQ5HFRedJ83zse/3jRRQhiedIACHbp608XUxP+tfnr8xooo6iHLe3AJ/fy8D++akGr3xUA3k+F6fvDx+tFFOw0C6pe/Ni8nGD2kNOi1zUYnEkd/cpIvIZZmBB+uaKKVguB1e/dyzX1wWOSWMrZP609Nc1Fk2/b7nZnO3z
mx+WaKKLDGnV7/n/Tbj/v63+NN/tW9wV+2T4z/z1b/GiilZBcDq17/z+T8f9NW9frSrql6f+Xyf/v4f8aKKEguC6vfbSPtk4BPI81uf1pV1e+6fbJwB/wBNW/xoop2C4n9r3wX/AI/bjHp5rY/nQmr364AvbgfSVv8AGiinZBdjm1vUH3Mb65JPX963P6+5pp1i/IIN7cY9PNbHf3oopJIV2R3Gp3d4oWe6mnUHgSSFgPzNFFFPYTP/2Q==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "colorize_and_display(clip_colors=True, min_clip_s=50, min_clip_v=30)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/jpeg": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "colorize_and_display(clip_colors=True, min_clip_s=20, min_clip_v=30)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/jpeg": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "colorize_and_display(clip_colors=True, min_clip_s=20, min_clip_v=30, coefficients=[1.0, 2.0, 0.6])" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/jpeg": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "colorize_and_display(clip_colors=True, coefficients=[1.0, 2.0, 0.6], min_clip_s=20, min_clip_v=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This looks like a winner to me. 
We'll use `create_swatches.py` to create swatches for all available images in batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 User Default", + "language": "python", + "name": "python_3_user_default" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/create_swatches.py b/create_swatches.py new file mode 100644 index 0000000..b20dfd5 --- /dev/null +++ b/create_swatches.py @@ -0,0 +1,166 @@ +# Create all swatches for public domain AKON + +import pandas as pd +import requests +import jsonpath_ng as jp + +from PIL import Image +from io import BytesIO +from colorsys import rgb_to_hsv, hsv_to_rgb +from scipy.cluster.vq import kmeans +from numpy import array + + +DEFAULT_NUM_COLORS = 6 +SCALE = 256.0 + +DEFAULT_MINV = 0 +DEFAULT_MAXV = 256 + + +def get_raw_data(): + df = pd.read_csv(filepath_or_buffer='https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/raw/master/akon_postcards_public_domain.csv.bz2', compression='bz2') + out_df = df[['akon_id']] + return out_df + + +jp_image_link = jp.parse('sequences[*].canvases[*].images[*].resource.@id') + +def get_image_link_for_akon_id(akon_id): + r = requests.get(f'https://iiif.onb.ac.at/presentation/AKON/{akon_id}/manifest') + r.raise_for_status() + links = [m.value for m in jp_image_link.find(r.json())] + if len(links) > 1: + print(f'{len(links)} images found for id {akon_id}.', file=sys.stderr) + return links[0] + + +def get_clamped_image_link(original_iiif_image_link, max_width=200, max_height=200): + slices = original_iiif_image_link.split('/') + slices[-3] = f'!{max_width:d},{max_height:d}' + return '/'.join(slices) + + 
+def down_scale(x): + return x / SCALE + + +def up_scale(x): + return int(x * SCALE) + + +def clamp_hsv(color, min_v, max_v): + """ + Clamps a color such that the value (lightness) is between min_v and max_v. + """ + # use down_scale to convert color to value between 0-1 as expected by rgb_hsv + h, s, v = [down_scale(c) for c in color] + # also convert the min_v and max_v to values between 0-1 + min_v, max_v = map(down_scale, (min_v, max_v)) + # get the maximum of the min value and the color's value (therefore bumping it up if needed) + # then get the minimum of that number and the max_v (bumping the value down if needed) + v = min(max(min_v, v), max_v) + # apply upscale to get the h, s, v(which has been clamped) back to 0-255, return as tuple + return tuple(map(up_scale, (h, s, v))) + + +def clip_hsv(colors_hsv, min_v): + min_v = down_scale(min_v) + hsvs = [tuple(map(down_scale, color)) for color in colors_hsv] + hsvs = filter(lambda hsv: hsv[2] >= min_v, hsvs) + return [tuple(map(up_scale, hsv)) for hsv in hsvs] + + +def order_by_hue_hsv(colors): + """ + Orders colors by hue. + """ + hsvs = [list(map(down_scale, color)) for color in colors] + hsvs.sort(key=lambda t: t[0]) + return [tuple(map(up_scale, hsv_to_rgb(*hsv))) for hsv in hsvs] + + +def get_colors(img): + """ + Returns a list of all the image's colors. 
+ """ + w, h = img.size + # convert('HSV') converts the image's pixels info to HSV + # getcolors() returns an unsorted list of (count, pixel) values + # w * h ensures that maxcolors parameter is set so that each pixel could be unique + # there are three values returned in a list + return [single_color for count, color in img.convert('HSV').getcolors(w * h) for single_color in [color] * count] + + +def hexify(rgb): + return "#{0:02x}{1:02x}{2:02x}".format(*rgb) + + +def colorz(image_url, n=DEFAULT_NUM_COLORS, min_v=DEFAULT_MINV, max_v=DEFAULT_MAXV, + order_colors=True, coefficients=[1.0, 1.0, 1.0], clip_colors=False, min_clip_s=20, min_clip_v=20): + """ + Get the n most dominant colors of an image. + + Total number of colors returned is n, optionally ordered by hue. + Returns as a list of RGB triples. + + """ + r = requests.get(image_url) + r.raise_for_status() + img = Image.open(BytesIO(r.content)) + + obs = get_colors(img) # gets a list of HSV colors (e.g. (213, 191, 152)) for each pixel + # adjust the value of each color, if you've chosen to change minimum and maximum values + clamped = [clamp_hsv(color, min_v, max_v) for color in obs] + clipped = clip_hsv(clamped, min_clip_v) if clip_colors else clamped + # turns the list of colors into a numpy array of floats, then applies scipy's k-means function + clusters, _ = kmeans(array(clipped).astype(float) * coefficients, n) + normalized_clusters = clusters / coefficients + colors = order_by_hue_hsv(normalized_clusters) if order_colors else normalized_clusters + + hex_colors = list(map(hexify, colors)) # turn RGB into hex colors for web + return hex_colors + + +def draw_row_with_links(link_and_colors): + html = "" + url = link_and_colors[0] + for count, color in enumerate(link_and_colors[1]): + square = ''.format(((count * 30)), 0, color) + html += square + full_html = '{1}'.format(url, html, len(link_and_colors) * 30) + return full_html + + +def get_link_and_colors(akon_id): + image_link = 
get_image_link_for_akon_id(akon_id) + thumb_link = get_clamped_image_link(image_link) + color_swatches = colorz(thumb_link, 6, True) + return image_link, color_swatches + + +def get_color_swatches(image_link): + print(image_link, end='') + thumb_link = get_clamped_image_link(image_link) + print('.', end='') + color_swatches = colorz(thumb_link, 6, order_colors=True, coefficients=[1.0, 2.0, 0.6], clip_colors=True, min_clip_s=20, min_clip_v=20) + print('.') + return color_swatches + + +if __name__ == '__main__': + FILE_NAME = 'historic_postcards_color_swatches.csv.bz2' + + df = get_raw_data() + print('Data loaded.') + # For testing, use only a tiny part of the available data + # df = df.sample(100).copy() + df['image_link'] = df['akon_id'].apply(get_image_link_for_akon_id) + print('Image links created.') + df['hex_colors'] = df['image_link'].apply(get_color_swatches) + print('Colors computed.') + df['html'] = df[['image_link', 'hex_colors']].apply(draw_row_with_links, axis='columns') + print('HTML created.') + df.to_csv(FILE_NAME, compression='bz2') + print(f'Output file {FILE_NAME} written.') + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7b1f347 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +pandas +jsonpath_ng +Pillow +requests +scipy -- GitLab