diff --git a/LOC Colors - Data Management.ipynb b/LOC Colors - Data Management.ipynb
index c4ba0673c0cdf5c071a3a5358481cbb4bb91a11d..b69e3ab95c2d7c988ca6eeb7c969ff608cbfc1e6 100644
--- a/LOC Colors - Data Management.ipynb
+++ b/LOC Colors - Data Management.ipynb
@@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv('historic_postcards_color_swatches.csv.bz2', compression='bz2')"
+ "df = pd.read_csv('https://labs.onb.ac.at/gitlab/labs-team/color-swatches-data/-/raw/master/historic_postcards_color_swatches.csv.bz2?inline=false', compression='bz2')"
]
},
{
@@ -812,677 +812,6 @@
"source": [
"And done!"
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": []
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Below there's a fast-forward, compact version of what's been done above. No need to do all this again."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Compact (with other data source)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Load Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 65,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (13) have mixed types. Specify dtype option on import or set low_memory=False.\n",
- " interactivity=interactivity, compiler=compiler, result=result)\n"
- ]
- }
- ],
- "source": [
- "colors_hsv_clip = pd.read_csv('akon_with_hsv_clip50_color_swatches.csv.bz2', compression='bz2')\n",
- "raw_data = pd.read_csv('akon_postcards_public_domain_1925.csv.bz2', compression='bz2')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## View Data Format"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 66,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0 | \n",
- " akon_id | \n",
- " image_link | \n",
- " hex_colors | \n",
- " html | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 11914 | \n",
- " 11914 | \n",
- " AK003_285 | \n",
- " https://iiif.onb.ac.at/images/AKON/AK003_285/2... | \n",
- " ['#050300', '#eee2c9', '#b6af9e', '#fdf7da', '... | \n",
- " <a href=\"https://iiif.onb.ac.at/images/AKON/AK... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Unnamed: 0 akon_id \\\n",
- "11914 11914 AK003_285 \n",
- "\n",
- " image_link \\\n",
- "11914 https://iiif.onb.ac.at/images/AKON/AK003_285/2... \n",
- "\n",
- " hex_colors \\\n",
- "11914 ['#050300', '#eee2c9', '#b6af9e', '#fdf7da', '... \n",
- "\n",
- " html \n",
- "11914 \n",
- "\n",
- "\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0 | \n",
- " akon_id | \n",
- " id | \n",
- " altitude | \n",
- " building | \n",
- " city | \n",
- " color | \n",
- " comment | \n",
- " mountain | \n",
- " other | \n",
- " ... | \n",
- " feature_class | \n",
- " feature_code | \n",
- " geoname_id | \n",
- " latitude | \n",
- " longitude | \n",
- " name | \n",
- " country_id | \n",
- " admin_name_1 | \n",
- " admin_code_1 | \n",
- " geo | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 23435 | \n",
- " 23435 | \n",
- " AK042_533 | \n",
- " 25265 | \n",
- " 434.0 | \n",
- " NaN | \n",
- " Frohnleiten | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " P | \n",
- " PPLA3 | \n",
- " 2779202.0 | \n",
- " 47.26667 | \n",
- " 15.31667 | \n",
- " Frohnleiten | \n",
- " AT | \n",
- " NaN | \n",
- " NaN | \n",
- " 47.26667, 15.31667 | \n",
- "
\n",
- " \n",
- "
\n",
- "1 rows × 30 columns
\n",
- ""
- ],
- "text/plain": [
- " Unnamed: 0 akon_id id altitude building city color \\\n",
- "23435 23435 AK042_533 25265 434.0 NaN Frohnleiten False \n",
- "\n",
- " comment mountain other ... feature_class feature_code \\\n",
- "23435 NaN NaN NaN ... P PPLA3 \n",
- "\n",
- " geoname_id latitude longitude name country_id admin_name_1 \\\n",
- "23435 2779202.0 47.26667 15.31667 Frohnleiten AT NaN \n",
- "\n",
- " admin_code_1 geo \n",
- "23435 NaN 47.26667, 15.31667 \n",
- "\n",
- "[1 rows x 30 columns]"
- ]
- },
- "execution_count": 67,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "raw_data.sample()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Combine Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 68,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data = pd.merge(colors_hsv_clip[['akon_id', 'hex_colors', 'image_link']],\n",
- " raw_data[['akon_id', 'name', 'date']],\n",
- " on='akon_id')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 69,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " akon_id | \n",
- " hex_colors | \n",
- " image_link | \n",
- " name | \n",
- " date | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 23217 | \n",
- " AK041_595 | \n",
- " ['#ada896', '#fcf6d5', '#767467', '#484739', '... | \n",
- " https://iiif.onb.ac.at/images/AKON/AK041_595/5... | \n",
- " Ötscher | \n",
- " 1909 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " akon_id hex_colors \\\n",
- "23217 AK041_595 ['#ada896', '#fcf6d5', '#767467', '#484739', '... \n",
- "\n",
- " image_link name date \n",
- "23217 https://iiif.onb.ac.at/images/AKON/AK041_595/5... Ötscher 1909 "
- ]
- },
- "execution_count": 69,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "combined_data.sample()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Flatten hex_colors"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 70,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data['hex_colors_list'] = combined_data['hex_colors'].apply(lambda c: json.loads(c.replace(\"'\", '\"')))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 71,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " akon_id | \n",
- " hex_colors | \n",
- " image_link | \n",
- " name | \n",
- " date | \n",
- " hex_colors_list | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 15996 | \n",
- " AK014_589 | \n",
- " ['#020100', '#fbfae8', '#88887e', '#64645a', '... | \n",
- " https://iiif.onb.ac.at/images/AKON/AK014_589/5... | \n",
- " Maria Taferl | \n",
- " 1909 | \n",
- " [#020100, #fbfae8, #88887e, #64645a, #4d4f49, ... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " akon_id hex_colors \\\n",
- "15996 AK014_589 ['#020100', '#fbfae8', '#88887e', '#64645a', '... \n",
- "\n",
- " image_link name date \\\n",
- "15996 https://iiif.onb.ac.at/images/AKON/AK014_589/5... Maria Taferl 1909 \n",
- "\n",
- " hex_colors_list \n",
- "15996 [#020100, #fbfae8, #88887e, #64645a, #4d4f49, ... "
- ]
- },
- "execution_count": 71,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "combined_data.sample()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Sanitize and Reorder"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 73,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data = combined_data.drop(columns=['hex_colors']).copy()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 74,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " akon_id | \n",
- " image_link | \n",
- " name | \n",
- " date | \n",
- " hex_colors_list | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 19590 | \n",
- " AK028_177 | \n",
- " https://iiif.onb.ac.at/images/AKON/AK028_177/1... | \n",
- " Frohnleiten | \n",
- " 1906 | \n",
- " [#020100, #a8a599, #7b7a6f, #fbf9e5, #4b4b40, ... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " akon_id image_link \\\n",
- "19590 AK028_177 https://iiif.onb.ac.at/images/AKON/AK028_177/1... \n",
- "\n",
- " name date hex_colors_list \n",
- "19590 Frohnleiten 1906 [#020100, #a8a599, #7b7a6f, #fbf9e5, #4b4b40, ... "
- ]
- },
- "execution_count": 74,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "combined_data.sample()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 78,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data = combined_data.rename(columns={'hex_colors_list': 'hex_colors'})"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 79,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " akon_id | \n",
- " image_link | \n",
- " name | \n",
- " date | \n",
- " hex_colors | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 33304 | \n",
- " AK087_042 | \n",
- " https://iiif.onb.ac.at/images/AKON/AK087_042/0... | \n",
- " Abcoude | \n",
- " vor 1905 | \n",
- " [#f8eacd, #aca391, #5b5747, #6e6a5e, #525148, ... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " akon_id image_link name \\\n",
- "33304 AK087_042 https://iiif.onb.ac.at/images/AKON/AK087_042/0... Abcoude \n",
- "\n",
- " date hex_colors \n",
- "33304 vor 1905 [#f8eacd, #aca391, #5b5747, #6e6a5e, #525148, ... "
- ]
- },
- "execution_count": 79,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "combined_data.sample()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 81,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " akon_id | \n",
- " hex_colors | \n",
- " image_link | \n",
- " name | \n",
- " date | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 25575 | \n",
- " AK031_287 | \n",
- " [#444626, #caccbc, #4a4d41, #48504f, #5b7073, ... | \n",
- " https://iiif.onb.ac.at/images/AKON/AK031_287/2... | \n",
- " Ebensee | \n",
- " 1907 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " akon_id hex_colors \\\n",
- "25575 AK031_287 [#444626, #caccbc, #4a4d41, #48504f, #5b7073, ... \n",
- "\n",
- " image_link name date \n",
- "25575 https://iiif.onb.ac.at/images/AKON/AK031_287/2... Ebensee 1907 "
- ]
- },
- "execution_count": 81,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "combined_data = combined_data[['akon_id', 'hex_colors', 'image_link', 'name', 'date']]\n",
- "combined_data.sample()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Sample and Write"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 82,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data.iloc[:100].to_json('swatches_100.json', orient='values')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 83,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data.to_json('swatches_all.json', orient='values')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Alternate Data Format Without Link"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 84,
- "metadata": {},
- "outputs": [],
- "source": [
- "sans_link = combined_data[['akon_id', 'hex_colors', 'name', 'date']]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 85,
- "metadata": {},
- "outputs": [],
- "source": [
- "sans_link.iloc[:100].to_json('swatches_100_nolink.json', orient='values')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 86,
- "metadata": {},
- "outputs": [],
- "source": [
- "sans_link.to_json('swatches_all_nolink.json', orient='values')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -1506,4 +835,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
-}
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 29eb82deabde0950d4e33d59d11790bcf80a7439..17a46566d3a6d8e1665181736e7a5f7e97e64099 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Flabs.onb.ac.at%2Fgitlab%2Flabs-team%2Fcolor-swatches/master)
+
Wanna take a look at the colors in our historic postcards?
Thanks to Laura Wrubel from the George Washington University Libraries in Washington DC, now you can!
diff --git a/requirements.txt b/requirements.txt
index 7b1f347978a45956c65de107a119a30dc9305d69..9a5589938683b9df9aeaecbea5dc4354a457c251 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-pandas
-jsonpath_ng
-Pillow
-requests
-scipy
+pandas==1.0.4
+jsonpath-ng==1.5.1
+Pillow==7.1.2
+requests==2.23.0
+scipy==1.4.1