From 3c1acf93489daba0463ade01392be7db3bc0be16 Mon Sep 17 00:00:00 2001 From: Christoph Steindl Date: Wed, 10 Jun 2020 14:20:58 +0200 Subject: [PATCH 1/2] Debug for binder --- LOC Colors - Data Management.ipynb | 4 ++-- README.md | 2 ++ requirements.txt | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/LOC Colors - Data Management.ipynb b/LOC Colors - Data Management.ipynb index c4ba067..658542c 100644 --- a/LOC Colors - Data Management.ipynb +++ b/LOC Colors - Data Management.ipynb @@ -56,7 +56,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv('historic_postcards_color_swatches.csv.bz2', compression='bz2')" + "df = pd.read_csv('https://labs.onb.ac.at/gitlab/labs-team/color-swatches-data/-/raw/master/historic_postcards_color_swatches.csv.bz2?inline=false', compression='bz2')" ] }, { @@ -1506,4 +1506,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/README.md b/README.md index 29eb82d..17a4656 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Flabs.onb.ac.at%2Fgitlab%2Flabs-team%2Fcolor-swatches/master) + Wanna take a look at the colors in our historic postcards? Thanks to Laura Wrubel from the George Washington University Libraries in Washington DC, now you can! diff --git a/requirements.txt b/requirements.txt index 7b1f347..9a55899 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -pandas -jsonpath_ng -Pillow -requests -scipy +pandas==1.0.4 +jsonpath-ng==1.5.1 +Pillow==7.1.2 +requests==2.23.0 +scipy==1.4.1 -- GitLab From d2f2720092d1f020b340af8e08b189a987244516 Mon Sep 17 00:00:00 2001 From: Christoph Steindl Date: Wed, 10 Jun 2020 14:20:58 +0200 Subject: [PATCH 2/2] Debug for binder --- LOC Colors - Data Management.ipynb | 675 +---------------------------- README.md | 2 + requirements.txt | 10 +- 3 files changed, 9 insertions(+), 678 deletions(-) diff --git a/LOC Colors - Data Management.ipynb b/LOC Colors - Data Management.ipynb index c4ba067..b69e3ab 100644 --- a/LOC Colors - Data Management.ipynb +++ b/LOC Colors - Data Management.ipynb @@ -56,7 +56,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv('historic_postcards_color_swatches.csv.bz2', compression='bz2')" + "df = pd.read_csv('https://labs.onb.ac.at/gitlab/labs-team/color-swatches-data/-/raw/master/historic_postcards_color_swatches.csv.bz2?inline=false', compression='bz2')" ] }, { @@ -812,677 +812,6 @@ "source": [ "And done!" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below there's a fast-forward, compact version of what's been done above. No need to do all this again." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compact (with other data source)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Data" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (13) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " interactivity=interactivity, compiler=compiler, result=result)\n" - ] - } - ], - "source": [ - "colors_hsv_clip = pd.read_csv('akon_with_hsv_clip50_color_swatches.csv.bz2', compression='bz2')\n", - "raw_data = pd.read_csv('akon_postcards_public_domain_1925.csv.bz2', compression='bz2')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## View Data Format" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0akon_idimage_linkhex_colorshtml
1191411914AK003_285https://iiif.onb.ac.at/images/AKON/AK003_285/2...['#050300', '#eee2c9', '#b6af9e', '#fdf7da', '...<a href=\"https://iiif.onb.ac.at/images/AKON/AK...
\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 akon_id \\\n", - "11914 11914 AK003_285 \n", - "\n", - " image_link \\\n", - "11914 https://iiif.onb.ac.at/images/AKON/AK003_285/2... \n", - "\n", - " hex_colors \\\n", - "11914 ['#050300', '#eee2c9', '#b6af9e', '#fdf7da', '... \n", - "\n", - " html \n", - "11914 \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0akon_ididaltitudebuildingcitycolorcommentmountainother...feature_classfeature_codegeoname_idlatitudelongitudenamecountry_idadmin_name_1admin_code_1geo
2343523435AK042_53325265434.0NaNFrohnleitenFalseNaNNaNNaN...PPPLA32779202.047.2666715.31667FrohnleitenATNaNNaN47.26667, 15.31667
\n", - "

1 rows × 30 columns

\n", - "" - ], - "text/plain": [ - " Unnamed: 0 akon_id id altitude building city color \\\n", - "23435 23435 AK042_533 25265 434.0 NaN Frohnleiten False \n", - "\n", - " comment mountain other ... feature_class feature_code \\\n", - "23435 NaN NaN NaN ... P PPLA3 \n", - "\n", - " geoname_id latitude longitude name country_id admin_name_1 \\\n", - "23435 2779202.0 47.26667 15.31667 Frohnleiten AT NaN \n", - "\n", - " admin_code_1 geo \n", - "23435 NaN 47.26667, 15.31667 \n", - "\n", - "[1 rows x 30 columns]" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_data.sample()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Combine Data" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "combined_data = pd.merge(colors_hsv_clip[['akon_id', 'hex_colors', 'image_link']],\n", - " raw_data[['akon_id', 'name', 'date']],\n", - " on='akon_id')" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
akon_idhex_colorsimage_linknamedate
23217AK041_595['#ada896', '#fcf6d5', '#767467', '#484739', '...https://iiif.onb.ac.at/images/AKON/AK041_595/5...Ötscher1909
\n", - "
" - ], - "text/plain": [ - " akon_id hex_colors \\\n", - "23217 AK041_595 ['#ada896', '#fcf6d5', '#767467', '#484739', '... \n", - "\n", - " image_link name date \n", - "23217 https://iiif.onb.ac.at/images/AKON/AK041_595/5... Ötscher 1909 " - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "combined_data.sample()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Flatten hex_colors" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "combined_data['hex_colors_list'] = combined_data['hex_colors'].apply(lambda c: json.loads(c.replace(\"'\", '\"')))" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
akon_idhex_colorsimage_linknamedatehex_colors_list
15996AK014_589['#020100', '#fbfae8', '#88887e', '#64645a', '...https://iiif.onb.ac.at/images/AKON/AK014_589/5...Maria Taferl1909[#020100, #fbfae8, #88887e, #64645a, #4d4f49, ...
\n", - "
" - ], - "text/plain": [ - " akon_id hex_colors \\\n", - "15996 AK014_589 ['#020100', '#fbfae8', '#88887e', '#64645a', '... \n", - "\n", - " image_link name date \\\n", - "15996 https://iiif.onb.ac.at/images/AKON/AK014_589/5... Maria Taferl 1909 \n", - "\n", - " hex_colors_list \n", - "15996 [#020100, #fbfae8, #88887e, #64645a, #4d4f49, ... " - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "combined_data.sample()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sanitize and Reorder" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [], - "source": [ - "combined_data = combined_data.drop(columns=['hex_colors']).copy()" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
akon_idimage_linknamedatehex_colors_list
19590AK028_177https://iiif.onb.ac.at/images/AKON/AK028_177/1...Frohnleiten1906[#020100, #a8a599, #7b7a6f, #fbf9e5, #4b4b40, ...
\n", - "
" - ], - "text/plain": [ - " akon_id image_link \\\n", - "19590 AK028_177 https://iiif.onb.ac.at/images/AKON/AK028_177/1... \n", - "\n", - " name date hex_colors_list \n", - "19590 Frohnleiten 1906 [#020100, #a8a599, #7b7a6f, #fbf9e5, #4b4b40, ... " - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "combined_data.sample()" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [], - "source": [ - "combined_data = combined_data.rename(columns={'hex_colors_list': 'hex_colors'})" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
akon_idimage_linknamedatehex_colors
33304AK087_042https://iiif.onb.ac.at/images/AKON/AK087_042/0...Abcoudevor 1905[#f8eacd, #aca391, #5b5747, #6e6a5e, #525148, ...
\n", - "
" - ], - "text/plain": [ - " akon_id image_link name \\\n", - "33304 AK087_042 https://iiif.onb.ac.at/images/AKON/AK087_042/0... Abcoude \n", - "\n", - " date hex_colors \n", - "33304 vor 1905 [#f8eacd, #aca391, #5b5747, #6e6a5e, #525148, ... " - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "combined_data.sample()" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
akon_idhex_colorsimage_linknamedate
25575AK031_287[#444626, #caccbc, #4a4d41, #48504f, #5b7073, ...https://iiif.onb.ac.at/images/AKON/AK031_287/2...Ebensee1907
\n", - "
" - ], - "text/plain": [ - " akon_id hex_colors \\\n", - "25575 AK031_287 [#444626, #caccbc, #4a4d41, #48504f, #5b7073, ... \n", - "\n", - " image_link name date \n", - "25575 https://iiif.onb.ac.at/images/AKON/AK031_287/2... Ebensee 1907 " - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "combined_data = combined_data[['akon_id', 'hex_colors', 'image_link', 'name', 'date']]\n", - "combined_data.sample()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sample and Write" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [], - "source": [ - "combined_data.iloc[:100].to_json('swatches_100.json', orient='values')" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], - "source": [ - "combined_data.to_json('swatches_all.json', orient='values')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Alternate Data Format Without Link" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [], - "source": [ - "sans_link = combined_data[['akon_id', 'hex_colors', 'name', 'date']]" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [], - "source": [ - "sans_link.iloc[:100].to_json('swatches_100_nolink.json', orient='values')" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [], - "source": [ - "sans_link.to_json('swatches_all_nolink.json', orient='values')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1506,4 +835,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/README.md b/README.md index 29eb82d..17a4656 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Flabs.onb.ac.at%2Fgitlab%2Flabs-team%2Fcolor-swatches/master) + Wanna take a look at the colors in our historic postcards? Thanks to Laura Wrubel from the George Washington University Libraries in Washington DC, now you can! diff --git a/requirements.txt b/requirements.txt index 7b1f347..9a55899 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -pandas -jsonpath_ng -Pillow -requests -scipy +pandas==1.0.4 +jsonpath-ng==1.5.1 +Pillow==7.1.2 +requests==2.23.0 +scipy==1.4.1 -- GitLab