Skip to content
All the Mountains.ipynb 100 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# All the Mountains"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*Extract all postcards depicting a mountain*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "*(as far as we know from the metadata)*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "pd.set_option('display.max_columns', 100)  # Output up to 100 columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kst/tmp/dingsdi/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3049: DtypeWarning: Columns (13) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "df = pd.read_csv('https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/raw/master/akon_postcards_public_domain.csv.bz2', compression='bz2')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## View Data"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>akon_id</th>\n",
       "      <th>id</th>\n",
       "      <th>altitude</th>\n",
       "      <th>building</th>\n",
       "      <th>city</th>\n",
       "      <th>color</th>\n",
       "      <th>comment</th>\n",
       "      <th>mountain</th>\n",
       "      <th>other</th>\n",
       "      <th>photographer</th>\n",
       "      <th>publisher</th>\n",
       "      <th>publisher_place</th>\n",
       "      <th>region</th>\n",
       "      <th>water_body</th>\n",
       "      <th>year</th>\n",
       "      <th>inventory_number</th>\n",
       "      <th>signature</th>\n",
       "      <th>revision_date</th>\n",
       "      <th>date</th>\n",
       "      <th>feature_class</th>\n",
       "      <th>feature_code</th>\n",
       "      <th>geoname_id</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>name</th>\n",
       "      <th>country_id</th>\n",
       "      <th>admin_name_1</th>\n",
       "      <th>admin_code_1</th>\n",
       "      <th>geo</th>\n",
       "      <th>download_link</th>\n",
       "      <th>download_link_256x256</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7125</th>\n",
       "      <td>7125</td>\n",
       "      <td>AK103_180</td>\n",
       "      <td>67901</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Laurenskerk</td>\n",
       "      <td>Alkmaar</td>\n",
       "      <td>False</td>\n",
       "      <td>gel. 1911</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2014-09-03 12:10:00.735</td>\n",
       "      <td>gelaufen 1911</td>\n",
       "      <td>P</td>\n",
       "      <td>PPL</td>\n",
       "      <td>2759899.0</td>\n",
       "      <td>52.63167</td>\n",
       "      <td>4.74861</td>\n",
       "      <td>Alkmaar</td>\n",
       "      <td>NL</td>\n",
       "      <td>Nord-Holland</td>\n",
       "      <td>07</td>\n",
       "      <td>52.63167, 4.74861</td>\n",
       "      <td>https://iiif.onb.ac.at/images/AKON/AK103_180/1...</td>\n",
       "      <td>https://iiif.onb.ac.at/images/AKON/AK103_180/1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6850</th>\n",
       "      <td>6850</td>\n",
       "      <td>AK080_116</td>\n",
       "      <td>49953</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Stuttgart</td>\n",
       "      <td>False</td>\n",
       "      <td>1913 gel</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Schaller</td>\n",
       "      <td>Stuttgart</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2014-08-25 14:42:50.107</td>\n",
       "      <td>gelaufen 1913</td>\n",
       "      <td>P</td>\n",
       "      <td>PPLA</td>\n",
       "      <td>2825297.0</td>\n",
       "      <td>48.78232</td>\n",
       "      <td>9.17702</td>\n",
       "      <td>Stuttgart</td>\n",
       "      <td>DE</td>\n",
       "      <td>Baden-Württemberg</td>\n",
       "      <td>01</td>\n",
       "      <td>48.78232, 9.17702</td>\n",
Loading full blame...