Newer
Older
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"df = pd.read_csv('https://labs.onb.ac.at/gitlab/labs-team/raw-metadata/raw/master/anno_labs_issues.csv.bz2', compression='bz2')"
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>manifest_id</th>\n",
" <th>aid</th>\n",
" <th>year</th>\n",
" <th>day</th>\n",
" <th>dc_title</th>\n",
" <th>dc_title_additional</th>\n",
" <th>subjects</th>\n",
" <th>place_of_publications</th>\n",
" <th>languages</th>\n",
" <th>dc_type</th>\n",
" <th>...</th>\n",
" <th>modification_datetime</th>\n",
" <th>longer_page_id</th>\n",
" <th>dc_date</th>\n",
" <th>link_pdf</th>\n",
" <th>link_old</th>\n",
" <th>has_ocr</th>\n",
" <th>meta_id</th>\n",
" <th>page_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>fug15050701</td>\n",
" <td>fug</td>\n",
" <td>1505</td>\n",
" <td>15050701</td>\n",
" <td>Fugger - Zeitungen</td>\n",
" <td>Tageszeitung</td>\n",
" <td>o.O.</td>\n",
" <td>...</td>\n",
" <td>2013-06-27 13:28:35</td>\n",
" <td>1</td>\n",
" <td>1505-07-01</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno?aid=fug...</td>\n",
" <td>1122198</td>\n",
" <td>2</td>\n",
" <td>fug15680120</td>\n",
" <td>fug</td>\n",
" <td>1568</td>\n",
" <td>15680120</td>\n",
" <td>Fugger - Zeitungen</td>\n",
" <td>Tageszeitung</td>\n",
" <td>o.O.</td>\n",
" <td>...</td>\n",
" <td>2013-06-27 13:27:59</td>\n",
" <td>1</td>\n",
" <td>1568-01-20</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno?aid=fug...</td>\n",
" <td>1122201</td>\n",
" <td>1</td>\n",
" <td>fug15680124</td>\n",
" <td>fug</td>\n",
" <td>1568</td>\n",
" <td>15680124</td>\n",
" <td>Fugger - Zeitungen</td>\n",
" <td>Tageszeitung</td>\n",
" <td>o.O.</td>\n",
" <td>...</td>\n",
" <td>2013-06-27 13:27:58</td>\n",
" <td>1</td>\n",
" <td>1568-01-24</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno?aid=fug...</td>\n",
" <td>1122202</td>\n",
" <td>2</td>\n",
" <td>fug15680228</td>\n",
" <td>fug</td>\n",
" <td>1568</td>\n",
" <td>15680228</td>\n",
" <td>Fugger - Zeitungen</td>\n",
" <td>Tageszeitung</td>\n",
" <td>o.O.</td>\n",
" <td>...</td>\n",
" <td>2013-06-27 13:27:59</td>\n",
" <td>1</td>\n",
" <td>1568-02-28</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno?aid=fug...</td>\n",
" <td>1122203</td>\n",
" <td>1</td>\n",
" <td>fug15680304</td>\n",
" <td>fug</td>\n",
" <td>1568</td>\n",
" <td>15680304</td>\n",
" <td>Fugger - Zeitungen</td>\n",
" <td>Tageszeitung</td>\n",
" <td>o.O.</td>\n",
" <td>...</td>\n",
" <td>2013-06-27 13:27:58</td>\n",
" <td>1</td>\n",
" <td>1568-03-04</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno?aid=fug...</td>\n",
" <td>1122204</td>\n",
" <td>4</td>\n",
"<p>5 rows × 21 columns</p>\n",
" manifest_id aid year day dc_title dc_title_additional \\\n",
"0 fug15050701 fug 1505 15050701 Fugger - Zeitungen NaN \n",
"1 fug15680120 fug 1568 15680120 Fugger - Zeitungen NaN \n",
"2 fug15680124 fug 1568 15680124 Fugger - Zeitungen NaN \n",
Loading full blame...