Newer
Older
" <td>...</td>\n",
" <td>2013-06-27 13:27:58</td>\n",
" <td>1</td>\n",
" <td>1568-03-04</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno_pdf.pl?...</td>\n",
" <td>http://anno.onb.ac.at/cgi-content/anno?aid=fug...</td>\n",
" <td>0</td>\n",
" <td>1122204</td>\n",
" <td>4</td>\n",
"<p>5 rows × 21 columns</p>\n",
" manifest_id aid year day dc_title dc_title_additional \\\n",
"0 fug15050701 fug 1505 15050701 Fugger - Zeitungen NaN \n",
"1 fug15680120 fug 1568 15680120 Fugger - Zeitungen NaN \n",
"2 fug15680124 fug 1568 15680124 Fugger - Zeitungen NaN \n",
"3 fug15680228 fug 1568 15680228 Fugger - Zeitungen NaN \n",
"4 fug15680304 fug 1568 15680304 Fugger - Zeitungen NaN \n",
"\n",
" subjects place_of_publications languages dc_type ... meta_type \\\n",
"0 Tageszeitung o.O. de newspaper ... brz \n",
"1 Tageszeitung o.O. de newspaper ... brz \n",
"2 Tageszeitung o.O. de newspaper ... brz \n",
"3 Tageszeitung o.O. de newspaper ... brz \n",
"4 Tageszeitung o.O. de newspaper ... brz \n",
" ini_type modification_datetime longer_page_id dc_date \\\n",
"0 anno 2013-06-27 13:28:35 1 1505-07-01 \n",
"1 anno 2013-06-27 13:27:59 1 1568-01-20 \n",
"2 anno 2013-06-27 13:27:58 1 1568-01-24 \n",
"3 anno 2013-06-27 13:27:59 1 1568-02-28 \n",
"4 anno 2013-06-27 13:27:58 1 1568-03-04 \n",
" link_pdf \\\n",
"0 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n",
"1 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n",
"2 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n",
"3 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n",
"4 http://anno.onb.ac.at/cgi-content/anno_pdf.pl?... \n",
" link_old has_ocr meta_id \\\n",
"0 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 1122198 \n",
"1 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 1122201 \n",
"2 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 1122202 \n",
"3 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 1122203 \n",
"4 http://anno.onb.ac.at/cgi-content/anno?aid=fug... 0 1122204 \n",
" page_count \n",
"0 2 \n",
"1 1 \n",
"2 2 \n",
"3 1 \n",
"4 4 \n",
"\n",
"[5 rows x 21 columns]"
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"older.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1505"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"min(older['year'])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(pages[pages['manifest_id'].isin(older['manifest_id'])])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(pages)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}