Skip to content
sample9.ipynb 3.1 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a `WebarchivSession` object, which provides convenience methods for accessing the API with your API key."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "from webarchiv import WebarchivSession\n",
    "\n",
    "# The API key is a credential and must not be stored in the notebook.\n",
    "# It is read from the WEBARCHIV_API_KEY environment variable; if that is\n",
    "# unset or empty, it is requested interactively without echoing the input.\n",
    "apikey = os.environ.get('WEBARCHIV_API_KEY') or getpass('Webarchiv API key: ')\n",
    "\n",
    "# Session object used by all following cells to call the API.\n",
    "w = WebarchivSession(apikey)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Page whose captures are inspected.\n",
    "url = \"http://sport.orf.at/l/stories/2003717/\"\n",
    "\n",
    "# Bounds passed to the search; presumably the start and end of the capture\n",
    "# window in YYYYMMDDhhmmss form -- TODO confirm against the webarchiv API.\n",
    "search_from = \"20110101000000\"\n",
    "search_to = \"20120401000000\"\n",
    "\n",
    "# Pass `url` itself (not a second copy of the literal) so the search and the\n",
    "# later per-capture checksum calls always refer to the same page.\n",
    "response = w.wayback_search(url, search_from, search_to)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop here when the search did not succeed: the next cell parses\n",
    "# response.json()['hits'] and would otherwise fail with a less helpful error.\n",
    "if response.status_code != 200:\n",
    "    raise RuntimeError(\"Wayback search failed with status code {}\".format(response.status_code))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "http://wayback/web/20110401202828/http://sport.orf.at/l/stories/2003717/\n",
      "http://wayback/web/20110704202825/http://sport.orf.at/l/stories/2003717/\n",
      "A watchlist with all captures mentioned above was generated. The code for this watchlist is Zp. \n",
      "end\n"
     ]
    }
   ],
   "source": [
    "# Walk through the hits of the search and collect every capture whose\n",
    "# fragment checksum differs from the checksum of the previous usable capture.\n",
    "lastchecksum = ''\n",
    "captures = []\n",
    "for hit in response.json()['hits']:\n",
    "    capturedate = hit['c']\n",
    "\n",
    "    resp = w.fragment_checksum_html(url, capturedate, \".odd td\", 3)\n",
    "    result = resp.json()  # parse the body once and reuse it\n",
    "\n",
    "    # Skip results that report a non-zero returncode. This is checked before\n",
    "    # the checksum is read, so such a result is never accessed for a checksum.\n",
    "    if result['returncode'] != 0:\n",
    "        continue\n",
    "\n",
    "    checksum = result['checksum']\n",
    "    if checksum != lastchecksum:\n",
    "        print(\"http://wayback/web/\" + capturedate + \"/\" + url)\n",
    "        captures.append({\"url\": url, \"timestamp\": capturedate})\n",
    "\n",
    "    lastchecksum = checksum\n",
    "\n",
    "# The watchlist result gets its own name: overwriting `response` would make\n",
    "# this cell fail on a re-run, because the loop above reads the search hits\n",
    "# from `response`.\n",
    "if captures:\n",
    "    watchlist_response = w.create_watchlist(captures)\n",
    "    # format() also works if the returned code is not a str.\n",
    "    print(\"A watchlist with all captures mentioned above was generated. The code for this watchlist is {}. \".format(watchlist_response.json()))\n",
    "\n",
    "print(\"end\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}