Skip to content
GitLab
Explore
Sign in
aponb
webarchive-api
Compare revisions
17c7d67fac70283e6e56d01a9e13ca0e0b091996 to d1b3f7a21a474abee8c5b8461d1799875c393fc6
Commits on Source (2)
fix: import datetime was missing
· ccc57de7
onbpre
authored
Mar 20, 2019
ccc57de7
get snapshots from captures
· d1b3f7a2
onbpre
authored
Mar 20, 2019
d1b3f7a2
Expand all
Hide whitespace changes
Inline
Side-by-side
sample2.ipynb
View file @
d1b3f7a2
%% Cell type:markdown id: tags:
Create a WebarchivSession Object with convenience methods for easy access with your API-Key
%% Cell type:code id: tags:
```
python
import
datetime
from
webarchiv
import
WebarchivSession
# API key handed to WebarchivSession below.
# NOTE(review): the key is hard-coded in the notebook source — consider
# loading it from configuration instead of committing it.
apikey = 'Zz2tQls7fuaocX2pjrfc2npojqbGwXL2'
# Session object; the following cell calls wayback_search() on it.
w = WebarchivSession(apikey)
```
%% Cell type:markdown id: tags:
Submit a URL search to get all archived capturedates of the requested URL
%% Cell type:code id: tags:
```
python
# URL whose archived captures are looked up.
url = "http://www.onb.ac.at"
# Keep the full response object; later cells read status_code and json() from it.
response = w.wayback_search(url)
```
%% Cell type:markdown id: tags:
The search always returns the full response. Checking for status_code 200 before extracting the response is always a good idea
%% Cell type:code id: tags:
```
python
# Stop here unless the search request came back with HTTP status 200.
request_succeeded = response.status_code == 200
if not request_succeeded:
    print("Something went wrong ...")
    exit(1)
```
%% Cell type:markdown id: tags:
Now it is safe to extract the response
%% Cell type:markdown id: tags:
Get the total number of captures of the requested URL
%% Cell type:code id: tags:
```
python
# Decode the JSON body and show its 'total' field.
payload = response.json()
print(payload['total'])
```
%% Output
24
26
24
73
%% Cell type:markdown id: tags:
Get the Archiveurl of the oldest Capturedate of the requested URL
%% Cell type:code id: tags:
```
python
# Take the 'c' value of the first hit, interpret it as a timestamp and
# render it in the 14-digit form used in the archive URL.
first_hit = response.json()['hits'][0]
captured_at = datetime.datetime.fromtimestamp(first_hit['c'])
capturedate = captured_at.strftime('%Y%m%d%H%M%S')

# Archive URL = fixed prefix + formatted capture date + '/' + original URL.
captureurl = ''.join(['https://webarchiv.onb.ac.at/web/', capturedate, '/', url])
print(captureurl)
```
%% Output
https://webarchiv.onb.ac.at/web/20090916183601/http://www.onb.ac.at
%% Cell type:code id: tags:
```
python
```
...
...
%% Cell type:markdown id: tags:
Create a WebarchivSession Object with convenience methods for easy access with your API-Key
%% Cell type:code id: tags:
```
python
import
datetime
from
webarchiv
import
WebarchivSession
# API key handed to WebarchivSession below.
# NOTE(review): the key is hard-coded in the notebook source — consider
# loading it from configuration instead of committing it.
apikey = 'Zz2tQls7fuaocX2pjrfc2npojqbGwXL2'
# Session object; the following cell calls wayback_search() on it.
w = WebarchivSession(apikey)
```
%% Cell type:markdown id: tags:
Submit a URL search to get all archived capturedates of the requested URL
%% Cell type:code id: tags:
```
python
# URL whose archived captures are looked up.
url = "http://www.onb.ac.at"
# Keep the full response object; later cells read status_code and json() from it.
response = w.wayback_search(url)
```
%% Cell type:markdown id: tags:
The search always returns the full response. Checking for status_code 200 before extracting the response is always a good idea
%% Cell type:code id: tags:
```
python
# Stop here unless the search request came back with HTTP status 200.
request_succeeded = response.status_code == 200
if not request_succeeded:
    print("Something went wrong ...")
    exit(1)
```
%% Cell type:markdown id: tags:
Now it is safe to extract the response
%% Cell type:markdown id: tags:
Get the total number of captures of the requested URL
%% Cell type:code id: tags:
```
python
# Decode the JSON body and show its 'total' field.
payload = response.json()
print(payload['total'])
```
%% Output
24
26
24
73
%% Cell type:markdown id: tags:
Get the Archiveurl of the oldest Capturedate of the requested URL
%% Cell type:code id: tags:
```
python
# Take the 'c' value of the first hit, interpret it as a timestamp and
# render it in the 14-digit form used in the archive URL.
first_hit = response.json()['hits'][0]
captured_at = datetime.datetime.fromtimestamp(first_hit['c'])
capturedate = captured_at.strftime('%Y%m%d%H%M%S')

# Archive URL = fixed prefix + formatted capture date + '/' + original URL.
captureurl = ''.join(['https://webarchiv.onb.ac.at/web/', capturedate, '/', url])
print(captureurl)
```
%% Output
https://webarchiv.onb.ac.at/web/20090916183601/http://www.onb.ac.at
%% Cell type:code id: tags:
```
python
```
...
...
sample7.ipynb
0 → 100644
View file @
d1b3f7a2
This diff is collapsed.
Click to expand it.
webarchiv.py
View file @
d1b3f7a2
...
...
@@ -18,6 +18,15 @@ class WebarchivSession:
"""
return
'
0.1.0
'
@property
def api_path(self):
    """
    Protocol, domain and path prefix for the Webarchive API.

    The returned string ends with a slash and contains no format
    placeholder; callers append the REST operation and parameters to it.
    """
    # NOTE(review): base_url builds its template as api_path + '/{}', which
    # gives '.../api//{}' (double slash) because of this trailing slash.
    return 'https://webarchiv.onb.ac.at/api/'
@property
def
base_url
(
self
):
"""
...
...
@@ -25,7 +34,7 @@ class WebarchivSession:
with a single positional format string placeholder
for the REST operation and parameters.
"""
return
'
https://webarchiv.onb.ac.at/api
/{}
'
return
self
.
api_path
+
'
/{}
'
@property
def
_error_template
(
self
):
...
...
@@ -239,6 +248,8 @@ class WebarchivSession:
self
.
_display_http_error
(
e
)
print
(
'
Error:
'
.
format
(
query_string
))
def getSnapshotUrl(self, id):
    """
    Build the URL of a snapshot resource.

    :param id: identifier of the snapshot. Any value is accepted; it is
        converted with str() and percent-encoded before being placed in
        the URL path. (The name shadows the builtin but is kept so that
        existing keyword callers continue to work.)
    :return: self.api_path + 'snapshot/<id>?t=<token>&apikey=<api_key>',
        with the id, self.token and self.api_key percent-encoded.
    :raises TypeError: if self.token or self.api_key is not a string
        (for example None), as with the previous string concatenation.
    """
    # Imported locally so the method does not rely on the module's import block.
    from urllib.parse import quote

    # safe='' also encodes '/', so no component can alter the URL structure.
    return '{}snapshot/{}?t={}&apikey={}'.format(
        self.api_path,
        quote(str(id), safe=''),
        quote(self.token, safe=''),
        quote(self.api_key, safe=''),
    )
if
__name__
==
'
__main__
'
:
# noinspection SpellCheckingInspection
...
...