Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
W
webarchive-api
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
aponb
webarchive-api
Commits
60c8c0ff
Commit
60c8c0ff
authored
May 22, 2019
by
Stefan Karner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactor method names
parent
8dc2b37d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
16 deletions
+17
-16
webarchiv.py
webarchiv.py
+17
-16
No files found.
webarchiv.py
View file @
60c8c0ff
...
...
@@ -107,8 +107,9 @@ class WebarchivSession:
print
(
self
.
_error_template
.
format
(
status_code
=
e
.
response
.
status_code
,
response_text
=
e
.
response
.
text
),
file
=
sys
.
stderr
)
def
_handle_response_errors
(
self
,
r
):
@
staticmethod
def
_handle_response_errors
(
r
):
if
r
.
status_code
==
403
:
print
(
'Forbidden. Invalid Token or ApiKey transmitted'
,
file
=
sys
.
stderr
)
return
r
...
...
@@ -166,7 +167,7 @@ class WebarchivSession:
try
:
response
=
self
.
_get
(
op
=
'/search/fulltext'
,
params
=
params
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
...
...
@@ -192,7 +193,7 @@ class WebarchivSession:
try
:
response
=
self
.
_get
(
op
=
'/search/fulltext/seed'
,
params
=
params
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
...
...
@@ -217,7 +218,7 @@ class WebarchivSession:
try
:
response
=
self
.
_get
(
op
=
'/search/fulltext/capture'
,
params
=
params
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
...
...
@@ -241,13 +242,13 @@ class WebarchivSession:
try
:
response
=
self
.
_get
(
op
=
'/search/wayback'
,
params
=
params
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
print
(
'Error:'
.
format
(
query_string
))
def
wait
ForR
esponse
(
self
,
response
):
def
wait
_for_r
esponse
(
self
,
response
):
"""
Polls until the server responds with a result
"""
...
...
@@ -296,7 +297,7 @@ class WebarchivSession:
try
:
response
=
self
.
_get
(
op
=
'/search/domainname'
,
params
=
params
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
...
...
@@ -319,30 +320,30 @@ class WebarchivSession:
try
:
response
=
self
.
_get
(
op
=
'/search/fulltext/histogram'
,
params
=
params
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
print
(
'Error:'
.
format
(
query_string
))
def
get
SnapshotU
rl
(
self
,
seed
,
capture
,
onlysvg
):
def
get
_snapshot_u
rl
(
self
,
seed
,
capture
,
onlysvg
):
return
self
.
api_path
+
'snapshot?capture='
+
capture
+
'&t='
+
self
.
token
+
'&apikey='
+
self
.
api_key
+
'&onlysvg='
+
onlysvg
+
'&seed='
+
seed
@
staticmethod
def
result
ContainsS
eeds
(
response
):
def
result
_contains_s
eeds
(
response
):
try
:
return
response
.
json
()[
'subtype'
]
==
2
except
:
return
False
@
staticmethod
def
result
ContainsC
aptures
(
response
):
def
result
_contains_c
aptures
(
response
):
try
:
return
response
.
json
()[
'subtype'
]
==
3
except
:
return
False
def
save
P
age
(
self
,
url
):
def
save
_p
age
(
self
,
url
):
self
.
connect
()
r
=
requests
.
post
(
self
.
base_url
.
format
(
'savepage'
),
data
=
'''{{
...
...
@@ -357,7 +358,7 @@ class WebarchivSession:
)
return
r
def
fragment
ChecksumH
tml
(
self
,
seed
,
capture
,
selector
,
occurrence
):
def
fragment
_checksum_h
tml
(
self
,
seed
,
capture
,
selector
,
occurrence
):
try
:
response
=
self
.
_post
(
op
=
'/fragment/checksum/html'
,
json
=
{
"seed"
:
seed
,
...
...
@@ -367,7 +368,7 @@ class WebarchivSession:
"extractortype"
:
EXTRACTOR_HTML
})
response
=
self
.
status_query
(
response
)
return
self
.
wait
ForR
esponse
(
response
)
return
self
.
wait
_for_r
esponse
(
response
)
except
HTTPError
as
e
:
self
.
_display_http_error
(
e
)
...
...
@@ -393,7 +394,7 @@ if __name__ == '__main__':
for
capture
in
response
.
json
()[
'hits'
]:
capturedate
=
capture
[
'c'
]
resp
=
w
.
fragment
ChecksumH
tml
(
url
,
capturedate
,
".odd td"
,
3
)
resp
=
w
.
fragment
_checksum_h
tml
(
url
,
capturedate
,
".odd td"
,
3
)
checksum
=
resp
.
json
()[
'checksum'
]
returncode
=
resp
.
json
()[
'returncode'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment