From 732a122c4460b28af67d5acbfd625b89b579781d Mon Sep 17 00:00:00 2001
From: Stefan Karner
Date: Tue, 7 May 2019 15:55:44 +0200
Subject: [PATCH] Add fulltext search methods 'seed' and 'capture'

---
 webarchiv.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/webarchiv.py b/webarchiv.py
index ea466ba..189c7b1 100644
--- a/webarchiv.py
+++ b/webarchiv.py
@@ -149,6 +149,72 @@ class WebarchivSession:
             self._display_http_error(e)
             print('Query for "{}" not added'.format(query_string))
 
+    def _fulltext_scoped_search(self, op, params, from_=None, to_=None):
+        """
+        Send a fulltext search request and wait for its result.
+
+        Shared implementation of the seed and capture fulltext searches.
+
+        :param op: API operation path handed to ``self._get``
+        :param params: Dict of request parameters; the date bounds are
+                       added to it when given
+        :param from_: Optional earliest date bound for the search
+                      in the format YYYYMM.
+        :param to_: Optional latest date bound for the search
+                    in the format YYYYMM.
+        :return: Result of ``self.waitForResponse``, or None when an
+                 HTTPError was raised and displayed
+        """
+        if from_:
+            params['from'] = from_
+        if to_:
+            params['to'] = to_
+
+        try:
+            response = self._get(op=op, params=params)
+            return self.waitForResponse(response)
+
+        except HTTPError as e:
+            self._display_http_error(e)
+            return None
+
+    def fulltext_search_within_domain(self, query_string, domain, from_=None, to_=None):
+        """
+        Start a fulltext seed search query in the Webarchive.
+
+        :param query_string: String to search for
+        :param domain: Search only within this domain name
+        :param from_: Optional earliest date bound for the search
+                      in the format YYYYMM.
+        :param to_: Optional latest date bound for the search
+                    in the format YYYYMM.
+        :return: Result of ``self.waitForResponse``, or None when an
+                 HTTPError was raised and displayed
+        """
+        params = {'q': query_string, 'g': domain}
+        return self._fulltext_scoped_search(
+            '/search/fulltext/seed', params, from_=from_, to_=to_)
+
+    def fulltext_search_within_url(self, query_string, url, pagesize=10, from_=None, to_=None):
+        """
+        Start a fulltext capture search query in the Webarchive.
+
+        :param query_string: String to search for
+        :param url: Search only captures starting at this exact web address
+        :param pagesize: Value sent as the 'pagesize' request parameter
+                         (default 10); presumably the number of results
+                         per page -- confirm against the API docs.
+        :param from_: Optional earliest date bound for the search
+                      in the format YYYYMM.
+        :param to_: Optional latest date bound for the search
+                    in the format YYYYMM.
+        :return: Result of ``self.waitForResponse``, or None when an
+                 HTTPError was raised and displayed
+        """
+        params = {'q': query_string, 'g': url, 'pagesize': pagesize}
+        return self._fulltext_scoped_search(
+            '/search/fulltext/capture', params, from_=from_, to_=to_)
+
     def wayback_search(self, query_string, from_=None, to_=None):
         """
         Start a wayback search query in the Webarchive.
-- 
GitLab