# Methods added to class WebarchivSession in webarchiv.py, recovered from a
# whitespace-collapsed diff hunk. Indent one level when placed in the class.


def fulltext_search_within_domain(self, query_string, domain, from_=None, to_=None):
    """
    Start a fulltext seed search query in the Webarchive.

    :param query_string: String to search for
    :param domain: Search only within this domain name
    :param from_: Optional earliest date bound for the search
                  in the format YYYYMM.
    :param to_: Optional latest date bound for the search
                in the format YYYYMM.
    :return: Whatever ``self.waitForResponse`` returns for the initial
             response (described by the original author as an HTTP
             Response object), or ``None`` if an ``HTTPError`` was
             raised and reported via ``self._display_http_error``.
    """
    params = {'q': query_string, 'g': domain}
    # Falsy bounds (None, '', 0) are left out of the request entirely.
    if from_:
        params['from'] = from_
    if to_:
        params['to'] = to_

    try:
        response = self._get(op='/search/fulltext/seed', params=params)
        return self.waitForResponse(response)
    except HTTPError as e:
        # Best-effort behaviour: report the error instead of propagating it.
        self._display_http_error(e)
        return None


def fulltext_search_within_url(self, query_string, url, pagesize=10, from_=None, to_=None):
    """
    Start a fulltext capture search query in the Webarchive.

    :param query_string: String to search for
    :param url: Search only captures starting at this exact web address
    :param pagesize: Value sent as the ``pagesize`` query parameter
                     (default 10); presumably the number of results
                     per page -- TODO confirm against the API.
    :param from_: Optional earliest date bound for the search
                  in the format YYYYMM.
    :param to_: Optional latest date bound for the search
                in the format YYYYMM.
    :return: Whatever ``self.waitForResponse`` returns for the initial
             response (described by the original author as an HTTP
             Response object), or ``None`` if an ``HTTPError`` was
             raised and reported via ``self._display_http_error``.
    """
    params = {'q': query_string, 'g': url, 'pagesize': pagesize}
    # Falsy bounds (None, '', 0) are left out of the request entirely.
    if from_:
        params['from'] = from_
    if to_:
        params['to'] = to_

    try:
        response = self._get(op='/search/fulltext/capture', params=params)
        return self.waitForResponse(response)
    except HTTPError as e:
        # Best-effort behaviour: report the error instead of propagating it.
        self._display_http_error(e)
        return None