From dd0887be186d208846cdc7c3df13dde020dfa957 Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 2 Sep 2022 07:33:20 +0000 Subject: [PATCH] xpath engine: change raise_for_httperror to no_result_for_http_status no_result_for_http_status contains a list of HTTP status codes. Responses with one of these status codes are treated as an empty result list. In all other cases an exception is thrown as usual. Previously, setting raise_for_httperror to false ignored all HTTP errors, which made defective engines invisible in the stats. --- searx/engines/xpath.py | 23 ++++++++++++++++++----- searx/settings.yml | 2 +- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 97656705a..f9528e92d 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -22,6 +22,7 @@ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list +from searx.network import raise_for_httperror search_url = None """ @@ -60,9 +61,14 @@ lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. ''' -raise_for_httperror = True -'''True by default: raise an exception if the HTTP code of response is ``>= -300``''' + +no_result_for_http_status = [] +'''Return empty result for these HTTP status codes instead of throwing an error. + +.. code:: yaml + + no_result_for_http_status: [] +''' soft_max_redirects = 0 '''Maximum redirects, soft limit. 
Record an error but don't stop the engine''' @@ -179,12 +185,19 @@ def request(query, params): params['url'] = search_url.format(**fargs) params['soft_max_redirects'] = soft_max_redirects - params['raise_for_httperror'] = raise_for_httperror + + params['raise_for_httperror'] = False + return params -def response(resp): +def response(resp): # pylint: disable=too-many-branches '''Scrap *results* from the response (see :ref:`engine results`).''' + if no_result_for_http_status and resp.status_code in no_result_for_http_status: + return [] + + raise_for_httperror(resp) + results = [] dom = html.fromstring(resp.text) is_onion = 'onions' in categories diff --git a/searx/settings.yml b/searx/settings.yml index f83c104fd..5587ed51a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1808,7 +1808,7 @@ engines: url_xpath: //div[@class="upper-synonyms"]/a/@href content_xpath: //div[@class="synonyms-list-group"] title_xpath: //div[@class="upper-synonyms"]/a - raise_for_httperror: false + no_result_for_http_status: [404] about: website: https://www.woxikon.de/ wikidata_id: # No Wikidata ID