xpath engine: change raise_for_httperror to no_result_for_http_status

no_result_for_http_status contains a list of HTTP status codes.
A response with one of these HTTP status codes is treated as an empty result list.
In all other cases an exception is raised as usual.

Previously, setting raise_for_httperror to false ignored all HTTP errors,
which made defective engines invisible in the stats.
This commit is contained in:
Alexandre FLAMENT 2022-09-02 07:33:20 +00:00 committed by Markus Heiser
parent a15dfa5ee1
commit dd0887be18
2 changed files with 19 additions and 6 deletions

View file

@ -22,6 +22,7 @@ from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
from searx.network import raise_for_httperror
search_url = None
"""
@ -60,9 +61,14 @@ lang_all = 'en'
'''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is
selected.
'''
raise_for_httperror = True
'''True by default: raise an exception if the HTTP code of response is ``>=
300``'''
no_result_for_http_status = []
'''Return empty result for these HTTP status codes instead of throwing an error.
.. code:: yaml
no_result_for_http_status: []
'''
soft_max_redirects = 0
'''Maximum redirects, soft limit. Record an error but don't stop the engine'''
@ -179,12 +185,19 @@ def request(query, params):
params['url'] = search_url.format(**fargs)
params['soft_max_redirects'] = soft_max_redirects
params['raise_for_httperror'] = raise_for_httperror
params['raise_for_httperror'] = False
return params
def response(resp):
def response(resp): # pylint: disable=too-many-branches
'''Scrap *results* from the response (see :ref:`engine results`).'''
if no_result_for_http_status and resp.status_code in no_result_for_http_status:
return []
raise_for_httperror(resp)
results = []
dom = html.fromstring(resp.text)
is_onion = 'onions' in categories

View file

@ -1808,7 +1808,7 @@ engines:
url_xpath: //div[@class="upper-synonyms"]/a/@href
content_xpath: //div[@class="synonyms-list-group"]
title_xpath: //div[@class="upper-synonyms"]/a
raise_for_httperror: false
no_result_for_http_status: [404]
about:
website: https://www.woxikon.de/
wikidata_id: # No Wikidata ID