From 0cedb1c6d8d38c911176cab954d858fe937cef71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9on=20Tiek=C3=B6tter?=
Date: Mon, 21 Nov 2022 23:55:04 +0100
Subject: [PATCH] Add search.suspended_times settings

Make suspended_time changeable in settings.yml
Allow different values to be set for different exceptions.

All values are expressed in seconds; a value of 0 disables the
suspension.  An exception raised without an explicit suspended_time
(i.e. None) falls back to the value configured under
search.suspended_times for its class.

Co-authored-by: Alexandre Flament
---
 searx/exceptions.py                  | 18 ++++++++++--
 searx/network/raise_for_httperror.py | 14 ++++++++--
 searx/settings.yml                   | 14 ++++++++++
 searx/settings_defaults.py           |  8 ++++++
 tests/unit/test_exceptions.py        | 41 ++++++++++++++++++++++++++++
 5 files changed, 89 insertions(+), 6 deletions(-)
 create mode 100644 tests/unit/test_exceptions.py

diff --git a/searx/exceptions.py b/searx/exceptions.py
index 43c8bab40..af81bfb23 100644
--- a/searx/exceptions.py
+++ b/searx/exceptions.py
@@ -69,11 +69,19 @@ class SearxEngineAPIException(SearxEngineResponseException):
 class SearxEngineAccessDeniedException(SearxEngineResponseException):
     """The website is blocking the access"""
 
-    def __init__(self, suspended_time=24 * 3600, message='Access denied'):
+    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
+
+    def __init__(self, suspended_time=None, message='Access denied'):
+        suspended_time = self._get_default_suspended_time() if suspended_time is None else suspended_time
         super().__init__(message + ', suspended_time=' + str(suspended_time))
         self.suspended_time = suspended_time
         self.message = message
 
+    def _get_default_suspended_time(self):
+        from searx import get_setting
+
+        return get_setting(self.SUSPEND_TIME_SETTING)
+
 
 class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
     """The website has returned a CAPTCHA
@@ -81,7 +89,9 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
 
     By default, searx stops sending requests to this engine for 1 day.
     """
 
-    def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'):
+    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
+
+    def __init__(self, suspended_time=None, message='CAPTCHA'):
         super().__init__(message=message, suspended_time=suspended_time)
 
 
@@ -91,7 +101,9 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
 
     By default, searx stops sending requests to this engine for 1 hour.
     """
 
-    def __init__(self, suspended_time=3600, message='Too many request'):
+    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
+
+    def __init__(self, suspended_time=None, message='Too many request'):
         super().__init__(message=message, suspended_time=suspended_time)
 
 
diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py
index 414074977..7fc2b7877 100644
--- a/searx/network/raise_for_httperror.py
+++ b/searx/network/raise_for_httperror.py
@@ -9,6 +9,7 @@ from searx.exceptions import (
     SearxEngineTooManyRequestsException,
     SearxEngineAccessDeniedException,
 )
+from searx import get_setting
 
 
 def is_cloudflare_challenge(resp):
@@ -33,15 +34,22 @@ def raise_for_cloudflare_captcha(resp):
         if is_cloudflare_challenge(resp):
             # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
             # suspend for 2 weeks
-            raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
+            raise SearxEngineCaptchaException(
+                message='Cloudflare CAPTCHA', suspended_time=get_setting('search.suspended_times.cf_SearxEngineCaptcha')
+            )
 
         if is_cloudflare_firewall(resp):
-            raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
+            raise SearxEngineAccessDeniedException(
+                message='Cloudflare Firewall',
+                suspended_time=get_setting('search.suspended_times.cf_SearxEngineAccessDenied'),
+            )
 
 
 def raise_for_recaptcha(resp):
     if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
-        raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
+        raise SearxEngineCaptchaException(
+            message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
+        )
 
 
 def raise_for_captcha(resp):
diff --git a/searx/settings.yml b/searx/settings.yml
index 2226a12de..565cffe97 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -45,6 +45,20 @@ search:
   ban_time_on_fail: 5
   # max ban time in seconds after engine errors
   max_ban_time_on_fail: 120
+  suspended_times:
+    # Engine suspension time after error (in seconds; set to 0 to disable)
+    # For error "Access denied" and "HTTP error [402, 403]"
+    SearxEngineAccessDenied: 86400
+    # For error "CAPTCHA"
+    SearxEngineCaptcha: 86400
+    # For error "Too many request" and "HTTP error 429"
+    SearxEngineTooManyRequests: 3600
+    # Cloudflare CAPTCHA
+    cf_SearxEngineCaptcha: 1296000
+    cf_SearxEngineAccessDenied: 86400
+    # ReCAPTCHA
+    recaptcha_SearxEngineCaptcha: 604800
+
   # remove format to deny access, use lower case.
   # formats: [html, csv, json, rss]
   formats:
diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py
index cfa1bb47c..7baa23cac 100644
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@@ -160,6 +160,14 @@ SCHEMA = {
         'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
         'ban_time_on_fail': SettingsValue(numbers.Real, 5),
         'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
+        'suspended_times': {
+            'SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
+            'SearxEngineCaptcha': SettingsValue(numbers.Real, 86400),
+            'SearxEngineTooManyRequests': SettingsValue(numbers.Real, 3600),
+            'cf_SearxEngineCaptcha': SettingsValue(numbers.Real, 1296000),
+            'cf_SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
+            'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
+        },
         'formats': SettingsValue(list, OUTPUT_FORMATS),
     },
     'server': {
diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py
new file mode 100644
index 000000000..13d004322
--- /dev/null
+++ b/tests/unit/test_exceptions.py
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+from tests import SearxTestCase
+import searx.exceptions
+from searx import get_setting
+
+
+class TestExceptions(SearxTestCase):
+    def test_default_suspend_time(self):
+        with self.assertRaises(searx.exceptions.SearxEngineAccessDeniedException) as e:
+            raise searx.exceptions.SearxEngineAccessDeniedException()
+        self.assertEqual(
+            e.exception.suspended_time,
+            get_setting(searx.exceptions.SearxEngineAccessDeniedException.SUSPEND_TIME_SETTING),
+        )
+
+        with self.assertRaises(searx.exceptions.SearxEngineCaptchaException) as e:
+            raise searx.exceptions.SearxEngineCaptchaException()
+        self.assertEqual(
+            e.exception.suspended_time, get_setting(searx.exceptions.SearxEngineCaptchaException.SUSPEND_TIME_SETTING)
+        )
+
+        with self.assertRaises(searx.exceptions.SearxEngineTooManyRequestsException) as e:
+            raise searx.exceptions.SearxEngineTooManyRequestsException()
+        self.assertEqual(
+            e.exception.suspended_time,
+            get_setting(searx.exceptions.SearxEngineTooManyRequestsException.SUSPEND_TIME_SETTING),
+        )
+
+    def test_custom_suspend_time(self):
+        with self.assertRaises(searx.exceptions.SearxEngineAccessDeniedException) as e:
+            raise searx.exceptions.SearxEngineAccessDeniedException(suspended_time=1337)
+        self.assertEqual(e.exception.suspended_time, 1337)
+
+        with self.assertRaises(searx.exceptions.SearxEngineCaptchaException) as e:
+            raise searx.exceptions.SearxEngineCaptchaException(suspended_time=1409)
+        self.assertEqual(e.exception.suspended_time, 1409)
+
+        with self.assertRaises(searx.exceptions.SearxEngineTooManyRequestsException) as e:
+            raise searx.exceptions.SearxEngineTooManyRequestsException(suspended_time=1543)
+        self.assertEqual(e.exception.suspended_time, 1543)