From 2274d55d5a4dea76b645e3495673545fea0fe529 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 13 Nov 2023 19:12:50 +0100 Subject: [PATCH] [mod] add option max_page Related: https://github.com/searxng/searxng/issues/2982 Closes: https://github.com/searxng/searxng/issues/2972 Signed-off-by: Markus Heiser --- searx/engines/__init__.py | 1 + searx/engines/google.py | 1 + searx/engines/google_images.py | 1 + searx/engines/google_scholar.py | 1 + searx/engines/google_videos.py | 1 + searx/search/processors/abstract.py | 5 +++++ searx/settings.yml | 1 + searx/settings_defaults.py | 1 + 8 files changed, 12 insertions(+) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index da2b2037e..0bea37ca8 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -45,6 +45,7 @@ ENGINE_DEFAULT_ARGS = { "using_tor_proxy": False, "send_accept_language_header": False, "tokens": [], + "max_page": 0, } # set automatically when an engine does not have any tab category DEFAULT_CATEGORY = 'other' diff --git a/searx/engines/google.py b/searx/engines/google.py index 51c6acbf2..90b58e270 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -48,6 +48,7 @@ about = { # engine dependent config categories = ['general', 'web'] paging = True +max_page = 50 time_range_support = True safesearch = True diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 1f9759c96..d2d33d408 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -47,6 +47,7 @@ about = { # engine dependent config categories = ['images', 'web'] paging = True +max_page = 50 time_range_support = True safesearch = True send_accept_language_header = True diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index 6f33d1e1a..8d11c956f 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -51,6 +51,7 @@ about = { # engine dependent config categories = ['science', 'scientific 
publications'] paging = True +max_page = 50 language_support = True time_range_support = True safesearch = False diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index f922e1f70..0b1a51115 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -57,6 +57,7 @@ about = { categories = ['videos', 'web'] paging = True +max_page = 50 language_support = True time_range_support = True safesearch = True diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 0cabec97a..baa031a06 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -150,6 +150,11 @@ class EngineProcessor(ABC): if search_query.pageno > 1 and not self.engine.paging: return None + # if max page is exceeded, skip + max_page = self.engine.max_page or settings['search']['max_page'] + if max_page and max_page < search_query.pageno: + return None + # if time_range is not supported, skip if search_query.time_range and not self.engine.time_range_support: return None diff --git a/searx/settings.yml b/searx/settings.yml index 926cddb59..727b95345 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -32,6 +32,7 @@ search: # Default search language - leave blank to detect from browser information or # use codes from 'languages.py' default_lang: "auto" + # max_page: 0 # if engine supports paging, 0 means unlimited number of pages # Available languages # languages: # - all diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index a0d0daa09..6a56fdd7d 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -169,6 +169,7 @@ SCHEMA = { 'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800), }, 'formats': SettingsValue(list, OUTPUT_FORMATS), + 'max_page': SettingsValue(int, 0), }, 'server': { 'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'),