From 0f43b39eac44d548143b3944a2bfa26c039b2068 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 10 Sep 2021 12:43:33 +0200 Subject: [PATCH] [enh] add hostname_replace plugin * backport of https://github.com/searx/searx/pull/2724 * allow to remove result if the replacement is the boolean value false --- docs/dev/search_api.rst | 8 ++++---- searx/plugins/__init__.py | 2 ++ searx/plugins/hostname_replace.py | 32 +++++++++++++++++++++++++++++++ searx/settings.yml | 12 +++++++++++- 4 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 searx/plugins/hostname_replace.py diff --git a/docs/dev/search_api.rst b/docs/dev/search_api.rst index 5fcdc4560..7a5f3cf98 100644 --- a/docs/dev/search_api.rst +++ b/docs/dev/search_api.rst @@ -100,17 +100,17 @@ Parameters :default: ``HTTPS_rewrite``, ``Self_Informations``, ``Search_on_category_select``, ``Tracker_URL_remover`` - :values: [ ``DOAI_rewrite``, ``HTTPS_rewrite``, ``Infinite_scroll``, + :values: ``DOAI_rewrite``, ``HTTPS_rewrite``, ``Infinite_scroll``, ``Vim-like_hotkeys``, ``Self_Informations``, ``Tracker_URL_remover``, - ``Search_on_category_select`` ] + ``Search_on_category_select``, ``Hostname_replace`` ``disabled_plugins``: optional List of disabled plugins. - :default: ``DOAI_rewrite``, ``Infinite_scroll``, ``Vim-like_hotkeys`` + :default: ``DOAI_rewrite``, ``Infinite_scroll``, ``Vim-like_hotkeys``, ``Hostname_replace`` :values: ``DOAI_rewrite``, ``HTTPS_rewrite``, ``Infinite_scroll``, ``Vim-like_hotkeys``, ``Self_Informations``, ``Tracker_URL_remover``, - ``Search_on_category_select`` + ``Search_on_category_select``, ``Hostname_replace`` ``enabled_engines`` : optional : *all* :origin:`engines ` List of enabled engines. 
diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 3a35f7025..1153c9ed1 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -31,6 +31,7 @@ from searx.plugins import (oa_doi_rewrite, hash_plugin, infinite_scroll, self_info, + hostname_replace, search_on_category_select, tracker_url_remover, vim_hotkeys) @@ -182,6 +183,7 @@ plugins.register(oa_doi_rewrite) plugins.register(hash_plugin) plugins.register(infinite_scroll) plugins.register(self_info) +plugins.register(hostname_replace) plugins.register(search_on_category_select) plugins.register(tracker_url_remover) plugins.register(vim_hotkeys) diff --git a/searx/plugins/hostname_replace.py b/searx/plugins/hostname_replace.py new file mode 100644 index 000000000..778b84615 --- /dev/null +++ b/searx/plugins/hostname_replace.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later + +import re +from urllib.parse import urlunparse +from searx import settings +from searx.plugins import logger +from flask_babel import gettext + +name = gettext('Hostname replace') +description = gettext('Rewrite result hostnames or remove results based on the hostname') +default_on = False +preference_section = 'general' + +plugin_id = 'hostname_replace' + +replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {} + +logger = logger.getChild(plugin_id) +parsed = 'parsed_url' + + +def on_result(request, search, result): + if parsed not in result: + return True + for (pattern, replacement) in replacements.items(): + if pattern.search(result[parsed].netloc): + if not replacement: + return False + result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc)) + result['url'] = urlunparse(result[parsed]) + + return True diff --git a/searx/settings.yml b/searx/settings.yml index e5eb9dd65..8e2aeb5e9 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -150,7 +150,17 @@ outgoing: # # enabled_plugins: # - 
"HTTPS rewrite" -# - ... +# - "Hostname replace" # see configuration below + +# "Hostname replace" plugin configuration example: +# hostname_replace: +# '(.*\.)?youtube\.com$': 'invidious.example.com' +# '(.*\.)?youtu\.be$': 'invidious.example.com' +# '(.*\.)?youtube-nocookie\.com$': 'yotter.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# 'spam\.example\.com': false # remove results from spam.example.com checker: # disable checker when in debug mode