From 38431d2e142b7da6a9b48aad203f02a2eff7e6fd Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 29 May 2023 19:46:37 +0200 Subject: [PATCH] [fix] correct determination of the IP for the request For correct determination of the IP for the request the function botdetection.get_real_ip() is implemented. This function is used in the ip_limit and link_token method of the botdetection and it is used in the self_info plugin. A documentation about the X-Forwarded-For header has been added. [1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566211059 Signed-off-by: Markus Heiser --- searx/botdetection/__init__.py | 20 +++++++++++++++++--- searx/botdetection/ip_limit.py | 6 ++---- searx/botdetection/limiter.toml | 7 ++++++- searx/botdetection/link_token.py | 7 +++---- searx/plugins/self_info.py | 31 +++++++------------------------ tests/unit/test_plugins.py | 12 +++++++----- 6 files changed, 42 insertions(+), 41 deletions(-) diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py index b4de0f9c8..c903b0bb4 100644 --- a/searx/botdetection/__init__.py +++ b/searx/botdetection/__init__.py @@ -2,11 +2,25 @@ # lint: pylint """.. _botdetection src: -Bot detection methods ---------------------- +X-Forwarded-For +=============== -The methods implemented in this python package are use by the :ref:`limiter src`. +.. attention:: + + A correct setup of the HTTP request headers ``X-Forwarded-For`` and + ``X-Real-IP`` is essential to be able to assign a request to an IP correctly: + + - `NGINX RequestHeader`_ + - `Apache RequestHeader`_ + +.. _NGINX RequestHeader: + https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site +.. _Apache RequestHeader: + https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site + +.. 
autofunction:: searx.botdetection.get_real_ip """ from ._helpers import dump_request +from ._helpers import get_real_ip diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index e7fa57187..268285dd9 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -49,7 +49,7 @@ from searx import logger from searx.redislib import incr_sliding_window, drop_counter from . import link_token -from ._helpers import too_many_requests +from ._helpers import too_many_requests, get_real_ip logger = logger.getChild('botdetection.ip_limit') @@ -89,9 +89,7 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkz # pylint: disable=too-many-return-statements redis_client = redisdb.client() - client_ip = request.headers.get('X-Forwarded-For', '') - if not client_ip: - logger.error("missing HTTP header X-Forwarded-For") + client_ip = get_real_ip(request) if request.args.get('format', 'html') != 'html': c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW) diff --git a/searx/botdetection/limiter.toml b/searx/botdetection/limiter.toml index 28c4e7589..af797d32c 100644 --- a/searx/botdetection/limiter.toml +++ b/searx/botdetection/limiter.toml @@ -1,3 +1,8 @@ [botdetection.ip_limit] -link_token = false \ No newline at end of file +link_token = false + +[real_ip] + +# Number of values to trust for X-Forwarded-For. 
+x_for = 1 diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py index 376d06d61..a83214a33 100644 --- a/searx/botdetection/link_token.py +++ b/searx/botdetection/link_token.py @@ -43,6 +43,7 @@ import flask from searx import logger from searx import redisdb from searx.redislib import secret_hash +from ._helpers import get_real_ip TOKEN_LIVE_TIME = 600 """Livetime (sec) of limiter's CSS token.""" @@ -73,7 +74,7 @@ def is_suspicious(request: flask.Request, renew: bool = False): if not redis_client.get(ping_key): logger.warning( "missing ping (IP: %s) / request: %s", - request.headers.get('X-Forwarded-For', ''), + get_real_ip(request), ping_key, ) return True @@ -111,9 +112,7 @@ def get_ping_key(request: flask.Request): PING_KEY + "[" + secret_hash( - request.headers.get('X-Forwarded-For', '') - + request.headers.get('Accept-Language', '') - + request.headers.get('User-Agent', '') + get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '') ) + "]" ) diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index fbe4518b5..8079ee0d4 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -1,21 +1,11 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-module-docstring,invalid-name -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. 
- -(C) 2015 by Adam Tauber, -''' -from flask_babel import gettext import re +from flask_babel import gettext + +from searx.botdetection._helpers import get_real_ip name = gettext('Self Information') description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".') @@ -28,18 +18,11 @@ query_examples = '' p = re.compile('.*user[ -]agent.*', re.IGNORECASE) -# attach callback to the post search hook -# request: flask request object -# ctx: the whole local context of the pre search hook def post_search(request, search): if search.search_query.pageno > 1: return True if search.search_query.query == 'ip': - x_forwarded_for = request.headers.getlist("X-Forwarded-For") - if x_forwarded_for: - ip = x_forwarded_for[0] - else: - ip = request.remote_addr + ip = get_real_ip(request) search.result_container.answers['ip'] = {'answer': ip} elif p.match(search.search_query.query): ua = request.user_agent diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 28df835e5..0d555fdc0 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -50,9 +50,13 @@ class SelfIPTest(SearxTestCase): self.assertTrue(len(store.plugins) == 1) # IP test - request = Mock(remote_addr='127.0.0.1') - request.headers.getlist.return_value = [] - search = get_search_mock(query='ip', pageno=1) + request = Mock() + request.remote_addr = '127.0.0.1' + request.headers = {'X-Forwarded-For': '1.2.3.4, 127.0.0.1', 'X-Real-IP': '127.0.0.1'} + search = get_search_mock( + query='ip', + pageno=1, + ) store.call(store.plugins, 'post_search', request, search) self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"]) @@ -62,7 +66,6 @@ class SelfIPTest(SearxTestCase): # User agent test request = Mock(user_agent='Mock') - request.headers.getlist.return_value = [] search = get_search_mock(query='user-agent', pageno=1) store.call(store.plugins, 'post_search', request, search) @@ -98,7 +101,6 @@ class 
HashPluginTest(SearxTestCase): self.assertTrue(len(store.plugins) == 1) request = Mock(remote_addr='127.0.0.1') - request.headers.getlist.return_value = [] # MD5 search = get_search_mock(query='md5 test', pageno=1)