diff --git a/requirements.txt b/requirements.txt
index e503a961d..b5efb8d59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,3 +16,4 @@ setproctitle==1.3.2
 redis==4.4.0
 markdown-it-py==2.1.0
 typing_extensions==4.4.0
+fasttext==0.9.2
diff --git a/searx/data/lid.176.ftz b/searx/data/lid.176.ftz
new file mode 100755
index 000000000..1fb85b357
Binary files /dev/null and b/searx/data/lid.176.ftz differ
diff --git a/searx/plugins/autodetect_search_language.py b/searx/plugins/autodetect_search_language.py
new file mode 100644
index 000000000..3bcb80098
--- /dev/null
+++ b/searx/plugins/autodetect_search_language.py
@@ -0,0 +1,47 @@
+"""Plugin that guesses the language of a search query with fastText.
+
+The bundled ``lid.176.ftz`` language identification model is loaded once, at
+import time; :func:`pre_search` uses it to override the language of a search
+query when the best prediction is confident enough.
+"""
+
+import os
+
+import fasttext
+from flask_babel import gettext
+
+name = gettext('Autodetect search language')
+description = gettext('Automatically detect the query search language and switch to it.')
+preference_section = 'general'
+default_on = False
+
+# Minimum probability the best prediction must reach to override the language.
+MIN_CONFIDENCE = 0.3
+# Prefix fastText puts in front of every predicted label (e.g. "__label__en").
+LABEL_PREFIX = '__label__'
+
+# Replace fastText's stderr printer with a no-op so loading the model stays quiet.
+fasttext.FastText.eprint = lambda x: None
+model = fasttext.load_model(
+    os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'data', 'lid.176.ftz')
+)
+
+
+def pre_search(request, search):
+    """Set the search language to the one detected in the query text.
+
+    The language of ``search.search_query`` is only replaced when the model's
+    best guess has a probability of at least ``MIN_CONFIDENCE``; otherwise the
+    query is left untouched.
+
+    :param request: the incoming request (unused).
+    :param search: search object whose ``search_query`` is inspected and updated.
+    :returns: always ``True``.
+    """
+    # predict() works on a single line and raises ValueError on embedded newlines.
+    text = search.search_query.query.replace('\n', ' ')
+    labels, probabilities = model.predict(text, k=1)
+    # Guard against an empty prediction before indexing into the result.
+    if labels and probabilities[0] >= MIN_CONFIDENCE:
+        search.search_query.lang = labels[0].split(LABEL_PREFIX)[1]
+    return True
diff --git a/searx/settings.yml b/searx/settings.yml
index 21793124d..9dc2199e5 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -196,6 +196,7 @@ outgoing:
 #   - 'Open Access DOI rewrite'
 #   - 'Vim-like hotkeys'
 #   - 'Tor check plugin'
+#   - 'Autodetect search language'
 
 # Configuration of the "Hostname replace" plugin:
 #