Merge pull request #967 from return42/language-filter

[mod] add flags to the languages filter
This commit is contained in:
Alexandre Flament 2022-03-28 21:36:20 +02:00 committed by GitHub
commit 0379856712
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 143 additions and 80 deletions

View file

@ -2,70 +2,70 @@
# list of language codes # list of language codes
# this file is generated automatically by utils/fetch_languages.py # this file is generated automatically by utils/fetch_languages.py
language_codes = ( language_codes = (
('af-ZA', 'Afrikaans', '', 'Afrikaans'), ('af-ZA', 'Afrikaans', '', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
('ar-EG', 'العربية', '', 'Arabic'), ('ar-EG', 'العربية', '', 'Arabic', '\U0001f1ea\U0001f1ec'),
('be-BY', 'Беларуская', '', 'Belarusian'), ('be-BY', 'Беларуская', '', 'Belarusian', '\U0001f1e7\U0001f1fe'),
('bg-BG', 'Български', '', 'Bulgarian'), ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
('ca-ES', 'Català', '', 'Catalan'), ('ca-ES', 'Català', '', 'Catalan', '\U0001f1ea\U0001f1f8'),
('cs-CZ', 'Čeština', '', 'Czech'), ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
('da-DK', 'Dansk', '', 'Danish'), ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
('de', 'Deutsch', '', 'German'), ('de', 'Deutsch', '', 'German', '\U0001f310'),
('de-AT', 'Deutsch', 'Österreich', 'German'), ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
('de-CH', 'Deutsch', 'Schweiz', 'German'), ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
('de-DE', 'Deutsch', 'Deutschland', 'German'), ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
('el-GR', 'Ελληνικά', '', 'Greek'), ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
('en', 'English', '', 'English'), ('en', 'English', '', 'English', '\U0001f310'),
('en-AU', 'English', 'Australia', 'English'), ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
('en-CA', 'English', 'Canada', 'English'), ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
('en-GB', 'English', 'United Kingdom', 'English'), ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
('en-IE', 'English', 'Ireland', 'English'), ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
('en-MY', 'English', 'Malaysia', 'English'), ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
('en-NZ', 'English', 'New Zealand', 'English'), ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
('en-US', 'English', 'United States', 'English'), ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
('es', 'Español', '', 'Spanish'), ('es', 'Español', '', 'Spanish', '\U0001f310'),
('es-AR', 'Español', 'Argentina', 'Spanish'), ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
('es-CL', 'Español', 'Chile', 'Spanish'), ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
('es-ES', 'Español', 'España', 'Spanish'), ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
('es-MX', 'Español', 'México', 'Spanish'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
('et-EE', 'Eesti', '', 'Estonian'), ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
('fa-IR', 'فارسی', '', 'Persian'), ('fa-IR', 'فارسی', '', 'Persian', '\U0001f1ee\U0001f1f7'),
('fi-FI', 'Suomi', '', 'Finnish'), ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
('fr', 'Français', '', 'French'), ('fr', 'Français', '', 'French', '\U0001f310'),
('fr-BE', 'Français', 'Belgique', 'French'), ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
('fr-CA', 'Français', 'Canada', 'French'), ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
('fr-CH', 'Français', 'Suisse', 'French'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
('fr-FR', 'Français', 'France', 'French'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
('he-IL', 'עברית', '', 'Hebrew'), ('he-IL', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
('hi-IN', 'हिन्दी', '', 'Hindi'), ('hi-IN', 'हिन्दी', '', 'Hindi', '\U0001f1ee\U0001f1f3'),
('hr-HR', 'Hrvatski', '', 'Croatian'), ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
('hu-HU', 'Magyar', '', 'Hungarian'), ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
('id-ID', 'Indonesia', '', 'Indonesian'), ('id-ID', 'Indonesia', '', 'Indonesian', '\U0001f1ee\U0001f1e9'),
('is-IS', 'Íslenska', '', 'Icelandic'), ('is-IS', 'Íslenska', '', 'Icelandic', '\U0001f1ee\U0001f1f8'),
('it-IT', 'Italiano', '', 'Italian'), ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
('ja-JP', '日本語', '', 'Japanese'), ('ja-JP', '日本語', '', 'Japanese', '\U0001f1ef\U0001f1f5'),
('ko-KR', '한국어', '', 'Korean'), ('ko-KR', '한국어', '', 'Korean', '\U0001f1f0\U0001f1f7'),
('lt-LT', 'Lietuvių', '', 'Lithuanian'), ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
('lv-LV', 'Latviešu', '', 'Latvian'), ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
('nl', 'Nederlands', '', 'Dutch'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
('nl-BE', 'Nederlands', 'België', 'Dutch'), ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
('nl-NL', 'Nederlands', 'Nederland', 'Dutch'), ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
('pl-PL', 'Polski', '', 'Polish'), ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
('pt', 'Português', '', 'Portuguese'), ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
('pt-BR', 'Português', 'Brasil', 'Portuguese'), ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
('pt-PT', 'Português', 'Portugal', 'Portuguese'), ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
('ro-RO', 'Română', '', 'Romanian'), ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
('ru-RU', 'Русский', '', 'Russian'), ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
('sk-SK', 'Slovenčina', '', 'Slovak'), ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
('sl-SI', 'Slovenščina', '', 'Slovenian'), ('sl-SI', 'Slovenščina', '', 'Slovenian', '\U0001f1f8\U0001f1ee'),
('sr-RS', 'Српски', '', 'Serbian'), ('sr-RS', 'Српски', '', 'Serbian', '\U0001f1f7\U0001f1f8'),
('sv-SE', 'Svenska', '', 'Swedish'), ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
('sw-TZ', 'Kiswahili', '', 'Swahili'), ('sw-TZ', 'Kiswahili', '', 'Swahili', '\U0001f1f9\U0001f1ff'),
('th-TH', 'ไทย', '', 'Thai'), ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
('tr-TR', 'Türkçe', '', 'Turkish'), ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
('uk-UA', 'Українська', '', 'Ukrainian'), ('uk-UA', 'Українська', '', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
('vi-VN', 'Tiếng Việt', '', 'Vietnamese'), ('vi-VN', 'Tiếng Việt', '', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
('zh', '中文', '', 'Chinese'), ('zh', '中文', '', 'Chinese', '\U0001f310'),
('zh-CN', '中文', '中国', 'Chinese'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
('zh-HK', '中文', '中國香港特別行政區', 'Chinese'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
('zh-TW', '中文', '台灣', 'Chinese'), ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
) )

View file

@ -85,7 +85,7 @@ class LanguageParser(QueryPartParser):
# check if any language-code is equal with # check if any language-code is equal with
# declared language-codes # declared language-codes
for lc in language_codes: for lc in language_codes:
lang_id, lang_name, country, english_name = map(str.lower, lc) lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
# if correct language-code is found # if correct language-code is found
# set it as new search-language # set it as new search-language
@ -128,7 +128,7 @@ class LanguageParser(QueryPartParser):
for lc in language_codes: for lc in language_codes:
if lc[0] not in settings['search']['languages']: if lc[0] not in settings['search']['languages']:
continue continue
lang_id, lang_name, country, english_name = map(str.lower, lc) lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
# check if query starts with language-id # check if query starts with language-id
if lang_id.startswith(value): if lang_id.startswith(value):

View file

@ -2,7 +2,7 @@
<label class="visually-hidden" for="language">{{ _('Language') }}</label> <label class="visually-hidden" for="language">{{ _('Language') }}</label>
<select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l"> <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
</option> </option>

View file

@ -1,8 +1,8 @@
<select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}} <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}} {% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
</option> </option>
{%- endfor -%} {%- endfor -%}
</select> </select>

View file

@ -116,8 +116,8 @@
<p class="value">{{- '' -}} <p class="value">{{- '' -}}
<select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}} <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
{%- endfor -%} {%- endfor -%}
</select>{{- '' -}} </select>{{- '' -}}
</p> </p>

View file

@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and
""" """
# pylint: disable=invalid-name # pylint: disable=invalid-name
from unicodedata import lookup
import json import json
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
from babel import Locale, UnknownLocaleError from babel import Locale, UnknownLocaleError
from babel.languages import get_global from babel.languages import get_global
from babel.core import parse_locale
from searx import settings, searx_dir from searx import settings, searx_dir
from searx.engines import load_engines, engines from searx.engines import load_engines, engines
@ -61,6 +62,57 @@ def get_locale(lang_code):
return None return None
lang2emoji = {
'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
'jp': '\U0001F1EF\U0001F1F5', # Japanese
'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
'he': '\U0001F1EE\U0001F1F7', # Hebrew
}
def get_unicode_flag(lang_code):
"""Determine a unicode flag (emoji) that fits to the ``lang_code``"""
emoji = lang2emoji.get(lang_code.lower())
if emoji:
return emoji
if len(lang_code) == 2:
return '\U0001F310'
language = territory = script = variant = ''
try:
language, territory, script, variant = parse_locale(lang_code, '-')
except ValueError as exc:
print(exc)
# https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
if not territory:
# https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
emoji = lang2emoji.get(language)
if not emoji:
print(
"%s --> language: %s / territory: %s / script: %s / variant: %s"
% (lang_code, language, territory, script, variant)
)
return emoji
emoji = lang2emoji.get(territory.lower())
if emoji:
return emoji
try:
c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
# print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
except KeyError as exc:
print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
return None
return c1 + c2
# Join all language lists. # Join all language lists.
def join_language_lists(engines_languages): def join_language_lists(engines_languages):
language_list = {} language_list = {}
@ -113,7 +165,10 @@ def join_language_lists(engines_languages):
print("ERROR: %s --> %s" % (locale, exc)) print("ERROR: %s --> %s" % (locale, exc))
locale = None locale = None
language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} language_list[short_code]['countries'][lang_code] = {
'country_name': country_name,
'counter': set(),
}
# count engine for both language_country combination and language alone # count engine for both language_country combination and language alone
language_list[short_code]['counter'].add(engine_name) language_list[short_code]['counter'].add(engine_name)
@ -167,11 +222,9 @@ def filter_language_list(all_languages):
# add language without countries too if there's more than one country to choose from # add language without countries too if there's more than one country to choose from
if len(filtered_countries) > 1: if len(filtered_countries) > 1:
filtered_countries[lang] = _copy_lang_data(lang) filtered_countries[lang] = _copy_lang_data(lang, None)
elif len(filtered_countries) == 1: elif len(filtered_countries) == 1:
# if there's only one country per language, it's not necessary to show country name
lang_country = next(iter(filtered_countries)) lang_country = next(iter(filtered_countries))
filtered_countries[lang_country]['country_name'] = None
# if no country has enough engines try to get most likely country code from babel # if no country has enough engines try to get most likely country code from babel
if not filtered_countries: if not filtered_countries:
@ -183,15 +236,22 @@ def filter_language_list(all_languages):
lang_country = "{lang}-{country}".format(lang=lang, country=country_code) lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
if lang_country: if lang_country:
filtered_countries[lang_country] = _copy_lang_data(lang) filtered_countries[lang_country] = _copy_lang_data(lang, None)
else: else:
filtered_countries[lang] = _copy_lang_data(lang) filtered_countries[lang] = _copy_lang_data(lang, None)
filtered_languages_with_countries.update(filtered_countries) filtered_languages_with_countries.update(filtered_countries)
return filtered_languages_with_countries return filtered_languages_with_countries
class UnicodeEscape(str):
"""Escape unicode string in :py:obj:`pprint.pformat`"""
def __repr__(self):
return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
# Write languages.py. # Write languages.py.
def write_languages_file(languages): def write_languages_file(languages):
file_headers = ( file_headers = (
@ -209,11 +269,14 @@ def write_languages_file(languages):
if name is None: if name is None:
print("ERROR: languages['%s'] --> %s" % (code, languages[code])) print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
continue continue
flag = get_unicode_flag(code) or ''
item = ( item = (
code, code,
languages[code]['name'].split(' (')[0], languages[code]['name'].split(' (')[0],
languages[code].get('country_name') or '', languages[code].get('country_name') or '',
languages[code].get('english_name') or '', languages[code].get('english_name') or '',
UnicodeEscape(flag),
) )
language_codes.append(item) language_codes.append(item)