[mod] add flags to the languages filter

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-03-16 18:07:00 +01:00
parent 7625da9fa0
commit 2841abaf55
6 changed files with 139 additions and 78 deletions

View file

@ -2,70 +2,61 @@
# list of language codes # list of language codes
# this file is generated automatically by utils/fetch_languages.py # this file is generated automatically by utils/fetch_languages.py
language_codes = ( language_codes = (
('af-ZA', 'Afrikaans', '', 'Afrikaans'), ('ar-EG', 'العربية', '', 'Arabic', '\U0001f1ea\U0001f1ec'),
('ar-EG', 'العربية', '', 'Arabic'), ('bg-BG', 'Български', '', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
('be-BY', 'Беларуская', '', 'Belarusian'), ('ca-ES', 'Català', '', 'Catalan', '\U0001f1ea\U0001f1f8'),
('bg-BG', 'Български', '', 'Bulgarian'), ('cs-CZ', 'Čeština', '', 'Czech', '\U0001f1e8\U0001f1ff'),
('ca-ES', 'Català', '', 'Catalan'), ('da-DK', 'Dansk', '', 'Danish', '\U0001f1e9\U0001f1f0'),
('cs-CZ', 'Čeština', '', 'Czech'), ('de', 'Deutsch', '', 'German', '\U0001f1e9\U0001f1ea'),
('da-DK', 'Dansk', '', 'Danish'), ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
('de', 'Deutsch', '', 'German'), ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
('de-AT', 'Deutsch', 'Österreich', 'German'), ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
('de-CH', 'Deutsch', 'Schweiz', 'German'), ('el-GR', 'Ελληνικά', '', 'Greek', '\U0001f1ec\U0001f1f7'),
('de-DE', 'Deutsch', 'Deutschland', 'German'), ('en', 'English', '', 'English', '\U0001f1ec\U0001f1e7'),
('el-GR', 'Ελληνικά', '', 'Greek'), ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
('en', 'English', '', 'English'), ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
('en-AU', 'English', 'Australia', 'English'), ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
('en-CA', 'English', 'Canada', 'English'), ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
('en-GB', 'English', 'United Kingdom', 'English'), ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
('en-IE', 'English', 'Ireland', 'English'), ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
('en-MY', 'English', 'Malaysia', 'English'), ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
('en-NZ', 'English', 'New Zealand', 'English'), ('es', 'Español', '', 'Spanish', '\U0001f1ea\U0001f1f8'),
('en-US', 'English', 'United States', 'English'), ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
('es', 'Español', '', 'Spanish'), ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
('es-AR', 'Español', 'Argentina', 'Spanish'), ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
('es-CL', 'Español', 'Chile', 'Spanish'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
('es-ES', 'Español', 'España', 'Spanish'), ('et-EE', 'Eesti', '', 'Estonian', '\U0001f1ea\U0001f1ea'),
('es-MX', 'Español', 'México', 'Spanish'), ('fi-FI', 'Suomi', '', 'Finnish', '\U0001f1eb\U0001f1ee'),
('et-EE', 'Eesti', '', 'Estonian'), ('fr', 'Français', '', 'French', '\U0001f1eb\U0001f1f7'),
('fa-IR', 'فارسی', '', 'Persian'), ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
('fi-FI', 'Suomi', '', 'Finnish'), ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
('fr', 'Français', '', 'French'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
('fr-BE', 'Français', 'Belgique', 'French'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
('fr-CA', 'Français', 'Canada', 'French'), ('he-IL', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
('fr-CH', 'Français', 'Suisse', 'French'), ('hr-HR', 'Hrvatski', '', 'Croatian', '\U0001f1ed\U0001f1f7'),
('fr-FR', 'Français', 'France', 'French'), ('hu-HU', 'Magyar', '', 'Hungarian', '\U0001f1ed\U0001f1fa'),
('he-IL', 'עברית', '', 'Hebrew'), ('it-IT', 'Italiano', '', 'Italian', '\U0001f1ee\U0001f1f9'),
('hi-IN', 'हिन्दी', '', 'Hindi'), ('ja-JP', '日本語', '', 'Japanese', '\U0001f1ef\U0001f1f5'),
('hr-HR', 'Hrvatski', '', 'Croatian'), ('ko-KR', '한국어', '', 'Korean', '\U0001f1f0\U0001f1f7'),
('hu-HU', 'Magyar', '', 'Hungarian'), ('lt-LT', 'Lietuvių', '', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
('id-ID', 'Indonesia', '', 'Indonesian'), ('lv-LV', 'Latviešu', '', 'Latvian', '\U0001f1f1\U0001f1fb'),
('is-IS', 'Íslenska', '', 'Icelandic'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f1f3\U0001f1f1'),
('it-IT', 'Italiano', '', 'Italian'), ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
('ja-JP', '日本語', '', 'Japanese'), ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
('ko-KR', '한국어', '', 'Korean'), ('pl-PL', 'Polski', '', 'Polish', '\U0001f1f5\U0001f1f1'),
('lt-LT', 'Lietuvių', '', 'Lithuanian'), ('pt', 'Português', '', 'Portuguese', '\U0001f1f5\U0001f1f9'),
('lv-LV', 'Latviešu', '', 'Latvian'), ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
('nl', 'Nederlands', '', 'Dutch'), ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
('nl-BE', 'Nederlands', 'België', 'Dutch'), ('ro-RO', 'Română', '', 'Romanian', '\U0001f1f7\U0001f1f4'),
('nl-NL', 'Nederlands', 'Nederland', 'Dutch'), ('ru-RU', 'Русский', '', 'Russian', '\U0001f1f7\U0001f1fa'),
('pl-PL', 'Polski', '', 'Polish'), ('sk-SK', 'Slovenčina', '', 'Slovak', '\U0001f1f8\U0001f1f0'),
('pt', 'Português', '', 'Portuguese'), ('sl-SI', 'Slovenščina', '', 'Slovenian', '\U0001f1f8\U0001f1ee'),
('pt-BR', 'Português', 'Brasil', 'Portuguese'), ('sv-SE', 'Svenska', '', 'Swedish', '\U0001f1f8\U0001f1ea'),
('pt-PT', 'Português', 'Portugal', 'Portuguese'), ('th-TH', 'ไทย', '', 'Thai', '\U0001f1f9\U0001f1ed'),
('ro-RO', 'Română', '', 'Romanian'), ('tr-TR', 'Türkçe', '', 'Turkish', '\U0001f1f9\U0001f1f7'),
('ru-RU', 'Русский', '', 'Russian'), ('uk-UA', 'Українська', '', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
('sk-SK', 'Slovenčina', '', 'Slovak'), ('zh', '中文', '', 'Chinese', '\U0001f1e8\U0001f1f3'),
('sl-SI', 'Slovenščina', '', 'Slovenian'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
('sr-RS', 'Српски', '', 'Serbian'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
('sv-SE', 'Svenska', '', 'Swedish'), ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
('sw-TZ', 'Kiswahili', '', 'Swahili'),
('th-TH', 'ไทย', '', 'Thai'),
('tr-TR', 'Türkçe', '', 'Turkish'),
('uk-UA', 'Українська', '', 'Ukrainian'),
('vi-VN', 'Tiếng Việt', '', 'Vietnamese'),
('zh', '中文', '', 'Chinese'),
('zh-CN', '中文', '中国', 'Chinese'),
('zh-HK', '中文', '中國香港特別行政區', 'Chinese'),
('zh-TW', '中文', '台灣', 'Chinese'),
) )

View file

@ -85,7 +85,7 @@ class LanguageParser(QueryPartParser):
# check if any language-code is equal with # check if any language-code is equal with
# declared language-codes # declared language-codes
for lc in language_codes: for lc in language_codes:
lang_id, lang_name, country, english_name = map(str.lower, lc) lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
# if correct language-code is found # if correct language-code is found
# set it as new search-language # set it as new search-language
@ -128,7 +128,7 @@ class LanguageParser(QueryPartParser):
for lc in language_codes: for lc in language_codes:
if lc[0] not in settings['search']['languages']: if lc[0] not in settings['search']['languages']:
continue continue
lang_id, lang_name, country, english_name = map(str.lower, lc) lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
# check if query starts with language-id # check if query starts with language-id
if lang_id.startswith(value): if lang_id.startswith(value):

View file

@ -2,7 +2,7 @@
<label class="visually-hidden" for="language">{{ _('Language') }}</label> <label class="visually-hidden" for="language">{{ _('Language') }}</label>
<select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l"> <select class="language form-control {{ custom_select_class(rtl) }}" id="language" name="language" accesskey="l">
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}}
</option> </option>

View file

@ -1,8 +1,8 @@
<select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}} <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id -}} {% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
</option> </option>
{%- endfor -%} {%- endfor -%}
</select> </select>

View file

@ -116,8 +116,8 @@
<p class="value">{{- '' -}} <p class="value">{{- '' -}}
<select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}} <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
{%- endfor -%} {%- endfor -%}
</select>{{- '' -}} </select>{{- '' -}}
</p> </p>

View file

@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and
""" """
# pylint: disable=invalid-name # pylint: disable=invalid-name
from unicodedata import lookup
import json import json
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
from babel import Locale, UnknownLocaleError from babel import Locale, UnknownLocaleError
from babel.languages import get_global from babel.languages import get_global
from babel.core import parse_locale
from searx import settings, searx_dir from searx import settings, searx_dir
from searx.engines import load_engines, engines from searx.engines import load_engines, engines
@ -61,6 +62,62 @@ def get_locale(lang_code):
return None return None
# Hand-maintained overrides for codes whose flag cannot be derived from the
# ISO 3166-1 alpha-2 territory of the locale.  get_unicode_flag() consults this
# table with the lower-cased full code, with the language part and with the
# lower-cased territory part, so keys may be either language or territory
# codes.  Each value is a pair of REGIONAL INDICATOR SYMBOL LETTERs.
lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA',  # Hausa --> NE (Niger)
    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian --> BA (Bosnia & Herzegovina)
    'jp': '\U0001F1EF\U0001F1F5',  # territory JP (Japan)
    'ua': '\U0001F1FA\U0001F1E6',  # territory UA (Ukraine)
    # Hebrew --> IL (Israel); U+1F1EE U+1F1F7 would be IR, the flag of Iran
    'he': '\U0001F1EE\U0001F1F1',
    'zh': '\U0001F1E8\U0001F1F3',  # Chinese --> CN (China)
}
def get_unicode_flag(lang_code):
    """Determine a unicode flag (emoji) that fits to the ``lang_code``.

    Resolution order:

    1. an explicit entry in ``lang2emoji`` for the lower-cased ``lang_code``,
    2. for a two-letter code, a territory is guessed by upper-casing the
       language (``en`` is mapped to ``GB``) and resolution continues with
       the resulting ``<lang>-<TERRITORY>`` tag,
    3. an explicit entry in ``lang2emoji`` for the parsed territory,
    4. the two REGIONAL INDICATOR SYMBOL LETTERs of the parsed territory.

    :param lang_code: language tag using ``-`` as separator, e.g. ``de`` or
        ``pt-BR``.
    :return: the flag as a string or ``None`` when no flag can be determined;
        diagnostics are printed to stdout in that case.
    """
    flag = lang2emoji.get(lang_code.lower())
    if flag:
        return flag

    if len(lang_code) == 2:
        # NOTE(review): upper-casing the language only yields the intended
        # country when language and ISO 3166 code coincide (de, fr, nl ..);
        # other languages rely on an entry in lang2emoji.
        region = lang_code.upper()
        if region == 'EN':
            region = 'GB'
        lang_code = "%s-%s" % (lang_code.lower(), region)

    language = territory = script = variant = ''
    try:
        language, territory, script, variant = parse_locale(lang_code, '-')
    except ValueError as exc:
        # unparsable tag: report it and continue with the empty defaults
        print(exc)

    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    if not territory:
        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
        flag = lang2emoji.get(language)
        if not flag:
            print(
                "%s --> language: %s / territory: %s / script: %s / variant: %s"
                % (lang_code, language, territory, script, variant)
            )
        return flag

    flag = lang2emoji.get(territory.lower())
    if flag:
        return flag

    indicator = 'REGIONAL INDICATOR SYMBOL LETTER '
    try:
        # lookup() raises KeyError when the character is not a letter A-Z,
        # e.g. for numeric territories
        first = lookup(indicator + territory[0])
        second = lookup(indicator + territory[1])
    except KeyError as exc:
        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
        return None

    return first + second
# Join all language lists. # Join all language lists.
def join_language_lists(engines_languages): def join_language_lists(engines_languages):
language_list = {} language_list = {}
@ -113,7 +170,10 @@ def join_language_lists(engines_languages):
print("ERROR: %s --> %s" % (locale, exc)) print("ERROR: %s --> %s" % (locale, exc))
locale = None locale = None
language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} language_list[short_code]['countries'][lang_code] = {
'country_name': country_name,
'counter': set(),
}
# count engine for both language_country combination and language alone # count engine for both language_country combination and language alone
language_list[short_code]['counter'].add(engine_name) language_list[short_code]['counter'].add(engine_name)
@ -167,7 +227,7 @@ def filter_language_list(all_languages):
# add language without countries too if there's more than one country to choose from # add language without countries too if there's more than one country to choose from
if len(filtered_countries) > 1: if len(filtered_countries) > 1:
filtered_countries[lang] = _copy_lang_data(lang) filtered_countries[lang] = _copy_lang_data(lang, None)
elif len(filtered_countries) == 1: elif len(filtered_countries) == 1:
# if there's only one country per language, it's not necessary to show country name # if there's only one country per language, it's not necessary to show country name
lang_country = next(iter(filtered_countries)) lang_country = next(iter(filtered_countries))
@ -183,15 +243,22 @@ def filter_language_list(all_languages):
lang_country = "{lang}-{country}".format(lang=lang, country=country_code) lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
if lang_country: if lang_country:
filtered_countries[lang_country] = _copy_lang_data(lang) filtered_countries[lang_country] = _copy_lang_data(lang, None)
else: else:
filtered_countries[lang] = _copy_lang_data(lang) filtered_countries[lang] = _copy_lang_data(lang, None)
filtered_languages_with_countries.update(filtered_countries) filtered_languages_with_countries.update(filtered_countries)
return filtered_languages_with_countries return filtered_languages_with_countries
class UnicodeEscape(str):
    """Escape unicode string in :py:obj:`pprint.pformat`

    The ``repr()`` of an instance is a single-quoted, ASCII-only Python string
    literal in which non-ASCII characters are written as ``\\xNN``, ``\\uNNNN``
    or ``\\UNNNNNNNN`` escapes.
    """

    def __repr__(self):
        # 'unicode-escape' produces pure ASCII bytes, latin-1 maps them 1:1 to
        # characters.  The codec escapes backslashes but leaves quotes alone,
        # so the single quote is escaped here to keep the literal valid.
        escaped = self.encode('unicode-escape').decode('latin-1')
        return "'" + escaped.replace("'", "\\'") + "'"
# Write languages.py. # Write languages.py.
def write_languages_file(languages): def write_languages_file(languages):
file_headers = ( file_headers = (
@ -209,11 +276,14 @@ def write_languages_file(languages):
if name is None: if name is None:
print("ERROR: languages['%s'] --> %s" % (code, languages[code])) print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
continue continue
flag = get_unicode_flag(code) or ''
item = ( item = (
code, code,
languages[code]['name'].split(' (')[0], languages[code]['name'].split(' (')[0],
languages[code].get('country_name') or '', languages[code].get('country_name') or '',
languages[code].get('english_name') or '', languages[code].get('english_name') or '',
UnicodeEscape(flag),
) )
language_codes.append(item) language_codes.append(item)