diff --git a/searx/languages.py b/searx/languages.py index e83f3b878..2c50a0af0 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -2,70 +2,70 @@ # list of language codes # this file is generated automatically by utils/fetch_languages.py language_codes = ( - ('af-ZA', 'Afrikaans', '', 'Afrikaans'), - ('ar-EG', 'العربية', '', 'Arabic'), - ('be-BY', 'Беларуская', '', 'Belarusian'), - ('bg-BG', 'Български', '', 'Bulgarian'), - ('ca-ES', 'Català', '', 'Catalan'), - ('cs-CZ', 'Čeština', '', 'Czech'), - ('da-DK', 'Dansk', '', 'Danish'), - ('de', 'Deutsch', '', 'German'), - ('de-AT', 'Deutsch', 'Österreich', 'German'), - ('de-CH', 'Deutsch', 'Schweiz', 'German'), - ('de-DE', 'Deutsch', 'Deutschland', 'German'), - ('el-GR', 'Ελληνικά', '', 'Greek'), - ('en', 'English', '', 'English'), - ('en-AU', 'English', 'Australia', 'English'), - ('en-CA', 'English', 'Canada', 'English'), - ('en-GB', 'English', 'United Kingdom', 'English'), - ('en-IE', 'English', 'Ireland', 'English'), - ('en-MY', 'English', 'Malaysia', 'English'), - ('en-NZ', 'English', 'New Zealand', 'English'), - ('en-US', 'English', 'United States', 'English'), - ('es', 'Español', '', 'Spanish'), - ('es-AR', 'Español', 'Argentina', 'Spanish'), - ('es-CL', 'Español', 'Chile', 'Spanish'), - ('es-ES', 'Español', 'España', 'Spanish'), - ('es-MX', 'Español', 'México', 'Spanish'), - ('et-EE', 'Eesti', '', 'Estonian'), - ('fa-IR', 'فارسی', '', 'Persian'), - ('fi-FI', 'Suomi', '', 'Finnish'), - ('fr', 'Français', '', 'French'), - ('fr-BE', 'Français', 'Belgique', 'French'), - ('fr-CA', 'Français', 'Canada', 'French'), - ('fr-CH', 'Français', 'Suisse', 'French'), - ('fr-FR', 'Français', 'France', 'French'), - ('he-IL', 'עברית', '', 'Hebrew'), - ('hi-IN', 'हिन्दी', '', 'Hindi'), - ('hr-HR', 'Hrvatski', '', 'Croatian'), - ('hu-HU', 'Magyar', '', 'Hungarian'), - ('id-ID', 'Indonesia', '', 'Indonesian'), - ('is-IS', 'Íslenska', '', 'Icelandic'), - ('it-IT', 'Italiano', '', 'Italian'), - ('ja-JP', '日本語', '', 
'Japanese'), - ('ko-KR', '한국어', '', 'Korean'), - ('lt-LT', 'Lietuvių', '', 'Lithuanian'), - ('lv-LV', 'Latviešu', '', 'Latvian'), - ('nl', 'Nederlands', '', 'Dutch'), - ('nl-BE', 'Nederlands', 'België', 'Dutch'), - ('nl-NL', 'Nederlands', 'Nederland', 'Dutch'), - ('pl-PL', 'Polski', '', 'Polish'), - ('pt', 'Português', '', 'Portuguese'), - ('pt-BR', 'Português', 'Brasil', 'Portuguese'), - ('pt-PT', 'Português', 'Portugal', 'Portuguese'), - ('ro-RO', 'Română', '', 'Romanian'), - ('ru-RU', 'Русский', '', 'Russian'), - ('sk-SK', 'Slovenčina', '', 'Slovak'), - ('sl-SI', 'Slovenščina', '', 'Slovenian'), - ('sr-RS', 'Српски', '', 'Serbian'), - ('sv-SE', 'Svenska', '', 'Swedish'), - ('sw-TZ', 'Kiswahili', '', 'Swahili'), - ('th-TH', 'ไทย', '', 'Thai'), - ('tr-TR', 'Türkçe', '', 'Turkish'), - ('uk-UA', 'Українська', '', 'Ukrainian'), - ('vi-VN', 'Tiếng Việt', '', 'Vietnamese'), - ('zh', '中文', '', 'Chinese'), - ('zh-CN', '中文', '中国', 'Chinese'), - ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'), - ('zh-TW', '中文', '台灣', 'Chinese'), + ('af-ZA', 'Afrikaans', '', 'Afrikaans', '\U0001f1ff\U0001f1e6'), + ('ar-EG', 'العربية', '', 'Arabic', '\U0001f1ea\U0001f1ec'), + ('be-BY', 'Беларуская', '', 'Belarusian', '\U0001f1e7\U0001f1fe'), + ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'), + ('ca-ES', 'Català', '', 'Catalan', '\U0001f1ea\U0001f1f8'), + ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'), + ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'), + ('de', 'Deutsch', '', 'German', '\U0001f310'), + ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'), + ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'), + ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'), + ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'), + ('en', 'English', '', 'English', '\U0001f310'), + ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'), + ('en-CA', 'English', 'Canada', 'English', 
'\U0001f1e8\U0001f1e6'), + ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'), + ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'), + ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'), + ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'), + ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'), + ('es', 'Español', '', 'Spanish', '\U0001f310'), + ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'), + ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'), + ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'), + ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'), + ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'), + ('fa-IR', 'فارسی', '', 'Persian', '\U0001f1ee\U0001f1f7'), + ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'), + ('fr', 'Français', '', 'French', '\U0001f310'), + ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'), + ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), + ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'), + ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'), + ('he-IL', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'), + ('hi-IN', 'हिन्दी', '', 'Hindi', '\U0001f1ee\U0001f1f3'), + ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'), + ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'), + ('id-ID', 'Indonesia', '', 'Indonesian', '\U0001f1ee\U0001f1e9'), + ('is-IS', 'Íslenska', '', 'Icelandic', '\U0001f1ee\U0001f1f8'), + ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'), + ('ja-JP', '日本語', '', 'Japanese', '\U0001f1ef\U0001f1f5'), + ('ko-KR', '한국어', '', 'Korean', '\U0001f1f0\U0001f1f7'), + ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'), + ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'), + ('nl', 
'Nederlands', '', 'Dutch', '\U0001f310'), + ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'), + ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'), + ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'), + ('pt', 'Português', '', 'Portuguese', '\U0001f310'), + ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'), + ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'), + ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'), + ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'), + ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'), + ('sl-SI', 'Slovenščina', '', 'Slovenian', '\U0001f1f8\U0001f1ee'), + ('sr-RS', 'Српски', '', 'Serbian', '\U0001f1f7\U0001f1f8'), + ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'), + ('sw-TZ', 'Kiswahili', '', 'Swahili', '\U0001f1f9\U0001f1ff'), + ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'), + ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'), + ('uk-UA', 'Українська', '', 'Ukrainian', '\U0001f1fa\U0001f1e6'), + ('vi-VN', 'Tiếng Việt', '', 'Vietnamese', '\U0001f1fb\U0001f1f3'), + ('zh', '中文', '', 'Chinese', '\U0001f310'), + ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'), + ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'), + ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'), ) diff --git a/searx/query.py b/searx/query.py index f5f628823..9a1398771 100644 --- a/searx/query.py +++ b/searx/query.py @@ -85,7 +85,7 @@ class LanguageParser(QueryPartParser): # check if any language-code is equal with # declared language-codes for lc in language_codes: - lang_id, lang_name, country, english_name = map(str.lower, lc) + lang_id, lang_name, country, english_name, _flag = map(str.lower, lc) # if correct language-code is found # set it as new search-language @@ -128,7 +128,7 @@ class LanguageParser(QueryPartParser): for lc in language_codes: 
if lc[0] not in settings['search']['languages']: continue - lang_id, lang_name, country, english_name = map(str.lower, lc) + lang_id, lang_name, country, english_name, _flag = map(str.lower, lc) # check if query starts with language-id if lang_id.startswith(value): diff --git a/searx/templates/oscar/languages.html b/searx/templates/oscar/languages.html index 0846caa96..c0ecb440e 100644 --- a/searx/templates/oscar/languages.html +++ b/searx/templates/oscar/languages.html @@ -2,7 +2,7 @@ {{- '' -}} - {%- for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) -%} + {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} {%- endfor -%} diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html index 2ef83757b..fef003d5e 100644 --- a/searx/templates/simple/preferences.html +++ b/searx/templates/simple/preferences.html @@ -116,8 +116,8 @@

{{- '' -}} {{- '' -}}

# Flag overrides for codes whose flag cannot be derived from a territory part.
# Keys are lower-case codes exactly as they are looked up in get_unicode_flag().
lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
    'he': '\U0001F1EE\U0001F1F1',  # Hebrew / Israel (IL)
}


def get_unicode_flag(lang_code):
    """Determine a unicode flag (emoji) that fits to the ``lang_code``.

    Resolution order:

    1. an explicit override from :py:obj:`lang2emoji` (case-insensitive),
    2. the globe symbol U+1F310 for any other two-character code,
    3. the pair of *regional indicator symbols* built from the territory part
       of the locale (``de-AT`` --> ``A`` + ``T``).

    :param lang_code: language or locale code using ``-`` as separator.
    :returns: the flag string, or ``None`` if the code has no territory (and
        no override) or the territory cannot be spelled with regional
        indicator letters.  A diagnostic line is printed in both cases.
    """

    emoji = lang2emoji.get(lang_code.lower())
    if emoji:
        return emoji

    if len(lang_code) == 2:
        return '\U0001F310'

    language = territory = script = variant = ''
    try:
        # Slice to four items: presumably newer Babel releases append a fifth
        # element (the modifier) for codes like ``xx-YY@mod`` -- TODO confirm.
        language, territory, script, variant = parse_locale(lang_code, '-')[:4]
    except ValueError as exc:
        print(exc)

    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    if not territory:
        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
        emoji = lang2emoji.get(language)
        if not emoji:
            print(
                "%s --> language: %s / territory: %s / script: %s / variant: %s"
                % (lang_code, language, territory, script, variant)
            )
        return emoji

    # The territory is deliberately NOT looked up in lang2emoji: that table is
    # keyed by language code, so territory ``BS`` (Bahamas) would otherwise be
    # answered with the flag registered for the Bosnian language.
    try:
        c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
        c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
    except KeyError as exc:
        # e.g. a territory that does not consist of letters
        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
        return None

    return c1 + c2


class UnicodeEscape(str):
    """Escape unicode string in :py:obj:`pprint.pformat`"""

    def __repr__(self):
        # 'unicode-escape' yields pure ASCII and already doubles backslashes;
        # single quotes are escaped as well so the result is always a valid
        # single-quoted Python literal.
        escaped = self.encode('unicode-escape').decode('ascii')
        return "'" + escaped.replace("'", "\\'") + "'"
languages.py. def write_languages_file(languages): file_headers = ( @@ -209,11 +269,14 @@ def write_languages_file(languages): if name is None: print("ERROR: languages['%s'] --> %s" % (code, languages[code])) continue + + flag = get_unicode_flag(code) or '' item = ( code, languages[code]['name'].split(' (')[0], languages[code].get('country_name') or '', languages[code].get('english_name') or '', + UnicodeEscape(flag), ) language_codes.append(item)