From ed66ed758dbd1c926296aa227da8a82fff9166e1 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 16 Feb 2024 20:46:18 +0000 Subject: [PATCH] [mod] reduce memory footprint by not calling babel.Locale.parse at runtime babel.Locale.parse loads more than 60MB in RAM. The only purpose is to get: LOCALE_NAMES - searx.data.LOCALES["LOCALE_NAMES"] RTL_LOCALES - searx.data.LOCALES["RTL_LOCALES"] This commit calls babel.Locale.parse when the translations are updated from Weblate, and the results are stored in:: searx/data/locales.json This file can be built by:: ./manage data.locales By storing these variables in searx.data when the translations are updated, we save roughly 65MB per worker (usually 4 workers = 260MB of RAM saved). Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494 Co-authored-by: Markus Heiser --- docs/dev/searxng_extra/update.rst | 10 ++ docs/src/searx.locales.rst | 7 +- searx/data/__init__.py | 2 + searx/data/locales.json | 69 ++++++++++ searx/locales.py | 129 +++++++++---------- searx/sxng_locales.py | 6 +- searxng_extra/update/update_engine_traits.py | 6 +- searxng_extra/update/update_locales.py | 103 +++++++++++++++ utils/lib_sxng_data.sh | 11 ++ utils/lib_sxng_weblate.sh | 5 + 10 files changed, 269 insertions(+), 79 deletions(-) create mode 100644 searx/data/locales.json create mode 100644 searxng_extra/update/update_locales.py diff --git a/docs/dev/searxng_extra/update.rst b/docs/dev/searxng_extra/update.rst index a125303e0..dc3b06744 100644 --- a/docs/dev/searxng_extra/update.rst +++ b/docs/dev/searxng_extra/update.rst @@ -78,6 +78,16 @@ Scripts to update static data in :origin:`searx/data/` .. automodule:: searxng_extra.update.update_pygments :members: +.. _update_locales.py: + +``update_locales.py`` +===================== + +:origin:`[source] ` + +.. 
automodule:: searxng_extra.update.update_locales + :members: + ``update_wikidata_units.py`` ============================ diff --git a/docs/src/searx.locales.rst b/docs/src/searx.locales.rst index 0de49a5e1..9882e7890 100644 --- a/docs/src/searx.locales.rst +++ b/docs/src/searx.locales.rst @@ -10,11 +10,6 @@ Locales :backlinks: entry .. automodule:: searx.locales - :members: + :members: -SearXNG's locale codes -====================== - -.. automodule:: searx.sxng_locales - :members: diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 0822f4ac8..c79d1042f 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -15,6 +15,7 @@ __all__ = [ 'EXTERNAL_BANGS', 'OSM_KEYS_TAGS', 'ENGINE_DESCRIPTIONS', + 'LOCALES', 'ahmia_blacklist_loader', ] @@ -50,3 +51,4 @@ EXTERNAL_BANGS = _load('external_bangs.json') OSM_KEYS_TAGS = _load('osm_keys_tags.json') ENGINE_DESCRIPTIONS = _load('engine_descriptions.json') ENGINE_TRAITS = _load('engine_traits.json') +LOCALES = _load('locales.json') diff --git a/searx/data/locales.json b/searx/data/locales.json new file mode 100644 index 000000000..cb45b1601 --- /dev/null +++ b/searx/data/locales.json @@ -0,0 +1,69 @@ +{ + "LOCALE_NAMES": { + "af": "Afrikaans", + "ar": "العربية (Arabic)", + "bg": "Български (Bulgarian)", + "bn": "বাংলা (Bangla)", + "bo": "བོད་སྐད་ (Tibetan)", + "ca": "Català (Catalan)", + "cs": "Čeština (Czech)", + "cy": "Cymraeg (Welsh)", + "da": "Dansk (Danish)", + "de": "Deutsch (German)", + "dv": "ދިވެހި (Dhivehi)", + "el-GR": "Ελληνικά, Ελλάδα (Greek, Greece)", + "en": "English", + "eo": "Esperanto", + "es": "Español (Spanish)", + "et": "Eesti (Estonian)", + "eu": "Euskara (Basque)", + "fa-IR": "فارسی, ایران (Persian, Iran)", + "fi": "Suomi (Finnish)", + "fil": "Filipino", + "fr": "Français (French)", + "gl": "Galego (Galician)", + "he": "עברית (Hebrew)", + "hr": "Hrvatski (Croatian)", + "hu": "Magyar (Hungarian)", + "ia": "Interlingua", + "id": "Indonesia (Indonesian)", + "it": "Italiano 
(Italian)", + "ja": "日本語 (Japanese)", + "ko": "한국어 (Korean)", + "lt": "Lietuvių (Lithuanian)", + "lv": "Latviešu (Latvian)", + "ml": "മലയാളം (Malayalam)", + "ms": "Melayu (Malay)", + "nb-NO": "Norsk bokmål, Norge (Norwegian bokmål, Norway)", + "nl": "Nederlands (Dutch)", + "nl-BE": "Nederlands, België (Dutch, Belgium)", + "oc": "Occitan", + "pa": "ਪੰਜਾਬੀ (Punjabi)", + "pap": "Papiamento", + "pl": "Polski (Polish)", + "pt": "Português (Portuguese)", + "pt-BR": "Português, Brasil (Portuguese, Brazil)", + "ro": "Română (Romanian)", + "ru": "Русский (Russian)", + "si": "සිංහල (Sinhala)", + "sk": "Slovenčina (Slovak)", + "sl": "Slovenščina (Slovenian)", + "sr": "Српски (Serbian)", + "sv": "Svenska (Swedish)", + "szl": "Ślōnski (Silesian)", + "ta": "தமிழ் (Tamil)", + "te": "తెలుగు (Telugu)", + "th": "ไทย (Thai)", + "tr": "Türkçe (Turkish)", + "uk": "Українська (Ukrainian)", + "vi": "Tiếng việt (Vietnamese)", + "zh-HK": "中文, 中國香港特別行政區 (Chinese, Hong Kong SAR China)", + "zh-Hans-CN": "中文, 中国 (Chinese, China)", + "zh-Hant-TW": "中文, 台灣 (Chinese, Taiwan)" + }, + "RTL_LOCALES": [ + "fa-IR", + "ar", + "he" + ] +} \ No newline at end of file diff --git a/searx/locales.py b/searx/locales.py index 655f365ab..c2fa030b1 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -1,12 +1,36 @@ # -*- coding: utf-8 -*- # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. +""" +SearXNG’s locale data +===================== + +The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from +:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and +:ref:`update_locales.py`. + +.. hint:: + + Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or + :py:obj:`LOCALE_BEST_MATCH` is modified, the + :origin:`searx/data/locales.json` needs to be rebuild:: + + ./manage data.locales + +SearXNG's locale codes +====================== + +.. 
automodule:: searx.sxng_locales + :members: + + +SearXNG’s locale implementations +================================ """ -from typing import Set, Optional, List -import os -import pathlib +from __future__ import annotations + +from pathlib import Path import babel from babel.support import Translations @@ -15,7 +39,11 @@ import babel.core import flask_babel import flask from flask.ctx import has_request_context -from searx import logger +from searx import ( + data, + logger, + searx_dir, +) logger = logger.getChild('locales') @@ -30,7 +58,7 @@ LOCALE_NAMES = {} :meta hide-value: """ -RTL_LOCALES: Set[str] = set() +RTL_LOCALES: set[str] = set() """List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see :py:obj:`locales_initialize`).""" @@ -52,7 +80,7 @@ LOCALE_BEST_MATCH = { "pap": "pt-BR", } """Map a locale we do not have a translations for to a locale we have a -translation for. By example: use Taiwan version of the translation for Hong +translation for. By example: use Taiwan version of the translation for Hong Kong.""" @@ -90,74 +118,37 @@ def get_translations(): return _flask_babel_get_translations() -def get_locale_descr(locale, locale_name): - """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR' - - :param locale: instance of :py:class:`Locale` - :param locale_name: name e.g. 
'fr' or 'pt_BR' (delimiter is *underscore*) - """ - - native_language, native_territory = _get_locale_descr(locale, locale_name) - english_language, english_territory = _get_locale_descr(locale, 'en') - - if native_territory == english_territory: - english_territory = None - - if not native_territory and not english_territory: - if native_language == english_language: - return native_language - return native_language + ' (' + english_language + ')' - - result = native_language + ', ' + native_territory + ' (' + english_language - if english_territory: - return result + ', ' + english_territory + ')' - return result + ')' +_TR_LOCALES: list[str] = [] -def _get_locale_descr(locale, language_code): - language_name = locale.get_language_name(language_code).capitalize() - if language_name and ('a' <= language_name[0] <= 'z'): - language_name = language_name.capitalize() - territory_name = locale.get_territory_name(language_code) - return language_name, territory_name +def get_translation_locales() -> list[str]: + """Returns the list of translation locales (*underscore*). The list is + generated from the translation folders in :origin:`searx/translations`""" + + global _TR_LOCALES # pylint:disable=global-statement + if _TR_LOCALES: + return _TR_LOCALES + + tr_locales = [] + for folder in (Path(searx_dir) / 'translations').iterdir(): + if not folder.is_dir(): + continue + if not (folder / 'LC_MESSAGES').is_dir(): + continue + tr_locales.append(folder.name) + _TR_LOCALES = sorted(tr_locales) + return _TR_LOCALES -def locales_initialize(directory=None): +def locales_initialize(): """Initialize locales environment of the SearXNG session. 
- monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations` - init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES` """ - - directory = directory or pathlib.Path(__file__).parent / 'translations' - logger.debug("locales_initialize: %s", directory) flask_babel.get_translations = get_translations - - for tag, descr in ADDITIONAL_TRANSLATIONS.items(): - locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') - LOCALE_NAMES[tag] = descr - if locale.text_direction == 'rtl': - RTL_LOCALES.add(tag) - - for tag in LOCALE_BEST_MATCH: - descr = LOCALE_NAMES.get(tag) - if not descr: - locale = babel.Locale.parse(tag, sep='-') - LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) - if locale.text_direction == 'rtl': - RTL_LOCALES.add(tag) - - for dirname in sorted(os.listdir(directory)): - # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations - if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')): - continue - tag = dirname.replace('_', '-') - descr = LOCALE_NAMES.get(tag) - if not descr: - locale = babel.Locale.parse(dirname) - LOCALE_NAMES[tag] = get_locale_descr(locale, dirname) - if locale.text_direction == 'rtl': - RTL_LOCALES.add(tag) + LOCALE_NAMES.update(data.LOCALES["LOCALE_NAMES"]) + RTL_LOCALES.update(data.LOCALES["RTL_LOCALES"]) def region_tag(locale: babel.Locale) -> str: @@ -177,7 +168,7 @@ def language_tag(locale: babel.Locale) -> str: return sxng_lang -def get_locale(locale_tag: str) -> Optional[babel.Locale]: +def get_locale(locale_tag: str) -> babel.Locale | None: """Returns a :py:obj:`babel.Locale` object parsed from argument ``locale_tag``""" try: @@ -190,7 +181,7 @@ def get_locale(locale_tag: str) -> Optional[babel.Locale]: def get_official_locales( territory: str, languages=None, regional: bool = False, de_facto: bool = True -) -> Set[babel.Locale]: +) -> set[babel.Locale]: """Returns a list of :py:obj:`babel.Locale` with languages from 
:py:obj:`babel.languages.get_official_languages`. @@ -376,7 +367,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): return default -def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]: +def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None: """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``. :param str searxng_locale: SearXNG's internal representation of locale (de, @@ -425,7 +416,7 @@ def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Opti return get_engine_locale(searxng_locale, engine_locales, default=fallback) -def build_engine_locales(tag_list: List[str]): +def build_engine_locales(tag_list: list[str]): """From a list of locale tags a dictionary is build that can be passed by argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function is mainly used by :py:obj:`match_locale` and is similar to what the diff --git a/searx/sxng_locales.py b/searx/sxng_locales.py index 1ea673d7c..27f892386 100644 --- a/searx/sxng_locales.py +++ b/searx/sxng_locales.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- '''List of SearXNG's locale codes. -This file is generated automatically by:: +.. hint:: - ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py + Don't modify this file, this file is generated by:: + + ./manage data.traits ''' sxng_locales = ( diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py index 46892cc2b..faab198d2 100755 --- a/searxng_extra/update/update_engine_traits.py +++ b/searxng_extra/update/update_engine_traits.py @@ -31,9 +31,11 @@ languages_file_header = """\ # -*- coding: utf-8 -*- '''List of SearXNG's locale codes. -This file is generated automatically by:: +.. 
hint:: - ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py + Don't modify this file, this file is generated by:: + + ./manage data.traits ''' sxng_locales = ( diff --git a/searxng_extra/update/update_locales.py b/searxng_extra/update/update_locales.py new file mode 100644 index 000000000..e823ebaf1 --- /dev/null +++ b/searxng_extra/update/update_locales.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Update locale names in :origin:`searx/data/locales.json` used by +:ref:`searx.locales` + +- :py:obj:`searx.locales.RTL_LOCALES` +- :py:obj:`searx.locales.LOCALE_NAMES` +""" +from __future__ import annotations + +from typing import Set +import json +from pathlib import Path +import os + +import babel +import babel.languages +import babel.core + +from searx import searx_dir +from searx.locales import ( + ADDITIONAL_TRANSLATIONS, + LOCALE_BEST_MATCH, + get_translation_locales, +) + +LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json' +TRANSLATOINS_FOLDER = Path(searx_dir) / 'translations' + + +def main(): + + LOCALE_NAMES = {} + RTL_LOCALES: Set[str] = set() + + for tag, descr in ADDITIONAL_TRANSLATIONS.items(): + locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') + LOCALE_NAMES[tag] = descr + if locale.text_direction == 'rtl': + RTL_LOCALES.add(tag) + + for tag in LOCALE_BEST_MATCH: + descr = LOCALE_NAMES.get(tag) + if not descr: + locale = babel.Locale.parse(tag, sep='-') + LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) + if locale.text_direction == 'rtl': + RTL_LOCALES.add(tag) + + for tr_locale in get_translation_locales(): + sxng_tag = tr_locale.replace('_', '-') + descr = LOCALE_NAMES.get(sxng_tag) + if not descr: + locale = babel.Locale.parse(tr_locale) + LOCALE_NAMES[sxng_tag] = get_locale_descr(locale, tr_locale) + if locale.text_direction == 'rtl': + RTL_LOCALES.add(sxng_tag) + + content = { + "LOCALE_NAMES": LOCALE_NAMES, + "RTL_LOCALES": list(RTL_LOCALES), + } + 
+ with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f: + json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False) + + +def get_locale_descr(locale: babel.Locale, tr_locale): + """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR' + + :param locale: instance of :py:class:`Locale` + :param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*) + """ + + native_language, native_territory = _get_locale_descr(locale, tr_locale) + english_language, english_territory = _get_locale_descr(locale, 'en') + + if native_territory == english_territory: + english_territory = None + + if not native_territory and not english_territory: + # none territory name + if native_language == english_language: + return native_language + return native_language + ' (' + english_language + ')' + + else: + result = native_language + ', ' + native_territory + ' (' + english_language + if english_territory: + return result + ', ' + english_territory + ')' + return result + ')' + + +def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]: + language_name = locale.get_language_name(tr_locale).capitalize() # type: ignore + if language_name and ('a' <= language_name[0] <= 'z'): + language_name = language_name.capitalize() + territory_name: str = locale.get_territory_name(tr_locale) # type: ignore + return language_name, territory_name + + +if __name__ == "__main__": + main() diff --git a/utils/lib_sxng_data.sh b/utils/lib_sxng_data.sh index 549e6dbec..50a932f6d 100755 --- a/utils/lib_sxng_data.sh +++ b/utils/lib_sxng_data.sh @@ -7,6 +7,7 @@ data.: all : update searx/sxng_locales.py and searx/data/* traits : update searx/data/engine_traits.json & searx/sxng_locales.py useragents: update searx/data/useragents.json with the most recent versions of Firefox + locales : update searx/data/locales.json from babel EOF } @@ -16,6 +17,7 @@ data.all() { pyenv.activate data.traits data.useragents + data.locales build_msg DATA "update 
searx/data/osm_keys_tags.json" pyenv.cmd python searxng_extra/update/update_osm_keys_tags.py @@ -49,6 +51,15 @@ data.useragents() { dump_return $? } +data.locales() { + ( set -e + pyenv.activate + build_msg DATA "update searx/data/locales.json" + python searxng_extra/update/update_locales.py + ) + dump_return $? +} + docs.prebuild() { build_msg DOCS "build ${DOCS_BUILD}/includes" ( diff --git a/utils/lib_sxng_weblate.sh b/utils/lib_sxng_weblate.sh index f52b75d7c..f2b19257c 100755 --- a/utils/lib_sxng_weblate.sh +++ b/utils/lib_sxng_weblate.sh @@ -96,10 +96,15 @@ weblate.translations.commit() { build_msg BABEL 'compile translation catalogs into binary MO files' pybabel compile --statistics \ -d "searx/translations" + + # update searx/data/locales.json + data.locales + # git add/commit (no push) commit_body=$(cd "${TRANSLATIONS_WORKTREE}"; git log --pretty=format:'%h - %as - %aN <%ae>' "${existing_commit_hash}..HEAD") commit_message=$(echo -e "[translations] update from Weblate\n\n${commit_body}") git add searx/translations + git add searx/data/locales.json git commit -m "${commit_message}" ) exitcode=$?