[mod] reduce memory footprint by not calling babel.Locale.parse at runtime

babel.Locale.parse loads more than 60MB in RAM.  The only purpose is to get:

    LOCALE_NAMES   - searx.data.LOCALES["LOCALE_NAMES"]
    RTL_LOCALES    - searx.data.LOCALES["RTL_LOCALES"]

This commit calls babel.Locale.parse when the translations are updated from
weblate and stored in::

    searx/data/locales.json

This file can be built by::

    ./manage data.locales

By storing these variables in searx.data when the translations are updated we
save roughly 65MB per worker (usually 4 workers = 260MB of RAM saved).

Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Alexandre Flament 2024-02-16 20:46:18 +00:00 committed by Markus Heiser
parent 76845ea42c
commit ed66ed758d
10 changed files with 269 additions and 79 deletions

View file

@ -78,6 +78,16 @@ Scripts to update static data in :origin:`searx/data/`
.. automodule:: searxng_extra.update.update_pygments
:members:
.. _update_locales.py:
``update_locales.py``
=====================
:origin:`[source] <searxng_extra/update/update_locales.py>`
.. automodule:: searxng_extra.update.update_locales
:members:
``update_wikidata_units.py``
============================

View file

@ -10,11 +10,6 @@ Locales
:backlinks: entry
.. automodule:: searx.locales
:members:
:members:
SearXNG's locale codes
======================
.. automodule:: searx.sxng_locales
:members:

View file

@ -15,6 +15,7 @@ __all__ = [
'EXTERNAL_BANGS',
'OSM_KEYS_TAGS',
'ENGINE_DESCRIPTIONS',
'LOCALES',
'ahmia_blacklist_loader',
]
@ -50,3 +51,4 @@ EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
LOCALES = _load('locales.json')

69
searx/data/locales.json Normal file
View file

@ -0,0 +1,69 @@
{
"LOCALE_NAMES": {
"af": "Afrikaans",
"ar": "العربية (Arabic)",
"bg": "Български (Bulgarian)",
"bn": "বাংলা (Bangla)",
"bo": "བོད་སྐད་ (Tibetan)",
"ca": "Català (Catalan)",
"cs": "Čeština (Czech)",
"cy": "Cymraeg (Welsh)",
"da": "Dansk (Danish)",
"de": "Deutsch (German)",
"dv": "ދިވެހި (Dhivehi)",
"el-GR": "Ελληνικά, Ελλάδα (Greek, Greece)",
"en": "English",
"eo": "Esperanto",
"es": "Español (Spanish)",
"et": "Eesti (Estonian)",
"eu": "Euskara (Basque)",
"fa-IR": "فارسی, ایران (Persian, Iran)",
"fi": "Suomi (Finnish)",
"fil": "Filipino",
"fr": "Français (French)",
"gl": "Galego (Galician)",
"he": "עברית (Hebrew)",
"hr": "Hrvatski (Croatian)",
"hu": "Magyar (Hungarian)",
"ia": "Interlingua",
"id": "Indonesia (Indonesian)",
"it": "Italiano (Italian)",
"ja": "日本語 (Japanese)",
"ko": "한국어 (Korean)",
"lt": "Lietuvių (Lithuanian)",
"lv": "Latviešu (Latvian)",
"ml": "മലയാളം (Malayalam)",
"ms": "Melayu (Malay)",
"nb-NO": "Norsk bokmål, Norge (Norwegian bokmål, Norway)",
"nl": "Nederlands (Dutch)",
"nl-BE": "Nederlands, België (Dutch, Belgium)",
"oc": "Occitan",
"pa": "ਪੰਜਾਬੀ (Punjabi)",
"pap": "Papiamento",
"pl": "Polski (Polish)",
"pt": "Português (Portuguese)",
"pt-BR": "Português, Brasil (Portuguese, Brazil)",
"ro": "Română (Romanian)",
"ru": "Русский (Russian)",
"si": "සිංහල (Sinhala)",
"sk": "Slovenčina (Slovak)",
"sl": "Slovenščina (Slovenian)",
"sr": "Српски (Serbian)",
"sv": "Svenska (Swedish)",
"szl": "Ślōnski (Silesian)",
"ta": "தமிழ் (Tamil)",
"te": "తెలుగు (Telugu)",
"th": "ไทย (Thai)",
"tr": "Türkçe (Turkish)",
"uk": "Українська (Ukrainian)",
"vi": "Tiếng việt (Vietnamese)",
"zh-HK": "中文, 中國香港特別行政區 (Chinese, Hong Kong SAR China)",
"zh-Hans-CN": "中文, 中国 (Chinese, China)",
"zh-Hant-TW": "中文, 台灣 (Chinese, Taiwan)"
},
"RTL_LOCALES": [
"fa-IR",
"ar",
"he"
]
}

View file

@ -1,12 +1,36 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
"""
SearXNG's locale data
=====================
The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from
:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and
:ref:`update_locales.py`.
.. hint::
Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or
:py:obj:`LOCALE_BEST_MATCH` is modified, the
:origin:`searx/data/locales.json` needs to be rebuilt::
./manage data.locales
SearXNG's locale codes
======================
.. automodule:: searx.sxng_locales
:members:
SearXNG's locale implementations
================================
"""
from typing import Set, Optional, List
import os
import pathlib
from __future__ import annotations
from pathlib import Path
import babel
from babel.support import Translations
@ -15,7 +39,11 @@ import babel.core
import flask_babel
import flask
from flask.ctx import has_request_context
from searx import logger
from searx import (
data,
logger,
searx_dir,
)
logger = logger.getChild('locales')
@ -30,7 +58,7 @@ LOCALE_NAMES = {}
:meta hide-value:
"""
RTL_LOCALES: Set[str] = set()
RTL_LOCALES: set[str] = set()
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
:py:obj:`locales_initialize`)."""
@ -52,7 +80,7 @@ LOCALE_BEST_MATCH = {
"pap": "pt-BR",
}
"""Map a locale we do not have a translations for to a locale we have a
translation for. By example: use Taiwan version of the translation for Hong
translation for. By example: use Taiwan version of the translation for Hong
Kong."""
@ -90,74 +118,37 @@ def get_translations():
return _flask_babel_get_translations()
def get_locale_descr(locale, locale_name):
"""Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
:param locale: instance of :py:class:`Locale`
:param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
"""
native_language, native_territory = _get_locale_descr(locale, locale_name)
english_language, english_territory = _get_locale_descr(locale, 'en')
if native_territory == english_territory:
english_territory = None
if not native_territory and not english_territory:
if native_language == english_language:
return native_language
return native_language + ' (' + english_language + ')'
result = native_language + ', ' + native_territory + ' (' + english_language
if english_territory:
return result + ', ' + english_territory + ')'
return result + ')'
_TR_LOCALES: list[str] = []
def _get_locale_descr(locale, language_code):
language_name = locale.get_language_name(language_code).capitalize()
if language_name and ('a' <= language_name[0] <= 'z'):
language_name = language_name.capitalize()
territory_name = locale.get_territory_name(language_code)
return language_name, territory_name
def get_translation_locales() -> list[str]:
    """Return the sorted names of the translation locales (*underscore*
    notation).

    A locale is every sub-folder of :origin:`searx/translations` that contains
    a ``LC_MESSAGES`` folder.  The result is stored in the module level
    ``_TR_LOCALES`` list; the folders are scanned only while that list is
    empty."""
    global _TR_LOCALES  # pylint:disable=global-statement

    if not _TR_LOCALES:
        translations = Path(searx_dir) / 'translations'
        _TR_LOCALES = sorted(
            entry.name
            for entry in translations.iterdir()
            if entry.is_dir() and (entry / 'LC_MESSAGES').is_dir()
        )
    return _TR_LOCALES
def locales_initialize(directory=None):
def locales_initialize():
"""Initialize locales environment of the SearXNG session.
- monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
- init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
"""
directory = directory or pathlib.Path(__file__).parent / 'translations'
logger.debug("locales_initialize: %s", directory)
flask_babel.get_translations = get_translations
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
LOCALE_NAMES[tag] = descr
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
for tag in LOCALE_BEST_MATCH:
descr = LOCALE_NAMES.get(tag)
if not descr:
locale = babel.Locale.parse(tag, sep='-')
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
for dirname in sorted(os.listdir(directory)):
# Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')):
continue
tag = dirname.replace('_', '-')
descr = LOCALE_NAMES.get(tag)
if not descr:
locale = babel.Locale.parse(dirname)
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
if locale.text_direction == 'rtl':
RTL_LOCALES.add(tag)
LOCALE_NAMES.update(data.LOCALES["LOCALE_NAMES"])
RTL_LOCALES.update(data.LOCALES["RTL_LOCALES"])
def region_tag(locale: babel.Locale) -> str:
@ -177,7 +168,7 @@ def language_tag(locale: babel.Locale) -> str:
return sxng_lang
def get_locale(locale_tag: str) -> Optional[babel.Locale]:
def get_locale(locale_tag: str) -> babel.Locale | None:
"""Returns a :py:obj:`babel.Locale` object parsed from argument
``locale_tag``"""
try:
@ -190,7 +181,7 @@ def get_locale(locale_tag: str) -> Optional[babel.Locale]:
def get_official_locales(
territory: str, languages=None, regional: bool = False, de_facto: bool = True
) -> Set[babel.Locale]:
) -> set[babel.Locale]:
"""Returns a list of :py:obj:`babel.Locale` with languages from
:py:obj:`babel.languages.get_official_languages`.
@ -376,7 +367,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
return default
def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None:
"""Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
:param str searxng_locale: SearXNG's internal representation of locale (de,
@ -425,7 +416,7 @@ def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Opti
return get_engine_locale(searxng_locale, engine_locales, default=fallback)
def build_engine_locales(tag_list: List[str]):
def build_engine_locales(tag_list: list[str]):
"""From a list of locale tags a dictionary is build that can be passed by
argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function
is mainly used by :py:obj:`match_locale` and is similar to what the

View file

@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
This file is generated automatically by::
.. hint::
./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
Don't modify this file, this file is generated by::
./manage data.traits
'''
sxng_locales = (

View file

@ -31,9 +31,11 @@ languages_file_header = """\
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
This file is generated automatically by::
.. hint::
./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
Don't modify this file, this file is generated by::
./manage data.traits
'''
sxng_locales = (

View file

@ -0,0 +1,103 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Update locale names in :origin:`searx/data/locales.json` used by
:ref:`searx.locales`
- :py:obj:`searx.locales.RTL_LOCALES`
- :py:obj:`searx.locales.LOCALE_NAMES`
"""
from __future__ import annotations
from typing import Set
import json
from pathlib import Path
import os
import babel
import babel.languages
import babel.core
from searx import searx_dir
from searx.locales import (
ADDITIONAL_TRANSLATIONS,
LOCALE_BEST_MATCH,
get_translation_locales,
)
# File written by main(): <searx_dir>/data/locales.json
LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json'
# Folder <searx_dir>/translations.
# NOTE(review): the name is misspelled ("TRANSLATOINS") and the visible part of
# this script never reads it -- confirm there is no external user before
# renaming or removing it.
TRANSLATOINS_FOLDER = Path(searx_dir) / 'translations'
def main():
    """Regenerate :origin:`searx/data/locales.json`.

    Collects a description for every locale tag and the tags of the
    *Right-To-Left* locales.  Tags are processed in this order, a tag that
    already has a description is never overwritten:

    1. tags from ``ADDITIONAL_TRANSLATIONS``: the description is taken verbatim,
       the text direction comes from the locale the tag is mapped to in
       ``LOCALE_BEST_MATCH``
    2. tags from ``LOCALE_BEST_MATCH``
    3. locales returned by ``get_translation_locales`` (*underscore* notation,
       converted to a tag with *hyphen*)

    The result is written as JSON to ``LOCALE_DATA_FILE``.
    """
    locale_names: dict[str, str] = {}
    rtl_locales: set[str] = set()

    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
        locale_names[tag] = descr
        if locale.text_direction == 'rtl':
            rtl_locales.add(tag)

    for tag in LOCALE_BEST_MATCH:
        if locale_names.get(tag):
            continue
        locale = babel.Locale.parse(tag, sep='-')
        locale_names[tag] = get_locale_descr(locale, tag.replace('-', '_'))
        if locale.text_direction == 'rtl':
            rtl_locales.add(tag)

    for tr_locale in get_translation_locales():
        sxng_tag = tr_locale.replace('_', '-')
        if locale_names.get(sxng_tag):
            continue
        locale = babel.Locale.parse(tr_locale)
        locale_names[sxng_tag] = get_locale_descr(locale, tr_locale)
        if locale.text_direction == 'rtl':
            rtl_locales.add(sxng_tag)

    content = {
        "LOCALE_NAMES": locale_names,
        # A set has no stable iteration order and ``sort_keys`` only sorts
        # dictionary keys: sort the tags here so that the generated file is
        # reproducible and does not change without a reason.
        "RTL_LOCALES": sorted(rtl_locales),
    }

    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
def get_locale_descr(locale: babel.Locale, tr_locale):
    """Get the description of a locale: the native name followed by the english
    name in parentheses, e.g. ``Français (French)`` or ``Português, Brasil
    (Portuguese, Brazil)``.

    - The english part is omitted when it is identical to the native name
      (e.g. ``Afrikaans``).
    - The english territory is omitted when it is identical to the native
      territory.

    :param locale: instance of :py:class:`Locale`
    :param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
    :return: description of the locale
    """
    native_language, native_territory = _get_locale_descr(locale, tr_locale)
    english_language, english_territory = _get_locale_descr(locale, 'en')

    if native_territory == english_territory:
        # don't repeat the same territory name in the english part
        english_territory = None

    if not native_territory and not english_territory:
        # no territory name at all
        if native_language == english_language:
            return native_language
        return native_language + ' (' + english_language + ')'

    result = native_language + ', ' + native_territory + ' (' + english_language
    if english_territory:
        return result + ', ' + english_territory + ')'
    return result + ')'
def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:
    """Return the tuple ``(language_name, territory_name)`` that ``locale``
    reports for ``tr_locale``.

    :param locale: instance of :py:class:`Locale`
    :param tr_locale: name passed to ``locale.get_language_name`` and
        ``locale.get_territory_name``, e.g. 'fr', 'pt_BR' or 'en'
    """
    # str.capitalize() upper-cases the first character and lower-cases the
    # rest, a second check for a lower-case first letter is not needed.
    language_name = locale.get_language_name(tr_locale).capitalize()  # type: ignore
    territory_name: str = locale.get_territory_name(tr_locale)  # type: ignore
    return language_name, territory_name
if __name__ == "__main__":
main()

View file

@ -7,6 +7,7 @@ data.:
all : update searx/sxng_locales.py and searx/data/*
traits : update searx/data/engine_traits.json & searx/sxng_locales.py
useragents: update searx/data/useragents.json with the most recent versions of Firefox
locales : update searx/data/locales.json from babel
EOF
}
@ -16,6 +17,7 @@ data.all() {
pyenv.activate
data.traits
data.useragents
data.locales
build_msg DATA "update searx/data/osm_keys_tags.json"
pyenv.cmd python searxng_extra/update/update_osm_keys_tags.py
@ -49,6 +51,15 @@ data.useragents() {
dump_return $?
}
data.locales() {
    # Regenerate searx/data/locales.json by running update_locales.py.  The
    # steps run in a subshell so that `set -e` (stop at the first failing
    # command) does not leak into the calling shell.
    (
        set -e
        pyenv.activate
        build_msg DATA "update searx/data/locales.json"
        python searxng_extra/update/update_locales.py
    )
    # pass the exit status of the subshell to dump_return (defined elsewhere)
    dump_return $?
}
docs.prebuild() {
build_msg DOCS "build ${DOCS_BUILD}/includes"
(

View file

@ -96,10 +96,15 @@ weblate.translations.commit() {
build_msg BABEL 'compile translation catalogs into binary MO files'
pybabel compile --statistics \
-d "searx/translations"
# update searx/data/locales.json
data.locales
# git add/commit (no push)
commit_body=$(cd "${TRANSLATIONS_WORKTREE}"; git log --pretty=format:'%h - %as - %aN <%ae>' "${existing_commit_hash}..HEAD")
commit_message=$(echo -e "[translations] update from Weblate\n\n${commit_body}")
git add searx/translations
git add searx/data/locales.json
git commit -m "${commit_message}"
)
exitcode=$?