From ffea5d8ef5540bc4be08b2b26e1819d5401f854d Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 3 Jan 2022 12:40:06 +0100 Subject: [PATCH] [docs] add documentation for the scripts in searxng_extra/update Signed-off-by: Markus Heiser --- docs/dev/searxng_extra/index.rst | 9 +- docs/dev/searxng_extra/update.rst | 88 +++++++++++++++++++ .../update/update_ahmia_blacklist.py | 17 ++-- searxng_extra/update/update_currencies.py | 6 ++ .../update/update_engine_descriptions.py | 7 ++ searxng_extra/update/update_external_bangs.py | 13 +-- .../update/update_firefox_version.py | 14 ++- searxng_extra/update/update_languages.py | 10 ++- searxng_extra/update/update_osm_keys_tags.py | 5 +- searxng_extra/update/update_wikidata_units.py | 12 ++- 10 files changed, 157 insertions(+), 24 deletions(-) create mode 100644 docs/dev/searxng_extra/update.rst diff --git a/docs/dev/searxng_extra/index.rst b/docs/dev/searxng_extra/index.rst index f38bb3154..c2b5c312b 100644 --- a/docs/dev/searxng_extra/index.rst +++ b/docs/dev/searxng_extra/index.rst @@ -1,14 +1,15 @@ .. _searxng_extra: -====================================================== -Tooling box ``searxng_extra`` for developers and users -====================================================== +============================= +Tooling box ``searxng_extra`` +============================= -In the folder :origin:`searxng_extra/` we maintain some tools useful for +In the folder :origin:`searxng_extra/` we maintain some tools useful for CI and developers. .. toctree:: :maxdepth: 2 :caption: Contents + update standalone_searx.py diff --git a/docs/dev/searxng_extra/update.rst b/docs/dev/searxng_extra/update.rst new file mode 100644 index 000000000..d05c81409 --- /dev/null +++ b/docs/dev/searxng_extra/update.rst @@ -0,0 +1,88 @@ +========================= +``searxng_extra/update/`` +========================= + +:origin:`[source] ` + +Scripts to update static data in :origin:`searx/data/` + +.. 
_update_ahmia_blacklist.py: + +``update_ahmia_blacklist.py`` +============================= + +:origin:`[source] <searxng_extra/update/update_ahmia_blacklist.py>` + +.. automodule:: searxng_extra.update.update_ahmia_blacklist + :members: + + +``update_currencies.py`` +======================== + +:origin:`[source] <searxng_extra/update/update_currencies.py>` + +.. automodule:: searxng_extra.update.update_currencies + :members: + +``update_engine_descriptions.py`` +================================= + +:origin:`[source] <searxng_extra/update/update_engine_descriptions.py>` + +.. automodule:: searxng_extra.update.update_engine_descriptions + :members: + + +``update_external_bangs.py`` +============================ + +:origin:`[source] <searxng_extra/update/update_external_bangs.py>` + +.. automodule:: searxng_extra.update.update_external_bangs + :members: + + +``update_firefox_version.py`` +============================= + +:origin:`[source] <searxng_extra/update/update_firefox_version.py>` + +.. automodule:: searxng_extra.update.update_firefox_version + :members: + + +``update_languages.py`` +======================= + +:origin:`[source] <searxng_extra/update/update_languages.py>` + +.. automodule:: searxng_extra.update.update_languages + :members: + + +``update_osm_keys_tags.py`` +=========================== + +:origin:`[source] <searxng_extra/update/update_osm_keys_tags.py>` + +.. automodule:: searxng_extra.update.update_osm_keys_tags + :members: + + +``update_pygments.py`` +====================== + +:origin:`[source] <searxng_extra/update/update_pygments.py>` + +.. automodule:: searxng_extra.update.update_pygments + :members: + + +``update_wikidata_units.py`` +============================ + +:origin:`[source] <searxng_extra/update/update_wikidata_units.py>` + +.. automodule:: searxng_extra.update.update_wikidata_units + :members: diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py index f7695deae..57fb78b34 100755 --- a/searxng_extra/update/update_ahmia_blacklist.py +++ b/searxng_extra/update/update_ahmia_blacklist.py @@ -1,10 +1,14 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""This script saves `Ahmia's blacklist`_ for onion sites. -# This script saves Ahmia's blacklist for onion sites. 
-# More info in https://ahmia.fi/blacklist/ +Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data +... <.github/workflows/data-update.yml>`). + +.. _Ahmia's blacklist: https://ahmia.fi/blacklist/ + +""" -# set path from os.path import join import requests @@ -26,6 +30,7 @@ def get_ahmia_blacklist_filename(): return join(join(searx_dir, "data"), "ahmia_blacklist.txt") -blacklist = fetch_ahmia_blacklist() -with open(get_ahmia_blacklist_filename(), "w") as f: - f.write('\n'.join(blacklist)) +if __name__ == '__main__': + blacklist = fetch_ahmia_blacklist() + with open(get_ahmia_blacklist_filename(), "w") as f: + f.write('\n'.join(blacklist)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index 3373e2455..cdff4cbc9 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -1,6 +1,12 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" import re import unicodedata import json diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 51cfc7cc2..bab1a0349 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -1,6 +1,13 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch website description from websites and from +:origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/engine_descriptions.json`. 
+ +""" + import json from urllib.parse import urlparse from os.path import join diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index d5c6b585a..be3aade0f 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -1,17 +1,20 @@ #!/usr/bin/env python # lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later -""" -Update searx/data/external_bangs.json using the duckduckgo bangs. +"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs +(:origin:`CI Update data ... <.github/workflows/data-update.yml>`). + +https://duckduckgo.com/newbang loads: -https://duckduckgo.com/newbang loads * a javascript which provides the bang version ( https://duckduckgo.com/bv1.js ) * a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example ) This script loads the javascript, then the bangs. -The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ), -but most probably it will requires to update RE_BANG_VERSION +The javascript URL may change in the future ( for example +https://duckduckgo.com/bv2.js ), but most probably it will require updating +RE_BANG_VERSION + """ # pylint: disable=C0116 diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 750e955fd..163982b16 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -1,6 +1,13 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""Fetch firefox useragent signatures + +Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). 
+ +""" + import json import requests import re @@ -66,6 +73,7 @@ def get_useragents_filename(): return join(join(searx_dir, "data"), "useragents.json") -useragents["versions"] = fetch_firefox_last_versions() -with open(get_useragents_filename(), "w") as f: - json.dump(useragents, f, indent=4, ensure_ascii=False) +if __name__ == '__main__': + useragents["versions"] = fetch_firefox_last_versions() + with open(get_useragents_filename(), "w", encoding='utf-8') as f: + json.dump(useragents, f, indent=4, ensure_ascii=False) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index f37345808..9a71566a9 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -1,9 +1,13 @@ #!/usr/bin/env python # SPDX-License-Identifier: AGPL-3.0-or-later +"""This script generates languages.py from intersecting each engine's supported +languages. -# This script generates languages.py from intersecting each engine's supported languages. -# -# Output files: searx/data/engines_languages.json and searx/languages.py +Output files: :origin:`searx/data/engines_languages.json` and +:origin:`searx/languages.py` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). + +""" import json from pathlib import Path diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 2916cbff1..1d691c194 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -5,7 +5,10 @@ To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for example `OSM tags API`_ (sidenote: the actual change log from -map.atownsend.org.uk_ might be useful to normalize OSM tags) +map.atownsend.org.uk_ might be useful to normalize OSM tags). + +Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ... +<.github/workflows/data-update.yml>`). .. 
_Wikidata Query Service: https://query.wikidata.org/ .. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py index 43a872b1b..e999b6cfd 100755 --- a/searxng_extra/update/update_wikidata_units.py +++ b/searxng_extra/update/update_wikidata_units.py @@ -3,6 +3,13 @@ # lint: pylint # pylint: disable=missing-module-docstring +"""Fetch units from :origin:`searx/engines/wikidata.py` engine. + +Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data +... <.github/workflows/data-update.yml>`). + +""" + import json import collections @@ -54,5 +61,6 @@ def get_wikidata_units_filename(): return join(join(searx_dir, "data"), "wikidata_units.json") -with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: - json.dump(get_data(), f, indent=4, ensure_ascii=False) +if __name__ == '__main__': + with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: + json.dump(get_data(), f, indent=4, ensure_ascii=False)