[pylint] add scripts from searxng_extra/update to pylint

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-01-03 12:58:48 +01:00
parent ffea5d8ef5
commit 295876abaa
5 changed files with 49 additions and 38 deletions

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script saves `Ahmia's blacklist`_ for onion sites.
@ -21,9 +22,7 @@ def fetch_ahmia_blacklist():
resp = requests.get(URL, timeout=3.0)
if resp.status_code != 200:
raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
else:
blacklist = resp.text.split()
return blacklist
return resp.text.split()
def get_ahmia_blacklist_filename():
@ -32,5 +31,5 @@ def get_ahmia_blacklist_filename():
if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist()
with open(get_ahmia_blacklist_filename(), "w") as f:
with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
f.write('\n'.join(blacklist))

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
@ -7,13 +8,15 @@ Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`).
"""
# pylint: disable=invalid-name
import re
import unicodedata
import json
# set path
from sys import path
from os.path import realpath, dirname, join
from os.path import join
from searx import searx_dir
from searx.locales import LOCALE_NAMES

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch website description from websites and from
@ -8,6 +9,8 @@ Output file: :origin:`searx/data/engine_descriptions.json`.
"""
# pylint: disable=invalid-name, global-statement
import json
from urllib.parse import urlparse
from os.path import join
@ -109,7 +112,7 @@ def get_wikipedia_summary(lang, pageid):
response.raise_for_status()
api_result = json.loads(response.text)
return api_result.get('extract')
except:
except Exception: # pylint: disable=broad-except
return None
@ -141,7 +144,7 @@ def get_website_description(url, lang1, lang2=None):
try:
response = searx.network.get(url, headers=headers, timeout=10)
response.raise_for_status()
except Exception:
except Exception: # pylint: disable=broad-except
return (None, None)
try:

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch firefox useragent signatures
@ -9,20 +10,21 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
"""
import json
import requests
import re
from os.path import dirname, join
from os.path import join
from urllib.parse import urlparse, urljoin
from distutils.version import LooseVersion, StrictVersion
from distutils.version import LooseVersion
import requests
from lxml import html
from searx import searx_dir
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
RELEASE_PATH = '/pub/firefox/releases/'
NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$')
# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
#
useragents = {
@ -39,20 +41,19 @@ def fetch_firefox_versions():
resp = requests.get(URL, timeout=2.0)
if resp.status_code != 200:
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
else:
dom = html.fromstring(resp.text)
versions = []
dom = html.fromstring(resp.text)
versions = []
for link in dom.xpath('//a/@href'):
url = urlparse(urljoin(URL, link))
path = url.path
if path.startswith(RELEASE_PATH):
version = path[len(RELEASE_PATH) : -1]
if NORMAL_REGEX.match(version):
versions.append(LooseVersion(version))
for link in dom.xpath('//a/@href'):
url = urlparse(urljoin(URL, link))
path = url.path
if path.startswith(RELEASE_PATH):
version = path[len(RELEASE_PATH) : -1]
if NORMAL_REGEX.match(version):
versions.append(LooseVersion(version))
list.sort(versions, reverse=True)
return versions
list.sort(versions, reverse=True)
return versions
def fetch_firefox_last_versions():

View file

@ -1,4 +1,6 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script generates languages.py from intersecting each engine's supported
languages.
@ -9,6 +11,8 @@ Output files: :origin:`searx/data/engines_languages.json` and
"""
# pylint: disable=invalid-name
import json
from pathlib import Path
from pprint import pformat
@ -28,7 +32,7 @@ languages_file = Path(searx_dir) / 'languages.py'
def fetch_supported_languages():
set_timeout_for_thread(10.0)
engines_languages = dict()
engines_languages = {}
names = list(engines)
names.sort()
@ -36,7 +40,7 @@ def fetch_supported_languages():
if hasattr(engines[engine_name], 'fetch_supported_languages'):
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
if type(engines_languages[engine_name]) == list:
if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
engines_languages[engine_name] = sorted(engines_languages[engine_name])
print("fetched languages from %s engines" % len(engines_languages))
@ -59,7 +63,7 @@ def get_locale(lang_code):
# Join all language lists.
def join_language_lists(engines_languages):
language_list = dict()
language_list = {}
for engine_name in engines_languages:
for lang_code in engines_languages[engine_name]:
@ -95,7 +99,7 @@ def join_language_lists(engines_languages):
'name': language_name,
'english_name': english_name,
'counter': set(),
'countries': dict(),
'countries': {},
}
# add language with country if not in list
@ -123,6 +127,7 @@ def join_language_lists(engines_languages):
def filter_language_list(all_languages):
min_engines_per_lang = 13
min_engines_per_country = 7
# pylint: disable=consider-using-dict-items, consider-iterating-dictionary
main_engines = [
engine_name
for engine_name in engines.keys()
@ -142,7 +147,7 @@ def filter_language_list(all_languages):
}
def _copy_lang_data(lang, country_name=None):
new_dict = dict()
new_dict = {}
new_dict['name'] = all_languages[lang]['name']
new_dict['english_name'] = all_languages[lang]['english_name']
if country_name:
@ -150,10 +155,10 @@ def filter_language_list(all_languages):
return new_dict
# for each language get country codes supported by most engines or at least one country code
filtered_languages_with_countries = dict()
filtered_languages_with_countries = {}
for lang, lang_data in filtered_languages.items():
countries = lang_data['countries']
filtered_countries = dict()
filtered_countries = {}
# get language's country codes with enough supported engines
for lang_country, country_data in countries.items():
@ -215,7 +220,7 @@ def write_languages_file(languages):
language_codes = tuple(language_codes)
with open(languages_file, 'w') as new_file:
with open(languages_file, 'w', encoding='utf-8') as new_file:
file_content = "{file_headers} {language_codes},\n)\n".format(
# fmt: off
file_headers = '\n'.join(file_headers),
@ -228,7 +233,7 @@ def write_languages_file(languages):
if __name__ == "__main__":
load_engines(settings['engines'])
engines_languages = fetch_supported_languages()
all_languages = join_language_lists(engines_languages)
filtered_languages = filter_language_list(all_languages)
write_languages_file(filtered_languages)
_engines_languages = fetch_supported_languages()
_all_languages = join_language_lists(_engines_languages)
_filtered_languages = filter_language_list(_all_languages)
write_languages_file(_filtered_languages)