From d58760ef751a2df47fa7f118c04d0502d1b37d16 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Wed, 28 Feb 2024 19:22:00 +0100
Subject: [PATCH] [mod] pypi engine: use packages.html

Replace the generic xpath based configuration of the pypi engine by a
dedicated engine module, so that the results can use the packages.html
template (package name, version, publication date).

The behavior of the former settings.yml entry is preserved: paging stays
enabled and the suggestions of pypi.org are still returned.
---
 searx/engines/pypi.py | 84 +++++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml    | 18 +---------
 2 files changed, 85 insertions(+), 17 deletions(-)
 create mode 100644 searx/engines/pypi.py

diff --git a/searx/engines/pypi.py b/searx/engines/pypi.py
new file mode 100644
index 000000000..e49de11e5
--- /dev/null
+++ b/searx/engines/pypi.py
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""PyPI (pypi.org) package search.
+
+Results are scraped from the HTML search page of https://pypi.org and use the
+``packages.html`` result template.
+"""
+
+from urllib.parse import urlencode
+from dateutil import parser
+
+from lxml import html
+from searx.utils import (
+    eval_xpath_getindex,
+    eval_xpath_list,
+    extract_text,
+)
+
+# about
+about = {
+    "website": "https://pypi.org",
+    "wikidata_id": "Q2984686",
+    "official_api_documentation": "https://warehouse.readthedocs.io/api-reference/index.html",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+categories = ['it', 'packages']
+
+
+# engine dependent config
+# The settings.yml entry replaced by this module had ``paging: true``; it has
+# to be set here, otherwise only the first result page can be requested.
+paging = True
+first_page_num = 1
+base_url = "https://pypi.org"
+search_url = base_url + '/search/?{query}'
+
+# XPath expressions, same as in the former xpath based settings.yml entry
+results_xpath = '/html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]'
+suggestion_xpath = '/html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"]'
+
+
+def request(query, params):
+    """Build the URL of the search page for ``query`` and the requested page."""
+    args = {
+        "q": query,
+        "page": params['pageno'],
+    }
+    params['url'] = search_url.format(query=urlencode(args))
+    return params
+
+
+def response(resp):
+    """Parse the HTML search page into package results and suggestions."""
+    results = []
+    dom = html.fromstring(resp.text)
+
+    for entry in eval_xpath_list(dom, results_xpath):
+        url = base_url + extract_text(eval_xpath_getindex(entry, './@href', 0))  # type: ignore
+        title = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__name"]', 0))
+        version = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__version"]', 0))
+        created_at = extract_text(
+            eval_xpath_getindex(entry, './h3/span[@class="package-snippet__created"]/time/@datetime', 0)
+        )
+        content = extract_text(eval_xpath_getindex(entry, './p', 0))
+        results.append(
+            {
+                "template": "packages.html",
+                "url": url,
+                "title": title,
+                'package_name': title,
+                "content": content,
+                "version": version,
+                'publishedDate': parser.parse(created_at),  # type: ignore
+            }
+        )
+
+    # the former xpath based configuration also returned pypi's suggestions
+    for suggestion in eval_xpath_list(dom, suggestion_xpath):
+        results.append({'suggestion': extract_text(suggestion)})
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 7d1eeb190..6c1f7bdd0 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1457,23 +1457,7 @@ engines:
 
   - name: pypi
     shortcut: pypi
-    engine: xpath
-    paging: true
-    search_url: https://pypi.org/search/?q={query}&page={pageno}
-    results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]
-    url_xpath: ./@href
-    title_xpath: ./h3/span[@class="package-snippet__name"]
-    content_xpath: ./p
-    suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"]
-    first_page_num: 1
-    categories: [it, packages]
-    about:
-      website: https://pypi.org
-      wikidata_id: Q2984686
-      official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html
-      use_official_api: false
-      require_api_key: false
-      results: HTML
+    engine: pypi
 
   - name: qwant
     qwant_categ: web