[mod] engine: Anna's Archive - additional settings (content, sort, ext)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-06-27 18:51:27 +02:00
parent eafc2906f1
commit e2df6b77a3
3 changed files with 98 additions and 9 deletions

View file

@ -0,0 +1,2 @@
.. automodule:: searx.engines.annas_archive
:members:

View file

@ -1,14 +1,59 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint # lint: pylint
"""Anna's Archive """.. _annas_archive engine:
==============
Anna's Archive
==============
.. _Anna's Archive: https://annas-archive.org/
.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive
`Anna's Archive`_ is a free non-profit online shadow library metasearch engine
providing access to a variety of book resources (also via IPFS), created by a
team of anonymous archivists (AnnaArchivist_).
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
Configuration
=============
The engine has the following additional settings:
- :py:obj:`aa_content`
- :py:obj:`aa_ext`
- :py:obj:`aa_sort`
With these options a SearXNG maintainer is able to configure **additional**
engines for specific searches in Anna's Archive. For example, an engine to search
for the *newest* articles and journals (PDF) via the shortcut ``!aaa <search-term>``.
.. code:: yaml
- name: annas articles
engine: annas_archive
shortcut: aaa
aa_content: 'journal_article'
aa_ext: 'pdf'
aa_sort: 'newest'
Implementations
===============
""" """
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from urllib.parse import quote from urllib.parse import quote
from lxml import html from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.enginelib.traits import EngineTraits from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
# about # about
about: Dict[str, Any] = { about: Dict[str, Any] = {
@ -26,6 +71,31 @@ paging: bool = False
# search-url # search-url
base_url: str = "https://annas-archive.org" base_url: str = "https://annas-archive.org"
aa_content: str = ""
"""Anna's search form field **Content** / possible values::
journal_article, book_any, book_fiction, book_unknown, book_nonfiction,
book_comic, magazine, standards_document
To not filter use an empty string (default).
"""
aa_sort: str = ''
"""Sort Anna's results, possible values::
newest, oldest, largest, smallest
To sort by *most relevant* use an empty string (default)."""
aa_ext: str = ''
"""Filter Anna's results by file extension. Common filters are, for example,
``pdf`` and ``epub``.
.. note::
Anna's Archive is a beta release: filtering results by file extension does not
really work on Anna's Archive.
"""
# xpath queries # xpath queries
xpath_results: str = '//main//a[starts-with(@href,"/md5")]' xpath_results: str = '//main//a[starts-with(@href,"/md5")]'
@ -36,13 +106,24 @@ xpath_publisher: str = './/div[contains(@class, "text-sm")]'
xpath_file_info: str = './/div[contains(@class, "text-xs")]' xpath_file_info: str = './/div[contains(@class, "text-xs")]'
def request(query, params: Dict[str, Any]) -> Dict[str, Any]: def init(engine_settings=None): # pylint: disable=unused-argument
search_url: str = base_url + "/search?q={search_query}&lang={lang}" """Check of engine's settings."""
lang: str = "" traits = EngineTraits(**ENGINE_TRAITS['annas archive'])
if params["language"] != "all":
lang = params["language"]
params["url"] = search_url.format(search_query=quote(query), lang=lang) if aa_content and aa_content not in traits.custom['content']:
raise ValueError(f'invalid setting content: {aa_content}')
if aa_sort and aa_sort not in traits.custom['sort']:
raise ValueError(f'invalid setting sort: {aa_sort}')
if aa_ext and aa_ext not in traits.custom['ext']:
raise ValueError(f'invalid setting ext: {aa_ext}')
def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
q = quote(query)
lang = traits.get_language(params["language"], traits.all_locale) # type: ignore
params["url"] = base_url + f"/search?lang={lang or ''}&content={aa_content}&ext={aa_ext}&sort={aa_sort}&q={q}"
return params return params

View file

@ -299,11 +299,17 @@ engines:
- name: annas archive - name: annas archive
engine: annas_archive engine: annas_archive
paging: false
categories: files
disabled: true disabled: true
shortcut: aa shortcut: aa
# - name: annas articles
# engine: annas_archive
# shortcut: aaa
# # https://docs.searxng.org/src/searx.engines.annas_archive.html
# aa_content: 'journal_article' # book_any .. magazine, standards_document
# aa_ext: 'pdf' # pdf, epub, ..
# aa_sort: 'newest' # newest, oldest, largest, smallest
- name: apk mirror - name: apk mirror
engine: apkmirror engine: apkmirror
timeout: 4.0 timeout: 4.0