From 401561cb580454ef73d08072dbad0da1a5e897aa Mon Sep 17 00:00:00 2001 From: Paolo Basso <12545838+paolobasso99@users.noreply.github.com> Date: Sat, 24 Jun 2023 18:58:27 +0200 Subject: [PATCH] [mod] engine torznab - refactor & option to hide links - torznab engine using types and clearer code - torznab option to hide torrent and magnet links. - document the torznab engine - add myself to authors Closes: https://github.com/searxng/searxng/issues/1124 Signed-off-by: Markus Heiser --- AUTHORS.rst | 1 + docs/src/searx.engines.torznab.rst | 2 + searx/engines/torznab.py | 266 ++++++++++++++++++++--------- searx/settings.yml | 15 +- 4 files changed, 201 insertions(+), 83 deletions(-) create mode 100644 docs/src/searx.engines.torznab.rst diff --git a/AUTHORS.rst b/AUTHORS.rst index a285c5450..58fed35c9 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -168,3 +168,4 @@ features or generally made searx better: - Milad Laly @Milad-Laly - @llmII - @blob42 ``_ +- Paolo Basso `` \ No newline at end of file diff --git a/docs/src/searx.engines.torznab.rst b/docs/src/searx.engines.torznab.rst new file mode 100644 index 000000000..0b96e18ec --- /dev/null +++ b/docs/src/searx.engines.torznab.rst @@ -0,0 +1,2 @@ +.. automodule:: searx.engines.torznab + :members: diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index a48017c13..dc24919b5 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -1,21 +1,83 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Torznab WebAPI +""".. _torznab engine: -A engine that implements the `torznab WebAPI`_. +============== +Torznab WebAPI +============== -.. _torznab WebAPI: https://torznab.github.io/spec-1.3-draft/torznab +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + +Torznab_ is an API specification that provides a standardized way to query +torrent sites for content. It is used by a number of torrent applications, +including Prowlarr_ and Jackett_. 
+ +Using this engine together with Prowlarr_ or Jackett_ allows you to search +a huge number of torrent sites which are not directly supported. + +Configuration +============= + +The engine has the following settings: + +``base_url``: + Torznab endpoint URL. + +``api_key``: + The API key to use for authentication. + +``torznab_categories``: + The categories to use for searching. This is a list of category IDs. See + Prowlarr-categories_ or Jackett-categories_ for more information. + +``show_torrent_files``: + Whether to show the torrent file in the search results. Be careful, as using + this with Prowlarr_ or Jackett_ leaks the API key. This should be used only + if you are querying a Torznab endpoint without authentication or if the + instance is private. Be aware that private trackers may ban you if you share + the torrent file. Defaults to ``false``. + +``show_magnet_links``: + Whether to show the magnet link in the search results. Be aware that private + trackers may ban you if you share the magnet link. Defaults to ``true``. + +.. _Torznab: + https://torznab.github.io/spec-1.3-draft/index.html +.. _Prowlarr: + https://github.com/Prowlarr/Prowlarr +.. _Jackett: + https://github.com/Jackett/Jackett +.. _Prowlarr-categories: + https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories +.. 
_Jackett-categories: + https://github.com/Jackett/Jackett/wiki/Jackett-Categories + + +Implementations +=============== """ +from __future__ import annotations +from typing import TYPE_CHECKING +from typing import List, Dict, Any from datetime import datetime from urllib.parse import quote -from lxml import etree +from lxml import etree # type: ignore from searx.exceptions import SearxEngineAPIException -# about -about = { +if TYPE_CHECKING: + import httpx + import logging + + logger: logging.Logger + +# engine settings +about: Dict[str, Any] = { "website": None, "wikidata_id": None, "official_api_documentation": "https://torznab.github.io/spec-1.3-draft", @@ -23,27 +85,30 @@ about = { "require_api_key": False, "results": 'XML', } - -categories = ['files'] -paging = False -time_range_support = False +categories: List[str] = ['files'] +paging: bool = False +time_range_support: bool = False # defined in settings.yml # example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab" -base_url = '' -api_key = '' +base_url: str = '' +api_key: str = '' # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories -torznab_categories = [] +torznab_categories: List[str] = [] +show_torrent_files: bool = False +show_magnet_links: bool = True def init(engine_settings=None): # pylint: disable=unused-argument + """Initialize the engine.""" if len(base_url) < 1: raise ValueError('missing torznab base_url') -def request(query, params): +def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]: + """Build the request params.""" + search_url: str = base_url + '?t=search&q={search_query}' - search_url = base_url + '?t=search&q={search_query}' if len(api_key) > 0: search_url += '&apikey={api_key}' if len(torznab_categories) > 0: @@ -56,88 +121,135 @@ def request(query, params): return params -def response(resp): +def response(resp: httpx.Response) -> List[Dict[str, Any]]: + """Parse the XML response and return a list of results.""" results = [] - 
search_results = etree.XML(resp.content) - # handle errors - # https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes + # handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes if search_results.tag == "error": raise SearxEngineAPIException(search_results.get("description")) - for item in search_results[0].iterfind('item'): - result = {'template': 'torrent.html'} - - enclosure = item.find('enclosure') - - result["filesize"] = int(enclosure.get('length')) - - link = get_property(item, 'link') - guid = get_property(item, 'guid') - comments = get_property(item, 'comments') - - # define url - result["url"] = enclosure.get('url') - if comments is not None and comments.startswith('http'): - result["url"] = comments - elif guid is not None and guid.startswith('http'): - result["url"] = guid - - # define torrent file url - result["torrentfile"] = None - if enclosure.get('url').startswith("http"): - result["torrentfile"] = enclosure.get('url') - elif link is not None and link.startswith('http'): - result["torrentfile"] = link - - # define magnet link - result["magnetlink"] = get_torznab_attr(item, 'magneturl') - if result["magnetlink"] is None: - if enclosure.get('url').startswith("magnet"): - result["magnetlink"] = enclosure.get('url') - elif link is not None and link.startswith('magnet'): - result["magnetlink"] = link - - result["title"] = get_property(item, 'title') - result["files"] = get_property(item, 'files') - - result["publishedDate"] = None - try: - result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') - except (ValueError, TypeError) as e: - logger.debug("ignore exception (publishedDate): %s", e) - - result["seed"] = get_torznab_attr(item, 'seeders') - - # define leech - result["leech"] = get_torznab_attr(item, 'leechers') - if result["leech"] is None and result["seed"] is not None: - peers = get_torznab_attr(item, 'peers') - if peers is not None: - result["leech"] = 
int(peers) - int(result["seed"]) + channel: etree.Element = search_results[0] + item: etree.Element + for item in channel.iterfind('item'): + result: Dict[str, Any] = build_result(item) results.append(result) return results -def get_property(item, property_name): - property_element = item.find(property_name) +def build_result(item: etree.Element) -> Dict[str, Any]: + """Build a result from a XML item.""" + # extract attributes from XML + # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes + enclosure: etree.Element | None = item.find('enclosure') + enclosure_url: str | None = None + if enclosure is not None: + enclosure_url = enclosure.get('url') + + size = get_attribute(item, 'size') + if not size and enclosure: + size = enclosure.get('length') + if size: + size = int(size) + + guid = get_attribute(item, 'guid') + comments = get_attribute(item, 'comments') + pubDate = get_attribute(item, 'pubDate') + seeders = get_torznab_attribute(item, 'seeders') + leechers = get_torznab_attribute(item, 'leechers') + peers = get_torznab_attribute(item, 'peers') + + # map attributes to searx result + result: Dict[str, Any] = { + 'template': 'torrent.html', + 'title': get_attribute(item, 'title'), + 'filesize': size, + 'files': get_attribute(item, 'files'), + 'seed': seeders, + 'leech': _map_leechers(leechers, seeders, peers), + 'url': _map_result_url(guid, comments), + 'publishedDate': _map_published_date(pubDate), + 'torrentfile': None, + 'magnetlink': None, + } + + link = get_attribute(item, 'link') + if show_torrent_files: + result['torrentfile'] = _map_torrent_file(link, enclosure_url) + if show_magnet_links: + magneturl = get_torznab_attribute(item, 'magneturl') + result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link) + return result + + +def _map_result_url(guid: str | None, comments: str | None) -> str | None: + if guid and guid.startswith('http'): + return guid + if comments and 
comments.startswith('http'): + return comments + return None + + +def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None: + if leechers: + return leechers + if seeders and peers: + return str(int(peers) - int(seeders)) + return None + + +def _map_published_date(pubDate: str | None) -> datetime | None: + if pubDate is not None: + try: + return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z') + except (ValueError, TypeError) as e: + logger.debug("ignore exception (publishedDate): %s", e) + return None + + +def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None: + if link and link.startswith('http'): + return link + if enclosure_url and enclosure_url.startswith('http'): + return enclosure_url + return None + + +def _map_magnet_link( + magneturl: str | None, + guid: str | None, + enclosure_url: str | None, + link: str | None, +) -> str | None: + if magneturl and magneturl.startswith('magnet'): + return magneturl + if guid and guid.startswith('magnet'): + return guid + if enclosure_url and enclosure_url.startswith('magnet'): + return enclosure_url + if link and link.startswith('magnet'): + return link + return None + + +def get_attribute(item: etree.Element, property_name: str) -> str | None: + """Get attribute from item.""" + property_element: etree.Element | None = item.find(property_name) if property_element is not None: return property_element.text - return None -def get_torznab_attr(item, attr_name): - element = item.find( - './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name), +def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None: + """Get torznab special attribute from item.""" + element: etree.Element | None = item.find( + './/torznab:attr[@name="{attribute_name}"]'.format(attribute_name=attribute_name), {'torznab': 'http://torznab.com/schemas/2015/feed'}, ) - if element is not None: return element.get("value") - return None diff --git 
a/searx/settings.yml b/searx/settings.yml index 3046910b5..b6bb0a0e3 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1392,15 +1392,18 @@ engines: shortcut: tch # torznab engine lets you query any torznab compatible indexer. Using this - # engine in combination with Jackett (https://github.com/Jackett/Jackett) - # opens the possibility to query a lot of public and private indexers directly - # from SearXNG. - # - name: torznab + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. More details at: + # https://docs.searxng.org/src/searx.engines.torznab.html + # + # - name: Torznab EZTV # engine: torznab - # shortcut: trz - # base_url: http://localhost:9117/api/v2.0/indexers/all/results/torznab + # shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab # enable_http: true # if using localhost # api_key: xxxxxxxxxxxxxxx + # show_magnet_links: true + # show_torrent_files: false # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories # torznab_categories: # optional # - 2000