[mod] Peertube: re-engineered & upgrade to data_type: traits_v1

- fetch_traits(): Fetch languages from peertube's search-index source code.

  [mod] Include migration of the request method from 'supported_languages'
        to the 'traits' (EngineTraits) object.
  [fix] the old supported_languages_url is no longer valid since the sources
        have been moved to a different path.

- fixed code to pass pylint
- request(): complete re-implementation based on the API docs [1]
- response(): complete re-implementation, adds several fields missed before
- add source code documentation

[1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-10-02 23:52:11 +02:00
parent 6e5f22e558
commit a7fe22770a
4 changed files with 197 additions and 77 deletions

View file

@ -0,0 +1,19 @@
.. _peertube engines:

================
Peertube Engines
================

.. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry

.. _peertube video engine:

Peertube Video
==============

.. automodule:: searx.engines.peertube
   :members:

View file

@ -1468,31 +1468,32 @@
"peertube": {
"all_locale": null,
"custom": {},
"data_type": "supported_languages",
"languages": {},
"data_type": "traits_v1",
"languages": {
"ca": "ca",
"cs": "cs",
"de": "de",
"el": "el",
"en": "en",
"eo": "eo",
"es": "es",
"eu": "eu",
"fi": "fi",
"fr": "fr",
"gd": "gd",
"it": "it",
"ja": "ja",
"nl": "nl",
"pl": "pl",
"pt": "pt",
"ru": "ru",
"sv": "sv",
"zh": "zh",
"zh_Hans": "zh",
"zh_Hant": "zh"
},
"regions": {},
"supported_languages": [
"ca",
"cs",
"de",
"el",
"en",
"eo",
"es",
"eu",
"fi",
"fr",
"gd",
"it",
"ja",
"nl",
"oc",
"pl",
"pt",
"ru",
"sv",
"zh"
]
"supported_languages": {}
},
"qwant": {
"all_locale": null,
@ -4531,4 +4532,4 @@
"zh_cht"
]
}
}
}

View file

@ -1,18 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
peertube (Videos)
# lint: pylint
"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
(more or less) the same REST API and the schema of the JSON result is identical.
"""
from json import loads
from datetime import datetime
import re
from urllib.parse import urlencode
from searx.utils import html_to_text
from datetime import datetime
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
import babel
from searx import network
from searx.locales import language_tag
from searx.utils import html_to_text
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
# about
about = {
# pylint: disable=line-too-long
"website": 'https://joinpeertube.org',
"wikidata_id": 'Q50938515',
"official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
"official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
@ -22,66 +34,155 @@ about = {
# Module-level engine settings.
categories = ["videos"]
paging = True
base_url = "https://peer.tube"
# NOTE(review): the commit message says the old supported_languages_url is no
# longer valid; this line looks like the removed side of the diff -- confirm.
supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
"""Base URL of the Peertube instance. A list of instances is available at:
- https://instances.joinpeertube.org/instances
"""
time_range_support = True
# Offsets that request() adds to today's date to build the `startDate` URL
# argument; 'day' is an empty relativedelta.
time_range_table = {
'day': relativedelta(),
'week': relativedelta(weeks=-1),
'month': relativedelta(months=-1),
'year': relativedelta(years=-1),
}
safesearch = True
# Value request() sends as the `nsfw` URL argument, keyed by params['safesearch'].
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
def minute_to_hm(minute):
    """Format an integer as ``H:MM`` (quotient and remainder of a division by 60).

    Returns ``None`` when ``minute`` is not an ``int``.
    """
    if not isinstance(minute, int):
        return None
    hours, remainder = divmod(minute, 60)
    return "%d:%02d" % (hours, remainder)
# do search-request
def request(query, params):
# NOTE(review): indentation was lost in this rendering and the hunk shows two
# bodies back to back.  The statements up to the `search_url.format(...)`
# assignment appear to be the pre-commit implementation; the docstring below
# starts the re-implementation -- confirm against the repository.
sanitized_url = base_url.rstrip("/")
pageno = (params["pageno"] - 1) * 15
search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}"
query_dict = {"search": query}
language = params["language"].split("-")[0]
if "all" != language and language in supported_languages:
query_dict["languageOneOf"] = language
params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
"""Assemble request for the Peertube API"""
# An empty query is not sent: False is returned instead of `params`.
if not query:
return False
# eng_region = traits.get_region(params['searxng_locale'], 'en_US')
# Engine language for the user's locale; presumably the second argument is the
# fallback when the locale has no match in the traits -- confirm in EngineTraits.
eng_lang = traits.get_language(params['searxng_locale'], None)
# URL = base URL without trailing slash + search endpoint + url-encoded arguments.
params['url'] = (
base_url.rstrip("/")
+ "/api/v1/search/videos?"
+ urlencode(
{
'search': query,
'searchTarget': 'search-index', # Vidiversum
'resultType': 'videos',
# offset of the first result of the requested page, 10 results per page
'start': (params['pageno'] - 1) * 10,
'count': 10,
# -createdAt: sort by date ascending / createdAt: date descending
'sort': '-match', # sort by *match descending*
'nsfw': safesearch_table[params['safesearch']],
}
)
)
# The language filters are appended as-is (outside of urlencode) and only when
# an engine language was found.
if eng_lang is not None:
params['url'] += '&languageOneOf[]=' + eng_lang
params['url'] += '&boostLanguages[]=' + eng_lang
# For a known time range, `startDate` is today's date plus the (negative)
# offset from time_range_table, in ISO format.
if params['time_range'] in time_range_table:
time = datetime.now().date() + time_range_table[params['time_range']]
params['url'] += '&startDate=' + time.isoformat()
return params
def _get_offset_from_pageno(pageno):
return (pageno - 1) * 15 + 1
# get response from search-request
def response(resp):
# Parsing is delegated to video_response().
# NOTE(review): `sanitized_url` is not used by the return statement below; the
# assignment looks like the removed side of the diff -- confirm.
sanitized_url = base_url.rstrip("/")
return video_response(resp)
def video_response(resp):
"""Parse video response from SepiaSearch and Peertube instances."""
# NOTE(review): indentation was lost in this rendering and pre-/post-commit
# lines are interleaved (`search_res`/`res` vs. `json_data`/`result`).  The
# notes below describe the `json_data`/`result` statements -- confirm against
# the repository.
results = []
search_res = loads(resp.text)
json_data = resp.json()
# return empty array if there are no results
if "data" not in search_res:
if 'data' not in json_data:
return []
# parse results
for res in search_res["data"]:
title = res["name"]
url = sanitized_url + "/videos/watch/" + res["uuid"]
description = res["description"]
if description:
content = html_to_text(res["description"])
else:
content = ""
thumbnail = sanitized_url + res["thumbnailPath"]
publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
for result in json_data['data']:
# Up to three metadata entries: the channel's display name, the channel's
# `name@host` and the comma-joined tags; falsy entries are dropped.
# NOTE(review): the `name@host` concatenation raises TypeError when 'channel',
# its 'name' or its 'host' is missing (None + str) -- confirm the API always
# sends them.
metadata = [
x
for x in [
result.get('channel', {}).get('displayName'),
result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
', '.join(result.get('tags', [])),
]
if x
]
results.append(
{
"template": "videos.html",
"url": url,
"title": title,
"content": content,
"publishedDate": publishedDate,
"iframe_src": sanitized_url + res["embedPath"],
"thumbnail": thumbnail,
'url': result['url'],
'title': result['name'],
# a missing or None description is passed to html_to_text() as ''
'content': html_to_text(result.get('description') or ''),
'author': result.get('account', {}).get('displayName'),
# NOTE(review): minute_to_hm() divides by 60; the unit of 'duration' is not
# visible here -- confirm against the API schema.
'length': minute_to_hm(result.get('duration')),
'template': 'videos.html',
'publishedDate': parse(result['publishedAt']),
'iframe_src': result.get('embedUrl'),
# falls back to the preview URL when there is no thumbnail URL
'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
# the metadata entries collected above, separated by ' | '
'metadata': ' | '.join(metadata),
}
)
# return results
return results
def _fetch_supported_languages(resp):
videolanguages = resp.json()
peertube_languages = list(videolanguages.keys())
return peertube_languages
def fetch_traits(engine_traits: EngineTraits):
    """Fill ``engine_traits.languages`` from peertube's search-index source code.

    The language IDs are read from the ``videoLanguages ()`` section of the
    ``Filters.vue`` component, see videoLanguages_ in commit `8ed5c729 -
    Refactor and redesign client`_

    .. _8ed5c729 - Refactor and redesign client:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
    .. _videoLanguages:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
    """
    filters_vue = network.get(
        'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
        # the response from search-index repository is very slow
        timeout=60,
    )
    if not filters_vue.ok:
        print("ERROR: response from peertube is not OK.")
        return

    # Everything between the `videoLanguages ()` line and the next `]`.
    lang_section = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", filters_vue.text, re.DOTALL)
    if not lang_section:
        print("ERROR: can't determine languages from peertube")
        return

    for entry in re.finditer(r"\{ id: '([a-z]+)', label:", lang_section.group(1)):
        eng_tag = entry.group(1)
        if eng_tag == 'oc':
            # Occitan is not known by babel, its closest relative is Catalan
            # but 'ca' is already in the list of engine_traits.languages -->
            # 'oc' will be ignored.
            continue
        try:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: %s is unknown by babel" % eng_tag)
            continue

        # The first engine tag registered for a SearXNG tag wins; a different
        # second one is only reported.
        registered = engine_traits.languages.get(sxng_tag)
        if registered:
            if registered != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, registered, eng_tag))
            continue
        engine_traits.languages[sxng_tag] = eng_tag

    # Both Chinese script variants are mapped to the engine's single 'zh' tag.
    engine_traits.languages['zh_Hans'] = 'zh'
    engine_traits.languages['zh_Hant'] = 'zh'

View file

@ -1758,9 +1758,8 @@ engines:
engine: peertube
shortcut: ptb
paging: true
# https://instances.joinpeertube.org/instances
base_url: https://peertube.biz/
# base_url: https://tube.tardis.world/
# alternatives see: https://instances.joinpeertube.org/instances
# base_url: https://tube.4aem.com
categories: videos
disabled: true
timeout: 6.0