From 73364e158ec88a56383bd3b56023e4fb768d0b9b Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Sat, 5 Aug 2023 19:46:04 +0200
Subject: [PATCH] [feat] engine: brave - support for images

---
Review note: the post-image blob ids on the "index" lines below were not
recomputed after this revision of the patch content.

 requirements.txt       |  1 +
 searx/engines/brave.py | 81 ++++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml     |  6 ++++
 3 files changed, 88 insertions(+)
 create mode 100644 searx/engines/brave.py

diff --git a/requirements.txt b/requirements.txt
index c8371795a..4d0b8da8e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,3 +17,4 @@ markdown-it-py==3.0.0
 typing_extensions==4.7.1
 fasttext-predict==0.9.2.1
 pytomlpp==1.0.13
+chompjs==1.2.2
diff --git a/searx/engines/brave.py b/searx/engines/brave.py
new file mode 100644
index 000000000..e2acf64f4
--- /dev/null
+++ b/searx/engines/brave.py
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+    Brave (General, news, videos, images)
+"""
+
+import json
+import re
+from urllib.parse import urlencode
+
+import chompjs
+from lxml import html
+from searx.utils import extract_text, eval_xpath, eval_xpath_list
+
+about = {
+    "website": 'https://search.brave.com/',
+    "wikidata_id": 'Q22906900',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+base_url = "https://search.brave.com/"
+paging = False
+categories = ['images', 'videos', 'news']  # images, videos, news
+
+
+def request(query, params):
+    """Build the search URL for the first configured category.
+
+    The URL is stored in ``params["url"]``; ``params`` is also returned.
+    """
+    args = {
+        'q': query,
+        'spellcheck': 1,
+    }
+    params["url"] = f"{base_url}{categories[0]}?{urlencode(args)}"
+    return params
+
+
+def get_image_results(text):
+    """Extract image results from the ``const data = ...`` line of *text*.
+
+    Returns an empty list when *text* contains no such line.
+    """
+    datastr = ""
+    for line in text.split("\n"):
+        if "const data = " in line:
+            # drop the assignment prefix and the trailing semicolon, if any
+            datastr = line.replace("const data = ", "").strip().rstrip(";")
+            break
+
+    if not datastr:
+        # no data line found: nothing to hand to the parser
+        return []
+
+    json_data = chompjs.parse_js_object(datastr)
+
+    return [
+        {
+            'template': 'images.html',
+            'url': result['url'],
+            'thumbnail_src': result['thumbnail']['src'],
+            'img_src': result['properties']['url'],
+            'content': result['description'],
+            'title': result['title'],
+            'source': result['source'],
+            'img_format': result['properties']['format'],
+        }
+        for result in json_data[1]["data"]["body"]["response"]["results"]
+    ]
+
+
+def response(resp):
+    """Return the results for the first configured category.
+
+    Only ``images`` is handled; any other category yields an empty list.
+    """
+    # plain comparison instead of ``match`` so Python < 3.10 can load this
+    if categories[0] == 'images':
+        return get_image_results(resp.text)
+    return []
diff --git a/searx/settings.yml b/searx/settings.yml
index aa8e61ae8..d41b1edfd 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1843,6 +1843,12 @@ engines:
       require_api_key: false
       results: HTML
 
+  - name: brave.images
+    shortcut: braveimg
+    engine: brave
+    categories: images
+    disabled: false
+
   - name: petalsearch
     shortcut: pts
     engine: xpath