From 559b8606189ff44eee6e8bcde2763daf3c2cbd7c Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Wed, 24 Jan 2024 19:06:13 +0100
Subject: [PATCH] [feat] engine: implementation of goodreads

---
 searx/engines/goodreads.py | 67 ++++++++++++++++++++++++++++++++++++++
 searx/settings.yml         |  6 ++++
 2 files changed, 73 insertions(+)
 create mode 100644 searx/engines/goodreads.py

diff --git a/searx/engines/goodreads.py b/searx/engines/goodreads.py
new file mode 100644
index 000000000..7af04ae58
--- /dev/null
+++ b/searx/engines/goodreads.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Goodreads (books)
+"""
+
+from urllib.parse import urlencode
+
+from lxml import html
+from searx.utils import extract_text, eval_xpath, eval_xpath_list
+
+about = {
+    'website': 'https://www.goodreads.com',
+    'wikidata_id': 'Q2359213',
+    'official_api_documentation': None,
+    'use_official_api': False,
+    'require_api_key': False,
+    'results': 'HTML',
+}
+categories = []
+paging = True
+
+base_url = "https://www.goodreads.com"
+
+# results_xpath selects the rows; the other XPaths are evaluated relative to one row
+results_xpath = "//table//tr"
+thumbnail_xpath = ".//img[contains(@class, 'bookCover')]/@src"
+url_xpath = ".//a[contains(@class, 'bookTitle')]/@href"
+title_xpath = ".//a[contains(@class, 'bookTitle')]"
+author_xpath = ".//a[contains(@class, 'authorName')]"
+info_text_xpath = ".//span[contains(@class, 'uitext')]"
+
+
+def request(query, params):
+    """Store the search URL for ``query`` and ``params['pageno']`` in ``params['url']``."""
+    args = {
+        'q': query,
+        'page': params['pageno'],
+    }
+
+    params['url'] = f"{base_url}/search?{urlencode(args)}"
+    return params
+
+
+def response(resp):
+    """Parse ``resp.text`` as HTML and return one result per row that has a book link."""
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    for result in eval_xpath_list(dom, results_xpath):
+        href = extract_text(eval_xpath(result, url_xpath))
+        if not href:
+            # results_xpath matches every table row; rows without a book
+            # title link are not search hits and must not become results
+            continue
+
+        results.append(
+            {
+                'url': base_url + href,
+                'title': extract_text(eval_xpath(result, title_xpath)),
+                'img_src': extract_text(eval_xpath(result, thumbnail_xpath)),
+                'content': extract_text(eval_xpath(result, info_text_xpath)),
+                'metadata': extract_text(eval_xpath(result, author_xpath)),
+            }
+        )
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 0498a470a..ebd6d5463 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -807,6 +807,12 @@ engines:
       require_api_key: false
       results: JSON
 
+  - name: goodreads
+    engine: goodreads
+    shortcut: good
+    timeout: 4.0
+    disabled: true
+
   - name: google
     engine: google
     shortcut: go