From a80bf1ba9779ffaa0651d365dff1508aabf4931f Mon Sep 17 00:00:00 2001 From: Pierre Chevalier Date: Wed, 17 Mar 2021 16:43:09 +0100 Subject: [PATCH] [enh] Add Springer Nature engine Springer Nature is a global publisher dedicated to providing service to the research community [1] with an official API [2]. To test this PR, first get your API key following this page: https://dev.springernature.com/signup In searx/engines/springer.py at line 24, add this API key. I left my own key, commented out in the line above. Feel free to use it, if needed. [1] https://www.springernature.com/ [2] https://dev.springernature.com/ --- Makefile | 1 + searx/engines/springer.py | 74 +++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 10 ++++++ 3 files changed, 85 insertions(+) create mode 100644 searx/engines/springer.py diff --git a/Makefile b/Makefile index f8d6359c4..ca32fa6b1 100644 --- a/Makefile +++ b/Makefile @@ -194,6 +194,7 @@ PYLINT_FILES=\ searx/engines/meilisearch.py \ searx/engines/solidtorrents.py \ searx/engines/solr.py \ + searx/engines/springer.py \ searx/engines/google_scholar.py \ searx/engines/yahoo_news.py \ searx/engines/apkmirror.py \ diff --git a/searx/engines/springer.py b/searx/engines/springer.py new file mode 100644 index 000000000..a9c32d8a9 --- /dev/null +++ b/searx/engines/springer.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Springer Nature (science) + +""" + +# pylint: disable=missing-function-docstring + +from datetime import datetime +from json import loads +from urllib.parse import urlencode + +from searx import logger +from searx.exceptions import SearxEngineAPIException + +logger = logger.getChild('Springer Nature engine') + +about = { + "website": 'https://www.springernature.com/', + "wikidata_id": 'Q21096327', + "official_api_documentation": 'https://dev.springernature.com/', + "use_official_api": True, + "require_api_key": True, + "results": 'JSON', +} + +categories = ['science'] +paging = True +nb_per_page = 10
+api_key = 'unset'
+
+base_url = 'https://api.springernature.com/metadata/json?'
+
+def request(query, params):
+    """Build the Springer Nature metadata API request for one result page.
+
+    Sets ``params['url']`` and returns ``params``.  Raises
+    ``SearxEngineAPIException`` while ``api_key`` is still ``'unset'``.
+    """
+    if api_key == 'unset':
+        raise SearxEngineAPIException('missing Springer-Nature API key')
+    args = {'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page}
+    # log the URL before the key is appended: the secret must stay out of the logs
+    logger.debug("query_url --> %s", base_url + urlencode(args))
+    params['url'] = base_url + urlencode({**args, 'api_key': api_key})
+    return params
+
+
+def response(resp):
+    """Convert the JSON reply of the API into a list of searx result dicts.
+
+    The abstract is cut after 500 characters, a record without an abstract or
+    without a parsable ``publicationDate`` is kept (its date is left out).
+    """
+    results = []
+    for record in loads(resp.text)['records']:
+        abstract = record.get('abstract') or ''
+        result = {
+            'title': record['title'],
+            'url': record['url'][0]['value'].replace('http://', 'https://', 1),
+            'content': abstract[:500] + ('...' if len(abstract) > 500 else ''),
+            'metadata': ' / '.join(
+                record[x] for x in ('publicationName', 'identifier', 'contentType')
+                if record.get(x) is not None
+            ),
+        }
+        # append the page range only when both of its ends are known
+        if record.get('startingPage') and record.get('endingPage'):
+            result['metadata'] += " (%(startingPage)s-%(endingPage)s)" % record
+        try:
+            result['publishedDate'] = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
+        except (KeyError, ValueError):
+            logger.debug("no usable publicationDate in record %s", record.get('identifier'))
+        results.append(result)
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index e4c672507..8d336d32a 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -961,6 +961,16 @@ engines:
 #    query_fields : '' # query fields
 #    enable_http : True
 
+  - name : springer nature
+    engine : springer
+    # get your API key from: https://dev.springernature.com/signup
+    # api_key : "a69685087d07eca9f13db62f65b8f601" # working API key, for test & debug
+    # set api_key and comment out disabled ..
+    disabled: True
+    shortcut : springer
+    categories : science
+    timeout : 6.0
+
   - name : startpage
     engine : startpage
     shortcut : sp