Merge pull request #353 from return42/stackexchange

[mod] engines - add Stack Exchange API v2.3
This commit is contained in:
Alexandre Flament 2021-09-29 10:38:50 +02:00 committed by GitHub
commit a582cf3d82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 80 additions and 65 deletions

View file

@ -0,0 +1,65 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Stack Exchange API v2.3
* https://api.stackexchange.com/
"""
import html
from json import loads
from urllib.parse import urlencode
# Engine metadata.
about = {
    'website': 'https://stackexchange.com',
    'wikidata_id': 'Q3495447',
    'official_api_documentation': 'https://api.stackexchange.com/docs',
    'use_official_api': True,
    'require_api_key': False,
    'results': 'JSON',
}

# Paging: the engine supports result pages of ``pagesize`` entries each.
paging = True
pagesize = 10

# Defaults for the API query: which site to search and how to sort.
api_site = 'stackoverflow'
api_sort = 'activity'
api_order = 'desc'

# Endpoint the query string is appended to, see
# https://api.stackexchange.com/docs/advanced-search
search_api = 'https://api.stackexchange.com/2.3/search/advanced?'
def request(query, params):
    """Build the Stack Exchange advanced-search URL for ``query``.

    The page number is read from ``params['pageno']`` and combined with the
    module-level settings ``pagesize``, ``api_site``, ``api_sort`` and
    ``api_order`` into a query string that is appended to ``search_api``.

    The resulting URL is stored in ``params['url']``; the same ``params``
    dict is returned.
    """
    args = urlencode({
        'q': query,
        'page': params['pageno'],
        'pagesize': pagesize,
        'site': api_site,
        'sort': api_sort,
        # Use the configurable setting rather than a hard-coded 'desc', so
        # that changing ``api_order`` actually affects the request.
        'order': api_order,
    })
    params['url'] = search_api + args
    return params
def response(resp):
    """Turn a Stack Exchange API response into a list of result dicts.

    ``resp.text`` is decoded as JSON and every entry of its ``items`` list
    becomes one result with the keys ``url``, ``title`` and ``content``.
    The content line shows the question's tags, the owner's display name
    (when the API supplied one), an "is answered" marker and the score.

    Raises ``KeyError`` when the payload has no ``items`` key or an item
    lacks one of the other fields read below.
    """
    results = []
    json_data = loads(resp.text)

    for result in json_data['items']:
        content = "[%s]" % ", ".join(result['tags'])

        # The API documents ``owner`` and its ``display_name`` as fields that
        # may be absent; leave the name out rather than letting one such item
        # discard the whole result page with a KeyError.
        owner_name = (result.get('owner') or {}).get('display_name')
        if owner_name is not None:
            content += " %s" % owner_name

        if result['is_answered']:
            content += ' // is answered'
        content += " // score: %s" % result['score']

        results.append({
            'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
            # titles and names arrive HTML-escaped from the API
            'title': html.unescape(result['title']),
            'content': html.unescape(content),
        })

    return results

View file

@ -1,64 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Stackoverflow (IT)
"""
from urllib.parse import urlencode, urljoin
from lxml import html
from searx.utils import extract_text
from searx.exceptions import SearxEngineCaptchaException
# Engine metadata.
about = {
    'website': 'https://stackoverflow.com/',
    'wikidata_id': 'Q549037',
    'official_api_documentation': 'https://api.stackexchange.com/docs',
    'use_official_api': False,
    'require_api_key': False,
    'results': 'HTML',
}

# Engine configuration.
categories = ['it']
paging = True

# Base URL and the search URL template filled in by ``request``.
url = 'https://stackoverflow.com/'
search_url = url + 'search?{query}&page={pageno}'

# XPath expressions used to pick results apart in ``response``.
results_xpath = '//div[contains(@class,"question-summary")]'
link_xpath = (
    './/div[@class="result-link"]//a'
    '|.//div[@class="summary"]//h3//a'
)
content_xpath = './/div[@class="excerpt"]'
# build the search request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The query is URL-encoded as the ``q`` argument and the page number is
    taken from ``params['pageno']``; both are substituted into the
    module-level ``search_url`` template.  Returns the same ``params`` dict.
    """
    encoded_query = urlencode({'q': query})
    page_number = params['pageno']
    params['url'] = search_url.format(query=encoded_query, pageno=page_number)
    return params
# turn the fetched search page into results
def response(resp):
    """Extract search results from the HTML in ``resp.text``.

    Raises ``SearxEngineCaptchaException`` when the path of ``resp.url``
    starts with ``/nocaptcha``.  Otherwise every node matched by
    ``results_xpath`` yields one dict with ``url``, ``title`` and ``content``;
    the list of these dicts is returned.
    """
    on_captcha_page = resp.url.path.startswith('/nocaptcha')
    if on_captcha_page:
        raise SearxEngineCaptchaException()

    found = []
    document = html.fromstring(resp.text)

    for node in document.xpath(results_xpath):
        # the first matching anchor carries both the link target and the title
        anchor = node.xpath(link_xpath)[0]
        found.append({
            # hrefs are resolved against the site's base URL
            'url': urljoin(url, anchor.attrib.get('href')),
            'title': extract_text(anchor),
            'content': extract_text(node.xpath(content_xpath)),
        })

    return found

View file

@ -1125,8 +1125,22 @@ engines:
shortcut: sc
- name: stackoverflow
engine: stackoverflow
engine: stackexchange
shortcut: st
api_site: 'stackoverflow'
categories: it
- name: askubuntu
engine: stackexchange
shortcut: ubuntu
api_site: 'askubuntu'
categories: it
- name: superuser
engine: stackexchange
shortcut: su
api_site: 'superuser'
categories: it
- name: searchcode code
engine: searchcode_code