Merge pull request #353 from return42/stackexchange

[mod] engines - add Stack Exchange API v2.3
2024-06-02 21:39:22 +00:00 · 2021-09-29 10:38:50 +02:00 · 2021-09-29 10:38:50 +02:00 · a582cf3d82
parent 7124fd1704 ecb3912bd0
commit a582cf3d82
3 changed files with 80 additions and 65 deletions
--- a/searx/engines/stackexchange.py
+++ b/searx/engines/stackexchange.py
@ -0,0 +1,65 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Stack Exchange API v2.3
+
+* https://api.stackexchange.com/
+
+"""
+
+import html
+from json import loads
+from urllib.parse import urlencode
+
+# Engine metadata: project website, Wikidata entity, API documentation link
+# and the kind of results this engine parses (JSON from the official API,
+# no API key required).
+about = {
+    "website": 'https://stackexchange.com',
+    "wikidata_id": 'Q3495447',
+    "official_api_documentation": 'https://api.stackexchange.com/docs',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+# The engine supports paging; `pagesize` is sent as the 'pagesize' request
+# argument.
+paging = True
+pagesize = 10
+
+# `api_site` is sent as the 'site' request argument and is also used to build
+# the result URL ("https://<api_site>.com/q/<id>").
+# NOTE(review): settings.yml sets `api_site` per engine entry, so this value
+# is presumably a default that the engine loader overrides -- confirm.
+api_site = 'stackoverflow'
+# `api_sort` is sent as the 'sort' request argument.
+api_sort= 'activity'
+# sort direction (descending)
+api_order = 'desc'
+
+# https://api.stackexchange.com/docs/advanced-search
+# Base URL of the search endpoint; the url-encoded arguments are appended
+# directly after the trailing '?'.
+search_api = 'https://api.stackexchange.com/2.3/search/advanced?'
+
+def request(query, params):
+    """Build the request URL for the Stack Exchange advanced search.
+
+    The search terms, the requested page (``params['pageno']``) and the
+    module-level settings ``pagesize``, ``api_site``, ``api_sort`` and
+    ``api_order`` are url-encoded and appended to ``search_api``.
+
+    :param query: search terms, sent as the ``q`` argument
+    :param params: request parameters; ``params['pageno']`` is read and
+        ``params['url']`` is set
+    :return: the same ``params`` dict, with ``url`` filled in
+    """
+
+    args = urlencode({
+        'q' : query,
+        'page' : params['pageno'],
+        'pagesize' : pagesize,
+        'site' : api_site,
+        'sort' : api_sort,
+        # honour the module setting instead of a hard-coded 'desc'; the
+        # default value of api_order is 'desc', so the default URL is unchanged
+        'order': api_order,
+        })
+    params['url'] = search_api + args
+
+    return params
+
+def response(resp):
+    """Turn a Stack Exchange API JSON response into a list of results.
+
+    Each entry of the ``items`` array becomes one result dict with the keys
+    ``url``, ``title`` and ``content``.  The content line is built from the
+    question's tags, the owner's display name (when available), an
+    "is answered" marker and the score.
+
+    :param resp: HTTP response whose ``text`` attribute holds the JSON body
+    :return: list of result dicts
+    :raises KeyError: if the JSON body has no ``items`` array
+    """
+
+    results = []
+    json_data = loads(resp.text)
+
+    for result in json_data['items']:
+
+        content = "[%s]" % ", ".join(result['tags'])
+        # The API documents both 'owner' and its 'display_name' as optional;
+        # skip the name instead of failing the whole page with a KeyError.
+        owner_name = (result.get('owner') or {}).get('display_name')
+        if owner_name is not None:
+            content += " %s" % owner_name
+        if result['is_answered']:
+            content += ' // is answered'
+        content += " // score: %s" % result['score']
+
+        results.append({
+            # short question link, built from the configured site name
+            'url': "https://%s.com/q/%s" % (api_site, result['question_id']),
+            # the API returns HTML-escaped text; decode entities for display
+            'title':  html.unescape(result['title']),
+            'content': html.unescape(content),
+        })
+
+    return results
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@ -1,64 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Stackoverflow (IT)
-"""
-
-from urllib.parse import urlencode, urljoin
-from lxml import html
-from searx.utils import extract_text
-from searx.exceptions import SearxEngineCaptchaException
-
-# about
-about = {
-    "website": 'https://stackoverflow.com/',
-    "wikidata_id": 'Q549037',
-    "official_api_documentation": 'https://api.stackexchange.com/docs',
-    "use_official_api": False,
-    "require_api_key": False,
-    "results": 'HTML',
-}
-
-# engine dependent config
-categories = ['it']
-paging = True
-
-# search-url
-url = 'https://stackoverflow.com/'
-search_url = url + 'search?{query}&page={pageno}'
-
-# specific xpath variables
-results_xpath = '//div[contains(@class,"question-summary")]'
-link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
-content_xpath = './/div[@class="excerpt"]'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    if resp.url.path.startswith('/nocaptcha'):
-        raise SearxEngineCaptchaException()
-
-    results = []
-
-    dom = html.fromstring(resp.text)
-
-    # parse results
-    for result in dom.xpath(results_xpath):
-        link = result.xpath(link_xpath)[0]
-        href = urljoin(url, link.attrib.get('href'))
-        title = extract_text(link)
-        content = extract_text(result.xpath(content_xpath))
-
-        # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': content})
-
-    # return results
-    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -1125,8 +1125,22 @@ engines:
    shortcut: sc

  - name: stackoverflow
-    engine: stackoverflow
+    engine: stackexchange
    shortcut: st
+    api_site: 'stackoverflow'
+    categories: it
+
+  - name: askubuntu
+    engine: stackexchange
+    shortcut: ubuntu
+    api_site: 'askubuntu'
+    categories: it
+
+  - name: superuser
+    engine: stackexchange
+    shortcut: su
+    api_site: 'superuser'
+    categories: it

  - name: searchcode code
    engine: searchcode_code