From 4a36a3044d6e39bc60d026d99ed7a010f6505a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Mon, 30 Nov 2020 08:35:15 +0100 Subject: [PATCH] Add recoll engine (#2325) recoll is a local search engine based on Xapian: http://www.lesbonscomptes.com/recoll/ By itself recoll does not offer web or API access, this can be achieved using recoll-webui: https://framagit.org/medoc92/recollwebui.git This engine uses a custom 'files' result template set `base_url` to the location where recoll-webui can be reached set `dl_prefix` to a location where the file hierarchy as indexed by recoll can be reached set `search_dir` to the part of the indexed file hierarchy to be searched, use an empty string to search the entire search domain --- docs/admin/engines.rst | 25 ++++- docs/admin/engines/recoll.rst | 50 +++++++++ docs/dev/engine_overview.rst | 3 + searx/engines/recoll.py | 104 ++++++++++++++++++ searx/settings.yml | 22 ++++ .../themes/oscar/less/logicodev/results.less | 19 ++++ searx/templates/oscar/macros.html | 26 +++++ .../oscar/result_templates/files.html | 55 +++++++++ 8 files changed, 298 insertions(+), 6 deletions(-) create mode 100644 docs/admin/engines/recoll.rst create mode 100644 searx/engines/recoll.py create mode 100644 searx/templates/oscar/result_templates/files.html diff --git a/docs/admin/engines.rst b/docs/admin/engines.rst index 4d1872dfc..f1ac03699 100644 --- a/docs/admin/engines.rst +++ b/docs/admin/engines.rst @@ -1,14 +1,28 @@ -.. _engines generic: - ======= Engines ======= +Special Engine Settings +======================= + .. sidebar:: Further reading .. - :ref:`settings engine` - - :ref:`engine settings` - - :ref:`engine file` + - :ref:`engine settings` & :ref:`engine file` + +.. toctree:: + :maxdepth: 1 + + engines/recoll.rst + + +.. _engines generic: + +General Engine Settings +======================= + +Explanation of the :ref:`general engine configuration` shown in the table +:ref:`configured engines`. 
============= =========== ==================== ============ :ref:`engine settings` :ref:`engine file` @@ -30,8 +44,6 @@ Disabled **D** Show errors **DE** ============= =========== ================================= -Configuration defaults (at built time): - .. _configured engines: .. jinja:: webapp @@ -73,3 +85,4 @@ Configuration defaults (at built time): - {{(mod.display_error_messages and "y") or ""}} {% endfor %} + diff --git a/docs/admin/engines/recoll.rst b/docs/admin/engines/recoll.rst new file mode 100644 index 000000000..cba2e81f7 --- /dev/null +++ b/docs/admin/engines/recoll.rst @@ -0,0 +1,50 @@ +.. _engine recoll: + +====== +Recoll +====== + +.. sidebar:: info + + - `Recoll <http://www.lesbonscomptes.com/recoll/>`_ + - `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_ + +Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_ +does not offer web or API access; this can be achieved using recoll-webui_. + + + +Configuration +============= + +You must configure the following settings: + +``base_url``: + Location where recoll-webui can be reached. + +``mount_prefix``: + Location where the file hierarchy is mounted on your *local* filesystem. + +``dl_prefix``: + Location where the file hierarchy as indexed by recoll can be reached. + +``search_dir``: + Part of the indexed file hierarchy to be searched; if empty the full domain is + searched. + + +Example +======= + +Scenario: + +#. Recoll indexes a local filesystem mounted in ``/export/documents/reference``, +#. the Recoll search interface can be reached at https://recoll.example.org/ and +#. the contents of this filesystem can be reached through https://download.example.org/reference + +.. code:: yaml + + base_url: https://recoll.example.org/ + mount_prefix: /export/documents + dl_prefix: https://download.example.org + search_dir: '' diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 268995a45..5e3483fd7 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -18,6 +18,9 @@ engines. 
"""
 Recoll (local search engine)

 @using-api   yes
 @results     JSON
 @stable      yes
 @parse       url, content, size, abstract, author, mtype, subtype, time, \
              filename, label, type, embedded
"""

from datetime import date, timedelta
from html import escape
from json import loads
from urllib.parse import urlencode, quote

# engine dependent config
time_range_support = True

# parameters from settings.yml
base_url = None
search_dir = ''
mount_prefix = None
dl_prefix = None

# embedded
# HTML used to preview audio/video results.  The element is closed
# explicitly: <audio>/<video> have a mandatory end tag, and without it the
# markup that follows the player would be parsed as its fallback content.
embedded_url = '<{ttype} controls height="166px" ' +\
    'src="{url}" type="{mtype}"></{ttype}>'

# number of days to look back for each supported time range
_TIME_RANGE_DAYS = {
    'day': 1,
    'week': 7,
    'month': 30,
    'year': 365,
}

# result fields copied verbatim into the item when present and non-empty
_OPTIONAL_FIELDS = ('filename', 'abstract', 'author', 'mtype', 'time')

# image subtypes for which the result URL is used as a preview image
_PREVIEW_IMAGE_SUBTYPES = ('bmp', 'gif', 'jpeg', 'png')


# helper functions
def get_time_range(time_range):
    """Return the ISO date that lies *time_range* before today.

    ``time_range`` is one of ``'day'``, ``'week'``, ``'month'`` or
    ``'year'``.  Any other value (including ``None``) yields an empty
    string.
    """
    offset = _TIME_RANGE_DAYS.get(time_range, 0)
    if not offset:
        return ''

    return (date.today() - timedelta(days=offset)).isoformat()


# do search-request
def request(query, params):
    """Store the search URL for *query* in ``params['url']``.

    The URL is ``base_url`` followed by ``json?`` and the url-encoded
    ``query``, ``after`` (from ``params['time_range']``) and ``dir``
    (``search_dir``) arguments, with ``highlight=0`` appended.  Returns
    the same ``params`` dict.
    """
    search_after = get_time_range(params.get('time_range'))
    query_string = urlencode({
        'query': query,
        'after': search_after,
        'dir': search_dir})

    # Plain concatenation instead of str.format(): a literal '{' or '}' in
    # the configured base_url must not be interpreted as a format field.
    params['url'] = base_url + 'json?' + query_string + '&highlight=0'

    return params


def _build_item(result):
    """Convert one entry of the JSON ``results`` list into a result item.

    ``url`` and ``label`` are required (``KeyError`` if missing); every
    other field is optional and simply left out of the item when absent
    or empty.  ``mount_prefix`` and ``dl_prefix`` must be configured as
    strings, otherwise building the URL raises ``TypeError``.
    """
    url = result['url'].replace('file://' + mount_prefix, dl_prefix)
    snippet = result.get('snippet')

    item = {'url': url,
            'title': result['label'],
            'content': '' if snippet is None else '{}'.format(snippet),
            'template': 'files.html'}

    size = result.get('size')
    if size:
        try:
            item['size'] = int(size)
        except (TypeError, ValueError):
            # a size that is not a number is omitted rather than letting
            # one odd entry abort the whole response
            pass

    for parameter in _OPTIONAL_FIELDS:
        value = result.get(parameter)
        if value:
            item[parameter] = value

    # facilitate preview support for known mime types
    mime = result.get('mtype')
    if isinstance(mime, str) and '/' in mime:
        # split only once: everything after the first '/' is the subtype
        mtype, subtype = mime.split('/', 1)
        item['mtype'] = mtype
        item['subtype'] = subtype

        if mtype in ('audio', 'video'):
            # the mime string comes from the index and ends up in an HTML
            # attribute, so it is escaped; the URL is percent-quoted
            item['embedded'] = embedded_url.format(
                ttype=mtype,
                url=quote(url.encode('utf8'), '/:'),
                mtype=escape(mime))

        if mtype == 'image' and subtype in _PREVIEW_IMAGE_SUBTYPES:
            item['img_src'] = url

    return item


# get response from search-request
def response(resp):
    """Parse the JSON in ``resp.text`` into a list of result items.

    Returns an empty list when the decoded JSON is empty.  Otherwise
    returns one item per entry of ``results`` and, when the JSON has an
    ``nres`` key, a trailing ``{'number_of_results': nres}`` entry.
    """
    response_json = loads(resp.text)

    if not response_json:
        return []

    results = [_build_item(result)
               for result in response_json.get('results', [])]

    if 'nres' in response_json:
        results.append({'number_of_results': response_json['nres']})

    return results
1em; + + td { + padding-right: 1em; + color: @gray; + } + + td:first-of-type { + color: @dark-gray; + } +} + // map formating of results .result-map { clear: both; diff --git a/searx/templates/oscar/macros.html b/searx/templates/oscar/macros.html index 57a90aaa2..2bc1e7805 100644 --- a/searx/templates/oscar/macros.html +++ b/searx/templates/oscar/macros.html @@ -47,6 +47,20 @@ {%- endif -%} {%- endmacro %} + +{% macro result_footer_nocache(result) -%} +
+
+ {% for engine in result.engines %} + {{ engine }} + {% endfor %} + {% if proxify %} + {{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }} + {% endif %} +
+ +{%- endmacro %} + {% macro result_footer_rtl(result, id) -%}
{{- "" -}} @@ -68,6 +82,18 @@ {%- endif %} {%- endmacro %} + +{% macro result_footer_nocache_rtl(result) -%} +
+ {% for engine in result.engines %} + {{ engine }} + {% endfor %} + {% if proxify %} + {{ result_link(proxify(result.url), icon('sort') + _('proxied'), "text-info") }} + {% endif %} + +{%- endmacro %} + {% macro preferences_item_header(info, label, rtl, id) -%} {% if rtl %}
diff --git a/searx/templates/oscar/result_templates/files.html b/searx/templates/oscar/result_templates/files.html new file mode 100644 index 000000000..5e3894e0a --- /dev/null +++ b/searx/templates/oscar/result_templates/files.html @@ -0,0 +1,55 @@ +{% from 'oscar/macros.html' import result_header, result_sub_header, result_footer_nocache, result_footer_nocache_rtl, icon with context %} + +{{ result_header(result, favicons) }} +{{ result_sub_header(result) }} + +{% if result.embedded %} + +{% endif %} + +{% if result.embedded %} +
+ {{ result.embedded|safe }} +
+{% endif %} + +{% if result.abstract %}

{{ result.abstract|safe }}

{% endif %} + +{% if result.img_src %} +
+
+{{ result.title|striptags }} +{% if result.content %}

{{ result.content|safe }}

{% endif %} +
+
+{% else %} +{% if result.content %}

{{ result.content|safe }}

{% endif %} +{% endif %} + + +{% if result.author %}{% endif %} + +{% if result.filename %}{% endif %} + +{% if result.size %} +{% endif %} + +{% if result.time %}{% endif %} + +{% if result.mtype %}{% endif %} + + +{% if rtl %} +{{ result_footer_nocache_rtl(result) }} +{% else %} +{{ result_footer_nocache(result) }} +{% endif %}