[enh] settings unification - new dependency: pyyaml

This commit is contained in:
asciimoo 2014-01-19 00:17:02 +01:00
parent 39ebe1d519
commit 3afdd1d994
10 changed files with 150 additions and 121 deletions

2
.gitignore vendored
View file

@ -1,6 +1,8 @@
env env
engines.cfg engines.cfg
.installed.cfg .installed.cfg
.coverage
coverage/
setup.cfg setup.cfg
*.pyc *.pyc

View file

@ -25,8 +25,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc
* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx` * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
* install dependencies: `pip install -r requirements.txt` * install dependencies: `pip install -r requirements.txt`
* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!) * edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
* rename `engines.cfg_sample` to `engines.cfg`
* run `python searx/webapp.py` to start the application * run `python searx/webapp.py` to start the application
For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation) For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)

View file

@ -1,99 +0,0 @@
[wikipedia]
engine = mediawiki
url = https://en.wikipedia.org/
number_of_results = 1
[bing]
engine = bing
locale = en-US
[currency]
engine=currency_convert
categories = general
[deviantart]
engine = deviantart
categories = images
[ddg definitions]
engine = duckduckgo_definitions
[duckduckgo]
engine = duckduckgo
locale = en-us
[filecrop]
engine = filecrop
categories = files
[flickr]
engine = flickr
categories = images
[github]
engine = github
categories = it
[google]
engine = json_engine
search_url = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
categories = general
url_query = /responseData/results/unescapedUrl
content_query = /responseData/results/content
title_query = /responseData/results/titleNoFormatting
[google images]
engine = google_images
categories = images
[piratebay]
engine = piratebay
categories = videos, music, files
[soundcloud]
engine = soundcloud
categories = music
[stackoverflow]
engine = stackoverflow
categories = it
[startpage]
engine = startpage
[twitter]
engine = twitter
categories = social media
[urbandictionary]
engine = xpath
search_url = http://www.urbandictionary.com/define.php?term={query}
url_xpath = //div[@class="word"]//a/@href
title_xpath = //div[@class="word"]//a
content_xpath = //div[@class="definition"]
[yahoo]
engine = xpath
search_url = http://search.yahoo.com/search?p={query}
results_xpath = //div[@class="res"]
url_xpath = .//h3/a/@href
title_xpath = .//h3/a
content_xpath = .//div[@class="abstr"]
suggestion_xpath = //div[@id="satat"]//a
[youtube]
engine = youtube
categories = videos
[dailymotion]
engine = dailymotion
locale = en_US
categories = videos
[vimeo]
engine = vimeo
categories = videos
results_xpath = //div[@id="browse_content"]/ol/li
url_xpath=./a/@href
title_xpath=./a/div[@class="data"]/p[@class="title"]/text()
content_xpath=./a/img/@src

View file

@ -1,3 +1,4 @@
flask flask
grequests grequests
lxml lxml
pyyaml

View file

@ -0,0 +1,22 @@
"""Package initialisation: locate and parse the YAML settings file.

Module-level names defined here:

* ``engine_dir``    -- directory that contains this file.
* ``searx_dir``     -- parent directory of ``engine_dir``.
* ``settings_path`` -- value of the ``SEARX_SETTINGS_PATH`` environment
  variable when it is set, otherwise ``<searx_dir>/settings.yml``.
* ``settings``      -- the object returned by parsing ``settings_path``.

Importing this module terminates the process with exit status 2 when
PyYAML cannot be imported.
"""
from os import environ
from os.path import realpath, dirname, join

try:
    from yaml import load
except ImportError:
    # Catch only a missing PyYAML: a bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit and report unrelated failures as a
    # missing dependency.
    from sys import exit, stderr
    stderr.write('[E] install pyyaml\n')
    exit(2)

# Directory holding this file; the name is kept because it is part of the
# module's public surface.
engine_dir = dirname(realpath(__file__))

# One level above the package directory, with the '..' resolved away.
searx_dir = realpath(join(engine_dir, '..'))

# The environment variable overrides the default settings location.
settings_path = environ.get('SEARX_SETTINGS_PATH',
                            join(searx_dir, 'settings.yml'))

# NOTE(review): ``yaml.load`` without an explicit safe loader can construct
# arbitrary Python objects from tagged YAML. Acceptable for a locally
# administered settings file; switch to ``yaml.safe_load`` if this path can
# ever point at untrusted content.
with open(settings_path) as settings_yaml:
    settings = load(settings_yaml)

View file

@ -24,15 +24,11 @@ from operator import itemgetter
from urlparse import urlparse from urlparse import urlparse
from searx import settings from searx import settings
from searx.utils import gen_useragent from searx.utils import gen_useragent
import ConfigParser
import sys import sys
from datetime import datetime from datetime import datetime
engine_dir = dirname(realpath(__file__)) engine_dir = dirname(realpath(__file__))
searx_dir = join(engine_dir, '../../')
engines_config = ConfigParser.SafeConfigParser()
engines_config.read(join(searx_dir, 'engines.cfg'))
number_of_searches = 0 number_of_searches = 0
engines = {} engines = {}
@ -48,24 +44,23 @@ def load_module(filename):
module.name = modname module.name = modname
return module return module
if not engines_config.sections(): if not 'engines' in settings or not settings['engines']:
print '[E] Error no engines found. Edit your engines.cfg' print '[E] Error no engines found. Edit your settings.yml'
exit(2) exit(2)
for engine_config_name in engines_config.sections(): for engine_data in settings['engines']:
engine_data = engines_config.options(engine_config_name) engine_name = engine_data['engine']
engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py') engine = load_module(engine_name+'.py')
engine.name = engine_config_name
for param_name in engine_data: for param_name in engine_data:
if param_name == 'engine': if param_name == 'engine':
continue continue
if param_name == 'categories': if param_name == 'categories':
if engines_config.get(engine_config_name, param_name) == 'none': if engine_data['categories'] == 'none':
engine.categories = [] engine.categories = []
else: else:
engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(',')) engine.categories = map(str.strip, engine_data['categories'].split(','))
continue continue
setattr(engine, param_name, engines_config.get(engine_config_name, param_name)) setattr(engine, param_name, engine_data[param_name])
for engine_attr in dir(engine): for engine_attr in dir(engine):
if engine_attr.startswith('_'): if engine_attr.startswith('_'):
continue continue
@ -170,7 +165,7 @@ def search(query, request, selected_engines):
request_args = dict(headers = request_params['headers'] request_args = dict(headers = request_params['headers']
,hooks = dict(response=callback) ,hooks = dict(response=callback)
,cookies = request_params['cookies'] ,cookies = request_params['cookies']
,timeout = settings.request_timeout ,timeout = settings['server']['request_timeout']
) )
if request_params['method'] == 'GET': if request_params['method'] == 'GET':

View file

@ -41,7 +41,7 @@ from searx.utils import highlight_content, html_to_text
app = Flask(__name__) app = Flask(__name__)
app.secret_key = settings.secret_key app.secret_key = settings['server']['secret_key']
opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?> opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
@ -58,8 +58,8 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
def get_base_url(): def get_base_url():
if settings.base_url: if settings['server']['base_url']:
hostname = settings.base_url hostname = settings['server']['base_url']
else: else:
scheme = 'http' scheme = 'http'
if request.is_secure: if request.is_secure:
@ -243,9 +243,9 @@ def run():
from gevent import monkey from gevent import monkey
monkey.patch_all() monkey.patch_all()
app.run(debug = settings.debug app.run(debug = settings['server']['debug']
,use_debugger = settings.debug ,use_debugger = settings['server']['debug']
,port = settings.port ,port = settings['server']['port']
) )

107
settings.yml Normal file
View file

@ -0,0 +1,107 @@
server:
port : 8888
secret_key : "ultrasecretkey" # change this!
debug : True
request_timeout : 3.0 # seconds
base_url: False
engines:
- name : wikipedia
engine : mediawiki
url : https://en.wikipedia.org/
number_of_results : 1
- name : bing
engine : bing
locale : en-US
- name : currency
engine : currency_convert
categories : general
- name : deviantart
engine : deviantart
categories : images
- name : ddg definitions
engine : duckduckgo_definitions
- name : duckduckgo
engine : duckduckgo
locale : en-us
- name : filecrop
engine : filecrop
categories : files
- name : flickr
engine : flickr
categories : images
- name : github
engine : github
categories : it
- name : google
engine : json_engine
search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
categories : general
url_query : /responseData/results/unescapedUrl
content_query : /responseData/results/content
title_query : /responseData/results/titleNoFormatting
- name : google images
engine : google_images
categories : images
- name : piratebay
engine : piratebay
categories : videos, music, files
- name : soundcloud
engine : soundcloud
categories : music
- name : stackoverflow
engine : stackoverflow
categories : it
- name : startpage
engine : startpage
- name : twitter
engine : twitter
categories : social media
- name : urbandictionary
engine : xpath
search_url : http://www.urbandictionary.com/define.php?term={query}
url_xpath : //div[@class="word"]//a/@href
title_xpath : //div[@class="word"]//a
content_xpath : //div[@class="definition"]
- name : yahoo
engine : xpath
search_url : http://search.yahoo.com/search?p={query}
results_xpath : //div[@class="res"]
url_xpath : .//h3/a/@href
title_xpath : .//h3/a
content_xpath : .//div[@class="abstr"]
suggestion_xpath : //div[@id="satat"]//a
- name : youtube
engine : youtube
categories : videos
- name : dailymotion
engine : dailymotion
locale : en_US
categories : videos
- name : vimeo
engine : vimeo
categories : videos
results_xpath : //div[@id="browse_content"]/ol/li
url_xpath : ./a/@href
title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
content_xpath : ./a/img/@src

View file

@ -32,6 +32,7 @@ setup(
'flask', 'flask',
'grequests', 'grequests',
'lxml', 'lxml',
'pyyaml',
'setuptools', 'setuptools',
], ],
extras_require={ extras_require={

View file

@ -16,6 +16,7 @@ mccabe = 0.2.1
pep8 = 1.4.6 pep8 = 1.4.6
plone.testing = 4.0.8 plone.testing = 4.0.8
pyflakes = 0.7.3 pyflakes = 0.7.3
pyyaml = 3.10
requests = 2.2.0 requests = 2.2.0
robotframework-debuglibrary = 0.3 robotframework-debuglibrary = 0.3
robotframework-httplibrary = 0.4.2 robotframework-httplibrary = 0.4.2