From 144f89bf785408a193d09f659a5442032c06de74 Mon Sep 17 00:00:00 2001 From: Thomas Pointhuber Date: Mon, 1 Sep 2014 15:10:05 +0200 Subject: [PATCH] add comments to google-engines --- searx/engines/google.py | 30 +++++++++++++++++++++++++----- searx/engines/google_images.py | 31 ++++++++++++++++++++++++++----- searx/engines/google_news.py | 33 ++++++++++++++++++++++++++------- 3 files changed, 77 insertions(+), 17 deletions(-) diff --git a/searx/engines/google.py b/searx/engines/google.py index 2c6a98af3..80c7cc746 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -1,37 +1,57 @@ -#!/usr/bin/env python +## Google (Web) +# +# @website https://www.google.com +# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated! +# +# @using-api yes +# @results JSON +# @stable yes (but deprecated) +# @parse url, title, content from urllib import urlencode from json import loads +# engine dependent config categories = ['general'] - -url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa - paging = True language_support = True +# search-url +url = 'https://ajax.googleapis.com/' +search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa + +# do search-request def request(query, params): offset = (params['pageno'] - 1) * 8 + language = 'en-US' if params['language'] != 'all': language = params['language'].replace('_', '-') + params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), language=language) + return params +# get response from search-request def response(resp): results = [] + search_res = loads(resp.text) + # return empty array if there are no results if not search_res.get('responseData', {}).get('results'): return [] + # parse results for result in search_res['responseData']['results']: + # append result results.append({'url': 
result['unescapedUrl'], 'title': result['titleNoFormatting'], 'content': result['content']}) + + # return results return results diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index e810ee07d..6c99f2801 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,37 +1,58 @@ -#!/usr/bin/env python +## Google (Images) +# +# @website https://www.google.com +# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated! +# +# @using-api yes +# @results JSON +# @stable yes (but deprecated) +# @parse url, title, img_src from urllib import urlencode from json import loads +# engine dependent config categories = ['images'] +paging = True +# search-url url = 'https://ajax.googleapis.com/' search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa -paging = True +# do search-request def request(query, params): offset = (params['pageno'] - 1) * 8 + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset) + return params +# get response from search-request def response(resp): results = [] + search_res = loads(resp.text) - if not search_res.get('responseData'): - return [] - if not search_res['responseData'].get('results'): + + # return empty array if there are no results (responseData may be missing or null) + if not (search_res.get('responseData') or {}).get('results'): return [] + + # parse results for result in search_res['responseData']['results']: href = result['originalContextUrl'] title = result['title'] if not result['url']: continue + + # append result results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'}) + + # return results return results diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 72b7a0661..becc7e21d 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,43 +1,62 @@ -#!/usr/bin/env python +## Google (News) +# +# @website 
https://www.google.com +# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated! +# +# @using-api yes +# @results JSON +# @stable yes (but deprecated) +# @parse url, title, content, publishedDate from urllib import urlencode from json import loads from dateutil import parser +# engine dependent config categories = ['news'] - -url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa - paging = True language_support = True +# search-url +url = 'https://ajax.googleapis.com/' +search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa + +# do search-request def request(query, params): offset = (params['pageno'] - 1) * 8 + language = 'en-US' if params['language'] != 'all': language = params['language'].replace('_', '-') + params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), language=language) + return params +# get response from search-request def response(resp): results = [] + search_res = loads(resp.text) + # return empty array if there are no results if not search_res.get('responseData', {}).get('results'): return [] + # parse results for result in search_res['responseData']['results']: - -# Mon, 10 Mar 2014 16:26:15 -0700 + # parse publishedDate publishedDate = parser.parse(result['publishedDate']) + # append result results.append({'url': result['unescapedUrl'], 'title': result['titleNoFormatting'], 'publishedDate': publishedDate, 'content': result['content']}) + + # return results return results