From 5568f24d6ca9ae9a7eca27c107982b2689e40105 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 25 Nov 2018 23:32:48 -0600 Subject: [PATCH] [fix] check language aliases when setting search language --- searx/engines/bing_images.py | 2 +- searx/engines/bing_videos.py | 2 +- searx/engines/google.py | 2 +- searx/engines/google_news.py | 2 +- searx/engines/qwant.py | 2 +- searx/engines/swisscows.py | 2 +- searx/engines/wikidata.py | 2 +- searx/engines/wikipedia.py | 2 +- tests/unit/engines/test_bing_images.py | 1 + tests/unit/engines/test_bing_videos.py | 1 + tests/unit/engines/test_google.py | 9 ++++++++- tests/unit/engines/test_google_news.py | 1 + tests/unit/engines/test_qwant.py | 1 + tests/unit/engines/test_swisscows.py | 1 + tests/unit/engines/test_wikidata.py | 1 + tests/unit/engines/test_wikipedia.py | 9 +++++++-- 16 files changed, 29 insertions(+), 11 deletions(-) diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 876011f1d..e2495200c 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -55,7 +55,7 @@ def request(query, params): query=urlencode({'q': query}), offset=offset) - language = match_language(params['language'], supported_languages).lower() + language = match_language(params['language'], supported_languages, language_aliases).lower() params['cookies']['SRCHHPGUSR'] = \ 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 7002ac861..bf17f9168 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -48,7 +48,7 @@ def request(query, params): 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') # language cookie - language = match_language(params['language'], supported_languages).lower() + language = match_language(params['language'], supported_languages, language_aliases).lower() params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1' # query and paging diff --git a/searx/engines/google.py b/searx/engines/google.py index 62e7d1170..49d7f6499 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath): def request(query, params): offset = (params['pageno'] - 1) * 10 - language = match_language(params['language'], supported_languages) + language = match_language(params['language'], supported_languages, language_aliases) language_array = language.split('-') if params['language'].find('-') > 0: country = params['language'].split('-')[1] diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index aadcb76df..bf5995c88 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -51,7 +51,7 @@ def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), search_options=urlencode(search_options)) - language = match_language(params['language'], supported_languages).split('-')[0] + language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] if language: params['url'] += '&lr=lang_' + language diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 4b0f1c87c..1bef07cc7 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -46,7 +46,7 @@ def request(query, params): offset=offset) # add language tag - language = match_language(params['language'], supported_languages) + language = match_language(params['language'], supported_languages, language_aliases) params['url'] += '&locale=' + language.replace('-', '_').lower() return params diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py index ff4df24b7..0001d56bf 100644 --- a/searx/engines/swisscows.py +++ b/searx/engines/swisscows.py @@ -36,7 +36,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=') # do search-request def request(query, params): - region = match_language(params['language'], supported_languages) + region = match_language(params['language'], supported_languages, language_aliases) ui_language = region.split('-')[0] search_path = search_string.format( diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index c315b30da..2485b6528 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -68,7 +68,7 @@ def response(resp): html = fromstring(resp.text) search_results = html.xpath(wikidata_ids_xpath) - language = match_language(resp.search_params['language'], supported_languages).split('-')[0] + language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0] # TODO: make requests asynchronous to avoid timeout when result_count > 1 for search_result in search_results[:result_count]: diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 6cd17e378..23f23db4d 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' # set language in base_url def url_lang(lang): - return match_language(lang, supported_languages).split('-')[0] + return match_language(lang, supported_languages, language_aliases).split('-')[0] # do search-request diff --git a/tests/unit/engines/test_bing_images.py b/tests/unit/engines/test_bing_images.py index cb1d550d9..e65e6cc04 100644 --- a/tests/unit/engines/test_bing_images.py +++ b/tests/unit/engines/test_bing_images.py @@ -9,6 +9,7 @@ class TestBingImagesEngine(SearxTestCase): def test_request(self): bing_images.supported_languages = ['fr-FR', 'en-US'] + bing_images.language_aliases = {} query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_bing_videos.py b/tests/unit/engines/test_bing_videos.py index 24387c888..1ae1fad37 100644 --- a/tests/unit/engines/test_bing_videos.py +++ b/tests/unit/engines/test_bing_videos.py @@ -9,6 +9,7 @@ class TestBingVideosEngine(SearxTestCase): def test_request(self): bing_videos.supported_languages = ['fr-FR', 'en-US'] + bing_videos.language_aliases = {} query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py index 413b67769..5282c6713 100644 --- a/tests/unit/engines/test_google.py +++ b/tests/unit/engines/test_google.py @@ -15,7 +15,8 @@ class TestGoogleEngine(SearxTestCase): return response def test_request(self): - google.supported_languages = ['en', 'fr', 'zh-CN'] + google.supported_languages = ['en', 'fr', 'zh-CN', 'iw'] + google.language_aliases = {'he': 'iw'} query = 'test_query' dicto = defaultdict(dict) @@ -41,6 +42,12 @@ class TestGoogleEngine(SearxTestCase): self.assertIn('zh-CN', params['url']) self.assertIn('zh-CN', params['headers']['Accept-Language']) + dicto['language'] = 'he' + params = google.request(query, dicto) + self.assertIn('google.com', params['url']) + self.assertIn('iw', params['url']) + self.assertIn('iw', params['headers']['Accept-Language']) + def test_response(self): self.assertRaises(AttributeError, google.response, None) self.assertRaises(AttributeError, google.response, []) diff --git a/tests/unit/engines/test_google_news.py b/tests/unit/engines/test_google_news.py index a041a79b9..f363444be 100644 --- a/tests/unit/engines/test_google_news.py +++ b/tests/unit/engines/test_google_news.py @@ -10,6 +10,7 @@ class TestGoogleNewsEngine(SearxTestCase): def test_request(self): google_news.supported_languages = ['en-US', 'fr-FR'] + google_news.language_aliases = {} query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_qwant.py b/tests/unit/engines/test_qwant.py index 86bfb22da..0cd1bd832 100644 --- a/tests/unit/engines/test_qwant.py +++ b/tests/unit/engines/test_qwant.py @@ -8,6 +8,7 @@ class TestQwantEngine(SearxTestCase): def test_request(self): qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR'] + qwant.language_aliases = {} query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index 133f636de..6013abd89 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -8,6 +8,7 @@ class TestSwisscowsEngine(SearxTestCase): def test_request(self): swisscows.supported_languages = ['de-AT', 'de-DE'] + swisscows.language_aliases = {} query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py index 62a409781..e5870fd00 100644 --- a/tests/unit/engines/test_wikidata.py +++ b/tests/unit/engines/test_wikidata.py @@ -27,6 +27,7 @@ class TestWikidataEngine(SearxTestCase): self.assertRaises(AttributeError, wikidata.response, '[]') wikidata.supported_languages = ['en', 'es'] + wikidata.language_aliases = {} response = mock.Mock(text='', search_params={"language": "en"}) self.assertEqual(wikidata.response(response), []) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 21d0225ed..5b7408f41 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -8,7 +8,8 @@ from searx.testing import SearxTestCase class TestWikipediaEngine(SearxTestCase): def test_request(self): - wikipedia.supported_languages = ['fr', 'en'] + wikipedia.supported_languages = ['fr', 'en', 'no'] + wikipedia.language_aliases = {'nb': 'no'} query = 'test_query' dicto = defaultdict(dict) @@ -25,9 +26,13 @@ class TestWikipediaEngine(SearxTestCase): self.assertIn('Test_Query', params['url']) self.assertNotIn('test_query', params['url']) + dicto['language'] = 'nb' + params = wikipedia.request(query, dicto) + self.assertIn('no.wikipedia.org', params['url']) + dicto['language'] = 'xx' params = wikipedia.request(query, dicto) - self.assertIn('en', params['url']) + self.assertIn('en.wikipedia.org', params['url']) def test_response(self): dicto = defaultdict(dict)