Merge pull request #2350 from dalf/mod-engines-report-captcha

[mod] stackoverflow & yandex: detect CAPTCHA response
This commit is contained in:
Alexandre Flament 2020-12-03 13:50:12 +01:00 committed by GitHub
commit cec73b5dcf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 2 deletions

View file

@@ -10,9 +10,10 @@
@parse url, title, content
"""
from urllib.parse import urlencode, urljoin
from urllib.parse import urlencode, urljoin, urlparse
from lxml import html
from searx.utils import extract_text
from searx.exceptions import SearxEngineCaptchaException
# engine dependent config
categories = ['it']
@@ -37,6 +38,10 @@ def request(query, params):
# get response from search-request
def response(resp):
resp_url = urlparse(resp.url)
if resp_url.path.startswith('/nocaptcha'):
raise SearxEngineCaptchaException()
results = []
dom = html.fromstring(resp.text)

View file

@@ -9,9 +9,10 @@
@parse url, title, content
"""
from urllib.parse import urlencode
from urllib.parse import urlencode, urlparse
from lxml import html
from searx import logger
from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('yandex engine')
@@ -47,6 +48,10 @@ def request(query, params):
# get response from search-request
def response(resp):
resp_url = urlparse(resp.url)
if resp_url.path.startswith('/showcaptcha'):
raise SearxEngineCaptchaException()
dom = html.fromstring(resp.text)
results = []