Fix relative urls that do not start with '/'

This commit is contained in:
xywei 2020-07-23 11:12:19 -05:00
parent 9f2446c444
commit 1d4657b714
No known key found for this signature in database
GPG key ID: F038F98DF34297D9

View file

@@ -61,6 +61,10 @@ def extract_url(xpath_results, search_url):
# fix relative url to the search engine
url = urljoin(search_url, url)
# fix relative urls that fell through the cracks (e.g. no leading '/')
if '://' not in url:
url = urljoin(search_url, url)
# normalize url
url = normalize_url(url)