Skip to content

Commit

Permalink
Detect non-URLs from forbidden ASCII characters in naive url detector
Browse files Browse the repository at this point in the history
  • Loading branch information
arza-zara committed Oct 2, 2019
1 parent 949eeee commit 5008b3b
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 3 additions & 1 deletion qutebrowser/utils/urlutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ def _is_url_naive(urlstr):
return False

host = url.host()
- return '.' in host and not host.endswith('.')
+ tld = r'\.([^.0-9_-]+|xn--[a-z0-9-]+)$'
+ forbidden = r'[\u0000-\u002c\u002f-\u002f\u003a-\u0060\u007b-\u00b6]'
+ return bool(re.search(tld, host) and not re.search(forbidden, host))


def _is_url_dns(urlstr):
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/utils/test_urlutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ def test_get_search_url_invalid(url):
(True, True, True, 'qutebrowser.org'),
(True, True, True, ' qutebrowser.org '),
(True, True, False, 'http://user:password@example.com/foo?bar=baz#fish'),
+ (True, True, True, 'existing-tld.domains'),
+ # Internationalized domain names
+ (True, True, True, '\u4E2D\u56FD.\u4E2D\u56FD'), # Chinese TLD
+ (True, True, True, 'xn--fiqs8s.xn--fiqs8s'), # The same in punycode
# IPs
(True, True, False, '127.0.0.1'),
(True, True, False, '::1'),
Expand Down Expand Up @@ -367,6 +371,8 @@ def test_get_search_url_invalid(url):
(False, True, True, 'deadbeef'),
(False, True, True, 'hello.'),
(False, True, False, 'site:cookies.com oatmeal raisin'),
+ (False, True, True, 'example.search_string'),
+ (False, True, True, 'example_search.string'),
# no DNS because there is no host
(False, True, False, 'foo::bar'),
# Valid search term with autosearch
Expand Down

0 comments on commit 5008b3b

Please sign in to comment.