Fix crash on links whose hostname starts with a dot (https?://\..*): '.'.encode('idna') raises UnicodeError
This commit is contained in:
13
urlbot.py
13
urlbot.py
@@ -130,6 +130,19 @@ def extract_url(data):
|
|||||||
result = re.findall("(https?://[^\s>]+)", data)
|
result = re.findall("(https?://[^\s>]+)", data)
|
||||||
if result:
|
if result:
|
||||||
for url in result:
|
for url in result:
|
||||||
|
# urllib.request crashes on hostnames that begin with an empty label, because the 'idna' codec rejects them:
|
||||||
|
# >>> '.'.encode('idna')
|
||||||
|
# ....
|
||||||
|
# UnicodeError: label empty or too long
|
||||||
|
# >>> '.a.'.encode('idna')
|
||||||
|
# ....
|
||||||
|
# UnicodeError: label empty or too long
|
||||||
|
# >>> 'a.a.'.encode('idna')
|
||||||
|
# b'a.a.'
|
||||||
|
if re.match(r'https?://\.', url):
|
||||||
|
logger('warn', 'bug tiggered, invalid url: %s' % url)
|
||||||
|
continue
|
||||||
|
|
||||||
ratelimit_touch()
|
ratelimit_touch()
|
||||||
if ratelimit_exceeded():
|
if ratelimit_exceeded():
|
||||||
return False
|
return False
|
||||||
|
|||||||
Reference in New Issue
Block a user