mirror of
http://aero2k.de/t/repos/urlbot-native.git
synced 2017-09-06 15:25:38 +02:00
http://a../ also triggers the bug; remove the regex workaround and wrap the call in an exception handler instead
This commit is contained in:
17
urlbot.py
17
urlbot.py
@@ -130,6 +130,10 @@ def extract_url(data):
|
|||||||
result = re.findall("(https?://[^\s>]+)", data)
|
result = re.findall("(https?://[^\s>]+)", data)
|
||||||
if result:
|
if result:
|
||||||
for url in result:
|
for url in result:
|
||||||
|
ratelimit_touch()
|
||||||
|
if ratelimit_exceeded():
|
||||||
|
return False
|
||||||
|
|
||||||
# urllib.request is broken:
|
# urllib.request is broken:
|
||||||
# >>> '.'.encode('idna')
|
# >>> '.'.encode('idna')
|
||||||
# ....
|
# ....
|
||||||
@@ -139,15 +143,11 @@ def extract_url(data):
|
|||||||
# UnicodeError: label empty or too long
|
# UnicodeError: label empty or too long
|
||||||
# >>> 'a.a.'.encode('idna')
|
# >>> 'a.a.'.encode('idna')
|
||||||
# b'a.a.'
|
# b'a.a.'
|
||||||
if re.match(r'https?://\.', url):
|
|
||||||
logger('warn', 'bug tiggered, invalid url: %s' % url)
|
|
||||||
continue
|
|
||||||
|
|
||||||
ratelimit_touch()
|
|
||||||
if ratelimit_exceeded():
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
try:
|
||||||
(status, title) = extract_title(url)
|
(status, title) = extract_title(url)
|
||||||
|
except UnicodeError:
|
||||||
|
(status, title) = (4, None)
|
||||||
|
|
||||||
if 0 == status:
|
if 0 == status:
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
@@ -183,6 +183,9 @@ def extract_url(data):
|
|||||||
message = 'No title: %s' % url
|
message = 'No title: %s' % url
|
||||||
elif 3 == status:
|
elif 3 == status:
|
||||||
message = title
|
message = title
|
||||||
|
elif 4 == status:
|
||||||
|
message = 'Bug triggered, invalid URL/domain part: %s' % url
|
||||||
|
logger('warn', message)
|
||||||
else:
|
else:
|
||||||
message = 'some error occurred when fetching %s' % url
|
message = 'some error occurred when fetching %s' % url
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user