From e4d23e07c09bd48b91fb84b11ad3594e8898065b Mon Sep 17 00:00:00 2001 From: urlbot Date: Thu, 9 Oct 2014 22:48:23 +0200 Subject: [PATCH] fix crash for https?://\..* links ('.'.encode('idna') fails) --- urlbot.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/urlbot.py b/urlbot.py index 13da26d..5d383f6 100755 --- a/urlbot.py +++ b/urlbot.py @@ -130,6 +130,19 @@ def extract_url(data): result = re.findall("(https?://[^\s>]+)", data) if result: for url in result: +# urllib.request is broken: +# >>> '.'.encode('idna') +# .... +# UnicodeError: label empty or too long +# >>> '.a.'.encode('idna') +# .... +# UnicodeError: label empty or too long +# >>> 'a.a.'.encode('idna') +# b'a.a.' + if re.match(r'https?://\.', url): + logger('warn', 'bug tiggered, invalid url: %s' % url) + continue + ratelimit_touch() if ratelimit_exceeded(): return False