From 76fd4645c8bed2cc3b4ceaecde4cf64f7e00233a Mon Sep 17 00:00:00 2001
From: urlbot <urlbot@eagle.local.yeeer.net>
Date: Fri, 10 Oct 2014 00:01:22 +0200
Subject: [PATCH] http://a../ also triggers, remove fix, wrap exception around

---
 urlbot.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/urlbot.py b/urlbot.py
index 5d383f6..928f3fd 100755
--- a/urlbot.py
+++ b/urlbot.py
@@ -130,6 +130,10 @@ def extract_url(data):
 	result = re.findall("(https?://[^\s>]+)", data)
 	if result:
 		for url in result:
+			ratelimit_touch()
+			if ratelimit_exceeded():
+				return False
+
 # urllib.request is broken:
 # >>> '.'.encode('idna')
 # ....
@@ -139,15 +143,11 @@ def extract_url(data):
 # UnicodeError: label empty or too long
 # >>> 'a.a.'.encode('idna')
 # b'a.a.'
-			if re.match(r'https?://\.', url):
-				logger('warn', 'bug tiggered, invalid url: %s' % url)
-				continue
 
-			ratelimit_touch()
-			if ratelimit_exceeded():
-				return False
-
-			(status, title) = extract_title(url)
+			try:
+				(status, title) = extract_title(url)
+			except UnicodeError:
+				(status, title) = (4, None)
 
 			if 0 == status:
 				title = title.strip()
@@ -183,6 +183,9 @@ def extract_url(data):
 				message = 'No title: %s' % url
 			elif 3 == status:
 				message = title
+			elif 4 == status:
+				message = 'Bug triggered, invalid URL/domain part: %s' % url
+				logger('warn', message)
 			else:
 				message = 'some error occurred when fetching %s' % url