work around UnicodeDecodeError raised by HTMLParser's unescape() on unicode input

2014-08-20 02:44:11 +02:00
parent 1f3d2577f0
commit a70515ad6a
1 changed files with 6 additions and 1 deletions
--- a/eventlooper.py
+++ b/eventlooper.py
@@ -73,7 +73,12 @@ def extract_title(url):

 		result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
 		if result:
-			return (0, parser.unescape(result.groups()[0]))
+			try:
+				expanded_html = parser.unescape(result.groups()[0])
+			except UnicodeDecodeError as e: # idk why this can happen, but it does
+				logger('warn', 'parser.unescape() expoded here: ' + str(e))
+				expanded_html = result.groups()[0]
+			return (0, expanded_html)
 		else:
 			return (2, 'no title')