mirror of
http://aero2k.de/t/repos/urlbot-native.git
synced 2017-09-06 15:25:38 +02:00
Work around error in HTMLParser with Unicode input
This commit is contained in:
@@ -73,7 +73,12 @@ def extract_title(url):
|
|||||||
|
|
||||||
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
|
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
|
||||||
if result:
|
if result:
|
||||||
return (0, parser.unescape(result.groups()[0]))
|
try:
|
||||||
|
expanded_html = parser.unescape(result.groups()[0])
|
||||||
|
except UnicodeDecodeError as e: # idk why this can happen, but it does
|
||||||
|
logger('warn', 'parser.unescape() expoded here: ' + str(e))
|
||||||
|
expanded_html = result.groups()[0]
|
||||||
|
return (0, expanded_html)
|
||||||
else:
|
else:
|
||||||
return (2, 'no title')
|
return (2, 'no title')
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user