1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00

decode HTML entities in <title>s

This commit is contained in:
urlbot
2014-08-10 22:10:00 +02:00
parent 9d8e5983a9
commit 0e7e53feb3

View File

@@ -1,6 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*-
import sys, os, re, time, urllib, pickle, random import sys, os, re, time, urllib, pickle, random, HTMLParser
BUFSIZ = 8192 BUFSIZ = 8192
delay = 0.100 # seconds delay = 0.100 # seconds
@@ -26,7 +27,12 @@ def debug_enabled():
def e(data): def e(data):
if data: if data:
if unicode == type(data):
return data.encode('utf8')
elif str == type(data):
return data.encode('string-escape') return data.encode('string-escape')
else:
return data
else: else:
return "''" return "''"
@@ -67,7 +73,7 @@ def extract_title(url):
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE) result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
if result: if result:
return (0, result.groups()[0]) return (0, parser.unescape(result.groups()[0]))
else: else:
return (2, 'no title') return (2, 'no title')
@@ -82,7 +88,8 @@ def chat_write(message, prefix='/say '):
else: else:
try: try:
fd = open(fifo_path, 'wb') fd = open(fifo_path, 'wb')
fd.write(prefix + message) msg = unicode(prefix) + unicode(message)
fd.write(msg.encode('utf8'))
fd.close() fd.close()
except IOError: except IOError:
logger('err', "couldn't print to fifo " + fifo_path) logger('err', "couldn't print to fifo " + fifo_path)
@@ -238,6 +245,7 @@ def print_version_git():
if '__main__' == __name__: if '__main__' == __name__:
print_version_git() print_version_git()
parser = HTMLParser.HTMLParser()
while 1: while 1:
try: try: