mirror of
http://aero2k.de/t/repos/urlbot-native.git
synced 2017-09-06 15:25:38 +02:00
decode HTML entities in <title>s
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import sys, os, re, time, urllib, pickle, random
|
import sys, os, re, time, urllib, pickle, random, HTMLParser
|
||||||
|
|
||||||
BUFSIZ = 8192
|
BUFSIZ = 8192
|
||||||
delay = 0.100 # seconds
|
delay = 0.100 # seconds
|
||||||
@@ -26,7 +27,12 @@ def debug_enabled():
|
|||||||
|
|
||||||
def e(data):
|
def e(data):
|
||||||
if data:
|
if data:
|
||||||
|
if unicode == type(data):
|
||||||
|
return data.encode('utf8')
|
||||||
|
elif str == type(data):
|
||||||
return data.encode('string-escape')
|
return data.encode('string-escape')
|
||||||
|
else:
|
||||||
|
return data
|
||||||
else:
|
else:
|
||||||
return "''"
|
return "''"
|
||||||
|
|
||||||
@@ -67,7 +73,7 @@ def extract_title(url):
|
|||||||
|
|
||||||
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
|
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
|
||||||
if result:
|
if result:
|
||||||
return (0, result.groups()[0])
|
return (0, parser.unescape(result.groups()[0]))
|
||||||
else:
|
else:
|
||||||
return (2, 'no title')
|
return (2, 'no title')
|
||||||
|
|
||||||
@@ -82,7 +88,8 @@ def chat_write(message, prefix='/say '):
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
fd = open(fifo_path, 'wb')
|
fd = open(fifo_path, 'wb')
|
||||||
fd.write(prefix + message)
|
msg = unicode(prefix) + unicode(message)
|
||||||
|
fd.write(msg.encode('utf8'))
|
||||||
fd.close()
|
fd.close()
|
||||||
except IOError:
|
except IOError:
|
||||||
logger('err', "couldn't print to fifo " + fifo_path)
|
logger('err', "couldn't print to fifo " + fifo_path)
|
||||||
@@ -238,6 +245,7 @@ def print_version_git():
|
|||||||
|
|
||||||
if '__main__' == __name__:
|
if '__main__' == __name__:
|
||||||
print_version_git()
|
print_version_git()
|
||||||
|
parser = HTMLParser.HTMLParser()
|
||||||
|
|
||||||
while 1:
|
while 1:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user