From 0e7e53feb32dfa7470abd92e67747759f4f8db52 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 10 Aug 2014 22:10:00 +0200 Subject: [PATCH] decode HTML entities in <title>s --- eventlooper.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index ee8ae4d..6ec93b3 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -1,6 +1,7 @@ #!/usr/bin/python +# -*- coding: utf-8 -*- -import sys, os, re, time, urllib, pickle, random +import sys, os, re, time, urllib, pickle, random, HTMLParser BUFSIZ = 8192 delay = 0.100 # seconds @@ -26,7 +27,12 @@ def debug_enabled(): def e(data): if data: - return data.encode('string-escape') + if unicode == type(data): + return data.encode('utf8') + elif str == type(data): + return data.encode('string-escape') + else: + return data else: return "''" @@ -67,7 +73,7 @@ def extract_title(url): result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE) if result: - return (0, result.groups()[0]) + return (0, parser.unescape(result.groups()[0])) else: return (2, 'no title') @@ -82,7 +88,8 @@ def chat_write(message, prefix='/say '): else: try: fd = open(fifo_path, 'wb') - fd.write(prefix + message) + msg = unicode(prefix) + unicode(message) + fd.write(msg.encode('utf8')) fd.close() except IOError: logger('err', "couldn't print to fifo " + fifo_path) @@ -238,6 +245,7 @@ def print_version_git(): if '__main__' == __name__: print_version_git() + parser = HTMLParser.HTMLParser() while 1: try: