From acc5242de0847f7cbc870aae5053999845b260d0 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 28 Sep 2014 18:03:08 +0200 Subject: [PATCH] Python3 migration; trashed e(); html->html_text rename --- common.py | 19 +++----------- local_config.py.skel | 6 ++--- plugins.py | 32 ++++++++++++++---------- urlbot.py | 59 ++++++++++++++++++++++---------------------- 4 files changed, 56 insertions(+), 60 deletions(-) diff --git a/common.py b/common.py index c94932f..c961257 100644 --- a/common.py +++ b/common.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- if '__main__' == __name__: - print '''this is a library file, which is not meant to be executed''' + print('''this is a library file, which is not meant to be executed''') exit(-1) import sys, os, time, pickle @@ -27,22 +27,11 @@ def debug_enabled(): # return True return False -def e(data): - if data: - if unicode == type(data): - return data.encode('utf8') - elif str == type(data): - return data.encode('string-escape') - else: - return data - else: - return "''" - def logger(severity, message): # sev = ( 'err', 'warn', 'info' ) # if severity in sev: args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) - sys.stderr.write(e('%s %s %s: %s' % args) + '\n') + sys.stderr.write('%s %s %s: %s\n' % args) def conf_save(obj): with open(conf('persistent_storage'), 'wb') as fd: @@ -62,7 +51,7 @@ def get_version_git(): first_line = p.stdout.readline() if 0 == p.wait(): - return "version (Git) '%s'" % e(first_line.strip()) + return "version (Git) '%s'" % str(first_line.strip()) else: return "(unknown version)" diff --git a/local_config.py.skel b/local_config.py.skel index 1208031..dbb806a 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import time, sys @@ -7,7 +7,7 @@ try: logger except NameError: logger = _logger if '__main__' == __name__: - print '''this is a config file, which is not meant to be executed''' + print('''this is a config file, which is not meant to be executed''') exit(-1) config = {} @@ -28,7 +28,7 @@ config['persistent_storage'] = 'urlbot.persistent' config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) def conf(val): - if val in config.keys(): + if val in list(config.keys()): return config[val] logger('warn', 'conf(): unknown key ' + str(val)) return None diff --git a/plugins.py b/plugins.py index bb4744b..35db5a4 100644 --- a/plugins.py +++ b/plugins.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- if '__main__' == __name__: - print '''this is a plugin file, which is not meant to be executed''' + print('''this is a plugin file, which is not meant to be executed''') exit(-1) import time, random @@ -67,7 +67,7 @@ def data_parse_other(data): args = {} - if 'args' in p.keys(): + if 'args' in list(p.keys()): for a in p['args']: if None == a: continue @@ -81,7 +81,7 @@ def data_parse_other(data): ret = p['func'](args) if None != ret: - if 'msg' in ret.keys(): + if 'msg' in list(ret.keys()): ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) @@ -167,9 +167,9 @@ def command_unicode(args): return { 'msg': ( - args['reply_user'] + u''': ┌────────┐''', - args['reply_user'] + u''': │Unicode!│''', - args['reply_user'] + u''': └────────┘''' + args['reply_user'] + ''': ┌────────┐''', + args['reply_user'] + ''': │Unicode!│''', + args['reply_user'] + ''': └────────┘''' ) } @@ -202,7 +202,7 @@ def command_dice(args): else: rnd = random.randint(1, 6) - dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] + dice_char = ['◇', '⚀', '⚁', '⚂', '⚃', '⚄', '⚅'] return { 'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd) } @@ -298,7 +298,7 @@ def data_parse_commands(data): args = {} - if 'args' in p.keys(): + if 'args' in list(p.keys()): for a in p['args']: if None == a: continue @@ -316,13 +316,19 @@ def data_parse_commands(data): ret = p['func'](args) if None != ret: - if 'msg' in ret.keys(): - if str == type(ret['msg']) or unicode == type(ret['msg']): + if 'msg' in list(ret.keys()): + if str == type(ret['msg']): # FIXME 2to3 ratelimit_touch(RATE_CHAT) + if ratelimit_exceeded(RATE_CHAT): + return False + chat_write(ret['msg']) else: for line in ret['msg']: ratelimit_touch(RATE_CHAT) + if ratelimit_exceeded(RATE_CHAT): + return False + chat_write(line) return None @@ -332,7 +338,7 @@ def data_parse_commands(data): if ratelimit_exceeded(RATE_GLOBAL): return False - if 'msg' in ret.keys(): + if 'msg' in list(ret.keys()): chat_write(ret['msg']) funcs = {} @@ -368,7 +374,7 @@ def register(func_type, auto=False): # FIXME: this is broken. dir() returns str, but not # the addr of the functions which we'd need here. for f in _dir: - print 'testing(%s)' % f + print('testing(%s)' % f) if not f.startswith(func_type + '_'): continue diff --git a/urlbot.py b/urlbot.py index 9825a16..260836f 100755 --- a/urlbot.py +++ b/urlbot.py @@ -1,7 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- -import sys, os, re, time, urllib, pickle, HTMLParser, stat +import sys, os, stat, re, time, pickle +import urllib.request, urllib.parse, urllib.error, html.parser from local_config import conf, set_conf from common import * @@ -11,17 +12,17 @@ hist_flag = True parser = None -class urllib_user_agent_wrapper(urllib.FancyURLopener): +class urllib_user_agent_wrapper(urllib.request.FancyURLopener): version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' def fetch_page(url): logger('info', 'fetching page ' + url) try: - urllib._urlopener = urllib_user_agent_wrapper() - response = urllib.urlopen(url) - html = response.read(BUFSIZ) # ignore more than BUFSIZ + urllib.request._urlopener = urllib_user_agent_wrapper() + response = urllib.request.urlopen(url) + html_text = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() - return (html, response.headers) + return (html_text, response.headers) except IOError as e: logger('warn', 'failed: ' + e.errno) @@ -36,8 +37,8 @@ def extract_title(url): logger('info', 'extracting title from ' + url) - (html, headers) = fetch_page(url) - if html: + (html_text, headers) = fetch_page(url) + if html_text: charset = '' if 'content-type' in headers: logger('debug', 'content-type: ' + headers['content-type']) @@ -48,21 +49,21 @@ def extract_title(url): charset = re.sub('.*charset=(?P\S+).*', '\g', headers['content-type'], re.IGNORECASE) - result = re.match(r'.*?(.*?).*?', html, re.S | re.M | re.IGNORECASE) + if '' != charset: + try: + html_text = html_text.decode(charset) + except LookupError: + logger('warn', 'invalid charset in ' + headers['content-type']) + + if str != type(html_text): + html_text = str(html_text) + + result = re.match(r'.*?(.*?).*?', html_text, re.S | re.M | re.IGNORECASE) if result: match = result.groups()[0] -# if 'charset=UTF-8' in headers['content-type']: -# match = unicode(match) - if None == parser: - parser = HTMLParser.HTMLParser() - - if '' != charset: - try: - match = match.decode(charset) - except LookupError: - logger('warn', 'invalid charset in ' + headers['content-type']) + parser = html.parser.HTMLParser() try: expanded_html = parser.unescape(match) @@ -79,15 +80,15 @@ def chat_write(message, prefix='/say '): set_conf('request_counter', conf('request_counter') + 1) if debug_enabled(): - print message + print(message) else: try: fd = open(fifo_path, 'wb') - +# FIXME 2to3 # FIXME: somehow, unicode chars can end up inside a message, # which seems to make both unicode() and ''.encode('utf8') fail. try: - msg = unicode(prefix) + unicode(message) + '\n' + msg = str(prefix) + str(message) + '\n' msg = msg.encode('utf8') except UnicodeDecodeError: msg = prefix + message + '\n' @@ -132,16 +133,16 @@ def extract_url(data): (status, title) = extract_title(r) if 0 == status: - message = 'Title: %s: %s' % (title.strip(), e(r)) + message = 'Title: %s: %s' % (title.strip(), r) elif 1 == status: logger('info', 'no message sent for non-text %s (%s)' %(r, title)) continue elif 2 == status: - message = 'No title: %s' % (e(r)) + message = 'No title: %s' % r elif 3 == status: message = title else: - message = 'some error occurred when fetching %s' % e(r) + message = 'some error occurred when fetching %s' % r message = message.replace('\n', '\\n') @@ -159,7 +160,7 @@ def parse_pn(data): def parse_delete(filepath): try: - fd = open(filepath, 'rb') + fd = open(filepath, 'r') except IOError: logger('err', 'file has vanished: ' + filepath) return False @@ -196,7 +197,7 @@ plugins.ratelimit_touch = ratelimit_touch plugins.register_all() if '__main__' == __name__: - print sys.argv[0] + ' ' + VERSION + print(sys.argv[0] + ' ' + VERSION) if not os.path.exists(fifo_path): logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path) @@ -214,5 +215,5 @@ if '__main__' == __name__: time.sleep(delay) except KeyboardInterrupt: - print "" + print("") exit(130)