1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00

Python3 migration; trashed e(); html->html_text rename

This commit is contained in:
urlbot
2014-09-28 18:03:08 +02:00
parent ab63828019
commit acc5242de0
4 changed files with 56 additions and 60 deletions

View File

@@ -1,8 +1,8 @@
#!/usr/bin/python #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
if '__main__' == __name__: if '__main__' == __name__:
print '''this is a library file, which is not meant to be executed''' print('''this is a library file, which is not meant to be executed''')
exit(-1) exit(-1)
import sys, os, time, pickle import sys, os, time, pickle
@@ -27,22 +27,11 @@ def debug_enabled():
# return True # return True
return False return False
def e(data):
if data:
if unicode == type(data):
return data.encode('utf8')
elif str == type(data):
return data.encode('string-escape')
else:
return data
else:
return "''"
def logger(severity, message): def logger(severity, message):
# sev = ( 'err', 'warn', 'info' ) # sev = ( 'err', 'warn', 'info' )
# if severity in sev: # if severity in sev:
args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message)
sys.stderr.write(e('%s %s %s: %s' % args) + '\n') sys.stderr.write('%s %s %s: %s\n' % args)
def conf_save(obj): def conf_save(obj):
with open(conf('persistent_storage'), 'wb') as fd: with open(conf('persistent_storage'), 'wb') as fd:
@@ -62,7 +51,7 @@ def get_version_git():
first_line = p.stdout.readline() first_line = p.stdout.readline()
if 0 == p.wait(): if 0 == p.wait():
return "version (Git) '%s'" % e(first_line.strip()) return "version (Git) '%s'" % str(first_line.strip())
else: else:
return "(unknown version)" return "(unknown version)"

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/python3
import time, sys import time, sys
@@ -7,7 +7,7 @@ try: logger
except NameError: logger = _logger except NameError: logger = _logger
if '__main__' == __name__: if '__main__' == __name__:
print '''this is a config file, which is not meant to be executed''' print('''this is a config file, which is not meant to be executed''')
exit(-1) exit(-1)
config = {} config = {}
@@ -28,7 +28,7 @@ config['persistent_storage'] = 'urlbot.persistent'
config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) config['enhanced-random-user'] = ( 'FIXME', 'FIXME' )
def conf(val): def conf(val):
if val in config.keys(): if val in list(config.keys()):
return config[val] return config[val]
logger('warn', 'conf(): unknown key ' + str(val)) logger('warn', 'conf(): unknown key ' + str(val))
return None return None

View File

@@ -1,8 +1,8 @@
#!/usr/bin/python #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
if '__main__' == __name__: if '__main__' == __name__:
print '''this is a plugin file, which is not meant to be executed''' print('''this is a plugin file, which is not meant to be executed''')
exit(-1) exit(-1)
import time, random import time, random
@@ -67,7 +67,7 @@ def data_parse_other(data):
args = {} args = {}
if 'args' in p.keys(): if 'args' in list(p.keys()):
for a in p['args']: for a in p['args']:
if None == a: continue if None == a: continue
@@ -81,7 +81,7 @@ def data_parse_other(data):
ret = p['func'](args) ret = p['func'](args)
if None != ret: if None != ret:
if 'msg' in ret.keys(): if 'msg' in list(ret.keys()):
ratelimit_touch(RATE_CHAT) ratelimit_touch(RATE_CHAT)
chat_write(ret['msg']) chat_write(ret['msg'])
@@ -167,9 +167,9 @@ def command_unicode(args):
return { return {
'msg': 'msg':
( (
args['reply_user'] + u''': ┌────────┐''', args['reply_user'] + ''': ┌────────┐''',
args['reply_user'] + u''': │Unicode!│''', args['reply_user'] + ''': │Unicode!│''',
args['reply_user'] + u''': └────────┘''' args['reply_user'] + ''': └────────┘'''
) )
} }
@@ -202,7 +202,7 @@ def command_dice(args):
else: else:
rnd = random.randint(1, 6) rnd = random.randint(1, 6)
dice_char = [u'', u'', u'', u'', u'', u'', u''] dice_char = ['', '', '', '', '', '', '']
return { return {
'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd) 'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd)
} }
@@ -298,7 +298,7 @@ def data_parse_commands(data):
args = {} args = {}
if 'args' in p.keys(): if 'args' in list(p.keys()):
for a in p['args']: for a in p['args']:
if None == a: continue if None == a: continue
@@ -316,13 +316,19 @@ def data_parse_commands(data):
ret = p['func'](args) ret = p['func'](args)
if None != ret: if None != ret:
if 'msg' in ret.keys(): if 'msg' in list(ret.keys()):
if str == type(ret['msg']) or unicode == type(ret['msg']): if str == type(ret['msg']): # FIXME 2to3
ratelimit_touch(RATE_CHAT) ratelimit_touch(RATE_CHAT)
if ratelimit_exceeded(RATE_CHAT):
return False
chat_write(ret['msg']) chat_write(ret['msg'])
else: else:
for line in ret['msg']: for line in ret['msg']:
ratelimit_touch(RATE_CHAT) ratelimit_touch(RATE_CHAT)
if ratelimit_exceeded(RATE_CHAT):
return False
chat_write(line) chat_write(line)
return None return None
@@ -332,7 +338,7 @@ def data_parse_commands(data):
if ratelimit_exceeded(RATE_GLOBAL): if ratelimit_exceeded(RATE_GLOBAL):
return False return False
if 'msg' in ret.keys(): if 'msg' in list(ret.keys()):
chat_write(ret['msg']) chat_write(ret['msg'])
funcs = {} funcs = {}
@@ -368,7 +374,7 @@ def register(func_type, auto=False):
# FIXME: this is broken. dir() returns str, but not # FIXME: this is broken. dir() returns str, but not
# the addr of the functions which we'd need here. # the addr of the functions which we'd need here.
for f in _dir: for f in _dir:
print 'testing(%s)' % f print('testing(%s)' % f)
if not f.startswith(func_type + '_'): if not f.startswith(func_type + '_'):
continue continue

View File

@@ -1,7 +1,8 @@
#!/usr/bin/python #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys, os, re, time, urllib, pickle, HTMLParser, stat import sys, os, stat, re, time, pickle
import urllib.request, urllib.parse, urllib.error, html.parser
from local_config import conf, set_conf from local_config import conf, set_conf
from common import * from common import *
@@ -11,17 +12,17 @@ hist_flag = True
parser = None parser = None
class urllib_user_agent_wrapper(urllib.FancyURLopener): class urllib_user_agent_wrapper(urllib.request.FancyURLopener):
version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0'''
def fetch_page(url): def fetch_page(url):
logger('info', 'fetching page ' + url) logger('info', 'fetching page ' + url)
try: try:
urllib._urlopener = urllib_user_agent_wrapper() urllib.request._urlopener = urllib_user_agent_wrapper()
response = urllib.urlopen(url) response = urllib.request.urlopen(url)
html = response.read(BUFSIZ) # ignore more than BUFSIZ html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
response.close() response.close()
return (html, response.headers) return (html_text, response.headers)
except IOError as e: except IOError as e:
logger('warn', 'failed: ' + e.errno) logger('warn', 'failed: ' + e.errno)
@@ -36,8 +37,8 @@ def extract_title(url):
logger('info', 'extracting title from ' + url) logger('info', 'extracting title from ' + url)
(html, headers) = fetch_page(url) (html_text, headers) = fetch_page(url)
if html: if html_text:
charset = '' charset = ''
if 'content-type' in headers: if 'content-type' in headers:
logger('debug', 'content-type: ' + headers['content-type']) logger('debug', 'content-type: ' + headers['content-type'])
@@ -48,21 +49,21 @@ def extract_title(url):
charset = re.sub('.*charset=(?P<charset>\S+).*', charset = re.sub('.*charset=(?P<charset>\S+).*',
'\g<charset>', headers['content-type'], re.IGNORECASE) '\g<charset>', headers['content-type'], re.IGNORECASE)
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE) if '' != charset:
try:
html_text = html_text.decode(charset)
except LookupError:
logger('warn', 'invalid charset in ' + headers['content-type'])
if str != type(html_text):
html_text = str(html_text)
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html_text, re.S | re.M | re.IGNORECASE)
if result: if result:
match = result.groups()[0] match = result.groups()[0]
# if 'charset=UTF-8' in headers['content-type']:
# match = unicode(match)
if None == parser: if None == parser:
parser = HTMLParser.HTMLParser() parser = html.parser.HTMLParser()
if '' != charset:
try:
match = match.decode(charset)
except LookupError:
logger('warn', 'invalid charset in ' + headers['content-type'])
try: try:
expanded_html = parser.unescape(match) expanded_html = parser.unescape(match)
@@ -79,15 +80,15 @@ def chat_write(message, prefix='/say '):
set_conf('request_counter', conf('request_counter') + 1) set_conf('request_counter', conf('request_counter') + 1)
if debug_enabled(): if debug_enabled():
print message print(message)
else: else:
try: try:
fd = open(fifo_path, 'wb') fd = open(fifo_path, 'wb')
# FIXME 2to3
# FIXME: somehow, unicode chars can end up inside a <str> message, # FIXME: somehow, unicode chars can end up inside a <str> message,
# which seems to make both unicode() and ''.encode('utf8') fail. # which seems to make both unicode() and ''.encode('utf8') fail.
try: try:
msg = unicode(prefix) + unicode(message) + '\n' msg = str(prefix) + str(message) + '\n'
msg = msg.encode('utf8') msg = msg.encode('utf8')
except UnicodeDecodeError: except UnicodeDecodeError:
msg = prefix + message + '\n' msg = prefix + message + '\n'
@@ -132,16 +133,16 @@ def extract_url(data):
(status, title) = extract_title(r) (status, title) = extract_title(r)
if 0 == status: if 0 == status:
message = 'Title: %s: %s' % (title.strip(), e(r)) message = 'Title: %s: %s' % (title.strip(), r)
elif 1 == status: elif 1 == status:
logger('info', 'no message sent for non-text %s (%s)' %(r, title)) logger('info', 'no message sent for non-text %s (%s)' %(r, title))
continue continue
elif 2 == status: elif 2 == status:
message = 'No title: %s' % (e(r)) message = 'No title: %s' % r
elif 3 == status: elif 3 == status:
message = title message = title
else: else:
message = 'some error occurred when fetching %s' % e(r) message = 'some error occurred when fetching %s' % r
message = message.replace('\n', '\\n') message = message.replace('\n', '\\n')
@@ -159,7 +160,7 @@ def parse_pn(data):
def parse_delete(filepath): def parse_delete(filepath):
try: try:
fd = open(filepath, 'rb') fd = open(filepath, 'r')
except IOError: except IOError:
logger('err', 'file has vanished: ' + filepath) logger('err', 'file has vanished: ' + filepath)
return False return False
@@ -196,7 +197,7 @@ plugins.ratelimit_touch = ratelimit_touch
plugins.register_all() plugins.register_all()
if '__main__' == __name__: if '__main__' == __name__:
print sys.argv[0] + ' ' + VERSION print(sys.argv[0] + ' ' + VERSION)
if not os.path.exists(fifo_path): if not os.path.exists(fifo_path):
logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path) logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path)
@@ -214,5 +215,5 @@ if '__main__' == __name__:
time.sleep(delay) time.sleep(delay)
except KeyboardInterrupt: except KeyboardInterrupt:
print "" print("")
exit(130) exit(130)