Python 3 migration: remove the e() helper; rename html -> html_text (avoids shadowing the html module)

2014-09-28 18:03:08 +02:00
parent ab63828019
commit acc5242de0
4 changed files with 56 additions and 60 deletions
--- a/common.py
+++ b/common.py
@@ -1,8 +1,8 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-

 if '__main__' == __name__:
-	print '''this is a library file, which is not meant to be executed'''
+	print('''this is a library file, which is not meant to be executed''')
 	exit(-1)

 import sys, os, time, pickle
@@ -27,22 +27,11 @@ def debug_enabled():
 #	return True
 	return False

-def e(data):
-	if data:
-		if unicode == type(data):
-			return data.encode('utf8')
-		elif str == type(data):
-			return data.encode('string-escape')
-		else:
-			return data
-	else:
-		return "''"
-
 def logger(severity, message):
 #	sev = ( 'err', 'warn', 'info' )
 #	if severity in sev:
 	args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message)
-	sys.stderr.write(e('%s %s %s: %s' % args) + '\n')
+	sys.stderr.write('%s %s %s: %s\n' % args)

 def conf_save(obj):
 	with open(conf('persistent_storage'), 'wb') as fd:
@@ -62,7 +51,7 @@ def get_version_git():
 	first_line = p.stdout.readline()

 	if 0 == p.wait():
-		return "version (Git) '%s'" % e(first_line.strip())
+		return "version (Git) '%s'" % str(first_line.strip())
 	else:
 		return "(unknown version)"

--- a/local_config.py.skel
+++ b/local_config.py.skel
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3

 import time, sys

@@ -7,7 +7,7 @@ try: logger
 except NameError: logger = _logger

 if '__main__' == __name__:
-	print '''this is a config file, which is not meant to be executed'''
+	print('''this is a config file, which is not meant to be executed''')
 	exit(-1)

 config = {}
@@ -28,7 +28,7 @@ config['persistent_storage'] = 'urlbot.persistent'
 config['enhanced-random-user'] = ( 'FIXME', 'FIXME' )

 def conf(val):
-	if val in config.keys():
+	if val in list(config.keys()):
 		return config[val]
 	logger('warn', 'conf(): unknown key ' + str(val))
 	return None
--- a/plugins.py
+++ b/plugins.py
@@ -1,8 +1,8 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-

 if '__main__' == __name__:
-	print '''this is a plugin file, which is not meant to be executed'''
+	print('''this is a plugin file, which is not meant to be executed''')
 	exit(-1)

 import time, random
@@ -67,7 +67,7 @@ def data_parse_other(data):

 		args = {}

-		if 'args' in p.keys():
+		if 'args' in list(p.keys()):
 			for a in p['args']:
 				if None == a: continue

@@ -81,7 +81,7 @@ def data_parse_other(data):
 		ret = p['func'](args)

 		if None != ret:
-			if 'msg' in ret.keys():
+			if 'msg' in list(ret.keys()):
 				ratelimit_touch(RATE_CHAT)
 				chat_write(ret['msg'])

@@ -167,9 +167,9 @@ def command_unicode(args):
 		return {
 			'msg': 
 				(
-					args['reply_user'] + u''': ┌────────┐''',
-					args['reply_user'] + u''': │Unicode!│''',
-					args['reply_user'] + u''': └────────┘'''
+					args['reply_user'] + ''': ┌────────┐''',
+					args['reply_user'] + ''': │Unicode!│''',
+					args['reply_user'] + ''': └────────┘'''
 				)
 		}

@@ -202,7 +202,7 @@ def command_dice(args):
 		else:
 			rnd = random.randint(1, 6)

-		dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅']
+		dice_char = ['◇', '⚀', '⚁', '⚂', '⚃', '⚄', '⚅']
 		return {
 			'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd)
 		}
@@ -298,7 +298,7 @@ def data_parse_commands(data):

 		args = {}

-		if 'args' in p.keys():
+		if 'args' in list(p.keys()):
 			for a in p['args']:
 				if None == a: continue

@@ -316,13 +316,19 @@ def data_parse_commands(data):
 		ret = p['func'](args)

 		if None != ret:
-			if 'msg' in ret.keys():
-				if str == type(ret['msg']) or unicode == type(ret['msg']):
+			if 'msg' in list(ret.keys()):
+				if str == type(ret['msg']): # FIXME 2to3
 					ratelimit_touch(RATE_CHAT)
+					if ratelimit_exceeded(RATE_CHAT):
+						return False
+
 					chat_write(ret['msg'])
 				else:
 					for line in ret['msg']:
 						ratelimit_touch(RATE_CHAT)
+						if ratelimit_exceeded(RATE_CHAT):
+							return False
+
 						chat_write(line)

 			return None
@@ -332,7 +338,7 @@ def data_parse_commands(data):
 		if ratelimit_exceeded(RATE_GLOBAL):
 			return False

-		if 'msg' in ret.keys():
+		if 'msg' in list(ret.keys()):
 			chat_write(ret['msg'])

 funcs = {}
@@ -368,7 +374,7 @@ def register(func_type, auto=False):
 		# FIXME: this is broken. dir() returns str, but not
 		# the addr of the functions which we'd need here.
 		for f in _dir:
-			print 'testing(%s)' % f
+			print('testing(%s)' % f)
 			if not f.startswith(func_type + '_'):
 				continue

--- a/urlbot.py
+++ b/urlbot.py
@@ -1,7 +1,8 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-

-import sys, os, re, time, urllib, pickle, HTMLParser, stat
+import sys, os, stat, re, time, pickle
+import urllib.request, urllib.parse, urllib.error, html.parser
 from local_config import conf, set_conf
 from common import *

@@ -11,17 +12,17 @@ hist_flag = True

 parser = None

-class urllib_user_agent_wrapper(urllib.FancyURLopener):
+class urllib_user_agent_wrapper(urllib.request.FancyURLopener):
 	version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0'''

 def fetch_page(url):
 	logger('info', 'fetching page ' + url)
 	try:
-		urllib._urlopener = urllib_user_agent_wrapper()
-		response = urllib.urlopen(url)
-		html = response.read(BUFSIZ) # ignore more than BUFSIZ
+		urllib.request._urlopener = urllib_user_agent_wrapper()
+		response = urllib.request.urlopen(url)
+		html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
 		response.close()
-		return (html, response.headers)
+		return (html_text, response.headers)
 	except IOError as e:
 		logger('warn', 'failed: ' + e.errno)

@@ -36,8 +37,8 @@ def extract_title(url):

 	logger('info', 'extracting title from ' + url)

-	(html, headers) = fetch_page(url)
-	if html:
+	(html_text, headers) = fetch_page(url)
+	if html_text:
 		charset = ''
 		if 'content-type' in headers:
 			logger('debug', 'content-type: ' + headers['content-type'])
@@ -48,21 +49,21 @@ def extract_title(url):
 			charset = re.sub('.*charset=(?P<charset>\S+).*',
 				'\g<charset>', headers['content-type'], re.IGNORECASE)

-		result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
+		if '' != charset:
+			try:
+				html_text = html_text.decode(charset)
+			except LookupError:
+				logger('warn', 'invalid charset in ' + headers['content-type'])
+
+		if str != type(html_text):
+			html_text = str(html_text)
+
+		result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html_text, re.S | re.M | re.IGNORECASE)
 		if result:
 			match = result.groups()[0]

-#			if 'charset=UTF-8' in headers['content-type']:
-#				match = unicode(match)
-
 			if None == parser:
-				parser = HTMLParser.HTMLParser()
-
-			if '' != charset:
-				try:
-					match = match.decode(charset)
-				except LookupError:
-					logger('warn', 'invalid charset in ' + headers['content-type'])
+				parser = html.parser.HTMLParser()

 			try:
 				expanded_html = parser.unescape(match)
@@ -79,15 +80,15 @@ def chat_write(message, prefix='/say '):
 	set_conf('request_counter', conf('request_counter') + 1)

 	if debug_enabled():
-		print message
+		print(message)
 	else:
 		try:
 			fd = open(fifo_path, 'wb')
-
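+			# FIXME 2to3: the encode/fallback logic below was written for Python 2 str/unicode mixing; re-check it under Python 3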
 			# FIXME: somehow, unicode chars can end up inside a <str> message,
 			# which seems to make both unicode() and ''.encode('utf8') fail.
 			try:
-				msg = unicode(prefix) + unicode(message) + '\n'
+				msg = str(prefix) + str(message) + '\n'
 				msg = msg.encode('utf8')
 			except UnicodeDecodeError:
 				msg = prefix + message + '\n'
@@ -132,16 +133,16 @@ def extract_url(data):
 			(status, title) = extract_title(r)

 			if 0 == status:
-				message = 'Title: %s: %s' % (title.strip(), e(r))
+				message = 'Title: %s: %s' % (title.strip(), r)
 			elif 1 == status:
 				logger('info', 'no message sent for non-text %s (%s)' %(r, title))
 				continue
 			elif 2 == status:
-				message = 'No title: %s' % (e(r))
+				message = 'No title: %s' % r
 			elif 3 == status:
 				message = title
 			else:
-				message = 'some error occurred when fetching %s' % e(r)
+				message = 'some error occurred when fetching %s' % r

 			message = message.replace('\n', '\\n')

@@ -159,7 +160,7 @@ def parse_pn(data):

 def parse_delete(filepath):
 	try:
-		fd = open(filepath, 'rb')
+		fd = open(filepath, 'r')
 	except IOError:
 		logger('err', 'file has vanished: ' + filepath)
 		return False
@@ -196,7 +197,7 @@ plugins.ratelimit_touch = ratelimit_touch
 plugins.register_all()

 if '__main__' == __name__:
-	print sys.argv[0] + ' ' + VERSION
+	print(sys.argv[0] + ' ' + VERSION)

 	if not os.path.exists(fifo_path):
 		logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path)
@@ -214,5 +215,5 @@ if '__main__' == __name__:

 			time.sleep(delay)
 		except KeyboardInterrupt:
-			print ""
+			print("")
 			exit(130)