From d94d62335fa9e4c2030e84e6b1b1a472f092d043 Mon Sep 17 00:00:00 2001
From: Thorsten
Date: Sat, 28 Nov 2015 18:41:40 +0100
Subject: [PATCH] move url extraction to plugins

---
 common.py  |  6 ++--
 plugins.py | 69 ++++++++++++++++++++++++++++++++++++++++++++--
 urlbot.py  | 85 ------------------------------------------------------
 3 files changed, 70 insertions(+), 90 deletions(-)

diff --git a/common.py b/common.py
index 6a886b7..48b392b 100644
--- a/common.py
+++ b/common.py
@@ -50,9 +50,9 @@ def conf_set(key, value):
     conf_save(blob)


-def conf_get(key):
+def conf_get(key, default=None):
     blob = conf_load()
-    return blob.get(key)
+    return blob.get(key, default)


 Bucket = namedtuple("BucketConfig", ["history", "period", "max_hist_len"])
@@ -170,7 +170,7 @@ def extract_title(url):
     log = logging.getLogger(__name__)
     global parser

-    if 'repo/urlbot.git' in url:
+    if 'repo/urlbot-native.git' in url:
         log.info('repo URL found: ' + url)
         return 3, 'wee, that looks like my home repo!'

diff --git a/plugins.py b/plugins.py
index cb1f3ca..e2d3046 100644
--- a/plugins.py
+++ b/plugins.py
@@ -9,10 +9,9 @@ import types
 import unicodedata
 import urllib.parse
 import urllib.request

-# from common import *
 from common import conf_load, conf_save, RATE_GLOBAL, RATE_NO_SILENCE, VERSION, RATE_INTERACTIVE, BUFSIZ, \
-    USER_AGENT, extract_title, RATE_FUN, RATE_NO_LIMIT
+    USER_AGENT, extract_title, RATE_FUN, RATE_NO_LIMIT, conf_get, RATE_URL
 from local_config import set_conf, conf
 from string_constants import excuses, moin_strings_hi, moin_strings_bye, cakes

@@ -1043,6 +1042,72 @@ def reset_jobs(argv, **args):
     return {'msg': 'done.'}


+@pluginfunction('resolve-url-title', 'extract titles from urls', ptypes_PARSE, ratelimit_class=RATE_URL)
+def resolve_url_title(**args):
+    user = args['reply_user']
+    user_pref_nospoiler = conf_get('user_pref', {}).get(user, {}).get('spoiler', False)
+    if user_pref_nospoiler:
+        log.info('nospoiler in userconf')
+        return
+
+    result = re.findall(r'(https?://[^\s>]+)', args['data'])
+    if not result:
+        return
+
+    out = []
+    for url in result:
+        if any([re.match(b, url) for b in conf('url_blacklist')]):
+            log.info('url blacklist match for ' + url)
+            break
+
+        # urllib.request is broken:
+        # >>> '.'.encode('idna')
+        # ....
+        # UnicodeError: label empty or too long
+        # >>> '.a.'.encode('idna')
+        # ....
+        # UnicodeError: label empty or too long
+        # >>> 'a.a.'.encode('idna')
+        # b'a.a.'
+
+        try:
+            (status, title) = extract_title(url)
+        except UnicodeError as e:
+            (status, title) = (4, str(e))
+
+        if 0 == status:
+            title = title.strip()
+            message = 'Title: %s' % title
+        elif 1 == status:
+            if conf('image_preview'):
+                # of course it's fake, but it looks interesting at least
+                char = r""",._-+=\|/*`~"'"""
+                message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
+                    title, random.choice(char)
+                )
+            else:
+                log.info('no message sent for non-text %s (%s)' % (url, title))
+                continue
+        elif 2 == status:
+            message = '(No title)'
+        elif 3 == status:
+            message = title
+        elif 4 == status:
+            message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
+            log.warn(message)
+        else:
+            message = 'some error occurred when fetching %s' % url
+
+        message = message.replace('\n', '\\n')
+
+        log.info('adding to out buf: ' + message)
+        out.append(message)
+
+    return {
+        'msg': out
+    }
+
+
 def else_command(args):
     log.info('sent short info')
     return {
diff --git a/urlbot.py b/urlbot.py
index 141cbc5..0b983a5 100755
--- a/urlbot.py
+++ b/urlbot.py
@@ -155,77 +155,6 @@ class UrlBot(IdleBot):
             mtype='groupchat'
         )

-    # TODO: plugin?
-    def extract_url(self, data, msg_obj):
-        result = re.findall(r'(https?://[^\s>]+)', data)
-        if not result:
-            return
-
-        ret = None
-        out = []
-        for url in result:
-            # if rate_limit(RATE_NO_SILENCE | RATE_GLOBAL):
-            #     return False
-
-            flag = False
-            for b in conf('url_blacklist'):
-                if re.match(b, url):
-                    flag = True
-                    self.logger.info('url blacklist match for ' + url)
-                    break
-
-            if flag:
-                # an URL has matched the blacklist, continue to the next URL
-                continue
-
-            # urllib.request is broken:
-            # >>> '.'.encode('idna')
-            # ....
-            # UnicodeError: label empty or too long
-            # >>> '.a.'.encode('idna')
-            # ....
-            # UnicodeError: label empty or too long
-            # >>> 'a.a.'.encode('idna')
-            # b'a.a.'
-
-            try:
-                (status, title) = extract_title(url)
-            except UnicodeError as e:
-                (status, title) = (4, str(e))
-
-            if 0 == status:
-                title = title.strip()
-                message = 'Title: %s' % title
-            elif 1 == status:
-                if conf('image_preview'):
-                    # of course it's fake, but it looks interesting at least
-                    char = r""",._-+=\|/*`~"'"""
-                    message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
-                        title, random.choice(char)
-                    )
-                else:
-                    self.logger.info('no message sent for non-text %s (%s)' % (url, title))
-                    continue
-            elif 2 == status:
-                message = '(No title)'
-            elif 3 == status:
-                message = title
-            elif 4 == status:
-                message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
-                self.logger.warn(message)
-            else:
-                message = 'some error occurred when fetching %s' % url
-
-            message = message.replace('\n', '\\n')
-
-            self.logger.info('adding to out buf: ' + message)
-            out.append(message)
-            ret = True
-
-        if ret and rate_limit(RATE_URL | RATE_GLOBAL):
-            self.send_reply(out, msg_obj)
-        return ret
-
     def handle_msg(self, msg_obj):
         """
         called for incoming messages
@@ -245,20 +174,6 @@ class UrlBot(IdleBot):
             self.logger.info('no spoiler for: ' + content)
             return

-        arg_user = msg_obj['mucnick']
-        blob_userpref = conf_load().get('user_pref', [])
-        nospoiler = False
-
-        if arg_user in blob_userpref:
-            if 'spoiler' in blob_userpref[arg_user]:
-                if not blob_userpref[arg_user]['spoiler']:
-                    self.logger.info('nospoiler from conf')
-                    nospoiler = True
-
-        if not nospoiler:
-            # TODO: why not make this a plugin?
-            self.extract_url(content, msg_obj)
-
         self.data_parse_commands(msg_obj)
         self.data_parse_other(msg_obj)
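
Note (not part of the patch): the `default` parameter added to `conf_get()` is what makes the chained lookup in `resolve_url_title()` safe when no user preferences have been stored yet. A minimal standalone sketch of that pattern, with an in-memory dict standing in for the config blob that the real `conf_get()` reads via `conf_load()`:

    # Standalone sketch only: `_blob` stands in for the persisted config
    # that the real conf_get() obtains from conf_load().
    _blob = {'user_pref': {'alice': {'spoiler': True}}}


    def conf_get(key, default=None):
        """Return a stored value, or `default` if the key is missing."""
        return _blob.get(key, default)


    # Without the default, conf_get('user_pref') returns None on a fresh
    # config and the chained .get() calls raise AttributeError. With {},
    # the chain falls through to False for unknown users.
    user = 'bob'
    user_pref_nospoiler = conf_get('user_pref', {}).get(user, {}).get('spoiler', False)
    print(user_pref_nospoiler)  # False: no preference stored for 'bob'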