refactor urlbot plugin structure and code style

This commit is contained in:
Thorsten S
2015-11-20 21:07:48 +01:00
parent 6b11dbd2e2
commit 1082d968e6
8 changed files with 571 additions and 579 deletions

View File

@@ -1,11 +1,12 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import html.parser
import logging
import os
import pickle
import re
import sys
import urllib.request
if '__main__' == __name__:
print('''this is a library file, which is not meant to be executed''')
exit(-1)
import sys, time, pickle, os, logging
from local_config import conf from local_config import conf
RATE_GLOBAL = 0x01 RATE_GLOBAL = 0x01
@@ -22,21 +23,12 @@ basedir = '.'
if 2 == len(sys.argv): if 2 == len(sys.argv):
basedir = sys.argv[1] basedir = sys.argv[1]
logging.basicConfig(
level=logging.INFO,
format=sys.argv[0]+' %(asctime)s %(levelname).1s %(funcName)-15s %(message)s'
)
log = logging.getLogger()
log.plugin = log.info # ... probably fix this sometime (FIXME)
def debug_enabled():
    """Tell callers whether debug mode is switched on (currently never)."""
    # Change this constant to True to switch debugging on.
    enabled = False
    return enabled
def conf_save(obj): def conf_save(obj):
with open(conf('persistent_storage'), 'wb') as fd: with open(conf('persistent_storage'), 'wb') as fd:
return pickle.dump(obj, fd) return pickle.dump(obj, fd)
def conf_load(): def conf_load():
path = conf('persistent_storage') path = conf('persistent_storage')
if os.path.isfile(path): if os.path.isfile(path):
@@ -46,6 +38,7 @@ def conf_load():
else: else:
return {} return {}
def get_version_git(): def get_version_git():
import subprocess import subprocess
@@ -63,4 +56,75 @@ def get_version_git():
else: else:
return "(unknown version)" return "(unknown version)"
VERSION = get_version_git() VERSION = get_version_git()
def fetch_page(url):
    """Download at most BUFSIZ bytes from *url*.

    :param url: address of the page to fetch
    :return: ``(0, body, headers)`` on success, where *body* is the raw data
        read from the response and *headers* the response headers;
        ``(1, error_text, 'dummy')`` if building or sending the request failed
    """
    log = logging.getLogger(__name__)
    log.info('fetching page ' + url)
    try:
        request = urllib.request.Request(url)
        request.add_header('User-Agent', USER_AGENT)
        # The context manager closes the connection even when read() raises.
        with urllib.request.urlopen(request) as response:
            html_text = response.read(BUFSIZ)  # ignore more than BUFSIZ
            headers = response.headers
        return 0, html_text, headers
    except Exception as e:
        # Deliberately broad: every failure is reported to the caller as code 1.
        log.warning('failed: %s' % e)
        return 1, str(e), 'dummy'
def extract_title(url):
    """Fetch *url* and return the text of its HTML ``<title>`` element.

    :param url: address of the page to inspect
    :return: a ``(code, text)`` tuple where *code* is

        * ``0``  -- title found, *text* is the title with HTML entities expanded
        * ``1``  -- content-type is not ``text/*``, *text* is the content-type
        * ``2``  -- no ``<title>`` element in the fetched data
        * ``3``  -- URL points to the bot's own repo, or fetching failed
        * ``-1`` -- the response body was empty
    """
    log = logging.getLogger(__name__)

    if 'repo/urlbot.git' in url:
        log.info('repo URL found: ' + url)
        return 3, 'wee, that looks like my home repo!'

    log.info('extracting title from ' + url)

    (code, html_text, headers) = fetch_page(url)

    if 1 == code:
        return 3, 'failed: %s for %s' % (html_text, url)

    if not html_text:
        return -1, 'error'

    charset = ''
    if 'content-type' in headers:
        content_type = headers['content-type']
        log.debug('content-type: ' + content_type)

        if not content_type.startswith('text/'):
            return 1, content_type

        # flags must be passed by keyword: the 4th positional argument of
        # re.sub()/re.search() variants is easily confused with `count`.
        # Optional quotes and a trailing ';' are not part of the codec name.
        found = re.search(
            r'''charset=["']?([^\s;"']+)''', content_type, flags=re.IGNORECASE
        )
        if found:
            charset = found.group(1)

    if isinstance(html_text, bytes):
        if charset:
            try:
                # errors='replace': the body may be truncated in the middle of
                # a multi-byte sequence, which must not abort title extraction.
                html_text = html_text.decode(charset, errors='replace')
            except LookupError:
                log.warning("invalid charset in '%s': '%s'" % (headers['content-type'], charset))

        if isinstance(html_text, bytes):
            # No (usable) charset announced: fall back to UTF-8 instead of
            # str(bytes), which would produce the "b'...'" repr.
            html_text = html_text.decode('utf-8', errors='replace')
    elif not isinstance(html_text, str):
        html_text = str(html_text)

    result = re.search(r'<title.*?>(.*?)</title>', html_text, re.S | re.IGNORECASE)
    if not result:
        return 2, 'no title'

    # html.unescape() replaces HTMLParser.unescape(), which no longer exists
    # in Python >= 3.9; it needs no parser instance and no module-level state.
    return 0, html.unescape(result.group(1))

View File

@@ -1,7 +1,11 @@
#!/usr/bin/python3 #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import logging
import time
from common import * import sys
from common import VERSION, EVENTLOOP_DELAY
try: try:
from local_config import conf, set_conf from local_config import conf, set_conf
@@ -16,12 +20,14 @@ except ImportError:
' ' * len(sys.argv[0]) ' ' * len(sys.argv[0])
) )
) )
sys.exit(1)
from sleekxmpp import ClientXMPP from sleekxmpp import ClientXMPP
got_hangup = False got_hangup = False
class bot(ClientXMPP):
class IdleBot(ClientXMPP):
def __init__(self, jid, password, rooms, nick): def __init__(self, jid, password, rooms, nick):
ClientXMPP.__init__(self, jid, password) ClientXMPP.__init__(self, jid, password)
@@ -31,22 +37,27 @@ class bot(ClientXMPP):
self.add_event_handler('session_start', self.session_start) self.add_event_handler('session_start', self.session_start)
self.add_event_handler('groupchat_message', self.muc_message) self.add_event_handler('groupchat_message', self.muc_message)
def session_start(self, event): self.logger = logging.getLogger(__name__)
def session_start(self, _):
self.get_roster() self.get_roster()
self.send_presence() self.send_presence()
for room in self.rooms: for room in self.rooms:
log.info('%s: joining' % room) self.logger.info('%s: joining' % room)
ret = self.plugin['xep_0045'].joinMUC( ret = self.plugin['xep_0045'].joinMUC(
room, room,
self.nick, self.nick,
wait=True wait=True
) )
log.info('%s: joined with code %s' % (room, ret)) self.logger.info('%s: joined with code %s' % (room, ret))
def muc_message(self, msg_obj): def muc_message(self, msg_obj):
global got_hangup """
Handle muc messages, return if irrelevant content or die by hangup.
:param msg_obj:
:return:
"""
# don't talk to yourself # don't talk to yourself
if msg_obj['mucnick'] == self.nick: if msg_obj['mucnick'] == self.nick:
return return
@@ -55,35 +66,51 @@ class bot(ClientXMPP):
return return
if msg_obj['body'].startswith(conf('bot_user')) and 'hangup' in msg_obj['body']: if msg_obj['body'].startswith(conf('bot_user')) and 'hangup' in msg_obj['body']:
log.warn("got 'hangup' from '%s': '%s'" % ( self.logger.warn("got 'hangup' from '%s': '%s'" % (
msg_obj['mucnick'], msg_obj['body'] msg_obj['mucnick'], msg_obj['body']
)) ))
global got_hangup
got_hangup = True got_hangup = True
sys.exit(1) return
if '__main__' == __name__: def start(botclass, active=False):
log.info(VERSION) logging.basicConfig(
level=logging.INFO,
format=sys.argv[0] + ' %(asctime)s %(levelname).1s %(funcName)-15s %(message)s'
)
logger = logging.getLogger(__name__)
logger.info(VERSION)
xmpp = bot( bot = botclass(
jid=conf('jid'), jid=conf('jid'),
password=conf('password'), password=conf('password'),
rooms=conf('rooms'), rooms=conf('rooms'),
nick=conf('bot_user') nick=conf('bot_user')
) )
import plugins
xmpp.connect() if active:
xmpp.register_plugin('xep_0045') plugins.register_all()
xmpp.process() if plugins.plugin_enabled_get(plugins.command_dsa_watcher):
# first result is lost.
plugins.command_dsa_watcher(['dsa-watcher', 'crawl'])
bot.connect()
bot.register_plugin('xep_0045')
bot.process()
while 1: while 1:
try: try:
# do nothing here, just idle if not plugins.event_trigger():
if got_hangup: bot.disconnect()
xmpp.disconnect()
sys.exit(1) sys.exit(1)
time.sleep(EVENTLOOP_DELAY) time.sleep(EVENTLOOP_DELAY)
except KeyboardInterrupt: except KeyboardInterrupt:
print('') print('')
exit(130) exit(130)
if '__main__' == __name__:
start(IdleBot)

View File

@@ -1,12 +1,6 @@
#!/usr/bin/python3 #!/usr/bin/python3
import time, sys import time
try:
log
except NameError:
import logging
log = logging.getLogger()
if '__main__' == __name__: if '__main__' == __name__:
print('''this is a config file, which is not meant to be executed''') print('''this is a config file, which is not meant to be executed''')
@@ -17,7 +11,7 @@ config = {
'password': 'FIXME', 'password': 'FIXME',
'rooms': ['FIXME'], 'rooms': ['FIXME'],
'src-url': 'FIXME', 'src-url': 'http://aero2k.de/t/repos/urlbot.git',
'bot_user': 'native-urlbot', 'bot_user': 'native-urlbot',
'bot_owner': 'FIXME', 'bot_owner': 'FIXME',
@@ -56,12 +50,16 @@ config = {
'dsa_watcher_interval': 15 * 60 'dsa_watcher_interval': 15 * 60
} }
def conf(val): def conf(val):
import logging
logger = logging.getLogger(__name__)
if val in list(config.keys()): if val in list(config.keys()):
return config[val] return config[val]
log.warn('conf(): unknown key ' + str(val)) logger.warn('conf(): unknown key ' + str(val))
return None return None
def set_conf(key, val): def set_conf(key, val):
config[key] = val config[key] = val
return None return None

View File

@@ -1,19 +1,21 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json
if '__main__' == __name__: import random
print('''this is a plugin file, which is not meant to be executed''') import re
exit(-1) import time
import time, random, unicodedata, re, sys, urllib.request, json
import types
import traceback import traceback
import types
import unicodedata
import urllib.parse import urllib.parse
from local_config import conf, set_conf import urllib.request
from common import *
# from common import *
from common import conf_load, conf_save, log, RATE_GLOBAL, RATE_NO_SILENCE, VERSION, RATE_INTERACTIVE, BUFSIZ, \
USER_AGENT
from local_config import set_conf, conf
from string_constants import excuses, moin_strings_hi, moin_strings_bye, cakes from string_constants import excuses, moin_strings_hi, moin_strings_bye, cakes
from urlbot import extract_title from urlbot import extract_title
from functools import wraps
ptypes_PARSE = 'parser' ptypes_PARSE = 'parser'
ptypes_COMMAND = 'command' ptypes_COMMAND = 'command'
@@ -21,20 +23,20 @@ ptypes = [ptypes_PARSE, ptypes_COMMAND]
joblist = [] joblist = []
plugins = {p : [] for p in ptypes} plugins = {p: [] for p in ptypes}
got_hangup = False got_hangup = False
def plugin_enabled_get(plugin):
def plugin_enabled_get(urlbot_plugin):
blob = conf_load() blob = conf_load()
if 'plugin_conf' in blob: if 'plugin_conf' in blob:
if plugin.plugin_name in blob['plugin_conf']: if urlbot_plugin.plugin_name in blob['plugin_conf']:
return blob['plugin_conf'][plugin.plugin_name].get( return blob['plugin_conf'][urlbot_plugin.plugin_name].get('enabled', urlbot_plugin.is_enabled)
'enabled', plugin.is_enabled
) return urlbot_plugin.is_enabled
return plugin.is_enabled
def plugin_enabled_set(plugin, enabled): def plugin_enabled_set(plugin, enabled):
if conf('persistent_locked'): if conf('persistent_locked'):
@@ -47,7 +49,7 @@ def plugin_enabled_set(plugin, enabled):
if 'plugin_conf' not in blob: if 'plugin_conf' not in blob:
blob['plugin_conf'] = {} blob['plugin_conf'] = {}
if not plugin.plugin_name in blob['plugin_conf']: if plugin.plugin_name not in blob['plugin_conf']:
blob['plugin_conf'][plugin.plugin_name] = {} blob['plugin_conf'][plugin.plugin_name] = {}
blob['plugin_conf'][plugin.plugin_name]['enabled'] = enabled blob['plugin_conf'][plugin.plugin_name]['enabled'] = enabled
@@ -57,8 +59,15 @@ def plugin_enabled_set(plugin, enabled):
return True return True
def pluginfunction(name, desc, plugin_type, ratelimit_class=RATE_GLOBAL, enabled=True): def pluginfunction(name, desc, plugin_type, ratelimit_class=RATE_GLOBAL, enabled=True):
''' A decorator to make a plugin out of a function ''' """A decorator to make a plugin out of a function
:param enabled:
:param ratelimit_class:
:param plugin_type:
:param desc:
:param name:
"""
if plugin_type not in ptypes: if plugin_type not in ptypes:
raise TypeError('Illegal plugin_type: %s' % plugin_type) raise TypeError('Illegal plugin_type: %s' % plugin_type)
@@ -70,11 +79,14 @@ def pluginfunction(name, desc, plugin_type, ratelimit_class=RATE_GLOBAL, enabled
f.plugin_type = plugin_type f.plugin_type = plugin_type
f.ratelimit_class = ratelimit_class f.ratelimit_class = ratelimit_class
return f return f
return decorate return decorate
def register_event(t, callback, args): def register_event(t, callback, args):
joblist.append((t, callback, args)) joblist.append((t, callback, args))
@pluginfunction('mental_ill', 'parse mental illness', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL) @pluginfunction('mental_ill', 'parse mental illness', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL)
def parse_mental_ill(**args): def parse_mental_ill(**args):
min_ill = 3 min_ill = 3
@@ -91,12 +103,16 @@ def parse_mental_ill(**args):
flag = True flag = True
break break
if True == flag: if flag:
log.plugin('sent mental illness reply') log.plugin('sent mental illness reply')
return { return {
'msg': '''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % args['reply_user'] 'msg': (
'''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' %
args['reply_user']
)
} }
@pluginfunction('debbug', 'parse Debian bug numbers', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL) @pluginfunction('debbug', 'parse Debian bug numbers', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL)
def parse_debbug(**args): def parse_debbug(**args):
bugs = re.findall(r'#(\d{4,})', args['data']) bugs = re.findall(r'#(\d{4,})', args['data'])
@@ -121,6 +137,7 @@ def parse_debbug(**args):
'msg': out 'msg': out
} }
@pluginfunction('cve', 'parse a CVE handle', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL) @pluginfunction('cve', 'parse a CVE handle', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL)
def parse_cve(**args): def parse_cve(**args):
cves = re.findall(r'(CVE-\d\d\d\d-\d+)', args['data'].upper()) cves = re.findall(r'(CVE-\d\d\d\d-\d+)', args['data'].upper())
@@ -132,6 +149,7 @@ def parse_cve(**args):
'msg': ['https://security-tracker.debian.org/tracker/%s' % c for c in cves] 'msg': ['https://security-tracker.debian.org/tracker/%s' % c for c in cves]
} }
@pluginfunction('dsa', 'parse a DSA handle', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL) @pluginfunction('dsa', 'parse a DSA handle', ptypes_PARSE, ratelimit_class=RATE_NO_SILENCE | RATE_GLOBAL)
def parse_dsa(**args): def parse_dsa(**args):
dsas = re.findall(r'(DSA-\d\d\d\d-\d+)', args['data'].upper()) dsas = re.findall(r'(DSA-\d\d\d\d-\d+)', args['data'].upper())
@@ -143,6 +161,7 @@ def parse_dsa(**args):
'msg': ['https://security-tracker.debian.org/tracker/%s' % d for d in dsas] 'msg': ['https://security-tracker.debian.org/tracker/%s' % d for d in dsas]
} }
@pluginfunction('skynet', 'parse skynet', ptypes_PARSE) @pluginfunction('skynet', 'parse skynet', ptypes_PARSE)
def parse_skynet(**args): def parse_skynet(**args):
if 'skynet' in args['data'].lower(): if 'skynet' in args['data'].lower():
@@ -151,6 +170,7 @@ def parse_skynet(**args):
'msg': '''I'm an independent bot and have nothing to do with other artificial intelligence systems!''' 'msg': '''I'm an independent bot and have nothing to do with other artificial intelligence systems!'''
} }
@pluginfunction('moin', 'parse hi/bye', ptypes_PARSE) @pluginfunction('moin', 'parse hi/bye', ptypes_PARSE)
def parse_moin(**args): def parse_moin(**args):
for direction in [moin_strings_hi, moin_strings_bye]: for direction in [moin_strings_hi, moin_strings_bye]:
@@ -187,6 +207,7 @@ def parse_moin(**args):
) )
} }
@pluginfunction('latex', r'reacts on \LaTeX', ptypes_PARSE) @pluginfunction('latex', r'reacts on \LaTeX', ptypes_PARSE)
def parse_latex(**args): def parse_latex(**args):
if r'\LaTeX' in args['data']: if r'\LaTeX' in args['data']:
@@ -194,6 +215,7 @@ def parse_latex(**args):
'msg': '''LaTeX is way too complex for me, I'm happy with fmt(1)''' 'msg': '''LaTeX is way too complex for me, I'm happy with fmt(1)'''
} }
@pluginfunction('me-action', 'reacts to /me.*%{bot_user}', ptypes_PARSE) @pluginfunction('me-action', 'reacts to /me.*%{bot_user}', ptypes_PARSE)
def parse_slash_me(**args): def parse_slash_me(**args):
if args['data'].lower().startswith('/me') and (conf('bot_user') in args['data'].lower()): if args['data'].lower().startswith('/me') and (conf('bot_user') in args['data'].lower()):
@@ -211,31 +233,6 @@ def parse_slash_me(**args):
'msg': args['reply_user'] + ': %s' % random.choice(me_replys) 'msg': args['reply_user'] + ': %s' % random.choice(me_replys)
} }
#@pluginfunction('dummy_parser', 'dummy_parser desc', ptypes_PARSE)
#def parse_skynet(**args):
# if 'dummy_parser' in args['data'].lower():
# log.plugin('dummy_parser triggered')
# return {
# 'msg': 'dummy_parser triggered'
# }
def data_parse_other(msg_obj):
    """Feed a MUC message to every parser plugin and send out their replies.

    Plugins whose rate-limit class is exceeded, or which are disabled, are
    skipped.

    :param msg_obj: message object; its 'body' and 'mucnick' entries are read
    """
    data = msg_obj['body']
    reply_user = msg_obj['mucnick']

    for p in plugins[ptypes_PARSE]:
        if ratelimit_exceeded(p.ratelimit_class):
            continue

        if not plugin_enabled_get(p):
            continue

        ret = p(reply_user=reply_user, data=data)

        # A plugin may return None; only results carrying a 'msg' are sent.
        if ret is not None and 'msg' in ret:
            ratelimit_touch(RATE_CHAT)
            send_reply(ret['msg'], msg_obj)
@pluginfunction('help', 'print help for a command or all known commands', ptypes_COMMAND) @pluginfunction('help', 'print help for a command or all known commands', ptypes_COMMAND)
def command_help(argv, **args): def command_help(argv, **args):
@@ -245,7 +242,7 @@ def command_help(argv, **args):
if 'help' != command: if 'help' != command:
return return
if None == what: if not what:
log.plugin('empty help request, sent all commands') log.plugin('empty help request, sent all commands')
commands = args['cmd_list'] commands = args['cmd_list']
commands.sort() commands.sort()
@@ -260,10 +257,8 @@ def command_help(argv, **args):
] ]
} }
flag = False
for p in plugins[ptypes_COMMAND] + plugins[ptypes_PARSE]: for p in plugins[ptypes_COMMAND] + plugins[ptypes_PARSE]:
if what == p.plugin_name: if what == p.plugin_name:
flag = True
log.plugin('sent help for %s' % what) log.plugin('sent help for %s' % what)
return { return {
'msg': args['reply_user'] + ': help for %s %s %s: %s' % ( 'msg': args['reply_user'] + ': help for %s %s %s: %s' % (
@@ -272,8 +267,6 @@ def command_help(argv, **args):
what, p.plugin_desc what, p.plugin_desc
) )
} }
if not flag:
log.plugin('no help found for %s' % what) log.plugin('no help found for %s' % what)
return { return {
'msg': args['reply_user'] + ': no such command: %s' % what 'msg': args['reply_user'] + ': no such command: %s' % what
@@ -290,6 +283,7 @@ def command_version(argv, **args):
'msg': args['reply_user'] + (''': I'm running ''' + VERSION) 'msg': args['reply_user'] + (''': I'm running ''' + VERSION)
} }
@pluginfunction('klammer', 'prints an anoying paper clip aka. Karl Klammer', ptypes_COMMAND) @pluginfunction('klammer', 'prints an anoying paper clip aka. Karl Klammer', ptypes_COMMAND)
def command_klammer(argv, **args): def command_klammer(argv, **args):
if 'klammer' != argv[0]: if 'klammer' != argv[0]:
@@ -308,6 +302,7 @@ def command_klammer(argv, **args):
) )
} }
@pluginfunction('unikot', 'prints an unicode string', ptypes_COMMAND) @pluginfunction('unikot', 'prints an unicode string', ptypes_COMMAND)
def command_unicode(argv, **args): def command_unicode(argv, **args):
if 'unikot' != argv[0]: if 'unikot' != argv[0]:
@@ -323,9 +318,10 @@ def command_unicode(argv, **args):
) )
} }
@pluginfunction('source', 'prints git URL', ptypes_COMMAND) @pluginfunction('source', 'prints git URL', ptypes_COMMAND)
def command_source(argv, **args): def command_source(argv, **_):
if not argv[0] in ('source', 'src'): if argv[0] not in ('source', 'src'):
return return
log.plugin('sent source URL') log.plugin('sent source URL')
@@ -333,13 +329,11 @@ def command_source(argv, **args):
'msg': 'My source code can be found at %s' % conf('src-url') 'msg': 'My source code can be found at %s' % conf('src-url')
} }
@pluginfunction('dice', 'rolls a dice, optional N times', ptypes_COMMAND, ratelimit_class=RATE_INTERACTIVE) @pluginfunction('dice', 'rolls a dice, optional N times', ptypes_COMMAND, ratelimit_class=RATE_INTERACTIVE)
def command_dice(argv, **args): def command_dice(argv, **args):
if 'dice' != argv[0]: if 'dice' != argv[0]:
return return
count = 0
try: try:
count = 1 if len(argv) < 2 else int(argv[1]) count = 1 if len(argv) < 2 else int(argv[1])
except ValueError as e: except ValueError as e:
@@ -361,7 +355,6 @@ def command_dice(argv, **args):
) )
for i in range(count): for i in range(count):
rnd = 0
if args['reply_user'] in conf('enhanced-random-user'): if args['reply_user'] in conf('enhanced-random-user'):
rnd = 0 # this might confuse users. good. rnd = 0 # this might confuse users. good.
log.plugin('sent random (enhanced)') log.plugin('sent random (enhanced)')
@@ -377,6 +370,7 @@ def command_dice(argv, **args):
'msg': msg 'msg': msg
} }
@pluginfunction('choose', 'chooses randomly between arguments', ptypes_COMMAND, ratelimit_class=RATE_INTERACTIVE) @pluginfunction('choose', 'chooses randomly between arguments', ptypes_COMMAND, ratelimit_class=RATE_INTERACTIVE)
def command_choose(argv, **args): def command_choose(argv, **args):
if 'choose' != argv[0]: if 'choose' != argv[0]:
@@ -396,6 +390,7 @@ def command_choose(argv, **args):
'msg': '%s: I prefer %s!' % (args['reply_user'], choice) 'msg': '%s: I prefer %s!' % (args['reply_user'], choice)
} }
@pluginfunction('uptime', 'prints uptime', ptypes_COMMAND) @pluginfunction('uptime', 'prints uptime', ptypes_COMMAND)
def command_uptime(argv, **args): def command_uptime(argv, **args):
if 'uptime' != argv[0]: if 'uptime' != argv[0]:
@@ -412,9 +407,11 @@ def command_uptime(argv, **args):
log.plugin('sent statistics') log.plugin('sent statistics')
return { return {
'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' % (u, plural_uptime, conf('request_counter'), plural_request)) 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' % (
u, plural_uptime, int(conf('request_counter')), plural_request))
} }
@pluginfunction('ping', 'sends pong', ptypes_COMMAND, ratelimit_class=RATE_INTERACTIVE) @pluginfunction('ping', 'sends pong', ptypes_COMMAND, ratelimit_class=RATE_INTERACTIVE)
def command_ping(argv, **args): def command_ping(argv, **args):
if 'ping' != argv[0]: if 'ping' != argv[0]:
@@ -435,6 +432,7 @@ def command_ping(argv, **args):
'msg': msg 'msg': msg
} }
@pluginfunction('info', 'prints info message', ptypes_COMMAND) @pluginfunction('info', 'prints info message', ptypes_COMMAND)
def command_info(argv, **args): def command_info(argv, **args):
if 'info' != argv[0]: if 'info' != argv[0]:
@@ -442,9 +440,15 @@ def command_info(argv, **args):
log.plugin('sent long info') log.plugin('sent long info')
return { return {
'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'help'.''' % (conf('bot_owner'), conf('hist_max_count'), conf('hist_max_time'))) 'msg': args['reply_user'] + (
''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further
questions, please talk to my master %s. I'm rate limited and shouldn't post more
than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message
(emergency only, please). For other commands, highlight me with 'help'.''' % (
conf('bot_owner'), int(conf('hist_max_count')), int(conf('hist_max_time'))))
} }
@pluginfunction('teatimer', 'sets a tea timer to $1 or currently %d seconds' % conf('tea_steep_time'), ptypes_COMMAND) @pluginfunction('teatimer', 'sets a tea timer to $1 or currently %d seconds' % conf('tea_steep_time'), ptypes_COMMAND)
def command_teatimer(argv, **args): def command_teatimer(argv, **args):
if 'teatimer' != argv[0]: if 'teatimer' != argv[0]:
@@ -471,13 +475,16 @@ def command_teatimer(argv, **args):
'msg': args['reply_user'] + ': time format error: ' + str(e) 'msg': args['reply_user'] + ': time format error: ' + str(e)
} }
register_event(ready, send_reply, (args['reply_user'] + ': Your tea is ready!', args['msg_obj']))
return { return {
'msg': args['reply_user'] + ': Tea timer set to %s' % time.strftime( 'msg': args['reply_user'] + ': Tea timer set to %s' % time.strftime(
'%F.%T', time.localtime(ready) '%F.%T', time.localtime(ready)
) ),
'event': {
'time': ready,
'msg': (args['reply_user'] + ': Your tea is ready!', args['msg_obj'])
} }
}
@pluginfunction('decode', 'prints the long description of an unicode character', ptypes_COMMAND) @pluginfunction('decode', 'prints the long description of an unicode character', ptypes_COMMAND)
def command_decode(argv, **args): def command_decode(argv, **args):
@@ -522,6 +529,7 @@ def command_decode(argv, **args):
'msg': [args['reply_user'] + ': decoding %s:' % argv[1]] + out 'msg': [args['reply_user'] + ': decoding %s:' % argv[1]] + out
} }
@pluginfunction('show-blacklist', 'show the current URL blacklist, optionally filtered', ptypes_COMMAND) @pluginfunction('show-blacklist', 'show the current URL blacklist, optionally filtered', ptypes_COMMAND)
def command_show_blacklist(argv, **args): def command_show_blacklist(argv, **args):
if 'show-blacklist' != argv[0]: if 'show-blacklist' != argv[0]:
@@ -537,18 +545,18 @@ def command_show_blacklist(argv, **args):
'' if not argv1 else ' (limited to %s)' % argv1 '' if not argv1 else ' (limited to %s)' % argv1
) )
] + [ ] + [
b for b in conf('url_blacklist') b for b in conf('url_blacklist') if not argv1 or argv1 in b
if not argv1 or argv1 in b
] ]
} }
def usersetting_get(argv, args): def usersetting_get(argv, args):
blob = conf_load() blob = conf_load()
arg_user = args['reply_user'] arg_user = args['reply_user']
arg_key = argv[1] arg_key = argv[1]
if not arg_user in blob['user_pref']: if arg_user not in blob['user_pref']:
return { return {
'msg': args['reply_user'] + ': user key not found' 'msg': args['reply_user'] + ': user key not found'
} }
@@ -560,6 +568,7 @@ def usersetting_get(argv, args):
) )
} }
@pluginfunction('set', 'modify a user setting', ptypes_COMMAND) @pluginfunction('set', 'modify a user setting', ptypes_COMMAND)
def command_usersetting(argv, **args): def command_usersetting(argv, **args):
if 'set' != argv[0]: if 'set' != argv[0]:
@@ -570,17 +579,17 @@ def command_usersetting(argv, **args):
arg_key = argv[1] if len(argv) > 1 else None arg_key = argv[1] if len(argv) > 1 else None
arg_val = argv[2] if len(argv) > 2 else None arg_val = argv[2] if len(argv) > 2 else None
if not arg_key in settings: if arg_key not in settings:
return { return {
'msg': args['reply_user'] + ': known settings: ' + (', '.join(settings)) 'msg': args['reply_user'] + ': known settings: ' + (', '.join(settings))
} }
if not arg_val in ['on', 'off', None]: if arg_val not in ['on', 'off', None]:
return { return {
'msg': args['reply_user'] + ': possible values for %s: on, off' % arg_key 'msg': args['reply_user'] + ': possible values for %s: on, off' % arg_key
} }
if None == arg_val: if not arg_val:
# display current value # display current value
return usersetting_get(argv, args) return usersetting_get(argv, args)
@@ -595,7 +604,7 @@ def command_usersetting(argv, **args):
if 'user_pref' not in blob: if 'user_pref' not in blob:
blob['user_pref'] = {} blob['user_pref'] = {}
if not arg_user in blob['user_pref']: if arg_user not in blob['user_pref']:
blob['user_pref'][arg_user] = {} blob['user_pref'][arg_user] = {}
blob['user_pref'][arg_user][arg_key] = 'on' == arg_val blob['user_pref'][arg_user][arg_key] = 'on' == arg_val
@@ -606,6 +615,7 @@ def command_usersetting(argv, **args):
# display value written to db # display value written to db
return usersetting_get(argv, args) return usersetting_get(argv, args)
@pluginfunction('cake', 'displays a cake ASCII art', ptypes_COMMAND) @pluginfunction('cake', 'displays a cake ASCII art', ptypes_COMMAND)
def command_cake(argv, **args): def command_cake(argv, **args):
if 'cake' != argv[0]: if 'cake' != argv[0]:
@@ -615,18 +625,19 @@ def command_cake(argv, **args):
'msg': args['reply_user'] + ': %s' % (random.sample(cakes, 1)[0]) 'msg': args['reply_user'] + ': %s' % (random.sample(cakes, 1)[0])
} }
#TODO: send a hint if someone types plugin as command
# TODO: send a hint if someone types plugin as command
@pluginfunction('plugin', "'disable' or 'enable' plugins", ptypes_COMMAND) @pluginfunction('plugin', "'disable' or 'enable' plugins", ptypes_COMMAND)
def command_plugin_activation(argv, **args): def command_plugin_activation(argv, **args):
command = argv[0] command = argv[0]
plugin = argv[1] if len(argv) > 1 else None plugin = argv[1] if len(argv) > 1 else None
if not command in ('enable', 'disable'): if command not in ('enable', 'disable'):
return return
log.plugin('plugin activation plugin called') log.plugin('plugin activation plugin called')
if None == plugin: if not plugin:
return { return {
'msg': args['reply_user'] + ': no plugin given' 'msg': args['reply_user'] + ': no plugin given'
} }
@@ -649,6 +660,7 @@ def command_plugin_activation(argv, **args):
'msg': args['reply_user'] + ': unknown plugin %s' % plugin 'msg': args['reply_user'] + ': unknown plugin %s' % plugin
} }
@pluginfunction('wp-en', 'crawl the english Wikipedia', ptypes_COMMAND) @pluginfunction('wp-en', 'crawl the english Wikipedia', ptypes_COMMAND)
def command_wp_en(argv, **args): def command_wp_en(argv, **args):
if 'wp-en' != argv[0]: if 'wp-en' != argv[0]:
@@ -659,6 +671,7 @@ def command_wp_en(argv, **args):
return command_wp(argv, lang='en', **args) return command_wp(argv, lang='en', **args)
@pluginfunction('wp', 'crawl the german Wikipedia', ptypes_COMMAND) @pluginfunction('wp', 'crawl the german Wikipedia', ptypes_COMMAND)
def command_wp(argv, lang='de', **args): def command_wp(argv, lang='de', **args):
if 'wp' != argv[0]: if 'wp' != argv[0]:
@@ -721,6 +734,7 @@ def command_wp(argv, lang='de', **args):
'msg': 'json data seem to be broken' 'msg': 'json data seem to be broken'
} }
@pluginfunction('excuse', 'prints BOFH style excuses', ptypes_COMMAND) @pluginfunction('excuse', 'prints BOFH style excuses', ptypes_COMMAND)
def command_dummy(argv, **args): def command_dummy(argv, **args):
if 'excuse' != argv[0]: if 'excuse' != argv[0]:
@@ -734,6 +748,7 @@ def command_dummy(argv, **args):
'msg': args['reply_user'] + ': ' + excuse 'msg': args['reply_user'] + ': ' + excuse
} }
@pluginfunction('show-moinlist', 'show the current moin reply list, optionally filtered', ptypes_COMMAND) @pluginfunction('show-moinlist', 'show the current moin reply list, optionally filtered', ptypes_COMMAND)
def command_show_moinlist(argv, **args): def command_show_moinlist(argv, **args):
if 'show-moinlist' != argv[0]: if 'show-moinlist' != argv[0]:
@@ -755,6 +770,7 @@ def command_show_moinlist(argv, **args):
) )
} }
@pluginfunction('list', 'list plugin and parser status', ptypes_COMMAND) @pluginfunction('list', 'list plugin and parser status', ptypes_COMMAND)
def command_list(argv, **args): def command_list(argv, **args):
if 'list' != argv[0]: if 'list' != argv[0]:
@@ -768,7 +784,7 @@ def command_list(argv, **args):
} }
# if not given, asume both # if not given, asume both
if not 'command' in argv and not 'parser' in argv: if 'command' not in argv and 'parser' not in argv:
argv.append('command') argv.append('command')
argv.append('parser') argv.append('parser')
@@ -793,7 +809,9 @@ def command_list(argv, **args):
msg.append('parsers: %s' % ', '.join([p.plugin_name for p in out_parser])) msg.append('parsers: %s' % ', '.join([p.plugin_name for p in out_parser]))
return {'msg': msg} return {'msg': msg}
@pluginfunction('record', 'record a message for a now offline user (usage: record {user} {some message})', ptypes_COMMAND)
@pluginfunction(
'record', 'record a message for a now offline user (usage: record {user} {some message})', ptypes_COMMAND)
def command_record(argv, **args): def command_record(argv, **args):
if 'record' != argv[0]: if 'record' != argv[0]:
return return
@@ -818,7 +836,7 @@ def command_record(argv, **args):
if 'user_records' not in blob: if 'user_records' not in blob:
blob['user_records'] = {} blob['user_records'] = {}
if not target_user in blob['user_records']: if target_user not in blob['user_records']:
blob['user_records'][target_user] = [] blob['user_records'][target_user] = []
blob['user_records'][target_user].append(message) blob['user_records'][target_user].append(message)
@@ -830,6 +848,7 @@ def command_record(argv, **args):
'msg': '%s: message saved for %s' % (args['reply_user'], target_user) 'msg': '%s: message saved for %s' % (args['reply_user'], target_user)
} }
@pluginfunction('show-records', 'show current offline records', ptypes_COMMAND) @pluginfunction('show-records', 'show current offline records', ptypes_COMMAND)
def command_show_recordlist(argv, **args): def command_show_recordlist(argv, **args):
if 'show-records' != argv[0]: if 'show-records' != argv[0]:
@@ -851,8 +870,12 @@ def command_show_recordlist(argv, **args):
) )
} }
@pluginfunction('dsa-watcher', 'automatically crawls for newly published Debian Security Announces', ptypes_COMMAND) @pluginfunction('dsa-watcher', 'automatically crawls for newly published Debian Security Announces', ptypes_COMMAND)
def command_dsa_watcher(argv, **args): def command_dsa_watcher(argv, **_):
"""
TODO: rewrite so that a last_dsa_date is used instead, then all DSAs since then printed and the date set to now()
"""
if 'dsa-watcher' != argv[0]: if 'dsa-watcher' != argv[0]:
return return
@@ -866,7 +889,6 @@ def command_dsa_watcher(argv, **args):
dsa = conf_load().get('plugin_conf', {}).get('last_dsa', 1000) dsa = conf_load().get('plugin_conf', {}).get('last_dsa', 1000)
url = 'https://security-tracker.debian.org/tracker/DSA-%d-1' % dsa url = 'https://security-tracker.debian.org/tracker/DSA-%d-1' % dsa
err = None
try: try:
request = urllib.request.Request(url) request = urllib.request.Request(url)
@@ -875,8 +897,11 @@ def command_dsa_watcher(argv, **args):
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
except Exception as e: except Exception as e:
err = e err = e
if '404' not in str(err):
if not err: msg = 'error for %s: %s' % (url, err)
log.warn(msg)
out.append(msg)
else:
if str != type(html_text): if str != type(html_text):
html_text = str(html_text) html_text = str(html_text)
@@ -886,7 +911,7 @@ def command_dsa_watcher(argv, **args):
if result: if result:
package = result.groups()[0] package = result.groups()[0]
send_reply('new Debian Security Announce found (%s): %s' % (str(package).replace(' - security update', ''), url)) out.append('new Debian Security Announce found (%s): %s' % (str(package).replace(' - security update', ''), url))
if conf('persistent_locked'): if conf('persistent_locked'):
msg = "couldn't get exclusive lock" msg = "couldn't get exclusive lock"
@@ -899,7 +924,7 @@ def command_dsa_watcher(argv, **args):
if 'plugin_conf' not in blob: if 'plugin_conf' not in blob:
blob['plugin_conf'] = {} blob['plugin_conf'] = {}
if not 'last_dsa' in blob['plugin_conf']: if 'last_dsa' not in blob['plugin_conf']:
blob['plugin_conf']['last_dsa'] = 3308 # FIXME: fixed value blob['plugin_conf']['last_dsa'] = 3308 # FIXME: fixed value
blob['plugin_conf']['last_dsa'] += 1 blob['plugin_conf']['last_dsa'] += 1
@@ -910,37 +935,28 @@ def command_dsa_watcher(argv, **args):
msg = 'new Debian Security Announce found (%s): %s' % (package, url) msg = 'new Debian Security Announce found (%s): %s' % (package, url)
log.plugin(msg) log.plugin(msg)
out.append(msg) out.append(msg)
else:
if not '404' in str(err):
msg = 'error for %s: %s' % (url, err)
log.warn(msg)
out.append(msg)
log.plugin('no dsa for %d, trying again...' % dsa) log.plugin('no dsa for %d, trying again...' % dsa)
# that's good, no error, just 404 -> DSA not released yet # that's good, no error, just 404 -> DSA not released yet
crawl_at = time.time() + conf('dsa_watcher_interval') crawl_at = time.time() + conf('dsa_watcher_interval')
register_event(crawl_at, command_dsa_watcher, (['dsa-watcher', 'crawl'],)) # register_event(crawl_at, command_dsa_watcher, (['dsa-watcher', 'crawl'],))
msg = 'next crawl set to %s' % time.strftime('%F.%T', time.localtime(crawl_at)) msg = 'next crawl set to %s' % time.strftime('%F.%T', time.localtime(crawl_at))
log.plugin(msg) log.plugin(msg)
out.append(msg) out.append(msg)
return {'msg': out} return {
'msg': out,
'event': {
'time': crawl_at,
'command': (command_dsa_watcher, (['dsa-watcher', 'crawl'],))
}
}
else: else:
msg = 'wrong argument' msg = 'wrong argument'
log.warn(msg) log.warn(msg)
return {'msg': msg} return {'msg': msg}
#@pluginfunction('dummy', 'dummy description', ptypes_COMMAND)
#def command_dummy(argv, **args):
# if 'dummy' != argv[0]:
# return
#
# log.plugin('dummy plugin called')
#
# return {
# 'msg': args['reply_user'] + ': dummy plugin called'
# }
def else_command(args): def else_command(args):
log.plugin('sent short info') log.plugin('sent short info')
@@ -948,110 +964,20 @@ def else_command(args):
'msg': args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).''' 'msg': args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).'''
} }
def data_parse_commands(msg_obj):
global got_hangup
data = msg_obj['body']
words = data.split()
if 2 > len(words): # need at least two words
return None
# don't reply if beginning of the text matches bot_user
if not data.startswith(conf('bot_user')):
return None
if 'hangup' in data:
log.warn('received hangup: ' + data)
got_hangup = True
sys.exit(1)
return None
reply_user = msg_obj['mucnick']
for p in plugins[ptypes_COMMAND]:
if ratelimit_exceeded(p.ratelimit_class):
continue
if not plugin_enabled_get(p):
continue
ret = p(
data=data,
cmd_list=[pl.plugin_name for pl in plugins[ptypes_COMMAND]],
parser_list=[pl.plugin_name for pl in plugins[ptypes_PARSE]],
reply_user=reply_user,
msg_obj=msg_obj,
argv=words[1:]
)
if None != ret:
if 'msg' in list(ret.keys()):
ratelimit_touch(RATE_CHAT)
if ratelimit_exceeded(RATE_CHAT):
return False
send_reply(ret['msg'], msg_obj)
return None
ret = else_command({'reply_user': reply_user})
if None != ret:
if ratelimit_exceeded(RATE_GLOBAL):
return False
if 'msg' in list(ret.keys()):
send_reply(ret['msg'], msg_obj)
if debug_enabled():
def _send_reply(a, msg_obj):
log.info('send_reply[%s]' % msg_obj, a)
def _conf(ignored):
return 'bot'
def _ratelimit_exceeded(ignored=None):
return False
def _ratelimit_touch(ignored=None):
return True
try:
send_reply
except NameError:
send_reply = _send_reply
try:
conf
except NameError:
conf = _conf
try:
ratelimit_exceeded
except NameError:
ratelimit_exceeded = _ratelimit_exceeded
try:
ratelimit_touch
except NameError:
ratelimit_touch = _ratelimit_touch
log.info('debugging enabled')
def register(func_type): def register(func_type):
''' """
Register plugins. Register plugins.
Arguments: :param func_type: plugin functions with this type (ptypes) will be loaded
func_type -- plugin functions with this type (ptypes) will be loaded """
'''
functions = [ functions = [
f for ignored, f in globals().items() f for ignored, f in globals().items() if all([
if isinstance(f, types.FunctionType),
type(f) == types.FunctionType f.__dict__.get('is_plugin', False),
and f.__dict__.get('is_plugin', False) f.plugin_type == func_type
and f.plugin_type == func_type ])
] ]
log.info('auto-reg %s: %s' % (func_type, ', '.join( log.info('auto-reg %s: %s' % (func_type, ', '.join(
@@ -1061,27 +987,29 @@ def register(func_type):
for f in functions: for f in functions:
register_plugin(f, func_type) register_plugin(f, func_type)
def register_plugin(function, func_type):
    """
    Add a single plugin function to the list kept for its plugin type.

    Failures are logged together with the traceback instead of being raised.

    :param function: plugin function to register
    :param func_type: plugin type (ptypes) used as key into the plugins mapping
    """
    try:
        bucket = plugins[func_type]
        bucket.append(function)
    except Exception as err:
        details = (function, err, traceback.format_exc())
        log.warn('registering %s failed: %s, %s' % details)
def register_all():
    """
    Register the plugin functions of both plugin types, parsers first and commands second.
    """
    for func_type in (ptypes_PARSE, ptypes_COMMAND):
        register(func_type)
def event_trigger(): def event_trigger():
if got_hangup: if got_hangup:
return False return False
if 0 == len(joblist): if 0 == len(joblist):
return return True
now = time.time() now = time.time()
for (i, (t, callback, args)) in enumerate(joblist): for (i, (t, callback, args)) in enumerate(joblist):
if t < now: if t < now:
callback(*args) callback(*args)
del(joblist[i]) del (joblist[i])

View File

@@ -19,12 +19,13 @@ except ImportError:
) )
) )
sys.exit(-1) sys.exit(10)
import time import time
t = -time.time() t = -time.time()
class bot(ClientXMPP):
class Bot(ClientXMPP):
def __init__(self, jid, password, room, nick): def __init__(self, jid, password, room, nick):
ClientXMPP.__init__(self, jid, password) ClientXMPP.__init__(self, jid, password)
@@ -67,7 +68,7 @@ if '__main__' == __name__:
format='%(levelname)-8s %(message)s' format='%(levelname)-8s %(message)s'
) )
xmpp = bot( xmpp = Bot(
jid=conf('jid'), jid=conf('jid'),
password=conf('password'), password=conf('password'),
room=conf('room'), room=conf('room'),

View File

@@ -1,11 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
if '__main__' == __name__: # retrieved from http://pages.cs.wisc.edu/~ballard/bofh/excuses
print('''this file just contains a bunch of strings and is not meant to be executed''')
exit(-1)
# retrived from http://pages.cs.wisc.edu/~ballard/bofh/excuses
excuses = ''' excuses = '''
clock speed clock speed
solar flares solar flares
@@ -487,7 +482,8 @@ moin_strings_bye = [
'bye', 'bye',
] ]
cakes = [ "No cake for you!", cakes = [
"No cake for you!",
("The Enrichment Center is required to remind you " ("The Enrichment Center is required to remind you "
"that you will be baked, and then there will be cake."), "that you will be baked, and then there will be cake."),
"The cake is a lie!", "The cake is a lie!",

View File

@@ -2,7 +2,7 @@
To be executed with nose To be executed with nose
""" """
import unittest import unittest
from urlbot import fetch_page from common import fetch_page
class TestEventlooper(unittest.TestCase): class TestEventlooper(unittest.TestCase):

582
urlbot.py
View File

@@ -1,9 +1,15 @@
#!/usr/bin/python3 #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys, re, time, pickle, random import random
import urllib.request, urllib.parse, urllib.error, html.parser import re
from common import * import sys
import time
from common import conf_load, conf_save, \
extract_title, RATE_GLOBAL, RATE_CHAT
from idlebot import IdleBot, start
from plugins import plugins, ptypes_COMMAND, plugin_enabled_get, ptypes_PARSE, register_event, else_command
try: try:
from local_config import conf, set_conf from local_config import conf, set_conf
@@ -11,291 +17,46 @@ except ImportError:
sys.stderr.write(''' sys.stderr.write('''
%s: E: local_config.py isn't tracked because of included secrets and %s: E: local_config.py isn't tracked because of included secrets and
%s site specific configurations. Rename local_config.py.skel and %s site specific configurations. Rename local_config.py.skel and
%s adjust to you needs. %s adjust to your needs.
'''[1:] % ( '''[1:] % (
sys.argv[0], sys.argv[0],
' ' * len(sys.argv[0]), ' ' * len(sys.argv[0]),
' ' * len(sys.argv[0]) ' ' * len(sys.argv[0])
) ))
) sys.exit(1)
sys.exit(-1)
from sleekxmpp import ClientXMPP class UrlBot(IdleBot):
# rate limiting to 5 messages per 10 minutes
hist_ts = []
hist_flag = True
parser = None
def fetch_page(url):
log.info('fetching page ' + url)
try:
request = urllib.request.Request(url)
request.add_header('User-Agent', USER_AGENT)
response = urllib.request.urlopen(request)
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
response.close()
return (0, html_text, response.headers)
except Exception as e:
log.warn('failed: %s' % e)
return (1, str(e), 'dummy')
return (-1, None, None)
def extract_title(url):
global parser
if 'repo/urlbot.git' in url:
log.info('repo URL found: ' + url)
return (3, 'wee, that looks like my home repo!')
log.info('extracting title from ' + url)
(code, html_text, headers) = fetch_page(url)
if 1 == code:
return (3, 'failed: %s for %s' % (html_text, url))
if not html_text:
return (-1, 'error')
charset = ''
if 'content-type' in headers:
log.debug('content-type: ' + headers['content-type'])
if 'text/' != headers['content-type'][:len('text/')]:
return (1, headers['content-type'])
charset = re.sub(
r'.*charset=(?P<charset>\S+).*',
r'\g<charset>', headers['content-type'], re.IGNORECASE
)
if '' != charset:
try:
html_text = html_text.decode(charset)
except LookupError:
log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))
if str != type(html_text):
html_text = str(html_text)
result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html_text, re.S | re.M | re.IGNORECASE)
if result:
match = result.groups()[0]
if None == parser:
parser = html.parser.HTMLParser()
try:
expanded_html = parser.unescape(match)
except UnicodeDecodeError as e: # idk why this can happen, but it does
log.warn('parser.unescape() expoded here: ' + str(e))
expanded_html = match
return (0, expanded_html)
else:
return (2, 'no title')
def send_reply(message, msg_obj=None):
set_conf('request_counter', conf('request_counter') + 1)
if str is not type(message):
message = '\n'.join(message)
if debug_enabled():
print(message)
else:
if msg_obj:
xmpp.send_message(
mto=msg_obj['from'].bare,
mbody=message,
mtype='groupchat'
)
else: # unset msg_obj == broadcast
for room in xmpp.rooms:
xmpp.send_message(
mto=room,
mbody=message,
mtype='groupchat'
)
def ratelimit_touch(ignored=None): # FIXME: separate counters
hist_ts.append(time.time())
if conf('hist_max_count') < len(hist_ts):
hist_ts.pop(0)
def ratelimit_exceeded(ignored=None): # FIXME: separate counters
global hist_flag
if conf('hist_max_count') < len(hist_ts):
first = hist_ts.pop(0)
if (time.time() - first) < conf('hist_max_time'):
if hist_flag:
hist_flag = False
# FIXME: this is very likely broken now
send_reply('(rate limited to %d messages in %d seconds, try again at %s)' % (conf('hist_max_count'), conf('hist_max_time'), time.strftime('%T %Z', time.localtime(hist_ts[0] + conf('hist_max_time')))))
log.warn('rate limiting exceeded: ' + pickle.dumps(hist_ts))
return True
hist_flag = True
return False
def extract_url(data, msg_obj):
result = re.findall(r'(https?://[^\s>]+)', data)
if not result:
return
ret = None
out = []
for url in result:
ratelimit_touch()
if ratelimit_exceeded(msg_obj):
return False
flag = False
for b in conf('url_blacklist'):
if not None is re.match(b, url):
flag = True
log.info('url blacklist match for ' + url)
break
if flag:
# an URL has matched the blacklist, continue to the next URL
continue
# urllib.request is broken:
# >>> '.'.encode('idna')
# ....
# UnicodeError: label empty or too long
# >>> '.a.'.encode('idna')
# ....
# UnicodeError: label empty or too long
# >>> 'a.a.'.encode('idna')
# b'a.a.'
try:
(status, title) = extract_title(url)
except UnicodeError as e:
(status, title) = (4, str(e))
if 0 == status:
title = title.strip()
message = 'Title: %s' % title
elif 1 == status:
if conf('image_preview'):
# of course it's fake, but it looks interesting at least
char = r""",._-+=\|/*`~"'"""
message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
title, random.choice(char)
)
else:
log.info('no message sent for non-text %s (%s)' % (url, title))
continue
elif 2 == status:
message = '(No title)'
elif 3 == status:
message = title
elif 4 == status:
message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
log.warn(message)
else:
message = 'some error occurred when fetching %s' % url
message = message.replace('\n', '\\n')
log.info('adding to out buf: ' + message)
out.append(message)
ret = True
if True == ret:
send_reply(out, msg_obj)
return ret
def handle_msg(msg_obj):
content = msg_obj['body']
if 'has set the subject to:' in content:
return
if sys.argv[0] in content:
log.info('silenced, this is my own log')
return
if 'nospoiler' in content:
log.info('no spoiler for: ' + content)
return
# don't react to itself
if str(msg_obj['from']).startswith(conf('bot_user')):
return
arg_user = msg_obj['mucnick']
blob_userpref = conf_load().get('user_pref', [])
nospoiler = False
if arg_user in blob_userpref:
if 'spoiler' in blob_userpref[arg_user]:
if not blob_userpref[arg_user]['spoiler']:
log.info('nospoiler from conf')
nospoiler = True
ret = None
if not nospoiler:
extract_url(content, msg_obj)
# print(' '.join(["%s->%s" % (x, msg_obj[x]) for x in msg_obj.keys()]))
plugins.data_parse_commands(msg_obj)
plugins.data_parse_other(msg_obj)
class bot(ClientXMPP):
def __init__(self, jid, password, rooms, nick): def __init__(self, jid, password, rooms, nick):
ClientXMPP.__init__(self, jid, password) super(UrlBot, self).__init__(jid, password, rooms, nick)
self.rooms = rooms self.hist_ts = {p: [] for p in plugins.ptypes}
self.nick = nick self.hist_flag = {p: True for p in plugins.ptypes}
self.add_event_handler('session_start', self.session_start)
self.add_event_handler('groupchat_message', self.muc_message)
self.add_event_handler('message', self.message) self.add_event_handler('message', self.message)
for r in self.rooms: for r in self.rooms:
self.add_event_handler('muc::%s::got_online' % r, self.muc_online) self.add_event_handler('muc::%s::got_online' % r, self.muc_online)
def session_start(self, event):
self.get_roster()
self.send_presence()
for room in self.rooms:
log.info('joining %s' % room)
self.plugin['xep_0045'].joinMUC(
room,
self.nick,
wait=True
)
def muc_message(self, msg_obj): def muc_message(self, msg_obj):
# don't talk to yourself super(UrlBot, self).muc_message(msg_obj)
if msg_obj['mucnick'] == self.nick: return self.handle_msg(msg_obj)
return
return handle_msg(msg_obj)
def message(self, msg_obj):
    """
    Handler for the generic 'message' event.

    Groupchat messages are ignored here; anything else is logged as a private message.

    :param msg_obj: incoming message
    """
    if 'groupchat' != msg_obj['type']:
        self.logger.info("Got the following PM: %s" % str(msg_obj))
def muc_online(self, msg_obj): def muc_online(self, msg_obj):
"""
Hook for muc event "user joins"
"""
# don't react to yourself # don't react to yourself
if msg_obj['muc']['nick'] == self.nick: if msg_obj['muc']['nick'] == self.nick:
return return
# TODO: move this to a undirected plugin, maybe new plugin type
arg_user = msg_obj['muc']['nick'] arg_user = msg_obj['muc']['nick']
arg_user_key = arg_user.lower() arg_user_key = arg_user.lower()
blob_userrecords = conf_load().get('user_records', {}) blob_userrecords = conf_load().get('user_records', {})
@@ -317,12 +78,12 @@ class bot(ClientXMPP):
), ),
mtype='groupchat' mtype='groupchat'
) )
log.info('sent %d offline records to room %s' % ( self.logger.info('sent %d offline records to room %s' % (
len(records), msg_obj['from'].bare len(records), msg_obj['from'].bare
)) ))
if conf('persistent_locked'): if conf('persistent_locked'):
log.warn("couldn't get exclusive lock") self.logger.warn("couldn't get exclusive lock")
return False return False
set_conf('persistent_locked', True) set_conf('persistent_locked', True)
@@ -337,49 +98,266 @@ class bot(ClientXMPP):
conf_save(blob) conf_save(blob)
set_conf('persistent_locked', False) set_conf('persistent_locked', False)
def send_reply(self, message, msg_obj=None):
    """
    Send a reply to a message

    :param message: text to send; anything that is not a string is treated as an
        iterable of strings and joined with newlines
    :param msg_obj: message being answered; the reply goes to its bare 'from' address.
        When unset, the text is broadcast to every room in self.rooms.
    """
    # every call bumps the request counter, also in debug mode
    set_conf('request_counter', conf('request_counter') + 1)

    # isinstance() instead of an exact type comparison, so that str subclasses
    # are sent as-is rather than being joined character by character
    if not isinstance(message, str):
        message = '\n'.join(message)

    if conf('debug_mode'):
        print(message)
        return

    # unset msg_obj == broadcast
    targets = [msg_obj['from'].bare] if msg_obj else self.rooms
    for target in targets:
        self.send_message(
            mto=target,
            mbody=message,
            mtype='groupchat'
        )
# TODO: plugin?
def extract_url(self, data, msg_obj):
    """
    Look for http(s) URLs in a message and reply with one line per URL.

    :param data: message text to scan
    :param msg_obj: message being answered, handed on to send_reply()
    :returns: None if no URL was found or nothing was worth reporting,
        False if the rate limit was hit, True if a reply was sent
    """
    urls = re.findall(r'(https?://[^\s>]+)', data)
    if not urls:
        return

    out = []
    for url in urls:
        self.push_ratelimit()
        # check the same (default) counter push_ratelimit() just touched;
        # the parameter is a rate limit class, not the message object
        if self.check_ratelimit():
            return False

        if any(re.match(b, url) for b in conf('url_blacklist')):
            # an URL has matched the blacklist, continue to the next URL
            self.logger.info('url blacklist match for ' + url)
            continue

        # urllib.request is broken:
        # >>> '.'.encode('idna')
        # ....
        # UnicodeError: label empty or too long
        # >>> '.a.'.encode('idna')
        # ....
        # UnicodeError: label empty or too long
        # >>> 'a.a.'.encode('idna')
        # b'a.a.'

        try:
            (status, title) = extract_title(url)
        except UnicodeError as e:
            (status, title) = (4, str(e))

        if 0 == status:
            title = title.strip()
            message = 'Title: %s' % title
        elif 1 == status:
            if conf('image_preview'):
                # of course it's fake, but it looks interesting at least
                char = r""",._-+=\|/*`~"'"""
                message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
                    title, random.choice(char)
                )
            else:
                self.logger.info('no message sent for non-text %s (%s)' % (url, title))
                continue
        elif 2 == status:
            message = '(No title)'
        elif 3 == status:
            message = title
        elif 4 == status:
            message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
            self.logger.warn(message)
        else:
            message = 'some error occurred when fetching %s' % url

        # keep every reply on a single line
        message = message.replace('\n', '\\n')

        self.logger.info('adding to out buf: ' + message)
        out.append(message)

    if not out:
        return None

    self.send_reply(out, msg_obj)
    return True
def handle_msg(self, msg_obj):
    """
    Entry point for an incoming message.

    Subject changes, the bot's own log lines and messages containing 'nospoiler'
    are dropped. Otherwise URLs are handled (unless the sender switched spoilers
    off in the stored user preferences) and the message is passed to the command
    and parser plugins.

    :param msg_obj: incoming message
    :returns: nothing
    """
    content = msg_obj['body']

    if 'has set the subject to:' in content:
        return

    if sys.argv[0] in content:
        self.logger.info('silenced, this is my own log')
        return

    if 'nospoiler' in content:
        self.logger.info('no spoiler for: ' + content)
        return

    sender = msg_obj['mucnick']
    user_prefs = conf_load().get('user_pref', [])

    show_titles = True
    if sender in user_prefs and 'spoiler' in user_prefs[sender] and not user_prefs[sender]['spoiler']:
        self.logger.info('nospoiler from conf')
        show_titles = False

    if show_titles:
        # TODO: why not make this a plugin?
        self.extract_url(content, msg_obj)

    self.data_parse_commands(msg_obj)
    self.data_parse_other(msg_obj)
def push_ratelimit(self, ratelimit_class=RATE_GLOBAL):  # FIXME: separate counters
    """
    Record the current time in the history of the given rate limit class.

    The oldest entry is dropped once the history holds more than
    conf('hist_max_count') timestamps.

    :param ratelimit_class: key into self.hist_ts
    """
    stamps = self.hist_ts[ratelimit_class]
    stamps.append(time.time())

    if len(stamps) > conf('hist_max_count'):
        del stamps[0]

    self.hist_ts[ratelimit_class] = stamps
def check_ratelimit(self, ratelimit_class=RATE_GLOBAL):  # FIXME: separate counters
    """
    Tell whether the given rate limit class is currently exceeded.

    When the history holds more than conf('hist_max_count') timestamps, the oldest
    one is removed; if it is younger than conf('hist_max_time') seconds the limit
    counts as exceeded. A notice is sent on the first hit only and is re-armed by
    the next check that is not exceeded.

    :param ratelimit_class: key into self.hist_ts and self.hist_flag
    :returns: True if the limit is exceeded, False otherwise
    """
    local_history = self.hist_ts[ratelimit_class]

    if conf('hist_max_count') < len(local_history):
        first = local_history.pop(0)
        self.hist_ts[ratelimit_class] = local_history

        if (time.time() - first) < conf('hist_max_time'):
            if self.hist_flag[ratelimit_class]:
                self.hist_flag[ratelimit_class] = False
                # FIXME: this is very likely broken now
                self.send_reply('(rate limited to %d messages in %d seconds, try again at %s)' % (
                    conf('hist_max_count'),
                    conf('hist_max_time'),
                    time.strftime('%T %Z', time.localtime(local_history[0] + conf('hist_max_time')))
                ))

            # the history is a list: format it, concatenating it to a str raises TypeError
            self.logger.warn('rate limiting exceeded: %s' % (local_history,))
            return True

    self.hist_flag[ratelimit_class] = True
    return False
def data_parse_commands(self, msg_obj):
    """
    react to a message with the bots nick

    The command plugins are tried in order; the first one that returns a result
    ends the search. If no plugin answers, the short info text from else_command()
    is sent.

    :param msg_obj: dictionary with incoming message parameters
    :returns: False if a reply was suppressed by the rate limit, None otherwise
    """
    global got_hangup

    data = msg_obj['body']
    words = data.split()

    if 2 > len(words):  # need at least two words
        return None

    # only react when the text starts with the bot's nick
    if not data.startswith(conf('bot_user')):
        return None

    if 'hangup' in data:
        self.logger.warn('received hangup: ' + data)
        got_hangup = True
        sys.exit(1)

    reply_user = msg_obj['mucnick']

    # TODO: check how several commands/plugins in a single message behave (also with rate limiting)
    for p in plugins[ptypes_COMMAND]:
        if self.check_ratelimit(p.ratelimit_class):
            continue

        if not plugin_enabled_get(p):
            continue

        ret = p(
            data=data,
            cmd_list=[pl.plugin_name for pl in plugins[ptypes_COMMAND]],
            parser_list=[pl.plugin_name for pl in plugins[ptypes_PARSE]],
            reply_user=reply_user,
            msg_obj=msg_obj,
            argv=words[1:]
        )

        if ret:
            if 'event' in ret:
                event = ret['event']
                if 'msg' in event:
                    # the scheduler invokes callback(*args), so the message has to be
                    # wrapped in a tuple to arrive as one argument
                    # NOTE(review): assumes register_event stores args unchanged - confirm
                    register_event(event['time'], self.send_reply, (event['msg'],))
                elif 'command' in event:
                    command = event['command']
                    register_event(event['time'], command[0], command[1])

            if 'msg' in ret:
                self.push_ratelimit(RATE_CHAT)
                if self.check_ratelimit(RATE_CHAT):
                    return False

                self.send_reply(ret['msg'], msg_obj)

            return None

    # no plugin answered: fall back to the short info text
    ret = else_command({'reply_user': reply_user})
    if ret:
        if self.check_ratelimit(RATE_GLOBAL):
            return False

        if 'msg' in ret:
            self.send_reply(ret['msg'], msg_obj)
def data_parse_other(self, msg_obj):
    """
    react to any message

    Every enabled parser plugin whose rate limit class is not exceeded gets the
    message text; a returned 'msg' is sent as reply.

    :param msg_obj: incoming message parameters
    :return: nothing
    """
    body = msg_obj['body']
    sender = msg_obj['mucnick']

    for parser_plugin in plugins[ptypes_PARSE]:
        if self.check_ratelimit(parser_plugin.ratelimit_class) or not plugin_enabled_get(parser_plugin):
            continue

        result = parser_plugin(reply_user=sender, data=body)
        if not result:
            continue

        if 'msg' in list(result.keys()):
            self.push_ratelimit(RATE_CHAT)
            self.send_reply(result['msg'], msg_obj)
# script entry point: hand the bot class over to start()
if __name__ == '__main__':
    start(UrlBot, True)