1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00
Files
urlbot-native-trex/urlbot.py

375 lines
8.6 KiB
Python
Raw Normal View History

#!/usr/bin/python3
2014-08-10 22:10:00 +02:00
# -*- coding: utf-8 -*-
2014-07-20 23:39:51 +02:00
2014-12-14 16:26:48 +01:00
import sys, re, time, pickle, random
import urllib.request, urllib.parse, urllib.error, html.parser
from common import *
2014-07-20 23:39:51 +02:00
# local_config provides conf() and set_conf(). According to the message
# below it is not tracked in the repository, so a missing module is reported
# with setup instructions and the program exits with status -1.
try:
    from local_config import conf, set_conf
except ImportError:
    # [1:] drops the newline that directly follows the opening quotes; the
    # second and third %s are runs of spaces as long as the program name.
    sys.stderr.write('''
%s: E: local_config.py isn't tracked because of included secrets and
%s site specific configurations. Rename local_config.py.skel and
%s adjust to you needs.
'''[1:] % (
        sys.argv[0],
        ' ' * len(sys.argv[0]),
        ' ' * len(sys.argv[0])
    )
    )
    sys.exit(-1)
from sleekxmpp import ClientXMPP
2014-07-21 04:54:50 +02:00
# Rate limiting state used by ratelimit_touch() and ratelimit_exceeded().
# The limits themselves are read from conf('hist_max_count') and
# conf('hist_max_time'); the original note here reads
# "rate limiting to 5 messages per 10 minutes".
hist_ts = []  # time.time() stamps of recent requests, oldest first
hist_flag = True  # cleared once the "rate limited" notice has been issued

# module-level slot for an HTML parser object; starts out unset
parser = None
2014-07-21 00:53:26 +02:00
def fetch_page(url):
    """Download the beginning of the document at *url*.

    At most BUFSIZ bytes of the body are read (BUFSIZ is defined outside
    this function).

    Returns a 3-tuple ``(status, payload, headers)``:
      (0, body, response.headers)  -- the request succeeded
      (1, error text, 'dummy')     -- any exception was raised on the way
    """
    log.info('fetching page ' + url)
    try:
        request = urllib.request.Request(url)
        request.add_header('User-Agent', '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''')
        # the with block closes the response even when read() raises
        with urllib.request.urlopen(request) as response:
            html_text = response.read(BUFSIZ)  # ignore more than BUFSIZ
            return (0, html_text, response.headers)
    except Exception as e:
        # deliberately broad: every failure is reported to the caller as text
        log.warn('failed: %s' % e)
        return (1, str(e), 'dummy')
2014-07-21 00:53:26 +02:00
def extract_title(url):
    """Fetch *url* and return ``(status, text)`` describing its HTML title.

    Status codes:
       0 -- a title was found; text is the title with HTML entities expanded
       1 -- the document is not text/*; text is the content-type header
       2 -- no <title> element was found; text is 'no title'
       3 -- text is a ready-made message (own repo URL or fetch failure)
      -1 -- the response body was empty; text is 'error'
    """
    if 'repo/urlbot.git' in url:
        log.info('repo URL found: ' + url)
        return (3, 'wee, that looks like my home repo!')

    log.info('extracting title from ' + url)

    (code, html_text, headers) = fetch_page(url)

    if 1 == code:
        return (3, 'failed: %s for %s' % (html_text, url))

    if not html_text:
        return (-1, 'error')

    charset = ''
    if 'content-type' in headers:
        content_type = headers['content-type']
        log.debug('content-type: ' + content_type)

        if not content_type.startswith('text/'):
            return (1, content_type)

        # search() rather than sub(): a header without "charset=" must give
        # an empty charset instead of the whole header, and IGNORECASE has
        # to be passed as flags (the 4th positional argument of sub() is
        # the replacement count).
        found = re.search(r'charset=(?P<charset>\S+)', content_type, re.IGNORECASE)
        if found:
            # drop quotes and a trailing parameter separator
            charset = found.group('charset').strip('"\';')

    if isinstance(html_text, bytes):
        if charset:
            try:
                # 'replace': the body may be cut in the middle of a
                # multi-byte character, which must not abort the lookup
                html_text = html_text.decode(charset, 'replace')
            except LookupError:
                log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))

    if not isinstance(html_text, str):
        # unknown or missing charset: decode leniently instead of using
        # str(), which would produce the b'...' representation
        html_text = html_text.decode('utf-8', 'replace')

    result = re.search(r'<title.*?>(.*?)</title>', html_text, re.S | re.IGNORECASE)
    if not result:
        return (2, 'no title')

    # html.unescape() replaces HTMLParser.unescape(), which newer Python
    # versions no longer provide
    return (0, html.unescape(result.group(1)))
2014-07-20 23:39:51 +02:00
def send_reply(message, msg_obj):
    """Deliver *message* as a groupchat message and count the request.

    message -- a str, or an iterable of str that is joined with newlines
    msg_obj -- the message being answered; the reply goes to the bare
               address of msg_obj['from']

    When debug_enabled() is true the text is printed instead of sent.
    """
    set_conf('request_counter', conf('request_counter') + 1)

    # everything that is not exactly a str is treated as a sequence of lines
    body = message if type(message) is str else '\n'.join(message)

    if debug_enabled():
        print(body)
        return

    xmpp.send_message(
        mto=msg_obj['from'].bare,
        mbody=body,
        mtype='groupchat'
    )
def ratelimit_touch(ignored=None):  # FIXME: separate counters
    """Record the current time in hist_ts.

    If the history then holds more than conf('hist_max_count') entries, the
    oldest one is discarded. The argument is not used.
    """
    hist_ts.append(time.time())

    if len(hist_ts) > conf('hist_max_count'):
        del hist_ts[0]
def ratelimit_exceeded(ignored=None):  # FIXME: separate counters
    """Tell whether the request history exceeds the configured rate limit.

    ignored -- optional message object. extract_url() in this file passes
               the message being handled; when given, the "rate limited"
               notice is sent as a reply to it.
               NOTE(review): confirm that plugin callers pass the same kind
               of object.

    Returns True when more than conf('hist_max_count') requests are recorded
    and the oldest one is younger than conf('hist_max_time') seconds,
    otherwise False. The oldest entry is removed from hist_ts whenever the
    count is exceeded. The notice is issued only once per limited period
    (tracked in hist_flag).

    NOTE(review): ratelimit_touch() in this file already trims hist_ts to
    hist_max_count entries, so directly after it the count check below may
    never be true -- confirm the intended interplay.
    """
    global hist_flag

    max_count = conf('hist_max_count')
    if max_count < len(hist_ts):
        first = hist_ts.pop(0)
        max_time = conf('hist_max_time')
        if (time.time() - first) < max_time:
            if hist_flag:
                hist_flag = False
                # send_reply() needs a message object to answer to; without
                # one the notice is skipped instead of raising TypeError
                if ignored is not None:
                    # hist_ts may be empty after the pop above
                    oldest = hist_ts[0] if hist_ts else first
                    send_reply(
                        '(rate limited to %d messages in %d seconds, try again at %s)' % (
                            max_count,
                            max_time,
                            time.strftime('%T %Z', time.localtime(oldest + max_time))
                        ),
                        ignored
                    )

            # %r instead of pickle.dumps(): bytes cannot be appended to str
            log.warn('rate limiting exceeded: %r' % (hist_ts,))
            return True

    hist_flag = True
    return False
def extract_url(data, msg_obj):
    """Find http(s) URLs in *data* and reply with one line per URL.

    data    -- message text to scan
    msg_obj -- message object, handed to ratelimit_exceeded() and
               send_reply()

    Returns None when no URL was found or no line was produced, False when
    the rate limit was hit (nothing collected so far is sent), and True
    after a reply was sent.
    """
    result = re.findall(r"(https?://[^\s>]+)", data)
    if not result:
        return

    ret = None
    out = []
    for url in result:
        ratelimit_touch()
        if ratelimit_exceeded(msg_obj):
            return False

        if any(re.match(b, url) is not None for b in conf('url_blacklist')):
            # an URL has matched the blacklist, continue to the next URL
            log.info('url blacklist match for ' + url)
            continue

        # urllib.request is broken:
        # >>> '.'.encode('idna')
        # ....
        # UnicodeError: label empty or too long
        # >>> '.a.'.encode('idna')
        # ....
        # UnicodeError: label empty or too long
        # >>> 'a.a.'.encode('idna')
        # b'a.a.'
        try:
            (status, title) = extract_title(url)
        except UnicodeError as e:
            (status, title) = (4, str(e))

        if 0 == status:
            message = 'Title: %s' % title.strip()
        elif 1 == status:
            if not conf('image_preview'):
                log.info('no message sent for non-text %s (%s)' % (url, title))
                continue
            # of course it's fake, but it looks interesting at least
            # (raw literal: the set contains a literal backslash)
            char = r""",._-+=\|/*`~"'"""
            message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
                title, random.choice(char)
            )
        elif 2 == status:
            message = '(No title)'
        elif 3 == status:
            message = title
        elif 4 == status:
            message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
            log.warn(message)
        else:
            message = 'some error occurred when fetching %s' % url

        # keep every URL's answer on a single line
        message = message.replace('\n', '\\n')

        log.info('adding to out buf: ' + message)
        out.append(message)
        ret = True

    if True == ret:
        send_reply(out, msg_obj)

    return ret
def handle_msg(msg_obj):
    """Process one groupchat message.

    Subject changes, the bot's own log lines, messages containing
    'nospoiler' and messages whose sender starts with conf('bot_user') are
    dropped. Otherwise URLs are handled by extract_url(), unless the
    sender's stored 'spoiler' preference is false, and the message is then
    passed to the plugin parsers.
    """
    text = msg_obj['body']

    if 'has set the subject to:' in text:
        return

    if sys.argv[0] in text:
        log.info('silenced, this is my own log')
        return

    if 'nospoiler' in text:
        log.info('no spoiler for: ' + text)
        return

    # don't react to itself
    sender = str(msg_obj['from'])
    if sender.startswith(conf('bot_user')):
        return

    nick = msg_obj['mucnick']
    all_prefs = conf_load().get('user_pref', [])

    # spoilers stay on unless the user has an explicit false 'spoiler' entry
    want_titles = True
    if nick in all_prefs:
        user_prefs = all_prefs[nick]
        if 'spoiler' in user_prefs and not user_prefs['spoiler']:
            log.info('nospoiler from conf')
            want_titles = False

    if want_titles:
        extract_url(text, msg_obj)

    # print(' '.join(["%s->%s" % (x, msg_obj[x]) for x in msg_obj.keys()]))
    plugins.data_parse_commands(msg_obj)
    plugins.data_parse_other(msg_obj)
2014-07-20 23:39:51 +02:00
class bot(ClientXMPP):
    """XMPP client that joins the configured rooms and reacts to events."""

    def __init__(self, jid, password, rooms, nick):
        """Create the client and register the event handlers.

        jid, password -- passed on to ClientXMPP
        rooms         -- iterable of room addresses to join
        nick          -- nickname used when joining the rooms
        """
        ClientXMPP.__init__(self, jid, password)

        self.rooms = rooms
        self.nick = nick

        self.add_event_handler('session_start', self.session_start)
        self.add_event_handler('groupchat_message', self.muc_message)
        self.add_event_handler('message', self.message)

        for r in self.rooms:
            self.add_event_handler('muc::%s::got_online' % r, self.muc_online)

    def session_start(self, event):
        """Fetch the roster, send presence and join every room."""
        self.get_roster()
        self.send_presence()

        for room in self.rooms:
            log.info('joining %s' % room)
            self.plugin['xep_0045'].joinMUC(
                room,
                self.nick,
                wait=True
            )

    def muc_message(self, msg_obj):
        """Hand a groupchat message to handle_msg() unless it is our own."""
        # don't talk to yourself
        if msg_obj['mucnick'] == self.nick:
            return

        return handle_msg(msg_obj)

    def message(self, msg_obj):
        """Handler for the 'message' event; currently takes no action."""
        if 'groupchat' == msg_obj['type']:
            return

    def muc_online(self, msg_obj):
        """Deliver stored records to a user who just came online in a room.

        The records are looked up under the lower-cased nick in the
        'user_records' entry of the persistent configuration, sent to the
        room and then removed from the configuration.

        Returns False when the persistent configuration is locked,
        otherwise None.
        """
        # don't react to yourself
        if msg_obj['muc']['nick'] == self.nick:
            return

        arg_user = msg_obj['muc']['nick']
        arg_user_key = arg_user.lower()
        blob_userrecords = conf_load().get('user_records', {})

        if arg_user_key not in blob_userrecords:
            return

        records = blob_userrecords[arg_user_key]
        if not records:
            return

        count = len(records)
        self.send_message(
            mto=msg_obj['from'].bare,
            mbody='%s, there %s %d message%s for you:\n%s' % (
                arg_user,
                'is' if 1 == count else 'are',
                count,
                '' if 1 == count else 's',
                '\n'.join(records)
            ),
            mtype='groupchat'
        )
        log.info('sent %d offline records to room %s' % (
            count, msg_obj['from'].bare
        ))

        if conf('persistent_locked'):
            log.warn("couldn't get exclusive lock")
            return False

        set_conf('persistent_locked', True)
        try:
            blob = conf_load()
            # create the section if it is missing, then forget the records
            blob.setdefault('user_records', {}).pop(arg_user_key, None)
            conf_save(blob)
        finally:
            # always release the flag, otherwise one failed load/save would
            # leave the configuration locked for good
            set_conf('persistent_locked', False)
        return

    # def set_presence(self, msg):
    #     for room in self.rooms:
    #         self.send_presence(pto=room, pstatus=msg)
2014-11-09 16:52:22 +01:00
if '__main__' == __name__:
    log.info(VERSION)

    # imported here so the hooks below can be installed before the plugins
    # are registered
    import plugins

    plugins.send_reply = send_reply
    plugins.ratelimit_exceeded = ratelimit_exceeded
    plugins.ratelimit_touch = ratelimit_touch

    plugins.register_all()

    logging.basicConfig(
        level=logging.INFO,
        format='%(levelname)-8s %(message)s'
    )

    # module-level name: send_reply() uses it to send messages
    xmpp = bot(
        jid=conf('jid'),
        password=conf('password'),
        rooms=conf('rooms'),
        nick=conf('bot_user')
    )

    xmpp.connect()
    xmpp.register_plugin('xep_0045')
    xmpp.process()

    # main loop: run the plugin event trigger every `delay` seconds until it
    # reports False or the user interrupts
    while 1:
        try:
            if False == plugins.event_trigger():
                xmpp.disconnect()
                sys.exit(1)

            time.sleep(delay)
        except KeyboardInterrupt:
            print('')
            # sys.exit() instead of exit(): the latter is provided by the
            # site module and is not meant to be used in programs
            sys.exit(130)