1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00
Files
urlbot-native-trex/eventlooper.py

263 lines
6.9 KiB
Python
Raw Normal View History

2014-07-20 23:39:51 +02:00
#!/usr/bin/python
2014-08-10 22:10:00 +02:00
# -*- coding: utf-8 -*-
2014-07-20 23:39:51 +02:00
2014-08-10 22:10:00 +02:00
import sys, os, re, time, urllib, pickle, random, HTMLParser
2014-07-20 23:39:51 +02:00
# --- I/O tuning -------------------------------------------------------------
BUFSIZ = 8192          # upper bound for a single read (page body / event file)
delay = 0.100          # seconds slept between two scans of the event directory
bot_user = 'urlbot'    # nick of the bot itself

# --- paths ------------------------------------------------------------------
# The base directory is taken from the command line only when exactly one
# argument was given; otherwise the current directory is used.
basedir = sys.argv[1] if 2 == len(sys.argv) else '.'
event_files_dir = os.path.join(basedir, 'event_files')
fifo_path = os.path.join(basedir, 'cmdfifo')

# --- rate limiting: 5 messages per 10 minutes -------------------------------
hist_max_count = 5
hist_max_time = 10 * 60   # seconds
hist_ts = []              # timestamps of the most recent requests
hist_flag = True          # True while the "rate limited" notice may be sent

# --- statistics -------------------------------------------------------------
# Stored negated so that `uptime + time.time()` yields the elapsed seconds.
uptime = -time.time()
request_counter = 0
2014-07-21 04:54:50 +02:00
2014-07-20 23:39:51 +02:00
def debug_enabled():
    """Tell whether debug mode is active.

    Hard-wired switch: edit the returned constant to toggle it. When it is
    on, chat_write() prints messages instead of writing them to the FIFO.
    """
    # change to True to enable debug mode
    return False
def e(data):
    """Return `data` in a form that is safe to write to a byte stream.

    * falsy input (None, '', 0, ...) -> the two-character string "''"
    * unicode objects                -> UTF-8 encoded byte string
    * byte strings                   -> 'string-escape' encoded byte string
    * anything else                  -> returned unchanged

    Only exact types are matched; subclasses of unicode/str fall through to
    the "unchanged" case.
    """
    if not data:
        return "''"

    kind = type(data)
    if kind is unicode:
        return data.encode('utf8')
    if kind is str:
        return data.encode('string-escape')
    return data
2014-07-20 23:39:51 +02:00
def logger(severity, message):
    """Write one log line to stderr.

    The line has the shape ``<argv0> <YYYY-MM-DD.HH:MM:SS> <severity>:
    <message>`` and is passed through e() before being written.

    severity -- free-form label; callers in this file use 'err', 'warn'
                and 'info', but the value is not validated
    message  -- text to log
    """
    timestamp = time.strftime('%Y-%m-%d.%H:%M:%S')
    line = '%s %s %s: %s' % (sys.argv[0], timestamp, severity, message)
    sys.stderr.write(e(line) + '\n')
2014-07-21 00:53:26 +02:00
class urllib_user_agent_wrapper(urllib.FancyURLopener):
version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0'''
2014-07-21 00:53:26 +02:00
def fetch_page(url):
    """Download the beginning of `url`.

    At most BUFSIZ bytes of the body are read; anything beyond is ignored.

    Returns a tuple ``(html, headers)`` on success, or ``(None, None)`` when
    an IOError occurred (the failure is logged with severity 'warn').
    """
    logger('info', 'fetching page ' + url)
    try:
        # urllib.urlopen() uses the module-level opener, so installing our
        # subclass here makes the request carry the custom user agent.
        urllib._urlopener = urllib_user_agent_wrapper()
        response = urllib.urlopen(url)
        try:
            html = response.read(BUFSIZ)  # ignore more than BUFSIZ
        finally:
            # release the connection even if reading fails
            response.close()
        return (html, response.headers)
    except IOError as err:
        # `err.errno` may be an int or None, so concatenating it directly
        # would raise TypeError inside the handler; str(err) is always safe.
        # The name `err` also avoids shadowing the module-level helper e().
        logger('warn', 'failed: ' + str(err))

    return (None, None)
2014-07-21 00:53:26 +02:00
def extract_title(url):
    """Fetch `url` and try to pull the contents of its <title> element.

    Returns a ``(status, text)`` tuple:

    *  0 -- title found; `text` is the title with HTML entities unescaped
    *  1 -- content type is not ``text/*``; `text` is the content type
    *  2 -- page fetched but no title matched; `text` is 'no title'
    *  3 -- the URL contains 'repo/urlbot.git'; `text` is a canned reply
    * -1 -- nothing was fetched; `text` is 'error'

    Relies on the module-level global `parser` for unescaping.
    """
    if 'repo/urlbot.git' in url:
        logger('info', 'repo URL found: ' + url)
        return (3, 'wee, that looks like my home repo!')

    logger('info', 'extracting title from ' + url)

    html, headers = fetch_page(url)
    if not html:
        return (-1, 'error')

    # Refuse anything the server does not label as text.
    if 'content-type' in headers:
        content_type = headers['content-type']
        if not content_type.startswith('text/'):
            return (1, content_type)

    found = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M | re.IGNORECASE)
    if not found:
        return (2, 'no title')

    return (0, parser.unescape(found.group(1)))
2014-07-20 23:39:51 +02:00
def chat_write(message, prefix='/say '):
    """Send `message` to the chat via the command FIFO.

    The line ``prefix + message + '\\n'`` is UTF-8 encoded and written to
    `fifo_path`. In debug mode the message is printed to stdout instead and
    the FIFO is not touched. Every call increments `request_counter`.

    message -- text to send
    prefix  -- command put in front of the message (default '/say ')

    An IOError while opening or writing the FIFO is logged, not raised.
    """
    global request_counter
    request_counter += 1

    if debug_enabled():
        # parenthesised single argument: same output as the print statement
        print(message)
        return

    # Build the payload first so that a conversion error cannot happen
    # while the FIFO is already open.
    msg = unicode(prefix) + unicode(message) + '\n'
    payload = msg.encode('utf8')

    try:
        # `with` guarantees the handle is closed even if write() fails
        with open(fifo_path, 'wb') as fd:
            fd.write(payload)
    except IOError:
        logger('err', "couldn't print to fifo " + fifo_path)
2014-07-21 04:54:50 +02:00
def ratelimit_exceeded():
    """Record one request and report whether the rate limit is exceeded.

    The current time is appended to `hist_ts`. Once more than
    `hist_max_count` timestamps are stored, the oldest one is dropped; if it
    is younger than `hist_max_time` seconds the limit counts as exceeded.

    The chat notice is sent only for the first exceeded request in a row
    (tracked by `hist_flag`); every exceeded request is logged.

    Returns True when the caller must not send anything, False otherwise.
    """
    global hist_flag

    now = time.time()
    hist_ts.append(now)

    if len(hist_ts) > hist_max_count:
        oldest = hist_ts.pop(0)
        if (now - oldest) < hist_max_time:
            if hist_flag:
                hist_flag = False
                retry_at = time.strftime('%T %Z', time.localtime(hist_ts[0] + hist_max_time))
                chat_write('(rate limited to %d messages in %d seconds, try again at %s)' % (hist_max_count, hist_max_time, retry_at))

            logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts))
            return True

    # Below the limit again: allow the notice to be sent next time.
    hist_flag = True
    return False
2014-07-20 23:39:51 +02:00
def extract_url(data):
    """Announce the title of every http(s) URL found in `data`.

    For each URL the rate limiter is consulted first; then the title is
    looked up and a matching message is written to the chat. URLs pointing
    at non-text content are only logged.

    Returns
      True  -- at least one message was sent
      False -- processing stopped because the rate limit was hit
      None  -- no message was sent (no URL, or only non-text URLs)
    """
    ret = None

    for url in re.findall(r"(https?://[^\s>]+)", data):
        if ratelimit_exceeded():
            return False

        status, title = extract_title(url)

        # Non-text content: log it and move on without posting.
        if 1 == status:
            logger('info', 'no message sent for non-text %s (%s)' % (url, title))
            continue

        if 0 == status:
            message = 'Title: %s: %s' % (title.strip(), e(url))
        elif 2 == status:
            message = 'No title: %s' % (e(url))
        elif 3 == status:
            message = title
        else:
            message = 'some error occurred when fetching %s' % e(url)

        # Keep the message on one line; a raw newline would end the command.
        message = message.replace('\n', '\\n')

        logger('info', 'printing ' + message)
        chat_write(message)
        ret = True

    return ret
2014-08-01 20:15:23 +02:00
def mental_ill(data):
    """Return True if `data` contains three or more '!' / '?' in a row.

    Exclamation and question marks may be mixed within a run; any other
    character resets the count.
    """
    min_ill = 3
    streak = 0

    for ch in data:
        streak = streak + 1 if ch in ('!', '?') else 0
        if streak >= min_ill:
            return True

    return False
def parse_other(data):
    """React to messages that are neither URLs nor bot commands.

    Currently the only reaction is a remark addressed to the sender when the
    message contains a run of exclamation/question marks (see mental_ill()).

    Returns False when the reply was suppressed by the rate limiter,
    True otherwise.
    """
    if not mental_ill(data):
        return True

    if ratelimit_exceeded():
        return False

    # The first space-separated word, without angle brackets, is the sender.
    reply_user = data.split(' ')[0].strip('<>')
    chat_write('''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % reply_user)
    return True
def parse_commands(data):
    """Handle a message that addresses the bot.

    The message counts as addressed when its second space-separated word
    starts with `bot_user`. The keywords are then looked for anywhere in the
    message, in this order: 'hangup', 'command', 'unikot', 'uptime', 'ping',
    'info'; anything else gets a short self-description.

    'hangup' is handled before the rate limiter is consulted.

    Returns False when the reply was suppressed by the rate limiter,
    None in every other case.
    """
    words = data.split(' ')

    # need at least two words
    if len(words) < 2:
        return None

    # only react if the beginning of the text matches bot_user
    if not words[1].startswith(bot_user):
        return None

    reply_user = words[0].strip('<>')

    if 'hangup' in data:
        chat_write('', prefix='/quit')
        logger('warn', 'received hangup: ' + data)
        return None

    if ratelimit_exceeded():
        return False

    if 'command' in data:
        chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime'"""))
    elif 'unikot' in data:
        for row in (u''': ┌────────┐''', u''': │Unicode!│''', u''': └────────┘'''):
            chat_write(reply_user + row)
    elif 'uptime' in data:
        seconds = int(uptime + time.time())
        plural_uptime = '' if 1 == seconds else 's'
        plural_request = '' if 1 == request_counter else 's'
        chat_write(reply_user + (''': happily serving for %d second%s, %d request%s so far.''' % (seconds, plural_uptime, request_counter, plural_request)))
        logger('info', 'sent statistics')
    elif 'ping' in data:
        if 0 == random.randint(0, 3):  # 1:4
            chat_write(reply_user + ''': peng (You're dead now.)''')
            logger('info', 'sent pong (variant)')
        else:
            chat_write(reply_user + ''': pong''')
            logger('info', 'sent pong')
    elif 'info' in data:
        chat_write(reply_user + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' % (hist_max_count, hist_max_time)))
        logger('info', 'sent long info')
    else:
        chat_write(reply_user + (''': I'm a bot (highlight me with 'info' for more information).'''))
        logger('info', 'sent short info')
2014-07-20 23:39:51 +02:00
def parse_delete(filepath):
    """Process one event file and delete it afterwards.

    At most BUFSIZ bytes are read. The content is ignored when it comes
    from the bot itself (the characters right after the first one spell
    `bot_user` -- presumably lines look like '<nick> text'; confirm against
    the event file format) or when it contains the debianforum.de welcome
    banner. Otherwise URLs are handled first; only if extract_url() did not
    return True are commands and other triggers evaluated.

    Returns False if the file could not be opened, None otherwise.
    An error from os.remove() is deliberately not caught.
    """
    try:
        fd = open(filepath, 'rb')
    except IOError:
        logger('err', 'file has vanished: ' + filepath)
        return False

    # Close the file as soon as its content is in memory, so the handle is
    # neither held during slow network fetches nor leaked when one of the
    # handlers below raises.
    try:
        content = fd.read(BUFSIZ)  # ignore more than BUFSIZ
    finally:
        fd.close()

    from_bot = content[1:1 + len(bot_user)] == bot_user
    if not from_bot and 'Willkommen bei debianforum.de' not in content:
        if True != extract_url(content):
            parse_commands(content)
            parse_other(content)

    os.remove(filepath)  # probably better crash here
def print_version_git():
    """Print the program name with the current Git revision to stdout.

    Runs ``git log -n 1 --oneline --abbrev-commit`` in the current working
    directory and prints its first output line. When git exits with a
    non-zero status, or cannot be started at all, "(unknown version)" is
    printed instead.
    """
    import subprocess

    cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit']
    unknown = sys.argv[0] + " (unknown version)"

    try:
        p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE)
    except OSError:
        # git is not installed / not executable: not a reason to abort
        print(unknown)
        return

    # communicate() drains and closes the pipe and reaps the child, which
    # avoids both a leaked descriptor and a wait() on a full pipe.
    output = p.communicate()[0]
    lines = output.splitlines()
    first_line = lines[0] if lines else ''

    if 0 == p.returncode:
        print(sys.argv[0] + " version (Git) '%s'" % e(first_line.strip()))
    else:
        print(unknown)
2014-08-09 23:50:40 +02:00
if '__main__' == __name__:
    print_version_git()

    # extract_title() looks this object up as a module-level global.
    parser = HTMLParser.HTMLParser()

    # Poll the event directory forever; Ctrl-C ends the loop with the
    # conventional exit status 130.
    while True:
        try:
            for entry in os.listdir(event_files_dir):
                if entry.startswith('mcabber-'):
                    parse_delete(os.path.join(event_files_dir, entry))

            time.sleep(delay)
        except KeyboardInterrupt:
            exit(130)