1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00
Files
urlbot-native-trex/eventlooper.py

131 lines
2.8 KiB
Python
Raw Normal View History

2014-07-20 23:39:51 +02:00
#!/usr/bin/python
2014-07-21 04:54:50 +02:00
import sys, os, re, time, urllib, pickle
2014-07-20 23:39:51 +02:00
# Upper bound, in bytes, for a single read of a fetched page or event file.
BUFSIZ = 8192
# Pause between two scans of the event directory, in seconds.
delay = 0.100
# Name of the bot; incoming messages are compared against it.
bot_user = 'urlbot'

# The base directory comes from the command line only when exactly one
# argument is given; otherwise the current directory is used.
basedir = sys.argv[1] if len(sys.argv) == 2 else '.'
event_files_dir = os.path.join(basedir, 'event_files')
fifo_path = os.path.join(basedir, 'cmdfifo')

# rate limiting to 5 messages per 10 minutes
hist_max_count = 5
hist_max_time = 600  # seconds
hist_ts = []  # timestamps recorded by ratelimit_exceeded()
2014-07-20 23:39:51 +02:00
def debug_enabled():
    """Tell whether debug mode is on; it is hard-wired to off.

    Change the value below to True to switch debug mode on.
    """
    enabled = False
    return enabled
def e(data):
    """Return *data* escaped with the 'string-escape' codec.

    Falsy input (empty string, None, ...) is rendered as the literal
    two-character string ``''`` instead.
    """
    if not data:
        return "''"
    # 'string-escape' is a Python 2 codec
    return data.encode('string-escape')
2014-07-20 23:39:51 +02:00
def logger(severity, message):
    """Write one escaped log line to stderr.

    The line has the form ``<program>: <severity>: <message>``. It is
    passed through e() before being written and is terminated with a
    newline.

    severity is used verbatim; it is not checked against a fixed set of
    levels (callers in this file pass 'err', 'warn' or 'info').
    """
    line = '%s: %s: %s' % (sys.argv[0], severity, message)
    sys.stderr.write(e(line) + '\n')
def fetch_page(url):
    """Download the beginning of the page at *url*.

    At most BUFSIZ bytes are read.

    Returns the data read, or None when the request fails with an
    IOError (the failure is logged as a warning).
    """
    logger('info', 'fetching page ' + url)
    try:
        response = urllib.urlopen(url)
        try:
            html = response.read(BUFSIZ)
        finally:
            # close the connection even when reading fails
            response.close()
    except IOError as err:
        # err.errno is an int (or None) and cannot be concatenated to a
        # string, so the whole exception is formatted instead.  The name
        # 'err' also avoids shadowing the module-level e() helper.
        logger('warn', 'failed: ' + str(err))
        return None
    return html
2014-07-21 00:53:26 +02:00
def extract_title(url):
    """Fetch *url* and return the text of its first <title> element.

    The tag is matched case-insensitively. Runs of whitespace inside the
    title, including line breaks, are collapsed to single spaces and
    leading/trailing whitespace is removed.

    Returns None when the page could not be fetched or contains no
    title; an empty title yields an empty string.
    """
    logger('info', 'extracting title from ' + url)

    html = fetch_page(url)
    if not html:
        return None

    # re.S lets '.' span line breaks, so a title spread over several
    # lines is still found; re.I also accepts <TITLE>.
    result = re.search(r'<title.*?>(.*?)</title>', html, re.S | re.I)
    if not result:
        return None

    # The title is embedded in a one-line chat message by the caller, so
    # collapse whitespace the way HTML rendering would.
    return ' '.join(result.group(1).split())
2014-07-20 23:39:51 +02:00
def chat_write(message):
    """Send *message* to the chat by writing a '/say' command to the FIFO.

    The command is written to fifo_path. An IOError while opening,
    writing or closing is logged as an error and otherwise ignored.
    """
    try:
        # 'with' closes the FIFO even when the write fails
        with open(fifo_path, 'wb') as fd:
            fd.write('/say ' + message)
    except IOError:
        logger('err', "couldn't print to fifo " + fifo_path)
2014-07-21 04:54:50 +02:00
def ratelimit_exceeded():
    """Record the current call and report whether the rate limit is hit.

    The time of every call is appended to hist_ts. Once more than
    hist_max_count entries are stored, the oldest one is dropped; the
    limit counts as exceeded when that dropped entry is less than
    hist_max_time seconds old. Calls that end up rejected are recorded
    as well.

    Returns True when the limit is exceeded, False otherwise.
    """
    now = time.time()
    hist_ts.append(now)

    # still within the allowed number of messages
    if len(hist_ts) <= hist_max_count:
        return False

    oldest = hist_ts.pop(0)
    # the window has already moved past the dropped entry
    if hist_max_time <= (now - oldest):
        return False

    logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts))
    return True
2014-07-20 23:39:51 +02:00
def extract_url(data):
    """Announce the page title of every http(s) URL found in *data*.

    For each URL the title is looked up and a message is emitted: printed
    to stdout in debug mode, sent to the chat otherwise. Processing stops
    and False is returned as soon as the rate limit is exceeded; in every
    other case the function returns None.
    """
    for url in re.findall(r"(https?://[^\s]+)", data):
        if ratelimit_exceeded():
            return False

        title = extract_title(url)
        shown_url = e(url)
        if title:
            message = 'Title: %s: %s' % (title, shown_url)
        else:
            message = 'some error occured when fetching %s' % shown_url

        logger('info', 'printing ' + message)
        if not debug_enabled():
            chat_write(message)
        else:
            # with a single argument this prints the same text whether
            # print is a statement or a function
            print(message)
def parse_commands(data):
    """Reply with a short self-description when the bot is addressed.

    *data* is split on single spaces. The first word is treated as the
    sender and the second as the addressee; a reply is sent through
    chat_write() when the addressee starts with bot_user. Input with
    fewer than three words is ignored.
    """
    words = data.split(' ')
    # sender, addressee and at least one further word are required
    if len(words) < 3:
        return

    sender, addressee = words[0], words[1]
    # reply if beginning of the text matches bot_user
    if addressee.startswith(bot_user):
        # [1:-1] drops the first and last character of the sender word --
        # presumably the brackets around the nick; TODO confirm against
        # the event file format
        chat_write(sender[1:-1] + ''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae.''')
2014-07-20 23:39:51 +02:00
def parse_delete(filepath):
    """Process one event file and delete it afterwards.

    Up to BUFSIZ bytes are read from *filepath*. Unless the text that
    follows the first character starts with bot_user (presumably the
    bot's own message), the content is scanned for URLs and for bot
    commands. The file is removed when done.

    Returns -1 if the file could not be opened, None otherwise.
    """
    try:
        fd = open(filepath, 'rb')
    except IOError:
        # Only an I/O failure means the file is gone; a bare except would
        # also swallow KeyboardInterrupt, which the main loop handles.
        logger('err', 'file has vanished: ' + filepath)
        return -1

    # 'with' closes the file even when reading fails
    with fd:
        content = fd.read(BUFSIZ)  # ignore more than BUFSIZ

    if content[1:1 + len(bot_user)] != bot_user:
        extract_url(content)
        parse_commands(content)

    os.remove(filepath)  # probably better crash here
# Main loop: scan the event directory forever and hand every file whose
# name starts with 'mcabber-' to parse_delete().  A KeyboardInterrupt
# ends the program with exit status 130.
while 1:
    try:
        for f in os.listdir(event_files_dir):
            if f.startswith('mcabber-'):
                parse_delete(os.path.join(event_files_dir, f))

        # pause once per directory scan
        time.sleep(delay)
    except KeyboardInterrupt:
        # sys.exit() rather than exit(): the latter is a helper added by
        # the site module for interactive use and is not always defined
        sys.exit(130)