2014-07-20 23:39:51 +02:00
#!/usr/bin/python
2014-08-10 22:10:00 +02:00
# -*- coding: utf-8 -*-
2014-07-20 23:39:51 +02:00
2014-09-21 20:10:37 +02:00
import sys , os , re , time , urllib , pickle , random , HTMLParser , stat
2014-09-27 06:07:44 +02:00
from local_config import conf , set_conf
2014-07-20 23:39:51 +02:00
# Upper bound, in bytes, for a single read of a fetched page or an event file.
BUFSIZ = 8192

# Pause between two scans of the event directory, in seconds.
delay = 0.100

# Working directory: the sole command line argument when exactly one is
# given, the default literal otherwise.
basedir = sys.argv[1] if len(sys.argv) == 2 else ' . '
event_files_dir = os.path.join(basedir, ' event_files ')
fifo_path = os.path.join(basedir, ' cmdfifo ')

# Rate limiter state (historically "5 messages per 10 minutes"; the limits
# actually applied are read from conf() by the ratelimit_* functions).
# hist_ts holds the timestamps recorded by ratelimit_touch(); hist_flag is
# True while the "rate limited" notice has not yet been written to the chat.
hist_ts = list()
hist_flag = True

# HTMLParser instance, created on first use by extract_title().
parser = None
2014-07-20 23:39:51 +02:00
def debug_enabled():
    """Tell whether debug mode is active.

    Hard-wired to False; change the returned constant to True to make
    chat_write() print messages instead of writing them to the FIFO.
    """
    return False
def e(data):
    """Return *data* in a form that can be embedded in a byte string.

    - falsy input (None, empty string, 0, ...) yields a quoted-empty
      placeholder literal,
    - unicode objects (including subclasses) are encoded to bytes,
    - str objects (including subclasses) are passed through the
      string-escape codec so that non-printable bytes become visible,
    - anything else is returned untouched.
    """
    if not data:
        return " ' ' "

    # isinstance() instead of an exact type comparison, so that subclasses
    # of unicode/str are encoded as well instead of slipping through.
    if isinstance(data, unicode):
        return data.encode(' utf8 ')
    if isinstance(data, str):
        return data.encode(' string-escape ')
    return data
2014-07-20 23:39:51 +02:00
def logger(severity, message):
    """Write one log line to stderr.

    The line consists of the program name (sys.argv[0]), the current local
    time, *severity* and *message*.  The severity is not validated; any
    value is written as-is.  The finished line is passed through e() before
    it is written.
    """
    timestamp = time.strftime(' % Y- % m- %d . % H: % M: % S ')
    fields = (sys.argv[0], timestamp, severity, message)
    line = ' %s %s %s : %s ' % fields
    sys.stderr.write(e(line) + ' \n ')
2014-07-21 00:53:26 +02:00
2014-08-04 19:32:40 +02:00
class urllib_user_agent_wrapper ( urllib . FancyURLopener ) :
version = ''' Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0 '''
2014-07-21 00:53:26 +02:00
def fetch_page(url):
    """Download the beginning of *url*.

    At most BUFSIZ bytes of the body are read; the rest is ignored.

    Returns a tuple (body, headers) on success and (None, None) when the
    request fails with an IOError.
    """
    logger(' info ', ' fetching page ' + url)

    try:
        # Install our opener so that urllib.urlopen() sends the browser-like
        # User-Agent defined by urllib_user_agent_wrapper.
        urllib._urlopener = urllib_user_agent_wrapper()
        response = urllib.urlopen(url)
        try:
            html = response.read(BUFSIZ)  # ignore more than BUFSIZ
        finally:
            # close the connection even when reading fails
            response.close()
        return (html, response.headers)
    except IOError as err:
        # str(err) rather than err.errno: errno is an int (or None) and
        # cannot be concatenated to a string; the name `err` also avoids
        # shadowing the module-level e() helper.
        logger(' warn ', ' failed: ' + str(err))

    return (None, None)
2014-07-21 00:53:26 +02:00
def extract_title(url):
    """Fetch *url* and extract the text of its <title> element.

    Returns a tuple (status, text):
       0  title found; text is the title with HTML entities expanded
       1  the content-type is not textual; text is the content-type header
       2  the page was fetched but contains no title in the part read
       3  the URL refers to the bot's own repository; text is a canned reply
      -1  the page could not be fetched (or its body was empty)
    """
    global parser

    if ' repo/urlbot.git ' in url:
        logger(' info ', ' repo URL found: ' + url)
        return (3, ' wee, that looks like my home repo! ')

    logger(' info ', ' extracting title from ' + url)

    (html, headers) = fetch_page(url)
    if not html:
        return (- 1, ' error ')

    charset = ' '
    if ' content-type ' in headers:
        content_type = headers[' content-type ']
        logger(' debug ', ' content-type: ' + content_type)

        if not content_type.startswith(' text/ '):
            return (1, content_type)

        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub()/re.subn() is `count`, not `flags`.
        # subn() also tells us whether a charset parameter was present at
        # all; without one the header value itself must not be mistaken
        # for a charset name.
        charset, hits = re.subn(r' .*charset=(?P<charset> \ S+).* ',
                                r' \ g<charset> ', content_type,
                                flags=re.IGNORECASE)
        if not hits:
            charset = ' '

    result = re.match(r' .*?<title.*?>(.*?)</title>.*? ', html,
                      re.S | re.M | re.IGNORECASE)
    if not result:
        return (2, ' no title ')

    match = result.groups()[0]

    if parser is None:
        # created lazily and kept in the module-level `parser`
        parser = HTMLParser.HTMLParser()

    if ' ' != charset:
        try:
            match = match.decode(charset)
        except LookupError:
            logger(' warn ', ' invalid charset in ' + headers[' content-type '])
        except UnicodeDecodeError as err:
            # the body does not match the announced charset (or the title
            # was cut off mid-character); continue with the raw bytes
            logger(' warn ', ' cannot decode title: ' + str(err))

    try:
        expanded_html = parser.unescape(match)
    except UnicodeDecodeError as err:  # idk why this can happen, but it does
        logger(' warn ', ' parser.unescape() expoded here: ' + str(err))
        expanded_html = match

    return (0, expanded_html)
2014-07-20 23:39:51 +02:00
2014-07-21 09:39:59 +02:00
def chat_write(message, prefix=' /say '):
    """Send *message* to the chat by writing a command line to the FIFO.

    The line written is *prefix* followed by *message* and a line
    terminator.  Every call increments the request_counter configuration
    value.  When debug_enabled() is true the message is printed to stdout
    instead and the FIFO is left alone.

    A failure to open or write the FIFO (IOError) is logged, not raised.
    """
    set_conf(' request_counter ', conf(' request_counter ') + 1)

    if debug_enabled():
        print(message)
        return

    # FIXME: somehow, unicode chars can end up inside a <str> message,
    # which seems to make both unicode() and ''.encode('utf8') fail.
    try:
        msg = unicode(prefix) + unicode(message) + ' \n '
        msg = msg.encode(' utf8 ')
    except UnicodeDecodeError:
        # fall back to the raw, unconverted concatenation
        msg = prefix + message + ' \n '

    try:
        # the context manager closes the FIFO even when write() fails
        with open(fifo_path, ' wb ') as fd:
            fd.write(msg)
    except IOError:
        logger(' err ', " couldn ' t print to fifo " + fifo_path)
2014-07-21 02:27:54 +02:00
2014-09-27 05:51:18 +02:00
def ratelimit_touch(ignored=None):  # FIXME: separate counters
    """Record the current time as one more rate-limited event.

    The timestamp is appended to the shared history hist_ts.  *ignored* is
    accepted for interface compatibility and not used.
    """
    hist_ts.append(time.time())

    # Keep one entry MORE than hist_max_count.  ratelimit_exceeded() only
    # looks at the history when it is longer than hist_max_count; trimming
    # it down to exactly hist_max_count here would make that test
    # impossible to satisfy and thereby disable rate limiting altogether.
    # The trim still bounds the list when callers never ask
    # ratelimit_exceeded().
    if conf(' hist_max_count ') + 1 < len(hist_ts):
        hist_ts.pop(0)
def ratelimit_exceeded(ignored=None):  # FIXME: separate counters
    """Tell whether the rate limit is currently exceeded.

    When the history hist_ts holds more than hist_max_count timestamps the
    oldest one is removed; if it is younger than hist_max_time seconds the
    limit counts as exceeded.  In that case a warning is logged and, only
    once per streak of exceeded calls (tracked in hist_flag), a notice is
    written to the chat.

    *ignored* is accepted for interface compatibility and not used.
    Returns True when the limit is exceeded, False otherwise.
    """
    global hist_flag

    if len(hist_ts) > conf(' hist_max_count '):
        oldest = hist_ts.pop(0)
        age = time.time() - oldest

        if age < conf(' hist_max_time '):
            if hist_flag:
                # announce only the first hit of a streak
                hist_flag = False
                notice = ' (rate limited to %d messages in %d seconds, try again at %s ) ' % (
                    conf(' hist_max_count '),
                    conf(' hist_max_time '),
                    time.strftime(
                        ' % T % Z ',
                        time.localtime(hist_ts[0] + conf(' hist_max_time '))
                    ),
                )
                chat_write(notice)

            logger(' warn ', ' rate limiting exceeded: ' + pickle.dumps(hist_ts))
            return True

    # not limited: re-arm the chat notice for the next streak
    hist_flag = True
    return False
2014-07-20 23:39:51 +02:00
def extract_url(data):
    """Announce the titles of all URLs found in *data*.

    Every URL counts against the rate limit before it is fetched.  What is
    written to the chat depends on the status from extract_title();
    non-text content is only logged.

    Returns True when at least one message was written, False as soon as
    the rate limit is hit (even if earlier URLs were handled), and None
    when no URL led to a message.
    """
    handled = None

    for link in re.findall(r" (https?://[^ \ s>]+) ", data):
        ratelimit_touch()
        if ratelimit_exceeded():
            return False

        status, title = extract_title(link)

        if 1 == status:
            # non-text content: stay silent in the chat
            logger(' info ', ' no message sent for non-text %s ( %s ) ' % (link, title))
            continue

        if 0 == status:
            message = ' Title: %s : %s ' % (title.strip(), e(link))
        elif 2 == status:
            message = ' No title: %s ' % (e(link))
        elif 3 == status:
            message = title
        else:
            message = ' some error occurred when fetching %s ' % e(link)

        # a chat command must stay on a single line
        message = message.replace(' \n ', ' \\ n ')

        logger(' info ', ' printing ' + message)
        chat_write(message)
        handled = True

    return handled
2014-07-21 02:27:54 +02:00
2014-09-14 12:05:01 +02:00
def parse_pn(data):
    """Handle a private message: log it and deliberately do not answer.

    The sender cannot be determined reliably from the raw line: a user
    named 'foo> ' writing ' > bar' is indistinguishable from a user 'foo'
    writing '> > bar'.  Replying would therefore be unsafe.

    Always returns False.
    """
    logger(' warn ', ' received PN: ' + data)
    return False
2014-07-20 23:39:51 +02:00
def parse_delete(filepath):
    """Read one event file, delete it, and act on its content.

    At most BUFSIZ bytes are read.  Nothing further happens when the
    content
      - carries the bot's own user name right after its first character,
      - announces a subject change,
      - is a private message (handed to parse_pn()),
      - asks for no spoiler.
    Everything else is searched for URLs; when extract_url() does not
    return True the content is passed on to the plugins.

    Returns False when the file cannot be opened, None otherwise.
    """
    try:
        fd = open(filepath, ' rb ')
    except IOError:
        logger(' err ', ' file has vanished: ' + filepath)
        return False

    # the context manager closes the file even when read() fails
    with fd:
        content = fd.read(BUFSIZ)  # ignore more than BUFSIZ

    os.remove(filepath)  # probably better crash here

    # NOTE(review): presumably skips the bot's own messages, assuming the
    # line starts with one delimiter character followed by the sender name
    # -- confirm against the event file format.
    if content[1:1 + len(conf(' bot_user '))] == conf(' bot_user '):
        return

    if ' has set the subject to: ' in content:
        return

    if content.startswith(' PRIV# '):
        parse_pn(content)
        return

    if ' nospoiler ' in content:
        logger(' info ', " no spoiler for: " + content)
        return

    if True != extract_url(content):
        plugins.data_parse_commands(content)
        plugins.data_parse_other(content)
    return
2014-07-20 23:39:51 +02:00
2014-08-20 03:12:06 +02:00
def get_version_git():
    """Describe the running version using the latest Git commit.

    Runs `git log` for a one-line, abbreviated summary of the newest commit
    in the current working directory.

    Returns a version string containing that summary, or a generic
    "unknown version" string when git cannot be started or exits with a
    non-zero status.
    """
    import subprocess

    cmd = [' git ', ' log ', ' -n ', ' 1 ', ' --oneline ', ' --abbrev-commit ']

    try:
        p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE)
    except OSError:
        # git is not installed or not executable: not a reason to crash
        return " (unknown version) "

    # communicate() drains and closes the pipe and waits for the process
    output, _ = p.communicate()

    if 0 == p.returncode:
        lines = output.splitlines()
        first_line = lines[0] if lines else output
        return " version (Git) ' %s ' " % e(first_line.strip())
    else:
        return " (unknown version) "
2014-07-27 08:04:25 +02:00
2014-09-27 03:40:27 +02:00
import plugins
2014-09-27 06:03:04 +02:00
2014-09-27 03:40:27 +02:00
# Hand the plugins module the names it uses from this file by setting them
# as attributes on it (presumably to avoid a circular import -- verify
# against plugins.py).

# standard library modules
plugins.random = random
plugins.time = time

# configuration access and logging
plugins.conf = conf
plugins.logger = logger

# chat output and rate limiting
plugins.chat_write = chat_write
plugins.ratelimit_touch = ratelimit_touch
plugins.ratelimit_exceeded = ratelimit_exceeded

# all names are in place; now let the plugins register themselves
plugins.register_all()
2014-08-09 23:50:40 +02:00
if ' __main__ ' == __name__:
    # Script entry point: announce the version, make sure the command FIFO
    # is usable, then poll the event directory until interrupted.
    set_conf(' version ', get_version_git())
    print(sys.argv[0] + ' ' + conf(' version '))

    # sys.exit() instead of the bare exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    if not os.path.exists(fifo_path):
        logger(' error ', ' fifo_path " %s " does not exist, exiting ' % fifo_path)
        sys.exit(1)

    if not stat.S_ISFIFO(os.stat(fifo_path).st_mode):
        logger(' error ', ' fifo_path " %s " is not a FIFO, exiting ' % fifo_path)
        sys.exit(1)

    while 1:
        try:
            # every matching event file is processed and removed
            for f in os.listdir(event_files_dir):
                if ' mcabber- ' == f[:8]:
                    parse_delete(os.path.join(event_files_dir, f))

            time.sleep(delay)
        except KeyboardInterrupt:
            print(" ")
            # 130 is the conventional exit status after SIGINT (128 + 2)
            sys.exit(130)