2014-07-20 23:39:51 +02:00
#!/usr/bin/python
2014-07-21 04:54:50 +02:00
import sys , os , re , time , urllib , pickle
2014-07-20 23:39:51 +02:00
# --- I/O tuning -----------------------------------------------------------
BUFSIZ = 8192  # upper bound for every read() done by this script
delay = 0.100  # seconds to sleep between two scans of the event directory

# nick of the bot itself; used to recognise highlights and own messages
bot_user = 'urlbot'

# --- paths ----------------------------------------------------------------
# the base directory may be given as the one and only command line argument
basedir = sys.argv[1] if 2 == len(sys.argv) else '.'
event_files_dir = os.path.join(basedir, 'event_files')
fifo_path = os.path.join(basedir, 'cmdfifo')

# --- rate limiting --------------------------------------------------------
# rate limiting to 5 messages per 10 minutes
hist_max_count = 5
hist_max_time = 10 * 60
hist_ts = []      # timestamps of the most recent requests
hist_flag = True  # True while the "rate limited" notice may still be posted

# --- statistics -----------------------------------------------------------
# stored negated so that `uptime + time.time()` yields the elapsed seconds
uptime = -time.time()
request_counter = 0
2014-07-21 04:54:50 +02:00
2014-07-20 23:39:51 +02:00
def debug_enabled():
    """Tell whether the debug switch is on.

    The switch is hard-wired: edit the returned constant to flip it.
    """
    return False
def e(data):
    """Return `data` with the 'string-escape' codec applied.

    Any falsy value (empty string, None, ...) is rendered as the two
    characters '' instead of being encoded.
    """
    return data.encode('string-escape') if data else "''"
2014-07-20 23:39:51 +02:00
def logger(severity, message):
    """Write one escaped log line to stderr.

    The line has the form '<argv[0]>: <severity>: <message>'. `severity`
    is printed verbatim; no filtering by severity takes place.
    """
    line = '%s: %s: %s' % (sys.argv[0], severity, message)
    sys.stderr.write(e(line) + '\n')
2014-08-04 19:32:40 +02:00
class urllib_user_agent_wrapper ( urllib . FancyURLopener ) :
    """FancyURLopener subclass that overrides nothing but `version`.

    urllib's opener classes send the `version` class attribute as the
    User-Agent header, so requests made through this opener identify
    themselves with the browser string below.
    """
    # NOTE(review): the leading/trailing blank inside the literal looks like
    # an artifact of how this file was extracted -- confirm against the
    # repository before relying on the exact value.
    version = ''' Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0 '''
2014-07-21 00:53:26 +02:00
def fetch_page(url):
    """Download the beginning of `url`.

    Returns a tuple (html, headers): `html` holds at most BUFSIZ bytes of
    the response body and `headers` is the response's header object.
    When the request fails with an IOError the failure is logged and
    (None, None) is returned, so callers can always unpack the result.
    """
    logger('info', 'fetching page ' + url)
    try:
        # make urllib.urlopen() use the opener with the custom User-Agent
        urllib._urlopener = urllib_user_agent_wrapper()
        response = urllib.urlopen(url)
        try:
            html = response.read(BUFSIZ)  # ignore more than BUFSIZ
        finally:
            # release the connection even if read() fails
            response.close()
        return (html, response.headers)
    except IOError as err:
        # `err` (not `e`) so the module-level escape helper is not shadowed;
        # str() because errno may be an int or None and cannot be
        # concatenated to a string directly
        logger('warn', 'failed: ' + str(err))
        return (None, None)
2014-07-21 00:53:26 +02:00
def extract_title(url):
    """Fetch `url` and try to extract the content of its <title> tag.

    Returns a tuple (status, text):
      ( 0, title)         a title was found
      ( 1, content_type)  the content-type header does not start with 'text/'
      ( 2, 'no title')    the fetched data contains no <title>...</title>
      (-1, 'error')       nothing could be fetched
    """
    logger('info', 'extracting title from ' + url)

    fetched = fetch_page(url)
    if not fetched:
        # fetch_page produced no (html, headers) pair at all; unpacking it
        # would raise a TypeError
        return (-1, 'error')
    (html, headers) = fetched

    if html:
        if 'content-type' in headers:
            if 'text/' != headers['content-type'][:len('text/')]:
                return (1, headers['content-type'])

        # only the first BUFSIZ bytes are searched, see fetch_page()
        result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html, re.S | re.M)
        if result:
            return (0, result.groups()[0])
        else:
            return (2, 'no title')

    return (-1, 'error')
2014-07-20 23:39:51 +02:00
2014-07-21 09:39:59 +02:00
def chat_write(message, prefix='/say '):
    """Send `message` to the chat through the command FIFO.

    `prefix` is the command put in front of the message (default '/say ').
    Every call increments the global request_counter. With debugging
    enabled the bare message is printed to stdout instead and the FIFO is
    not touched. An IOError while opening or writing the FIFO is logged
    and not propagated.
    """
    global request_counter
    request_counter += 1

    if debug_enabled():
        print(message)
        return

    try:
        # `with` closes the FIFO even when write() raises
        with open(fifo_path, 'wb') as fifo:
            fifo.write(prefix + message)
    except IOError:
        logger('err', "couldn't print to fifo " + fifo_path)
2014-07-21 02:27:54 +02:00
2014-07-21 04:54:50 +02:00
def ratelimit_exceeded():
    """Record one request and report whether it exceeds the rate limit.

    The current time is appended to hist_ts on every call. Once the list
    holds more than hist_max_count entries the oldest one is dropped; if
    that entry is younger than hist_max_time seconds the limit is exceeded
    and True is returned. A notice is posted to the chat only for the
    first exceeded request of a streak (tracked by hist_flag); every
    exceeded request is logged.
    """
    global hist_flag

    now = time.time()
    hist_ts.append(now)

    # still filling the window: nothing to compare against yet
    if len(hist_ts) <= hist_max_count:
        hist_flag = True
        return False

    oldest = hist_ts.pop(0)
    if now - oldest >= hist_max_time:
        hist_flag = True
        return False

    if hist_flag:
        hist_flag = False
        retry_at = time.localtime(hist_ts[0] + hist_max_time)
        notice = '(rate limited to %d messages in %d seconds, try again at %s)' % (
            hist_max_count,
            hist_max_time,
            time.strftime('%T %Z', retry_at),
        )
        chat_write(notice)

    logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts))
    return True
2014-07-20 23:39:51 +02:00
def extract_url(data):
    """Post a chat message for every http(s) URL found in `data`.

    Returns True when at least one URL was handled, False as soon as the
    rate limit is hit, and None when `data` contains no URL.
    """
    handled = None

    for url in re.findall(r"(https?://[^\s>]+)", data):
        if ratelimit_exceeded():
            return False

        (status, title) = extract_title(url)

        if 0 == status:
            message = 'Title: %s: %s' % (title.strip(), e(url))
        elif 1 == status:
            # of course it's fake, but it looks interesting at least
            glyphs = ",._-+=\\|/*`~\"'"
            glyph = glyphs[int(time.time() % len(glyphs))]
            message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' % (
                e(title), glyph, e(url))
        elif 2 == status:
            message = 'No title: %s' % (e(url))
        else:
            message = 'some error occurred when fetching %s' % e(url)

        # keep the message on a single line
        message = message.replace('\n', '\\n')

        logger('info', 'printing ' + message)
        chat_write(message)
        handled = True

    return handled
2014-07-21 02:27:54 +02:00
2014-08-01 20:15:23 +02:00
def mental_ill(data):
    """Return True if `data` has three or more '!'/'?' characters in a row.

    Both characters count towards the same run, so '!?!' qualifies.
    """
    threshold = 3
    streak = 0
    for ch in data:
        # any other character breaks the run
        streak = streak + 1 if ch in ('!', '?') else 0
        if streak >= threshold:
            return True
    return False
def parse_other(data):
    """React to messages that are neither URLs nor bot commands.

    When mental_ill() flags `data`, a remark addressed to the first word of
    the message (with '<' and '>' stripped) is posted. Returns False if the
    rate limit prevented the remark, True otherwise.
    """
    if mental_ill(data):
        if ratelimit_exceeded():
            return False
        reply_user = data.split(' ')[0].strip('<>')
        chat_write('Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.' % reply_user)
    return True
2014-07-21 02:27:54 +02:00
def parse_commands(data):
    """Answer messages whose second word starts with the bot's nick.

    Recognised keywords, looked up anywhere in `data` in this order:
    'hangup' (send /quit), 'uptime', 'ping', 'info'; anything else gets a
    short self-description. All answers except 'hangup' are subject to the
    rate limit. Returns False when the rate limit was hit, None otherwise.
    """
    words = data.split(' ')
    if len(words) < 2:  # need at least two words
        return

    # reply only if the beginning of the text matches bot_user
    if not words[1].startswith(bot_user):
        return

    reply_user = words[0].strip('<>')

    # emergency exit is never rate limited
    if 'hangup' in data:
        chat_write('', prefix='/quit')
        logger('warn', 'received hangup: ' + data)
        return

    if ratelimit_exceeded():
        return False

    if 'uptime' in data:
        u = int(uptime + time.time())
        plural_uptime = '' if 1 == u else 's'
        plural_request = '' if 1 == request_counter else 's'
        stats = ': happily serving for %d second%s, %d request%s so far.' % (
            u, plural_uptime, request_counter, plural_request)
        chat_write(reply_user + stats)
        logger('info', 'sent statistics')
    elif 'ping' in data:
        if 0 == (int(time.time()) & 3):  # 1:4
            chat_write(reply_user + ": peng (You're dead now.)")
            logger('info', 'sent pong (variant)')
        else:
            chat_write(reply_user + ': pong')
            logger('info', 'sent pong')
    elif 'info' in data:
        long_info = ": I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please)." % (hist_max_count, hist_max_time)
        chat_write(reply_user + long_info)
        logger('info', 'sent long info')
    else:
        chat_write(reply_user + ": I'm a bot (highlight me with 'info' for more information).")
        logger('info', 'sent short info')
2014-07-20 23:39:51 +02:00
def parse_delete(filepath):
    """Process one event file and delete it afterwards.

    At most BUFSIZ bytes are read. Content is ignored when it comes from
    the bot itself (bot_user found at offset 1) or contains the forum's
    welcome text; otherwise it is scanned for URLs and, unless a URL was
    handled, for commands and other triggers.

    Returns False if the file could not be opened, None otherwise.
    """
    try:
        fd = open(filepath, 'rb')
    except IOError:
        logger('err', 'file has vanished: ' + filepath)
        return False

    # read first and close right away: the handlers below may block on
    # network access or raise, and must not keep the file open meanwhile
    try:
        content = fd.read(BUFSIZ)  # ignore more than BUFSIZ
    finally:
        fd.close()

    if content[1:1 + len(bot_user)] != bot_user:
        if 'Willkommen bei debianforum.de' not in content:
            if True != extract_url(content):
                parse_commands(content)
                parse_other(content)

    os.remove(filepath)  # probably better crash here
2014-07-27 08:04:25 +02:00
def print_version_git():
    """Print the script name together with its Git revision.

    Runs `git log -n 1 --oneline --abbrev-commit` and prints the first
    output line. If git cannot be started or exits with a non-zero status,
    '(unknown version)' is printed instead.
    """
    import subprocess
    import sys

    cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit']

    try:
        p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE)
    except OSError:
        # git is not installed or not executable
        print(sys.argv[0] + " (unknown version)")
        return

    # communicate() drains and closes the pipe and waits for git to exit
    output = p.communicate()[0]
    first_line = output.splitlines()[0] if output else output

    if 0 == p.returncode:
        print(sys.argv[0] + " version (Git) '%s'" % e(first_line.strip()))
    else:
        print(sys.argv[0] + " (unknown version)")
print_version_git()

# Main loop: poll the event directory forever, handling and deleting every
# file whose name starts with 'mcabber-', then sleep `delay` seconds.
while True:
    try:
        for f in os.listdir(event_files_dir):
            if f.startswith('mcabber-'):
                parse_delete(os.path.join(event_files_dir, f))

        time.sleep(delay)
    except KeyboardInterrupt:
        # sys.exit rather than the `exit` helper, which the site module
        # installs for interactive use and which may be missing
        sys.exit(130)