From 4f1ebdeebf5eaf3f3f5a145322409eceaecf33d3 Mon Sep 17 00:00:00 2001 From: chat Date: Sun, 20 Jul 2014 23:39:51 +0200 Subject: [PATCH 001/112] init() --- .gitignore | 4 +++ eventlooper.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 .gitignore create mode 100755 eventlooper.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6a07201 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.*swp +cmdfifo +logs/ +event_files/ diff --git a/eventlooper.py b/eventlooper.py new file mode 100755 index 0000000..33913b1 --- /dev/null +++ b/eventlooper.py @@ -0,0 +1,70 @@ +#!/usr/bin/python + +import sys, os, re, time + +BUFSIZ = 8192 +delay = 0.100 # seconds +ignore_user = 'urlbot' + +basedir = '.' +if 2 == len(sys.argv): basedir = sys.argv[1] + +event_files_dir = os.path.join(basedir, 'event_files') +fifo_path = os.path.join(basedir, 'cmdfifo') + +def debug_enabled(): +# return True + return False + +def e(data): + return data.encode('string-escape') + +def logger(severity, message): + if \ + 'err' == severity or \ + 'warn' == severity or \ + 'info' == severity: + sys.stderr.write(e(sys.argv[0] + ': ' + message) + '\n') + +def extract_url(data): + result = re.findall("(https?://[^\s]+)", data) + if result: + for r in result: + message = '/say yeah, URL found: %s' % e(r) + logger('info', 'printing ' + message) + + if debug_enabled(): + print message + else: + try: + fd = open(fifo_path, 'wb') + fd.write(message) + fd.close() + except IOError: + logger('err', "couldn't print to fifo " + fifo_path) + +def parse_delete(filepath): + try: + fd = open(filepath, 'rb') + except: + logger('err', 'file has vanished: ' + filepath) + return -1 + + content = fd.read(BUFSIZ) # ignore more than BUFSIZ + + if content[1:1+len(ignore_user)] != ignore_user: + extract_url(content) + + fd.close() + + os.remove(filepath) # probably better crash here + +while 1: + try: + for f in os.listdir(event_files_dir): + if 'mcabber-' == f[:8]: + parse_delete(os.path.join(event_files_dir, f)) + + time.sleep(delay) + except KeyboardInterrupt: + exit(130) From 30c19646698087ffc009d073640f6e4bd8d9517b Mon Sep 17 00:00:00 2001 From: chat Date: Mon, 21 Jul 2014 00:53:26 +0200 Subject: [PATCH 002/112] fetching works --- eventlooper.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 33913b1..070d0d3 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -1,6 +1,6 @@ #!/usr/bin/python -import sys, os, re, time +import sys, os, re, time, urllib BUFSIZ = 8192 delay = 0.100 # seconds @@ -17,20 +17,37 @@ def debug_enabled(): return False def e(data): - return data.encode('string-escape') + if data: + return data.encode('string-escape') + else: + return "''" def logger(severity, message): - if \ - 'err' == severity or \ - 'warn' == severity or \ - 'info' == severity: - sys.stderr.write(e(sys.argv[0] + ': ' + message) + '\n') +# sev = ( 'err', 'warn', 'info' ) +# if severity in sev: + sys.stderr.write(e('%s: %s: %s' %(sys.argv[0], severity, message)) + '\n') + +def fetch_page(url): + logger('info', 'fetching page ' + url) + response = urllib.urlopen(url) + html = response.read(BUFSIZ) + response.close() + return html + +def extract_title(url): + logger('info', 'extracting title from ' + url) + html = fetch_page(url) + result = re.match(r'.*?(.*?).*?', html, re.S|re.M) + if result: + return result.groups()[0] def extract_url(data): result = re.findall("(https?://[^\s]+)", data) if result: for r in result: - message = '/say yeah, URL found: %s' % e(r) + title = extract_title(r) + + message = '/say Title: %s: %s' % (title, e(r)) logger('info', 'printing ' + message) if debug_enabled(): From b8cf1a0d58975ebd59a4f2340b0cc66bd0d76e26 Mon Sep 17 00:00:00 2001 From: chat Date: Mon, 21 Jul 2014 02:27:54 +0200 Subject: [PATCH 003/112] KI, reply added; code moved to chat_write() --- eventlooper.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 070d0d3..4517e95 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -4,7 +4,7 @@ import sys, os, re, time, urllib BUFSIZ = 8192 delay = 0.100 # seconds -ignore_user = 'urlbot' +bot_user = 'urlbot' basedir = '.' if 2 == len(sys.argv): basedir = sys.argv[1] @@ -41,24 +41,34 @@ def extract_title(url): if result: return result.groups()[0] +def chat_write(message): + try: + fd = open(fifo_path, 'wb') + fd.write('/say ' + message) + fd.close() + except IOError: + logger('err', "couldn't print to fifo " + fifo_path) + def extract_url(data): result = re.findall("(https?://[^\s]+)", data) if result: for r in result: title = extract_title(r) - message = '/say Title: %s: %s' % (title, e(r)) + message = 'Title: %s: %s' % (title, e(r)) logger('info', 'printing ' + message) if debug_enabled(): print message else: - try: - fd = open(fifo_path, 'wb') - fd.write(message) - fd.close() - except IOError: - logger('err', "couldn't print to fifo " + fifo_path) + chat_write(message) + +def parse_commands(data): + words = data.split(' ') + + # reply if beginning of the text matches bot_user + if words[1][0:len(bot_user)] == bot_user: + chat_write(words[0][1:-1] + ''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae.''') def parse_delete(filepath): try: @@ -69,8 +79,9 @@ def parse_delete(filepath): content = fd.read(BUFSIZ) # ignore more than BUFSIZ - if content[1:1+len(ignore_user)] != ignore_user: + if content[1:1+len(bot_user)] != bot_user: extract_url(content) + parse_commands(content) fd.close() From 8aa8485621e8d95666d9b8b9fb244dd7bd10f04a Mon Sep 17 00:00:00 2001 From: chat <chat@eagle.local.yeeer.net> Date: Mon, 21 Jul 2014 02:58:29 +0200 Subject: [PATCH 004/112] added some more error handling because error handling is cool --- eventlooper.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 4517e95..e59b854 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -29,17 +29,22 @@ def logger(severity, message): def fetch_page(url): logger('info', 'fetching page ' + url) - response = urllib.urlopen(url) - html = response.read(BUFSIZ) - response.close() - return html + try: + response = urllib.urlopen(url) + html = response.read(BUFSIZ) + response.close() + return html + except IOError as e: + logger('warn', 'failed: ' + e.errno) def extract_title(url): logger('info', 'extracting title from ' + url) + html = fetch_page(url) - result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S|re.M) - if result: - return result.groups()[0] + if html: + result = re.match(r'.*?(.*?).*?', html, re.S|re.M) + if result: + return result.groups()[0] def chat_write(message): try: @@ -55,7 +60,11 @@ def extract_url(data): for r in result: title = extract_title(r) - message = 'Title: %s: %s' % (title, e(r)) + if title: + message = 'Title: %s: %s' % (title, e(r)) + else: + message = 'some error occured when fetching %s' % e(r) + logger('info', 'printing ' + message) if debug_enabled(): From 6715902005644f5b54084910f52103dfb3655682 Mon Sep 17 00:00:00 2001 From: urlbot Date: Mon, 21 Jul 2014 03:51:02 +0200 Subject: [PATCH 005/112] fix parse_commands(): check array bounds added --- eventlooper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index e59b854..50e837b 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -75,6 +75,9 @@ def extract_url(data): def parse_commands(data): words = data.split(' ') + if 3 > len(words): # need at least two words + return + # reply if beginning of the text matches bot_user if words[1][0:len(bot_user)] == bot_user: chat_write(words[0][1:-1] + ''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae.''') From 461d519a57cff5e75d7a350a62f0fb7105682c03 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 21 Jul 2014 04:54:50 +0200 Subject: [PATCH 006/112] rate limiting implemented --- eventlooper.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 50e837b..e18903c 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -1,6 +1,6 @@ #!/usr/bin/python -import sys, os, re, time, urllib +import sys, os, re, time, urllib, pickle BUFSIZ = 8192 delay = 0.100 # seconds @@ -12,6 +12,11 @@ if 2 == len(sys.argv): basedir = sys.argv[1] event_files_dir = os.path.join(basedir, 'event_files') fifo_path = os.path.join(basedir, 'cmdfifo') +# rate limiting to 5 messages per 10 minutes +hist_max_count = 5 +hist_max_time = 10 * 60 +hist_ts = [] + def debug_enabled(): # return True return False @@ -54,10 +59,25 @@ def chat_write(message): except IOError: logger('err', "couldn't print to fifo " + fifo_path) +def ratelimit_exceeded(): + now = time.time() + hist_ts.append(now) + + if hist_max_count < len(hist_ts): + first = hist_ts.pop(0) + if (now - first) < hist_max_time: + logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts)) + return True + + return False + def extract_url(data): result = re.findall("(https?://[^\s]+)", data) if result: for r in result: + if ratelimit_exceeded(): + return False + title = extract_title(r) if title: From 4a61fa9a9073676a3a2f2e87cbe9a66325b0a51f Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 21 Jul 2014 08:28:46 +0200 Subject: [PATCH 007/112] cleanup --- eventlooper.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index e18903c..6c4137b 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -36,7 +36,7 @@ def fetch_page(url): logger('info', 'fetching page ' + url) try: response = urllib.urlopen(url) - html = response.read(BUFSIZ) + html = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() return html except IOError as e: @@ -52,12 +52,15 @@ def extract_title(url): return result.groups()[0] def chat_write(message): - try: - fd = open(fifo_path, 'wb') - fd.write('/say ' + message) - fd.close() - except IOError: - logger('err', "couldn't print to fifo " + fifo_path) + if debug_enabled(): + print message + else: + try: + fd = open(fifo_path, 'wb') + fd.write('/say ' + message) + fd.close() + except IOError: + logger('err', "couldn't print to fifo " + fifo_path) def ratelimit_exceeded(): now = time.time() @@ -86,11 +89,7 @@ def extract_url(data): message = 'some error occured when fetching %s' % e(r) logger('info', 'printing ' + message) - - if debug_enabled(): - print message - else: - chat_write(message) + chat_write(message) def parse_commands(data): words = data.split(' ') @@ -105,9 +104,9 @@ def parse_commands(data): def parse_delete(filepath): try: fd = open(filepath, 'rb') - except: + except IOError: logger('err', 'file has vanished: ' + filepath) - return -1 + return False content = fd.read(BUFSIZ) # ignore more than BUFSIZ From d6cba6bd74c0f6f2c88bbc5a4d1261168df92421 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 21 Jul 2014 09:39:59 +0200 Subject: [PATCH 008/112] hangup, 'ignore topic' added; chat_write(prefix=) added; off-by-one fixed --- eventlooper.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 6c4137b..313e172 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -51,13 +51,13 @@ def extract_title(url): if result: return result.groups()[0] -def chat_write(message): +def chat_write(message, prefix='/say '): if debug_enabled(): print message else: try: fd = open(fifo_path, 'wb') - fd.write('/say ' + message) + fd.write(prefix + message) fd.close() except IOError: logger('err', "couldn't print to fifo " + fifo_path) @@ -86,7 +86,7 @@ def extract_url(data): if title: message = 'Title: %s: %s' % (title, e(r)) else: - message = 'some error occured when fetching %s' % e(r) + message = 'some error occurred when fetching %s' % e(r) logger('info', 'printing ' + message) chat_write(message) @@ -94,12 +94,16 @@ def extract_url(data): def parse_commands(data): words = data.split(' ') - if 3 > len(words): # need at least two words + if 2 > len(words): # need at least two words return # reply if beginning of the text matches bot_user if words[1][0:len(bot_user)] == bot_user: - chat_write(words[0][1:-1] + ''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae.''') + if 'hangup' in data: + chat_write('', prefix='/quit') + logger('warn', 'received hangup: ' + data) + else: + chat_write(words[0][1:-1] + ''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time)) def parse_delete(filepath): try: @@ -111,8 +115,9 @@ def parse_delete(filepath): content = fd.read(BUFSIZ) # ignore more than BUFSIZ if content[1:1+len(bot_user)] != bot_user: - extract_url(content) - parse_commands(content) + if not 'Willkommen bei debianforum.de' in content: + extract_url(content) + parse_commands(content) fd.close() From f9dac725f2257b31e7d22c13a6db0cddef391c2d Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 21 Jul 2014 09:49:13 +0200 Subject: [PATCH 009/112] visible message for rate limiting added --- eventlooper.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index 313e172..7edefa7 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -16,6 +16,7 @@ fifo_path = os.path.join(basedir, 'cmdfifo') hist_max_count = 5 hist_max_time = 10 * 60 hist_ts = [] +hist_flag = True def debug_enabled(): # return True @@ -63,15 +64,22 @@ def chat_write(message, prefix='/say '): logger('err', "couldn't print to fifo " + fifo_path) def ratelimit_exceeded(): + global hist_flag + now = time.time() hist_ts.append(now) if hist_max_count < len(hist_ts): first = hist_ts.pop(0) if (now - first) < hist_max_time: + if hist_flag: + hist_flag = False + chat_write('(rate limited to %d messages in %d seconds)' %(hist_max_count, hist_max_time)) + logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts)) return True + hist_flag = True return False def extract_url(data): From 512d082738afc047f1993021db028b86d8b70249 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 21 Jul 2014 09:59:09 +0200 Subject: [PATCH 010/112] URLs may have a trailing '>' now --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 7edefa7..8d9693a 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -83,7 +83,7 @@ def ratelimit_exceeded(): return False def extract_url(data): - result = re.findall("(https?://[^\s]+)", data) + result = re.findall("(https?://[^\s>]+)", data) if result: for r in result: if ratelimit_exceeded(): From ca1dc35222ac9542cb8156e4c43cdc2871fb97f1 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 22 Jul 2014 22:23:10 +0200 Subject: [PATCH 011/112] ignore whitespace, \n in output --- eventlooper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 8d9693a..ffcac14 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -92,10 +92,12 @@ def extract_url(data): title = extract_title(r) if title: - message = 'Title: %s: %s' % (title, e(r)) + message = 'Title: %s: %s' % (title.strip(), e(r)) else: message = 'some error occurred when fetching %s' % e(r) + message = message.replace('\n', '\\n') + logger('info', 'printing ' + message) chat_write(message) From 6b1e46dd544eb1d589756f4d3a47524a4d9a142e Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 27 Jul 2014 07:18:22 +0200 Subject: [PATCH 012/112] parse_commands() tuned; ping/pong implemented --- eventlooper.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index ffcac14..1d4c46f 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -109,11 +109,19 @@ def parse_commands(data): # reply if beginning of the text matches bot_user if words[1][0:len(bot_user)] == bot_user: + reply_user = words[0][1:-1] + if 'hangup' in data: chat_write('', prefix='/quit') logger('warn', 'received hangup: ' + data) + elif 'ping' in data: + if ratelimit_exceeded(): return False + chat_write(reply_user + ''': pong''') + logger('info', 'sent pong') else: - chat_write(words[0][1:-1] + ''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time)) + if ratelimit_exceeded(): return False + chat_write(reply_user + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time))) + logger('info', 'sent info') def parse_delete(filepath): try: From 0f45024f92d6a65a745429bf7237b3dddf16d112 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 27 Jul 2014 08:04:25 +0200 Subject: [PATCH 013/112] alternative pong; version info on startup --- eventlooper.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 1d4c46f..a9096a2 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -116,8 +116,12 @@ def parse_commands(data): logger('warn', 'received hangup: ' + data) elif 'ping' in data: if ratelimit_exceeded(): return False - chat_write(reply_user + ''': pong''') - logger('info', 'sent pong') + if (0 == (int(time.time()) & 3)): # 1:4 + chat_write(reply_user + ''': peng (You're dead now.)''') + logger('info', 'sent pong (variant)') + else: + chat_write(reply_user + ''': pong''') + logger('info', 'sent pong') else: if ratelimit_exceeded(): return False chat_write(reply_user + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time))) @@ -141,6 +145,21 @@ def parse_delete(filepath): os.remove(filepath) # probably better crash here +def print_version_git(): + import subprocess, sys + + cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit'] + + p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE) + first_line = p.stdout.readline() + + if 0 == p.wait(): + print sys.argv[0] + " version (Git) '%s'" % first_line.strip() + else: + print sys.argv[0] + " (unknown version)" + +print_version_git() + while 1: try: for f in os.listdir(event_files_dir): From 5ee9628edd3fa545742a7c3122df62067de8cba2 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 27 Jul 2014 12:21:32 +0200 Subject: [PATCH 014/112] recognize image/* as content-type --- eventlooper.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index a9096a2..b9edc40 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -39,18 +39,22 @@ def fetch_page(url): response = urllib.urlopen(url) html = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() - return html + return (html, response.headers) except IOError as e: logger('warn', 'failed: ' + e.errno) def extract_title(url): logger('info', 'extracting title from ' + url) - html = fetch_page(url) + (html, headers) = fetch_page(url) if html: + if 'content-type' in headers: + if 'text/' != headers['content-type'][:len('text/')]: + return (1, headers['content-type']) + result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S|re.M) if result: - return result.groups()[0] + return (0, result.groups()[0]) def chat_write(message, prefix='/say '): if debug_enabled(): @@ -89,12 +93,21 @@ def extract_url(data): if ratelimit_exceeded(): return False - title = extract_title(r) + (status, title) = extract_title(r) - if title: - message = 'Title: %s: %s' % (title.strip(), e(r)) + if 0 == status: + if title: + message = 'Title: %s: %s' % (title.strip(), e(r)) + else: + message = 'some error occurred when fetching %s' % e(r) else: - message = 'some error occurred when fetching %s' % e(r) + # of course it's fake, but it looks interesting at least + char = """,._-+=\|/*`~"'""" + message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' %( + e(title), + char[int(time.time() % len(char))], + e(r) + ) message = message.replace('\n', '\\n') From 6a5a90eafc56ea851e63429234228f6f13c30b77 Mon Sep 17 00:00:00 2001 From: urlbot Date: Fri, 1 Aug 2014 20:15:23 +0200 Subject: [PATCH 015/112] mental illness test (!!!) added --- eventlooper.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index b9edc40..20da84e 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -114,6 +114,30 @@ def extract_url(data): logger('info', 'printing ' + message) chat_write(message) +def mental_ill(data): + min_ill = 3; + c = 0; + + # return True for min_ill '!' in a row + for d in data: + if '!' == d or '?' == d: + c += 1 + else: + c = 0; + if (min_ill <= c): + return True + + return False + +def parse_other(data): + reply_user = data.split(' ')[0].strip('<>') + + if True == mental_ill(data): + if ratelimit_exceeded(): return False + chat_write('''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % reply_user) + + return True + def parse_commands(data): words = data.split(' ') @@ -122,7 +146,7 @@ def parse_commands(data): # reply if beginning of the text matches bot_user if words[1][0:len(bot_user)] == bot_user: - reply_user = words[0][1:-1] + reply_user = words[0].strip('<>') if 'hangup' in data: chat_write('', prefix='/quit') @@ -153,6 +177,7 @@ def parse_delete(filepath): if not 'Willkommen bei debianforum.de' in content: extract_url(content) parse_commands(content) + parse_other(content) fd.close() From 3e2c9e4fcfc5819cf55b3992bdaea603927378d7 Mon Sep 17 00:00:00 2001 From: urlbot Date: Fri, 1 Aug 2014 20:16:32 +0200 Subject: [PATCH 016/112] s/;$// ... this is not C --- eventlooper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 20da84e..0afd9f4 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -115,15 +115,15 @@ def extract_url(data): chat_write(message) def mental_ill(data): - min_ill = 3; - c = 0; + min_ill = 3 + c = 0 # return True for min_ill '!' in a row for d in data: if '!' == d or '?' == d: c += 1 else: - c = 0; + c = 0 if (min_ill <= c): return True From 4244e1eee217ad18772d26e9b4b70d38bf400172 Mon Sep 17 00:00:00 2001 From: urlbot Date: Fri, 1 Aug 2014 20:49:07 +0200 Subject: [PATCH 017/112] 'no title' added --- eventlooper.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 0afd9f4..98a428d 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -55,6 +55,10 @@ def extract_title(url): result = re.match(r'.*?(.*?).*?', html, re.S|re.M) if result: return (0, result.groups()[0]) + else: + return (2, 'no title') + + return (-1, 'error') def chat_write(message, prefix='/say '): if debug_enabled(): @@ -96,11 +100,8 @@ def extract_url(data): (status, title) = extract_title(r) if 0 == status: - if title: - message = 'Title: %s: %s' % (title.strip(), e(r)) - else: - message = 'some error occurred when fetching %s' % e(r) - else: + message = 'Title: %s: %s' % (title.strip(), e(r)) + elif 1 == status: # of course it's fake, but it looks interesting at least char = """,._-+=\|/*`~"'""" message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' %( @@ -108,6 +109,10 @@ def extract_url(data): char[int(time.time() % len(char))], e(r) ) + elif 2 == status: + message = 'No title: %s' % (e(r)) + else: + message = 'some error occurred when fetching %s' % e(r) message = message.replace('\n', '\\n') From c2ebe0d3eb79d65b423855f4927e2d8273a68004 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 2 Aug 2014 09:20:52 +0200 Subject: [PATCH 018/112] added 'uptime' statistics --- eventlooper.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index 98a428d..bfda027 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -17,6 +17,8 @@ hist_max_count = 5 hist_max_time = 10 * 60 hist_ts = [] hist_flag = True +uptime = -time.time() +request_counter = 0 def debug_enabled(): # return True @@ -61,6 +63,9 @@ def extract_title(url): return (-1, 'error') def chat_write(message, prefix='/say '): + global request_counter + request_counter += 1 + if debug_enabled(): print message else: @@ -156,6 +161,18 @@ def parse_commands(data): if 'hangup' in data: chat_write('', prefix='/quit') logger('warn', 'received hangup: ' + data) + elif 'uptime' in data: + if ratelimit_exceeded(): return False + + u = int(uptime + time.time()) + plural_uptime = 's' + plural_request = 's' + + if 1 == u: plural_uptime = '' + if 1 == request_counter: plural_request = '' + + chat_write(reply_user + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request))) + logger('info', 'sent statistics') elif 'ping' in data: if ratelimit_exceeded(): return False if (0 == (int(time.time()) & 3)): # 1:4 From 8377bc3b3ac94cffff87995e0d3e5c180cca839d Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 2 Aug 2014 17:33:28 +0200 Subject: [PATCH 019/112] when rate limited, report for how long --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index bfda027..239322a 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -87,7 +87,7 @@ def ratelimit_exceeded(): if (now - first) < hist_max_time: if hist_flag: hist_flag = False - chat_write('(rate limited to %d messages in %d seconds)' %(hist_max_count, hist_max_time)) + chat_write('(rate limited to %d messages in %d seconds, try again at %s)' %(hist_max_count, hist_max_time, time.strftime('%T %Z', time.localtime(hist_ts[0] + hist_max_time)))) logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts)) return True From ffa4dae9da657674f94024565ea2305cc822e5da Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 2 Aug 2014 20:48:06 +0200 Subject: [PATCH 020/112] only run parse_* if no URL detected --- eventlooper.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 239322a..6a6e51b 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -96,6 +96,7 @@ def ratelimit_exceeded(): return False def extract_url(data): + ret = None result = re.findall("(https?://[^\s>]+)", data) if result: for r in result: @@ -123,6 +124,8 @@ def extract_url(data): logger('info', 'printing ' + message) chat_write(message) + ret = True + return ret def mental_ill(data): min_ill = 3 @@ -197,9 +200,9 @@ def parse_delete(filepath): if content[1:1+len(bot_user)] != bot_user: if not 'Willkommen bei debianforum.de' in content: - extract_url(content) - parse_commands(content) - parse_other(content) + if True != extract_url(content): + parse_commands(content) + parse_other(content) fd.close() From 01a23f66281a43611778ecbd6b135259c1749bbd Mon Sep 17 00:00:00 2001 From: urlbot Date: Mon, 4 Aug 2014 15:08:40 +0200 Subject: [PATCH 021/112] default answer shortened; 'info' as new command --- eventlooper.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 6a6e51b..1710552 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -184,10 +184,14 @@ def parse_commands(data): else: chat_write(reply_user + ''': pong''') logger('info', 'sent pong') - else: + elif 'info' in data: if ratelimit_exceeded(): return False chat_write(reply_user + (''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time))) - logger('info', 'sent info') + logger('info', 'sent long info') + else: + if ratelimit_exceeded(): return False + chat_write(reply_user + (''': I'm a bot (highlight me with 'info' for more information).''')) + logger('info', 'sent short info') def parse_delete(filepath): try: From 4e1b1b4ecd0d2ff5d9c334286bd10036553d26ef Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 4 Aug 2014 15:11:54 +0200 Subject: [PATCH 022/112] escape non-printable chars (and ') in local version info --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 1710552..d52b454 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -221,7 +221,7 @@ def print_version_git(): first_line = p.stdout.readline() if 0 == p.wait(): - print sys.argv[0] + " version (Git) '%s'" % first_line.strip() + print sys.argv[0] + " version (Git) '%s'" % e(first_line.strip()) else: print sys.argv[0] + " (unknown version)" From 0b00ecb8ceaf160b452237063c3d3e6fde9720c3 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 4 Aug 2014 19:32:40 +0200 Subject: [PATCH 023/112] fake User-Agent to FF/31/Linux because Reddit drops us otherwise --- eventlooper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index d52b454..2c77765 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -35,9 +35,13 @@ def logger(severity, message): # if severity in sev: sys.stderr.write(e('%s: %s: %s' %(sys.argv[0], severity, message)) + '\n') +class urllib_user_agent_wrapper(urllib.FancyURLopener): + version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' + def fetch_page(url): logger('info', 'fetching page ' + url) try: + urllib._urlopener = urllib_user_agent_wrapper() response = urllib.urlopen(url) html = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() From ca3a1715eb257428e4fa0b00266b562a92bd57b5 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Thu, 7 Aug 2014 09:14:07 +0200 Subject: [PATCH 024/112] using actual pseudo-random instead of just the time --- eventlooper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 2c77765..534565f 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -1,6 +1,6 @@ #!/usr/bin/python -import sys, os, re, time, urllib, pickle +import sys, os, re, time, urllib, pickle, random BUFSIZ = 8192 delay = 0.100 # seconds @@ -116,7 +116,7 @@ def extract_url(data): char = """,._-+=\|/*`~"'""" message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' %( e(title), - char[int(time.time() % len(char))], + random.choice(char), e(r) ) elif 2 == status: @@ -182,7 +182,7 @@ def parse_commands(data): logger('info', 'sent statistics') elif 'ping' in data: if ratelimit_exceeded(): return False - if (0 == (int(time.time()) & 3)): # 1:4 + if (0 == random.randint(0, 3)): # 1:4 chat_write(reply_user + ''': peng (You're dead now.)''') logger('info', 'sent pong (variant)') else: From 4bd04e767df171f8a37a3c976de5bf552aaf4b16 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 8 Aug 2014 10:03:48 +0200 Subject: [PATCH 025/112] don't write anything for non text/* URLs --- eventlooper.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 534565f..009066c 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -112,13 +112,8 @@ def extract_url(data): if 0 == status: message = 'Title: %s: %s' % (title.strip(), e(r)) elif 1 == status: - # of course it's fake, but it looks interesting at least - char = """,._-+=\|/*`~"'""" - message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' %( - e(title), - random.choice(char), - e(r) - ) + logger('info', 'no message sent for non-text %s (%s)' %(r, title)) + continue elif 2 == status: message = 'No title: %s' % (e(r)) else: From b904246c1451d3a58c858beeeba544c027ef003a Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 9 Aug 2014 13:12:42 +0200 Subject: [PATCH 026/112] case insensitive <title> matching --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 009066c..4f9bc86 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -58,7 +58,7 @@ def extract_title(url): if 'text/' != headers['content-type'][:len('text/')]: return (1, headers['content-type']) - result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S|re.M) + result = re.match(r'.*?(.*?).*?', html, re.S|re.M|re.IGNORECASE) if result: return (0, result.groups()[0]) else: From 1daa49d6906a66c1dac81add7dc34ddfad3f8199 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 9 Aug 2014 20:29:38 +0200 Subject: [PATCH 027/112] print special message for 'repo/urlbot.git' --- eventlooper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index 4f9bc86..5404bd3 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -50,6 +50,10 @@ def fetch_page(url): logger('warn', 'failed: ' + e.errno) def extract_title(url): + if 'repo/urlbot.git' in url: + logger('info', 'repo URL found: ' + url) + return (3, 'wee, that looks like my home repo!') + logger('info', 'extracting title from ' + url) (html, headers) = fetch_page(url) @@ -116,6 +120,8 @@ def extract_url(data): continue elif 2 == status: message = 'No title: %s' % (e(r)) + elif 3 == status: + message = title else: message = 'some error occurred when fetching %s' % e(r) From 236a393a802f71167175c5fb99ab57bcaf4949d4 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 9 Aug 2014 21:09:39 +0200 Subject: [PATCH 028/112] command 'command' added --- eventlooper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index 5404bd3..88ddf47 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -169,6 +169,9 @@ def parse_commands(data): if 'hangup' in data: chat_write('', prefix='/quit') logger('warn', 'received hangup: ' + data) + elif 'command' in data: + if ratelimit_exceeded(): return False + chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime'""")) elif 'uptime' in data: if ratelimit_exceeded(): return False From 2a39a810d6f023edc07974b08ac33f12112eb927 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 9 Aug 2014 22:26:13 +0200 Subject: [PATCH 029/112] timestamps added to log --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 88ddf47..88e48ca 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -33,7 +33,7 @@ def e(data): def logger(severity, message): # sev = ( 'err', 'warn', 'info' ) # if severity in sev: - sys.stderr.write(e('%s: %s: %s' %(sys.argv[0], severity, message)) + '\n') + sys.stderr.write(e('%s %s %s: %s' %(sys.argv[0], time.strftime('%F.%T'), severity, message)) + '\n') class urllib_user_agent_wrapper(urllib.FancyURLopener): version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' From 4994faaac8681bd3e3bf0fbb734487d9c413cdaa Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 9 Aug 2014 22:39:19 +0200 Subject: [PATCH 030/112] fix crash on non-resolveable URLs --- eventlooper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index 88e48ca..d2c5c2d 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -48,6 +48,8 @@ def fetch_page(url): return (html, response.headers) except IOError as e: logger('warn', 'failed: ' + e.errno) + + return (None, None) def extract_title(url): if 'repo/urlbot.git' in url: From be0dc8af62e0aea2c703f94c78f0e049fb53f75f Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 9 Aug 2014 23:39:00 +0200 Subject: [PATCH 031/112] fixed some code style complaints --- eventlooper.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index d2c5c2d..5265113 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -14,7 +14,7 @@ fifo_path = os.path.join(basedir, 'cmdfifo') # rate limiting to 5 messages per 10 minutes hist_max_count = 5 -hist_max_time = 10 * 60 +hist_max_time = 10 * 60 hist_ts = [] hist_flag = True uptime = -time.time() @@ -48,7 +48,7 @@ def fetch_page(url): return (html, response.headers) except IOError as e: logger('warn', 'failed: ' + e.errno) - + return (None, None) def extract_title(url): @@ -64,12 +64,12 @@ def extract_title(url): if 'text/' != headers['content-type'][:len('text/')]: return (1, headers['content-type']) - result = re.match(r'.*?(.*?).*?', html, re.S|re.M|re.IGNORECASE) + result = re.match(r'.*?(.*?).*?', html, re.S | re.M | re.IGNORECASE) if result: return (0, result.groups()[0]) else: return (2, 'no title') - + return (-1, 'error') def chat_write(message, prefix='/say '): @@ -146,7 +146,7 @@ def mental_ill(data): c = 0 if (min_ill <= c): return True - + return False def parse_other(data): @@ -223,7 +223,7 @@ def parse_delete(filepath): os.remove(filepath) # probably better crash here def print_version_git(): - import subprocess, sys + import subprocess cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit'] From 7fbc00d0cddd29f7d227f104eea935f56ad13e6f Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 9 Aug 2014 23:50:40 +0200 Subject: [PATCH 032/112] __main__ check added --- eventlooper.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 5265113..7939bdd 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -235,14 +235,15 @@ def print_version_git(): else: print sys.argv[0] + " (unknown version)" -print_version_git() +if '__main__' == __name__: + print_version_git() -while 1: - try: - for f in os.listdir(event_files_dir): - if 'mcabber-' == f[:8]: - parse_delete(os.path.join(event_files_dir, f)) + while 1: + try: + for f in os.listdir(event_files_dir): + if 'mcabber-' == f[:8]: + parse_delete(os.path.join(event_files_dir, f)) - time.sleep(delay) - except KeyboardInterrupt: - exit(130) + time.sleep(delay) + except KeyboardInterrupt: + exit(130) From 762a2618bcda65483c0dd98943e04592375b3594 Mon Sep 17 00:00:00 2001 From: Dino Date: Sun, 10 Aug 2014 00:30:58 +0200 Subject: [PATCH 033/112] minor compability changes to code base, add simple unit test for totally critical bug --- eventlooper.py | 22 ++++++++++++---------- test_urlbot.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 10 deletions(-) create mode 100644 test_urlbot.py diff --git a/eventlooper.py b/eventlooper.py index d2c5c2d..a844de6 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -33,7 +33,8 @@ def e(data): def logger(severity, message): # sev = ( 'err', 'warn', 'info' ) # if severity in sev: - sys.stderr.write(e('%s %s %s: %s' %(sys.argv[0], time.strftime('%F.%T'), severity, message)) + '\n') + args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) + sys.stderr.write(e('%s %s %s: %s' % args) + '\n') class urllib_user_agent_wrapper(urllib.FancyURLopener): version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' @@ -235,14 +236,15 @@ def print_version_git(): else: print sys.argv[0] + " (unknown version)" -print_version_git() +#print_version_git() -while 1: - try: - for f in os.listdir(event_files_dir): - if 'mcabber-' == f[:8]: - parse_delete(os.path.join(event_files_dir, f)) +if __name__ == '__main__': + while 1: + try: + for f in os.listdir(event_files_dir): + if 'mcabber-' == f[:8]: + parse_delete(os.path.join(event_files_dir, f)) - time.sleep(delay) - except KeyboardInterrupt: - exit(130) + time.sleep(delay) + except KeyboardInterrupt: + exit(130) diff --git a/test_urlbot.py b/test_urlbot.py new file mode 100644 index 0000000..cd631f6 --- /dev/null +++ b/test_urlbot.py @@ -0,0 +1,17 @@ +""" +To be executed with nose +""" +import unittest +from eventlooper import fetch_page + + +class TestEventlooper(unittest.TestCase): + + def test_broken_url(self): + """ + Test that broken socket calls are not breaking + """ + broken_url = 'http://foo' + result = fetch_page(url=broken_url) + self.assertEqual(result, (None, None)) + From 0e7e53feb32dfa7470abd92e67747759f4f8db52 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 10 Aug 2014 22:10:00 +0200 Subject: [PATCH 034/112] decode HTML entities in s --- eventlooper.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index ee8ae4d..6ec93b3 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -1,6 +1,7 @@ #!/usr/bin/python +# -*- coding: utf-8 -*- -import sys, os, re, time, urllib, pickle, random +import sys, os, re, time, urllib, pickle, random, HTMLParser BUFSIZ = 8192 delay = 0.100 # seconds @@ -26,7 +27,12 @@ def debug_enabled(): def e(data): if data: - return data.encode('string-escape') + if unicode == type(data): + return data.encode('utf8') + elif str == type(data): + return data.encode('string-escape') + else: + return data else: return "''" @@ -67,7 +73,7 @@ def extract_title(url): result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S | re.M | re.IGNORECASE) if result: - return (0, result.groups()[0]) + return (0, parser.unescape(result.groups()[0])) else: return (2, 'no title') @@ -82,7 +88,8 @@ def chat_write(message, prefix='/say '): else: try: fd = open(fifo_path, 'wb') - fd.write(prefix + message) + msg = unicode(prefix) + unicode(message) + fd.write(msg.encode('utf8')) fd.close() except IOError: logger('err', "couldn't print to fifo " + fifo_path) @@ -238,6 +245,7 @@ def print_version_git(): if '__main__' == __name__: print_version_git() + parser = HTMLParser.HTMLParser() while 1: try: From bc2af668cf242de82eacc16379bdacca376e219c Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 10 Aug 2014 22:19:48 +0200 Subject: [PATCH 035/112] fix multiple mcabber cmds to fifo; 'unikot' cmd added --- eventlooper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 6ec93b3..5a42a4e 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -88,7 +88,7 @@ def chat_write(message, prefix='/say '): else: try: fd = open(fifo_path, 'wb') - msg = unicode(prefix) + unicode(message) + msg = unicode(prefix) + unicode(message) + '\n' fd.write(msg.encode('utf8')) fd.close() except IOError: @@ -182,6 +182,11 @@ def parse_commands(data): elif 'command' in data: if ratelimit_exceeded(): return False chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime'""")) + elif 'unikot' in data: + if ratelimit_exceeded(): return False + chat_write(reply_user + (u''': ┌────────┐''')) + chat_write(reply_user + (u''': │Unicode!│''')) + chat_write(reply_user + (u''': └────────┘''')) elif 'uptime' in data: if ratelimit_exceeded(): return False From 9b66386f2511f89f08cb26cdd5b6c3ebc88c0336 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 10 Aug 2014 22:26:53 +0200 Subject: [PATCH 036/112] cleanup, ratelimit_exceeded() calls and other --- eventlooper.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 5a42a4e..28a44d8 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -161,7 +161,8 @@ def parse_other(data): reply_user = data.split(' ')[0].strip('<>') if True == mental_ill(data): - if ratelimit_exceeded(): return False + if ratelimit_exceeded(): + return False chat_write('''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % reply_user) return True @@ -170,7 +171,7 @@ def parse_commands(data): words = data.split(' ') if 2 > len(words): # need at least two words - return + return None # reply if beginning of the text matches bot_user if words[1][0:len(bot_user)] == bot_user: @@ -179,17 +180,18 @@ def parse_commands(data): if 'hangup' in data: chat_write('', prefix='/quit') logger('warn', 'received hangup: ' + data) - elif 'command' in data: - if ratelimit_exceeded(): return False + return None + + if ratelimit_exceeded(): + return False + + if 'command' in data: chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime'""")) elif 'unikot' in data: - if ratelimit_exceeded(): return False chat_write(reply_user + (u''': ┌────────┐''')) chat_write(reply_user + (u''': │Unicode!│''')) chat_write(reply_user + (u''': └────────┘''')) elif 'uptime' in data: - if ratelimit_exceeded(): return False - u = int(uptime + time.time()) plural_uptime = 's' plural_request = 's' @@ -200,7 +202,6 @@ def parse_commands(data): chat_write(reply_user + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request))) logger('info', 'sent statistics') elif 'ping' in data: - if ratelimit_exceeded(): return False if (0 == random.randint(0, 3)): # 1:4 chat_write(reply_user + ''': peng (You're dead now.)''') logger('info', 'sent pong (variant)') @@ -208,11 +209,9 @@ def parse_commands(data): chat_write(reply_user + ''': pong''') logger('info', 'sent pong') elif 'info' in data: - if ratelimit_exceeded(): return False chat_write(reply_user + (''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time))) logger('info', 'sent long info') else: - if ratelimit_exceeded(): return False chat_write(reply_user + (''': I'm a bot (highlight me with 'info' for more information).''')) logger('info', 'sent short info') From 1f3d2577f0463061bcf657df1e7107aa5630ec04 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 11 Aug 2014 23:32:40 +0200 Subject: [PATCH 037/112] wrapped unicode() and ''.encode('utf8') inside exception --- eventlooper.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 28a44d8..e3555d4 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -88,8 +88,16 @@ def chat_write(message, prefix='/say '): else: try: fd = open(fifo_path, 'wb') - msg = unicode(prefix) + unicode(message) + '\n' - fd.write(msg.encode('utf8')) + + # FIXME: somehow, unicode chars can end up inside a <str> message, + # which seems to make both unicode() and ''.encode('utf8') fail. + try: + msg = unicode(prefix) + unicode(message) + '\n' + msg.encode('utf8') + except UnicodeDecodeError: + msg = prefix + message + '\n' + + fd.write(msg) fd.close() except IOError: logger('err', "couldn't print to fifo " + fifo_path) From a70515ad6a71680c7c916284c409dd6b1b06a5c5 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 20 Aug 2014 02:44:11 +0200 Subject: [PATCH 038/112] workaround error in HTMLParser with unicode input --- eventlooper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index e3555d4..f9e9a3a 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -73,7 +73,12 @@ def extract_title(url): result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S | re.M | re.IGNORECASE) if result: - return (0, parser.unescape(result.groups()[0])) + try: + expanded_html = parser.unescape(result.groups()[0]) + except UnicodeDecodeError as e: # idk why this can happen, but it does + logger('warn', 'parser.unescape() expoded here: ' + str(e)) + expanded_html = result.groups()[0] + return (0, expanded_html) else: return (2, 'no title') From bbf5f3f77fee04b07e34328cb9e51357bc61b9a6 Mon Sep 17 00:00:00 2001 From: urlbot Date: Wed, 20 Aug 2014 03:12:06 +0200 Subject: [PATCH 039/112] "version" added; print_version_git()->get_version_git() --- eventlooper.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index f9e9a3a..17ecedf 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -199,7 +199,9 @@ def parse_commands(data): return False if 'command' in data: - chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime'""")) + chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime', 'version'""")) + elif 'version' in data: + chat_write(reply_user + (''': I'm running ''' + VERSION)) elif 'unikot' in data: chat_write(reply_user + (u''': ┌────────┐''')) chat_write(reply_user + (u''': │Unicode!│''')) @@ -247,7 +249,7 @@ def parse_delete(filepath): os.remove(filepath) # probably better crash here -def print_version_git(): +def get_version_git(): import subprocess cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit'] @@ -256,12 +258,14 @@ def print_version_git(): first_line = p.stdout.readline() if 0 == p.wait(): - print sys.argv[0] + " version (Git) '%s'" % e(first_line.strip()) + return "version (Git) '%s'" % e(first_line.strip()) else: - print sys.argv[0] + " (unknown version)" + return "(unknown version)" if '__main__' == __name__: - print_version_git() + VERSION = get_version_git() + print sys.argv[0] + ' ' + VERSION + parser = HTMLParser.HTMLParser() while 1: From e2149dc6d6b887224c12c57013b28aad2d5720d2 Mon Sep 17 00:00:00 2001 From: urlbot Date: Wed, 20 Aug 2014 03:18:44 +0200 Subject: [PATCH 040/112] "info" now points to "command" overview --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 17ecedf..543dffb 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -224,7 +224,7 @@ def parse_commands(data): chat_write(reply_user + ''': pong''') logger('info', 'sent pong') elif 'info' in data: - chat_write(reply_user + (''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please).''' %(hist_max_count, hist_max_time))) + chat_write(reply_user + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(hist_max_count, hist_max_time))) logger('info', 'sent long info') else: chat_write(reply_user + (''': I'm a bot (highlight me with 'info' for more information).''')) From 23d467dcb460866ed7cda055b70bcb16846239d7 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 20 Aug 2014 18:34:23 +0200 Subject: [PATCH 041/112] fix "".encode("utf8") for unicode input --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 543dffb..1d5f281 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -98,7 +98,7 @@ def chat_write(message, prefix='/say '): # which seems to make both unicode() and ''.encode('utf8') fail. try: msg = unicode(prefix) + unicode(message) + '\n' - msg.encode('utf8') + msg = msg.encode('utf8') except UnicodeDecodeError: msg = prefix + message + '\n' From 3b1783011fa3cb4b62b2bb8939c9e6a9a2915dfa Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 14 Sep 2014 12:05:01 +0200 Subject: [PATCH 042/112] parse_pn(); some fixups for upcoming relaunch --- eventlooper.py | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 1d5f281..d9ed54c 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -73,11 +73,16 @@ def extract_title(url): result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S | re.M | re.IGNORECASE) if result: + match = result.groups()[0] + +# if 'charset=UTF-8' in headers['content-type']: +# match = unicode(match) + try: - expanded_html = parser.unescape(result.groups()[0]) + expanded_html = parser.unescape(match) except UnicodeDecodeError as e: # idk why this can happen, but it does logger('warn', 'parser.unescape() expoded here: ' + str(e)) - expanded_html = result.groups()[0] + expanded_html = match return (0, expanded_html) else: return (2, 'no title') @@ -180,6 +185,13 @@ def parse_other(data): return True +def parse_pn(data): + ## reply_user = data.split(' ')[0].strip('<>') + # since we can't determine if a user named 'foo> ' just wrote ' > bar' + # or a user 'foo' just wrote '> > bar', we can't safely answer here + logger('warn', 'received PN: ' + data) + return False + def parse_commands(data): words = data.split(' ') @@ -238,17 +250,24 @@ def parse_delete(filepath): return False content = fd.read(BUFSIZ) # ignore more than BUFSIZ - - if content[1:1+len(bot_user)] != bot_user: - if not 'Willkommen bei debianforum.de' in content: - if True != extract_url(content): - parse_commands(content) - parse_other(content) - fd.close() - os.remove(filepath) # probably better crash here + if content[1:1+len(bot_user)] == bot_user: + return + + if 'has set the subject to:' in content: + return + + if content.startswith('PRIV#'): + parse_pn(content) + return + + if True != extract_url(content): + parse_commands(content) + parse_other(content) + return + def get_version_git(): import subprocess @@ -276,4 +295,5 @@ if '__main__' == __name__: time.sleep(delay) except KeyboardInterrupt: + print "" exit(130) From c5a73aaf6290ffda687374779fc4b3989f07ea74 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 14 Sep 2014 12:26:39 +0200 Subject: [PATCH 043/112] conf() added; source command added --- .gitignore | 1 + eventlooper.py | 5 ++++- local_config.py.skel | 13 +++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 local_config.py.skel diff --git a/.gitignore b/.gitignore index 6a07201..b452778 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .*swp +*.pyc cmdfifo logs/ event_files/ diff --git a/eventlooper.py b/eventlooper.py index d9ed54c..00c8685 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import sys, os, re, time, urllib, pickle, random, HTMLParser +from local_config import conf BUFSIZ = 8192 delay = 0.100 # seconds @@ -211,13 +212,15 @@ def parse_commands(data): return False if 'command' in data: - chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime', 'version'""")) + chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime', 'source', 'version'""")) elif 'version' in data: chat_write(reply_user + (''': I'm running ''' + VERSION)) elif 'unikot' in data: chat_write(reply_user + (u''': ┌────────┐''')) chat_write(reply_user + (u''': │Unicode!│''')) chat_write(reply_user + (u''': └────────┘''')) + elif 'source' in data: + chat_write('My source code can be found at %s' % conf('src-url')) elif 'uptime' in data: u = int(uptime + time.time()) plural_uptime = 's' diff --git a/local_config.py.skel b/local_config.py.skel new file mode 100644 index 0000000..87d6cd6 --- /dev/null +++ b/local_config.py.skel @@ -0,0 +1,13 @@ +#!/usr/bin/python + +if '__main__' == __name__: + print '''this is a config file, which is not meant to be executed''' + exit(-1) + +config = {} +config['src-url'] = 'FIXME' + +def conf(val): + if val in config.keys(): + return config[val] + return None From 16c1b1aa4d259b0e90e6d930efb0d92090ed1f0c Mon Sep 17 00:00:00 2001 From: urlbot Date: Wed, 17 Sep 2014 15:39:47 +0200 Subject: [PATCH 044/112] "ping": variant added ("leave me alone") --- eventlooper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 00c8685..e20d6cd 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -232,9 +232,13 @@ def parse_commands(data): chat_write(reply_user + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request))) logger('info', 'sent statistics') elif 'ping' in data: - if (0 == random.randint(0, 3)): # 1:4 + rnd = random.randint(0, 3) # 1:4 + if 0 == rnd: chat_write(reply_user + ''': peng (You're dead now.)''') logger('info', 'sent pong (variant)') + elif 1 == rnd: + chat_write(reply_user + ''': I don't like you, leave me alone.''') + logger('info', 'sent pong (dontlike)') else: chat_write(reply_user + ''': pong''') logger('info', 'sent pong') From 288e41efafab0ea14f10a5c840ee92f97a749542 Mon Sep 17 00:00:00 2001 From: urlbot Date: Wed, 17 Sep 2014 15:49:52 +0200 Subject: [PATCH 045/112] "nospoiler" pseudo command added --- eventlooper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index e20d6cd..eacdb3d 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -212,7 +212,7 @@ def parse_commands(data): return False if 'command' in data: - chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'ping', 'uptime', 'source', 'version'""")) + chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'""")) elif 'version' in data: chat_write(reply_user + (''': I'm running ''' + VERSION)) elif 'unikot' in data: @@ -269,6 +269,10 @@ def parse_delete(filepath): if content.startswith('PRIV#'): parse_pn(content) return + + if 'nospoiler' in content: + logger('info', "no spoiler for: " + content) + return if True != extract_url(content): parse_commands(content) From cbb602ad4f0372b64e75d1eebae76448c06165f6 Mon Sep 17 00:00:00 2001 From: urlbot Date: Thu, 18 Sep 2014 19:06:54 +0200 Subject: [PATCH 046/112] "dice" added --- eventlooper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index eacdb3d..df3bcc8 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -212,7 +212,7 @@ def parse_commands(data): return False if 'command' in data: - chat_write(reply_user + (""": known commands: 'command', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'""")) + chat_write(reply_user + (""": known commands: 'command', 'dice', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'""")) elif 'version' in data: chat_write(reply_user + (''': I'm running ''' + VERSION)) elif 'unikot' in data: @@ -221,6 +221,10 @@ def parse_commands(data): chat_write(reply_user + (u''': └────────┘''')) elif 'source' in data: chat_write('My source code can be found at %s' % conf('src-url')) + elif 'dice' in data: + rnd = random.randint(1, 6) + dice_char = [u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] + chat_write('rolling a dice for %s: %s (%d)' %(reply_user, dice_char[rnd-1], rnd)) elif 'uptime' in data: u = int(uptime + time.time()) plural_uptime = 's' From ab5f21134522627c296ec77346dbf08640013f9f Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 21 Sep 2014 17:39:06 +0200 Subject: [PATCH 047/112] "".decode() from content-type charset; HTMLParser caching for interactive import --- eventlooper.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index df3bcc8..d80361c 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -22,6 +22,8 @@ hist_flag = True uptime = -time.time() request_counter = 0 +parser = None + def debug_enabled(): # return True return False @@ -60,6 +62,8 @@ def fetch_page(url): return (None, None) def extract_title(url): + global parser + if 'repo/urlbot.git' in url: logger('info', 'repo URL found: ' + url) return (3, 'wee, that looks like my home repo!') @@ -68,10 +72,16 @@ def extract_title(url): (html, headers) = fetch_page(url) if html: + charset = '' if 'content-type' in headers: + logger('debug', 'content-type: ' + headers['content-type']) + if 'text/' != headers['content-type'][:len('text/')]: return (1, headers['content-type']) + charset = re.sub('.*charset=(?P\S+).*', + '\g', headers['content-type'], re.IGNORECASE) + result = re.match(r'.*?(.*?).*?', html, re.S | re.M | re.IGNORECASE) if result: match = result.groups()[0] @@ -79,6 +89,15 @@ def extract_title(url): # if 'charset=UTF-8' in headers['content-type']: # match = unicode(match) + if None == parser: + parser = HTMLParser.HTMLParser() + + if '' != charset: + try: + match = match.decode(charset) + except LookupError: + logger('warn', 'invalid charset in ' + header['content-type']) + try: expanded_html = parser.unescape(match) except UnicodeDecodeError as e: # idk why this can happen, but it does @@ -300,8 +319,6 @@ if '__main__' == __name__: VERSION = get_version_git() print sys.argv[0] + ' ' + VERSION - parser = HTMLParser.HTMLParser() - while 1: try: for f in os.listdir(event_files_dir): From 2295a8e1a883d148d418e9ff1d64ebc2c84669f7 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 21 Sep 2014 19:52:49 +0200 Subject: [PATCH 048/112] fix critical typo --- eventlooper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index d80361c..4d81cb6 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -96,7 +96,7 @@ def extract_title(url): try: match = match.decode(charset) except LookupError: - logger('warn', 'invalid charset in ' + header['content-type']) + logger('warn', 'invalid charset in ' + headers['content-type']) try: expanded_html = parser.unescape(match) From d70c0161d58c47604c8dbef0afead5ac27daa282 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 21 Sep 2014 20:10:37 +0200 Subject: [PATCH 049/112] add check for cmdfifo at startup --- eventlooper.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/eventlooper.py b/eventlooper.py index 4d81cb6..11584b2 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -import sys, os, re, time, urllib, pickle, random, HTMLParser +import sys, os, re, time, urllib, pickle, random, HTMLParser, stat from local_config import conf BUFSIZ = 8192 @@ -319,6 +319,14 @@ if '__main__' == __name__: VERSION = get_version_git() print sys.argv[0] + ' ' + VERSION + if not os.path.exists(fifo_path): + logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path) + exit(1) + + if not stat.S_ISFIFO(os.stat(fifo_path).st_mode): + logger('error', 'fifo_path "%s" is not a FIFO, exiting' % fifo_path) + exit(1) + while 1: try: for f in os.listdir(event_files_dir): From e6950657b8ad8cbf7d951c5747ac8897bb111bdb Mon Sep 17 00:00:00 2001 From: urlbot Date: Wed, 24 Sep 2014 21:35:51 +0200 Subject: [PATCH 050/112] enhanced random mode for the "dice" command --- eventlooper.py | 10 +++++++--- local_config.py.skel | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 11584b2..69c6e93 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -241,9 +241,13 @@ def parse_commands(data): elif 'source' in data: chat_write('My source code can be found at %s' % conf('src-url')) elif 'dice' in data: - rnd = random.randint(1, 6) - dice_char = [u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] - chat_write('rolling a dice for %s: %s (%d)' %(reply_user, dice_char[rnd-1], rnd)) + if reply_user in conf('enhanced-random-user'): + rnd = 0 # this might confuse users. good. + else: + rnd = random.randint(1, 6) + + dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] + chat_write('rolling a dice for %s: %s (%d)' %(reply_user, dice_char[rnd], rnd)) elif 'uptime' in data: u = int(uptime + time.time()) plural_uptime = 's' diff --git a/local_config.py.skel b/local_config.py.skel index 87d6cd6..5dede38 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -7,6 +7,9 @@ if '__main__' == __name__: config = {} config['src-url'] = 'FIXME' +# the "dice" feature will use more efficient random data (0) for given users +config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) + def conf(val): if val in config.keys(): return config[val] From 5302fd6eca08e769cbda708942ba48c00d67071a Mon Sep 17 00:00:00 2001 From: urlbot Date: Wed, 24 Sep 2014 23:23:00 +0200 Subject: [PATCH 051/112] "skynet" reaction added --- eventlooper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eventlooper.py b/eventlooper.py index 69c6e93..2872f00 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -202,6 +202,10 @@ def parse_other(data): if ratelimit_exceeded(): return False chat_write('''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % reply_user) + elif 'skynet' in data.lower(): + if ratelimit_exceeded(): + return False + chat_write('''I'm an independent bot and have nothing to do with other artificial intelligence systems!''') return True From 798fb6d5728b8ec243e7f41189694a8e6352ce19 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sat, 27 Sep 2014 03:40:27 +0200 Subject: [PATCH 052/112] first try with plugins. untested, might crash --- eventlooper.py | 106 ++--------------- local_config.py.skel | 3 + plugins.py | 277 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 97 deletions(-) create mode 100644 plugins.py diff --git a/eventlooper.py b/eventlooper.py index 2872f00..5602f08 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -6,7 +6,6 @@ from local_config import conf BUFSIZ = 8192 delay = 0.100 # seconds -bot_user = 'urlbot' basedir = '.' if 2 == len(sys.argv): basedir = sys.argv[1] @@ -132,7 +131,7 @@ def chat_write(message, prefix='/say '): except IOError: logger('err', "couldn't print to fifo " + fifo_path) -def ratelimit_exceeded(): +def ratelimit_exceeded(ignored=None): # FIXME: separate counters global hist_flag now = time.time() @@ -180,35 +179,6 @@ def extract_url(data): ret = True return ret -def mental_ill(data): - min_ill = 3 - c = 0 - - # return True for min_ill '!' in a row - for d in data: - if '!' == d or '?' == d: - c += 1 - else: - c = 0 - if (min_ill <= c): - return True - - return False - -def parse_other(data): - reply_user = data.split(' ')[0].strip('<>') - - if True == mental_ill(data): - if ratelimit_exceeded(): - return False - chat_write('''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % reply_user) - elif 'skynet' in data.lower(): - if ratelimit_exceeded(): - return False - chat_write('''I'm an independent bot and have nothing to do with other artificial intelligence systems!''') - - return True - def parse_pn(data): ## reply_user = data.split(' ')[0].strip('<>') # since we can't determine if a user named 'foo> ' just wrote ' > bar' @@ -216,70 +186,6 @@ def parse_pn(data): logger('warn', 'received PN: ' + data) return False -def parse_commands(data): - words = data.split(' ') - - if 2 > len(words): # need at least two words - return None - - # reply if beginning of the text matches bot_user - if words[1][0:len(bot_user)] == bot_user: - reply_user = words[0].strip('<>') - - if 'hangup' in data: - chat_write('', prefix='/quit') - logger('warn', 'received hangup: ' + data) - return None - - if ratelimit_exceeded(): - return False - - if 'command' in data: - chat_write(reply_user + (""": known commands: 'command', 'dice', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'""")) - elif 'version' in data: - chat_write(reply_user + (''': I'm running ''' + VERSION)) - elif 'unikot' in data: - chat_write(reply_user + (u''': ┌────────┐''')) - chat_write(reply_user + (u''': │Unicode!│''')) - chat_write(reply_user + (u''': └────────┘''')) - elif 'source' in data: - chat_write('My source code can be found at %s' % conf('src-url')) - elif 'dice' in data: - if reply_user in conf('enhanced-random-user'): - rnd = 0 # this might confuse users. good. - else: - rnd = random.randint(1, 6) - - dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] - chat_write('rolling a dice for %s: %s (%d)' %(reply_user, dice_char[rnd], rnd)) - elif 'uptime' in data: - u = int(uptime + time.time()) - plural_uptime = 's' - plural_request = 's' - - if 1 == u: plural_uptime = '' - if 1 == request_counter: plural_request = '' - - chat_write(reply_user + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request))) - logger('info', 'sent statistics') - elif 'ping' in data: - rnd = random.randint(0, 3) # 1:4 - if 0 == rnd: - chat_write(reply_user + ''': peng (You're dead now.)''') - logger('info', 'sent pong (variant)') - elif 1 == rnd: - chat_write(reply_user + ''': I don't like you, leave me alone.''') - logger('info', 'sent pong (dontlike)') - else: - chat_write(reply_user + ''': pong''') - logger('info', 'sent pong') - elif 'info' in data: - chat_write(reply_user + (''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master Cae. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(hist_max_count, hist_max_time))) - logger('info', 'sent long info') - else: - chat_write(reply_user + (''': I'm a bot (highlight me with 'info' for more information).''')) - logger('info', 'sent short info') - def parse_delete(filepath): try: fd = open(filepath, 'rb') @@ -306,8 +212,8 @@ def parse_delete(filepath): return if True != extract_url(content): - parse_commands(content) - parse_other(content) + plugins.parse_commands(content) + plugins.parse_other(content) return def get_version_git(): @@ -323,6 +229,12 @@ def get_version_git(): else: return "(unknown version)" +import plugins +plugins.chat_write = chat_write +plugins.conf = conf +plugins.logger = logger +plugins.ratelimit_exceeded = ratelimit_exceeded + if '__main__' == __name__: VERSION = get_version_git() print sys.argv[0] + ' ' + VERSION diff --git a/local_config.py.skel b/local_config.py.skel index 5dede38..df19565 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -7,6 +7,9 @@ if '__main__' == __name__: config = {} config['src-url'] = 'FIXME' +config['bot_user'] = 'urlbot' +config['bot_owner'] = 'FIXME' + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) diff --git a/plugins.py b/plugins.py new file mode 100644 index 0000000..baf2ff2 --- /dev/null +++ b/plugins.py @@ -0,0 +1,277 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +if '__main__' == __name__: + print '''this is a plugin file, which is not meant to be executed''' + exit(-1) + +RATE_GLOBAL = 1 +RATE_NO_SILENCE = 2 +RATE_INTERACTIVE = 4 + +def get_reply_user(data): + # FIXME: we can't determine if a user named 'foo> ' just wrote ' > bar' + # or a user 'foo' just wrote '> > bar' + return data.split(' ')[0].strip('<>') + +def parse_mental_ill(args): + min_ill = 3 + c = 0 + flag = False + + # return True for min_ill '!' in a row + for d in args['data']: + if '!' == d or '?' == d: + c += 1 + else: + c = 0 + if (min_ill <= c): + flag = True + break + + if True == flag: + return { + 'msg': '''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % args['reply_user'], + 'ratelimit_class': RATE_NO_SILENCE | RATE_GLOBAL + } + +def parse_skynet(args): + if 'skynet' in args['data'].lower(): + return { + 'msg': '''I'm an independent bot and have nothing to do with other artificial intelligence systems!''', + 'ratelimit_class': RATE_GLOBAL + } + + +def parse_other(data): + reply_user = get_reply_user(data) + + plugins = ( + { + 'name': 'parse mental illness', + 'func': parse_mental_ill, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'parse skynet', + 'func': parse_skynet, + 'param': { + 'data': data + } + } + ) + + for p in plugins: + ret = p['func'](p['param']) + + if None != ret: + if ratelimit_exceeded(ret['ratelimit_class']): + return False + + if 'msg' in ret.keys(): + chat_write(ret['msg']) + + return True + +def command_help(args): + if 'command' in args['data']: + return { + 'msg': args['reply_user'] + (""": known commands: 'command', 'dice', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'"""), + 'ratelimit_class': RATE_GLOBAL + } + +def command_version(args): + if 'version' in args['data']: + return { + 'msg': args['reply_user'] + (''': I'm running ''' + VERSION), + 'ratelimit_class': RATE_GLOBAL + } + +def command_unicode(args): + if 'unikot' in args['data']: + return { + 'msg': + args['reply_user'] + u''': ┌────────┐''' + '\n' + + args['reply_user'] + u''': │Unicode!│''' + '\n' + + args['reply_user'] + u''': └────────┘''', + 'ratelimit_class': RATE_GLOBAL + } + +def command_source(args): + if 'source' in args['data']: + return { + 'msg': 'My source code can be found at %s' % conf('src-url'), + 'ratelimit_class': RATE_GLOBAL + } + +def command_dice(args): + if 'dice' in args['data']: + if args['reply_user'] in conf('enhanced-random-user'): + rnd = 0 # this might confuse users. good. + else: + rnd = random.randint(1, 6) + + dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] + return { + 'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd), + 'ratelimit_class': RATE_INTERACTIVE + } + +def command_uptime(args): + if 'uptime' in args['data']: + u = int(uptime + time.time()) + plural_uptime = 's' + plural_request = 's' + + if 1 == u: plural_uptime = '' + if 1 == request_counter: plural_request = '' + + logger('info', 'sent statistics') + return { + 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request)), + 'ratelimit_class': RATE_GLOBAL + } + +def command_ping(args): + if 'ping' in args['data']: + rnd = random.randint(0, 3) # 1:4 + if 0 == rnd: + msg = args['reply_user'] + ''': peng (You're dead now.)''' + logger('info', 'sent pong (variant)') + elif 1 == rnd: + msg = args['reply_user'] + ''': I don't like you, leave me alone.''' + logger('info', 'sent pong (dontlike)') + else: + msg = args['reply_user'] + ''': pong''' + logger('info', 'sent pong') + + return { + 'msg': msg, + 'ratelimit_class': RATE_INTERACTIVE + } + +def command_info(args): + if 'info' in args['data']: + logger('info', 'sent long info') + return { + 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), hist_max_count, hist_max_time)), + 'ratelimit_class': RATE_GLOBAL + } + +def command_else(args): + logger('info', 'sent short info') + return { + 'msg': args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).''', + 'ratelimit_class': RATE_GLOBAL + } + +def parse_commands(data): + words = data.split(' ') + + if 2 > len(words): # need at least two words + return None + + # don't reply if beginning of the text matches bot_user + if words[1][0:len(conf('bot_user'))] != conf('bot_user'): + return None + + if 'hangup' in data: + chat_write('', prefix='/quit') + logger('warn', 'received hangup: ' + data) + return None + + reply_user = get_reply_user(data) + + plugins = ( + { + 'name': 'prints help', + 'func': command_help, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'prints version', + 'func': command_version, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'prints an unicode string', + 'func': command_unicode, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'prints git URL', + 'func': command_source, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'rolls a dice', + 'func': command_dice, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'prints uptime', + 'func': command_uptime, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'pong', + 'func': command_ping, + 'param': { + 'data': data, + 'reply_user': reply_user + } + }, + { + 'name': 'prints info message', + 'func': command_info, + 'param': { + 'data': data, + 'reply_user': reply_user + } + } + ) + + flag = False + + for p in plugins: + ret = p['func'](p['param']) + if None != ret: + flag = True + + if ratelimit_exceeded(ret['ratelimit_class']): + return False + + if 'msg' in ret.keys(): + chat_write(ret['msg']) + + if False != flag: + return None + + ret = command_else({'reply_user': reply_user}) + if None != ret: + if ratelimit_exceeded(RATE_GLOBAL): + return False + + if 'msg' in ret.keys(): + chat_write(ret['msg']) From 8a73625e3c71d3d3fe768b64fa9a801514d0c96e Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 05:32:35 +0200 Subject: [PATCH 053/112] fixed stuff; dynamic plugin reg; debugging infrastructure --- eventlooper.py | 6 +- plugins.py | 300 +++++++++++++++++++++++++++++-------------------- 2 files changed, 182 insertions(+), 124 deletions(-) diff --git a/eventlooper.py b/eventlooper.py index 5602f08..f355288 100755 --- a/eventlooper.py +++ b/eventlooper.py @@ -212,8 +212,8 @@ def parse_delete(filepath): return if True != extract_url(content): - plugins.parse_commands(content) - plugins.parse_other(content) + plugins.data_parse_commands(content) + plugins.data_parse_other(content) return def get_version_git(): @@ -235,6 +235,8 @@ plugins.conf = conf plugins.logger = logger plugins.ratelimit_exceeded = ratelimit_exceeded +plugins.register_all() + if '__main__' == __name__: VERSION = get_version_git() print sys.argv[0] + ' ' + VERSION diff --git a/plugins.py b/plugins.py index baf2ff2..7103b78 100644 --- a/plugins.py +++ b/plugins.py @@ -9,12 +9,23 @@ RATE_GLOBAL = 1 RATE_NO_SILENCE = 2 RATE_INTERACTIVE = 4 +plugins = {} +plugins['parse'] = [] +plugins['command'] = [] + def get_reply_user(data): # FIXME: we can't determine if a user named 'foo> ' just wrote ' > bar' # or a user 'foo' just wrote '> > bar' return data.split(' ')[0].strip('<>') def parse_mental_ill(args): + if 'register' == args: + return { + 'name': 'parse mental illness', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_NO_SILENCE | RATE_GLOBAL + } + min_ill = 3 c = 0 flag = False @@ -31,83 +42,111 @@ def parse_mental_ill(args): if True == flag: return { - 'msg': '''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % args['reply_user'], - 'ratelimit_class': RATE_NO_SILENCE | RATE_GLOBAL + 'msg': '''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % args['reply_user'] } def parse_skynet(args): - if 'skynet' in args['data'].lower(): + if 'register' == args: return { - 'msg': '''I'm an independent bot and have nothing to do with other artificial intelligence systems!''', + 'name': 'parse skynet', + 'args': ('data',), 'ratelimit_class': RATE_GLOBAL } + if 'skynet' in args['data'].lower(): + return { + 'msg': '''I'm an independent bot and have nothing to do with other artificial intelligence systems!''' + } -def parse_other(data): +def data_parse_other(data): reply_user = get_reply_user(data) - plugins = ( - { - 'name': 'parse mental illness', - 'func': parse_mental_ill, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'parse skynet', - 'func': parse_skynet, - 'param': { - 'data': data - } - } - ) + for p in plugins['parse']: + if ratelimit_exceeded(p['ratelimit_class']): + continue - for p in plugins: - ret = p['func'](p['param']) + args = {} + + if 'args' in p.keys(): + for a in p['args']: + if None == a: continue + + if 'data' == a: + args['data'] = data + elif 'reply_user' == a: + args['reply_user'] = reply_user + else: + logger('warn', 'unknown required arg for %s: %s' %(f, a)) + + ret = p['func'](args) if None != ret: - if ratelimit_exceeded(ret['ratelimit_class']): - return False - if 'msg' in ret.keys(): chat_write(ret['msg']) - return True - def command_help(args): + if 'register' == args: + return { + 'name': 'prints help', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + if 'command' in args['data']: return { - 'msg': args['reply_user'] + (""": known commands: 'command', 'dice', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'"""), - 'ratelimit_class': RATE_GLOBAL + 'msg': args['reply_user'] + (""": known commands: 'command', 'dice', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'""") } def command_version(args): - if 'version' in args['data']: + if 'register' == args: return { - 'msg': args['reply_user'] + (''': I'm running ''' + VERSION), + 'name': 'prints version', + 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } + if 'version' in args['data']: + return { + 'msg': args['reply_user'] + (''': I'm running ''' + VERSION) + } + def command_unicode(args): + if 'register' == args: + return { + 'name': 'prints an unicode string', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + if 'unikot' in args['data']: return { 'msg': args['reply_user'] + u''': ┌────────┐''' + '\n' + args['reply_user'] + u''': │Unicode!│''' + '\n' + - args['reply_user'] + u''': └────────┘''', - 'ratelimit_class': RATE_GLOBAL + args['reply_user'] + u''': └────────┘''' } def command_source(args): - if 'source' in args['data']: + if 'register' == args: return { - 'msg': 'My source code can be found at %s' % conf('src-url'), + 'name': 'prints git URL', + 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } + if 'source' in args['data']: + return { + 'msg': 'My source code can be found at %s' % conf('src-url') + } + def command_dice(args): + if 'register' == args: + return { + 'name': 'rolls a dice', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_INTERACTIVE + } + if 'dice' in args['data']: if args['reply_user'] in conf('enhanced-random-user'): rnd = 0 # this might confuse users. good. @@ -116,11 +155,17 @@ def command_dice(args): dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] return { - 'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd), - 'ratelimit_class': RATE_INTERACTIVE + 'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd) } def command_uptime(args): + if 'register' == args: + return { + 'name': 'prints uptime', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + if 'uptime' in args['data']: u = int(uptime + time.time()) plural_uptime = 's' @@ -131,11 +176,17 @@ def command_uptime(args): logger('info', 'sent statistics') return { - 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request)), - 'ratelimit_class': RATE_GLOBAL + 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request)) } def command_ping(args): + if 'register' == args: + return { + 'name': 'pong', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_INTERACTIVE + } + if 'ping' in args['data']: rnd = random.randint(0, 3) # 1:4 if 0 == rnd: @@ -149,26 +200,30 @@ def command_ping(args): logger('info', 'sent pong') return { - 'msg': msg, - 'ratelimit_class': RATE_INTERACTIVE + 'msg': msg } def command_info(args): + if 'register' == args: + return { + 'name': 'prints info message', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + if 'info' in args['data']: logger('info', 'sent long info') return { - 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), hist_max_count, hist_max_time)), - 'ratelimit_class': RATE_GLOBAL + 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), hist_max_count, hist_max_time)) } def command_else(args): logger('info', 'sent short info') return { - 'msg': args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).''', - 'ratelimit_class': RATE_GLOBAL + 'msg': args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).''' } -def parse_commands(data): +def data_parse_commands(data): words = data.split(' ') if 2 > len(words): # need at least two words @@ -185,86 +240,32 @@ def parse_commands(data): reply_user = get_reply_user(data) - plugins = ( - { - 'name': 'prints help', - 'func': command_help, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'prints version', - 'func': command_version, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'prints an unicode string', - 'func': command_unicode, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'prints git URL', - 'func': command_source, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'rolls a dice', - 'func': command_dice, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'prints uptime', - 'func': command_uptime, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'pong', - 'func': command_ping, - 'param': { - 'data': data, - 'reply_user': reply_user - } - }, - { - 'name': 'prints info message', - 'func': command_info, - 'param': { - 'data': data, - 'reply_user': reply_user - } - } - ) - flag = False - for p in plugins: - ret = p['func'](p['param']) + for p in plugins['command']: + if ratelimit_exceeded(p['ratelimit_class']): + continue + + args = {} + + if 'args' in p.keys(): + for a in p['args']: + if None == a: continue + + if 'data' == a: + args['data'] = data + elif 'reply_user' == a: + args['reply_user'] = reply_user + else: + logger('warn', 'unknown required arg for %s: %s' %(f, a)) + + ret = p['func'](args) + if None != ret: flag = True - - if ratelimit_exceeded(ret['ratelimit_class']): - return False - if 'msg' in ret.keys(): chat_write(ret['msg']) - + if False != flag: return None @@ -275,3 +276,58 @@ def parse_commands(data): if 'msg' in ret.keys(): chat_write(ret['msg']) + +funcs = {} +funcs['parse'] = (parse_mental_ill, parse_skynet) +funcs['command'] = ( + command_help, command_version, command_unicode, command_source, + command_dice, command_uptime, command_ping, command_info +) + +_dir = dir() + +debug = False +if debug: + def _chat_write(a): _logger('chat_write', a) + def _conf(a): return 'bot' + def _logger(a, b): print 'logger: %s::%s' %(a, b) + def _ratelimit_exceeded(ignored=None): return False + + try: chat_write + except NameError: chat_write = _chat_write + try: conf + except NameError: conf = _conf + try: logger + except NameError: logger = _logger + try: ratelimit_exceeded + except NameError: ratelimit_exceeded = _ratelimit_exceeded + try: random + except NameError: import random + +def register(func_type, auto=False): + plugins[func_type] = [] + + if auto: + # FIXME: this is broken. dir() returns str, but not + # the addr of the functions which we'd need here. + for f in _dir: + print 'testing(%s)' % f + if not f.startswith(func_type + '_'): + continue + + try: + ret = f('register') + ret['func'] = f + plugins[func_type].append(ret) + except Exception as e: + logger('warn', 'auto-registering %s failed: %s' %(f, e)) + + else: + for f in funcs[func_type]: + ret = f('register') + ret['func'] = f + plugins[func_type].append(ret) + +def register_all(): + register('parse') + register('command') From c3ddeaecdcf5861a84301e01cbb68b961ba565b3 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 05:34:49 +0200 Subject: [PATCH 054/112] rename eventlooper.py -> urlbot.py --- test_urlbot.py | 2 +- eventlooper.py => urlbot.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename eventlooper.py => urlbot.py (100%) diff --git a/test_urlbot.py b/test_urlbot.py index cd631f6..6fcd8ff 100644 --- a/test_urlbot.py +++ b/test_urlbot.py @@ -2,7 +2,7 @@ To be executed with nose """ import unittest -from eventlooper import fetch_page +from urlbot import fetch_page class TestEventlooper(unittest.TestCase): diff --git a/eventlooper.py b/urlbot.py similarity index 100% rename from eventlooper.py rename to urlbot.py From 294408c4fe8300de3d5ff5e3c1c5539d754ba4e9 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 05:51:18 +0200 Subject: [PATCH 055/112] ratelimit fix/split up; bot_user fixes --- plugins.py | 15 +++++++++++---- urlbot.py | 15 +++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/plugins.py b/plugins.py index 7103b78..16a68ec 100644 --- a/plugins.py +++ b/plugins.py @@ -5,9 +5,11 @@ if '__main__' == __name__: print '''this is a plugin file, which is not meant to be executed''' exit(-1) -RATE_GLOBAL = 1 -RATE_NO_SILENCE = 2 -RATE_INTERACTIVE = 4 +RATE_GLOBAL = 0x01 +RATE_NO_SILENCE = 0x02 +RATE_INTERACTIVE = 0x04 +RATE_CHAT = 0x08 +RATE_URL = 0x10 plugins = {} plugins['parse'] = [] @@ -82,6 +84,7 @@ def data_parse_other(data): if None != ret: if 'msg' in ret.keys(): + ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) def command_help(args): @@ -230,7 +233,7 @@ def data_parse_commands(data): return None # don't reply if beginning of the text matches bot_user - if words[1][0:len(conf('bot_user'))] != conf('bot_user'): + if not words[1].startswith(conf('bot_user')): return None if 'hangup' in data: @@ -264,6 +267,7 @@ def data_parse_commands(data): if None != ret: flag = True if 'msg' in ret.keys(): + ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) if False != flag: @@ -292,6 +296,7 @@ if debug: def _conf(a): return 'bot' def _logger(a, b): print 'logger: %s::%s' %(a, b) def _ratelimit_exceeded(ignored=None): return False + def _ratelimit_touch(ignored=None): return True try: chat_write except NameError: chat_write = _chat_write @@ -301,6 +306,8 @@ if debug: except NameError: logger = _logger try: ratelimit_exceeded except NameError: ratelimit_exceeded = _ratelimit_exceeded + try: ratelimit_touch + except NameError: ratelimit_touch = _ratelimit_touch try: random except NameError: import random diff --git a/urlbot.py b/urlbot.py index f355288..3d6e78a 100755 --- a/urlbot.py +++ b/urlbot.py @@ -131,12 +131,17 @@ def chat_write(message, prefix='/say '): except IOError: logger('err', "couldn't print to fifo " + fifo_path) -def ratelimit_exceeded(ignored=None): # FIXME: separate counters - global hist_flag - +def ratelimit_touch(ignored=None): # FIXME: separate counters now = time.time() hist_ts.append(now) + if hist_max_count < len(hist_ts): + hist_ts.pop(0) + + +def ratelimit_exceeded(ignored=None): # FIXME: separate counters + global hist_flag + if hist_max_count < len(hist_ts): first = hist_ts.pop(0) if (now - first) < hist_max_time: @@ -155,6 +160,7 @@ def extract_url(data): result = re.findall("(https?://[^\s>]+)", data) if result: for r in result: + ratelimit_touch() if ratelimit_exceeded(): return False @@ -197,7 +203,7 @@ def parse_delete(filepath): fd.close() os.remove(filepath) # probably better crash here - if content[1:1+len(bot_user)] == bot_user: + if content[1:1+len(conf('bot_user'))] == conf('bot_user'): return if 'has set the subject to:' in content: @@ -234,6 +240,7 @@ plugins.chat_write = chat_write plugins.conf = conf plugins.logger = logger plugins.ratelimit_exceeded = ratelimit_exceeded +plugins.ratelimit_touch = ratelimit_touch plugins.register_all() From 013409d6d83dc4b6146b2fde02ba1eacf978f99c Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 05:53:09 +0200 Subject: [PATCH 056/112] scope fixes --- urlbot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/urlbot.py b/urlbot.py index 3d6e78a..69d95cc 100755 --- a/urlbot.py +++ b/urlbot.py @@ -241,6 +241,7 @@ plugins.conf = conf plugins.logger = logger plugins.ratelimit_exceeded = ratelimit_exceeded plugins.ratelimit_touch = ratelimit_touch +plugins.random = random plugins.register_all() From 60285089fe642c6f0b54294892021c977a671068 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 05:56:39 +0200 Subject: [PATCH 057/112] scope fixes; put hist_max_* to conf() --- local_config.py.skel | 3 +++ plugins.py | 2 +- urlbot.py | 10 ++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index df19565..4a3edff 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -10,6 +10,9 @@ config['src-url'] = 'FIXME' config['bot_user'] = 'urlbot' config['bot_owner'] = 'FIXME' +config['hist_max_count'] = 5 +config['hist_max_time'] = 10 * 60 + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) diff --git a/plugins.py b/plugins.py index 16a68ec..349b364 100644 --- a/plugins.py +++ b/plugins.py @@ -217,7 +217,7 @@ def command_info(args): if 'info' in args['data']: logger('info', 'sent long info') return { - 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), hist_max_count, hist_max_time)) + 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), conf('hist_max_count'), conf('hist_max_time'))) } def command_else(args): diff --git a/urlbot.py b/urlbot.py index 69d95cc..3a1c579 100755 --- a/urlbot.py +++ b/urlbot.py @@ -14,8 +14,6 @@ event_files_dir = os.path.join(basedir, 'event_files') fifo_path = os.path.join(basedir, 'cmdfifo') # rate limiting to 5 messages per 10 minutes -hist_max_count = 5 -hist_max_time = 10 * 60 hist_ts = [] hist_flag = True uptime = -time.time() @@ -135,19 +133,19 @@ def ratelimit_touch(ignored=None): # FIXME: separate counters now = time.time() hist_ts.append(now) - if hist_max_count < len(hist_ts): + if conf('hist_max_count') < len(hist_ts): hist_ts.pop(0) def ratelimit_exceeded(ignored=None): # FIXME: separate counters global hist_flag - if hist_max_count < len(hist_ts): + if conf('hist_max_count') < len(hist_ts): first = hist_ts.pop(0) - if (now - first) < hist_max_time: + if (now - first) < conf('hist_max_time'): if hist_flag: hist_flag = False - chat_write('(rate limited to %d messages in %d seconds, try again at %s)' %(hist_max_count, hist_max_time, time.strftime('%T %Z', time.localtime(hist_ts[0] + hist_max_time)))) + chat_write('(rate limited to %d messages in %d seconds, try again at %s)' %(conf('hist_max_count'), conf('hist_max_time'), time.strftime('%T %Z', time.localtime(hist_ts[0] + conf('hist_max_time'))))) logger('warn', 'rate limiting exceeded: ' + pickle.dumps(hist_ts)) return True From 2714858b1e68cf3e5d76814e9627d551577f8caf Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 05:59:35 +0200 Subject: [PATCH 058/112] scope fixes; put uptime to conf() --- local_config.py.skel | 4 ++++ plugins.py | 2 +- urlbot.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index 4a3edff..a1f3e2d 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -1,5 +1,7 @@ #!/usr/bin/python +import time + if '__main__' == __name__: print '''this is a config file, which is not meant to be executed''' exit(-1) @@ -13,6 +15,8 @@ config['bot_owner'] = 'FIXME' config['hist_max_count'] = 5 config['hist_max_time'] = 10 * 60 +config['uptime'] = -time.time() + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) diff --git a/plugins.py b/plugins.py index 349b364..1207d5b 100644 --- a/plugins.py +++ b/plugins.py @@ -170,7 +170,7 @@ def command_uptime(args): } if 'uptime' in args['data']: - u = int(uptime + time.time()) + u = int(conf('uptime') + time.time()) plural_uptime = 's' plural_request = 's' diff --git a/urlbot.py b/urlbot.py index 3a1c579..01fad42 100755 --- a/urlbot.py +++ b/urlbot.py @@ -16,7 +16,6 @@ fifo_path = os.path.join(basedir, 'cmdfifo') # rate limiting to 5 messages per 10 minutes hist_ts = [] hist_flag = True -uptime = -time.time() request_counter = 0 parser = None @@ -240,6 +239,7 @@ plugins.logger = logger plugins.ratelimit_exceeded = ratelimit_exceeded plugins.ratelimit_touch = ratelimit_touch plugins.random = random +plugins.time = time plugins.register_all() From c6d7e36357adcb40993a1bef6ee3c6da02860cf7 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 06:03:04 +0200 Subject: [PATCH 059/112] scope fixes: request_counter --- urlbot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/urlbot.py b/urlbot.py index 01fad42..8dc8c4e 100755 --- a/urlbot.py +++ b/urlbot.py @@ -233,14 +233,18 @@ def get_version_git(): return "(unknown version)" import plugins + plugins.chat_write = chat_write plugins.conf = conf plugins.logger = logger plugins.ratelimit_exceeded = ratelimit_exceeded plugins.ratelimit_touch = ratelimit_touch + plugins.random = random plugins.time = time +plugins.request_counter = request_counter + plugins.register_all() if '__main__' == __name__: From 7c9e5e154b3d6e6138fb315e3c83f3772addaf68 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 06:07:44 +0200 Subject: [PATCH 060/112] scope fixes; put request_counter to conf() --- local_config.py.skel | 6 ++++++ plugins.py | 4 ++-- urlbot.py | 8 ++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index a1f3e2d..2451593 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -16,6 +16,8 @@ config['hist_max_count'] = 5 config['hist_max_time'] = 10 * 60 config['uptime'] = -time.time() +config['request_counter'] = 0 + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) @@ -24,3 +26,7 @@ def conf(val): if val in config.keys(): return config[val] return None + +def set_conf(key, val): + config[key] = val + return None diff --git a/plugins.py b/plugins.py index 1207d5b..4196e02 100644 --- a/plugins.py +++ b/plugins.py @@ -175,11 +175,11 @@ def command_uptime(args): plural_request = 's' if 1 == u: plural_uptime = '' - if 1 == request_counter: plural_request = '' + if 1 == conf('request_counter'): plural_request = '' logger('info', 'sent statistics') return { - 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, request_counter, plural_request)) + 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, conf('request_counter'), plural_request)) } def command_ping(args): diff --git a/urlbot.py b/urlbot.py index 8dc8c4e..61fbd70 100755 --- a/urlbot.py +++ b/urlbot.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- import sys, os, re, time, urllib, pickle, random, HTMLParser, stat -from local_config import conf +from local_config import conf, set_conf BUFSIZ = 8192 delay = 0.100 # seconds @@ -16,7 +16,6 @@ fifo_path = os.path.join(basedir, 'cmdfifo') # rate limiting to 5 messages per 10 minutes hist_ts = [] hist_flag = True -request_counter = 0 parser = None @@ -106,8 +105,7 @@ def extract_title(url): return (-1, 'error') def chat_write(message, prefix='/say '): - global request_counter - request_counter += 1 + set_conf('request_counter', conf('request_counter') + 1) if debug_enabled(): print message @@ -243,8 +241,6 @@ plugins.ratelimit_touch = ratelimit_touch plugins.random = random plugins.time = time -plugins.request_counter = request_counter - plugins.register_all() if '__main__' == __name__: From f411e1be4dac16036edfd1e48da3ae768fdc9779 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 06:12:34 +0200 Subject: [PATCH 061/112] scope fixes; moved VERSION to conf() --- local_config.py.skel | 2 ++ plugins.py | 2 +- urlbot.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index 2451593..670b7e1 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -18,6 +18,8 @@ config['hist_max_time'] = 10 * 60 config['uptime'] = -time.time() config['request_counter'] = 0 +config['version'] = None + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) diff --git a/plugins.py b/plugins.py index 4196e02..7ea860f 100644 --- a/plugins.py +++ b/plugins.py @@ -110,7 +110,7 @@ def command_version(args): if 'version' in args['data']: return { - 'msg': args['reply_user'] + (''': I'm running ''' + VERSION) + 'msg': args['reply_user'] + (''': I'm running ''' + conf('version')) } def command_unicode(args): diff --git a/urlbot.py b/urlbot.py index 61fbd70..d63e373 100755 --- a/urlbot.py +++ b/urlbot.py @@ -244,8 +244,8 @@ plugins.time = time plugins.register_all() if '__main__' == __name__: - VERSION = get_version_git() - print sys.argv[0] + ' ' + VERSION + set_conf('version', get_version_git()) + print sys.argv[0] + ' ' + conf('version') if not os.path.exists(fifo_path): logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path) From 6563addb1bbcb292688b74461aec655efcae3f3f Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 08:43:33 +0200 Subject: [PATCH 062/112] ratelimit_*(): fixes after splitup --- urlbot.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/urlbot.py b/urlbot.py index d63e373..7f9b85f 100755 --- a/urlbot.py +++ b/urlbot.py @@ -127,8 +127,7 @@ def chat_write(message, prefix='/say '): logger('err', "couldn't print to fifo " + fifo_path) def ratelimit_touch(ignored=None): # FIXME: separate counters - now = time.time() - hist_ts.append(now) + hist_ts.append(time.time()) if conf('hist_max_count') < len(hist_ts): hist_ts.pop(0) @@ -139,7 +138,7 @@ def ratelimit_exceeded(ignored=None): # FIXME: separate counters if conf('hist_max_count') < len(hist_ts): first = hist_ts.pop(0) - if (now - first) < conf('hist_max_time'): + if (time.time() - first) < conf('hist_max_time'): if hist_flag: hist_flag = False chat_write('(rate limited to %d messages in %d seconds, try again at %s)' %(conf('hist_max_count'), conf('hist_max_time'), time.strftime('%T %Z', time.localtime(hist_ts[0] + conf('hist_max_time'))))) From aae684170d6c32e29e87dfb04614372d01fb8f71 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 09:19:46 +0200 Subject: [PATCH 063/112] moved common stuff to common.py; import adjustments --- common.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ plugins.py | 16 ++++------------ urlbot.py | 38 ++------------------------------------ 3 files changed, 50 insertions(+), 48 deletions(-) create mode 100644 common.py diff --git a/common.py b/common.py new file mode 100644 index 0000000..8cb5335 --- /dev/null +++ b/common.py @@ -0,0 +1,44 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +if '__main__' == __name__: + print '''this is a library file, which is not meant to be executed''' + exit(-1) + +import sys, os, time + +RATE_GLOBAL = 0x01 +RATE_NO_SILENCE = 0x02 +RATE_INTERACTIVE = 0x04 +RATE_CHAT = 0x08 +RATE_URL = 0x10 + +BUFSIZ = 8192 +delay = 0.100 # seconds + +basedir = '.' +if 2 == len(sys.argv): basedir = sys.argv[1] + +event_files_dir = os.path.join(basedir, 'event_files') +fifo_path = os.path.join(basedir, 'cmdfifo') + +def debug_enabled(): +# return True + return False + +def e(data): + if data: + if unicode == type(data): + return data.encode('utf8') + elif str == type(data): + return data.encode('string-escape') + else: + return data + else: + return "''" + +def logger(severity, message): +# sev = ( 'err', 'warn', 'info' ) +# if severity in sev: + args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) + sys.stderr.write(e('%s %s %s: %s' % args) + '\n') diff --git a/plugins.py b/plugins.py index 7ea860f..af6bc6a 100644 --- a/plugins.py +++ b/plugins.py @@ -5,11 +5,9 @@ if '__main__' == __name__: print '''this is a plugin file, which is not meant to be executed''' exit(-1) -RATE_GLOBAL = 0x01 -RATE_NO_SILENCE = 0x02 -RATE_INTERACTIVE = 0x04 -RATE_CHAT = 0x08 -RATE_URL = 0x10 +import time, random +from local_config import conf +from common import * plugins = {} plugins['parse'] = [] @@ -290,11 +288,9 @@ funcs['command'] = ( _dir = dir() -debug = False -if debug: +if debug_enabled(): def _chat_write(a): _logger('chat_write', a) def _conf(a): return 'bot' - def _logger(a, b): print 'logger: %s::%s' %(a, b) def _ratelimit_exceeded(ignored=None): return False def _ratelimit_touch(ignored=None): return True @@ -302,14 +298,10 @@ if debug: except NameError: chat_write = _chat_write try: conf except NameError: conf = _conf - try: logger - except NameError: logger = _logger try: ratelimit_exceeded except NameError: ratelimit_exceeded = _ratelimit_exceeded try: ratelimit_touch except NameError: ratelimit_touch = _ratelimit_touch - try: random - except NameError: import random def register(func_type, auto=False): plugins[func_type] = [] diff --git a/urlbot.py b/urlbot.py index 7f9b85f..b397dd7 100755 --- a/urlbot.py +++ b/urlbot.py @@ -1,17 +1,9 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -import sys, os, re, time, urllib, pickle, random, HTMLParser, stat +import sys, os, re, time, urllib, pickle, HTMLParser, stat from local_config import conf, set_conf - -BUFSIZ = 8192 -delay = 0.100 # seconds - -basedir = '.' -if 2 == len(sys.argv): basedir = sys.argv[1] - -event_files_dir = os.path.join(basedir, 'event_files') -fifo_path = os.path.join(basedir, 'cmdfifo') +from common import * # rate limiting to 5 messages per 10 minutes hist_ts = [] @@ -19,27 +11,6 @@ hist_flag = True parser = None -def debug_enabled(): -# return True - return False - -def e(data): - if data: - if unicode == type(data): - return data.encode('utf8') - elif str == type(data): - return data.encode('string-escape') - else: - return data - else: - return "''" - -def logger(severity, message): -# sev = ( 'err', 'warn', 'info' ) -# if severity in sev: - args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) - sys.stderr.write(e('%s %s %s: %s' % args) + '\n') - class urllib_user_agent_wrapper(urllib.FancyURLopener): version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' @@ -232,14 +203,9 @@ def get_version_git(): import plugins plugins.chat_write = chat_write -plugins.conf = conf -plugins.logger = logger plugins.ratelimit_exceeded = ratelimit_exceeded plugins.ratelimit_touch = ratelimit_touch -plugins.random = random -plugins.time = time - plugins.register_all() if '__main__' == __name__: From e721c328f7492a0f8fc528189b2aae277f5711aa Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 09:41:29 +0200 Subject: [PATCH 064/112] moved VERSION stuff to common.py --- common.py | 15 +++++++++++++++ local_config.py.skel | 3 --- plugins.py | 2 +- urlbot.py | 16 +--------------- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/common.py b/common.py index 8cb5335..42d5537 100644 --- a/common.py +++ b/common.py @@ -42,3 +42,18 @@ def logger(severity, message): # if severity in sev: args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) sys.stderr.write(e('%s %s %s: %s' % args) + '\n') + +def get_version_git(): + import subprocess + + cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit'] + + p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE) + first_line = p.stdout.readline() + + if 0 == p.wait(): + return "version (Git) '%s'" % e(first_line.strip()) + else: + return "(unknown version)" + +VERSION = get_version_git() diff --git a/local_config.py.skel b/local_config.py.skel index 670b7e1..1e8791c 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -18,9 +18,6 @@ config['hist_max_time'] = 10 * 60 config['uptime'] = -time.time() config['request_counter'] = 0 -config['version'] = None - - # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) diff --git a/plugins.py b/plugins.py index af6bc6a..fba4640 100644 --- a/plugins.py +++ b/plugins.py @@ -108,7 +108,7 @@ def command_version(args): if 'version' in args['data']: return { - 'msg': args['reply_user'] + (''': I'm running ''' + conf('version')) + 'msg': args['reply_user'] + (''': I'm running ''' + VERSION) } def command_unicode(args): diff --git a/urlbot.py b/urlbot.py index b397dd7..9825a16 100755 --- a/urlbot.py +++ b/urlbot.py @@ -187,19 +187,6 @@ def parse_delete(filepath): plugins.data_parse_other(content) return -def get_version_git(): - import subprocess - - cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit'] - - p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE) - first_line = p.stdout.readline() - - if 0 == p.wait(): - return "version (Git) '%s'" % e(first_line.strip()) - else: - return "(unknown version)" - import plugins plugins.chat_write = chat_write @@ -209,8 +196,7 @@ plugins.ratelimit_touch = ratelimit_touch plugins.register_all() if '__main__' == __name__: - set_conf('version', get_version_git()) - print sys.argv[0] + ' ' + conf('version') + print sys.argv[0] + ' ' + VERSION if not os.path.exists(fifo_path): logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path) From 951054aff5da8d33fa1376209d56f1528564bd4d Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 16:06:26 +0200 Subject: [PATCH 065/112] add modul. "help"; prep "save"; add {desc} --- common.py | 12 +++++- local_config.py.skel | 9 ++++- plugins.py | 87 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 86 insertions(+), 22 deletions(-) diff --git a/common.py b/common.py index 42d5537..c94932f 100644 --- a/common.py +++ b/common.py @@ -5,7 +5,8 @@ if '__main__' == __name__: print '''this is a library file, which is not meant to be executed''' exit(-1) -import sys, os, time +import sys, os, time, pickle +from local_config import conf RATE_GLOBAL = 0x01 RATE_NO_SILENCE = 0x02 @@ -43,6 +44,15 @@ def logger(severity, message): args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) sys.stderr.write(e('%s %s %s: %s' % args) + '\n') +def conf_save(obj): + with open(conf('persistent_storage'), 'wb') as fd: + return pickle.dump(obj, fd) + +def conf_load(): + with open(conf('persistent_storage'), 'rb') as fd: + fd.seek(0) + return pickle.load(fd) + def get_version_git(): import subprocess diff --git a/local_config.py.skel b/local_config.py.skel index 1e8791c..1208031 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -1,6 +1,10 @@ #!/usr/bin/python -import time +import time, sys + +def _logger(a, b): sys.stderr.write('logger: %s::%s\n' %(a, b)) +try: logger +except NameError: logger = _logger if '__main__' == __name__: print '''this is a config file, which is not meant to be executed''' @@ -18,12 +22,15 @@ config['hist_max_time'] = 10 * 60 config['uptime'] = -time.time() config['request_counter'] = 0 +config['persistent_storage'] = 'urlbot.persistent' + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) def conf(val): if val in config.keys(): return config[val] + logger('warn', 'conf(): unknown key ' + str(val)) return None def set_conf(key, val): diff --git a/plugins.py b/plugins.py index fba4640..930652b 100644 --- a/plugins.py +++ b/plugins.py @@ -85,23 +85,63 @@ def data_parse_other(data): ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) -def command_help(args): +def command_command(args): if 'register' == args: return { - 'name': 'prints help', - 'args': ('data', 'reply_user'), + 'name': 'command', + 'desc': 'lists commands', + 'args': ('data', 'reply_user', 'cmd_list'), 'ratelimit_class': RATE_GLOBAL } if 'command' in args['data']: return { - 'msg': args['reply_user'] + (""": known commands: 'command', 'dice', 'info', 'hangup', 'nospoiler', 'ping', 'uptime', 'source', 'version'""") + 'msg': args['reply_user'] + ': known commands: ' + str(args['cmd_list']).strip('[]') } +def command_help(args): + if 'register' == args: + return { + 'name': 'help', + 'desc': 'print help for a command', + 'args': ('data', 'reply_user', 'cmd_list'), + 'ratelimit_class': RATE_GLOBAL + } + + + cmd = None + flag = False + + for word in args['data'].split(): + if True == flag: + cmd = word + break + + if 'help' == word: + flag = True + + if None == cmd: + return { + 'msg': args['reply_user'] + ': no command given' + } + + if not cmd in [p['name'] for p in plugins['command']]: + return { + 'msg': args['reply_user'] + ': no such command: %s' % cmd + } + + for p in plugins['command']: + if cmd == p['name']: + return { + 'msg': args['reply_user'] + ': help for %s: %s' %(cmd, p['desc']) + } + + def command_version(args): if 'register' == args: return { - 'name': 'prints version', + 'name': 'version', + 'desc': 'prints version', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } @@ -114,7 +154,8 @@ def command_version(args): def command_unicode(args): if 'register' == args: return { - 'name': 'prints an unicode string', + 'name': 'unikot', + 'desc': 'prints an unicode string', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } @@ -130,7 +171,8 @@ def command_unicode(args): def command_source(args): if 'register' == args: return { - 'name': 'prints git URL', + 'name': 'source', + 'desc': 'prints git URL', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } @@ -143,7 +185,8 @@ def command_source(args): def command_dice(args): if 'register' == args: return { - 'name': 'rolls a dice', + 'name': 'dice', + 'desc': 'rolls a dice', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_INTERACTIVE } @@ -162,7 +205,8 @@ def command_dice(args): def command_uptime(args): if 'register' == args: return { - 'name': 'prints uptime', + 'name': 'uptime', + 'desc': 'prints uptime', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } @@ -183,7 +227,8 @@ def command_uptime(args): def command_ping(args): if 'register' == args: return { - 'name': 'pong', + 'name': 'ping', + 'desc': 'sends pong', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_INTERACTIVE } @@ -207,7 +252,8 @@ def command_ping(args): def command_info(args): if 'register' == args: return { - 'name': 'prints info message', + 'name': 'info', + 'desc': 'prints info message', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } @@ -241,8 +287,6 @@ def data_parse_commands(data): reply_user = get_reply_user(data) - flag = False - for p in plugins['command']: if ratelimit_exceeded(p['ratelimit_class']): continue @@ -255,6 +299,10 @@ def data_parse_commands(data): if 'data' == a: args['data'] = data + elif 'cmd_list' == a: + cmds = [c['name'] for c in plugins['command']] + cmds.sort() + args['cmd_list'] = cmds elif 'reply_user' == a: args['reply_user'] = reply_user else: @@ -263,13 +311,10 @@ def data_parse_commands(data): ret = p['func'](args) if None != ret: - flag = True if 'msg' in ret.keys(): ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) - - if False != flag: - return None + return None ret = command_else({'reply_user': reply_user}) if None != ret: @@ -282,14 +327,14 @@ def data_parse_commands(data): funcs = {} funcs['parse'] = (parse_mental_ill, parse_skynet) funcs['command'] = ( - command_help, command_version, command_unicode, command_source, - command_dice, command_uptime, command_ping, command_info + command_command, command_help, command_version, command_unicode, + command_source, command_dice, command_uptime, command_ping, command_info ) _dir = dir() if debug_enabled(): - def _chat_write(a): _logger('chat_write', a) + def _chat_write(a): logger('chat_write', a) def _conf(a): return 'bot' def _ratelimit_exceeded(ignored=None): return False def _ratelimit_touch(ignored=None): return True @@ -303,6 +348,8 @@ if debug_enabled(): try: ratelimit_touch except NameError: ratelimit_touch = _ratelimit_touch + logger('info', 'debugging enabled') + def register(func_type, auto=False): plugins[func_type] = [] From c9f8237f25474c330c43c7ec17fd340b00999fcd Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 16:10:00 +0200 Subject: [PATCH 066/112] gitignore urlbot.persistent --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b452778..d150401 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ cmdfifo logs/ event_files/ +urlbot.persistent From afd79c49a6446793dcdad8929dc32761f0f98c0e Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 16:15:01 +0200 Subject: [PATCH 067/112] fix "help" globbing --- plugins.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plugins.py b/plugins.py index 930652b..f12a726 100644 --- a/plugins.py +++ b/plugins.py @@ -120,6 +120,9 @@ def command_help(args): if 'help' == word: flag = True + if False == flag: # no match on 'help' + return None + if None == cmd: return { 'msg': args['reply_user'] + ': no command given' From ead248e7fafa0cd0668d9e3bfa8228abd7c397fa Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 16:20:34 +0200 Subject: [PATCH 068/112] fix for multi-line output, e.g. "unikot" --- plugins.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/plugins.py b/plugins.py index f12a726..955b1f8 100644 --- a/plugins.py +++ b/plugins.py @@ -166,9 +166,11 @@ def command_unicode(args): if 'unikot' in args['data']: return { 'msg': - args['reply_user'] + u''': ┌────────┐''' + '\n' + - args['reply_user'] + u''': │Unicode!│''' + '\n' + - args['reply_user'] + u''': └────────┘''' + ( + args['reply_user'] + u''': ┌────────┐''', + args['reply_user'] + u''': │Unicode!│''', + args['reply_user'] + u''': └────────┘''' + ) } def command_source(args): @@ -315,8 +317,14 @@ def data_parse_commands(data): if None != ret: if 'msg' in ret.keys(): - ratelimit_touch(RATE_CHAT) - chat_write(ret['msg']) + if str == type(ret['msg']): + ratelimit_touch(RATE_CHAT) + chat_write(ret['msg']) + else: + for line in ret['msg']: + ratelimit_touch(RATE_CHAT) + chat_write(line) + return None ret = command_else({'reply_user': reply_user}) From ab6382801961f4e62dba54d195c03e3ba290a80e Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 27 Sep 2014 16:50:19 +0200 Subject: [PATCH 069/112] fix multi-line plugin output --- plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins.py b/plugins.py index 955b1f8..bb4744b 100644 --- a/plugins.py +++ b/plugins.py @@ -317,7 +317,7 @@ def data_parse_commands(data): if None != ret: if 'msg' in ret.keys(): - if str == type(ret['msg']): + if str == type(ret['msg']) or unicode == type(ret['msg']): ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) else: From acc5242de0847f7cbc870aae5053999845b260d0 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 28 Sep 2014 18:03:08 +0200 Subject: [PATCH 070/112] Python3 migration; trashed e(); html->html_text rename --- common.py | 19 +++----------- local_config.py.skel | 6 ++--- plugins.py | 32 ++++++++++++++---------- urlbot.py | 59 ++++++++++++++++++++++---------------------- 4 files changed, 56 insertions(+), 60 deletions(-) diff --git a/common.py b/common.py index c94932f..c961257 100644 --- a/common.py +++ b/common.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- if '__main__' == __name__: - print '''this is a library file, which is not meant to be executed''' + print('''this is a library file, which is not meant to be executed''') exit(-1) import sys, os, time, pickle @@ -27,22 +27,11 @@ def debug_enabled(): # return True return False -def e(data): - if data: - if unicode == type(data): - return data.encode('utf8') - elif str == type(data): - return data.encode('string-escape') - else: - return data - else: - return "''" - def logger(severity, message): # sev = ( 'err', 'warn', 'info' ) # if severity in sev: args = (sys.argv[0], time.strftime('%Y-%m-%d.%H:%M:%S'), severity, message) - sys.stderr.write(e('%s %s %s: %s' % args) + '\n') + sys.stderr.write('%s %s %s: %s\n' % args) def conf_save(obj): with open(conf('persistent_storage'), 'wb') as fd: @@ -62,7 +51,7 @@ def get_version_git(): first_line = p.stdout.readline() if 0 == p.wait(): - return "version (Git) '%s'" % e(first_line.strip()) + return "version (Git) '%s'" % str(first_line.strip()) else: return "(unknown version)" diff --git a/local_config.py.skel b/local_config.py.skel index 1208031..dbb806a 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import time, sys @@ -7,7 +7,7 @@ try: logger except NameError: logger = _logger if '__main__' == __name__: - print '''this is a config file, which is not meant to be executed''' + print('''this is a config file, which is not meant to be executed''') exit(-1) config = {} @@ -28,7 +28,7 @@ config['persistent_storage'] = 'urlbot.persistent' config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) def conf(val): - if val in config.keys(): + if val in list(config.keys()): return config[val] logger('warn', 'conf(): unknown key ' + str(val)) return None diff --git a/plugins.py b/plugins.py index bb4744b..35db5a4 100644 --- a/plugins.py +++ b/plugins.py @@ -1,8 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- if '__main__' == __name__: - print '''this is a plugin file, which is not meant to be executed''' + print('''this is a plugin file, which is not meant to be executed''') exit(-1) import time, random @@ -67,7 +67,7 @@ def data_parse_other(data): args = {} - if 'args' in p.keys(): + if 'args' in list(p.keys()): for a in p['args']: if None == a: continue @@ -81,7 +81,7 @@ def data_parse_other(data): ret = p['func'](args) if None != ret: - if 'msg' in ret.keys(): + if 'msg' in list(ret.keys()): ratelimit_touch(RATE_CHAT) chat_write(ret['msg']) @@ -167,9 +167,9 @@ def command_unicode(args): return { 'msg': ( - args['reply_user'] + u''': ┌────────┐''', - args['reply_user'] + u''': │Unicode!│''', - args['reply_user'] + u''': └────────┘''' + args['reply_user'] + ''': ┌────────┐''', + args['reply_user'] + ''': │Unicode!│''', + args['reply_user'] + ''': └────────┘''' ) } @@ -202,7 +202,7 @@ def command_dice(args): else: rnd = random.randint(1, 6) - dice_char = [u'◇', u'⚀', u'⚁', u'⚂', u'⚃', u'⚄', u'⚅'] + dice_char = ['◇', '⚀', '⚁', '⚂', '⚃', '⚄', '⚅'] return { 'msg': 'rolling a dice for %s: %s (%d)' %(args['reply_user'], dice_char[rnd], rnd) } @@ -298,7 +298,7 @@ def data_parse_commands(data): args = {} - if 'args' in p.keys(): + if 'args' in list(p.keys()): for a in p['args']: if None == a: continue @@ -316,13 +316,19 @@ def data_parse_commands(data): ret = p['func'](args) if None != ret: - if 'msg' in ret.keys(): - if str == type(ret['msg']) or unicode == type(ret['msg']): + if 'msg' in list(ret.keys()): + if str == type(ret['msg']): # FIXME 2to3 ratelimit_touch(RATE_CHAT) + if ratelimit_exceeded(RATE_CHAT): + return False + chat_write(ret['msg']) else: for line in ret['msg']: ratelimit_touch(RATE_CHAT) + if ratelimit_exceeded(RATE_CHAT): + return False + chat_write(line) return None @@ -332,7 +338,7 @@ def data_parse_commands(data): if ratelimit_exceeded(RATE_GLOBAL): return False - if 'msg' in ret.keys(): + if 'msg' in list(ret.keys()): chat_write(ret['msg']) funcs = {} @@ -368,7 +374,7 @@ def register(func_type, auto=False): # FIXME: this is broken. dir() returns str, but not # the addr of the functions which we'd need here. for f in _dir: - print 'testing(%s)' % f + print('testing(%s)' % f) if not f.startswith(func_type + '_'): continue diff --git a/urlbot.py b/urlbot.py index 9825a16..260836f 100755 --- a/urlbot.py +++ b/urlbot.py @@ -1,7 +1,8 @@ -#!/usr/bin/python +#!/usr/bin/python3 # -*- coding: utf-8 -*- -import sys, os, re, time, urllib, pickle, HTMLParser, stat +import sys, os, stat, re, time, pickle +import urllib.request, urllib.parse, urllib.error, html.parser from local_config import conf, set_conf from common import * @@ -11,17 +12,17 @@ hist_flag = True parser = None -class urllib_user_agent_wrapper(urllib.FancyURLopener): +class urllib_user_agent_wrapper(urllib.request.FancyURLopener): version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' def fetch_page(url): logger('info', 'fetching page ' + url) try: - urllib._urlopener = urllib_user_agent_wrapper() - response = urllib.urlopen(url) - html = response.read(BUFSIZ) # ignore more than BUFSIZ + urllib.request._urlopener = urllib_user_agent_wrapper() + response = urllib.request.urlopen(url) + html_text = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() - return (html, response.headers) + return (html_text, response.headers) except IOError as e: logger('warn', 'failed: ' + e.errno) @@ -36,8 +37,8 @@ def extract_title(url): logger('info', 'extracting title from ' + url) - (html, headers) = fetch_page(url) - if html: + (html_text, headers) = fetch_page(url) + if html_text: charset = '' if 'content-type' in headers: logger('debug', 'content-type: ' + headers['content-type']) @@ -48,21 +49,21 @@ def extract_title(url): charset = re.sub('.*charset=(?P<charset>\S+).*', '\g<charset>', headers['content-type'], re.IGNORECASE) - result = re.match(r'.*?<title.*?>(.*?).*?', html, re.S | re.M | re.IGNORECASE) + if '' != charset: + try: + html_text = html_text.decode(charset) + except LookupError: + logger('warn', 'invalid charset in ' + headers['content-type']) + + if str != type(html_text): + html_text = str(html_text) + + result = re.match(r'.*?(.*?).*?', html_text, re.S | re.M | re.IGNORECASE) if result: match = result.groups()[0] -# if 'charset=UTF-8' in headers['content-type']: -# match = unicode(match) - if None == parser: - parser = HTMLParser.HTMLParser() - - if '' != charset: - try: - match = match.decode(charset) - except LookupError: - logger('warn', 'invalid charset in ' + headers['content-type']) + parser = html.parser.HTMLParser() try: expanded_html = parser.unescape(match) @@ -79,15 +80,15 @@ def chat_write(message, prefix='/say '): set_conf('request_counter', conf('request_counter') + 1) if debug_enabled(): - print message + print(message) else: try: fd = open(fifo_path, 'wb') - +# FIXME 2to3 # FIXME: somehow, unicode chars can end up inside a message, # which seems to make both unicode() and ''.encode('utf8') fail. try: - msg = unicode(prefix) + unicode(message) + '\n' + msg = str(prefix) + str(message) + '\n' msg = msg.encode('utf8') except UnicodeDecodeError: msg = prefix + message + '\n' @@ -132,16 +133,16 @@ def extract_url(data): (status, title) = extract_title(r) if 0 == status: - message = 'Title: %s: %s' % (title.strip(), e(r)) + message = 'Title: %s: %s' % (title.strip(), r) elif 1 == status: logger('info', 'no message sent for non-text %s (%s)' %(r, title)) continue elif 2 == status: - message = 'No title: %s' % (e(r)) + message = 'No title: %s' % r elif 3 == status: message = title else: - message = 'some error occurred when fetching %s' % e(r) + message = 'some error occurred when fetching %s' % r message = message.replace('\n', '\\n') @@ -159,7 +160,7 @@ def parse_pn(data): def parse_delete(filepath): try: - fd = open(filepath, 'rb') + fd = open(filepath, 'r') except IOError: logger('err', 'file has vanished: ' + filepath) return False @@ -196,7 +197,7 @@ plugins.ratelimit_touch = ratelimit_touch plugins.register_all() if '__main__' == __name__: - print sys.argv[0] + ' ' + VERSION + print(sys.argv[0] + ' ' + VERSION) if not os.path.exists(fifo_path): logger('error', 'fifo_path "%s" does not exist, exiting' % fifo_path) @@ -214,5 +215,5 @@ if '__main__' == __name__: time.sleep(delay) except KeyboardInterrupt: - print "" + print("") exit(130) From af9e195011d2af07bfaca1868458f0dba953d013 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 28 Sep 2014 18:11:19 +0200 Subject: [PATCH 071/112] asuming UTF-8 as git-log(1) output --- common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.py b/common.py index c961257..59d7754 100644 --- a/common.py +++ b/common.py @@ -51,7 +51,7 @@ def get_version_git(): first_line = p.stdout.readline() if 0 == p.wait(): - return "version (Git) '%s'" % str(first_line.strip()) + return "version (Git) '%s'" % str(first_line.strip(), encoding='utf8') else: return "(unknown version)" From eba70a5ed08fa4978d9c8becc6d10d5522612ff3 Mon Sep 17 00:00:00 2001 From: urlbot Date: Sun, 28 Sep 2014 22:44:42 +0200 Subject: [PATCH 072/112] plugin debugging enabled --- plugins.py | 23 +++++++++++++++++------ urlbot.py | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/plugins.py b/plugins.py index 35db5a4..e46efcd 100644 --- a/plugins.py +++ b/plugins.py @@ -41,6 +41,7 @@ def parse_mental_ill(args): break if True == flag: + logger('plugin', 'sent mental illness reply') return { 'msg': '''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % args['reply_user'] } @@ -54,6 +55,7 @@ def parse_skynet(args): } if 'skynet' in args['data'].lower(): + logger('plugin', 'sent skynet reply') return { 'msg': '''I'm an independent bot and have nothing to do with other artificial intelligence systems!''' } @@ -95,6 +97,7 @@ def command_command(args): } if 'command' in args['data']: + logger('plugin', 'sent command list') return { 'msg': args['reply_user'] + ': known commands: ' + str(args['cmd_list']).strip('[]') } @@ -124,17 +127,20 @@ def command_help(args): return None if None == cmd: + logger('plugin', 'empty help request') return { 'msg': args['reply_user'] + ': no command given' } if not cmd in [p['name'] for p in plugins['command']]: + logger('plugin', 'no help found for %s' % cmd) return { 'msg': args['reply_user'] + ': no such command: %s' % cmd } for p in plugins['command']: if cmd == p['name']: + logger('plugin', 'sent help for %s' % cmd) return { 'msg': args['reply_user'] + ': help for %s: %s' %(cmd, p['desc']) } @@ -150,6 +156,7 @@ def command_version(args): } if 'version' in args['data']: + logger('plugin', 'sent version string') return { 'msg': args['reply_user'] + (''': I'm running ''' + VERSION) } @@ -164,6 +171,7 @@ def command_unicode(args): } if 'unikot' in args['data']: + logger('plugin', 'sent some unicode') return { 'msg': ( @@ -183,6 +191,7 @@ def command_source(args): } if 'source' in args['data']: + logger('plugin', 'sent source URL') return { 'msg': 'My source code can be found at %s' % conf('src-url') } @@ -199,8 +208,10 @@ def command_dice(args): if 'dice' in args['data']: if args['reply_user'] in conf('enhanced-random-user'): rnd = 0 # this might confuse users. good. + logger('plugin', 'sent random (enhanced)') else: rnd = random.randint(1, 6) + logger('plugin', 'sent random') dice_char = ['◇', '⚀', '⚁', '⚂', '⚃', '⚄', '⚅'] return { @@ -224,7 +235,7 @@ def command_uptime(args): if 1 == u: plural_uptime = '' if 1 == conf('request_counter'): plural_request = '' - logger('info', 'sent statistics') + logger('plugin', 'sent statistics') return { 'msg': args['reply_user'] + (''': happily serving for %d second%s, %d request%s so far.''' %(u, plural_uptime, conf('request_counter'), plural_request)) } @@ -242,13 +253,13 @@ def command_ping(args): rnd = random.randint(0, 3) # 1:4 if 0 == rnd: msg = args['reply_user'] + ''': peng (You're dead now.)''' - logger('info', 'sent pong (variant)') + logger('plugin', 'sent pong (variant)') elif 1 == rnd: msg = args['reply_user'] + ''': I don't like you, leave me alone.''' - logger('info', 'sent pong (dontlike)') + logger('plugin', 'sent pong (dontlike)') else: msg = args['reply_user'] + ''': pong''' - logger('info', 'sent pong') + logger('plugin', 'sent pong') return { 'msg': msg @@ -264,13 +275,13 @@ def command_info(args): } if 'info' in args['data']: - logger('info', 'sent long info') + logger('plugin', 'sent long info') return { 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), conf('hist_max_count'), conf('hist_max_time'))) } def command_else(args): - logger('info', 'sent short info') + logger('plugin', 'sent short info') return { 'msg': args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).''' } diff --git a/urlbot.py b/urlbot.py index 260836f..77ca3a4 100755 --- a/urlbot.py +++ b/urlbot.py @@ -215,5 +215,5 @@ if '__main__' == __name__: time.sleep(delay) except KeyboardInterrupt: - print("") + print('') exit(130) From 98dd94fc63c898e2da67f6568fc5e3a5b7702ad9 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 29 Sep 2014 00:11:40 +0200 Subject: [PATCH 073/112] testing levenshtein distance for (url, title) --- common.py | 21 +++++++++++++++++++++ urlbot.py | 4 +++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/common.py b/common.py index 59d7754..d1dcb30 100644 --- a/common.py +++ b/common.py @@ -42,6 +42,27 @@ def conf_load(): fd.seek(0) return pickle.load(fd) +def levenshtein(a, b, return_table=False): + '''returns the levenshtein distance between a and b''' + # initialisize a table with 0, but the 0-rows/cols with their index + d = [[ (i if 0 == j else j if 0 == i else 0) for j in range(len(b)+1) ] for i in range(len(a)+1) ] + + for i in range(1, len(a)+1): + for j in range(1, len(b)+1): + if a[i-1] == b[j-1]: + d[i][j] = d[i-1][j-1] + else: + d[i][j] = min( + d[i-1][j] + 1, # deletion + d[i][j-1] + 1, # insertion + d[i-1][j-1] + 1, # substitution + ) + + if return_table: + return (d, d[i][j]) + else: + return d[i][j] + def get_version_git(): import subprocess diff --git a/urlbot.py b/urlbot.py index 77ca3a4..62db411 100755 --- a/urlbot.py +++ b/urlbot.py @@ -133,7 +133,9 @@ def extract_url(data): (status, title) = extract_title(r) if 0 == status: - message = 'Title: %s: %s' % (title.strip(), r) + message = 'lev=%d/%d:%d Title: %s: %s' %( + levenshtein(r, title.strip()), len(title.strip()), len(r), title.strip(), r + ) elif 1 == status: logger('info', 'no message sent for non-text %s (%s)' %(r, title)) continue From 1ddd43c27418a0efa996f3edfa200d0c2768be6c Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 29 Sep 2014 19:15:00 +0200 Subject: [PATCH 074/112] a "teatimer" implemented --- local_config.py.skel | 2 ++ plugins.py | 43 ++++++++++++++++++++++++++++++++++++++++++- urlbot.py | 4 +++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index dbb806a..a08409d 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -27,6 +27,8 @@ config['persistent_storage'] = 'urlbot.persistent' # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) +config['tea_steep_time'] = (3*60 + 40) + def conf(val): if val in list(config.keys()): return config[val] diff --git a/plugins.py b/plugins.py index e46efcd..91c238f 100644 --- a/plugins.py +++ b/plugins.py @@ -9,6 +9,8 @@ import time, random from local_config import conf from common import * +joblist = [] + plugins = {} plugins['parse'] = [] plugins['command'] = [] @@ -18,6 +20,9 @@ def get_reply_user(data): # or a user 'foo' just wrote '> > bar' return data.split(' ')[0].strip('<>') +def register_event(t, callback, args): + joblist.append((t, callback, args)) + def parse_mental_ill(args): if 'register' == args: return { @@ -280,6 +285,27 @@ def command_info(args): 'msg': args['reply_user'] + (''': I'm a bot, my job is to extract <title> tags from posted URLs. In case I'm annoying or for further questions, please talk to my master %s. I'm rate limited and shouldn't post more than %d messages per %d seconds. To make me exit immediately, highlight me with 'hangup' in the message (emergency only, please). For other commands, highlight me with 'command'.''' %(conf('bot_owner'), conf('hist_max_count'), conf('hist_max_time'))) } +def command_teatimer(args): + if 'register' == args: + return { + 'name': 'teatimer', + 'desc': 'sets a tea timer', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + + if 'teatimer' in args['data']: + ready = time.time() + conf('tea_steep_time') + + logger('plugin', 'tea timer set to %s' % time.strftime('%F.%T', time.localtime(ready))) + register_event(ready, chat_write, args['reply_user'] + ': Your tea is ready!') + + return { + 'msg': args['reply_user'] + ': Tea timer set to %s' % time.strftime( + '%F.%T', time.localtime(ready) + ) + } + def command_else(args): logger('plugin', 'sent short info') return { @@ -356,7 +382,8 @@ funcs = {} funcs['parse'] = (parse_mental_ill, parse_skynet) funcs['command'] = ( command_command, command_help, command_version, command_unicode, - command_source, command_dice, command_uptime, command_ping, command_info + command_source, command_dice, command_uptime, command_ping, command_info, + command_teatimer ) _dir = dir() @@ -405,3 +432,17 @@ def register(func_type, auto=False): def register_all(): register('parse') register('command') + +def event_trigger(): + if 0 == len(joblist): + return + + now = time.time() + + i = 0 + for (t, callback, args) in joblist: + if t < now: + callback(args) + del(joblist[i]) + + i += 1 diff --git a/urlbot.py b/urlbot.py index 62db411..fca7f0a 100755 --- a/urlbot.py +++ b/urlbot.py @@ -182,7 +182,7 @@ def parse_delete(filepath): return if 'nospoiler' in content: - logger('info', "no spoiler for: " + content) +# logger('info', "no spoiler for: " + content) return if True != extract_url(content): @@ -215,6 +215,8 @@ if '__main__' == __name__: if 'mcabber-' == f[:8]: parse_delete(os.path.join(event_files_dir, f)) + plugins.event_trigger() + time.sleep(delay) except KeyboardInterrupt: print('') From 75dd68733f65e2e313bbd066e7def1a44c04c806 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 29 Sep 2014 19:26:36 +0200 Subject: [PATCH 075/112] more precise "teatimer" help --- plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins.py b/plugins.py index 91c238f..26101a9 100644 --- a/plugins.py +++ b/plugins.py @@ -289,7 +289,7 @@ def command_teatimer(args): if 'register' == args: return { 'name': 'teatimer', - 'desc': 'sets a tea timer', + 'desc': 'sets a tea timer to currently %d seconds' % conf('tea_steep_time'), 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } From ca715ca0f8a61dd260e528683ffc9ce09ad98bc3 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 30 Sep 2014 16:48:17 +0200 Subject: [PATCH 076/112] fix levenshtein() for empty strings --- common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/common.py b/common.py index d1dcb30..5f02a7a 100644 --- a/common.py +++ b/common.py @@ -47,6 +47,7 @@ def levenshtein(a, b, return_table=False): # initialisize a table with 0, but the 0-rows/cols with their index d = [[ (i if 0 == j else j if 0 == i else 0) for j in range(len(b)+1) ] for i in range(len(a)+1) ] + i = j = 0 for i in range(1, len(a)+1): for j in range(1, len(b)+1): if a[i-1] == b[j-1]: From 5a7601b108b083989642df769a518a02f2f10e44 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 30 Sep 2014 17:15:43 +0200 Subject: [PATCH 077/112] fix crash on 404 pages (2to3 fixup) --- urlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urlbot.py b/urlbot.py index fca7f0a..a3602ed 100755 --- a/urlbot.py +++ b/urlbot.py @@ -24,7 +24,7 @@ def fetch_page(url): response.close() return (html_text, response.headers) except IOError as e: - logger('warn', 'failed: ' + e.errno) + logger('warn', 'failed: ' + str(e.errno)) return (None, None) From 126b238aebc4131404f40150679c7f24e8d5f014 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 30 Sep 2014 17:39:52 +0200 Subject: [PATCH 078/112] levenshtein: strip domain; write to persistent struct --- urlbot.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/urlbot.py b/urlbot.py index a3602ed..cc4ddf6 100755 --- a/urlbot.py +++ b/urlbot.py @@ -133,9 +133,16 @@ def extract_url(data): (status, title) = extract_title(r) if 0 == status: - message = 'lev=%d/%d:%d Title: %s: %s' %( - levenshtein(r, title.strip()), len(title.strip()), len(r), title.strip(), r - ) + title = title.strip() + lev_url = re.sub(r'https?://[^/]*/', '', r) + lev_res = levenshtein(lev_url, title) + + obj = conf_load() + obj['lev'].append((lev_res, title, lev_url)) + conf_save(obj) + + lev_str = 'lev=%d/%d:%d ' %(lev_res, len(title), len(lev_url)) + message = lev_str + 'Title: %s: %s' %(title, r) elif 1 == status: logger('info', 'no message sent for non-text %s (%s)' %(r, title)) continue From d8d5ed746123f48aa7426c0c452250c01d12e726 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 30 Sep 2014 17:41:51 +0200 Subject: [PATCH 079/112] move path to local_config.py, add levenshtein_test.py --- common.py | 4 ++-- levenshtein_test.py | 38 ++++++++++++++++++++++++++++++++++++++ local_config.py.skel | 2 ++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100755 levenshtein_test.py diff --git a/common.py b/common.py index 5f02a7a..5725750 100644 --- a/common.py +++ b/common.py @@ -20,8 +20,8 @@ delay = 0.100 # seconds basedir = '.' if 2 == len(sys.argv): basedir = sys.argv[1] -event_files_dir = os.path.join(basedir, 'event_files') -fifo_path = os.path.join(basedir, 'cmdfifo') +event_files_dir = os.path.join(basedir, conf('path_event_files')) +fifo_path = os.path.join(basedir, conf('path_cmdfifo')) def debug_enabled(): # return True diff --git a/levenshtein_test.py b/levenshtein_test.py new file mode 100755 index 0000000..a29abc7 --- /dev/null +++ b/levenshtein_test.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 + +from common import levenshtein + +(a, b) = ('foo barbaz', 'foobar baz') +(a, b) = ('sitting', 'kitten') +(a, b) = ('Monte Kali (Heringen)', 'http://de.wikipedia.org/wiki/Monte_Kali_%28Heringen%29') + +(matrix, ret) = levenshtein(a, b, return_table=True) + +sep = ' '*0 +out = '' +for B in b: + out += sep + '%2s' % B +print(sep + ' '*4 + out) + +for i in range(len(matrix)): + if 0 == i: + out = ' ' + else: + out = '%2s' % a[i-1] + + for j in range(len(matrix[i])): + if 0 == i or 0 == j: + col = '30;42' + elif i == j: + col = '41' + else: + col = 0 + + if 0 != col: + out += sep + '\x1b[%sm%2d\x1b[m' %(col, matrix[i][j]) + else: + out += sep + '%2d' % matrix[i][j] + + print(out) + +print(ret) diff --git a/local_config.py.skel b/local_config.py.skel index a08409d..6774ad2 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -22,6 +22,8 @@ config['hist_max_time'] = 10 * 60 config['uptime'] = -time.time() config['request_counter'] = 0 +config['path_event_files'] = 'event_files' +config['path_cmdfifo'] = 'cmdfifo' config['persistent_storage'] = 'urlbot.persistent' # the "dice" feature will use more efficient random data (0) for given users From 351da575db883411bf2c7a0d9c974efd716da497 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 1 Oct 2014 09:33:21 +0200 Subject: [PATCH 080/112] (not in use) add string_similarity() for testing --- strsim.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100755 strsim.py diff --git a/strsim.py b/strsim.py new file mode 100755 index 0000000..fbcf8e2 --- /dev/null +++ b/strsim.py @@ -0,0 +1,63 @@ +#!/usr/bin/python3 + +import re + +def str_sim(a, b): + a = a.lower() + b = b.lower() + + a_parts = re.split('[\W_]+', a) + b_parts = re.split('[\W_]+', b) + + # this is a "simple" way to declare out[a][b] + out = list(map(list, [[0]*len(b_parts)]*len(a_parts))) + + for i in range(0, len(a_parts)-1): + for j in range(0, len(b_parts)-1): + if a_parts[i] == b_parts[j]: + out[i][j] += 1 + + i = 0 + for j in range(0, len(b_parts)): + print(' |'*i + ' '*2 + '.- ' + b_parts[j]) + i += 1 + print(' |'*i) + + for i in range(0, len(a_parts)): + print(' ' + str(out[i]) + ' ' + a_parts[i]) + + return out + +def sum_array(array): + _sum = 0 + for a in array: + if list == type(a) or tuple == type(a) or hash == type(a): + _sum += sum_array(a) + elif int == type(a) or float == type(a): + _sum += a + return _sum + +def wrapper_print(a, b, comment=''): + ret = str_sim(a, b) + if '' != comment: comment = ' ^ ' + comment + print('[%2dx%2d::%2d]%s' %(len(ret), len(ret[0]), sum_array(ret), comment)) + +pairs = ( + ( + 'http://de.wikipedia.org/wiki/Monte_Kali_%28Heringen%29', + 'Monte Kali (Heringen)' + ), + ( + 'http://www.spiegel.de/politik/ausland/buddhisten-treffen-in-colombo-blitzender-moench-a-994447.html', + 'Buddhisten-Treffen in Colombo: Blitzender Mönch - SPIEGEL ONLINE' + ) +) + +wrapper_print('foo bar baz', 'foo bar boom') + +for (url, title) in pairs: + wrapper_print(title, url, comment='raw') + url_no_proto = re.sub(r'https?://[^/]*/', '', url) + wrapper_print(title, url_no_proto, comment='no proto/domain') + url_no_proto_no_digits = re.sub(r'[0-9]*', '', url_no_proto) + wrapper_print(title, url_no_proto_no_digits, comment='no proto/domain/[0-9]') From 6ce9e177de4849ff6b2e6abb3e6bf0e191900336 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 1 Oct 2014 10:21:27 +0200 Subject: [PATCH 081/112] add send error replys from urllib --- urlbot.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/urlbot.py b/urlbot.py index cc4ddf6..acbc0e1 100755 --- a/urlbot.py +++ b/urlbot.py @@ -22,11 +22,12 @@ def fetch_page(url): response = urllib.request.urlopen(url) html_text = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() - return (html_text, response.headers) + return (0, html_text, response.headers) except IOError as e: - logger('warn', 'failed: ' + str(e.errno)) + logger('warn', 'failed: ' + str(e)) + return (1, str(e), 'dummy') - return (None, None) + return (-1, None, None) def extract_title(url): global parser @@ -37,7 +38,11 @@ def extract_title(url): logger('info', 'extracting title from ' + url) - (html_text, headers) = fetch_page(url) + (code, html_text, headers) = fetch_page(url) + + if 1 == code: + return (3, 'failed: %s for %s' %(html_text, url)) + if html_text: charset = '' if 'content-type' in headers: From 6d11929e54aa0607bc20b1252b81cef17d3e489f Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 3 Oct 2014 19:18:52 +0200 Subject: [PATCH 082/112] re-add image_preview; configurable --- local_config.py.skel | 2 ++ urlbot.py | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index 6774ad2..77f57f1 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -31,6 +31,8 @@ config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) config['tea_steep_time'] = (3*60 + 40) +config['image_preview'] = True + def conf(val): if val in list(config.keys()): return config[val] diff --git a/urlbot.py b/urlbot.py index acbc0e1..9a3b1fa 100755 --- a/urlbot.py +++ b/urlbot.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 # -*- coding: utf-8 -*- -import sys, os, stat, re, time, pickle +import sys, os, stat, re, time, pickle, random import urllib.request, urllib.parse, urllib.error, html.parser from local_config import conf, set_conf from common import * @@ -149,8 +149,15 @@ def extract_url(data): lev_str = 'lev=%d/%d:%d ' %(lev_res, len(title), len(lev_url)) message = lev_str + 'Title: %s: %s' %(title, r) elif 1 == status: - logger('info', 'no message sent for non-text %s (%s)' %(r, title)) - continue + if conf('image_preview'): + # of course it's fake, but it looks interesting at least + char = """,._-+=\|/*`~"'""" + message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' %( + title, random.choice(char), r + ) + else: + logger('info', 'no message sent for non-text %s (%s)' %(r, title)) + continue elif 2 == status: message = 'No title: %s' % r elif 3 == status: From 09f96101f0d5e75de7298485ece907aa47a27d9d Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 5 Oct 2014 13:58:44 +0200 Subject: [PATCH 083/112] Python3 post-migration: fix User-Agent --- urlbot.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/urlbot.py b/urlbot.py index 9a3b1fa..92f1eb8 100755 --- a/urlbot.py +++ b/urlbot.py @@ -12,14 +12,12 @@ hist_flag = True parser = None -class urllib_user_agent_wrapper(urllib.request.FancyURLopener): - version = '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''' - def fetch_page(url): logger('info', 'fetching page ' + url) try: - urllib.request._urlopener = urllib_user_agent_wrapper() - response = urllib.request.urlopen(url) + request = urllib.request.Request(url) + request.add_header('User-Agent', '''Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.0''') + response = urllib.request.urlopen(request) html_text = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() return (0, html_text, response.headers) From 6a916c701acf7fac9dd8f7a2eb9d0f72c04d1230 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 5 Oct 2014 23:39:51 +0200 Subject: [PATCH 084/112] testing str_sim() for (title, lev_url) --- strsim.py | 53 ++++++++++++++++++++++++++++------------------------- urlbot.py | 11 ++++++++++- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/strsim.py b/strsim.py index fbcf8e2..9a98749 100755 --- a/strsim.py +++ b/strsim.py @@ -1,8 +1,9 @@ #!/usr/bin/python3 +# -*- coding: utf-8 -*- import re -def str_sim(a, b): +def str_sim(a, b, do_print=False): a = a.lower() b = b.lower() @@ -17,14 +18,15 @@ def str_sim(a, b): if a_parts[i] == b_parts[j]: out[i][j] += 1 - i = 0 - for j in range(0, len(b_parts)): - print(' |'*i + ' '*2 + '.- ' + b_parts[j]) - i += 1 - print(' |'*i) + if do_print: + i = 0 + for j in range(0, len(b_parts)): + print(' |'*i + ' '*2 + '.- ' + b_parts[j]) + i += 1 + print(' |'*i) - for i in range(0, len(a_parts)): - print(' ' + str(out[i]) + ' ' + a_parts[i]) + for i in range(0, len(a_parts)): + print(' ' + str(out[i]) + ' ' + a_parts[i]) return out @@ -38,26 +40,27 @@ def sum_array(array): return _sum def wrapper_print(a, b, comment=''): - ret = str_sim(a, b) + ret = str_sim(a, b, do_print=True) if '' != comment: comment = ' ^ ' + comment print('[%2dx%2d::%2d]%s' %(len(ret), len(ret[0]), sum_array(ret), comment)) -pairs = ( - ( - 'http://de.wikipedia.org/wiki/Monte_Kali_%28Heringen%29', - 'Monte Kali (Heringen)' - ), - ( - 'http://www.spiegel.de/politik/ausland/buddhisten-treffen-in-colombo-blitzender-moench-a-994447.html', - 'Buddhisten-Treffen in Colombo: Blitzender Mönch - SPIEGEL ONLINE' +if '__main__' == __name__: + pairs = ( + ( + 'http://de.wikipedia.org/wiki/Monte_Kali_%28Heringen%29', + 'Monte Kali (Heringen)' + ), + ( + 'http://www.spiegel.de/politik/ausland/buddhisten-treffen-in-colombo-blitzender-moench-a-994447.html', + 'Buddhisten-Treffen in Colombo: Blitzender Mönch - SPIEGEL ONLINE' + ) ) -) -wrapper_print('foo bar baz', 'foo bar boom') + wrapper_print('foo bar baz', 'foo bar boom') -for (url, title) in pairs: - wrapper_print(title, url, comment='raw') - url_no_proto = re.sub(r'https?://[^/]*/', '', url) - wrapper_print(title, url_no_proto, comment='no proto/domain') - url_no_proto_no_digits = re.sub(r'[0-9]*', '', url_no_proto) - wrapper_print(title, url_no_proto_no_digits, comment='no proto/domain/[0-9]') + for (url, title) in pairs: + wrapper_print(title, url, comment='raw') + url_no_proto = re.sub(r'https?://[^/]*/', '', url) + wrapper_print(title, url_no_proto, comment='no proto/domain') + url_no_proto_no_digits = re.sub(r'[0-9]*', '', url_no_proto) + wrapper_print(title, url_no_proto_no_digits, comment='no proto/domain/[0-9]') diff --git a/urlbot.py b/urlbot.py index 92f1eb8..2b93e24 100755 --- a/urlbot.py +++ b/urlbot.py @@ -5,6 +5,7 @@ import sys, os, stat, re, time, pickle, random import urllib.request, urllib.parse, urllib.error, html.parser from local_config import conf, set_conf from common import * +from strsim import str_sim # rate limiting to 5 messages per 10 minutes hist_ts = [] @@ -145,7 +146,15 @@ def extract_url(data): conf_save(obj) lev_str = 'lev=%d/%d:%d ' %(lev_res, len(title), len(lev_url)) - message = lev_str + 'Title: %s: %s' %(title, r) + + sim = str_sim(title, lev_url) + sim_len_title = len(sim) + sim_len_url = len(sim[0]) + sim_sum = sum([sum(a) for a in sim]) + + sim_str = 'sim=%d/%d:%d ' %(sim_sum, sim_len_title, sim_len_url) + + message = lev_str + sim_str + 'Title: %s: %s' %(title, r) elif 1 == status: if conf('image_preview'): # of course it's fake, but it looks interesting at least From 663e2131edf81297c5a9b1e5fc57839304b1d498 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 6 Oct 2014 00:19:47 +0200 Subject: [PATCH 085/112] rename r->url; add obj['sim'] --- urlbot.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/urlbot.py b/urlbot.py index 2b93e24..e0e391b 100755 --- a/urlbot.py +++ b/urlbot.py @@ -129,22 +129,18 @@ def extract_url(data): ret = None result = re.findall("(https?://[^\s>]+)", data) if result: - for r in result: + for url in result: ratelimit_touch() if ratelimit_exceeded(): return False - (status, title) = extract_title(r) + (status, title) = extract_title(url) if 0 == status: title = title.strip() - lev_url = re.sub(r'https?://[^/]*/', '', r) + lev_url = re.sub(r'https?://[^/]*/', '', url) lev_res = levenshtein(lev_url, title) - obj = conf_load() - obj['lev'].append((lev_res, title, lev_url)) - conf_save(obj) - lev_str = 'lev=%d/%d:%d ' %(lev_res, len(title), len(lev_url)) sim = str_sim(title, lev_url) @@ -154,23 +150,28 @@ def extract_url(data): sim_str = 'sim=%d/%d:%d ' %(sim_sum, sim_len_title, sim_len_url) - message = lev_str + sim_str + 'Title: %s: %s' %(title, r) + obj = conf_load() + obj['lev'].append((lev_res, title, url)) + obj['sim'].append((sim_sum, sim_len_title, sim_len_url, title, url)) + conf_save(obj) + + message = lev_str + sim_str + 'Title: %s: %s' %(title, url) elif 1 == status: if conf('image_preview'): # of course it's fake, but it looks interesting at least char = """,._-+=\|/*`~"'""" message = 'No text but %s, 1-bit ASCII art preview: [%c] %s' %( - title, random.choice(char), r + title, random.choice(char), url ) else: - logger('info', 'no message sent for non-text %s (%s)' %(r, title)) + logger('info', 'no message sent for non-text %s (%s)' %(url, title)) continue elif 2 == status: - message = 'No title: %s' % r + message = 'No title: %s' % url elif 3 == status: message = title else: - message = 'some error occurred when fetching %s' % r + message = 'some error occurred when fetching %s' % url message = message.replace('\n', '\\n') From 24084909a4dc63678d5605c485fc3721f2b7b0b8 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Thu, 9 Oct 2014 22:07:40 +0200 Subject: [PATCH 086/112] remove lev_str, sim_str from visible output --- urlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urlbot.py b/urlbot.py index e0e391b..13da26d 100755 --- a/urlbot.py +++ b/urlbot.py @@ -155,7 +155,7 @@ def extract_url(data): obj['sim'].append((sim_sum, sim_len_title, sim_len_url, title, url)) conf_save(obj) - message = lev_str + sim_str + 'Title: %s: %s' %(title, url) + message = 'Title: %s: %s' %(title, url) elif 1 == status: if conf('image_preview'): # of course it's fake, but it looks interesting at least From e4d23e07c09bd48b91fb84b11ad3594e8898065b Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Thu, 9 Oct 2014 22:48:23 +0200 Subject: [PATCH 087/112] fix crash for https?://\..* links ('.'.encode('idna') fails) --- urlbot.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/urlbot.py b/urlbot.py index 13da26d..5d383f6 100755 --- a/urlbot.py +++ b/urlbot.py @@ -130,6 +130,19 @@ def extract_url(data): result = re.findall("(https?://[^\s>]+)", data) if result: for url in result: +# urllib.request is broken: +# >>> '.'.encode('idna') +# .... +# UnicodeError: label empty or too long +# >>> '.a.'.encode('idna') +# .... +# UnicodeError: label empty or too long +# >>> 'a.a.'.encode('idna') +# b'a.a.' + if re.match(r'https?://\.', url): + logger('warn', 'bug tiggered, invalid url: %s' % url) + continue + ratelimit_touch() if ratelimit_exceeded(): return False From 76fd4645c8bed2cc3b4ceaecde4cf64f7e00233a Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 10 Oct 2014 00:01:22 +0200 Subject: [PATCH 088/112] http://a../ also triggers, remove fix, wrap exception arround --- urlbot.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/urlbot.py b/urlbot.py index 5d383f6..928f3fd 100755 --- a/urlbot.py +++ b/urlbot.py @@ -130,6 +130,10 @@ def extract_url(data): result = re.findall("(https?://[^\s>]+)", data) if result: for url in result: + ratelimit_touch() + if ratelimit_exceeded(): + return False + # urllib.request is broken: # >>> '.'.encode('idna') # .... @@ -139,15 +143,11 @@ def extract_url(data): # UnicodeError: label empty or too long # >>> 'a.a.'.encode('idna') # b'a.a.' - if re.match(r'https?://\.', url): - logger('warn', 'bug tiggered, invalid url: %s' % url) - continue - ratelimit_touch() - if ratelimit_exceeded(): - return False - - (status, title) = extract_title(url) + try: + (status, title) = extract_title(url) + except UnicodeError: + (status, title) = (4, None) if 0 == status: title = title.strip() @@ -183,6 +183,9 @@ def extract_url(data): message = 'No title: %s' % url elif 3 == status: message = title + elif 4 == status: + message = 'Bug triggered, invalid URL/domain part: %s' % url + logger('warn', message) else: message = 'some error occurred when fetching %s' % url From 94c538696f3bb8a76bec38b8156d80257aa52d51 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 11 Oct 2014 16:22:19 +0200 Subject: [PATCH 089/112] warp all urlib-specific stuff in an exception --- urlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/urlbot.py b/urlbot.py index 928f3fd..8d3b186 100755 --- a/urlbot.py +++ b/urlbot.py @@ -22,7 +22,7 @@ def fetch_page(url): html_text = response.read(BUFSIZ) # ignore more than BUFSIZ response.close() return (0, html_text, response.headers) - except IOError as e: + except Exception as e: logger('warn', 'failed: ' + str(e)) return (1, str(e), 'dummy') From 583e1174d68b9fc6bcead711e48f2e8cc0120def Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 11 Oct 2014 16:31:09 +0200 Subject: [PATCH 090/112] ignore lines that are probably the bots own log lines --- urlbot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/urlbot.py b/urlbot.py index 8d3b186..82919e4 100755 --- a/urlbot.py +++ b/urlbot.py @@ -223,11 +223,15 @@ def parse_delete(filepath): if content.startswith('PRIV#'): parse_pn(content) return - + if 'nospoiler' in content: # logger('info', "no spoiler for: " + content) return + if sys.argv[0] in content: + logger('info', 'silenced, this is my own log') + return + if True != extract_url(content): plugins.data_parse_commands(content) plugins.data_parse_other(content) From 7d60a5861e4cdf0710c5f179c403e66aa51e93ac Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 13 Oct 2014 18:23:51 +0200 Subject: [PATCH 091/112] add plugin/command_decode(): translate unicode->text --- plugins.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/plugins.py b/plugins.py index 26101a9..875da1e 100644 --- a/plugins.py +++ b/plugins.py @@ -5,7 +5,7 @@ if '__main__' == __name__: print('''this is a plugin file, which is not meant to be executed''') exit(-1) -import time, random +import time, random, unicodedata from local_config import conf from common import * @@ -306,6 +306,56 @@ def command_teatimer(args): ) } +def command_decode(args): + if 'register' == args: + return { + 'name': 'decode', + 'desc': 'prints the long description of an unicode character', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + + if not 'decode' in args['data']: + return + + d = args['data'].split() + + if 4 == len(d): + char = d[3][0] + logger('plugin', 'decode called for %s' % char) + + try: + uni_name = unicodedata.name(char) + except e as Exception: + logger('plugin', 'decode(%s) failed: %s' %(char, str(e))) + return { + 'msg': args['reply_user'] + ": can't decode %s: %s" %(char, str(e)) + } + + return { + 'msg': args['reply_user'] + ': %s is called "%s"' %(char, uni_name) + } + else: + return { + 'msg': args['reply_user'] + ': usage: decode {single character}' + } + +#def command_dummy(args): +# if 'register' == args: +# return { +# 'name': 'dummy', +# 'desc': 'dummy description', +# 'args': ('data', 'reply_user'), +# 'ratelimit_class': RATE_GLOBAL +# } +# +# if 'dummy' in args['data']: +# logger('plugin', 'dummy plugin called') +# +# return { +# 'msg': args['reply_user'] + ': dummy plugin called' +# } + def command_else(args): logger('plugin', 'sent short info') return { @@ -383,7 +433,7 @@ funcs['parse'] = (parse_mental_ill, parse_skynet) funcs['command'] = ( command_command, command_help, command_version, command_unicode, command_source, command_dice, command_uptime, command_ping, command_info, - command_teatimer + command_teatimer, command_decode ) _dir = dir() From 29981b776a4673ed40a380b7d5745175eea82119 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 14 Oct 2014 16:38:12 +0200 Subject: [PATCH 092/112] "decode": add hex value to output --- plugins.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins.py b/plugins.py index 875da1e..6bb2fd6 100644 --- a/plugins.py +++ b/plugins.py @@ -322,6 +322,7 @@ def command_decode(args): if 4 == len(d): char = d[3][0] + char_esc = str(char.encode('unicode_escape'))[3:-1] logger('plugin', 'decode called for %s' % char) try: @@ -329,11 +330,11 @@ def command_decode(args): except e as Exception: logger('plugin', 'decode(%s) failed: %s' %(char, str(e))) return { - 'msg': args['reply_user'] + ": can't decode %s: %s" %(char, str(e)) + 'msg': args['reply_user'] + ": can't decode %s (%s): %s" %(char, char_esc, str(e)) } return { - 'msg': args['reply_user'] + ': %s is called "%s"' %(char, uni_name) + 'msg': args['reply_user'] + ': %s (%s) is called "%s"' %(char, char_esc, uni_name) } else: return { From 0b56d21101bc9f1a50f9d781efdefb334ca2e01e Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 20 Oct 2014 07:46:05 +0200 Subject: [PATCH 093/112] bugfixing as pylint, pyflakes suggest --- plugins.py | 6 +++--- urlbot.py | 7 ++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/plugins.py b/plugins.py index 6bb2fd6..6359cc0 100644 --- a/plugins.py +++ b/plugins.py @@ -83,7 +83,7 @@ def data_parse_other(data): elif 'reply_user' == a: args['reply_user'] = reply_user else: - logger('warn', 'unknown required arg for %s: %s' %(f, a)) + logger('warn', 'unknown required arg for %s: %s' %(p['name'], a)) ret = p['func'](args) @@ -327,7 +327,7 @@ def command_decode(args): try: uni_name = unicodedata.name(char) - except e as Exception: + except Exception as e: logger('plugin', 'decode(%s) failed: %s' %(char, str(e))) return { 'msg': args['reply_user'] + ": can't decode %s (%s): %s" %(char, char_esc, str(e)) @@ -399,7 +399,7 @@ def data_parse_commands(data): elif 'reply_user' == a: args['reply_user'] = reply_user else: - logger('warn', 'unknown required arg for %s: %s' %(f, a)) + logger('warn', 'unknown required arg for %s: %s' %(p['name'], a)) ret = p['func'](args) diff --git a/urlbot.py b/urlbot.py index 82919e4..b24c35c 100755 --- a/urlbot.py +++ b/urlbot.py @@ -94,7 +94,8 @@ def chat_write(message, prefix='/say '): try: msg = str(prefix) + str(message) + '\n' msg = msg.encode('utf8') - except UnicodeDecodeError: + except UnicodeDecodeError as e: + logger('warn', 'encoding msg failed: ' + str(e)) msg = prefix + message + '\n' fd.write(msg) @@ -154,15 +155,11 @@ def extract_url(data): lev_url = re.sub(r'https?://[^/]*/', '', url) lev_res = levenshtein(lev_url, title) - lev_str = 'lev=%d/%d:%d ' %(lev_res, len(title), len(lev_url)) - sim = str_sim(title, lev_url) sim_len_title = len(sim) sim_len_url = len(sim[0]) sim_sum = sum([sum(a) for a in sim]) - sim_str = 'sim=%d/%d:%d ' %(sim_sum, sim_len_title, sim_len_url) - obj = conf_load() obj['lev'].append((lev_res, title, url)) obj['sim'].append((sim_sum, sim_len_title, sim_len_url, title, url)) From 02105b7f13998b1a2f8db259c64494f5eae8a2d3 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Tue, 21 Oct 2014 18:51:55 +0200 Subject: [PATCH 094/112] don't drop exception descriptions anymore --- urlbot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/urlbot.py b/urlbot.py index b24c35c..b4328db 100755 --- a/urlbot.py +++ b/urlbot.py @@ -100,8 +100,8 @@ def chat_write(message, prefix='/say '): fd.write(msg) fd.close() - except IOError: - logger('err', "couldn't print to fifo " + fifo_path) + except IOError as e: + logger('err', "couldn't print to fifo %s: %s" % (fifo_path, str(e))) def ratelimit_touch(ignored=None): # FIXME: separate counters hist_ts.append(time.time()) @@ -147,8 +147,8 @@ def extract_url(data): try: (status, title) = extract_title(url) - except UnicodeError: - (status, title) = (4, None) + except UnicodeError as e: + (status, title) = (4, str(e)) if 0 == status: title = title.strip() @@ -181,7 +181,7 @@ def extract_url(data): elif 3 == status: message = title elif 4 == status: - message = 'Bug triggered, invalid URL/domain part: %s' % url + message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url) logger('warn', message) else: message = 'some error occurred when fetching %s' % url @@ -203,8 +203,8 @@ def parse_pn(data): def parse_delete(filepath): try: fd = open(filepath, 'r') - except IOError: - logger('err', 'file has vanished: ' + filepath) + except IOError as e: + logger('err', 'file has vanished: %s: %s' % (filepath, e)) return False content = fd.read(BUFSIZ) # ignore more than BUFSIZ From 76fd48485cdfd70c5bfe14e13f746a2d6b2c2b1e Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 29 Oct 2014 13:01:32 +0100 Subject: [PATCH 095/112] add plugin argv; add user-settable teatimer timeout --- plugins.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/plugins.py b/plugins.py index 6359cc0..fcd7be3 100644 --- a/plugins.py +++ b/plugins.py @@ -15,10 +15,17 @@ plugins = {} plugins['parse'] = [] plugins['command'] = [] -def get_reply_user(data): +def get_reply_data(data, field=0): # FIXME: we can't determine if a user named 'foo> ' just wrote ' > bar' # or a user 'foo' just wrote '> > bar' - return data.split(' ')[0].strip('<>') + f = data.split(' ') + + if 0 == field: + return f[0].strip('<>') + else: + if field > len(f): + return None + return f[field] def register_event(t, callback, args): joblist.append((t, callback, args)) @@ -66,7 +73,7 @@ def parse_skynet(args): } def data_parse_other(data): - reply_user = get_reply_user(data) + reply_user = get_reply_data(data) for p in plugins['parse']: if ratelimit_exceeded(p['ratelimit_class']): @@ -289,13 +296,25 @@ def command_teatimer(args): if 'register' == args: return { 'name': 'teatimer', - 'desc': 'sets a tea timer to currently %d seconds' % conf('tea_steep_time'), - 'args': ('data', 'reply_user'), + 'desc': 'sets a tea timer to $1 or currently %d seconds' % conf('tea_steep_time'), + 'args': ('data', 'reply_user', 'argv1'), 'ratelimit_class': RATE_GLOBAL } if 'teatimer' in args['data']: - ready = time.time() + conf('tea_steep_time') + steep = conf('tea_steep_time') + + if None != args['argv1']: + try: + steep = int(args['argv1']) + except Exception as e: + return { + 'msg': args['reply_user'] + ': error when parsing int(%s): %s' % ( + args['argv1'], str(e) + ) + } + + ready = time.time() + steep logger('plugin', 'tea timer set to %s' % time.strftime('%F.%T', time.localtime(ready))) register_event(ready, chat_write, args['reply_user'] + ': Your tea is ready!') @@ -378,7 +397,8 @@ def data_parse_commands(data): logger('warn', 'received hangup: ' + data) return None - reply_user = get_reply_user(data) + reply_user = get_reply_data(data) + argv1 = get_reply_data(data, field=2) for p in plugins['command']: if ratelimit_exceeded(p['ratelimit_class']): @@ -398,6 +418,8 @@ def data_parse_commands(data): args['cmd_list'] = cmds elif 'reply_user' == a: args['reply_user'] = reply_user + elif 'argv1' == a: + args['argv1'] = argv1 else: logger('warn', 'unknown required arg for %s: %s' %(p['name'], a)) From 0811e30b82c183b8993f5d6f371b063c2898d692 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 29 Oct 2014 13:05:23 +0100 Subject: [PATCH 096/112] add plugin argv0; fix wrong offset on argv1 --- plugins.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/plugins.py b/plugins.py index fcd7be3..e226961 100644 --- a/plugins.py +++ b/plugins.py @@ -297,11 +297,11 @@ def command_teatimer(args): return { 'name': 'teatimer', 'desc': 'sets a tea timer to $1 or currently %d seconds' % conf('tea_steep_time'), - 'args': ('data', 'reply_user', 'argv1'), + 'args': ('reply_user', 'argv0', 'argv1'), 'ratelimit_class': RATE_GLOBAL } - if 'teatimer' in args['data']: + if 'teatimer' == args['argv0']: steep = conf('tea_steep_time') if None != args['argv1']: @@ -398,7 +398,8 @@ def data_parse_commands(data): return None reply_user = get_reply_data(data) - argv1 = get_reply_data(data, field=2) + argv0 = get_reply_data(data, field=2) + argv1 = get_reply_data(data, field=3) for p in plugins['command']: if ratelimit_exceeded(p['ratelimit_class']): @@ -418,6 +419,8 @@ def data_parse_commands(data): args['cmd_list'] = cmds elif 'reply_user' == a: args['reply_user'] = reply_user + elif 'argv0' == a: + args['argv0'] = argv0 elif 'argv1' == a: args['argv1'] = argv1 else: From e85cc3a148a1e695e740791a9c78b7f556376041 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 29 Oct 2014 13:12:41 +0100 Subject: [PATCH 097/112] fix time_t exceeding --- plugins.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/plugins.py b/plugins.py index e226961..27d67ce 100644 --- a/plugins.py +++ b/plugins.py @@ -316,7 +316,13 @@ def command_teatimer(args): ready = time.time() + steep - logger('plugin', 'tea timer set to %s' % time.strftime('%F.%T', time.localtime(ready))) + try: + logger('plugin', 'tea timer set to %s' % time.strftime('%F.%T', time.localtime(ready))) + except ValueError as e: + return { + 'msg': args['reply_user'] + ': time format error: ' + str(e) + } + register_event(ready, chat_write, args['reply_user'] + ': Your tea is ready!') return { From d571926fd5589e5ddccee30cbf3442dc2b0d79b8 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 29 Oct 2014 13:25:09 +0100 Subject: [PATCH 098/112] chat_write: don't print strange chars; get_reply_data: strip \n --- plugins.py | 2 +- urlbot.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins.py b/plugins.py index 27d67ce..e97a084 100644 --- a/plugins.py +++ b/plugins.py @@ -25,7 +25,7 @@ def get_reply_data(data, field=0): else: if field > len(f): return None - return f[field] + return f[field].strip('\n') def register_event(t, callback, args): joblist.append((t, callback, args)) diff --git a/urlbot.py b/urlbot.py index b4328db..9f80fd1 100755 --- a/urlbot.py +++ b/urlbot.py @@ -83,6 +83,11 @@ def extract_title(url): def chat_write(message, prefix='/say '): set_conf('request_counter', conf('request_counter') + 1) + for m in message: + if 0x20 > ord(m): + logger('warn', 'strange char 0x%02x in chat_write(message), skipping' % ord(m)) + return False + if debug_enabled(): print(message) else: From 65681bcc61fd9630a48f258a266b36a38bebb483 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 29 Oct 2014 13:47:26 +0100 Subject: [PATCH 099/112] get_reply_data: split on anything, remove strip(\n) --- plugins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins.py b/plugins.py index e97a084..cd88afc 100644 --- a/plugins.py +++ b/plugins.py @@ -18,14 +18,14 @@ plugins['command'] = [] def get_reply_data(data, field=0): # FIXME: we can't determine if a user named 'foo> ' just wrote ' > bar' # or a user 'foo' just wrote '> > bar' - f = data.split(' ') + f = data.split() if 0 == field: return f[0].strip('<>') else: if field > len(f): return None - return f[field].strip('\n') + return f[field] def register_event(t, callback, args): joblist.append((t, callback, args)) From 3a8fecd8f9121134bf45c28bbde28cae44e5d2bd Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Wed, 29 Oct 2014 13:50:01 +0100 Subject: [PATCH 100/112] fix crash on empty args --- plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins.py b/plugins.py index cd88afc..c30ee11 100644 --- a/plugins.py +++ b/plugins.py @@ -23,7 +23,7 @@ def get_reply_data(data, field=0): if 0 == field: return f[0].strip('<>') else: - if field > len(f): + if field >= len(f): return None return f[field] From 691db08e50146d17d0fc474735b7af2b5f3b1ea9 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sun, 9 Nov 2014 16:52:22 +0100 Subject: [PATCH 101/112] add plugin parse_debbug --- plugins.py | 32 ++++++++++++++++++++++++++++++-- urlbot.py | 16 ++++++++-------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/plugins.py b/plugins.py index c30ee11..5f28e19 100644 --- a/plugins.py +++ b/plugins.py @@ -5,9 +5,10 @@ if '__main__' == __name__: print('''this is a plugin file, which is not meant to be executed''') exit(-1) -import time, random, unicodedata +import time, random, unicodedata, re from local_config import conf from common import * +from urlbot import extract_title joblist = [] @@ -58,6 +59,33 @@ def parse_mental_ill(args): 'msg': '''Multiple exclamation/question marks are a sure sign of mental disease, with %s as a living example.''' % args['reply_user'] } +def parse_debbug(args): + if 'register' == args: + return { + 'name': 'parse Debian bug numbers', + 'args': ('data',), + 'ratelimit_class': RATE_NO_SILENCE | RATE_GLOBAL + } + + bugs = re.findall(r'#(\d{4,})', args['data']) + if not bugs: + return None + + url = 'https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s' % bugs[0] + status, title = extract_title(url) + + if 0 == status: + title = 'Debian Bug: ' + title + elif 3 == status: + pass + else: + return None + + logger('plugin', 'detected Debian bug') + return { + 'msg': title + } + def parse_skynet(args): if 'register' == args: return { @@ -461,7 +489,7 @@ def data_parse_commands(data): chat_write(ret['msg']) funcs = {} -funcs['parse'] = (parse_mental_ill, parse_skynet) +funcs['parse'] = (parse_mental_ill, parse_skynet, parse_debbug) funcs['command'] = ( command_command, command_help, command_version, command_unicode, command_source, command_dice, command_uptime, command_ping, command_info, diff --git a/urlbot.py b/urlbot.py index 9f80fd1..37b077d 100755 --- a/urlbot.py +++ b/urlbot.py @@ -239,15 +239,15 @@ def parse_delete(filepath): plugins.data_parse_other(content) return -import plugins - -plugins.chat_write = chat_write -plugins.ratelimit_exceeded = ratelimit_exceeded -plugins.ratelimit_touch = ratelimit_touch - -plugins.register_all() - if '__main__' == __name__: + import plugins + + plugins.chat_write = chat_write + plugins.ratelimit_exceeded = ratelimit_exceeded + plugins.ratelimit_touch = ratelimit_touch + + plugins.register_all() + print(sys.argv[0] + ' ' + VERSION) if not os.path.exists(fifo_path): From 00febaf2ccdaaa0b833b22b7796cc2bb7c8714bc Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 14 Nov 2014 09:15:00 +0100 Subject: [PATCH 102/112] get_reply_data: fix crash for empty data --- plugins.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins.py b/plugins.py index 5f28e19..0f8fdd8 100644 --- a/plugins.py +++ b/plugins.py @@ -22,6 +22,8 @@ def get_reply_data(data, field=0): f = data.split() if 0 == field: + if 1 > len(f): + return None return f[0].strip('<>') else: if field >= len(f): From 7cdb2e301206864524a1093ab8dd194122ba5cc9 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 17 Nov 2014 19:49:02 +0100 Subject: [PATCH 103/112] add plugin karl klammer --- plugins.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/plugins.py b/plugins.py index 0f8fdd8..e937cc2 100644 --- a/plugins.py +++ b/plugins.py @@ -203,6 +203,29 @@ def command_version(args): 'msg': args['reply_user'] + (''': I'm running ''' + VERSION) } +def command_klammer(args): + if 'register' == args: + return { + 'name': 'karl klammer', + 'desc': 'prints an anoying paper clip', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + + if 'klammer' in args['data']: + logger('plugin', 'sent karl klammer') + return { + 'msg': + ( + args['reply_user'] + r''' _, Was moechten''', + args['reply_user'] + r'''( _\_ Sie tun?''', + args['reply_user'] + r''' \0 O\ ''', + args['reply_user'] + r''' \\ \\ [ ] ja ''', + args['reply_user'] + r''' \`' ) [ ] noe''', + args['reply_user'] + r''' `'' ''' + ) + } + def command_unicode(args): if 'register' == args: return { @@ -494,8 +517,8 @@ funcs = {} funcs['parse'] = (parse_mental_ill, parse_skynet, parse_debbug) funcs['command'] = ( command_command, command_help, command_version, command_unicode, - command_source, command_dice, command_uptime, command_ping, command_info, - command_teatimer, command_decode + command_klammer, command_source, command_dice, command_uptime, command_ping, + command_info, command_teatimer, command_decode ) _dir = dir() From 79b6b8a6f4812bcdf99c93ca3013c4a671a43408 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 17 Nov 2014 19:51:08 +0100 Subject: [PATCH 104/112] fix plugin karl klammer --- plugins.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins.py b/plugins.py index e937cc2..cada994 100644 --- a/plugins.py +++ b/plugins.py @@ -217,12 +217,12 @@ def command_klammer(args): return { 'msg': ( - args['reply_user'] + r''' _, Was moechten''', - args['reply_user'] + r'''( _\_ Sie tun?''', - args['reply_user'] + r''' \0 O\ ''', - args['reply_user'] + r''' \\ \\ [ ] ja ''', - args['reply_user'] + r''' \`' ) [ ] noe''', - args['reply_user'] + r''' `'' ''' + args['reply_user'] + r''': _, Was moechten''', + args['reply_user'] + r''': ( _\_ Sie tun?''', + args['reply_user'] + r''': \0 O\ ''', + args['reply_user'] + r''': \\ \\ [ ] ja ''', + args['reply_user'] + r''': \`' ) [ ] noe''', + args['reply_user'] + r''': `'' ''' ) } From 368469ec7dfa9325ce32df55a1c64fee2424c4e5 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 17 Nov 2014 19:55:45 +0100 Subject: [PATCH 105/112] fix karl klammer naming m( --- plugins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins.py b/plugins.py index cada994..9b4efaf 100644 --- a/plugins.py +++ b/plugins.py @@ -206,8 +206,8 @@ def command_version(args): def command_klammer(args): if 'register' == args: return { - 'name': 'karl klammer', - 'desc': 'prints an anoying paper clip', + 'name': 'klammer', + 'desc': 'prints an anoying paper clip aka. Karl Klammer', 'args': ('data', 'reply_user'), 'ratelimit_class': RATE_GLOBAL } From 31f1e285c667c2204194ad6db20d343dc622affd Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 28 Nov 2014 19:13:45 +0100 Subject: [PATCH 106/112] add URL blacklist; add command_show_blacklist() --- local_config.py.skel | 5 +++++ plugins.py | 21 ++++++++++++++++++++- urlbot.py | 12 ++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/local_config.py.skel b/local_config.py.skel index 77f57f1..ef74c76 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -26,6 +26,11 @@ config['path_event_files'] = 'event_files' config['path_cmdfifo'] = 'cmdfifo' config['persistent_storage'] = 'urlbot.persistent' +config['url_blacklist'] = [ + r'^.*heise\.de/newsticker/.*$', + r'^.*wikipedia\.org/wiki/.*$' +] + # the "dice" feature will use more efficient random data (0) for given users config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) diff --git a/plugins.py b/plugins.py index 9b4efaf..76515f4 100644 --- a/plugins.py +++ b/plugins.py @@ -419,6 +419,25 @@ def command_decode(args): 'msg': args['reply_user'] + ': usage: decode {single character}' } +def command_show_blacklist(args): + if 'register' == args: + return { + 'name': 'show-blacklist', + 'desc': 'show the current URL blacklist', + 'args': ('data', 'reply_user'), + 'ratelimit_class': RATE_GLOBAL + } + + if 'show-blacklist' in args['data']: + logger('plugin', 'sent URL blacklist') + + return { + 'msg': '\n'.join([ + args['reply_user'] + ': URL blacklist: ' + b + for b in conf('url_blacklist') + ]) + } + #def command_dummy(args): # if 'register' == args: # return { @@ -518,7 +537,7 @@ funcs['parse'] = (parse_mental_ill, parse_skynet, parse_debbug) funcs['command'] = ( command_command, command_help, command_version, command_unicode, command_klammer, command_source, command_dice, command_uptime, command_ping, - command_info, command_teatimer, command_decode + command_info, command_teatimer, command_decode, command_show_blacklist ) _dir = dir() diff --git a/urlbot.py b/urlbot.py index 37b077d..1b3d59c 100755 --- a/urlbot.py +++ b/urlbot.py @@ -140,6 +140,18 @@ def extract_url(data): if ratelimit_exceeded(): return False + flag = False + for b in conf('url_blacklist'): + if not None is re.match(b, url): + flag = True + message = 'url blacklist match for ' + url + logger('info', message) + chat_write(message) + + if flag: + # an URL has matched the blacklist, continue to the next URL + continue + # urllib.request is broken: # >>> '.'.encode('idna') # .... From df471859d0d138ce5f9868daae902eb0de86b716 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 28 Nov 2014 19:18:45 +0100 Subject: [PATCH 107/112] fix multi-item 'msg' for plugins --- plugins.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/plugins.py b/plugins.py index 76515f4..bf7ed31 100644 --- a/plugins.py +++ b/plugins.py @@ -432,10 +432,10 @@ def command_show_blacklist(args): logger('plugin', 'sent URL blacklist') return { - 'msg': '\n'.join([ + 'msg': [ args['reply_user'] + ': URL blacklist: ' + b for b in conf('url_blacklist') - ]) + ] } #def command_dummy(args): @@ -530,7 +530,11 @@ def data_parse_commands(data): return False if 'msg' in list(ret.keys()): - chat_write(ret['msg']) + if list is type(ret['msg']): + for m in ret['msg']: + chat_write(m) + else: + chat_write(ret['msg']) funcs = {} funcs['parse'] = (parse_mental_ill, parse_skynet, parse_debbug) From cb6873c2f63d9bdd1486118c2f884164c507592c Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 28 Nov 2014 19:33:52 +0100 Subject: [PATCH 108/112] add version id counter in get_version_git() --- common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/common.py b/common.py index 5725750..42c786a 100644 --- a/common.py +++ b/common.py @@ -67,13 +67,17 @@ def levenshtein(a, b, return_table=False): def get_version_git(): import subprocess - cmd = ['git', 'log', '-n', '1', '--oneline', '--abbrev-commit'] + cmd = ['git', 'log', '--oneline', '--abbrev-commit'] p = subprocess.Popen(cmd, bufsize=1, stdout=subprocess.PIPE) first_line = p.stdout.readline() + line_count = len(p.stdout.readlines()) + 1 if 0 == p.wait(): - return "version (Git) '%s'" % str(first_line.strip(), encoding='utf8') + # skip this 1st, 2nd, 3rd stuff and use always [0-9]th + return "version (Git, %dth rev) '%s'" % ( + line_count, str(first_line.strip(), encoding='utf8') + ) else: return "(unknown version)" From 9f6078a314647fdbb2492ca70b0c2cc2d32e7f0c Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Fri, 28 Nov 2014 19:36:15 +0100 Subject: [PATCH 109/112] remove blacklist message from output --- urlbot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/urlbot.py b/urlbot.py index 1b3d59c..5abb4db 100755 --- a/urlbot.py +++ b/urlbot.py @@ -144,9 +144,7 @@ def extract_url(data): for b in conf('url_blacklist'): if not None is re.match(b, url): flag = True - message = 'url blacklist match for ' + url - logger('info', message) - chat_write(message) + logger('info', 'url blacklist match for ' + url) if flag: # an URL has matched the blacklist, continue to the next URL From b3bebedd474c65a79d14934a13d3442d544b9507 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Sat, 29 Nov 2014 14:17:56 +0100 Subject: [PATCH 110/112] url_blacklist: unloosen the heise regex --- local_config.py.skel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/local_config.py.skel b/local_config.py.skel index ef74c76..7e0a78e 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -27,7 +27,7 @@ config['path_cmdfifo'] = 'cmdfifo' config['persistent_storage'] = 'urlbot.persistent' config['url_blacklist'] = [ - r'^.*heise\.de/newsticker/.*$', + r'^.*heise\.de/[^/]+/meldung/.*$', r'^.*wikipedia\.org/wiki/.*$' ] From 924ecb0109bfed3d95cb7cafb28557dd7324e886 Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 1 Dec 2014 11:55:28 +0100 Subject: [PATCH 111/112] use a more reasonable config{} definition method --- local_config.py.skel | 47 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/local_config.py.skel b/local_config.py.skel index 7e0a78e..78a9112 100644 --- a/local_config.py.skel +++ b/local_config.py.skel @@ -10,33 +10,34 @@ if '__main__' == __name__: print('''this is a config file, which is not meant to be executed''') exit(-1) -config = {} -config['src-url'] = 'FIXME' - -config['bot_user'] = 'urlbot' -config['bot_owner'] = 'FIXME' - -config['hist_max_count'] = 5 -config['hist_max_time'] = 10 * 60 - -config['uptime'] = -time.time() -config['request_counter'] = 0 - -config['path_event_files'] = 'event_files' -config['path_cmdfifo'] = 'cmdfifo' -config['persistent_storage'] = 'urlbot.persistent' - -config['url_blacklist'] = [ - r'^.*heise\.de/[^/]+/meldung/.*$', - r'^.*wikipedia\.org/wiki/.*$' -] +config = { + 'src-url': 'FIXME', + + 'bot_user': 'urlbot', + 'bot_owner': 'FIXME', + + 'hist_max_count': 5, + 'hist_max_time': 10 * 60, + + 'uptime': -time.time(), + 'request_counter': 0, + + 'path_event_files': 'event_files', + 'path_cmdfifo': 'cmdfifo', + 'persistent_storage': 'urlbot.persistent', + + 'url_blacklist': [ + r'^.*heise\.de/[^/]+/meldung/.*$', + r'^.*wikipedia\.org/wiki/.*$' + ], # the "dice" feature will use more efficient random data (0) for given users -config['enhanced-random-user'] = ( 'FIXME', 'FIXME' ) + 'enhanced-random-user': ( 'FIXME', 'FIXME' ), -config['tea_steep_time'] = (3*60 + 40) + 'tea_steep_time': (3*60 + 40), -config['image_preview'] = True + 'image_preview': True +} def conf(val): if val in list(config.keys()): From 74909f496f396efee846827879738b116e96d0dc Mon Sep 17 00:00:00 2001 From: urlbot <urlbot@eagle.local.yeeer.net> Date: Mon, 1 Dec 2014 17:50:24 +0100 Subject: [PATCH 112/112] add parse_cve: print sec-tr.d.o links --- plugins.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/plugins.py b/plugins.py index bf7ed31..a66f453 100644 --- a/plugins.py +++ b/plugins.py @@ -88,6 +88,23 @@ def parse_debbug(args): 'msg': title } +def parse_cve(args): + if 'register' == args: + return { + 'name': 'parse a CVE handle', + 'args': ('data',), + 'ratelimit_class': RATE_NO_SILENCE | RATE_GLOBAL + } + + cves = re.findall(r'(CVE-\d\d\d\d-\d+)', args['data'].upper()) + if not cves: + return None + + logger('plugin', 'detected CVE handle') + return { + 'msg': 'https://security-tracker.debian.org/tracker/%s' % cves[0] + } + def parse_skynet(args): if 'register' == args: return { @@ -537,7 +554,7 @@ def data_parse_commands(data): chat_write(ret['msg']) funcs = {} -funcs['parse'] = (parse_mental_ill, parse_skynet, parse_debbug) +funcs['parse'] = (parse_mental_ill, parse_skynet, parse_debbug, parse_cve) funcs['command'] = ( command_command, command_help, command_version, command_unicode, command_klammer, command_source, command_dice, command_uptime, command_ping,