Compare commits

15 Commits

Author SHA1 Message Date
04ac4f8e50 tox.ini: change dep to dnspython3 2017-12-23 12:00:08 +01:00
2713b649c4 tox.ini: add dnspython, pyasn1, pyasn1-modules 2017-12-23 12:00:08 +01:00
46c6577634 add tox.ini for virtualenv goodness 2017-12-23 12:00:08 +01:00
Thorsten
a44edbccc5 timeout to fetch url titles 2017-11-13 19:58:01 +01:00
Thorsten
c3bf599b08 content is optional 2017-09-06 22:14:51 +02:00
Thorsten
bb1fd36665 revert 2017-09-06 22:09:13 +02:00
Thorsten
4ee7b60640 restrict to bing, google and yahoo (probably pointless) 2017-09-06 22:06:40 +02:00
Thorsten
b0e2041989 assume the order is better the other way round? 2017-09-06 21:53:54 +02:00
Thorsten
10cec5bbea python3.4 bug
see also https://github.com/mozilla/http-observatory/pull/86/files
2017-09-06 21:51:01 +02:00
Thorsten
c9aedc4b18 Merge remote-tracking branch 'origin/master' 2017-09-06 21:38:04 +02:00
Thorsten
28ef6bd23d add search, fix TLS, do not import plugins in idlebot 2017-09-06 21:37:49 +02:00
Thorsten S
19e124e186 Merge branch 'master' of rootie:./urlbot-native 2017-05-31 18:31:24 +02:00
Thorsten S
3c6d7b2497 add license 2017-05-31 18:31:13 +02:00
Thorsten
328e821f6d misc 2016-12-11 12:24:00 +01:00
Thorsten
9c0ae3982a deployment settings 2016-09-06 19:40:53 +02:00
10 changed files with 212 additions and 88 deletions

7
LICENSE Normal file
View File

@@ -0,0 +1,7 @@
Copyright (c) 2017 Thorsten Sperber
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -39,7 +39,7 @@ VERSION = get_version_git()
def fetch_page(url): def fetch_page(url):
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
log.info('fetching page ' + url) log.info('fetching page ' + url)
response = requests.get(url, headers={'User-Agent': USER_AGENT}, stream=True) response = requests.get(url, headers={'User-Agent': USER_AGENT}, stream=True, timeout=15)
content = response.raw.read(BUFSIZ, decode_content=True) content = response.raw.read(BUFSIZ, decode_content=True)
return content.decode(response.encoding or 'utf-8'), response.headers return content.decode(response.encoding or 'utf-8'), response.headers

View File

@@ -1,2 +1,2 @@
[bots] [bots]
aero2k.de ansible_host=2a01:4f8:d16:130c::2 aero2k.de ansible_host=2a01:4f8:d16:130c::2 ansible_become_method=su

View File

@@ -1,19 +1,23 @@
#!/usr/bin/python3 #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import logging import logging
import time
import sys import sys
import time
import _ssl
from sleekxmpp import ClientXMPP
import config import config
import events import events
from common import VERSION from common import VERSION
from sleekxmpp import ClientXMPP
class IdleBot(ClientXMPP): class IdleBot(ClientXMPP):
def __init__(self, jid, password, rooms, nick): def __init__(self, jid, password, rooms, nick):
ClientXMPP.__init__(self, jid, password) ClientXMPP.__init__(self, jid, password)
self.ssl_version = _ssl.PROTOCOL_TLSv1_2
self.rooms = rooms self.rooms = rooms
self.nick = nick self.nick = nick
@@ -21,6 +25,7 @@ class IdleBot(ClientXMPP):
self.add_event_handler('groupchat_message', self.muc_message) self.add_event_handler('groupchat_message', self.muc_message)
self.add_event_handler('disconnected', self.disconnected) self.add_event_handler('disconnected', self.disconnected)
self.add_event_handler('presence_error', self.disconnected) self.add_event_handler('presence_error', self.disconnected)
self.add_event_handler('session_end', self.disconnected)
self.priority = 0 self.priority = 0
self.status = None self.status = None
self.show = None self.show = None
@@ -30,6 +35,7 @@ class IdleBot(ClientXMPP):
self.add_event_handler('muc::%s::got_offline' % room, self.muc_offline) self.add_event_handler('muc::%s::got_offline' % room, self.muc_offline)
def disconnected(self, _): def disconnected(self, _):
self.logger.warn("Disconnected! dbg: {}".format(str(_)))
self.disconnect(wait=True) self.disconnect(wait=True)
def session_start(self, _): def session_start(self, _):
@@ -105,12 +111,14 @@ def start(botclass, active=False):
bot.connect() bot.connect()
bot.register_plugin('xep_0045') bot.register_plugin('xep_0045')
bot.register_plugin('xep_0199', {'keepalive': True})
bot.register_plugin('xep_0308')
bot.process() bot.process()
config.runtimeconf_set('start_time', -time.time()) config.runtimeconf_set('start_time', -time.time())
if active: if active:
import plugins pass
events.event_loop.start() events.event_loop.start()

View File

@@ -6,28 +6,48 @@ from plugin_system import pluginfunction, ptypes
from rate_limit import RATE_FUN, RATE_GLOBAL from rate_limit import RATE_FUN, RATE_GLOBAL
def give_item(user, item_name, search_word=None):
    """Build a reply that hands ``item_name`` to ``user`` with a giphy result.

    :param user: nick the item is addressed to
    :param item_name: name shown in the reply text
    :param search_word: term passed to ``giphy``; falls back to
        ``item_name`` when empty or omitted
    :returns: dict with a single ``'msg'`` entry
    """
    query = search_word or item_name
    # NOTE(review): the second giphy() argument looks like an API key - confirm
    gif = giphy(query, 'dc6zaTOxFJmzC')
    return {'msg': '{} for {}: {}'.format(item_name, user, gif)}
def cake_excuse(user):
    """Build a reply addressing ``user`` with a random entry from ``cakes``.

    :param user: nick the reply is addressed to
    :returns: dict with a single ``'msg'`` entry
    """
    excuse = random.choice(cakes)
    return {'msg': '{}: {}'.format(user, excuse)}
@pluginfunction('cake', 'displays a cake ASCII art', ptypes.COMMAND, ratelimit_class=RATE_FUN | RATE_GLOBAL) @pluginfunction('cake', 'displays a cake ASCII art', ptypes.COMMAND, ratelimit_class=RATE_FUN | RATE_GLOBAL)
def command_cake(argv, **args): def command_cake(argv, **args):
if {'please', 'bitte'}.intersection(set(argv)): if {'please', 'bitte'}.intersection(set(argv)):
return { return give_item(args['reply_user'], 'cake')
'msg': 'cake for {}: {}'.format(args['reply_user'], giphy('cake', 'dc6zaTOxFJmzC')) else:
} return cake_excuse(args['reply_user'])
return {
'msg': args['reply_user'] + ': %s' % random.choice(cakes)
}
@pluginfunction('keks', 'keks!', ptypes.COMMAND, ratelimit_class=RATE_FUN | RATE_GLOBAL) @pluginfunction('keks', 'keks!', ptypes.COMMAND, ratelimit_class=RATE_FUN | RATE_GLOBAL)
def command_cookie(argv, **args): def command_cookie(argv, **args):
if {'please', 'bitte'}.intersection(set(argv)): if {'please', 'bitte'}.intersection(set(argv)):
return { return give_item(args['reply_user'], 'keks', 'cookie')
'msg': 'keks für {}: {}'.format(args['reply_user'], giphy('cookie', 'dc6zaTOxFJmzC')) else:
} return cake_excuse(args['reply_user'])
return {
'msg': args['reply_user'] + ': %s' % random.choice(cakes) @pluginfunction('schnitzel', 'schnitzel!', ptypes.COMMAND, ratelimit_class=RATE_FUN | RATE_GLOBAL)
} def command_schnitzel(argv, **args):
if {'please', 'bitte'}.intersection(set(argv)):
return give_item(args['reply_user'], 'schnitzel')
else:
return cake_excuse(args['reply_user'])
@pluginfunction('kaffee', 'kaffee!', ptypes.COMMAND, ratelimit_class=RATE_FUN | RATE_GLOBAL)
def command_coffee(argv, **args):
if {'please', 'bitte'}.intersection(set(argv)):
return give_item(args['reply_user'], 'kaffee', 'coffee')
else:
return cake_excuse(args['reply_user'])
cakes = [ cakes = [
@@ -46,4 +66,3 @@ cakes = [
"I'm going to kill you, and all the cake is gone.", "I'm going to kill you, and all the cake is gone.",
"Who's gonna make the cake when I'm gone? You?" "Who's gonna make the cake when I'm gone? You?"
] ]

View File

@@ -14,6 +14,7 @@ from lxml import etree
import config import config
from common import VERSION from common import VERSION
from plugins.searx import searx
from rate_limit import RATE_FUN, RATE_GLOBAL, RATE_INTERACTIVE, RATE_NO_SILENCE, RATE_NO_LIMIT from rate_limit import RATE_FUN, RATE_GLOBAL, RATE_INTERACTIVE, RATE_NO_SILENCE, RATE_NO_LIMIT
from plugin_system import pluginfunction, ptypes, plugin_storage, plugin_enabled_get, plugin_enabled_set from plugin_system import pluginfunction, ptypes, plugin_storage, plugin_enabled_get, plugin_enabled_set
@@ -881,37 +882,8 @@ def reload_runtimeconfig(argv, **args):
return {'msg': 'done'} return {'msg': 'done'}
@pluginfunction('snitch', "tell on a spammy user", ptypes.COMMAND) @pluginfunction('ducksearch', 'search the web (using duckduckgo)', ptypes.COMMAND)
def ignore_user(argv, **args): def search_the_duck(argv, **args):
if not argv:
return {'msg': 'syntax: "{}: snitch username"'.format(config.conf_get("bot_nickname"))}
then = time.time() + 15 * 60
spammer = argv[0]
if spammer == config.conf_get("bot_owner"):
return {
'msg': 'My owner does not spam, he is just very informative.'
}
if spammer not in config.runtime_config_store['spammers']:
config.runtime_config_store['spammers'].append(spammer)
def unblock_user(user):
if user not in config.runtime_config_store['spammers']:
config.runtime_config_store['spammers'].append(user)
return {
'msg': 'user reported and ignored till {}'.format(time.strftime('%H:%M', time.localtime(then))),
'event': {
'time': then,
'command': (unblock_user, ([spammer],))
}
}
@pluginfunction('search', 'search the web (using duckduckgo)', ptypes.COMMAND)
def search_the_web(argv, **args):
url = 'http://api.duckduckgo.com/' url = 'http://api.duckduckgo.com/'
params = dict( params = dict(
q=' '.join(argv), q=' '.join(argv),
@@ -942,6 +914,24 @@ def search_the_web(argv, **args):
return {'msg': 'Sorry, no results.'} return {'msg': 'Sorry, no results.'}
@pluginfunction('search', 'search the web (using searx)', ptypes.COMMAND)
def search_the_web(argv, **args):
    """Answer a ``search`` command with the first searx hit.

    :param argv: command arguments; joined with spaces to form the query
    :param args: extra plugin keyword arguments (unused here)
    :returns: dict with a ``'msg'`` entry holding either the (possibly
        truncated) abstract followed by the URL, or a "no results" note
    """
    result = searx(' '.join(argv))
    if not result:
        return {'msg': 'Sorry, no results.'}

    abstract, url = result
    # Abstracts are cut at 150 characters; mark the cut so readers can tell
    # the text was shortened.
    suffix = '…' if len(abstract) > 150 else ''
    return {
        'msg': '{}{} ({})'.format(abstract[:150], suffix, url)
    }
@pluginfunction('raise', 'only for debugging', ptypes.COMMAND) @pluginfunction('raise', 'only for debugging', ptypes.COMMAND)
def raise_an_error(argv, **args): def raise_an_error(argv, **args):
if args['reply_user'] == config.conf_get("bot_owner"): if args['reply_user'] == config.conf_get("bot_owner"):

View File

@@ -58,14 +58,11 @@ def parse_debbug(**args):
log.info('detected Debian bug #%s' % b) log.info('detected Debian bug #%s' % b)
url = 'https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s' % b url = 'https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=%s' % b
status, title = extract_title(url)
if 0 == status: title = extract_title(url)
if title:
out.append('Debian Bug: %s: %s' % (title, url)) out.append('Debian Bug: %s: %s' % (title, url))
elif 3 == status:
out.append('error for #%s: %s' % (b, title))
else:
log.info('unknown status %d' % status)
return { return {
'msg': out 'msg': out
@@ -130,34 +127,6 @@ def parse_slash_me(**args):
} }
@pluginfunction("recognize_bots", "got ya", ptypes.PARSE, enabled=False)
def recognize_bots(**args):
# disabled until channel separation
return
unique_standard_phrases = (
'independent bot and have nothing to do with other artificial intelligence systems',
'new Debian Security Announce',
'I\'m a bot (highlight me',
)
def _add_to_list(username, message):
if username not in config.runtime_config_store['other_bots']:
config.runtime_config_store['other_bots'].append(username)
config.runtimeconf_persist()
log.info("Adding {} to the list of bots (now {})".format(username, config.runtime_config_store['other_bots']))
return {
'event': {
'time': time.time() + 3,
'msg': message
}
}
if any([phrase in args['data'] for phrase in unique_standard_phrases]):
return _add_to_list(args['reply_user'], 'Making notes...')
elif 'I\'ll be back' in args['data']:
return _add_to_list(args['reply_user'], 'Hey there, buddy!')
@pluginfunction('resolve-url-title', 'extract titles from urls', ptypes.PARSE, ratelimit_class=RATE_URL) @pluginfunction('resolve-url-title', 'extract titles from urls', ptypes.PARSE, ratelimit_class=RATE_URL)
def resolve_url_title(**args): def resolve_url_title(**args):
user = args['reply_user'] user = args['reply_user']
@@ -173,7 +142,7 @@ def resolve_url_title(**args):
url_blacklist = config.runtime_config_store['url_blacklist'].values() url_blacklist = config.runtime_config_store['url_blacklist'].values()
out = [] out = []
for url in result: for url in result[:10]:
if any([re.match(b, url) for b in url_blacklist]): if any([re.match(b, url) for b in url_blacklist]):
log.info('url blacklist match for ' + url) log.info('url blacklist match for ' + url)
break break

99
plugins/searx.py Normal file
View File

@@ -0,0 +1,99 @@
import logging
import time
from functools import wraps
import json
import requests
from lxml import etree, html
from requests import HTTPError
# Pool of searx instance URLs. searx() fills it on first use and drops the
# head entry whenever that instance misbehaves.
search_list = []

# Older Python versions (before 3.5) have no json.JSONDecodeError and raise a
# plain ValueError instead; alias it so the except clauses work everywhere.
try:
    json.JSONDecodeError
except AttributeError:
    json.JSONDecodeError = ValueError
class RateLimitingError(HTTPError):
    """Raised when a searx instance answers with HTTP 429."""
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int or float
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int or float
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator; the wrapped function returns whatever the
        decorated function returns, and the exception of the final attempt
        propagates to the caller
    """
    def deco_retry(f):
        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            # All attempts but the last one swallow ExceptionToCheck.
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    # %s rather than %d: a fractional delay (e.g. 0.5) must
                    # not be reported as "0 seconds".
                    msg = "%s, Retrying in %s seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt: no handler, so a failure reaches the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
def fetch_all_searx_engines():
    """Scrape the searx statistics page for instances reported as up.

    :returns: list of ``str`` taken from the link texts next to every
        "200 - OK" status marker; may be empty
    :raises requests.RequestException: network failures (including the
        timeout) are not handled here and propagate to the caller
    """
    # Bounded wait so an unresponsive stats host cannot hang the bot.
    page = requests.get("http://stats.searx.oe5tpo.com", timeout=15)
    tree = etree.XML(page.content, parser=html.HTMLParser())
    # From each <span> whose text contains "200 - OK", go two levels up
    # (presumably the table row - verify against the page) and collect the
    # text of every link below it.
    ok_links = tree.xpath(
        '//span[text()[contains(.,"200 - OK")]]/../..//a/text()'
    )
    return [str(link) for link in ok_links]
@retry(ExceptionToCheck=(RateLimitingError, requests.ConnectionError,
                         requests.Timeout, json.JSONDecodeError))
def searx(text):
    """Query the first searx instance in ``search_list`` for ``text``.

    Instances that rate-limit (HTTP 429), cannot be reached in time, or
    answer with something that is not JSON are dropped from the pool and
    the ``retry`` decorator repeats the call against the next one.

    :param text: search query
    :returns: ``(content, url)`` of the first result, where ``content`` is
        ``''`` when the result carries none; ``None`` when there are no
        results or no instance is available
    :raises RateLimitingError, requests.ConnectionError, requests.Timeout,
        json.JSONDecodeError: when the final retry attempt still fails
    """
    global search_list
    if not search_list:
        search_list = fetch_all_searx_engines()
    if not search_list:
        # No instance available; report "no results" instead of failing
        # with an IndexError below.
        return None

    logger = logging.getLogger(__name__)
    url = search_list[0]
    logger.info('Currently feeding from {} (of {} in stock)'.format(url, len(search_list)))

    try:
        response = requests.get(url, params={
            'q': text,
            'format': 'json',
            'lang': 'de'
        }, timeout=15)
    except (requests.ConnectionError, requests.Timeout):
        # Dead or hanging instance: rotate it out, retry handles the rest.
        search_list.pop(0)
        raise

    if response.status_code == 429:
        search_list.pop(0)
        raise RateLimitingError(response=response, request=response.request)

    try:
        payload = response.json()
    except json.JSONDecodeError:
        # "maintenance" they say...
        search_list.pop(0)
        raise

    results = payload.get('results')
    if not results:
        return None
    # Only the first hit is used; content is optional in searx results.
    first = results[0]
    return first.get('content', ''), first['url']

32
tox.ini Normal file
View File

@@ -0,0 +1,32 @@
# defaults to tests
# run with tox -e urlbot, tox -e idlebot, etc
[tox]
envlist = test
# we have no setup.py
skipsdist = true
[testenv]
envdir = {toxinidir}/.env
deps=nose
fasteners
sleekxmpp
configobj
requests
lxml
dnspython3
pyasn1
pyasn1-modules
commands=
test: nosetests [] # substitute with tox' positional arguments
idlebot: python idlebot.py []
urlbot: python urlbot.py []
sh: sh []
bash: bash []
zsh: zsh []
whitelist_externals = zsh
bash
sh

View File

@@ -88,7 +88,7 @@ class UrlBot(IdleBot):
request_counter = int(config.runtimeconf_get('request_counter')) request_counter = int(config.runtimeconf_get('request_counter'))
config.runtimeconf_set('request_counter', request_counter + 1) config.runtimeconf_set('request_counter', request_counter + 1)
if str is not type(message): if not isinstance(message, str):
message = '\n'.join(message) message = '\n'.join(message)
def cached(function, ttl=60): def cached(function, ttl=60):
@@ -300,7 +300,7 @@ class UrlBot(IdleBot):
if not plugin_enabled_get(plugin): if not plugin_enabled_get(plugin):
continue continue
ret = plugin(reply_user=reply_user, data=data) ret = plugin(reply_user=reply_user, data=data, sender=msg_obj['from'])
if ret: if ret:
self._run_action(ret, plugin, msg_obj) self._run_action(ret, plugin, msg_obj)