From 28ef6bd23d0e83e9123db9175c8a5c6090cbd80d Mon Sep 17 00:00:00 2001
From: Thorsten
Date: Wed, 6 Sep 2017 21:37:49 +0200
Subject: [PATCH] add search, fix TLS, do not import plugins in idlebot

---
Notes on this revision (not part of the commit message):
- idlebot.py: take PROTOCOL_TLSv1_2 from the public ssl module instead of
  the private _ssl extension module.
- plugins/commands.py: search_the_web loses its unreachable trailing "pass"
  and the redundant else branch.
- plugins/searx.py: HTTP requests get a timeout, unreachable instances are
  dropped and retried, an empty instance list or a sparse JSON reply no
  longer ends in IndexError/KeyError, retries are logged instead of printed.
- The "index" blob ids below are carried over from the first revision of
  this patch and do not describe the new post-images.

 idlebot.py          |  12 ++--
 plugins/commands.py |  20 ++++++-
 plugins/searx.py    | 140 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 6 deletions(-)
 create mode 100644 plugins/searx.py

diff --git a/idlebot.py b/idlebot.py
index 840ae56..7b647c1 100755
--- a/idlebot.py
+++ b/idlebot.py
@@ -1,19 +1,23 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 import logging
-import time
+import ssl
 import sys
+import time
+
+from sleekxmpp import ClientXMPP
+
 import config
 import events
 from common import VERSION
 
-from sleekxmpp import ClientXMPP
-
 
 class IdleBot(ClientXMPP):
     def __init__(self, jid, password, rooms, nick):
         ClientXMPP.__init__(self, jid, password)
 
+        self.ssl_version = ssl.PROTOCOL_TLSv1_2
+
         self.rooms = rooms
         self.nick = nick
 
@@ -114,7 +118,7 @@ def start(botclass, active=False):
     config.runtimeconf_set('start_time', -time.time())
 
     if active:
-        import plugins
+        pass
 
     events.event_loop.start()
 
diff --git a/plugins/commands.py b/plugins/commands.py
index 4a4e9b1..f22065e 100644
--- a/plugins/commands.py
+++ b/plugins/commands.py
@@ -14,6 +14,7 @@ from lxml import etree
 
 import config
 from common import VERSION
+from plugins.searx import searx
 from rate_limit import RATE_FUN, RATE_GLOBAL, RATE_INTERACTIVE, RATE_NO_SILENCE, RATE_NO_LIMIT
 from plugin_system import pluginfunction, ptypes, plugin_storage, plugin_enabled_get, plugin_enabled_set
 
@@ -881,8 +882,8 @@ def reload_runtimeconfig(argv, **args):
     return {'msg': 'done'}
 
 
-@pluginfunction('search', 'search the web (using duckduckgo)', ptypes.COMMAND)
-def search_the_web(argv, **args):
+@pluginfunction('ducksearch', 'search the web (using duckduckgo)', ptypes.COMMAND)
+def search_the_duck(argv, **args):
     url = 'http://api.duckduckgo.com/'
     params = dict(
         q=' '.join(argv),
@@ -913,6 +914,21 @@ def search_the_web(argv, **args):
     return {'msg': 'Sorry, no results.'}
 
 
+@pluginfunction('search', 'search the web (using searx)', ptypes.COMMAND)
+def search_the_web(argv, **args):
+    """Reply with the first searx hit for the given search terms."""
+    result = searx(' '.join(argv))
+    if not result:
+        return {'msg': 'Sorry, no results.'}
+
+    abstract, url = result
+    # keep the reply short: cut long abstracts and mark the cut
+    suffix = '…' if len(abstract) > 150 else ''
+    return {
+        'msg': '{}{} ({})'.format(abstract[:150], suffix, url)
+    }
+
+
 @pluginfunction('raise', 'only for debugging', ptypes.COMMAND)
 def raise_an_error(argv, **args):
     if args['reply_user'] == config.conf_get("bot_owner"):
diff --git a/plugins/searx.py b/plugins/searx.py
new file mode 100644
index 0000000..058e318
--- /dev/null
+++ b/plugins/searx.py
@@ -0,0 +1,140 @@
+import logging
+import time
+from functools import wraps
+from json import JSONDecodeError
+
+import requests
+from lxml import etree, html
+from requests import HTTPError
+
+log = logging.getLogger(__name__)
+
+# seconds to wait for an HTTP answer before giving up on a server
+REQUEST_TIMEOUT = 10
+
+# searx instances still considered usable; the last one is queried next
+search_list = []
+
+
+class RateLimitingError(HTTPError):
+    """Raised when a searx instance answers with HTTP 429."""
+
+
+def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
+    """Retry calling the decorated function using an exponential backoff.
+
+    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
+    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
+
+    :param ExceptionToCheck: the exception to check. may be a tuple of
+        exceptions to check
+    :type ExceptionToCheck: Exception or tuple
+    :param tries: number of times to try (not retry) before giving up
+    :type tries: int
+    :param delay: initial delay between retries in seconds
+    :type delay: int
+    :param backoff: backoff multiplier e.g. value of 2 will double the delay
+        each retry
+    :type backoff: int
+    :param logger: logger to use. If None, print
+    :type logger: logging.Logger instance
+    """
+
+    def deco_retry(f):
+
+        @wraps(f)
+        def f_retry(*args, **kwargs):
+            mtries, mdelay = tries, delay
+            while mtries > 1:
+                try:
+                    return f(*args, **kwargs)
+                except ExceptionToCheck as e:
+                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
+                    if logger:
+                        logger.warning(msg)
+                    else:
+                        print(msg)
+                    time.sleep(mdelay)
+                    mtries -= 1
+                    mdelay *= backoff
+            return f(*args, **kwargs)
+
+        return f_retry  # true decorator
+
+    return deco_retry
+
+
+def fetch_all_searx_engines():
+    """Return the instances the searx stats page currently lists as up.
+
+    :return: link texts of all entries whose status reads "200 - OK"
+    :rtype: list of str
+    :raises requests.RequestException: if the stats page cannot be fetched
+    """
+    response = requests.get(
+        "http://stats.searx.oe5tpo.com",
+        timeout=REQUEST_TIMEOUT
+    )
+    response.raise_for_status()
+    tree = etree.XML(response.content, parser=html.HTMLParser())
+    return [
+        str(x) for x in tree.xpath(
+            '//span[text()[contains(.,"200 - OK")]]/../..//a/text()'
+        )
+    ]
+
+
+@retry(
+    ExceptionToCheck=(
+        RateLimitingError, JSONDecodeError,
+        requests.ConnectionError, requests.Timeout
+    ),
+    logger=log
+)
+def searx(text):
+    """Return the first searx hit for ``text``.
+
+    Instances that cannot be reached, that rate-limit us or that do not
+    answer with JSON are dropped from ``search_list`` before retrying.
+
+    :param text: the search terms
+    :type text: str
+    :return: ``(abstract, url)`` of the first hit, None if there is none
+    :rtype: tuple or None
+    """
+    global search_list
+    if not search_list:
+        search_list = fetch_all_searx_engines()
+    if not search_list:
+        # nothing to ask; search_list[-1] below would raise an IndexError
+        log.warning('No usable searx instance available')
+        return None
+
+    url = search_list[-1]
+    log.info('Currently feeding from {} (of {} in stock)'.format(url, len(search_list)))
+    try:
+        response = requests.get(url, params={
+            'q': text,
+            'format': 'json',
+            'lang': 'de'
+        }, timeout=REQUEST_TIMEOUT)
+    except (requests.ConnectionError, requests.Timeout):
+        # dead or hanging instance, do not ask it again
+        search_list.pop()
+        raise
+    if response.status_code == 429:
+        search_list.pop()
+        raise RateLimitingError(response=response, request=response.request)
+    try:
+        data = response.json()
+    except JSONDecodeError:
+        # "maintenance" they say...
+        search_list.pop()
+        raise
+
+    results = data.get('results')
+    if not results:
+        return None
+    first = results[0]
+    # tolerate hits that come without an abstract
+    return first.get('content') or '', first['url']