add search, fix TLS, do not import plugins in idlebot

This commit is contained in:
Thorsten
2017-09-06 21:37:49 +02:00
parent 328e821f6d
commit 28ef6bd23d
3 changed files with 127 additions and 6 deletions

98
plugins/searx.py Normal file
View File

@@ -0,0 +1,98 @@
import logging
import time
from functools import wraps
from json import JSONDecodeError
import requests
from lxml import etree, html
from requests import HTTPError
# Cache of searx instance URLs shared across searx() calls: searx() fills it
# from fetch_all_searx_engines() when it is empty, always queries the last
# entry, and pops that entry when the instance rate-limits or returns non-JSON.
search_list = []
class RateLimitingError(HTTPError):
    """HTTP error raised by searx() when an instance answers with status 429."""
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Build a decorator that re-invokes a function with exponential backoff.

    Adapted from
    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    (original: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry).

    :param ExceptionToCheck: exception class, or tuple of classes, that
        triggers another attempt; anything else propagates immediately
    :type ExceptionToCheck: Exception or tuple
    :param tries: total number of attempts (not the number of retries)
    :type tries: int
    :param delay: seconds to sleep before the first retry
    :type delay: int
    :param backoff: factor the delay is multiplied by after every failed
        attempt, e.g. 2 doubles the wait each time
    :type backoff: int
    :param logger: receives the retry notice via ``warning()``; when falsy
        the notice is printed instead
    :type logger: logging.Logger instance
    """

    def decorate(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            attempts_left = tries
            pause = delay
            # All attempts except the last swallow the watched exception,
            # report it, wait, and go around again.
            while attempts_left > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as exc:
                    notice = "%s, Retrying in %d seconds..." % (str(exc), pause)
                    report = logger.warning if logger else print
                    report(notice)
                    time.sleep(pause)
                    attempts_left -= 1
                    pause *= backoff
            # Final attempt: whatever it raises reaches the caller.
            return f(*args, **kwargs)

        return wrapper

    return decorate
def fetch_all_searx_engines(timeout=10):
    """Scrape the searx statistics page for instances reported as healthy.

    Downloads http://stats.searx.oe5tpo.com, parses it with lxml's HTML
    parser and, for every ``<span>`` whose text contains "200 - OK",
    collects the text of all links below that span's grandparent element.
    searx() uses the returned strings as request URLs.

    :param timeout: seconds to wait for the statistics server; without a
        timeout a stalled server would block the caller indefinitely
    :type timeout: int or float
    :return: the matching link texts in document order (empty list when
        nothing matches)
    :rtype: list of str
    :raises requests.RequestException: if the page cannot be fetched,
        including when the timeout expires
    """
    # NOTE(review): HTTP error statuses and unparsable bodies are not handled
    # here; they surface as an lxml parse error or as an empty list.
    page = requests.get("http://stats.searx.oe5tpo.com", timeout=timeout)
    tree = etree.XML(page.content, parser=html.HTMLParser())
    links = tree.xpath('//span[text()[contains(.,"200 - OK")]]/../..//a/text()')
    return [str(link) for link in links]
@retry(ExceptionToCheck=(RateLimitingError, JSONDecodeError))
def searx(text):
    """Query a searx instance and return its first result.

    The instance list is loaded lazily into the module-level ``search_list``
    and the last entry is the one queried. An instance that answers with
    HTTP 429, or with a body that is not valid JSON, is removed from the
    list and the exception is raised so the ``retry`` decorator repeats the
    call against the next instance.

    :param text: search query, sent as the ``q`` parameter
    :return: ``(content, url)`` of the first result, or ``None`` when the
        instance returns an empty result list
    :rtype: tuple or None
    :raises RateLimitingError: if the final attempt is still answered with 429
    :raises JSONDecodeError: if the final attempt still returns non-JSON
    :raises IndexError: if no instance could be discovered
    :raises requests.RequestException: on connection problems or when the
        request times out
    """
    global search_list
    if not search_list:
        search_list = fetch_all_searx_engines()
    logger = logging.getLogger(__name__)

    url = search_list[-1]
    logger.info('Currently feeding from {} (of {} in stock)'.format(url, len(search_list)))
    # A timeout keeps one unresponsive instance from hanging the caller forever.
    response = requests.get(
        url,
        params={
            'q': text,
            'format': 'json',
            'lang': 'de',
        },
        timeout=10,
    )

    if response.status_code == 429:
        # Rate limited: drop this instance so the retry uses another one.
        search_list.pop()
        raise RateLimitingError(response=response, request=response.request)

    try:
        payload = response.json()
    except JSONDecodeError:
        # Instance answered with something other than JSON (e.g. a
        # maintenance page): drop it and let the retry move on.
        search_list.pop()
        raise

    results = payload['results']
    if not results:
        return None
    # Only the first hit is returned, so only that entry is inspected; a
    # malformed later result must not break the lookup.
    top = results[0]
    return top['content'], top['url']