add search, fix TLS, do not import plugins in idlebot
plugins/searx.py | 98 (new file)
@@ -0,0 +1,98 @@
import logging
import time
from functools import wraps
from json import JSONDecodeError

import requests
from lxml import etree, html
from requests import HTTPError
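
# lazily filled cache of reachable searx instances; searx() below pops
# entries off the end as they turn out to be rate-limited or broken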
search_list = []
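
# raised on HTTP 429 replies so the retry logic can react to rate limiting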
class RateLimitingError(HTTPError):
    pass


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier, e.g. a value of 2 doubles the delay
        on each retry
    :type backoff: int
    :param logger: logger to use; if None, messages are printed
    :type logger: logging.Logger instance
    """

    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
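                # attempt the call; on an expected exception, log, sleep,
                # and multiply the delay by `backoff` before trying again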
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
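            # retries exhausted: one final attempt, any exception propagates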
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry
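
# Hypothetical example: up to 3 attempts, sleeping 2s then 4s between them:
#     @retry(ExceptionToCheck=IOError, tries=3, delay=2, backoff=2)
#     def fetch(): ...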


def fetch_all_searx_engines():
    # deliberately no error handling: failures here simply propagate
    tree = etree.XML(
        requests.get("http://stats.searx.oe5tpo.com").content,
        parser=html.HTMLParser()
    )
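    # collect the URL of every instance the stats page lists as "200 - OK"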
    searxes = [str(x) for x in tree.xpath('//span[text()[contains(.,"200 - OK")]]/../..//a/text()')]

    return searxes
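

# retried automatically (see retry() above) when an instance rate-limits
# us or returns unparseable JSON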
@retry(ExceptionToCheck=(RateLimitingError, JSONDecodeError))
def searx(text):
    global search_list
    if not search_list:
        search_list = fetch_all_searx_engines()
    logger = logging.getLogger(__name__)
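
    # always query the instance at the end of the list; broken instances
    # are popped off, so the tail advances through the candidates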
    url = search_list[-1]
    logger.info('Currently feeding from {} (of {} in stock)'.format(url, len(search_list)))
    response = requests.get(url, params={
        'q': text,
        'format': 'json',
        'lang': 'de'
    })
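    # HTTP 429 means this instance is rate-limiting us: discard it and let
    # the retry decorator re-run the search against the next one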
    if response.status_code == 429:
        search_list.pop()
        raise RateLimitingError(response=response, request=response.request)
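    # a healthy instance answers with JSON; HTML maintenance pages and the
    # like fail to decode and are handled the same way as rate limiting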
    try:
        response = response.json()
    except JSONDecodeError:
        # "maintenance" pages and other non-JSON replies end up here:
        # drop the instance and re-raise so the retry picks another one
        search_list.pop()
        raise

    if not response['results']:
        return None
    first = response['results'][0]
    return first['content'], first['url']
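

# Hypothetical usage (assumes at least one public instance is currently up):
#     >>> searx('exponential backoff')
#     ('<snippet of the first hit>', 'https://example.org/first-hit')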