# urlbot-native/plugins/searx.py

import json
import logging
import time
from functools import wraps

import requests
from lxml import etree, html
from requests import HTTPError

# Cache of known-good searx instances, filled lazily on first use.
search_list = []

# json.JSONDecodeError only exists since Python 3.5; older versions raise a
# plain ValueError, so alias it for uniform handling below.
if not hasattr(json, 'JSONDecodeError'):
    json.JSONDecodeError = ValueError


class RateLimitingError(HTTPError):
    """Raised when a searx instance answers with HTTP 429 (Too Many Requests)."""
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    :param ExceptionToCheck: the exception to check. May be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier, e.g. a value of 2 doubles the delay
        on each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    """
    def deco_retry(f):
        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Last attempt: any exception now propagates to the caller.
            return f(*args, **kwargs)
        return f_retry  # true decorator
    return deco_retry
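
# A minimal usage sketch of the decorator above (not part of the original
# plugin; `_flaky_fetch` is a hypothetical helper). With tries=3, delay=1 and
# backoff=2 the call is attempted up to three times, sleeping 1 s and then
# 2 s between attempts, and the final attempt raises on failure:
#
#     @retry(ExceptionToCheck=requests.RequestException, tries=3, delay=1, backoff=2)
#     def _flaky_fetch(url):
#         response = requests.get(url, timeout=5)
#         response.raise_for_status()
#         return response.text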
def fetch_all_searx_engines():
    # Deliberately no error handling here: if the stats page is unreachable
    # or malformed, the exception propagates to the caller.
    tree = etree.XML(
        requests.get("http://stats.searx.oe5tpo.com").content,
        parser=html.HTMLParser()
    )
    # From every status span reading "200 - OK", walk two levels up and
    # collect the instance URLs from the link text found there.
    searxes = [
        str(x) for x in
        tree.xpath('//span[text()[contains(.,"200 - OK")]]/../..//a/text()')
    ]
    return searxes
@retry(ExceptionToCheck=(RateLimitingError, json.JSONDecodeError))
def searx(text):
    global search_list
    if not search_list:
        search_list = fetch_all_searx_engines()
    logger = logging.getLogger(__name__)
    # Always query the last instance in the list; broken instances are
    # popped off below, so the list shrinks until a working one is found.
    url = search_list[-1]
    logger.info('Currently feeding from {} (of {} in stock)'.format(url, len(search_list)))
    response = requests.get(url, params={
        'q': text,
        'format': 'json',
        'lang': 'de'
    })
    if response.status_code == 429:
        # This instance is rate-limiting us: drop it and let @retry move on
        # to the next one.
        search_list.pop()
        raise RateLimitingError(response=response, request=response.request)
    try:
        response = response.json()
    except json.JSONDecodeError:
        # "maintenance" they say...
        search_list.pop()
        raise
    if not response['results']:
        return None
    # Return (content, url) of the first result only.
    first = response['results'][0]
    return (first['content'], first['url'])
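

# A hedged usage sketch (assumed invocation, not in the original plugin):
# run the module directly to query the searx pool from the command line.
# searx() returns a (content, url) tuple for the first hit, or None when
# there are no results.
if __name__ == '__main__':
    import sys

    logging.basicConfig(level=logging.INFO)
    query = ' '.join(sys.argv[1:]) or 'python'
    hit = searx(query)
    if hit is None:
        print('no results')
    else:
        content, url = hit
        print(url)
        print(content)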