2014-09-27 09:19:46 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
2015-11-30 19:50:11 +01:00
|
|
|
""" Common functions for urlbot """
|
2015-11-20 21:07:48 +01:00
|
|
|
import html.parser
|
|
|
|
|
import logging
|
|
|
|
|
import re
|
2015-12-31 15:31:34 +01:00
|
|
|
import requests
|
2015-12-21 19:39:09 +01:00
|
|
|
from urllib.error import URLError
|
2015-11-20 22:23:31 +01:00
|
|
|
|
2014-09-27 09:19:46 +02:00
|
|
|
# Maximum number of bytes read from a fetched page (see fetch_page()).
BUFSIZ = 8192
|
2015-11-30 19:50:11 +01:00
|
|
|
# Impersonate a desktop Firefox/Iceweasel build so sites serve regular pages.
USER_AGENT = ('Mozilla/5.0 (X11; Linux x86_64; rv:31.0) '
              'Gecko/20100101 Firefox/31.0 Iceweasel/31.0')
|
2014-09-27 09:19:46 +02:00
|
|
|
|
2016-04-05 18:40:31 +02:00
|
|
|
|
2014-09-27 09:41:29 +02:00
|
|
|
def get_version_git():
    """Return a human-readable version string derived from the Git history.

    Runs ``git log --oneline`` in the current working directory and reports
    the total commit count plus the newest commit's one-line summary.

    Returns:
        "version (Git, <N>th rev) '<summary>'" on success,
        "(unknown version)" if git exits non-zero or prints nothing,
        "cannot determine version" if git cannot be run at all.
    """
    import subprocess

    cmd = ['git', 'log', '--oneline', '--abbrev-commit']

    try:
        # check_output waits for the process and closes the pipe for us
        # (the old Popen-based code leaked the stdout pipe).
        output = subprocess.check_output(cmd)
    except subprocess.CalledProcessError:
        # git ran but failed (e.g. not a repository)
        return "(unknown version)"
    except Exception:
        # git missing or any other launch failure
        return "cannot determine version"

    lines = output.splitlines()
    if not lines:
        return "(unknown version)"

    # skip this 1st, 2nd, 3rd stuff and use always [0-9]th
    return "version (Git, %dth rev) '%s'" % (
        len(lines), str(lines[0].strip(), encoding='utf8')
    )
|
2014-09-27 09:41:29 +02:00
|
|
|
|
2015-11-20 21:07:48 +01:00
|
|
|
|
2014-09-27 09:41:29 +02:00
|
|
|
# Computed once at import time; depends on the working directory's Git state.
VERSION = get_version_git()
|
2015-11-20 21:07:48 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_page(url):
    """Fetch the beginning of the page at *url*.

    Downloads at most BUFSIZ bytes of the (decompressed) response body and
    decodes it using the encoding announced by the server, falling back to
    UTF-8.

    :param url: URL to fetch
    :return: tuple of (decoded text, response headers)
    :raises requests.RequestException: on connection failure or timeout
    :raises UnicodeDecodeError: if the body cannot be decoded
    """
    log = logging.getLogger(__name__)
    log.info('fetching page ' + url)
    # stream=True lets us read only the first BUFSIZ bytes; the timeout
    # prevents the bot from hanging forever on a stuck server.
    response = requests.get(url, headers={'User-Agent': USER_AGENT},
                            stream=True, timeout=10)
    try:
        content = response.raw.read(BUFSIZ, decode_content=True)
    finally:
        # always release the connection back to the pool
        response.close()
    return content.decode(response.encoding or 'utf-8'), response.headers
|
2015-11-20 21:07:48 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_title(url):
    """Extract the contents of the <title> tag of the page at *url*.

    Return contract (kept for existing callers):
        - ('wee, that looks like my home repo!', []) for the bot's own repo
        - (1, content_type) when the response is not a text/* document
        - the HTML-unescaped title string when a title is found
        - None on URL/decode errors or when no title is present
        - 'failed: ...' error string for any other fetch failure
    """
    log = logging.getLogger(__name__)

    if 'repo/urlbot-native.git' in url:
        log.info('repo URL found: ' + url)
        return 'wee, that looks like my home repo!', []

    log.info('extracting title from ' + url)

    try:
        (html_text, headers) = fetch_page(url)
    except URLError:
        return None
    except UnicodeDecodeError:
        return None
    except Exception as e:
        return 'failed: %s for %s' % (str(e), url)

    if 'content-type' in headers:
        log.debug('content-type: ' + headers['content-type'])

        # refuse to parse binary documents (images, archives, ...)
        if not headers['content-type'].startswith('text/'):
            return 1, headers['content-type']

    result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html_text, re.S | re.M | re.IGNORECASE)

    if result:
        match = result.groups()[0]

        try:
            # html.unescape() replaces HTMLParser.unescape(), which was
            # deprecated and finally removed in Python 3.9.
            expanded_html = html.unescape(match)
        except UnicodeDecodeError as e:  # idk why this can happen, but it does
            log.warning('html.unescape() exploded here: ' + str(e))
            expanded_html = match

        return expanded_html
    else:
        return None
|
2015-12-22 13:42:44 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def giphy(subject, api_key):
    """Look up a random Giphy GIF for *subject*.

    :param subject: tag to search for
    :param api_key: Giphy API key
    :return: URL string of a matching GIF, or None on any failure
    """
    url = 'http://api.giphy.com/v1/gifs/random?tag={}&api_key={}&limit=1&offset=0'.format(subject, api_key)
    # timeout keeps a stuck API endpoint from blocking the bot
    response = requests.get(url, timeout=10)

    giphy_url = None
    try:
        data = response.json()
        giphy_url = data['data']['image_url']
    except (ValueError, KeyError, TypeError):
        # malformed JSON or unexpected payload shape -- best effort, keep None
        pass

    return giphy_url
|
2015-12-26 13:50:21 +01:00
|
|
|
|
|
|
|
|
|
2016-01-28 20:18:26 +01:00
|
|
|
def get_nick_from_object(message_obj):
    """
    not quite correct yet, also the private property access isn't nice.
    """
    muc_nick = message_obj['mucnick']
    if muc_nick:
        return muc_nick
    # no MUC nick -> fall back to the resource part of the sender's JID
    # (reaches into a private attribute of the JID object)
    return message_obj['from']._jid[2]
|
|
|
|
|
|
|
|
|
|
|
2016-04-05 14:18:22 +02:00
|
|
|
def else_command(args):
    """Fallback handler: reply with a short self-description of the bot.

    :param args: dict with a 'reply_user' key naming who to address
    :return: dict with the reply under 'msg'
    """
    logging.getLogger(__name__).info('sent short info')
    reply = args['reply_user'] + ''': I'm a bot (highlight me with 'info' for more information).'''
    return {'msg': reply}
|
|
|
|
|
|