mirror of
http://aero2k.de/t/repos/urlbot-native.git
synced 2017-09-06 15:25:38 +02:00
Remove error output for URL resolving if it's the URL itself which is broken.
This commit is contained in:
56
common.py
56
common.py
@@ -6,6 +6,7 @@ import re
|
|||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
from urllib.error import URLError
|
||||||
|
|
||||||
RATE_NO_LIMIT = 0x00
|
RATE_NO_LIMIT = 0x00
|
||||||
RATE_GLOBAL = 0x01
|
RATE_GLOBAL = 0x01
|
||||||
@@ -124,24 +125,20 @@ VERSION = get_version_git()
|
|||||||
def fetch_page(url):
|
def fetch_page(url):
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
log.info('fetching page ' + url)
|
log.info('fetching page ' + url)
|
||||||
try:
|
request = urllib.request.Request(url)
|
||||||
request = urllib.request.Request(url)
|
request.add_header('User-Agent', USER_AGENT)
|
||||||
request.add_header('User-Agent', USER_AGENT)
|
response = urllib.request.urlopen(request)
|
||||||
response = urllib.request.urlopen(request)
|
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
|
||||||
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
|
if html_text[0] == 0x1f and html_text[1] == 0x8b:
|
||||||
if html_text[0] == 0x1f and html_text[1] == 0x8b:
|
import zlib
|
||||||
import zlib
|
try:
|
||||||
try:
|
gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
|
||||||
gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
|
except:
|
||||||
except:
|
pass
|
||||||
pass
|
else:
|
||||||
else:
|
html_text = gzip_data
|
||||||
html_text = gzip_data
|
response.close()
|
||||||
response.close()
|
return html_text, response.headers
|
||||||
return 0, html_text, response.headers
|
|
||||||
except Exception as e:
|
|
||||||
log.warn('failed: %s' % e)
|
|
||||||
return 1, str(e), 'dummy'
|
|
||||||
|
|
||||||
|
|
||||||
def extract_title(url):
|
def extract_title(url):
|
||||||
@@ -150,19 +147,18 @@ def extract_title(url):
|
|||||||
|
|
||||||
if 'repo/urlbot-native.git' in url:
|
if 'repo/urlbot-native.git' in url:
|
||||||
log.info('repo URL found: ' + url)
|
log.info('repo URL found: ' + url)
|
||||||
return 3, 'wee, that looks like my home repo!'
|
return 'wee, that looks like my home repo!', []
|
||||||
|
|
||||||
log.info('extracting title from ' + url)
|
log.info('extracting title from ' + url)
|
||||||
|
|
||||||
(code, html_text, headers) = fetch_page(url)
|
try:
|
||||||
|
(html_text, headers) = fetch_page(url)
|
||||||
|
except URLError as e:
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
return 'failed: %s for %s' % (str(e), url)
|
||||||
|
|
||||||
if 1 == code:
|
charset = None
|
||||||
return 3, 'failed: %s for %s' % (html_text, url)
|
|
||||||
|
|
||||||
if not html_text:
|
|
||||||
return -1, 'error'
|
|
||||||
|
|
||||||
charset = ''
|
|
||||||
if 'content-type' in headers:
|
if 'content-type' in headers:
|
||||||
log.debug('content-type: ' + headers['content-type'])
|
log.debug('content-type: ' + headers['content-type'])
|
||||||
|
|
||||||
@@ -174,7 +170,7 @@ def extract_title(url):
|
|||||||
r'\g<charset>', headers['content-type'], re.IGNORECASE
|
r'\g<charset>', headers['content-type'], re.IGNORECASE
|
||||||
)
|
)
|
||||||
|
|
||||||
if '' != charset:
|
if charset:
|
||||||
try:
|
try:
|
||||||
html_text = html_text.decode(charset)
|
html_text = html_text.decode(charset)
|
||||||
except LookupError:
|
except LookupError:
|
||||||
@@ -193,6 +189,6 @@ def extract_title(url):
|
|||||||
except UnicodeDecodeError as e: # idk why this can happen, but it does
|
except UnicodeDecodeError as e: # idk why this can happen, but it does
|
||||||
log.warn('parser.unescape() expoded here: ' + str(e))
|
log.warn('parser.unescape() expoded here: ' + str(e))
|
||||||
expanded_html = match
|
expanded_html = match
|
||||||
return 0, expanded_html
|
return expanded_html
|
||||||
else:
|
else:
|
||||||
return 2, 'no title'
|
return None
|
||||||
|
|||||||
35
plugins.py
35
plugins.py
@@ -1055,6 +1055,7 @@ def flausch(argv, **args):
|
|||||||
'msg': '{}: *flausch*'.format(argv[1])
|
'msg': '{}: *flausch*'.format(argv[1])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pluginfunction('resolve-url-title', 'extract titles from urls', ptypes_PARSE, ratelimit_class=RATE_URL)
|
@pluginfunction('resolve-url-title', 'extract titles from urls', ptypes_PARSE, ratelimit_class=RATE_URL)
|
||||||
def resolve_url_title(**args):
|
def resolve_url_title(**args):
|
||||||
user = args['reply_user']
|
user = args['reply_user']
|
||||||
@@ -1086,37 +1087,17 @@ def resolve_url_title(**args):
|
|||||||
# b'a.a.'
|
# b'a.a.'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
(status, title) = extract_title(url)
|
title = extract_title(url)
|
||||||
except UnicodeError as e:
|
except UnicodeError as e:
|
||||||
(status, title) = (4, str(e))
|
message = 'Bug triggered (%s), invalid URL/domain part: %s' % (str(e), url)
|
||||||
|
log.warn(message)
|
||||||
|
return {'msg': message}
|
||||||
|
|
||||||
if 0 == status:
|
if title:
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
message = 'Title: %s' % title
|
message = 'Title: %s' % title
|
||||||
elif 1 == status:
|
message = message.replace('\n', '\\n')
|
||||||
if config.conf_get('image_preview'):
|
out.append(message)
|
||||||
# of course it's fake, but it looks interesting at least
|
|
||||||
char = r""",._-+=\|/*`~"'"""
|
|
||||||
message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
|
|
||||||
title, random.choice(char)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
log.info('no message sent for non-text %s (%s)' % (url, title))
|
|
||||||
continue
|
|
||||||
elif 2 == status:
|
|
||||||
message = '(No title)'
|
|
||||||
elif 3 == status:
|
|
||||||
message = title
|
|
||||||
elif 4 == status:
|
|
||||||
message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
|
|
||||||
log.warn(message)
|
|
||||||
else:
|
|
||||||
message = 'some error occurred when fetching %s' % url
|
|
||||||
|
|
||||||
message = message.replace('\n', '\\n')
|
|
||||||
|
|
||||||
log.info('adding to out buf: ' + message)
|
|
||||||
out.append(message)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'msg': out
|
'msg': out
|
||||||
|
|||||||
Reference in New Issue
Block a user