1
0
mirror of http://aero2k.de/t/repos/urlbot-native.git synced 2017-09-06 15:25:38 +02:00

Remove error output for URL resolving if it is the URL itself that is broken.

This commit is contained in:
Thorsten S
2015-12-21 19:39:09 +01:00
parent 603791b7bb
commit cd27764e37
2 changed files with 34 additions and 57 deletions

View File

@@ -6,6 +6,7 @@ import re
import time import time
import urllib.request import urllib.request
from collections import namedtuple from collections import namedtuple
from urllib.error import URLError
RATE_NO_LIMIT = 0x00 RATE_NO_LIMIT = 0x00
RATE_GLOBAL = 0x01 RATE_GLOBAL = 0x01
@@ -124,24 +125,20 @@ VERSION = get_version_git()
def fetch_page(url): def fetch_page(url):
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
log.info('fetching page ' + url) log.info('fetching page ' + url)
try: request = urllib.request.Request(url)
request = urllib.request.Request(url) request.add_header('User-Agent', USER_AGENT)
request.add_header('User-Agent', USER_AGENT) response = urllib.request.urlopen(request)
response = urllib.request.urlopen(request) html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ if html_text[0] == 0x1f and html_text[1] == 0x8b:
if html_text[0] == 0x1f and html_text[1] == 0x8b: import zlib
import zlib try:
try: gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16) except:
except: pass
pass else:
else: html_text = gzip_data
html_text = gzip_data response.close()
response.close() return html_text, response.headers
return 0, html_text, response.headers
except Exception as e:
log.warn('failed: %s' % e)
return 1, str(e), 'dummy'
def extract_title(url): def extract_title(url):
@@ -150,19 +147,18 @@ def extract_title(url):
if 'repo/urlbot-native.git' in url: if 'repo/urlbot-native.git' in url:
log.info('repo URL found: ' + url) log.info('repo URL found: ' + url)
return 3, 'wee, that looks like my home repo!' return 'wee, that looks like my home repo!', []
log.info('extracting title from ' + url) log.info('extracting title from ' + url)
(code, html_text, headers) = fetch_page(url) try:
(html_text, headers) = fetch_page(url)
except URLError as e:
return None
except Exception as e:
return 'failed: %s for %s' % (str(e), url)
if 1 == code: charset = None
return 3, 'failed: %s for %s' % (html_text, url)
if not html_text:
return -1, 'error'
charset = ''
if 'content-type' in headers: if 'content-type' in headers:
log.debug('content-type: ' + headers['content-type']) log.debug('content-type: ' + headers['content-type'])
@@ -174,7 +170,7 @@ def extract_title(url):
r'\g<charset>', headers['content-type'], re.IGNORECASE r'\g<charset>', headers['content-type'], re.IGNORECASE
) )
if '' != charset: if charset:
try: try:
html_text = html_text.decode(charset) html_text = html_text.decode(charset)
except LookupError: except LookupError:
@@ -193,6 +189,6 @@ def extract_title(url):
except UnicodeDecodeError as e: # idk why this can happen, but it does except UnicodeDecodeError as e: # idk why this can happen, but it does
log.warn('parser.unescape() expoded here: ' + str(e)) log.warn('parser.unescape() expoded here: ' + str(e))
expanded_html = match expanded_html = match
return 0, expanded_html return expanded_html
else: else:
return 2, 'no title' return None

View File

@@ -1055,6 +1055,7 @@ def flausch(argv, **args):
'msg': '{}: *flausch*'.format(argv[1]) 'msg': '{}: *flausch*'.format(argv[1])
} }
@pluginfunction('resolve-url-title', 'extract titles from urls', ptypes_PARSE, ratelimit_class=RATE_URL) @pluginfunction('resolve-url-title', 'extract titles from urls', ptypes_PARSE, ratelimit_class=RATE_URL)
def resolve_url_title(**args): def resolve_url_title(**args):
user = args['reply_user'] user = args['reply_user']
@@ -1086,37 +1087,17 @@ def resolve_url_title(**args):
# b'a.a.' # b'a.a.'
try: try:
(status, title) = extract_title(url) title = extract_title(url)
except UnicodeError as e: except UnicodeError as e:
(status, title) = (4, str(e)) message = 'Bug triggered (%s), invalid URL/domain part: %s' % (str(e), url)
log.warn(message)
return {'msg': message}
if 0 == status: if title:
title = title.strip() title = title.strip()
message = 'Title: %s' % title message = 'Title: %s' % title
elif 1 == status: message = message.replace('\n', '\\n')
if config.conf_get('image_preview'): out.append(message)
# of course it's fake, but it looks interesting at least
char = r""",._-+=\|/*`~"'"""
message = 'No text but %s, 1-bit ASCII art preview: [%c]' % (
title, random.choice(char)
)
else:
log.info('no message sent for non-text %s (%s)' % (url, title))
continue
elif 2 == status:
message = '(No title)'
elif 3 == status:
message = title
elif 4 == status:
message = 'Bug triggered (%s), invalid URL/domain part: %s' % (title, url)
log.warn(message)
else:
message = 'some error occurred when fetching %s' % url
message = message.replace('\n', '\\n')
log.info('adding to out buf: ' + message)
out.append(message)
return { return {
'msg': out 'msg': out