cleanup

2015-07-19 20:47:57 +02:00
parent 52fb4d11d3
commit d1bbf13d6e
1 changed files with 31 additions and 31 deletions
--- a/urlbot.py
+++ b/urlbot.py
@@ -58,45 +58,45 @@ def extract_title(url):
 	if 1 == code:
 		return (3, 'failed: %s for %s' % (html_text, url))
-	if html_text:
+	if not html_text:
-		charset = ''
+		return (-1, 'error')
 		if 'content-type' in headers:
 			log.debug('content-type: ' + headers['content-type'])
-			if 'text/' != headers['content-type'][:len('text/')]:
+	charset = ''
-				return (1, headers['content-type'])
+	if 'content-type' in headers:
 		log.debug('content-type: ' + headers['content-type'])
-			charset = re.sub(
+		if 'text/' != headers['content-type'][:len('text/')]:
-				'.*charset=(?P<charset>\S+).*',
+			return (1, headers['content-type'])
 				'\g<charset>', headers['content-type'], re.IGNORECASE
 			)
-		if '' != charset:
+		charset = re.sub(
-			try:
+			'.*charset=(?P<charset>\S+).*',
-				html_text = html_text.decode(charset)
+			'\g<charset>', headers['content-type'], re.IGNORECASE
-			except LookupError:
+		)
 				log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))
-		if str != type(html_text):
+	if '' != charset:
-			html_text = str(html_text)
+		try:
 			html_text = html_text.decode(charset)
 		except LookupError:
 			log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))
-		result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html_text, re.S | re.M | re.IGNORECASE)
+	if str != type(html_text):
-		if result:
+		html_text = str(html_text)
 			match = result.groups()[0]
-			if None == parser:
+	result = re.match(r'.*?<title.*?>(.*?)</title>.*?', html_text, re.S | re.M | re.IGNORECASE)
-				parser = html.parser.HTMLParser()
+	if result:
 		match = result.groups()[0]
-			try:
+		if None == parser:
-				expanded_html = parser.unescape(match)
+			parser = html.parser.HTMLParser()
 			except UnicodeDecodeError as e:  # idk why this can happen, but it does
 				log.warn('parser.unescape() expoded here: ' + str(e))
 				expanded_html = match
 			return (0, expanded_html)
 		else:
 			return (2, 'no title')
-	return (-1, 'error')
+		try:
 			expanded_html = parser.unescape(match)
 		except UnicodeDecodeError as e:  # idk why this can happen, but it does
 			log.warn('parser.unescape() expoded here: ' + str(e))
 			expanded_html = match
 		return (0, expanded_html)
 	else:
 		return (2, 'no title')
 def send_reply(message, msg_obj=None):
 	set_conf('request_counter', conf('request_counter') + 1)