replace urllib with requests

This commit is contained in:
Thorsten
2015-12-31 15:31:34 +01:00
parent 35158a623d
commit 5c0846ea69
2 changed files with 27 additions and 22 deletions

View File

@@ -5,7 +5,7 @@ import json
 import logging
 import re
 import time
-import urllib.request
+import requests
 from collections import namedtuple
 from urllib.error import URLError
@@ -126,20 +126,23 @@ VERSION = get_version_git()
 def fetch_page(url):
     log = logging.getLogger(__name__)
     log.info('fetching page ' + url)
-    request = urllib.request.Request(url)
-    request.add_header('User-Agent', USER_AGENT)
-    response = urllib.request.urlopen(request)
-    html_text = response.read(BUFSIZ)  # ignore more than BUFSIZ
-    if html_text[0] == 0x1f and html_text[1] == 0x8b:
-        import zlib
-        try:
-            gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
-        except:
-            pass
-        else:
-            html_text = gzip_data
-    response.close()
-    return html_text, response.headers
+    # request = urllib.request.Request(url)
+    # request.add_header('User-Agent', USER_AGENT)
+    # response = urllib.request.urlopen(request)
+    # html_text = response.read(BUFSIZ)  # ignore more than BUFSIZ
+    # if html_text[0] == 0x1f and html_text[1] == 0x8b:
+    #     import zlib
+    #     try:
+    #         gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
+    #     except:
+    #         pass
+    #     else:
+    #         html_text = gzip_data
+    # response.close()
+    response = requests.get(url, headers={'User-Agent': USER_AGENT}, stream=True)
+    content = response.raw.read(BUFSIZ, decode_content=True)
+    # return html_text, response.headers
+    return content, response.headers
 
 
 def extract_title(url):
@@ -171,11 +174,11 @@ def extract_title(url):
         r'\g<charset>', headers['content-type'], re.IGNORECASE
     )
-    if charset:
-        try:
-            html_text = html_text.decode(charset)
-        except LookupError:
-            log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))
+    # if charset:
+    #     try:
+    #         html_text = html_text.decode(charset)
+    #     except LookupError:
+    #         log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))
     if str != type(html_text):
         html_text = str(html_text)
@@ -197,10 +200,10 @@ def extract_title(url):
 def giphy(subject, api_key):
     url = 'http://api.giphy.com/v1/gifs/random?tag={}&api_key={}&limit=1&offset=0'.format(subject, api_key)
-    response = urllib.request.urlopen(url)
+    response = requests.get(url)
     giphy_url = None
     try:
-        data = json.loads(response.read().decode('utf-8'))
+        data = response.json()
         giphy_url = data['data']['image_url']
     except:
         pass

View File

@@ -1,3 +1,5 @@
 fasteners
 sleekxmpp
 configobj
+lxml
+requests