From 5c0846ea69bdf963ab23defa97ea92a3ca5dfeda Mon Sep 17 00:00:00 2001
From: Thorsten <mail@aero2k.de>
Date: Thu, 31 Dec 2015 15:31:34 +0100
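Subject: [PATCH] replace urllib with requests

fetch_page() now issues the request through requests with stream=True and
reads at most BUFSIZ bytes from the raw stream. decode_content=True lets
urllib3 undo gzip/deflate Content-Encoding, which replaces the manual
zlib magic-byte check that the urllib version needed.

giphy() uses requests.get() and Response.json() instead of urlopen() plus
json.loads(). requirements.txt gains requests and lxml.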

---
 common.py        | 47 +++++++++++++++++++++++++----------------------
 requirements.txt |  2 ++
 2 files changed, 27 insertions(+), 22 deletions(-)
diff --git a/common.py b/common.py
index ecb3464..6523b28 100644
--- a/common.py
+++ b/common.py
@@ -5,7 +5,7 @@ import json
 import logging
 import re
 import time
-import urllib.request
+import requests
 from collections import namedtuple
 from urllib.error import URLError
 
@@ -126,20 +126,23 @@ VERSION = get_version_git()
 def fetch_page(url):
     log = logging.getLogger(__name__)
     log.info('fetching page ' + url)
-    request = urllib.request.Request(url)
-    request.add_header('User-Agent', USER_AGENT)
-    response = urllib.request.urlopen(request)
-    html_text = response.read(BUFSIZ)  # ignore more than BUFSIZ
-    if html_text[0] == 0x1f and html_text[1] == 0x8b:
-        import zlib
-        try:
-            gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
-        except:
-            pass
-        else:
-            html_text = gzip_data
-    response.close()
-    return html_text, response.headers
+    # Stream the response so only the first BUFSIZ bytes of the body are
+    # read; decode_content=True asks urllib3 to undo any gzip/deflate
+    # Content-Encoding, replacing the manual zlib sniffing urllib needed.
+    response = requests.get(
+        url,
+        headers={'User-Agent': USER_AGENT},
+        stream=True,
+    )
+    try:
+        content = response.raw.read(BUFSIZ, decode_content=True)
+    finally:
+        # With stream=True the connection is not handed back to the pool
+        # until the body is fully consumed or close() is called, and the
+        # body is deliberately truncated here -- so close explicitly.
+        response.close()
+    # content is undecoded bytes; headers stay readable after close().
+    return content, response.headers
 
 
 def extract_title(url):
@@ -171,11 +174,11 @@ def extract_title(url):
             r'\g<charset>', headers['content-type'], re.IGNORECASE
         )
 
-    if charset:
-        try:
-            html_text = html_text.decode(charset)
-        except LookupError:
-            log.warn("invalid charset in '%s': '%s'" % (headers['content-type'], charset))
+    if charset:  # body is still bytes here; str() on bytes would give "b'...'"
+        try:
+            html_text = html_text.decode(charset)
+        except LookupError:  # Content-Type names a codec Python does not know
+            log.warning("invalid charset in '%s': '%s'", headers['content-type'], charset)
 
     if str != type(html_text):
         html_text = str(html_text)
@@ -197,10 +200,10 @@ def extract_title(url):
 
 def giphy(subject, api_key):
     url = 'http://api.giphy.com/v1/gifs/random?tag={}&api_key={}&limit=1&offset=0'.format(subject, api_key)
-    response = urllib.request.urlopen(url)
+    response = requests.get(url)
     giphy_url = None
     try:
-        data = json.loads(response.read().decode('utf-8'))
+        data = response.json()
         giphy_url = data['data']['image_url']
     except:
         pass
diff --git a/requirements.txt b/requirements.txt
index bb8a682..2845491 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 fasteners
 sleekxmpp
 configobj
+lxml
+requests