From 9c3f7dae0f8edeb6b5288bc5ae99547b4584c6e8 Mon Sep 17 00:00:00 2001 From: Thorsten Date: Sat, 19 Dec 2015 22:53:28 +0100 Subject: [PATCH] handle some pages sending gzip without the accept header --- common.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/common.py b/common.py index 20a4297..e3ac26d 100644 --- a/common.py +++ b/common.py @@ -159,6 +159,14 @@ def fetch_page(url): request.add_header('User-Agent', USER_AGENT) response = urllib.request.urlopen(request) html_text = response.read(BUFSIZ) # ignore more than BUFSIZ + if html_text[0] == 0x1f and html_text[1] == 0x8b: + import zlib + try: + gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16) + except: + pass + else: + html_text = gzip_data response.close() return 0, html_text, response.headers except Exception as e: