Handle some pages that send gzip-compressed content even though the request sent no Accept-Encoding header
This commit is contained in:
@@ -159,6 +159,14 @@ def fetch_page(url):
|
|||||||
request.add_header('User-Agent', USER_AGENT)
|
request.add_header('User-Agent', USER_AGENT)
|
||||||
response = urllib.request.urlopen(request)
|
response = urllib.request.urlopen(request)
|
||||||
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
|
html_text = response.read(BUFSIZ) # ignore more than BUFSIZ
|
||||||
|
if html_text[0] == 0x1f and html_text[1] == 0x8b:
|
||||||
|
import zlib
|
||||||
|
try:
|
||||||
|
gzip_data = zlib.decompress(html_text, zlib.MAX_WBITS | 16)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
html_text = gzip_data
|
||||||
response.close()
|
response.close()
|
||||||
return 0, html_text, response.headers
|
return 0, html_text, response.headers
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user