"""Fetch a web page, undo its HTTP content encoding, and print the HTML."""

import gzip
import re
import urllib
import urllib.request
import zlib
from gzip import GzipFile
from io import BytesIO
from urllib.request import urlopen

import brotli
from bs4 import BeautifulSoup


def loadData(url):
    """Download *url* and return the response body as decoded bytes.

    The request advertises gzip and deflate support.  The body is then
    decompressed according to the ``Content-Encoding`` response header;
    a ``br`` (brotli) body is also handled in case the server sends one
    even though it is not advertised.  A body with any other (or no)
    content encoding is returned unchanged.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The response body as ``bytes`` after content decoding.
    """
    request = urllib.request.Request(url)
    request.add_header('Accept-encoding', 'gzip,deflate')

    # The context manager closes the connection even if reading fails.
    with urlopen(request) as response:
        content = response.read()
        encoding = response.info().get('Content-Encoding')

    # Content-coding tokens are case-insensitive, and the header may be absent.
    encoding = (encoding or '').strip().lower()

    decoders = {'gzip': gzip1, 'deflate': deflate, 'br': br}
    decoder = decoders.get(encoding)
    if decoder is not None:
        content = decoder(content)
    return content


def gzip1(data):
    """Return the decompressed payload of the gzip-format bytes *data*."""
    with gzip.GzipFile(fileobj=BytesIO(data)) as f:
        return f.read()


def deflate(data):
    """Return the decompressed payload of the deflate-encoded bytes *data*.

    A raw deflate stream (no zlib header) is tried first; if that fails the
    data is decoded as a zlib-wrapped stream instead.
    """
    try:
        # Negative wbits tells zlib to expect a raw stream without a header.
        return zlib.decompress(data, -zlib.MAX_WBITS)
    except zlib.error:
        return zlib.decompress(data)


def br(data):
    """Return the decompressed payload of the brotli-encoded bytes *data*."""
    return brotli.decompress(data)


def main():
    """Fetch the hard-coded ranking page, decode it as GB18030, and print it."""
    url = "https://www.jjwxc.net/topten.php?orderstr=7&t=0"
    content = loadData(url)
    html = content.decode("gb18030")
    print(html)


if __name__ == '__main__':
    main()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)