"""Scrape the WeChat Read (weread.qq.com) overall top list into a CSV file."""
import csv
import re

URL = "https://weread.qq.com/web/category/all"
CSV_PATH = "weixingRead.csv"
HEADER = ["排名", "作者", "书名", "描述"]

# NOTE(review): the HTML fragments of this pattern were stripped when the
# script was copied from a web page. Only the four group names (taken from the
# dict keys used below), their order, the ``\d+`` for the rank and the
# ``<.*?>`` wrapper in front of the author survived. The tag and class names
# here are a reconstruction -- confirm them against the live page markup.
BOOK_PATTERN = re.compile(
    r'<p class="wr_bookList_item_index">\s*(?P<index>\d+)\s*</p>.*?'
    r'<p class="wr_bookList_item_title">(?P<name>.*?)</p>.*?'
    r'<p class="wr_bookList_item_author">\s*<.*?>(?P<author>.*?)</a>.*?'
    r'<p class="wr_bookList_item_desc">(?P<description>.*?)</p>',
    re.S,  # book entries span several lines, so "." must match newlines too
)


def fetch_page(url=URL, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
    """
    # Imported here so the parsing and CSV helpers stay usable (and testable)
    # on machines without the third-party ``requests`` package.
    import requests

    res = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page into an empty CSV.
    res.raise_for_status()
    res.encoding = 'utf-8'
    return res.text


def parse_books(html):
    """Return one dict per book found in *html*, keyed by the CSV column names."""
    books = []
    for match in BOOK_PATTERN.finditer(html):
        groups = match.groupdict()
        books.append({
            '作者': groups['author'],
            '排名': groups['index'],
            '书名': groups['name'],
            '描述': groups['description'],
        })
    return books


def save_books(books, path=CSV_PATH):
    """Write *books* to *path* as UTF-8 CSV with a header row."""
    # newline='' stops the csv module from emitting blank lines between rows.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=HEADER)
        writer.writeheader()  # column names go first
        writer.writerows(books)


def main():
    """Fetch the ranking page, print each parsed book and save them all."""
    books = parse_books(fetch_page())
    for book in books:
        print(book)
    save_books(books)


if __name__ == "__main__":
    main()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)