import requests
from lxml import etree


class Fsssezj:
    """Fetch one web page, extract the text inside its ``<p>`` elements, and
    append that text to a local file.

    Attributes:
        header: HTTP request headers sent with the download (a browser
            ``user-agent`` string).
        url: Address of the page to download.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block forever on an unresponsive host.
    REQUEST_TIMEOUT = 10

    # File that extracted text is appended to.
    OUTPUT_PATH = 'shiji.txt'

    def __init__(self):
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'}
        self.url = 'http://ewenyan.com/'

    # Fetch the data
    def get_data(self) -> str:
        """Download ``self.url`` and return the page body as text.

        Returns:
            The response body decoded as GBK.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                4xx/5xx HTTP status.
        """
        response = requests.get(
            self.url,
            headers=self.header,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly on an error status instead of scraping an error page.
        response.raise_for_status()
        # Force GBK decoding to avoid mojibake on a gb2312-encoded page.
        response.encoding = 'gbk'
        return response.text

    # Parse the data
    def parse_data(self, response) -> list:
        """Return the text nodes that are direct children of ``<p>`` elements.

        Args:
            response: HTML source of the page, as text.

        Returns:
            A list of string-like lxml results, in the order ``xpath`` yields
            them; an empty list when there is nothing to parse.
        """
        # Nothing to parse; skip lxml rather than rely on how it treats
        # blank input.
        if not response or not response.strip():
            return []
        html = etree.HTML(response)
        # Defensive: guard against the parser producing no root element.
        if html is None:
            return []
        return html.xpath('//p/text()')

    def write(self, content_text) -> None:
        """Append all extracted text fragments to the output file.

        Args:
            content_text: Iterable of text fragments (for example the list
                returned by ``parse_data``).
        """
        # ``str()`` converts lxml's string subclass results into plain str;
        # the fragments are concatenated with no separator.
        data = ''.join(str(fragment) for fragment in content_text)
        with open(self.OUTPUT_PATH, 'a', encoding='utf8') as f:
            f.write(data)

    def run(self) -> None:
        """Run the full pipeline: download, parse, then append to the file."""
        response = self.get_data()
        content_text = self.parse_data(response)
        self.write(content_text)


if __name__ == '__main__':
    # Use a separate name so the class itself is not rebound to an instance.
    scraper = Fsssezj()
    scraper.run()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)