import requests
from bs4 import BeautifulSoup
import csv
import time
from tqdm import tqdm

# Global accumulators, filled page by page by run().
bt_list = []    # titles
all_list = []   # raw (price, district, area) strings, flattened, all pages
jg_list = []    # prices
wz_list = []    # districts
mj_list = []    # areas
url_list = []   # detail-page links
head = ["标题", "地区", "价格", "面积", "详情链接"]


def run(i):
    """Scrape one listing page of beijing.qfang.com and append its rows
    to the module-level accumulator lists.

    :param i: 1-based page index passed to the site's pageIndex query param.
    """
    url = f"https://beijing.qfang.com/newhouse/newhouseIndexList?pageIndex={i}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
    }
    # timeout so a stalled request cannot hang the whole crawl
    page = requests.get(url=url, headers=headers, timeout=10).text
    soup = BeautifulSoup(page, 'lxml')

    for link in soup.select(".link-box"):
        url_list.append(f"https://beijing.qfang.com{link['href']}")

    for anchor in soup.select(".clearfix>div>a"):
        if anchor.string is not None:
            bt_list.append(anchor.string)

    # The ".con" cells of each listing arrive as consecutive triplets:
    # (price, district, area).  Collect only THIS page's strings before
    # slicing — the original code re-sliced the cumulative global list on
    # every call, duplicating all earlier pages' data on each new page.
    page_con = [cell.string for cell in soup.select(".con")
                if cell.string is not None]
    all_list.extend(page_con)
    jg_list.extend(page_con[0::3])  # prices
    wz_list.extend(page_con[1::3])  # districts
    mj_list.extend(page_con[2::3])  # areas


if __name__ == "__main__":
    # Crawl pages 1..20 with a small delay to stay polite to the server.
    for i in tqdm(range(1, 21)):
        time.sleep(0.5)
        run(i)

    # utf-8-sig so Excel on Windows opens the Chinese text correctly
    # (the original relied on the locale default encoding).
    with open("房源信息.csv", 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(head)
        # zip truncates to the shortest list, so a partially-parsed page
        # cannot raise IndexError as the original index-based loop could.
        writer.writerows(zip(bt_list, wz_list, jg_list, mj_list, url_list))
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)