我会使用 FormRequest.from_response(),它会为您完成所有工作,因为手动构造请求时您仍然可能会遗漏一些表单字段:
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.http import FormRequest, Request
from robots_immo.items import AnnonceItem


class ElyseAvenueSpider(BaseSpider):
    """Spider that submits the elyseavenue.com search form and scrapes the results.

    The start page is fetched first so that the search form can be filled in
    from the live HTML; only the two fields of interest are overridden.
    """

    name = "elyse_avenue"
    allowed_domains = ["elyseavenue.com"]  # corrected by the answer's author
    start_urls = ["http://www.elyseavenue.com/"]  # added by the answer's author

    def parse(self, response):
        """Submit the 'moteurRecherche' form found in the start page.

        ``FormRequest.from_response`` pre-populates the request from the form
        in ``response``, so only the fields listed in ``formdata`` are
        overridden here. The resulting page is handed to ``parseAnnonces``.
        """
        yield FormRequest.from_response(
            response,
            formname='moteurRecherche',
            formdata={
                'recherche_distance_km_0': '20',
                'recherche_type_logement': '9',
            },
            callback=self.parseAnnonces,
        )

    def parseAnnonces(self, response):
        """Build one ``AnnonceItem`` per listing node in the results page.

        Returns a list of items whose 'nom', 'superficie' and 'prix' fields
        hold whatever ``extract()`` returns for the matching text nodes.
        """
        hxs = HtmlXPathSelector(response)
        # NOTE(review): the class value of this predicate was stripped from the
        # published snippet (it read `div[@]`, which is not valid XPath).
        # `[@class]` keeps the expression valid but matches every child div
        # that has a class attribute -- restore the exact class name.
        annonces = hxs.select('//div[@id="contenuCentre"]/div[@class]')
        items = []
        for annonce in annonces:
            item = AnnonceItem()
            item['nom'] = annonce.select(
                'span[contains(@class,"nomBienImmo")]/a/text()').extract()
            item['superficie'] = annonce.select(
                'table//tr[2]/td[2]/span/text()').extract()
            # NOTE(review): same extraction damage as above (`span[@]`);
            # restore the exact class name of the price span.
            item['prix'] = annonce.select(
                'span[@class]/span[1]/text()').extract()
            items.append(item)
        return items
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)