import asyncio

from requests_html import HTMLSession

# Target listing page to crawl.
url = 'http://www.xiaohuar.com/hua/'

# Launch a visible (non-headless) Chromium via requests-html/pyppeteer.
# NOTE: '--no-sandbox' is required when running as root inside containers.
session = HTMLSession(
    browser_args=[
        '--no-sandbox',
        '--disable-infobars',
        # Spoof a desktop Chrome UA so the site serves the normal page.
        '--user-agent=Mozilla/5.0 (windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML,like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    ],
    headless=False,
)

res = session.request(url=url, method='GET')

try:
    # Render the page with JavaScript executed; keep_page=True keeps the
    # pyppeteer Page object alive so we can drive it below.
    res.html.render(keep_page=True)

    async def main():
        """Wait for the page to settle, then print every link href found."""
        await res.html.page.waitFor(1000)  # crude settle delay (ms)
        await res.html.page.setViewport({'width': 1366, 'height': 768})
        # NOTE(review): the attribute predicate was lost in transcription —
        # '//div[@]/a' is not valid XPath. TODO: restore the original filter,
        # e.g. '//div[@class="..."]/a'.
        link_elements = await res.html.page.xpath('//div[@]/a')
        for link_el in link_elements:
            # getProperty returns a JSHandle; jsonValue() resolves it to str.
            href = await (await link_el.getProperty('href')).jsonValue()
            print(href)

    asyncio.get_event_loop().run_until_complete(main())
except Exception as e:
    # Best-effort script: report and fall through to cleanup.
    print(e)
finally:
    # Always close the session so the spawned browser process is shut down.
    session.close()
以上是内存溢出为你收集整理的 requests-html 简单爬虫的全部内容，希望文章能够帮你解决使用 requests-html 编写简单爬虫时所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)