Running this with Google Chrome requires chromedriver.exe to be placed in the same directory as the script.
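If you would rather not copy chromedriver.exe next to the script, you can point Selenium at the driver explicitly. A minimal sketch, assuming a driver_path of your own choosing (the path below is only an example):

# Tell Selenium where chromedriver lives instead of relying on the script's
# directory or the PATH. Adjust driver_path to your own installation.
from selenium import webdriver

driver_path = r"C:\tools\chromedriver.exe"   # example location, change as needed

# Selenium 3 style:
browser = webdriver.Chrome(executable_path=driver_path)

# Selenium 4 style (executable_path was removed):
# from selenium.webdriver.chrome.service import Service
# browser = webdriver.Chrome(service=Service(driver_path))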
import os
import re
import urllib.request

import requests
from bs4 import BeautifulSoup
from selenium import webdriver                              # drives the browser
from selenium.webdriver.support.wait import WebDriverWait


def ID(path):
    """Search Douban for the given name and return the celebrity's numeric id."""
    browser = webdriver.Chrome()        # launch the Chrome driver; creates a browser instance
    wait = WebDriverWait(browser, 10)   # maximum time Selenium waits for the page to load
    try:
        url = "https://movie.douban.com/"
        browser.get(url)
        # Selenium 3 API; with Selenium 4 use browser.find_element(By.ID, "inp-query") etc.
        douban = browser.find_element_by_id("inp-query")
        douban.send_keys(path)
        douban = browser.find_element_by_css_selector(".nav-search .inp-btn input")
        douban.click()
        browser.find_element_by_class_name("cover").click()   # open the first search result
        url = browser.current_url
        ID = url.split("/")[-2]          # the celebrity id is the second-to-last path segment
        browser.close()
        return ID
    except Exception:
        pass


def getContent(q, w):
    """Fetch one page of the celebrity's photo list (q = celebrity id, w = start offset)."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
        "Connection": "keep-alive",
    }
    url = "https://movie.douban.com/celebrity/%s/photos/?type=C&start=%s&sortby=like&size=a&subtype=a" % (q, w)
    r = requests.get(url, headers=headers)          # request the page (url + headers)
    soup = BeautifulSoup(r.text, "html.parser")
    return soup


def getItem(soup):
    """Extract the image URLs from one photo-list page."""
    try:
        movieList = soup.find("ul", attrs={"class": "poster-col3 clearfix"})
        List = movieList.find_all("img")
        num = re.findall(r'src="(.+?)"', str(List))
        return num
    except Exception:
        os.removedirs(path)
        print("The name you entered is not a celebrity")


def downloadfile(url, dir, index):
    """Save one image into dir, numbering the files sequentially."""
    ext = url.split(".")[-1]
    path = dir + "/" + str(index) + "." + ext
    urllib.request.urlretrieve(url, path)


def creadDir(path):
    if not os.path.isdir(path):
        os.mkdir(path)


def ye(q):
    """Walk the photo pages 30 items at a time and download every image."""
    try:
        n = 1
        for w in range(0, 10000, 30):
            soup = getContent(q, w)
            List = getItem(soup)
            if not List:                 # no more photos: stop paging
                break
            for m in range(0, len(List)):
                url = List[m]
                downloadfile(url, path, n)
                n += 1
        return "done"
    except Exception:
        pass


path = input("Enter the celebrity to search for: ")
q = ID(path)
creadDir(path)
ye(q)
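One design note: getItem converts the list of img tags to a string and regex-matches src="..."; with BeautifulSoup it is usually cleaner to read the attribute directly. A minimal alternative sketch (my rewrite, not the original author's code) that also returns an empty list when the page has no photos, which is what the paging loop in ye checks for:

def getItem(soup):
    """Return the src of every image in the photo list, or [] if none are found."""
    photo_list = soup.find("ul", attrs={"class": "poster-col3 clearfix"})
    if photo_list is None:
        return []
    return [img["src"] for img in photo_list.find_all("img") if img.get("src")]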