# 利用正则及异步协程 爬取豆瓣top250电影图片
# author:xyislove
# 欢迎各位大神修改指导
from asyncio import tasks
import re
from fake_useragent import UserAgent
import os
import asyncio
import aiofiles
import aiohttp
############分页获取图片链接#############################################
# Matches one poster <img> per movie entry; the named group ``link`` holds the
# image URL. re.S lets ``.*?`` span the line breaks between the tags.
# NOTE(review): the leading '<div class="pic">' anchor was lost from the
# original pattern and is reconstructed here -- confirm against the live markup.
_POSTER_LINK_RE = re.compile(
    r'<div class="pic">.*?src="(?P<link>.*?)" class=', re.S
)


async def get_pic(url, headers, pic_url, name):
    """Fetch one listing page and collect the poster image links found on it.

    Args:
        url: Address of the listing page to request.
        headers: HTTP headers sent with the request.
        pic_url: List that receives every extracted link (mutated in place).
        name: Label for this page, used only in the progress message.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            pagecontent = await resp.text()
    # The page text is fully read at this point, so parsing does not need to
    # keep the connection open.
    for match in _POSTER_LINK_RE.finditer(pagecontent):
        link = match.group('link')
        print(link)
        pic_url.append(link)
    print(f'{name}完成!')
async def aio_download():
    """Collect poster links from all ten listing pages concurrently.

    Each page writes into its own list, and the lists are joined in page
    order afterwards. The result therefore does not depend on which request
    happens to finish first.

    Returns:
        list: Image links, page 1 first. Pages whose request failed contribute
        whatever links they had gathered before failing (possibly none).
    """
    ua = UserAgent()  # build once; ``ua.random`` still varies per page
    per_page_links = []
    tasks = []
    for page_no, start in enumerate(range(0, 250, 25), start=1):
        print(start)
        headers = {'User-Agent': ua.random}
        url = f'https://movie.douban.com/top250?start={start}&filter='
        name = f'第{page_no}页'
        links = []
        per_page_links.append(links)
        tasks.append(asyncio.create_task(get_pic(url, headers, links, name)))
    # return_exceptions=True keeps the run best-effort while still surfacing
    # failures, instead of leaving task exceptions unretrieved.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for page_no, result in enumerate(results, start=1):
        if isinstance(result, BaseException):
            print(f'第{page_no}页失败: {result!r}')
    return [link for links in per_page_links for link in links]
##############分页获取图片链接##结束#######################################################
##############异步协程下载图片########################
async def pic_download(i, session, headers, name):
    """Download one image and store it as ``./pic/<name>.jpg``.

    The ``./pic`` directory is not created here; it must already exist.

    Args:
        i: URL of the image to fetch.
        session: Client session used to issue the GET request.
        headers: HTTP headers sent with the request.
        name: File name stem for the saved image.
    """
    async with session.get(i, headers=headers) as resp:
        # Read the body before opening the file, so a failed transfer does
        # not leave an empty .jpg behind.
        data = await resp.content.read()
    # ``async with`` closes the file; no explicit close() is needed. (The
    # aiofiles close() is a coroutine, so calling it without ``await`` would
    # only produce a "never awaited" warning.)
    async with aiofiles.open(f'./pic/{name}.jpg', mode='wb') as f:
        await f.write(data)
async def aio_download2(pic):
    """Download every image listed in ``pic`` concurrently into ``./pic``.

    Files are named ``1.jpg``, ``2.jpg``, ... following the order of the
    non-blank entries in ``pic``.

    Args:
        pic: Iterable of image URLs. Surrounding whitespace (such as a
            trailing newline) is stripped and blank entries are ignored.
    """
    stripped = (entry.strip() for entry in pic)
    links = [link for link in stripped if link]
    if not links:
        # asyncio.wait() raises ValueError when given an empty task set.
        return
    os.makedirs('./pic', exist_ok=True)  # target folder for pic_download()
    ua = UserAgent()
    headers = {'User-Agent': ua.random}
    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.create_task(pic_download(link, session, headers, name))
            for name, link in enumerate(links, start=1)
        ]
        await asyncio.wait(tasks)
##############结束#############################
# 主函数####
def main():
    """Collect the poster links, save them to ``pic-url.txt``, then download them."""
    # Step 1: fetch the image links and save them to a text file, one per line.
    pic_url = asyncio.run(aio_download())
    # Mode 'w' rather than 'a': every run fetches the complete list again, so
    # appending would only queue duplicates from earlier runs for download.
    with open('pic-url.txt', mode='w', encoding='utf-8') as f:
        f.writelines(f'{link.strip()}\n' for link in pic_url)
    # Step 2: read the links back and download the images into the folder.
    with open('pic-url.txt', mode='r', encoding='utf-8') as f:
        pic = list(f)
    asyncio.run(aio_download2(pic))
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()