python异步协程,抓取豆瓣top250电影海报。

python异步协程,抓取豆瓣top250电影海报,第1张

python异步协程,抓取豆瓣top250电影海报。
# 利用正则及异步协程 爬取豆瓣top250电影图片
# author:xyislove
# 欢迎各位大神修改指导
from asyncio import tasks
import re
from fake_useragent import UserAgent
import os
import asyncio
import aiofiles
import aiohttp

############分页获取图片链接#############################################
# Matches each <li> entry of a listing page and captures the poster URL that
# follows it. re.S lets ".*?" span the line breaks between the <li> tag and
# the <img ... src="..." class=...> tag. The named group must be called
# 'link' because get_pic() reads it by that name.
POSTER_LINK_RE = re.compile(r'<li>.*?src="(?P<link>.*?)" class=', re.S)


async def get_pic(url, headers, pic_url, name):
    """Fetch one listing page and append every poster URL found to ``pic_url``.

    Args:
        url: Address of the listing page to download.
        headers: HTTP headers sent with the request.
        pic_url: List that receives the extracted links (mutated in place).
        name: Label printed in the completion message.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            pagecontent = await resp.text()

    for match in POSTER_LINK_RE.finditer(pagecontent):
        link = match.group('link')
        print(link)
        pic_url.append(link)
    print(f'{name}完成!')


async def aio_download():
    """Collect poster links from the ten Top-250 listing pages concurrently.

    Returns:
        A list of poster URLs ordered by page (and by position within a page),
        so that the n-th link belongs to the n-th ranked entry regardless of
        which request happened to finish first.
    """
    ua = UserAgent()
    per_page_links = []
    jobs = []
    for page_no, start in enumerate(range(0, 250, 25), start=1):
        print(start)
        headers = {'User-Agent': ua.random}
        url = f'https://movie.douban.com/top250?start={start}&filter='
        # Each page fills its own list; sharing one list would interleave
        # results in completion order.
        links = []
        per_page_links.append(links)
        jobs.append(get_pic(url, headers, links, f'第{page_no}页'))

    # return_exceptions=True keeps the run best-effort (one failed page does
    # not cancel the others) while still letting us report what went wrong.
    outcomes = await asyncio.gather(*jobs, return_exceptions=True)
    for page_no, outcome in enumerate(outcomes, start=1):
        if isinstance(outcome, BaseException):
            print(f'page {page_no} failed: {outcome!r}')

    return [link for links in per_page_links for link in links]


async def pic_download(i, session, headers, name):
    """Download the image at URL ``i`` and store it as ``./pic/{name}.jpg``.

    Args:
        i: URL of the image.
        session: Open aiohttp client session used for the request.
        headers: HTTP headers sent with the request.
        name: File stem of the saved image.
    """
    async with session.get(i, headers=headers) as resp:
        payload = await resp.content.read()
    # Both context managers close their resource on exit, so no explicit
    # close() calls are needed (aiofiles' close() is a coroutine anyway).
    async with aiofiles.open(f'./pic/{name}.jpg', mode='wb') as f:
        await f.write(payload)


async def aio_download2(pic):
    """Download every URL in ``pic`` concurrently into the ``./pic`` directory.

    Files are numbered 1, 2, 3, ... in the order of the non-blank entries.

    Args:
        pic: Iterable of image URLs; surrounding whitespace is stripped and
            blank entries are ignored.
    """
    urls = [entry.strip() for entry in pic if entry.strip()]
    if not urls:
        return

    os.makedirs('./pic', exist_ok=True)  # aiofiles.open does not create it
    headers = {'User-Agent': UserAgent().random}
    async with aiohttp.ClientSession() as session:
        jobs = [
            pic_download(link, session, headers, index)
            for index, link in enumerate(urls, start=1)
        ]
        outcomes = await asyncio.gather(*jobs, return_exceptions=True)

    for index, outcome in enumerate(outcomes, start=1):
        if isinstance(outcome, BaseException):
            print(f'image {index} failed: {outcome!r}')


def main():
    """Scrape the poster links, save them to ``pic-url.txt``, then download them."""
    # Step 1: gather the links and persist them, one per line.
    pic_url = asyncio.run(aio_download())
    # Mode 'w' rather than 'a': the file is read back right below, so links
    # left over from an earlier run would be downloaded a second time.
    with open('pic-url.txt', mode='w', encoding='utf-8') as f:
        f.writelines(f'{link.strip()}\n' for link in pic_url)

    # Step 2: read the links back and download the images.
    with open('pic-url.txt', mode='r', encoding='utf-8') as f:
        pic = list(f)
    asyncio.run(aio_download2(pic))


if __name__ == '__main__':
    main()
  • 欢迎分享,转载请注明来源:内存溢出

    原文地址: https://outofmemory.cn/zaji/5680515.html

    (0)
    打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
    上一篇 2022-12-17
    下一篇 2022-12-17

    发表评论

    登录后才能评论

    评论列表(0条)

    保存