# NOTE(review): leftover/unused top-level assignment from the blog text;
# main() below defines its own url — safe to remove.
url = "https://wallhaven.cc/toplist?page=1"
整套代码如下,仅供参考
import time
import aiohttp
import asyncio
import requests
import aiofiles
from lxml import etree
async def aiodownload(li):
    """Download one wallpaper described by toplist <li> element *li*.

    Resolves the detail page linked from the thumbnail, extracts the
    full-resolution image URL, and streams it to ``img/<name>``.

    Fixes vs. original:
    - The detail page was fetched with blocking ``requests.get()`` inside the
      coroutine, which stalls the whole event loop; now fetched via aiohttp.
    - On a suspected 429 (suspiciously short body) the original slept 3s but
      then parsed the bad response anyway; now it backs off and retries.

    :param li: lxml element for one ``<li>`` of the toplist grid.
    """
    async with aiohttp.ClientSession() as session:
        sub_url = "".join(li.xpath('./figure/a/@href'))
        # Fetch the detail page; retry a bounded number of times on a
        # suspected rate-limit response (crude heuristic kept from original).
        sub_text = ""
        for _ in range(5):
            async with session.get(sub_url, headers=headers) as sub_resp:
                sub_text = await sub_resp.text()
            if len(sub_text) >= 1000:
                break
            await asyncio.sleep(3)  # back off, then RE-fetch instead of parsing junk
        sub_html = etree.HTML(sub_text)
        img_url = "".join(sub_html.xpath('/html/body/main/section/div[1]/img/@src'))
        async with session.get(img_url, headers=headers) as img_resp:
            img_name = img_url.split('/')[-1]
            async with aiofiles.open("img/" + img_name, mode="wb") as f:
                await f.write(await img_resp.content.read())
            print("下载完成", img_name)
async def main():
    """Scrape page 1 of the wallhaven toplist and download all wallpapers concurrently.

    Fix vs. original: ``asyncio.wait([])`` raises ``ValueError`` when the
    XPath finds no ``<li>`` items; use ``asyncio.gather`` and guard the
    empty case.
    """
    url = "https://wallhaven.cc/toplist?page=1"
    # One blocking fetch before any tasks are scheduled is acceptable here.
    resp = requests.get(url, headers=headers)
    html = etree.HTML(resp.text)
    # One <li> per wallpaper thumbnail on the toplist grid.
    lis = html.xpath('/html/body/main/div[1]/section[1]/ul/li')
    if not lis:
        return  # nothing to download (page structure changed or request failed)
    await asyncio.gather(*(aiodownload(li) for li in lis))
if __name__ == '__main__':
    # Shared by main()/aiodownload() as a module global.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44"
    }
    t1 = time.time()
    print("开始下载...")
    # asyncio.run() replaces the deprecated get_event_loop()/run_until_complete pair.
    asyncio.run(main())
    print("下载完成...")
    t2 = time.time()
    print('运行时间:', t2 - t1)
# requests.get() --> 异步操作 (replace blocking requests.get() with an async operation)
# 实战:wallhaven壁纸网站
import time
import aiohttp
import asyncio
import requests
import aiofiles
from lxml import etree
# 异步协程优化版,跳过获取子页面,通过拼接获取图片下载地址,从单线程100秒,到异步协程只需要20秒,5倍(400%)的提升
async def aiodownload(li):
    """Download one wallpaper by constructing its full-resolution URL directly.

    Skips the detail page entirely: wallhaven full images live at a
    predictable URL built from the wallpaper id embedded in the thumbnail
    link. Tries ``.jpg`` first, then falls back to ``.png``.

    Fixes vs. original:
    - The ``.png`` fallback was written to disk WITHOUT checking the second
      response's status, saving error pages as images; both attempts now
      check ``resp.ok`` before saving.
    - The duplicated save-to-file logic in the if/else arms is deduplicated
      into one loop over candidate extensions.

    :param li: lxml element for one ``<li>`` of the toplist grid.
    """
    async with aiohttp.ClientSession() as session:
        sub_url = "".join(li.xpath('./figure/a/@href'))
        wall_id = sub_url.split('/')[-1]   # e.g. "abc123"
        prefix = wall_id[:2]               # first two chars select the CDN folder
        for ext in ('.jpg', '.png'):
            img_url = 'https://w.wallhaven.cc/full/' + prefix + '/wallhaven-' + wall_id + ext
            async with session.get(img_url, headers=headers) as img_resp:
                if not img_resp.ok:
                    continue  # wrong extension (or error): try the next candidate
                img_name = img_url.split('/')[-1]
                async with aiofiles.open("img/" + img_name, mode="wb") as f:
                    await f.write(await img_resp.content.read())
                print("下载完成", img_name)
                return
async def main(page):
    """Download every wallpaper on toplist page *page* concurrently.

    Fix vs. original: ``asyncio.wait([])`` raises ``ValueError`` when the
    XPath finds no ``<li>`` items; use ``asyncio.gather`` and guard the
    empty case.

    :param page: 1-based toplist page number to scrape.
    """
    url = f"https://wallhaven.cc/toplist?page={page}"
    # One blocking fetch before any tasks are scheduled is acceptable here.
    resp = requests.get(url, headers=headers)
    html = etree.HTML(resp.text)
    # One <li> per wallpaper thumbnail on the toplist grid.
    lis = html.xpath('/html/body/main/div[1]/section[1]/ul/li')
    if not lis:
        return  # nothing to download (page structure changed or request failed)
    await asyncio.gather(*(aiodownload(li) for li in lis))
if __name__ == '__main__':
    # Shared by main()/aiodownload() as a module global.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44"
    }
    t1 = time.time()
    page = int(input("请输入要下载的页数,从第一页开始:"))
    for i in range(1, page + 1):
        print(f"开始下载第{i}页")
        # BUG FIX: the original called main(page), so every iteration
        # re-downloaded the LAST page; pass the loop variable i instead.
        # asyncio.run() also replaces the deprecated event-loop API.
        asyncio.run(main(i))
        print("下载完成...")
    t2 = time.time()
    print('运行时间:', t2 - t1)
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)