```python
from threading import Thread
from threading import Thread
from fake_useragent import UserAgent
import requests
from time import sleep
from queue import Queue
def spider():
    """Worker loop: fetch queued listing URLs and print each item's fields.

    Pulls URLs from the module-level ``url_queue`` until it is drained.
    Each URL is requested with a Chrome User-Agent string, the JSON body's
    ``data`` entries are printed one per line, and the worker then pauses
    for three seconds before taking the next URL.

    A failed request or an unparsable body is reported and skipped so that
    one bad page does not terminate the worker thread.
    """
    # Imported locally so this function does not rely on an edit to the
    # file-level import block.
    from queue import Empty

    # One generator per worker; ``.chrome`` is still read once per request.
    user_agent = UserAgent()

    while True:
        # ``empty()`` followed by a blocking ``get()`` is a check-then-act
        # race: another worker may take the last URL in between, leaving
        # this thread blocked forever. A non-blocking get avoids that.
        try:
            url = url_queue.get_nowait()
        except Empty:
            return

        headers = {'User-Agent': user_agent.chrome}
        try:
            # Without a timeout a stalled connection would hang the worker.
            resp = requests.get(url, headers=headers, timeout=10)
            # ``data`` may be absent or null; treat that as "no items".
            items = resp.json().get('data') or []
        except (requests.RequestException, ValueError) as exc:
            print(f'request failed for {url}: {exc}')
        else:
            # Handle the response payload.
            for d in items:
                print(f'tid:{d.get("tid")} topic:{d.get("topicName")} content:{d.get("content")}')

        # Pause between pages (presumably to throttle requests to the site).
        sleep(3)
if __name__ == '__main__':
    # Shared work queue read by spider(); filled with listing pages 1-10.
    url_queue = Queue()
    for page_no in range(1, 11):
        page_url = f'https://www.hupu.com/home/v1/news?pageNo={page_no}&pageSize=50'
        url_queue.put(page_url)

    # Launch two worker threads that drain the queue concurrently.
    workers = [Thread(target=spider) for _ in range(2)]
    for worker in workers:
        worker.start()
```

欢迎分享,转载请注明来源:内存溢出
评论列表(0条)