python学习之路day5_python

概述：将 bs4 与 requests 结合使用，先爬取免费代理并保存到 ip.txt，再通过这些代理（挂代理池）对目标站点进行目录扫描。

import time

import requests
from bs4 import BeautifulSoup

# File names and endpoints used by the script.
WORDLIST_PATH = 'zidian.txt'   # one candidate path per line
PROXY_PATH = 'ip.txt'          # one "ip:port" proxy per line, written by getip()
TARGET_URL = 'http://www.baidu.com'
PROXY_LIST_URL = 'https://www.kuaidaili.com/free/inha/'
REQUEST_TIMEOUT = 10           # seconds; keeps a dead proxy from hanging the run


def _read_lines(path):
    """Return the stripped, non-empty lines of the text file at *path*."""
    with open(path) as handle:
        return [line.strip() for line in handle if line.strip()]


def scan():
    """Request every wordlist path on TARGET_URL through every saved proxy.

    Paths are read from WORDLIST_PATH and proxies from PROXY_PATH. Each
    path is appended to TARGET_URL and fetched once per proxy; the URL is
    printed whenever the response status is 200 or 403. Request failures
    are reported on stdout and followed by a short pause; they never abort
    the scan.
    """
    # NOTE(review): 'Host' is a placeholder and does not match TARGET_URL;
    # set it to the real target host (or drop it) before running a scan.
    header = {
        'Host': 'www.xxx.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    paths = _read_lines(WORDLIST_PATH)
    # Load the proxy pool once instead of re-opening the file for every path.
    proxy_pool = _read_lines(PROXY_PATH)
    proxies = {}

    for path in paths:
        # Lines are already stripped, so the URL carries no trailing newline.
        url_dir = TARGET_URL + path
        for proxy in proxy_pool:
            proxies['http'] = proxy
            try:
                code = requests.get(
                    url_dir,
                    headers=header,
                    proxies=proxies,
                    verify=False,
                    timeout=REQUEST_TIMEOUT,
                ).status_code
            # NOTE(review): the messages below say "wait one minute" but the
            # pause is one second; both are kept exactly as in the original.
            except requests.exceptions.ConnectionError:
                print('ConnectionError -- 等待一分钟')
                time.sleep(1)
            except requests.exceptions.ChunkedEncodingError:
                print('ConnectionError -- 等待一分钟')
                time.sleep(1)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the scan.
                print('UnboundLocalError -- 等待一分钟')
                time.sleep(1)
            else:
                if code in (200, 403):
                    print(url_dir)


def getip(pages=None):
    """Scrape free proxies and append them to PROXY_PATH as "ip:port" lines.

    Fetches listing pages 1..pages from PROXY_LIST_URL, pausing one second
    after each request, and writes one line per (IP, PORT) table-cell pair.

    Args:
        pages: Number of listing pages to fetch (int or numeric string).
            When omitted, the module-level ``i`` set by the ``__main__``
            block is used, matching the original zero-argument call.
    """
    if pages is None:
        pages = i  # module-level value read from input() in the __main__ block
    page_count = int(pages)

    with open(PROXY_PATH, 'a+') as out:
        for page in range(1, page_count + 1):
            rep = requests.get(PROXY_LIST_URL + str(page), timeout=REQUEST_TIMEOUT).content
            time.sleep(1)
            soup = BeautifulSoup(rep, "lxml")
            ip_cells = soup.select('td[data-title="IP"]')
            port_cells = soup.select('td[data-title="PORT"]')
            for ip_cell, port_cell in zip(ip_cells, port_cells):
                # get_text() always returns a str, whereas .string can be None.
                out.write(ip_cell.get_text(strip=True) + ":" + port_cell.get_text(strip=True) + '\n')


if __name__ == '__main__':
    i = input("需要多少页ip:")
    getip(i)
    scan()

总结

以上是内存溢出为你收集整理的python学习之路day5全部内容，希望文章能够帮你解决python学习之路day5所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错，欢迎将内存溢出网站推荐给程序员好友。

欢迎分享，转载请注明来源：内存溢出

原文地址: https://outofmemory.cn/langs/1189391.html