# CSDN
import requests
import re
import time

payload = ""

# Request headers sent with every request.
# NOTE(review): the cookie below is a hard-coded session cookie; it should be
# replaced with your own value and kept out of version control.
headers = {
    "Accept": "*/*",
    "Accept-EnCoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    "cookie": "l=AurqcpuigwQdnQv7WvAfCoR1olrRQW7h; isg=BHp6mNB79CHqYXpVEiRteXyyyKNcg8YEwjgLqoRvCI3ddxqxbLtOFUBGwwOrZ3ad; thw=cn; cna=VsJQERAypn0CATrXFEIahcz8; t=0eed37629fe7ef5ec0b8ecb6cd3a3577; tracknick=tb830309_22; _cc_=UtASsssmfA%3D%3D; tg=0; ubn=p; ucn=unzbyun; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; miID=981798063989731689; hng=CN%7Czh-CN%7CCNY%7C156; um=0712F33290AB8A6D01951C8161A2DF2CDC7C5278664EE3E02F8F6195B27229B88A7470FD7B89F7FACD43AD3E795C914CC2A8BEB1FA88729A3A74257D8EE4FBBC; enc=1UeyOeN0l7Fkx0yPu7l6BuiPkT%2BdSxE0EqUM26jcSMdi1LtYaZbjQCMj5dKU3P0qfGwJn8QqYXc6oJugH%2FhFRA%3D%3D; ali_ab=58.215.20.66.1516409089271.6; mt=ci%3D-1_1; cookie2=104f8fc9c13eb24c296768a50cabdd6e; _tb_token_=ee7e1e1e7dbe7; v=0",
    "User-Agent": "Mozilla/5.0 (windows NT 10.0; Win64; x64;` rv:47.0) Gecko/20100101 firefox/47.0",
}

# Seconds to wait for a response; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10

# Matches anything that starts with "https://", contains no '>', '"', ';' or
# "'" characters, and ends in a digit. Compiled once as a raw string so the
# "\d" escape is passed to the regex engine unchanged.
_URL_PATTERN = re.compile(r"""https://[^>";']*\d""")


def getUrls(url):
    """Return the article links found on the page at *url*.

    The page is downloaded with the module-level ``headers`` and ``payload``,
    decoded with the encoding ``requests`` detects from the body, and scanned
    for ``https://`` links. Only links containing ``'details'`` are kept.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of matching links, without duplicates, in order of first
        appearance.

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    resp = requests.request(
        "GET", url, data=payload, headers=headers, timeout=REQUEST_TIMEOUT
    )
    # Use the encoding detected from the content so resp.text decodes properly.
    resp.encoding = resp.apparent_encoding
    html_source = resp.text
    candidates = _URL_PATTERN.findall(html_source)
    # dict.fromkeys drops duplicates while preserving first-seen order.
    return list(dict.fromkeys(link for link in candidates if 'details' in link))


# "主页地址" is a placeholder: replace it with the address of the home page.
urls = getUrls("主页地址")
while True:
    for url in urls:
        try:
            requests.request(
                "GET", url, data=payload, headers=headers, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            # One failed request should not abort the whole loop.
            print(url, "failed:", exc)
        else:
            print(url, "Ok")
        time.sleep(3)
    time.sleep(3)
# cnblogs
import requests
import time
import re
from lxml import etree

headers = {
    'referer': 'https://i.cnblogs.com/posts',
    "User-Agent": "Mozilla/5.0 (windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36"
}
# A proxy can be configured here; 'xxxx:xxx' is a placeholder.
# NOTE(review): only the 'http' scheme is mapped, while every URL below is
# https:// - confirm whether an 'https' entry is intended as well.
proxy = {
    'http': 'xxxx:xxx',
}

# Base address of the blog; change this one value to point at another blog.
BLOG_URL = 'https://www.cnblogs.com/shiguanggege'

# Seconds to wait for a response; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10

# Captures the post identifier from links such as href="<BLOG_URL>/p/<id>.html".
# BLOG_URL and the ".html" suffix are escaped so their dots match literally.
_POST_LINK_PATTERN = re.compile(
    r'href="' + re.escape(BLOG_URL) + r'/p/(.*?)\.html"'
)


def get_urls(url):
    """Return the post identifiers linked from the listing page at *url*.

    Args:
        url: Address of a listing page of the blog.

    Returns:
        A list of the identifiers captured from every post link on the page,
        in page order (duplicates are not removed here).

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    ret = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    ret.encoding = 'utf-8'
    return _POST_LINK_PATTERN.findall(ret.text)


# Collect the unique post addresses from listing pages 1 through 9.
urls = []
for page in range(1, 10):
    page_url = f'{BLOG_URL}/default.html?page={page}'
    for link in get_urls(page_url):
        post_url = f'{BLOG_URL}/p/{link}.html'
        if post_url not in urls:
            urls.append(post_url)

while True:
    for post_url in urls:
        try:
            requests.get(url=post_url, proxies=proxy, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One failed request should not abort the whole loop.
            print(post_url, 'failed:', exc)
        else:
            print(post_url, 'ok')
        time.sleep(1)
    time.sleep(3)
总结:以上是内存溢出为你收集整理的“使用 Python 刷访问量”的全部内容,希望文章能够帮你解决使用 Python 刷访问量时所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)