# 注意：下方代码中的 get_proxies 方法需要根据自身情况自行开发（默认返回空字典，即不使用代理）。
# -*- coding: utf-8 -*-
import hashlib
import random
import requests
from datetime import datetime
class BaseSpider:
    """Small base class for ``requests``-based crawlers.

    It bundles a shared ``requests.Session`` with helpers for building
    randomized request headers, fetching a URL with retries, and saving
    responses to disk.  Subclasses are expected to override
    :meth:`get_proxies` to supply their own proxy configuration.

    The instance can be used as a context manager so that the underlying
    session is closed when the work is done.
    """

    # Platform tokens that make_ua() chooses from.
    _UA_PLATFORMS = (
        '(Windows NT 6.1; WOW64)', '(Windows NT 10.0; WOW64)', '(X11; Linux x86_64)',
        '(Macintosh; Intel Mac OS X 10_12_6)',
    )

    def __init__(self):
        self.sess = requests.Session()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying session and release its connections."""
        self.sess.close()

    def make_rand_name(self, rand_times=4) -> str:
        """Return a random digit string.

        The string is the concatenation of ``rand_times`` random integers,
        each drawn from 1..99, so its length varies between ``rand_times``
        and ``2 * rand_times`` characters.
        """
        return "".join(str(random.randint(1, 99)) for _ in range(rand_times))

    def get_curr_time(self) -> str:
        """Return the current local time as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def make_md5(self, s: str) -> str:
        """Return the hexadecimal MD5 digest of ``s`` (encoded with UTF-8)."""
        return hashlib.md5(s.encode()).hexdigest()

    def make_ua(self) -> str:
        """Build a randomized Chrome-like ``User-Agent`` string."""
        major = random.randint(55, 100)
        build = random.randint(0, 3200)
        patch = random.randint(0, 140)
        chrome_version = 'Chrome/{}.0.{}.{}'.format(major, build, patch)
        return ' '.join([
            'Mozilla/5.0', random.choice(self._UA_PLATFORMS), 'AppleWebKit/537.36',
            '(KHTML, like Gecko)', chrome_version, 'Safari/537.36',
        ])

    def make_head(self) -> dict:
        """Return request headers containing a freshly generated User-Agent."""
        return {"User-Agent": self.make_ua()}

    def get_proxies(self) -> dict:
        """Return the proxy mapping passed to ``requests``.

        The base implementation returns an empty dict (no proxy); override
        it in a subclass to plug in a real proxy source.
        """
        return {}

    def get_curr_ip(self) -> str:
        """Return the ``origin`` field reported by ``https://httpbin.org/ip``.

        Raises:
            RuntimeError: if the request did not succeed after all retries.
        """
        resp = self._fetch_or_raise("https://httpbin.org/ip")
        return resp.json()["origin"]

    def parse(self, url, timeout=10, retry=2, hope_code=200):
        """GET ``url`` and return the response, retrying on failure.

        Args:
            url: address to request.
            timeout: per-request timeout handed to ``Session.get``.
            retry: number of extra attempts after the first one, so up to
                ``retry + 1`` requests are made.
            hope_code: the HTTP status code that counts as success.

        Returns:
            The response whose status code equals ``hope_code``, or ``None``
            when every attempt raised or returned a different status code.
        """
        # Headers and proxies are built once and reused for every attempt.
        headers = self.make_head()
        proxies = self.get_proxies()
        for _ in range(retry + 1):
            try:
                resp = self.sess.get(url, headers=headers, proxies=proxies, timeout=timeout)
            except Exception as e:
                # Deliberately broad: any failure of an attempt is reported
                # and the next attempt is made.
                print("ERROR {}".format(e))
                continue
            if resp.status_code == hope_code:
                return resp
            print("WARNING {}".format(resp.status_code))
        return None

    def _fetch_or_raise(self, url):
        """Return ``self.parse(url)``, raising ``RuntimeError`` if it is ``None``."""
        resp = self.parse(url)
        if resp is None:
            raise RuntimeError("failed to fetch {}".format(url))
        return resp

    def download_html(self, fp: str, url, encoding=None):
        """Fetch ``url`` and write the response text to the file ``fp``.

        Args:
            fp: destination file path.
            url: address to download.
            encoding: encoding of the written file; defaults to utf8.

        Raises:
            RuntimeError: if the request did not succeed.  The file is not
                opened in that case, so existing content is left intact.
        """
        # Fetch before opening the file so a failed download cannot truncate it.
        resp = self._fetch_or_raise(url)
        with open(fp, "w", encoding=encoding or "utf8") as f:
            f.write(resp.text)

    def download_img(self, fp: str, url):
        """Fetch ``url`` and write the raw response bytes to the file ``fp``.

        Raises:
            RuntimeError: if the request did not succeed.  The file is not
                opened in that case, so existing content is left intact.
        """
        # Fetch before opening the file so a failed download cannot truncate it.
        resp = self._fetch_or_raise(url)
        with open(fp, "wb") as f:
            f.write(resp.content)

    def save_file(self, fp: str, data: str, encoding=None):
        """Write the text ``data`` to the file ``fp``.

        Args:
            fp: destination file path.
            data: text to write.
            encoding: encoding of the written file; defaults to utf8, the
                same default download_html uses, instead of the
                platform-dependent locale encoding.
        """
        with open(fp, "w", encoding=encoding or "utf8") as f:
            f.write(data)
if __name__ == '__main__':
    # Manual smoke test: print the IP address that httpbin sees for this client.
    spider = BaseSpider()
    current_ip = spider.get_curr_ip()
    print(current_ip)
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)