初步分析:该网站的内容是动态加载显示的,
故采用 selenium 爬取。
第一步:安装 ChromeDriver(或 phantomjs),并执行 pip install selenium。
1.xpath查找“切换”节点
element01=browser.find_element(by=By.XPATH, value="//h1//span[2]")
2.模拟点击
3.xpath寻找文本框节点
element02 = browser.find_element(By.ID, 'cityInput')
4.文本框输入信息
element03=browser.find_element(By.ID,"xxxxxxxxx") # 分析 html 发现:单纯输入城市名称并不会直接填入文本框;每个城市都有对应的 9 位数代码,直接在文本框内输入城市代码即可
5.模拟点击
6.xpath寻找所需数据即可
# -*- coding: utf-8 -*-
# @time : 2022.5.7 17:30
# @software: PyCharm
# Author:Xiao_yu
import selenium
import smtplib
from selenium import webdriver
import time
from email.mime.text import MIMEText
from email.header import Header
from email.utils import formataddr
from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.options import Options
# chrome_options = Options()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')
# chrome_options.add_experimental_option('excludeSwitches',['enable-automation'])
# browser = webdriver.Chrome(chrome_options = chrome_options)
# Scrape the life-index page of weather.com.cn for one city with Selenium and
# mail the scraped text through QQ Mail's SMTP-over-SSL endpoint.

# Mail account, SMTP authorization code and recipient are read interactively.
my_sender = input("输入你的QQ邮箱账号:")
my_pass = input("输入你自己QQ邮箱开启smtp后生成的一串代码:")
my_user = input("接受者的邮箱账号:")
wait_time = 180  # NOTE(review): not referenced anywhere in this script
url = 'http://www.weather.com.cn/life/'
# Placeholder for the 9-digit city code. The same value is typed into the
# search box and is the ID of the suggestion entry that gets clicked.
city_code = 'xxxxxxxxx'
# browser=webdriver.PhantomJS('C:/Users/wangningmei/AppData/Local/Programs/Python/Python39/phantomjs.exe')
browser = webdriver.Chrome()
try:
    browser.maximize_window()
    browser.get(url)
    time.sleep(5)  # give the dynamically rendered page time to load
    browser.implicitly_wait(10)

    # Open the city switcher; the click is issued through JavaScript.
    switch_node = browser.find_element(by=By.XPATH, value="//h1//span[2]")
    browser.execute_script("arguments[0].click();", switch_node)

    # Typing the city code makes an element with that ID available to click.
    city_input = browser.find_element(By.ID, 'cityInput')
    city_input.send_keys(city_code)
    city_entry = browser.find_element(By.ID, city_code)
    browser.execute_script("arguments[0].click();", city_entry)

    time.sleep(5)  # wait for the page to refresh with the chosen city's data
    dataray = [
        node.text
        for node in browser.find_elements(
            by=By.XPATH, value="//div[@class='second']//a//dl[1]")
    ]
    dataclothes = [
        node.text
        for node in browser.find_elements(
            by=By.XPATH,
            value="//div[@class='you']//div[contains(@style, 'display: block;')]")
    ]
finally:
    # Always shut the browser down, even when a lookup above fails, so no
    # driver/browser process is left running.
    browser.quit()

# print(dataray)
# print(dataclothes[1])
# The mail body uses the first three life entries and the second clothes
# entry; fail with an explicit message instead of a bare IndexError.
if len(dataray) < 3 or len(dataclothes) < 2:
    raise RuntimeError(
        "scraped %d life entries and %d clothes entries; expected at least "
        "3 and 2 - check the city code and the page layout"
        % (len(dataray), len(dataclothes)))

body = '\n'.join(["life:", dataray[0], dataray[1], dataray[2],
                  "clothes:", dataclothes[1]])
msg = MIMEText(body, 'plain', 'utf-8')
msg['From'] = formataddr(("your name", my_sender))
msg['To'] = my_user  # the original never set a To header
msg['Subject'] = '简洁的标题'

# The context manager issues QUIT on exit, including when login/send raises.
with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
    server.login(my_sender, my_pass)
    server.sendmail(my_sender, [my_user], msg.as_string())
print('over')
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)