selenium基本用法
import time

from selenium import webdriver


class Tjgb:
    """Scrape a list of bulletin titles and dates from a page that nests them in an iframe.

    A Chrome driver is created as soon as the object is constructed, so every
    instance must eventually be released with :meth:`close_driver`.
    """

    def __init__(self, url, if_headless=False):
        """Store the configuration and start the browser.

        Args:
            url: Address of the page to load in :meth:`load_webpage`.
            if_headless: When true, Chrome is started without a visible window.
        """
        # Machine-specific locations of chromedriver and the Chrome binary.
        self.chrome_driver_path = r'D:\Python3.6\scripts\chromedriver.exe'
        self.binary_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
        self.url = url
        self.ua_pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
        ]
        self.if_headless = if_headless
        self.driver = self.base_driver()

    def opt_config(self):
        """Build the ChromeOptions used to launch the browser.

        Returns:
            A ``webdriver.ChromeOptions`` carrying the headless flags (when
            requested), the automation-hiding switches, the user agent and the
            Chrome binary location.
        """
        user_agent = self.ua_pool[0]
        opt = webdriver.ChromeOptions()
        if self.if_headless:
            opt.add_argument('--headless')
            opt.add_argument('--disable-gpu')
        opt.add_argument('--no-sandbox')
        # Drop the switches and extension that advertise an automated session.
        opt.add_experimental_option('excludeSwitches', ['enable-automation'])
        opt.add_experimental_option('useAutomationExtension', False)
        opt.add_argument('--disable-blink-features=AutomationControlled')
        opt.add_argument(f'user-agent={user_agent}')
        opt.binary_location = self.binary_location
        return opt

    def base_driver(self):
        """Start Chrome and register a script that hides ``navigator.webdriver``.

        Returns:
            The started ``webdriver.Chrome`` instance.
        """
        driver = webdriver.Chrome(
            options=self.opt_config(),
            executable_path=self.chrome_driver_path,
        )
        # CDP method names are case-sensitive; a mis-cased name is rejected by
        # Chrome and the script is never installed.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator,'webdriver',{
                    get:()=>undefined})"""
        })
        return driver

    def load_webpage(self):
        """Open ``self.url`` and print the title and date of every list entry.

        The entries live inside the iframe named ``ml`` under
        ``//ul[@id="fanye"]/li``; each ``li`` holds an ``a`` whose ``title``
        attribute is the headline and a ``span`` whose text is the date.

        Returns:
            A list of ``(title, pub_date)`` tuples in page order.
        """
        # The implicit wait only affects lookups issued after it is set, so
        # configure it before navigating.
        self.driver.implicitly_wait(10)
        self.driver.get(self.url)
        # Fixed pause kept from the original so the page scripts can finish
        # rendering the iframe content.
        time.sleep(15)

        rows = []
        self.driver.switch_to.frame('ml')
        try:
            li_list = self.driver.find_elements_by_xpath('//ul[@id="fanye"]/li')
            print(li_list)
            for li in li_list:
                title = li.find_element_by_xpath('./a').get_attribute('title')
                # textContent is read instead of .text so that the date is
                # returned even when the span is not rendered as visible.
                pub_date = li.find_element_by_xpath('./span').get_attribute('textContent')
                print(title, pub_date)
                rows.append((title, pub_date))
        finally:
            # Always leave the driver focused on the top-level document.
            self.driver.switch_to.default_content()
        return rows

    def close_driver(self):
        """Close every open window and shut the driver down."""
        try:
            handles = self.driver.window_handles
            print(handles)
            for handle in handles:
                self.driver.switch_to.window(handle)
                self.driver.close()
        finally:
            # quit() must run even if closing a window fails, otherwise the
            # chromedriver process is left behind.
            self.driver.quit()
        print('close all')


if __name__ == '__main__':
    # Other pages this script was tried against:
    #   http://tjj.hefei.gov.cn/tjyw/tjgb/index.html
    #   http://jxf.jiangxi.gov.cn/col/col41574/index.html
    url = 'http://tjj.ezhou.gov.cn/zwgk/fdzdgknr/?itemid=2392'
    tjgb = Tjgb(url=url, if_headless=False)
    try:
        tjgb.load_webpage()
    finally:
        tjgb.close_driver()
selenium对iframe操作
# Three alternative ways to enter an iframe; use whichever matches the page.
# `driver` is an already-started Selenium WebDriver.

# 1. By the iframe's id attribute.
driver.switch_to.frame("frame1")

# 2. By the iframe's name attribute.
driver.switch_to.frame("slider")

# 3. By locating the iframe element first and passing the element itself.
iframe_elem = driver.find_element_by_class_name('x-iframe').find_element_by_tag_name('iframe')
driver.switch_to.frame(iframe_elem)
多个iframe嵌套
# Nested iframes must be entered one level at a time, outermost first.
# `driver` is an already-started Selenium WebDriver.
driver.switch_to.frame("frame1")
driver.switch_to.frame("frame2")

# Leaving an iframe (two alternatives):
# 1. Return to the top-level document from any depth.
driver.switch_to.default_content()

# 2. From a nested iframe, step back up to the iframe one level above.
driver.switch_to.parent_frame()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)