selenium基本用法_随笔

selenium基本用法

import time

from selenium import webdriver

class Tjgb:
    """Chrome session that opens one page and prints its list entries.

    The driver is created in ``__init__``; ``load_webpage`` prints the title
    and date of every ``<li>`` under ``ul#fanye`` inside the frame named
    ``ml``; ``close_driver`` shuts the browser down.

    NOTE(review): the code uses the Selenium 3 style API
    (``executable_path``, ``find_element_by_*``) - confirm the installed
    Selenium version still provides it.
    """

    def __init__(self, url, if_headless=False):
        """Store the configuration and start Chrome immediately.

        Args:
            url: Address opened by ``load_webpage``.
            if_headless: When true, Chrome is started with headless flags.
        """
        # Path separators restored: without them these were drive-relative
        # names that cannot point at the intended executables.
        # NOTE(review): adjust both paths to the local installation.
        self.chrome_driver_path = r'D:\Python3.6\scripts\chromedriver.exe'
        self.binary_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
        self.url = url
        self.ua_pool = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
        ]
        self.if_headless = if_headless
        self.driver = self.base_driver()

    def opt_config(self):
        """Build the ChromeOptions used to start the browser.

        Returns:
            A ``webdriver.ChromeOptions`` with the optional headless flags,
            the automation-hiding switches, the first user agent from
            ``ua_pool`` and the configured Chrome binary location.
        """
        user_agent = self.ua_pool[0]
        opt = webdriver.ChromeOptions()
        if self.if_headless:
            opt.add_argument('--headless')
            # Double dash, matching every other switch passed here.
            opt.add_argument('--disable-gpu')
            opt.add_argument('--no-sandbox')
        # Remove the switch/extension Chrome adds when driven by automation.
        opt.add_experimental_option('excludeSwitches', ['enable-automation'])
        opt.add_experimental_option('useAutomationExtension', False)
        opt.add_argument('--disable-blink-features=AutomationControlled')
        opt.add_argument(f'user-agent={user_agent}')
        opt.binary_location = self.binary_location
        return opt

    def base_driver(self):
        """Start Chrome and register the ``navigator.webdriver`` override.

        Returns:
            The started ``webdriver.Chrome`` instance.

        Raises:
            Exception: Whatever Selenium raises while starting Chrome or
                sending the CDP command; the browser is quit first in the
                latter case so it is not left running.
        """
        driver = webdriver.Chrome(
            options=self.opt_config(),
            executable_path=self.chrome_driver_path,
        )
        try:
            # CDP method names are case-sensitive; the script is injected
            # into every new document before the page's own scripts run.
            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
            Object.defineProperty(navigator,'webdriver',{
            get:()=>undefined})"""
            })
        except Exception:
            # Do not leak a running browser when the setup step fails.
            driver.quit()
            raise
        return driver

    def load_webpage(self):
        """Open ``self.url`` and print the title and date of each list entry.

        The entries are read from the frame named ``ml``; focus is always
        returned to the top-level document afterwards, even on failure.
        """
        self.driver.get(self.url)
        # Fixed pause kept from the original before querying the page.
        time.sleep(15)
        self.driver.implicitly_wait(10)
        self.driver.switch_to.frame('ml')
        try:
            li_list = self.driver.find_elements_by_xpath('//ul[@id="fanye"]/li')
            print(li_list)
            for li in li_list:
                title = li.find_element_by_xpath('./a').get_attribute('title')
                # textContent is read instead of .text, as in the original.
                pub_date = li.find_element_by_xpath('./span').get_attribute('textContent')
                print(title, pub_date)
        finally:
            # Leave the driver focused on the main document in every case.
            self.driver.switch_to.default_content()

    def close_driver(self):
        """Close every open window, then quit the driver."""
        try:
            handles = self.driver.window_handles
            print(handles)
            for handle in handles:
                # switch_to.window replaces the deprecated switch_to_window.
                self.driver.switch_to.window(handle)
                self.driver.close()
        finally:
            # quit() must run even if closing an individual window fails.
            self.driver.quit()
        print('close all')

if __name__ == '__main__':
    # Alternative target URLs kept from the original (disabled):
    #   http://tjj.hefei.gov.cn/tjyw/tjgb/index.html
    #   http://jxf.jiangxi.gov.cn/col/col41574/index.html
    url = 'http://tjj.ezhou.gov.cn/zwgk/fdzdgknr/?itemid=2392'
    tjgb = Tjgb(url=url, if_headless=False)
    try:
        tjgb.load_webpage()
    finally:
        # Always shut the browser down, even when loading the page fails.
        tjgb.close_driver()

selenium对iframe操作

# By id

# Switch into the iframe
driver.switch_to.frame("frame1")

# By name

# Switch into the iframe
driver.switch_to.frame("slider")

# By located element

# Find the container first, then the iframe inside it, and switch into it
wrapper = driver.find_element_by_class_name('x-iframe')
iframe_elem = wrapper.find_element_by_tag_name('iframe')
driver.switch_to.frame(iframe_elem)

多个iframe嵌套


    


# Step into nested iframes one level at a time
driver.switch_to.frame("frame1")
driver.switch_to.frame("frame2")

# Leaving an iframe
# 1. After switching into an iframe, return to the top-level document
driver.switch_to.default_content()

# 2. With nested iframes, after switching into the second-level iframe,
#    go back up to the parent iframe
driver.switch_to.parent_frame()

欢迎分享，转载请注明来源：内存溢出

原文地址: https://outofmemory.cn/zaji/5650733.html

selenium基本用法

发表评论

评论列表（0条）