selenium捞取http请求信息_随笔

selenium捞取http请求信息

UI自动化过程中，经常存在需要获取http请求信息的场景。
例如：元素文案由接口返回，需要验证文案的正确性；出现报错时需要打印http请求日志，方便排查问题等。

方式一：selenium-wire

参见 selenium-wire 官网；该库仅支持 Python 3.6+。

# Example: read the `timestamp` signature header that the API Store
# "IP lookup" page sends with its request.
# NOTE: the driver must come from `seleniumwire`, not plain `selenium`.
import time

from selenium.webdriver.common.by import By
from seleniumwire import webdriver

driver = webdriver.Chrome()
try:
    driver.get('https://apis.baidu.com/store/aladdin/land?cardType=ipSearch')
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # deprecated in Selenium 4.0 and removed in Selenium 4.3.
    driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/div/div/div[2]/input').send_keys("112.10.36.59")
    driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/div/div/div[2]/div').click()
    time.sleep(1)  # crude wait so the click has time to fire its request
    # selenium-wire exposes the captured requests via the `requests` attribute
    for request in driver.requests:
        # Only print for requests that already have a response and that
        # carry a `timestamp` request header.
        if request.response and "timestamp" in request.headers:
            print(request.headers["timestamp"])
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window. Running it in `finally`
    # releases the browser even when a lookup above raises.
    driver.quit()

方式二：设置代理

浏览器设置代理，从代理服务器获取请求，该方式不做详细描述
设置代理
推荐代理工具使用Mitmproxy

方式三：捞取webdriver日志

启动浏览器时添加如下信息

from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Start Chrome with the performance log enabled so that network events can be
# read back later through driver.get_log('performance').
option = webdriver.ChromeOptions()
option.add_experimental_option('perfLoggingPrefs', {'enableNetwork': True})
# Copy before editing: DesiredCapabilities.CHROME is a class-level dict, so
# mutating it in place would leak the logging prefs into every later user of
# DesiredCapabilities.CHROME in the same process.
caps = DesiredCapabilities.CHROME.copy()
caps['goog:loggingPrefs'] = {'performance': 'ALL'}
# `chrome_driver_path` is not defined in this snippet; presumably the path to
# the chromedriver executable.
# NOTE(review): the positional driver path and `desired_capabilities=` are the
# Selenium 3 style constructor arguments; confirm against the installed
# Selenium version.
driver = webdriver.Chrome(chrome_driver_path, options=option, desired_capabilities=caps)

"""
日志的类型
['Network.loadingFailed', 'Network.loadingFinished', 'Network.resourceChangedPriority',
 'Network.requestServedFromCache', 'Network.requestWillBeSent', 'Network.requestWillBeSentExtraInfo',
 'Network.responseReceived', 'Network.responseReceivedExtraInfo', 'Network.dataReceived',
 'Page.frameAttached', 'Page.frameRequestedNavigation', 'Page.frameStoppedLoading',
 'Page.frameClearedScheduledNavigation', 'Page.loadEventFired', 'Page.frameStartedLoading',
 'Page.frameDetached', 'Page.frameScheduledNavigation', 'Page.frameNavigated', 'Page.frameResized',
 'Page.domContentEventFired']
请求的类型(待补充)
 ['XHR'(接口请求), 'Fetch'(接口请求), 'Script'(.js), 'Stylesheet'(.css), 'Image'(.png等), 'Font', 'Document'(文档), 'Manifest', 'Ping', 'Preflight', 'Navigation', 'Other']
"""

# Parse the Chrome performance log into three flat lists of dicts:
# requests, responses and "served from cache" markers, all keyed by the
# DevTools requestId under 'id'.
import json
import time

time.sleep(2)  # crude wait so that most in-flight requests have completed
request_list = []  # every request seen in the log
response_list = []  # every response seen in the log
cache_list = []  # every "served from cache" record seen in the log
for log_entry in self.driver.get_log('performance'):
    # Each log entry wraps the DevTools event as a JSON string.
    message = json.loads(log_entry['message'])['message']
    method = message['method']
    # Request details are carried by Network.requestWillBeSent.
    if method == 'Network.requestWillBeSent':
        params = message['params']
        request = params['request']
        # 'type' is the resource type shown in the DevTools network filter; it
        # separates API calls (XHR/Fetch) from page and asset requests.
        request_info = {'id': params['requestId'], 'type': params['type'],
                        'url': request['url'], 'method': request['method'],
                        'req_time': log_entry['timestamp'], 'req_headers': request['headers']}
        try:
            request_info['req_body'] = json.loads(request['postData'])
        except (KeyError, TypeError, ValueError):
            # No postData, or the body is not JSON: record the request without
            # a 'req_body' key.
            pass
        request_list.append(request_info)
    # Response details are carried by Network.responseReceived, which does not
    # include the body.
    elif method == 'Network.responseReceived':
        params = message['params']
        request_id = params['requestId']
        response = params['response']
        try:
            # The body has to be fetched separately through the DevTools protocol.
            resp_body = json.loads(
                self.driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})['body'])
        except Exception:
            # Best effort: the body may be unavailable or not JSON. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            resp_body = None
        response_info = {'id': request_id, 'type': params['type'], 'url': response['url'],
                         'resp_time': log_entry['timestamp']}
        # Prefer the request headers reported with the response when present;
        # per the original author's note they are more complete than the ones
        # in requestWillBeSent.
        if 'requestHeaders' in response:
            response_info['req_headers'] = response['requestHeaders']
        response_info.update({'resp_status': response['status'],
                              'resp_headers': response['headers'],
                              'resp_body': resp_body})
        response_list.append(response_info)
    # Requests answered straight from the browser cache (per the original
    # author's note, usually css/js files).
    elif method == 'Network.requestServedFromCache':
        cache_list.append({'id': message['params']['requestId']})
# Merge requests with the cache records: every request gains a boolean
# 'req_from_cache' key telling whether its id appears in cache_list.
# A set lookup is used instead of scanning cache_list per request, so a
# request is kept (flagged False) even when cache_list is empty; the previous
# nested loop dropped every request in that case.
cached_ids = {cache['id'] for cache in cache_list}
new_request_list = [dict(request, req_from_cache=request['id'] in cached_ids)
                    for request in request_list]
# Merge requests with responses. There may be fewer responses than requests
# because the log can be pulled before some responses have arrived.
complete_request_list = []  # requests that have a matching response
incomplete_request_list = []  # requests with no matching response
# Index responses by (id, url, type); setdefault keeps the first response for
# a key, which is the one the previous linear scan would have picked.
first_response_by_key = {}
for response in response_list:
    first_response_by_key.setdefault((response['id'], response['url'], response['type']), response)
for request in new_request_list:
    response = first_response_by_key.get((request['id'], request['url'], request['type']))
    if response is not None:
        # Response keys go last so that its 'req_headers' (more complete, per
        # the original author's note) override the request's.
        complete_request_list.append(dict(request, **response))
    else:
        # Every unmatched request lands here, including when response_list is
        # empty or when a response shares the id but differs in url/type;
        # the previous loop lost such requests from both lists.
        incomplete_request_list.append(request)

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/zaji/5479746.html

selenium捞取http请求信息

发表评论

评论列表（0条）