KVMTOOL: PCI Device Emulation

In kvmtool, each virtio backend driver sets up its PCI emulation through a similar call chain; for virtio-vsock, for example: virtio_vsock_init_one -> virtio_init -> virtio_pci__init (vdev->ops->init). virtio_pci__init is the function that emulates a PCI device's configuration space.
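
For orientation, the sketch below shows the rough shape of such a backend registration. The virtio_init() parameter list, the vsock ops structure name and the constants are paraphrased from kvmtool and should be treated as assumptions that may differ between versions.

/*
 * Rough, paraphrased sketch (not verbatim kvmtool code): a virtio backend
 * hands its device and ops to virtio_init(), which, for the PCI transport,
 * sets vdev->ops->init = virtio_pci__init and calls it with the PCI device,
 * subsystem and class IDs. Names below may differ between kvmtool versions.
 */
static int virtio_vsock_init_one_sketch(struct kvm *kvm, struct vsock_dev *vsock)
{
	return virtio_init(kvm, vsock, &vsock->vdev, &vsock_dev_virtio_ops,
			   VIRTIO_PCI,                 /* transport selection */
			   PCI_DEVICE_ID_VIRTIO_VSOCK, /* PCI device ID */
			   VIRTIO_ID_VSOCK,            /* PCI subsystem ID */
			   PCI_CLASS_VSOCK);           /* PCI class code */
}

virtio_pci__init then fills in the emulated PCI configuration space: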

int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		     int device_id, int subsys_id, int class)
{
	struct virtio_pci *vpci = vdev->virtio;
	u32 mmio_addr, msix_io_block;
	u16 port_addr;
	int r;

	vpci->kvm = kvm;
	vpci->dev = dev;

	BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE)); // per-device PCI I/O space size, 0x100

	port_addr = pci_get_io_port_block(PCI_IO_SIZE); // allocate the device's I/O port block,
	                                                // starting at PCI_IOPORT_START (0x6200 on x86)

	mmio_addr = pci_get_mmio_block(PCI_IO_SIZE); // allocate MMIO space for this device;
	                                             // on x86 PCI MMIO is carved out from 0xD2000000

	msix_io_block = pci_get_mmio_block(VIRTIO_MSIX_BAR_SIZE);

	vpci->pci_hdr = (struct pci_device_header) {
		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), // 0x1af4
		.device_id		= cpu_to_le16(device_id),
			// e.g. PCI_DEVICE_ID_VIRTIO_VSOCK (0x1012), defined in
			// include/kvm/virtio-pci-dev.h
		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
		.header_type		= PCI_HEADER_TYPE_NORMAL,
			// an endpoint device (PCI_HEADER_TYPE_BRIDGE would mark a bridge)
		.revision_id		= 0,
		.class[0]		= class & 0xff,
		.class[1]		= (class >> 8) & 0xff,
		.class[2]		= (class >> 16) & 0xff,
		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
		.subsys_id		= cpu_to_le16(subsys_id), // virtio device ID, e.g. VIRTIO_ID_VSOCK
		.bar[0]			= cpu_to_le32(port_addr | PCI_BASE_ADDRESS_SPACE_IO),
			// PCI_BASE_ADDRESS_SPACE_IO = 1: an I/O BAR, allocated from 0x6200
		.bar[1]			= cpu_to_le32(mmio_addr | PCI_BASE_ADDRESS_SPACE_MEMORY),
			// PCI_BASE_ADDRESS_SPACE_MEMORY = 0: a memory BAR, allocated from 0xD2000000
		.bar[2]			= cpu_to_le32(msix_io_block | PCI_BASE_ADDRESS_SPACE_MEMORY),
		.status			= cpu_to_le16(PCI_STATUS_CAP_LIST), // a capability list is present
		.capabilities		= (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr,
			// offset of the MSI-X capability within the config header
		.bar_size[0]		= cpu_to_le32(PCI_IO_SIZE),
		.bar_size[1]		= cpu_to_le32(PCI_IO_SIZE),
		.bar_size[2]		= cpu_to_le32(VIRTIO_MSIX_BAR_SIZE),
	};

	r = pci__register_bar_regions(kvm, &vpci->pci_hdr,
				      virtio_pci__bar_activate, // activate a bar
				      virtio_pci__bar_deactivate, vdev); // deactivate a bar
	if (r < 0)
		return r;

	vpci->dev_hdr = (struct device_header) {
		.bus_type		= DEVICE_BUS_PCI,
		.data			= &vpci->pci_hdr,
	};

	vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX;
	vpci->pci_hdr.msix.next = 0;
	
	vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_NR_MSIX - 1);

	
	vpci->pci_hdr.msix.table_offset = cpu_to_le32(2);
	vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | VIRTIO_MSIX_TABLE_SIZE);
	vpci->config_vector = 0;

	if (irq__can_signal_msi(kvm))
		vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI;

	vpci->legacy_irq_line = pci__assign_irq(&vpci->pci_hdr);

	r = device__register(&vpci->dev_hdr);
	if (r < 0)
		return r;

	return 0;
}
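
The msix.table_offset and msix.pba_offset values written at the end of virtio_pci__init() pack two fields into one 32-bit register, as the PCI spec defines for the MSI-X capability: the low 3 bits carry the BAR indicator (BIR, here BAR 2) and the remaining bits the offset inside that BAR; likewise msix.ctrl encodes the table size as N-1, hence VIRTIO_NR_MSIX - 1. A small self-contained illustration of the decode (the 0x400 table size is only an assumed example value, not kvmtool's actual VIRTIO_MSIX_TABLE_SIZE):

#include <stdint.h>
#include <stdio.h>

/* Decode an MSI-X table/PBA offset register as defined by the PCI spec:
 * bits [2:0] select the BAR (BIR), bits [31:3] hold the offset inside it. */
static void msix_decode(const char *name, uint32_t reg)
{
	printf("%s: BAR %u, offset 0x%x\n", name, reg & 0x7, reg & ~0x7u);
}

int main(void)
{
	uint32_t table_size = 0x400;	/* assumed example value */

	msix_decode("MSI-X table", 2);			/* -> BAR 2, offset 0x0   */
	msix_decode("MSI-X PBA",   2 | table_size);	/* -> BAR 2, offset 0x400 */
	return 0;
}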


int pci__assign_irq(struct pci_device_header *pci_hdr)
{
	
	// configure the interrupt pin and allocate the legacy interrupt line number
	pci_hdr->irq_pin	= 1;
	pci_hdr->irq_line	= irq__alloc_line();

	if (!pci_hdr->irq_type)
		pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING;

	return pci_hdr->irq_line;
}
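
The irq_pin and irq_line chosen here are what the guest later reads back from the standard config-space Interrupt Line (offset 0x3C) and Interrupt Pin (offset 0x3D) registers. A tiny self-contained illustration of that layout, with a made-up value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* The dword at config offset 0x3C: byte 0 = Interrupt Line,
	 * byte 1 = Interrupt Pin (1 means INTA#). The value below is made up. */
	uint32_t cfg_3c = 0x0105;

	uint8_t irq_line = cfg_3c & 0xff;
	uint8_t irq_pin  = (cfg_3c >> 8) & 0xff;

	printf("interrupt line %u, pin INT%c#\n", irq_line, 'A' + irq_pin - 1);
	return 0;
}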

// x86 guest memory layout
// x86/include/kvm/kvm-arch.h

#define KVM_32BIT_MAX_MEM_SIZE  (1ULL << 32) // 0x100000000
#define KVM_32BIT_GAP_SIZE      (768 << 20)  // 0x30000000, i.e. 768 MB
#define KVM_32BIT_GAP_START     (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE) //0xD0000000

#define KVM_MMIO_START          KVM_32BIT_GAP_START //0xD0000000


#define KVM_IOPORT_AREA         0x0
#define KVM_PCI_CFG_AREA        (KVM_MMIO_START + 0x1000000)
#define KVM_PCI_MMIO_AREA       (KVM_MMIO_START + 0x2000000) //0xD2000000
#define KVM_VIRTIO_MMIO_AREA    (KVM_MMIO_START + 0x3000000)
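
These constants explain the addresses seen above: the 768 MB gap below 4 GB starts at 0xD0000000, and the PCI MMIO window used for the virtio BARs starts 32 MB into it, at 0xD2000000. A quick self-contained check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

#define KVM_32BIT_MAX_MEM_SIZE  (1ULL << 32)
#define KVM_32BIT_GAP_SIZE      (768ULL << 20)
#define KVM_32BIT_GAP_START     (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE)
#define KVM_MMIO_START          KVM_32BIT_GAP_START
#define KVM_PCI_CFG_AREA        (KVM_MMIO_START + 0x1000000)
#define KVM_PCI_MMIO_AREA       (KVM_MMIO_START + 0x2000000)
#define KVM_VIRTIO_MMIO_AREA    (KVM_MMIO_START + 0x3000000)

int main(void)
{
	printf("gap start       0x%llx\n", (unsigned long long)KVM_32BIT_GAP_START);   /* 0xd0000000 */
	printf("pci cfg area    0x%llx\n", (unsigned long long)KVM_PCI_CFG_AREA);      /* 0xd1000000 */
	printf("pci mmio area   0x%llx\n", (unsigned long long)KVM_PCI_MMIO_AREA);     /* 0xd2000000 */
	printf("virtio mmio     0x%llx\n", (unsigned long long)KVM_VIRTIO_MMIO_AREA);  /* 0xd3000000 */
	return 0;
}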

When the guest exits to kvmtool with KVM_EXIT_MMIO or KVM_EXIT_IO, the faulting address is used to work out which BAR it falls in, and the access is then emulated as an IN or OUT on that BAR:

kvm_cpu_thread (vcpu thread)
    -> kvm_cpu__start
        -> KVM_EXIT_{MMIO,IO}: kvm_cpu__emulate_{mmio,io}
            -> kvm__emulate_{mmio,io}
                -> mmio_fn (virtio_pci__io_mmio_callback)
                    -> virtio_pci__data_{in,out}
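
For orientation, the dispatch at the top of that chain looks roughly like the following sketch of a vcpu run loop written directly against the KVM API; handle_mmio()/handle_io() are hypothetical stand-ins for kvmtool's kvm__emulate_mmio()/kvm__emulate_io().

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for kvmtool's kvm__emulate_mmio()/kvm__emulate_io(). */
static void handle_mmio(uint64_t addr, uint8_t *data, uint32_t len, int is_write)
{
	/* kvmtool looks up the registered region covering addr and calls its
	 * callback -- for a virtio-pci BAR that is virtio_pci__io_mmio_callback. */
	printf("mmio %s addr 0x%llx len %u\n",
	       is_write ? "write" : "read", (unsigned long long)addr, len);
	(void)data;
}

static void handle_io(uint16_t port, void *data, int size, int count, int is_out)
{
	printf("pio %s port 0x%x size %d count %d\n",
	       is_out ? "out" : "in", port, size, count);
	(void)data;
}

/* Minimal sketch of a vcpu run loop: KVM fills struct kvm_run on each exit,
 * and MMIO/PIO exits carry the faulting address/port plus a data window. */
void vcpu_loop(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			break;

		switch (run->exit_reason) {
		case KVM_EXIT_MMIO:
			handle_mmio(run->mmio.phys_addr, run->mmio.data,
				    run->mmio.len, run->mmio.is_write);
			break;
		case KVM_EXIT_IO:
			handle_io(run->io.port, (uint8_t *)run + run->io.data_offset,
				  run->io.size, run->io.count,
				  run->io.direction == KVM_EXIT_IO_OUT);
			break;
		default:
			return;
		}
	}
}

For a virtio-pci BAR, the callback that this dispatch eventually reaches is virtio_pci__io_mmio_callback: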

static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
                                         u64 addr, u8 *data, u32 len,
                                         u8 is_write, void *ptr)
{
        struct virtio_device *vdev = ptr;
        struct virtio_pci *vpci = vdev->virtio;
        u32 base_addr;
        u32 bar0, bar1;

        bar0 = virtio_pci__port_addr(vpci); // BAR 0 base: vpci->pci_hdr.bar[0]
        bar1 = virtio_pci__mmio_addr(vpci); // BAR 1 base: vpci->pci_hdr.bar[1]

        // decide which BAR the faulting address falls into
        if (addr >= bar0 && addr < bar0 + pci__bar_size(&vpci->pci_hdr, 0))
                base_addr = bar0; // base address stored in BAR 0
        else
                base_addr = bar1; // base address stored in BAR 1

        if (!is_write)
                virtio_pci__data_in(vcpu, vdev, addr - base_addr, data, len);
                                                // offset of addr within the BAR
        else
                virtio_pci__data_out(vcpu, vdev, addr - base_addr, data, len);
                                                // offset of addr within the BAR
}
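
This callback is attached to the BARs by pci__register_bar_regions(), whose activate hook (virtio_pci__bar_activate) registers the BAR's guest-physical range with kvmtool's MMIO/ioport dispatch. The sketch below is paraphrased rather than verbatim kvmtool code; the kvm__register_mmio()/pci__bar_address() helper names and signatures are assumptions that may differ between versions, and the I/O-port and MSI-X BARs are elided.

/*
 * Paraphrased sketch of a BAR activate hook (not verbatim kvmtool code):
 * when the guest enables a BAR, register its guest-physical range so that
 * KVM_EXIT_MMIO accesses are routed to virtio_pci__io_mmio_callback.
 * Helper names and signatures are assumptions.
 */
static int virtio_pci__bar_activate_sketch(struct kvm *kvm,
					   struct pci_device_header *pci_hdr,
					   int bar_num, void *data)
{
	struct virtio_device *vdev = data;
	u32 bar_addr = pci__bar_address(pci_hdr, bar_num);	/* assumed helper */
	u32 bar_size = pci__bar_size(pci_hdr, bar_num);

	if (bar_num == 1)	/* the memory BAR set up in virtio_pci__init() */
		return kvm__register_mmio(kvm, bar_addr, bar_size, false,
					  virtio_pci__io_mmio_callback, vdev);

	/* BAR 0 (I/O ports) and BAR 2 (MSI-X) register their own handlers. */
	return 0;
}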

In virtio_pci__data_{out,in}, the BAR space serves as the virtio negotiation area between guest and host: feature bits, queue configuration, device status, queue notification, and so on.

static bool virtio_pci__data_out(struct kvm_cpu *vcpu, struct virtio_device *vdev,
				 unsigned long offset, void *data, int size)
{
	bool ret = true;
	struct virtio_pci *vpci;
	struct kvm *kvm;
	u32 val;

	kvm = vcpu->kvm;
	vpci = vdev->virtio;

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		val = ioport__read32(data);
		virtio_set_guest_features(kvm, vdev, vpci->dev, val);
		break;
	case VIRTIO_PCI_QUEUE_PFN: // configure a virtio queue
		val = ioport__read32(data);
		if (val) {
			virtio_pci__init_ioeventfd(kvm, vdev,
						   vpci->queue_selector); // set up an ioeventfd for the selected queue
			vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector,
					   1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT,
					   VIRTIO_PCI_VRING_ALIGN, val); // initialise the userspace vring
		} else {
			virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector);
		}
		break;
	case VIRTIO_PCI_QUEUE_SEL: // select the virtio queue index
		vpci->queue_selector = ioport__read16(data);
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY: // the guest kicks the backend by writing here
				      // (with vhost the kick is handled in-kernel via
				      // ioeventfd and never reaches this code)
		val = ioport__read16(data);
		vdev->ops->notify_vq(kvm, vpci->dev, val);
		break;
	case VIRTIO_PCI_STATUS: // device status written by the guest
		vpci->status = ioport__read8(data);
		if (!vpci->status)
			vdev->endian = kvm_cpu__get_endianness(vcpu);
		virtio_notify_status(kvm, vdev, vpci->dev, vpci->status);
		break;
	default:
		ret = virtio_pci__specific_data_out(kvm, vdev, data, size, offset);
		break;
	};

	return ret;
}
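
From the guest's point of view, these offsets are the legacy virtio-pci register layout at the start of BAR 0 (GUEST_FEATURES at 4, QUEUE_PFN at 8, QUEUE_SEL at 14, QUEUE_NOTIFY at 16, STATUS at 18). The self-contained sketch below only prints the port writes a legacy guest driver would issue to set up and kick queue 0; the outb_/outw_/outl_ helpers are stand-ins for real port I/O, and the vring page frame number is made up.

#include <stdint.h>
#include <stdio.h>

/* Legacy virtio-pci register offsets inside BAR 0
 * (see include/uapi/linux/virtio_pci.h in the kernel). */
#define VIRTIO_PCI_GUEST_FEATURES  4
#define VIRTIO_PCI_QUEUE_PFN       8
#define VIRTIO_PCI_QUEUE_SEL      14
#define VIRTIO_PCI_QUEUE_NOTIFY   16
#define VIRTIO_PCI_STATUS         18

/* Stand-ins for real port I/O: they only log what the guest driver would write. */
static void outb_(uint8_t v, uint16_t p)  { printf("outb 0x%02x -> port 0x%04x\n", v, p); }
static void outw_(uint16_t v, uint16_t p) { printf("outw 0x%04x -> port 0x%04x\n", v, p); }
static void outl_(uint32_t v, uint16_t p) { printf("outl 0x%08x -> port 0x%04x\n", v, p); }

int main(void)
{
	uint16_t bar0      = 0x6200;	/* BAR 0 base, as in the boot log below */
	uint32_t vring_pfn = 0x12345;	/* made-up guest page frame of the vring */

	/* Select queue 0, hand the host its vring address, then kick the queue.
	 * Each of these writes traps into virtio_pci__data_out() on the host. */
	outw_(0, bar0 + VIRTIO_PCI_QUEUE_SEL);
	outl_(vring_pfn, bar0 + VIRTIO_PCI_QUEUE_PFN);
	outw_(0, bar0 + VIRTIO_PCI_QUEUE_NOTIFY);

	/* Set DRIVER_OK (0x4); a real driver sets ACKNOWLEDGE/DRIVER and
	 * negotiates features via VIRTIO_PCI_GUEST_FEATURES earlier on. */
	outb_(0x04, bar0 + VIRTIO_PCI_STATUS);
	return 0;
}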

On the guest side, the virtio-pci driver probes these devices along the following path. Because the kvmtool device only exposes the legacy virtio-pci layout (no modern capabilities), virtio_pci_modern_probe gives up and the probe falls back to the legacy path, which is what the "leaving for legacy driver" messages in the boot log below show.

virtio_pci_driver
    virtio_pci_probe
        virtio_pci_modern_probe -> falls back to virtio_pci_legacy_probe

[    0.372195] PCI: Probing PCI hardware
[    0.372519] PCI host bridge to bus 0000:00
[    0.372881] pci_bus 0000:00: root bus resource [io  0x0000-0xffff]
[    0.373418] pci_bus 0000:00: root bus resource [mem 0x00000000-0x7fffffffff]
[    0.374028] pci_bus 0000:00: No busn resource found for root bus, will use [bus 00-ff]
[    0.374767] pci 0000:00:00.0: [1af4:1009] type 00 class 0xff0000
[    0.375389] pci 0000:00:00.0: reg 0x10: [io  0x6200-0x62ff]         // per virtio_pci__init above: BAR 0 of virtio-pci device x
[    0.375913] pci 0000:00:00.0: reg 0x14: [mem 0xd2000000-0xd20000ff] // per virtio_pci__init above: BAR 1 of virtio-pci device x
[    0.376214] pci 0000:00:00.0: reg 0x18: [mem 0xd2000400-0xd20007ff]
[    0.377300] pci 0000:00:01.0: [1af4:1009] type 00 class 0xff0000
[    0.377920] pci 0000:00:01.0: reg 0x10: [io  0x6300-0x63ff]         // per virtio_pci__init above: BAR 0 of virtio-pci device y
[    0.378466] pci 0000:00:01.0: reg 0x14: [mem 0xd2000800-0xd20008ff] // per virtio_pci__init above: BAR 1 of virtio-pci device y
[    0.379047] pci 0000:00:01.0: reg 0x18: [mem 0xd2000c00-0xd2000fff]
[    0.380589] pci 0000:00:02.0: [1af4:1000] type 00 class 0x020000
[    0.381209] pci 0000:00:02.0: reg 0x10: [io  0x6400-0x64ff]         // per virtio_pci__init above: BAR 0 of virtio-pci device z
[    0.381736] pci 0000:00:02.0: reg 0x14: [mem 0xd2001000-0xd20010ff] // per virtio_pci__init above: BAR 1 of virtio-pci device z
[    0.382322] pci 0000:00:02.0: reg 0x18: [mem 0xd2001400-0xd20017ff]
[    0.387756] pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to 00

[    0.413901] pci_bus 0000:00: resource 4 [io  0x0000-0xffff]
[    0.414399] pci_bus 0000:00: resource 5 [mem 0x00000000-0x7fffffffff]
[    0.433793] virtio-pci 0000:00:00.0: virtio_pci: leaving for legacy driver
[    0.434506] virtio-pci 0000:00:01.0: virtio_pci: leaving for legacy driver
[    0.435215] virtio-pci 0000:00:02.0: virtio_pci: leaving for legacy driver

