在 KVMTOOL 中对于 PCI 设备的模拟,每个 virtio 后端驱动都会有如下类似这样的调用来初始化后端,例如virtio-vsock:virtio_vsock_init_one -> virtio_init -> virtio_pci__init(vdev->ops->init)。 virtio_pci__init 函数就是在模拟一个PCI 设备的配置空间。
int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, int device_id, int subsys_id, int class) { struct virtio_pci *vpci = vdev->virtio; u32 mmio_addr, msix_io_block; u16 port_addr; int r; vpci->kvm = kvm; vpci->dev = dev; BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE)); //pci设备io空间大小0x100 port_addr = pci_get_io_port_block(PCI_IO_SIZE);//从PCI_IOPORT_START(x86是 //0x6200)开始分配pci设备io port空间 mmio_addr = pci_get_mmio_block(PCI_IO_SIZE); //对于x86,从0xD2000000开始为pci设备分 //配mmio空间 msix_io_block = pci_get_mmio_block(VIRTIO_MSIX_BAR_SIZE); vpci->pci_hdr = (struct pci_device_header) { .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), //0x1af4. .device_id = cpu_to_le16(device_id), //eg, PCI_DEVICE_ID_VIRTIO_VSOCK (0x1012), 定义见 include/kvm/virtio- //pci-dev.h .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, .header_type = PCI_HEADER_TYPE_NORMAL, //表示这是一个EP设备(如果设置为 PCI_HEADER_TYPE_BRIDGE 表示桥) .revision_id = 0, .class[0] = class & 0xff, .class[1] = (class >> 8) & 0xff, .class[2] = (class >> 16) & 0xff, .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), .subsys_id = cpu_to_le16(subsys_id), //VIRTIO devide id //eg,VIRTIO_ID_VSOCK .bar[0] = cpu_to_le32(port_addr | PCI_base_ADDRESS_SPACE_IO), //PCI_base_ADDRESS_SPACE_IO = 1, I/O bar。从0x6200开始 .bar[1] = cpu_to_le32(mmio_addr | PCI_base_ADDRESS_SPACE_MEMORY), // PCI_base_ADDRESS_SPACE_MEMORY = 0, memory bar。从0xD2000000开始 .bar[2] = cpu_to_le32(msix_io_block | PCI_base_ADDRESS_SPACE_MEMORY), .status = cpu_to_le16(PCI_STATUS_CAP_LIST), .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, .bar_size[0] = cpu_to_le32(PCI_IO_SIZE), .bar_size[1] = cpu_to_le32(PCI_IO_SIZE), .bar_size[2] = cpu_to_le32(VIRTIO_MSIX_BAR_SIZE), }; r = pci__register_bar_regions(kvm, &vpci->pci_hdr, virtio_pci__bar_activate, // activate a bar virtio_pci__bar_deactivate, vdev); // deactivate a bar if (r < 0) return r; vpci->dev_hdr = (struct device_header) { .bus_type = DEVICE_BUS_PCI, .data = 
&vpci->pci_hdr, }; vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; vpci->pci_hdr.msix.next = 0; vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_NR_MSIX - 1); vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | VIRTIO_MSIX_TABLE_SIZE); vpci->config_vector = 0; if (irq__can_signal_msi(kvm)) vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; vpci->legacy_irq_line = pci__assign_irq(&vpci->pci_hdr); r = device__register(&vpci->dev_hdr); if (r < 0) return r; return 0; } int pci__assign_irq(struct pci_device_header *pci_hdr) { //配置 中断引脚 与 中断编号 pci_hdr->irq_pin = 1; pci_hdr->irq_line = irq__alloc_line(); if (!pci_hdr->irq_type) pci_hdr->irq_type = IRQ_TYPE_EDGE_RISING; return pci_hdr->irq_line; } // x86 guest 内存布局 // x86/include/kvm/kvm-arch.h #define KVM_32BIT_MAX_MEM_SIZE (1ULL << 32) //0x100000000 #define KVM_32BIT_GAP_SIZE (768 << 20) i// 0x30000000 > 768Mb #define KVM_32BIT_GAP_START (KVM_32BIT_MAX_MEM_SIZE - KVM_32BIT_GAP_SIZE) //0xD0000000 #define KVM_MMIO_START KVM_32BIT_GAP_START //0xD0000000 #define KVM_IOPORT_AREA 0x0 #define KVM_PCI_CFG_AREA (KVM_MMIO_START + 0x1000000) #define KVM_PCI_MMIO_AREA (KVM_MMIO_START + 0x2000000) //0xD2000000 #define KVM_VIRTIO_MMIO_AREA (KVM_MMIO_START + 0x3000000)
Guest 因 KVM_EXIT_MMIO/KVM_EXIT_IO 原因退出时,根据退出时访问的 addr 判断所属的 bar 空间,然后进行 IN/OUT 的操作,调用链如下:
kvm_cpu_thread(vcpu thread)
-> kvm_cpu__start
-> KVM_EXIT_{MMIO,IO}: kvm_cpu__emulate_{mmio,io}
-> kvm__emulate_{mmio,io}
-> mmio_fn(virtio_pci__io_mmio_callback)
-> virtio_pci__data_{in,out}
/*
 * Handler for trapped guest accesses to the device's BAR 0 (I/O ports) or
 * BAR 1 (MMIO).
 *
 * @vcpu:     vCPU that performed the access.
 * @addr:     guest address (port number or physical address) being accessed.
 * @data:     buffer holding the value written, or receiving the value read.
 * @len:      access width in bytes.
 * @is_write: non-zero for a guest write, zero for a guest read.
 * @ptr:      the struct virtio_device registered with this callback.
 *
 * The access is translated into an offset relative to the owning BAR and
 * forwarded to virtio_pci__data_in() / virtio_pci__data_out().
 */
static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
					 u64 addr, u8 *data, u32 len,
					 u8 is_write, void *ptr)
{
	struct virtio_device *vdev = ptr;
	struct virtio_pci *vpci = vdev->virtio;
	u32 base_addr;
	u32 bar0, bar1;

	bar0 = virtio_pci__port_addr(vpci);	/* base address held in BAR 0 */
	bar1 = virtio_pci__mmio_addr(vpci);	/* base address held in BAR 1 */

	/*
	 * Work out which BAR the address belongs to. Anything outside the
	 * BAR 0 window is treated as a BAR 1 access.
	 */
	if (addr >= bar0 && addr < bar0 + pci__bar_size(&vpci->pci_hdr, 0))
		base_addr = bar0;
	else
		base_addr = bar1;

	/* addr - base_addr is the offset of the access inside the BAR. */
	if (!is_write)
		virtio_pci__data_in(vcpu, vdev, addr - base_addr, data, len);
	else
		virtio_pci__data_out(vcpu, vdev, addr - base_addr, data, len);
}
在virtio_pci__data_{out,in}函数中,将bar空间作为guest与host之间的virtio feature协商空间,例如QUEUE的配置、PCI 设备状态、QUEUE NOTIFY等,
static bool virtio_pci__data_out(struct kvm_cpu *vcpu, struct virtio_device *vdev, unsigned long offset, void *data, int size) { bool ret = true; struct virtio_pci *vpci; struct kvm *kvm; u32 val; kvm = vcpu->kvm; vpci = vdev->virtio; switch (offset) { case VIRTIO_PCI_GUEST_FEATURES: val = ioport__read32(data); virtio_set_guest_features(kvm, vdev, vpci->dev, val); break; case VIRTIO_PCI_QUEUE_PFN: //配置virtio queue val = ioport__read32(data); if (val) { virtio_pci__init_ioeventfd(kvm, vdev, vpci->queue_selector); //为queue idx初始化eventfd vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, VIRTIO_PCI_VRING_ALIGN, val);//初始化用户态virtio queue } else { virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector); } break; case VIRTIO_PCI_QUEUE_SEL: //virtio queue index vpci->queue_selector = ioport__read16(data); break; case VIRTIO_PCI_QUEUE_NOTIFY: //guest需要通知host后端,写此offset(如果是vhost模式, //直接在内核态处理,不会走到这里) val = ioport__read16(data); vdev->ops->notify_vq(kvm, vpci->dev, val); break; case VIRTIO_PCI_STATUS: //guest virtio_pci 设备状态 vpci->status = ioport__read8(data); if (!vpci->status) vdev->endian = kvm_cpu__get_endianness(vcpu); virtio_notify_status(kvm, vdev, vpci->dev, vpci->status); break; default: ret = virtio_pci__specific_data_out(kvm, vdev, data, size, offset); break; }; return ret; }
Guest 侧 virtio-pci 设备驱动加载打印如下,
virtio_pci_driver virtio_pci_probe virtio_pci_modern_probe
[ 0.372195] PCI: Probing PCI hardware
[ 0.372519] PCI host bridge to bus 0000:00
[ 0.372881] pci_bus 0000:00: root bus resource [io 0x0000-0xffff]
[ 0.373418] pci_bus 0000:00: root bus resource [mem 0x00000000-0x7fffffffff]
[ 0.374028] pci_bus 0000:00: No busn resource found for root bus, will use [bus 00-ff]
[ 0.374767] pci 0000:00:00.0: [1af4:1009] type 00 class 0xff0000
[ 0.375389] pci 0000:00:00.0: reg 0x10: [io 0x6200-0x62ff] //根据上述virtio_pci__init的设置,可知这是virtio-pci设备x的bar0空间
[ 0.375913] pci 0000:00:00.0: reg 0x14: [mem 0xd2000000-0xd20000ff] //根据上述virtio_pci__init的设置,可知这是virtio-pci设备x的bar1空间
[ 0.376214] pci 0000:00:00.0: reg 0x18: [mem 0xd2000400-0xd20007ff]
[ 0.377300] pci 0000:00:01.0: [1af4:1009] type 00 class 0xff0000
[ 0.377920] pci 0000:00:01.0: reg 0x10: [io 0x6300-0x63ff] //根据上述virtio_pci__init的设置,可知这是virtio-pci设备y的bar0空间
[ 0.378466] pci 0000:00:01.0: reg 0x14: [mem 0xd2000800-0xd20008ff] //根据上述virtio_pci__init的设置,可知这是virtio-pci设备y的bar1空间
[ 0.379047] pci 0000:00:01.0: reg 0x18: [mem 0xd2000c00-0xd2000fff]
[ 0.380589] pci 0000:00:02.0: [1af4:1000] type 00 class 0x020000
[ 0.381209] pci 0000:00:02.0: reg 0x10: [io 0x6400-0x64ff] //根据上述virtio_pci__init的设置,可知这是virtio-pci设备z的bar0空间
[ 0.381736] pci 0000:00:02.0: reg 0x14: [mem 0xd2001000-0xd20010ff] //根据上述virtio_pci__init的设置,可知这是virtio-pci设备z的bar1空间
[ 0.382322] pci 0000:00:02.0: reg 0x18: [mem 0xd2001400-0xd20017ff]
[ 0.387756] pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to 00
[ 0.413901] pci_bus 0000:00: resource 4 [io 0x0000-0xffff]
[ 0.414399] pci_bus 0000:00: resource 5 [mem 0x00000000-0x7fffffffff]
[ 0.433793] virtio-pci 0000:00:00.0: virtio_pci: leaving for legacy driver
[ 0.434506] virtio-pci 0000:00:01.0: virtio_pci: leaving for legacy driver
[ 0.435215] virtio-pci 0000:00:02.0: virtio_pci: leaving for legacy driver
References:
1, virtio spec
2, 老男孩读PCIe之六:配置和地址空间
3, 原来PCIe这么简单,一定要看!_Linux阅码场-CSDN博客
4, PCI设备驱动之设备_huangweiqing80的博客-CSDN博客
5, VirtIO实现原理——PCI基础_Take Easy,Work Hard!-CSDN博客_virtio实现
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)