OSC2011 Tokyo/Fall 濃いバナ(virtio)

Embed Size (px)

Citation preview

  • 1. at OSC2011 Tokyo/Spring (2011/3/6) virtio (hasegaw at sra.co.jp) Twitter: @hasegaw Copyright (C) Software Research Associates, Inc. All Rights Reserved.

2. (HASEGAWA Takeshi) Twitter: @hasegaw SRA Linux Xen 20072009 LDAP Super Expert2006 2003 MYCOM2 3. 3 4. gihyo.jphttp://gihyo.jp/dev/serial/01/vm_work/ 4 5. LinuxKVM virtio VMware Tools IA-32 C (OS) 5 6. virtio () FreeBSD virtio 6 7. KVMLinux qemu-kvmqemu-kvmVirtual Machine Image Kernel-based Virtual MachineDevice Emulator(KVM) Virtual CPU K ernel-basedernel-basedSystem callsSystem calls qemu-kvm V irtualirtual M achine Device Driversachine Device Drivers VM IA-32 Linux Kernel Linux Kernel Hardware Hardware I/O ()7 8. x86I/O I/O ON(1) OFF(0) 1 864K8 I/O (Memory Mapped I/O) VGA, Sound, Disk Controller, Ethernet Controller (Interrupt) CPU (DMA, MMIO)8 9. QEMUx86CPU CPUIntel 82441FXIntel 82441FXSystem MemorySystem Memory (North-bridge) (North-bridge)Cirrus Logic Cirrus LogicLSI LogicVGA VGACL-GD5446 LSI LogicSCSI HDDSCSI HDD CL-GD5446LSI53c895a LSI53c895aRealtek Realtek PCIPCIPCI Slot EthernetEthernetRTL8029 PCI Slot RTL8029 BusBusPCI SlotPCI SlotEnsoniq Ensoniq SpeakerSpeaker ES1370IDE HDD IDE HDD ES1370PIIX3 PCI IDEPIIX3 PCI IDEIntel 82371 PIIX3 CD-ROM CD-ROM Intel 82371 PIIX3 Bochs (South-bridge)(South-bridge)BochsPIIX3 PCI USBPIIX3 PCI USB USB USB Flash BIOSFlash BIOSISA ISABus BusISA Floppy Floppy Real TimeReal TimeISAKeyboard Clock I/O InterfaceI/O InterfaceKeyboardClockPC Speaker PS/2 PS/2PC Speaker Mouse Serial ParallelMouseSerial ParallelPort Port Port Port : KVM (2010 ) 9 10. KVMx86 QEMUqemu-kvm qemu-kvm Xen, VirtualBoxQEMU Virtual Machine Image VM CPUOSOSDevice Emulator KVMQEMU QEMU Kernel-basedKernel-basedSystem calls System calls I/O Virtual Virtual MachineMachineDevice DriversDevice Drivers QEMUKVMKVMVMLinux KernelLinux Kernel VMHardware Hardware 10 11. 
KVMx86 H/W ( #1) qemu-kvm.c kvm-all.c kvm_run (VM)kvm_handle_io (I/O)599 int kvm_run(CPUState *env)733 static int kvm_handle_io(uint16_t port, void *data, int direction, int size,600 { 734uint32_t count)601 int r;735 {602 kvm_context_t kvm = &env->kvm_state->kvm_context; 736int i;603604struct kvm_run *run = env->kvm_run;int fd = env->kvm_fd;737 738 uint8_t *ptr = data; I/O605 739for (i = 0; i < count; i++) {606 again:VM740if (direction == KVM_EXIT_IO_IN) { 741switch (size) {610 } 742case 1:626 r = ioctl(fd, KVM_RUN, 0);743stb_p(ptr, cpu_inb(port));744break;645 if (1) {745case 2:646switch (run->exit_reason) {746stw_p(ptr, cpu_inw(port));647case KVM_EXIT_UNKNOWN:747break;650653 case KVM_EXIT_FAIL_ENTRY: case KVM_EXIT_EXCEPTION:748case 4:I/O749stl_p(ptr, cpu_inl(port));660661 case KVM_EXIT_IO: r = kvm_handle_io(run->io.port,750751} break;662 (uint8_t *)run + run->io.data_offset, 752} else {663 run->io.direction,753switch (size) {664 run->io.size, 754case 1:665 run->io.count); 755cpu_outb(port, ldub_p(ptr));666r = 0; I/O QEMU756break;667break; 757case 2:668case KVM_EXIT_DEBUG: 758cpu_outw(port, lduw_p(ptr));694 default: 759760 break; case 4:701}761cpu_outl(port, ldl_p(ptr));ioport.c ioport.c702 } 762break; 201 void cpu_outw(pio_addr_t addr, uint16_t val) 70 static void ioport_write(int index, uint32_t address, uint32_t data)703 more: 763} 202 { 71 {704 if (!r) {72static IOPortWriteFunc * const default_func[3] = { 764} 203 LOG_IOPORT("outw: %04"FMT_pioaddr" %04"PRIx16"n", addr, val);705goto again; 73default_ioport_writeb, 765204 ioport_write(1, addr, val);706 }74default_ioport_writew, 766ptr +=205 }size;707 return r;75default_ioport_writel767} 76}; 768708 } 77IOPortWriteFunc *func = ioport_write_table[index][address];769return 1; 78if (!func) 770 } 79 80func = default_func[index];func(ioport_opaque[address], address, data); I/O 81 } 11 12. 
KVMx86 H/W ( #)qemu-0.12.5/hw/fdc.c () QEMUI/O589 static uint32_t fdctrl_read_port (void *opaque, uint32_t reg)590 {591 return fdctrl_read(opaque, reg & 7);QEMU 592 }5931955 static int isabus_fdc_init1(ISADevice *dev)594 static void fdctrl_write_port (void *opaque, uint32_t reg, uint32_t value)1956 {595 {1957fdctrl_isabus_t *isa = DO_UPCAST(fdctrl_isabus_t, busdev, dev); 596 fdctrl_write(opaque, reg & 7, value);1958fdctrl_t *fdctrl = &isa->state; 597 }1959int iobase = 0x3f0; 5981960int isairq = 6; 599 static uint32_t fdctrl_read_mem (void *opaque, target_phys_addr_t reg)1961int dma_chann = 2;600 {QEMU I/O1962int ret;529 601static uint32_t fdctrl_read (void(uint32_t)reg);return fdctrl_read(opaque, *opaque, uint32_t reg)1963530 602 }{1964register_ioport_read(iobase + 0x01, 5, 1, 531 603 fdctrl_t *fdctrl = opaque;1965&fdctrl_read_port, fdctrl); 532 604 static void fdctrl_write_mem (void *opaque, uint32_t retval;1966register_ioport_read(iobase + 0x07, 1, 1, 533 605 target_phys_addr_t reg, uint32_t value)1967&fdctrl_read_port, fdctrl); 534 606 {switch (reg) {1968register_ioport_write(iobase + 0x01, 5, 1,535 607 case FD_REG_SRA:fdctrl_write(opaque, (uint32_t)reg, value);1969 &fdctrl_write_port, fdctrl); 536 608 }retval = fdctrl_read_statusA(fdctrl);1970register_ioport_write(iobase + 0x07, 1, 1,537 break;1971 &fdctrl_write_port, fdctrl); 538 case FD_REG_SRB:1972isa_init_irq(&isa->busdev, &fdctrl->irq, isairq); 539 retval = fdctrl_read_statusB(fdctrl);1973fdctrl->dma_chann = dma_chann;540 break;1974541 case FD_REG_DOR:1975ret = fdctrl_init_common(fdctrl, iobase); 542 retval = fdctrl_read_dor(fdctrl);1976543 break;1977return ret; 1978 }556 default:557 retval = (uint32_t)(-1); 558 break; 847 /* Status B register : 0x01 (read-only) */ 559 } 848 static uint32_t fdctrl_read_statusB (fdctrl_t *fdctrl) 561 849 {562 return retval; 850 uint32_t retval = fdctrl->srb; 563 } 854 return retval; 855 }12 13. 
v1.1 (hbstudy 17) http://www.slideshare.net/TakeshiHasegawa1/20101126hbstudy17 gihyo.jp http://gihyo.jp/dev/serial/01/vm_work13 14. - I/O 1 MAC DMA IRQ IRQ In Service Register (ISR) IRQ 14 15. NIC IA-32 VMEXIT KVM qemu-kvm qemu-kvm KVM VMENTER IA-32 IRQ IRQISR 15 16. -VM Micro-architectural improvements (cycles) *13.8GHz 2.66GHzP4 672Core 2 Duo VM entry2409937 Page fault VM exit1931 1186 *2VMCB read17852VMCB write 17144*1 Keith Adams, Ole Agesen Comparison of Software and Hardware Techniques for x86 Virtualization VMware, 2010*2 Mar 1 2010 (Nehalem) 16 17. I/OI/O(I/OI/O I/OCPU () I/O -I/F(virtio17 18. virtio Host Guest GuestGuestvirtiovirtio virtio virtio virtiovirtiovirtio virtio Host Guest OS Balloon BalloonBlock BlockBlock BlockNet Net qemu-kvm, VirtualBox virtiovirtio virtiovirtio virtiovirtio virtiovirtioBalloonBlockBlockNet BalloonBlockBlockNet PCI Bus PCI BusHost Host virtio Block x2 PCI x2 H/W 18 19. virtiovirtio-pci (PCI) I/F PCI I/Ovirtio ring () 19 20. virtio Host Guest Guest Guest 1virtio virtiovirtio virtiovirtio virtiovirtio virtio Balloon Balloon BlockBlockBlockBlock Net Net virtiovirtiovirtio virtiovirtio virtiovirtio virtiovirtioBalloonBlock BlockNet BalloonBlock BlockNet PCI Bus Guest PCI Bus HostHost GuestHost(Guest-Physical) 20 21. virtio-net 0 GuestGuest virtio-net virtio-net 1Queue 0Queue 1Queue 2 2 virtio-net PCI virtio-net PCIPCI Bus PCI BusHost Host21 22. virtio(1/2)Guest Guest 2 Guest vring.avail.idx(vring.avail.idx) Host(vring.used.idx)vring.used.idx Lock-Free Host Host Guest Guest ; while (last_used_idx < vring.used.idx) {vring.avail.idx++; ; last_used_idx ++; Host } while (vring.used.idx