Linux 4.14实现:
KVM Guest Exit 实现原理:
- 进入Guest OS之前保存Host的调用__guest_enter时的context以便Guest退出时返回该调用点继续支持
- Guest退出Trap到EL2时,则会借用__guest_exit返回enter时的调用点继续执行
例如:Guest OS访问GICD Trap时,el1_sync -》
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | ENTRY(__kvm_hyp_vector) ventry el2t_sync_invalid // Synchronous EL2t ventry el2t_irq_invalid // IRQ EL2t ventry el2t_fiq_invalid // FIQ EL2t ventry el2t_error_invalid // Error EL2t ventry el2h_sync_invalid // Synchronous EL2h ventry el2h_irq_invalid // IRQ EL2h ventry el2h_fiq_invalid // FIQ EL2h ventry el2_error // Error EL2h ventry el1_sync // Synchronous 64-bit EL1 ventry el1_irq // IRQ 64-bit EL1 ventry el1_fiq_invalid // FIQ 64-bit EL1 ventry el1_error // Error 64-bit EL1 ventry el1_sync // Synchronous 32-bit EL1 ventry el1_irq // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | el1_sync: // Guest trapped into EL2 stp x0, x1, [sp, #-16]! mrs x0, esr_el2 lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap mrs x1, vttbr_el2 // If vttbr is valid, the guest cbnz x1, el1_hvc_guest // called HVC /* Here, we're pretty sure the host called HVC. */ ldp x0, x1, [sp], #16 /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs 1f /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. Since we use kimage_voffset, do not use the * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead * (by loading it from the constant pool). * * Preserve x0-x4, which may contain stub parameters. */ ldr x5, =__kvm_handle_stub_hvc ldr_l x6, kimage_voffset /* x5 = __pa(x5) */ sub x5, x5, x6 br x5 1: /* * Perform the EL2 call */ kern_hyp_va x0 do_el2_call eret el1_hvc_guest: /* * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. * The workaround has already been applied on the host, * so let's quickly get back to the guest. We don't bother * restoring x1, as it can be clobbered anyway. */ ldr x1, [sp] // Guest's x0 eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 cbz w1, wa_epilogue /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) cbnz w1, el1_trap #ifdef CONFIG_ARM64_SSBD alternative_cb arm64_enable_wa2_handling b wa2_end alternative_cb_end get_vcpu_ptr x2, x0 ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] // Sanitize the argument and update the guest flags ldr x1, [sp, #8] // Guest's x1 clz w1, w1 // Murphy's device: lsr w1, w1, #5 // w1 = !!w1 without using eor w1, w1, #1 // the flags... bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 str x0, [x2, #VCPU_WORKAROUND_FLAGS] /* Check that we actually need to perform the call */ hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 cbz x0, wa2_end mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 smc #0 /* Don't leak data from the SMC call */ mov x3, xzr wa2_end: mov x2, xzr mov x1, xzr #endif wa_epilogue: mov x0, xzr add sp, sp, #16 eret el1_trap: get_vcpu_ptr x1, x0 mrs x0, esr_el2 lsr x0, x0, #ESR_ELx_EC_SHIFT /* * x0: ESR_EC * x1: vcpu pointer */ /* * We trap the first access to the FP/SIMD to save the host context * and restore the guest context lazily. * If FP/SIMD is not implemented, handle the trap and inject an * undefined instruction exception to the guest. */ alternative_if_not ARM64_HAS_NO_FPSIMD cmp x0, #ESR_ELx_EC_FP_ASIMD b.eq __fpsimd_guest_restore alternative_else_nop_endif mov x0, #ARM_EXCEPTION_TRAP b __guest_exit el1_irq: stp x0, x1, [sp, #-16]! get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit el1_error: stp x0, x1, [sp, #-16]! get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit el2_error: /* * Only two possibilities: * 1) Either we come from the exit path, having just unmasked * PSTATE.A: change the return code to an EL2 fault, and * carry on, as we're already in a sane state to handle it. * 2) Or we come from anywhere else, and that's a bug: we panic. * * For (1), x0 contains the original return code and x1 doesn't * contain anything meaningful at that stage. We can reuse them * as temp registers. * For (2), who cares? */ mrs x0, elr_el2 adr x1, abort_guest_exit_start cmp x0, x1 adr x1, abort_guest_exit_end ccmp x0, x1, #4, ne b.ne __hyp_panic mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) eret |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | /* * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); */ ENTRY(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros // x18: guest context // Store the host regs save_callee_saved_regs x1 add x18, x0, #VCPU_CONTEXT // Restore guest regs x0-x17 ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] // Restore guest regs x19-x29, lr restore_callee_saved_regs x18 // Restore guest reg x18 ldr x18, [x18, #CPU_XREG_OFFSET(18)] // Do not touch any register after this! eret ENDPROC(__guest_enter) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | ENTRY(__guest_exit) // x0: return code // x1: vcpu // x2-x29,lr: vcpu regs // vcpu x0-x1 on the stack add x1, x1, #VCPU_CONTEXT ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) // Store the guest regs x2 and x3 stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] // Retrieve the guest regs x0-x1 from the stack ldp x2, x3, [sp], #16 // x0, x1 // Store the guest regs x0-x1 and x4-x18 stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] stp x8, x9, [x1, #CPU_XREG_OFFSET(8)] stp x10, x11, [x1, #CPU_XREG_OFFSET(10)] stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] str x18, [x1, #CPU_XREG_OFFSET(18)] // Store the guest regs x19-x29, lr save_callee_saved_regs x1 get_host_ctxt x2, x3 // Now restore the host regs restore_callee_saved_regs x2 // If we have a pending asynchronous abort, now is the // time to find out. From your VAXorcist book, page 666: // "Threaten me not, oh Evil one! For I speak with // the power of DEC, and I command thee to show thyself!" mrs x2, elr_el2 mrs x3, esr_el2 mrs x4, spsr_el2 mov x5, x0 dsb sy // Synchronize against in-flight ld/st msr daifclr, #4 // Unmask aborts // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. .global abort_guest_exit_start abort_guest_exit_start: isb .global abort_guest_exit_end abort_guest_exit_end: // If the exception took place, restore the EL1 exception // context so that we can report some information. // Merge the exception code with the SError pending bit. tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 orr x0, x0, x5 1: ret //返回Host元enter Guest的调用点 ENDPROC(__guest_exit) |
Linux 5.6实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | ```c /* * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); */ SYM_FUNC_START(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros // x29: guest context // Store the host regs /* 在进入Guest OS之前,保存Host中调用点的信息,以便Guest OS退出时从该代码行继续执行 */ save_callee_saved_regs x1 // Now the host state is stored if we have a pending RAS SError it must // affect the host. If any asynchronous exception is pending we defer // the guest entry. The DSB isn't necessary before v8.2 as any SError // would be fatal. alternative_if ARM64_HAS_RAS_EXTN dsb nshst isb alternative_else_nop_endif mrs x1, isr_el1 //将ISR_EL1, Interrupt Status Register读取到x1中 cbz x1, 1f //x1为0则跳转到1: no irq, no fiq, no SError pending mov x0, #ARM_EXCEPTION_IRQ //x0中保存了ARM_EXCEPTION_IRQ ret //如果有pending的irq/fiq/SError,那么先不进入Guest OS? 1: add x29, x0, #VCPU_CONTEXT //x29 = x0 + #VCPU_CONTEXT (偏移) // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) // The below macro to restore guest keys is not implemented in C code // as it may cause Pointer Authentication key signing mismatch errors // when this feature is enabled for kernel code. ptrauth_switch_to_guest x29, x0, x1, x2 // Restore guest regs x0-x17 ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] // Restore guest regs x18-x29, lr restore_callee_saved_regs x29 // Do not touch any register after this! //恢复了guest OS的上下文,eret回到Guest OS的EL1模式 eret sb SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // x0: return code // x1: vcpu // x2-x29,lr: vcpu regs // vcpu x0-x1 on the stack add x1, x1, #VCPU_CONTEXT ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) // Store the guest regs x2 and x3 stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] // Retrieve the guest regs x0-x1 from the stack ldp x2, x3, [sp], #16 // x0, x1 // Store the guest regs x0-x1 and x4-x17 stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] stp x8, x9, [x1, #CPU_XREG_OFFSET(8)] stp x10, x11, [x1, #CPU_XREG_OFFSET(10)] stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] // Store the guest regs x18-x29, lr save_callee_saved_regs x1 get_host_ctxt x2, x3 // Macro ptrauth_switch_to_guest format: // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3) // The below macro to save/restore keys is not implemented in C code // as it may cause Pointer Authentication key signing mismatch errors // when this feature is enabled for kernel code. ptrauth_switch_to_host x1, x2, x3, x4, x5 // Now restore the host regs restore_callee_saved_regs x2 alternative_if ARM64_HAS_RAS_EXTN // If we have the RAS extensions we can consume a pending error // without an unmask-SError and isb. The ESB-instruction consumed any // pending guest error when we took the exception from the guest. mrs_s x2, SYS_DISR_EL1 str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] cbz x2, 1f msr_s SYS_DISR_EL1, xzr orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT) 1: ret alternative_else dsb sy // Synchronize against in-flight ld/st isb // Prevent an early read of side-effect free ISR mrs x2, isr_el1 tbnz x2, #8, 2f // ISR_EL1.A ret nop 2: alternative_endif // We know we have a pending asynchronous abort, now is the // time to flush it out. From your VAXorcist book, page 666: // "Threaten me not, oh Evil one! For I speak with // the power of DEC, and I command thee to show thyself!" mrs x2, elr_el2 mrs x3, esr_el2 mrs x4, spsr_el2 mov x5, x0 msr daifclr, #4 // Unmask aborts // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. .global abort_guest_exit_start abort_guest_exit_start: isb .global abort_guest_exit_end abort_guest_exit_end: msr daifset, #4 // Mask aborts // If the exception took place, restore the EL1 exception // context so that we can report some information. // Merge the exception code with the SError pending bit. tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 orr x0, x0, x5 1: ret SYM_FUNC_END(__guest_enter) |
1 |