x86, HYP: Remove Lazy VCPU FPU Switching

On x86 both the VM monitor and the VM guest have their own FPU state.
The FPU of the monitor is controlled with the seL4_TCBFlag_fpuDisabled
TCB flag. For the guest running on the VCPU the FPU will always be
enabled.

Signed-off-by: Indan Zupancic <indan@nul.nu>
This commit is contained in:
Indan Zupancic
2024-09-09 16:56:32 +01:00
committed by Gerwin Klein
parent 5035def0b9
commit e0a50c8205

View File

@@ -1276,21 +1276,7 @@ static inline void finishVmexitSaving(void)
/* Update our cache of what is in the vmcs. This is the only value
* that we cache that can be modified by the guest during execution */
vcpu->cached_cr0 = vmread(VMX_GUEST_CR0);
if (vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) {
/* If the vcpu owns the fpu then we did not modify the active cr0 to anything different
* to what the VCPU owner requested, so we can update it with any modifications
* the guest may have made */
vcpu->cr0 = vcpu->cached_cr0;
} else {
/* If the vcpu does not own the fpu then we will have forced the task switched flag
* to be set in the cr0 that was put into the vmcs. Since the VCPU owner (or the guest)
* may believe the task switched flag is not set, and the guest may have modified other
* parts of cr0 during execution then to update the desired cr0 value to be the cr0
* value from the vmcs (thus pulling in any modifications the guest made) but removing
* the task switched flag that we set and then adding back in the task switched flag
* that may be in the desired current cr0 */
vcpu->cr0 = (vcpu->cached_cr0 & ~CR0_TASK_SWITCH) | (NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 & CR0_TASK_SWITCH);
}
vcpu->cr0 = vcpu->cached_cr0;
}
exception_t handleVmexit(void)
@@ -1321,89 +1307,6 @@ exception_t handleVmexit(void)
NODE_LOCK_SYS;
if (!vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) {
/* since this vcpu does not currently own the fpu state, check if the kernel should
* switch the fpu owner or not. We switch if the guest performed and unimplemented device
* exception AND the owner of this vcpu has not requested that these exceptions be forwarded
* to them (i.e. if they have not explicitly set the unimplemented device exception in the
* exception_bitmap) */
if (reason == EXCEPTION_OR_NMI && !(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->exception_bitmap & BIT(int_unimpl_dev))) {
interrupt = vmread(VMX_DATA_EXIT_INTERRUPT_INFO);
/* The exception number is the bottom 8 bits of the interrupt info */
if ((interrupt & 0xff) == int_unimpl_dev) {
switchLocalFpuOwner(&NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->fpuState);
return EXCEPTION_NONE;
}
} else if (reason == CONTROL_REGISTER && !(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_mask & CR0_TASK_SWITCH)) {
/* we get here if the guest is attempting to write to a control register that is set (by
* a 1 bit in the cr0 mask) as being owned by the host. If we got here then the previous check
* on cr0_mask meant that the VCPU owner did not claim ownership of the the task switch bit
* however we may have temporarily claimed ownership for the purposes of FPU switching.
* At this point we could still have a false positive, as the guest could be attempted to
* manipulate bits that are not task switch, so we still have to be careful and propagate
* all or some of an attempted write */
qualification = vmread(VMX_DATA_EXIT_QUALIFICATION);
vmx_data_exit_qualification_control_register_t qual;
qual.words[0] = qualification;
/* We only care about some of the exit qualification cases, we handle them here
* and will deliver any others through to fault handler */
switch (vmx_data_exit_qualification_control_register_get_access_type(qual)) {
case VMX_EXIT_QUAL_TYPE_MOV_CR: {
/* check for cr0 */
if (vmx_data_exit_qualification_control_register_get_cr(qual) == 0) {
vcpu_gp_register_t source = crExitRegs[vmx_data_exit_qualification_control_register_get_reg(qual)];
word_t value;
if (source == VCPU_ESP) {
/* ESP is the only register that is is not part of the general purpose
* registers that we have to save and restore ourselves, so we need to
* get this one from the vmcs */
value = vmread(VMX_GUEST_RSP);
} else {
value = NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->gp_registers[source];
}
/* First unset the task switch bit in cr0 */
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 &= ~CR0_TASK_SWITCH;
/* now set it to the value we were given */
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 |= value & CR0_TASK_SWITCH;
/* check if there are any parts of the write remaining to forward. we only need
* to consider bits that the hardware will not have handled without faulting, which
* is writing any bit such that it is different to the shadow, but only considering
* bits that the VCPU owner has declared that they want to own (via the cr0_shadow)
*/
if (!((value ^ NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_shadow) &
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_mask)) {
return EXCEPTION_NONE;
}
}
break;
}
case VMX_EXIT_QUAL_TYPE_CLTS: {
/* Easy case. Just remove the task switch bit out of cr0 */
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 &= ~CR0_TASK_SWITCH;
return EXCEPTION_NONE;
}
case VMX_EXIT_QUAL_TYPE_LMSW: {
uint16_t value = vmx_data_exit_qualification_control_register_get_data(qual);
/* First unset the task switch bit in cr0 */
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 &= ~CR0_TASK_SWITCH;
/* now set it to the value we were given */
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 |= value & CR0_TASK_SWITCH;
/* check if there are any parts of the write remaining to forward. we only need
* to consider bits that the hardware will not have handled without faulting, which
* is writing any bit such that it is different to the shadow, but only considering
* bits that the VCPU owner has declared that they want to own (via the cr0_shadow).
* Additionally since LMSW only loads the bottom 4 bits of CR0 we only consider
* the low 4 bits
*/
if (!((value ^ NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_shadow) &
NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_mask & MASK(4))) {
return EXCEPTION_NONE;
}
break;
}
}
}
}
switch (reason) {
case EXCEPTION_OR_NMI:
case MOV_DR:
@@ -1518,58 +1421,6 @@ static void setEPTRoot(cap_t vmxSpace, vcpu_t *vcpu)
}
}
static void handleLazyFpu(void)
{
vcpu_t *vcpu = NODE_STATE(ksCurThread)->tcbArch.tcbVCPU;
word_t cr0 = vcpu->cr0;
word_t exception_bitmap = vcpu->exception_bitmap;
word_t cr0_mask = vcpu->cr0_mask;
word_t cr0_shadow = vcpu->cr0_shadow;
/* if the vcpu actually owns the fpu then we do not need to change any bits
* and so we will put into the vmcs precisely what the VCPU owner has requested */
if (!vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) {
/* when the vcpu doesn't own the fpu we need to force the task switched flag
* so that we can take an exception and perform lazy fpu switching */
cr0 |= CR0_TASK_SWITCH;
/* once we have forced the task switched flag we also need to handle
* unimplemented device exceptions so we force these to trap. In the case
* where the VCPU owner had already set these to trap then this well be
* checked in handleVmexit and the exception will get forwarded */
exception_bitmap |= BIT(int_unimpl_dev);
/* we need to claim ownership of the task switch bit so that any modifications
* of it cause an exception and we must do this regardless of whether the
* VCPU owner also wants to claim ownership. In the scenario where the VCPU
* owner had also claimed ownership then in handleVmexit we won't actually
* do any fpu switching and will forward the fault */
cr0_mask |= CR0_TASK_SWITCH;
/* since we have forced a value in the cr0 mask we need to set an appropriate value
* in the cr0 read shadow. If the VCPU owner is also masking this bit then
* we should use the value they have put in the cr0 read shadow. If they aren't
* then the read shadow will contain garbage and we should instead set the
* read shadow to the actual desired cr0 value */
if (!(vcpu->cr0_mask & CR0_TASK_SWITCH)) {
cr0_shadow &= ~CR0_TASK_SWITCH;
cr0_shadow |= vcpu->cr0 & CR0_TASK_SWITCH;
}
}
if (cr0 != vcpu->cached_cr0) {
vmwrite(VMX_GUEST_CR0, cr0);
vcpu->cached_cr0 = cr0;
}
if (exception_bitmap != vcpu->cached_exception_bitmap) {
vmwrite(VMX_CONTROL_EXCEPTION_BITMAP, exception_bitmap);
vcpu->cached_exception_bitmap = exception_bitmap;
}
if (cr0_mask != vcpu->cached_cr0_mask) {
vmwrite(VMX_CONTROL_CR0_MASK, cr0_mask);
vcpu->cached_cr0_mask = cr0_mask;
}
if (cr0_shadow != vcpu->cached_cr0_shadow) {
vmwrite(VMX_CONTROL_CR0_READ_SHADOW, cr0_shadow);
vcpu->cached_cr0_shadow = cr0_shadow;
}
}
void clearVPIDIOPortMappings(vpid_t vpid, uint16_t first, uint16_t last)
{
if (vpid == VPID_INVALID) {
@@ -1658,7 +1509,14 @@ void restoreVMCS(void)
}
}
setEPTRoot(TCB_PTR_CTE_PTR(NODE_STATE(ksCurThread), tcbArchEPTRoot)->cap, expected_vmcs);
handleLazyFpu();
/* Make sure FPU is enabled */
if (vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) {
enableFpu();
} else {
/* When the vcpu doesn't own the fpu we need to switch FPU state */
switchLocalFpuOwner(&expected_vmcs->fpuState);
}
}
void invept(ept_pml4e_t *ept_pml4)