hw: fix "hw: x86_64: clean up vCPU startup"

Syncing the vCPU state with the VMM before the virtualization technology
was inititialized on the target CPU core may fail, such as on VMX where
the resulting `vmread` instructions will cause #UD faults.

Skip syncing the vCPU state when the vCPU isn't initialized
yet and sync the initial `Vcpu_state` from the VMM when the vCPU is
known to be initialized.

Issue #5474
This commit is contained in:
Benjamin Lamowski 2025-03-17 16:33:26 +01:00 committed by Norman Feske
parent 71e9b603a7
commit dd8b78575e
5 changed files with 40 additions and 71 deletions

View File

@ -1,58 +0,0 @@
/*
* \brief Interface between kernel and hypervisor
* \author Benjamin Lamowski
* \date 2022-10-14
*/
/*
* Copyright (C) 2022 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
#ifndef _SPEC__PC__VIRTUALIZATION_HYPERVISOR_H_
#define _SPEC__PC__VIRTUALIZATION_HYPERVISOR_H_
#include <base/stdint.h>
#include <base/log.h>
namespace Hypervisor {
using Call_arg = Genode::umword_t;
using Call_ret = Genode::umword_t;
inline void restore_state_for_entry(Call_arg regs, Call_arg fpu_context)
{
asm volatile(
"fxrstor (%[fpu_context]);"
"mov %[regs], %%rsp;"
"popq %%r8;"
"popq %%r9;"
"popq %%r10;"
"popq %%r11;"
"popq %%r12;"
"popq %%r13;"
"popq %%r14;"
"popq %%r15;"
"popq %%rax;"
"popq %%rbx;"
"popq %%rcx;"
"popq %%rdx;"
"popq %%rdi;"
"popq %%rsi;"
"popq %%rbp;"
"sti;" /* maybe enter the kernel to handle an external
interrupt that occured ... */
"nop;"
"cli;" /* ... otherwise, just disable interrupts again */
"jmp _kernel_entry;"
:
: [regs] "r"(regs), [fpu_context] "r"(fpu_context)
: "memory");
};
}
#endif /* _SPEC__PC__VIRTUALIZATION_HYPERVISOR_H_ */

View File

@ -14,6 +14,7 @@
#include <base/internal/page_size.h>
#include <base/log.h>
#include <hw/spec/x86_64/x86_64.h>
#include <hw/memory_consts.h>
#include <kernel/cpu.h>
#include <platform.h>
#include <spec/x86_64/virtualization/svm.h>
@ -496,7 +497,7 @@ void Vmcb::switch_world(Core::Cpu::Context &regs, addr_t stack_start)
that occured ... */
"nop;"
"cli;" /* ... otherwise, just disable interrupts again */
"subq $568, %%rsp;" /* keep room for fpu and general-purpose registers */
"subq %[stack_offset], %%rsp;" /* keep room for fpu and general-purpose registers */
"pushq %[trap_vmexit];" /* make the stack point to trapno, the right place
to jump to _kernel_entry. We push 256 because
this is outside of the valid range for interrupts
@ -504,11 +505,12 @@ void Vmcb::switch_world(Core::Cpu::Context &regs, addr_t stack_start)
"jmp _kernel_entry;" /* jump to _kernel_entry to save the
GPRs without breaking any */
:
: [regs] "r" (&regs.r8),
[fpu_context] "r" (&regs.fpu_context()),
[guest_state] "r" (vcpu_data.phys_addr + get_page_size()),
[host_state] "r" (root_vmcb_phys),
[stack] "r" (stack_start),
[trap_vmexit] "i"(TRAP_VMEXIT)
: [regs] "r" (&regs.r8),
[fpu_context] "r" (&regs.fpu_context()),
[guest_state] "r" (vcpu_data.phys_addr + get_page_size()),
[host_state] "r" (root_vmcb_phys),
[stack] "r" (stack_start),
[stack_offset] "i" (Hw::Mm::KERNEL_STACK_ERRCODE_OFFSET),
[trap_vmexit] "i"(TRAP_VMEXIT)
: "rax", "memory");
}

View File

@ -24,7 +24,6 @@
#include <kernel/main.h>
#include <hw/spec/x86_64/x86_64.h>
#include <virtualization/hypervisor.h>
#include <virtualization/svm.h>
#include <virtualization/vmx.h>
@ -77,6 +76,13 @@ void Vm::proceed()
_vcpu_context.initialize(_cpu(),
reinterpret_cast<addr_t>(_id.table));
_vcpu_context.tsc_aux_host = _cpu().id();
_vcpu_context.init_state = Board::Vcpu_context::Init_state::STARTED;
/*
* Sync the initial state from the VMM that was skipped due to
* the vCPU being uninitialized.
*/
_sync_from_vmm();
}
Cpu::Ia32_tsc_aux::write(
@ -161,6 +167,15 @@ void Vm::exception(Genode::Cpu_state &state)
void Vm::_sync_to_vmm()
{
/*
* If the vCPU isn't initialized, sync instructions such as vmread may fail.
* Just sync the startup exit and skip the rest of the synchronization.
*/
if (_vcpu_context.init_state != Board::Vcpu_context::Init_state::STARTED) {
_state.exit_reason = (unsigned) Board::EXIT_STARTUP;
return;
}
_vcpu_context.write_vcpu_state(_state);
/*
@ -173,6 +188,13 @@ void Vm::_sync_to_vmm()
void Vm::_sync_from_vmm()
{
/*
* Syncing the state to an unitialized vCPU may fail.
* The inial state from the VMM will be synced after vCPU initialization.
*/
if (_vcpu_context.init_state != Board::Vcpu_context::Init_state::STARTED)
return;
_vcpu_context.read_vcpu_state(_state);
}

View File

@ -14,6 +14,7 @@
#include <base/log.h>
#include <cpu/cpu_state.h>
#include <hw/spec/x86_64/x86_64.h>
#include <hw/memory_consts.h>
#include <kernel/cpu.h>
#include <platform.h>
#include <spec/x86_64/kernel/panic.h>
@ -363,10 +364,11 @@ void Vmcs::initialize(Kernel::Cpu &cpu, Genode::addr_t page_table_phys)
write(E_HOST_IA32_SYSENTER_EIP, reinterpret_cast<Genode::uint64_t>(&kernel_entry_push_trap));
/*
* Set the RSP to trapno, so that _kernel_entry will save the registers
* into the right fields.
* Set the RSP to a stack offset such that kernel_entry_push_trap pushes
* the `TRAP_VMEXIT` value to the right place for consumption by the
* subsequent jump to _kernel_entry.
*/
write(E_HOST_RSP, cpu.stack_start() - 568);
write(E_HOST_RSP, cpu.stack_start() - Hw::Mm::KERNEL_STACK_ERRCODE_OFFSET);
write(E_HOST_RIP, reinterpret_cast<Genode::uint64_t>(&kernel_entry_push_trap));
}

View File

@ -16,8 +16,9 @@
#namespace Hw {
# namespace Mm {
HW_MM_KERNEL_START = 0xffffffc000000000
HW_MM_KERNEL_STACK_SIZE = 0x10000
HW_MM_KERNEL_START = 0xffffffc000000000
HW_MM_KERNEL_STACK_SIZE = 0x10000
HW_MM_KERNEL_STACK_ERRCODE_OFFSET = 568
HW_MM_CPU_LOCAL_MEMORY_AREA_START = 0xffffffe070000000
HW_MM_CPU_LOCAL_MEMORY_AREA_SIZE = 0x10000000