hw: prepare SVM implementation for generic virtualization support

The SVM implementation did not lend itself to selecting the x86
virtualization technology at runtime.

Encapsulate functionality in the VMCB class to facilitate adding support
for Intel's VMX.

Issue #5113
Benjamin Lamowski 2024-02-07 11:26:14 +01:00 committed by Christian Helmuth
parent 3a88d133ed
commit 0d1716b07d
5 changed files with 364 additions and 365 deletions
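The direction of the change is to hide the SVM specifics behind a uniform
interface so that an Intel VMX back end can later be selected at runtime.
The following self-contained sketch only illustrates that idea; the
interface, the back-end classes and the vendor check are hypothetical and
not part of this commit:

/* Illustrative sketch only, all names hypothetical: a backend-neutral
 * interface that an SVM (VMCB-based) and a future VMX (VMCS-based)
 * implementation could both provide. */
struct Virt_interface
{
	virtual void initialize(unsigned cpu_id, unsigned long table_phys) = 0;
	virtual void read_vcpu_state()  = 0;   /* VMM state -> hardware structure */
	virtual void write_vcpu_state() = 0;   /* hardware structure -> VMM state */
	virtual ~Virt_interface() { }
};

struct Svm_backend : Virt_interface   /* would wrap the Vmcb of this commit */
{
	void initialize(unsigned, unsigned long) override { }
	void read_vcpu_state()  override { }
	void write_vcpu_state() override { }
};

struct Vmx_backend : Virt_interface   /* future Intel VMX support */
{
	void initialize(unsigned, unsigned long) override { }
	void read_vcpu_state()  override { }
	void write_vcpu_state() override { }
};

/* hypothetical runtime selection, e.g. based on the CPUID vendor string */
Virt_interface &virt_backend(bool amd_cpu)
{
	static Svm_backend svm;
	static Vmx_backend vmx;
	if (amd_cpu) return svm;
	return vmx;
}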


@@ -31,9 +31,7 @@ namespace Board {
using Vm_page_table = Hw::Page_table;
using Vm_page_table_array =
Vm_page_table::Allocator::Array<Kernel::DEFAULT_TRANSLATION_TABLE_MAX>;
struct Vcpu_context;
using Vcpu_data = Genode::Vcpu_data;
using Vcpu_state = Genode::Vcpu_state;
@@ -42,7 +40,7 @@ namespace Board {
};
/* FIXME move into Vcpu_context as 'enum class' when we have C++20 */
enum Platform_exitcodes : Genode::uint64_t {
enum Platform_exitcodes : uint64_t {
EXIT_NPF = 0xfc,
EXIT_INIT = 0xfd,
EXIT_STARTUP = 0xfe,
@@ -64,19 +62,21 @@ namespace Kernel {
struct Board::Vcpu_context
{
Vcpu_context(unsigned id, void *vcpu_data_ptr);
Vcpu_context(unsigned id, void *virt_area, addr_t vmcb_phys_addr);
void initialize_svm(Kernel::Cpu &cpu, void *table);
Vcpu_context(unsigned id, Vcpu_data &vcpu_data);
void initialize(Kernel::Cpu &cpu, addr_t table_phys_addr);
void read_vcpu_state(Vcpu_state &state);
void write_vcpu_state(Vcpu_state &state);
Vmcb &vmcb;
addr_t vmcb_phys_addr;
Vmcb *vmcb { nullptr };
Genode::Align_at<Core::Cpu::Context> regs;
Vcpu_data &vcpu_data;
uint64_t tsc_aux_host = 0U;
uint64_t tsc_aux_guest = 0U;
uint64_t exitcode = EXIT_INIT;
Vcpu_context(const Vcpu_context &) = delete;
const Vcpu_context &operator=(Vcpu_context &) = delete;
};
#endif /* _CORE__SPEC__PC__VIRTUALIZATION__BOARD_H_ */
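The members above are driven from the kernel's Vm object (see the vm.cc
changes further below). The condensed sequence that follows is only an
illustration of the intended call order under the new interface; in the
real kernel these steps are spread across Vm::exception(), the VMM
synchronization hooks and Vm::proceed(), and switch_world() re-enters the
kernel through _kernel_entry rather than returning to its caller:

/* Hypothetical helper, for illustration only: the life cycle of the
 * refactored Vcpu_context as implied by this commit. */
void vcpu_round_trip(Board::Vcpu_context &vcpu, Kernel::Cpu &cpu,
                     Genode::Vcpu_state &state, Genode::addr_t table_phys)
{
	/* the hardware structures are set up lazily on the first EXIT_INIT */
	if (vcpu.exitcode == Board::EXIT_INIT)
		vcpu.initialize(cpu, table_phys);

	/* load the VMM-provided register state into the VMCB and saved GPRs */
	vcpu.read_vcpu_state(state);

	/* enter the guest; on a #VMEXIT control flows back via _kernel_entry */
	vcpu.vmcb->switch_world(vcpu.vcpu_data.phys_addr, *vcpu.regs);

	/* after the exit was classified, copy the guest state back to the VMM */
	vcpu.write_vcpu_state(state);
}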


@@ -53,53 +53,6 @@ namespace Hypervisor {
: "memory");
};
inline void switch_world(Call_arg guest_state, Call_arg regs,
Call_arg fpu_context)
{
asm volatile(
"fxrstor (%[fpu_context]);"
"mov %[guest_state], %%rax;"
"mov %[regs], %%rsp;"
"popq %%r8;"
"popq %%r9;"
"popq %%r10;"
"popq %%r11;"
"popq %%r12;"
"popq %%r13;"
"popq %%r14;"
"popq %%r15;"
"add $8, %%rsp;" /* don't pop rax */
"popq %%rbx;"
"popq %%rcx;"
"popq %%rdx;"
"popq %%rdi;"
"popq %%rsi;"
"popq %%rbp;"
"clgi;"
"sti;"
"vmload;"
"vmrun;"
"vmsave;"
"popq %%rax;" /* get the physical address of the host VMCB from
the stack */
"vmload;"
"stgi;" /* maybe enter the kernel to handle an external interrupt
that occurred ... */
"nop;"
"cli;" /* ... otherwise, just disable interrupts again */
"pushq $256;" /* make the stack point to trapno, the right place
to jump to _kernel_entry. We push 256 because
this is outside of the valid range for interrupts
*/
"jmp _kernel_entry;" /* jump to _kernel_entry to save the
GPRs without breaking any */
:
: [regs] "r"(regs), [fpu_context] "r"(fpu_context),
[guest_state] "r"(guest_state)
: "rax", "memory");
}
}
#endif /* _SPEC__PC__VIRTUALIZATION_HYPERVISOR_H_ */


@@ -18,13 +18,14 @@
#include <spec/x86_64/virtualization/svm.h>
#include <util/mmio.h>
using Genode::addr_t;
using namespace Genode;
using Kernel::Cpu;
using Kernel::Vm;
using Board::Vmcb;
Vmcb::Vmcb(Genode::uint32_t id)
Vmcb::Vmcb(uint32_t id)
:
Mmio({(char *)this, Mmio::SIZE})
{
@@ -40,9 +41,9 @@ Vmcb::Vmcb(Genode::uint32_t id)
}
Vmcb & Vmcb::host_vmcb(Genode::size_t cpu_id)
Vmcb & Vmcb::host_vmcb(size_t cpu_id)
{
static Genode::Constructible<Vmcb> host_vmcb[NR_OF_CPUS];
static Constructible<Vmcb> host_vmcb[NR_OF_CPUS];
if (!host_vmcb[cpu_id].constructed()) {
host_vmcb[cpu_id].construct(Vmcb::Asid_host);
@@ -50,13 +51,21 @@ Vmcb & Vmcb::host_vmcb(Genode::size_t cpu_id)
return *host_vmcb[cpu_id];
}
void Vmcb::init(Genode::size_t cpu_id, void * table_ptr)
void Vmcb::initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr)
{
using Cpu = Hw::X86_64_cpu;
root_vmcb_phys = Core::Platform::core_phys_addr((addr_t)
&host_vmcb(cpu_id));
Cpu::Ia32_efer::access_t ia32_efer_msr = Cpu::Ia32_efer::read();
Cpu::Ia32_efer::Svme::set(ia32_efer_msr, 1);
Cpu::Ia32_efer::write(ia32_efer_msr);
Cpu::Amd_vm_syscvg::access_t amd_vm_syscvg_msr =
Cpu::Amd_vm_syscvg::read();
Cpu::Amd_vm_syscvg::Nested_paging::set(amd_vm_syscvg_msr, 1);
Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr);
root_vmcb_phys =
Core::Platform::core_phys_addr((addr_t)&host_vmcb(cpu.id()));
asm volatile ("vmsave" : : "a" (root_vmcb_phys) : "memory");
Cpu::Amd_vm_hsavepa::write((Cpu::Amd_vm_hsavepa::access_t) root_vmcb_phys);
@@ -64,7 +73,7 @@ void Vmcb::init(Genode::size_t cpu_id, void * table_ptr)
* enable nested paging
*/
write<Npt_control::Np_enable>(1);
write<N_cr3>((Genode::addr_t) table_ptr);
write<N_cr3>(page_table_phys_addr);
write<Int_control::V_intr_mask>(1); /* See 15.2 */
write<Intercept_ex::Vectors>(17); /* AC */
@@ -76,7 +85,7 @@ void Vmcb::init(Genode::size_t cpu_id, void * table_ptr)
/*
* Enforce SVM intercepts
*/
void Vmcb::enforce_intercepts(Genode::uint32_t desired_primary, Genode::uint32_t desired_secondary)
void Vmcb::enforce_intercepts(uint32_t desired_primary, uint32_t desired_secondary)
{
write<Vmcb::Intercept_misc1>(
desired_primary |
@@ -103,13 +112,11 @@ void Vmcb::enforce_intercepts(Genode::uint32_t desired_primary, Genode::uint32_t
* AMD Vol.2 15.11: MSR Permissions Map
* All set to 1 since we want all MSRs to be intercepted.
*/
Genode::addr_t Vmcb::dummy_msrpm()
addr_t Vmcb::dummy_msrpm()
{
static Genode::Constructible<Board::Msrpm> msrpm;
if (!msrpm.constructed())
msrpm.construct();
static Board::Msrpm msrpm;
return Core::Platform::core_phys_addr((addr_t) & *msrpm);
return Core::Platform::core_phys_addr((addr_t) &msrpm);
}
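For reference, the flat 0xFF fill works because, per AMD Vol.2 15.11, every
MSR owns two consecutive bits in the map (read and write intercept), grouped
into three 2-KiB regions of the 8-KiB structure. If selective MSR
pass-through were ever needed, the relevant bits could be located as sketched
below; the helper is hypothetical and not part of the commit:

/* Hypothetical helper: byte and bit offsets of an MSR's intercept bits
 * inside the 8-KiB MSR permissions map (AMD Vol.2 15.11). Bit 'read_bit'
 * intercepts reads, bit 'read_bit + 1' intercepts writes. */
struct Msrpm_slot { Genode::size_t byte; unsigned read_bit; bool valid; };

static inline Msrpm_slot msrpm_slot(Genode::uint32_t msr)
{
	Genode::size_t   region;   /* byte offset of the 2-KiB region */
	Genode::uint32_t index;    /* MSR index within that region    */

	if      (msr <= 0x1fff)                          { region = 0x0000; index = msr; }
	else if (msr >= 0xc0000000 && msr <= 0xc0001fff) { region = 0x0800; index = msr - 0xc0000000; }
	else if (msr >= 0xc0010000 && msr <= 0xc0011fff) { region = 0x1000; index = msr - 0xc0010000; }
	else return { 0, 0, false };   /* MSRs outside the mapped ranges are always intercepted */

	/* four MSRs per byte, two bits per MSR */
	return { region + index / 4, (index % 4) * 2, true };
}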
@@ -117,311 +124,271 @@ Genode::addr_t Vmcb::dummy_msrpm()
* AMD Vol.2 15.10.1 I/O Permissions Map
* All set to 1 since we want all IO port accesses to be intercepted.
*/
Genode::addr_t Vmcb::dummy_iopm()
addr_t Vmcb::dummy_iopm()
{
static Genode::Constructible<Board::Iopm> iopm;
if (!iopm.constructed())
iopm.construct();
static Board::Iopm iopm;
return Core::Platform::core_phys_addr((addr_t) &*iopm);
return Core::Platform::core_phys_addr((addr_t) &iopm);
}
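The IOPM is even simpler: one bit per I/O port across the 64K port range
(AMD Vol.2 15.10.1), with a set bit causing the access to be intercepted.
A hypothetical lookup, for illustration only:

/* Hypothetical helper: bit position of an I/O port in the I/O permissions
 * map, one bit per port (AMD Vol.2 15.10.1). */
static inline void iopm_slot(Genode::uint16_t port,
                             Genode::size_t &byte, unsigned &bit)
{
	byte = port / 8;
	bit  = port % 8;
}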
Board::Msrpm::Msrpm()
{
Genode::memset(this, 0xFF, sizeof(*this));
memset(this, 0xFF, sizeof(*this));
}
Board::Iopm::Iopm()
{
Genode::memset(this, 0xFF, sizeof(*this));
memset(this, 0xFF, sizeof(*this));
}
void Board::Vcpu_context::initialize_svm(Kernel::Cpu & cpu, void * table)
void Vmcb::write_vcpu_state(Vcpu_state &state)
{
using Cpu = Hw::X86_64_cpu;
typedef Vcpu_state::Range Range;
Cpu::Ia32_efer::access_t ia32_efer_msr = Cpu::Ia32_efer::read();
Cpu::Ia32_efer::Svme::set(ia32_efer_msr, 1);
Cpu::Ia32_efer::write(ia32_efer_msr);
Cpu::Amd_vm_syscvg::access_t amd_vm_syscvg_msr = Cpu::Amd_vm_syscvg::read();
Cpu::Amd_vm_syscvg::Nested_paging::set(amd_vm_syscvg_msr, 1);
Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr);
vmcb.init(cpu.id(), table);
}
void Board::Vcpu_context::write_vcpu_state(Genode::Vcpu_state &state)
{
typedef Genode::Vcpu_state::Range Range;
state.discharge();
state.exit_reason = (unsigned) exitcode;
state.fpu.charge([&] (Genode::Vcpu_state::Fpu::State &fpu) {
memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu));
});
state.ax.charge(vmcb.rax);
state.cx.charge(regs->rcx);
state.dx.charge(regs->rdx);
state.bx.charge(regs->rbx);
state.di.charge(regs->rdi);
state.si.charge(regs->rsi);
state.bp.charge(regs->rbp);
state.ip.charge(vmcb.rip);
state.ax.charge(rax);
state.ip.charge(rip);
/*
* SVM doesn't use ip_len, so just leave the old value.
* We still have to charge it when charging ip.
*/
state.ip_len.set_charged();
state.flags.charge(vmcb.rflags);
state.sp.charge(vmcb.rsp);
state.flags.charge(rflags);
state.sp.charge(rsp);
state.dr7.charge(vmcb.dr7);
state.dr7.charge(dr7);
state. r8.charge(regs->r8);
state. r9.charge(regs->r9);
state.r10.charge(regs->r10);
state.r11.charge(regs->r11);
state.r12.charge(regs->r12);
state.r13.charge(regs->r13);
state.r14.charge(regs->r14);
state.r15.charge(regs->r15);
state.cr0.charge(cr0);
state.cr2.charge(cr2);
state.cr3.charge(cr3);
state.cr4.charge(cr4);
state.cr0.charge(vmcb.cr0);
state.cr2.charge(vmcb.cr2);
state.cr3.charge(vmcb.cr3);
state.cr4.charge(vmcb.cr4);
state.cs.charge(cs);
state.ss.charge(ss);
state.es.charge(es);
state.ds.charge(ds);
state.fs.charge(fs);
state.gs.charge(gs);
state.tr.charge(tr);
state.ldtr.charge(ldtr);
state.gdtr.charge(Range { .limit = gdtr.limit, .base = gdtr.base });
state.cs.charge(vmcb.cs);
state.ss.charge(vmcb.ss);
state.es.charge(vmcb.es);
state.ds.charge(vmcb.ds);
state.fs.charge(vmcb.fs);
state.gs.charge(vmcb.gs);
state.tr.charge(vmcb.tr);
state.ldtr.charge(vmcb.ldtr);
state.gdtr.charge(Range { .limit = vmcb.gdtr.limit,
.base = vmcb.gdtr.base });
state.idtr.charge(Range { .limit = idtr.limit, .base = idtr.base });
state.idtr.charge(Range { .limit = vmcb.idtr.limit,
.base = vmcb.idtr.base });
state.sysenter_cs.charge(sysenter_cs);
state.sysenter_sp.charge(sysenter_esp);
state.sysenter_ip.charge(sysenter_eip);
state.sysenter_cs.charge(vmcb.sysenter_cs);
state.sysenter_sp.charge(vmcb.sysenter_esp);
state.sysenter_ip.charge(vmcb.sysenter_eip);
state.qual_primary.charge(read<Vmcb::Exitinfo1>());
state.qual_secondary.charge(read<Vmcb::Exitinfo2>());
state.qual_primary.charge(vmcb.read<Vmcb::Exitinfo1>());
state.qual_secondary.charge(vmcb.read<Vmcb::Exitinfo2>());
/* Charging ctrl_primary and ctrl_secondary breaks Virtualbox 6 */
state.ctrl_primary.charge(vmcb.read<Vmcb::Intercept_misc1>());
state.ctrl_secondary.charge(vmcb.read<Vmcb::Intercept_misc2>());
state.inj_info.charge(vmcb.read<Vmcb::Exitintinfo>()& 0xFFFFFFFF);
state.inj_error.charge((Genode::uint32_t)
(vmcb.read<Vmcb::Exitintinfo>() >> 32));
state.inj_info.charge(read<Vmcb::Exitintinfo>() & 0xFFFFFFFF);
state.inj_error.charge(
(uint32_t)(read<Vmcb::Exitintinfo>() >> 32));
/* Guest is in an interrupt shadow, see 15.21.5 */
state.intr_state.charge((unsigned)
vmcb.read<Vmcb::Int_control_ext::Int_shadow>());
state.intr_state.charge(
(unsigned)read<Vmcb::Int_control_ext::Int_shadow>());
/* Guest activity state (actv) not used by SVM */
state.actv_state.set_charged();
state.tsc.charge(Hw::Lapic::rdtsc());
state.tsc_offset.charge(vmcb.read<Vmcb::Tsc_offset>());
state.tsc_offset.charge(read<Vmcb::Tsc_offset>());
tsc_aux_guest = Cpu::Ia32_tsc_aux::read();
state.tsc_aux.charge(tsc_aux_guest);
Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_host);
state.efer.charge(vmcb.efer);
state.efer.charge(efer);
/* pdpte not used by SVM */
state.star.charge(vmcb.star);
state.lstar.charge(vmcb.lstar);
state.cstar.charge(vmcb.cstar);
state.fmask.charge(vmcb.sfmask);
state.kernel_gs_base.charge(vmcb.kernel_gs_base);
state.star.charge(star);
state.lstar.charge(lstar);
state.cstar.charge(cstar);
state.fmask.charge(sfmask);
state.kernel_gs_base.charge(kernel_gs_base);
/* Task Priority Register, see 15.24 */
state.tpr.charge((unsigned) vmcb.read<Vmcb::Int_control::V_tpr>());
state.tpr.charge((unsigned)read<Vmcb::Int_control::V_tpr>());
/* TPR threshold not used by SVM */
}
void Board::Vcpu_context::read_vcpu_state(Genode::Vcpu_state &state)
void Vmcb::read_vcpu_state(Vcpu_state &state)
{
if (state.ax.charged() || state.cx.charged() ||
state.dx.charged() || state.bx.charged()) {
vmcb.rax = state.ax.value();
regs->rcx = state.cx.value();
regs->rdx = state.dx.value();
regs->rbx = state.bx.value();
}
if (state.bp.charged() || state.di.charged() || state.si.charged()) {
regs->rdi = state.di.value();
regs->rsi = state.si.value();
regs->rbp = state.bp.value();
}
if (state.flags.charged()) {
vmcb.rflags = state.flags.value();
}
if (state.sp.charged()) {
vmcb.rsp = state.sp.value();
}
if (state.ip.charged()) {
vmcb.rip = state.ip.value();
if (state.ax.charged()) rax = state.ax.value();
if (state.flags.charged()) rflags = state.flags.value();
if (state.sp.charged()) rsp = state.sp.value();
if (state.ip.charged()) rip = state.ip.value();
/* ip_len not used by SVM */
}
if (state.dr7.charged()) dr7 = state.dr7.value();
if (state.dr7.charged()) {
vmcb.dr7 = state.dr7.value();
}
if (state.cr0.charged()) cr0 = state.cr0.value();
if (state.cr2.charged()) cr2 = state.cr2.value();
if (state.cr3.charged()) cr3 = state.cr3.value();
if (state.cr4.charged()) cr4 = state.cr4.value();
if (state.r8 .charged() || state.r9 .charged() ||
state.r10.charged() || state.r11.charged() ||
state.r12.charged() || state.r13.charged() ||
state.r14.charged() || state.r15.charged()) {
if (state.cs.charged()) cs = state.cs.value();
if (state.ss.charged()) ss = state.ss.value();
regs->r8 = state.r8.value();
regs->r9 = state.r9.value();
regs->r10 = state.r10.value();
regs->r11 = state.r11.value();
regs->r12 = state.r12.value();
regs->r13 = state.r13.value();
regs->r14 = state.r14.value();
regs->r15 = state.r15.value();
}
if (state.es.charged()) es = state.es.value();
if (state.ds.charged()) ds = state.ds.value();
if (state.cr0.charged() || state.cr2.charged() ||
state.cr3.charged() || state.cr4.charged()) {
vmcb.cr0 = state.cr0.value();
vmcb.cr2 = state.cr2.value();
vmcb.cr3 = state.cr3.value();
vmcb.cr4 = state.cr4.value();
}
if (state.fs.charged()) fs = state.fs.value();
if (state.gs.charged()) gs = state.gs.value();
if (state.cs.charged() || state.ss.charged()) {
vmcb.cs = state.cs.value();
vmcb.ss = state.ss.value();
}
if (state.es.charged() || state.ds.charged()) {
vmcb.es = state.es.value();
vmcb.ds = state.ds.value();
}
if (state.fs.charged() || state.gs.charged()) {
vmcb.fs = state.fs.value();
vmcb.gs = state.gs.value();
}
if (state.tr.charged()) {
vmcb.tr = state.tr.value();
}
if (state.ldtr.charged()) {
vmcb.ldtr = state.ldtr.value();
}
if (state.tr.charged()) tr = state.tr.value();
if (state.ldtr.charged()) ldtr = state.ldtr.value();
if (state.gdtr.charged()) {
vmcb.gdtr.limit = state.gdtr.value().limit;
vmcb.gdtr.base = state.gdtr.value().base;
gdtr.limit = state.gdtr.value().limit;
gdtr.base = state.gdtr.value().base;
}
if (state.idtr.charged()) {
vmcb.idtr.limit = state.idtr.value().limit;
vmcb.idtr.base = state.idtr.value().base;
idtr.limit = state.idtr.value().limit;
idtr.base = state.idtr.value().base;
}
if (state.sysenter_cs.charged() || state.sysenter_sp.charged() ||
state.sysenter_ip.charged()) {
vmcb.sysenter_cs = state.sysenter_cs.value();
vmcb.sysenter_esp = state.sysenter_sp.value();
vmcb.sysenter_eip = state.sysenter_ip.value();
}
if (state.sysenter_cs.charged()) sysenter_cs = state.sysenter_cs.value();
if (state.sysenter_sp.charged()) sysenter_esp = state.sysenter_sp.value();
if (state.sysenter_ip.charged()) sysenter_eip = state.sysenter_ip.value();
if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) {
vmcb.enforce_intercepts(state.ctrl_primary.value(),
enforce_intercepts(state.ctrl_primary.value(),
state.ctrl_secondary.value());
}
if (state.inj_info.charged() || state.inj_error.charged()) {
/* Honor special signaling bit */
if (state.inj_info.value() & 0x1000) {
vmcb.write<Vmcb::Int_control::V_irq>(1);
vmcb.write<Vmcb::Int_control::V_ign_tpr>(1);
vmcb.write<Vmcb::Intercept_misc1::Vintr>(1);
write<Vmcb::Int_control::V_irq>(1);
write<Vmcb::Int_control::V_ign_tpr>(1);
write<Vmcb::Intercept_misc1::Vintr>(1);
} else {
vmcb.write<Vmcb::Int_control::V_irq>(0);
vmcb.write<Vmcb::Int_control::V_ign_tpr>(0);
vmcb.write<Vmcb::Intercept_misc1::Vintr>(0);
write<Vmcb::Int_control::V_irq>(0);
write<Vmcb::Int_control::V_ign_tpr>(0);
write<Vmcb::Intercept_misc1::Vintr>(0);
}
vmcb.write<Vmcb::Eventinj>(
write<Vmcb::Eventinj>(
/* Filter out special signaling bits */
(state.inj_info.value() &
(Genode::uint32_t) ~0x3000) |
(((Genode::uint64_t) state.inj_error.value()) << 32)
(uint32_t) ~0x3000) |
(((uint64_t) state.inj_error.value()) << 32)
);
}
if (state.intr_state.charged()) {
vmcb.write<Vmcb::Int_control_ext::Int_shadow>(state.intr_state.value());
write<Vmcb::Int_control_ext::Int_shadow>(
state.intr_state.value());
}
/* Guest activity state (actv) not used by SVM */
if (state.tsc_offset.charged()) {
/* state.tsc not used by SVM */
vmcb.write<Vmcb::Tsc_offset>(vmcb.read<Vmcb::Tsc_offset>() +
write<Vmcb::Tsc_offset>(read<Vmcb::Tsc_offset>() +
state.tsc_offset.value());
}
tsc_aux_host = Cpu::Ia32_tsc_aux::read();
if (state.tsc_aux.charged()) {
tsc_aux_guest = state.tsc_aux.value();
}
Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_guest);
if (state.efer.charged()) {
vmcb.efer = state.efer.value();
efer = state.efer.value();
}
/* pdpte not used by SVM */
if (state.star.charged() || state.lstar.charged() ||
state.cstar.charged() || state.fmask.charged() ||
state.kernel_gs_base.charged()) {
vmcb.star = state.star.value();
vmcb.cstar = state.cstar.value();
vmcb.lstar = state.lstar.value();
vmcb.sfmask = state.lstar.value();
vmcb.kernel_gs_base = state.kernel_gs_base.value();
}
if (state.star.charged()) star = state.star.value();
if (state.cstar.charged()) cstar = state.cstar.value();
if (state.lstar.charged()) lstar = state.lstar.value();
if (state.fmask.charged()) sfmask = state.fmask.value();
if (state.kernel_gs_base.charged()) kernel_gs_base = state.kernel_gs_base.value();
if (state.tpr.charged()) {
vmcb.write<Vmcb::Int_control::V_tpr>(state.tpr.value());
write<Vmcb::Int_control::V_tpr>(state.tpr.value());
/* TPR threshold not used on AMD */
}
}
if (state.fpu.charged()) {
state.fpu.with_state([&] (Genode::Vcpu_state::Fpu::State const &fpu) {
memcpy((void *) regs->fpu_context(), &fpu, sizeof(fpu));
});
uint64_t Vmcb::get_exitcode()
{
enum Svm_exitcodes : uint64_t
{
SVM_EXIT_INVALID = -1ULL,
SVM_VMEXIT_INTR = 0x60,
SVM_VMEXIT_NPF = 0x400,
};
uint64_t exitcode = read<Vmcb::Exitcode>();
switch (exitcode) {
case SVM_EXIT_INVALID:
error("VM: invalid SVM state!");
break;
case 0x40 ... 0x5f:
error("VM: unhandled SVM exception ",
Hex(exitcode));
break;
case SVM_VMEXIT_INTR:
exitcode = EXIT_PAUSED;
break;
case SVM_VMEXIT_NPF:
exitcode = EXIT_NPF;
break;
default:
break;
}
return exitcode;
}
void Vmcb::switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context &regs)
{
/*
* We push the host context's physical address to trapno so that
* we can pop it later
*/
regs.trapno = root_vmcb_phys;
asm volatile(
"fxrstor (%[fpu_context]);"
"mov %[guest_state], %%rax;"
"mov %[regs], %%rsp;"
"popq %%r8;"
"popq %%r9;"
"popq %%r10;"
"popq %%r11;"
"popq %%r12;"
"popq %%r13;"
"popq %%r14;"
"popq %%r15;"
"add $8, %%rsp;" /* don't pop rax */
"popq %%rbx;"
"popq %%rcx;"
"popq %%rdx;"
"popq %%rdi;"
"popq %%rsi;"
"popq %%rbp;"
"clgi;"
"sti;"
"vmload;"
"vmrun;"
"vmsave;"
"popq %%rax;" /* get the physical address of the host VMCB from
the stack */
"vmload;"
"stgi;" /* maybe enter the kernel to handle an external interrupt
that occurred ... */
"nop;"
"cli;" /* ... otherwise, just disable interrupts again */
"pushq %[trap_vmexit];" /* make the stack point to trapno, the right place
to jump to _kernel_entry. We push TRAP_VMEXIT because
this value is outside of the valid range for hardware interrupts
*/
"jmp _kernel_entry;" /* jump to _kernel_entry to save the
GPRs without breaking any */
:
: [regs] "r"(&regs.r8), [fpu_context] "r"(regs.fpu_context()),
[guest_state] "r"(vmcb_phys_addr),
[trap_vmexit] "i"(TRAP_VMEXIT)
: "rax", "memory");
}
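To make the host-VMCB trick easier to follow: [regs] points at the saved r8
slot of Core::Cpu::Context, and the pop sequence walks that layout upwards.
Because the trapno slot was overwritten with root_vmcb_phys right before the
switch, the popq %%rax after vmsave yields the host VMCB's physical address,
which the second vmload consumes. Derived from the pop sequence above, the
consumed frame looks like this (illustration, not code from the commit):

/*
 * Frame consumed by switch_world(), relative to the address passed in [regs]:
 *
 *   r8  r9  r10 r11 r12 r13 r14 r15    popped into the guest GPRs
 *   rax                                skipped ("add $8, %rsp"), guest rax
 *                                      is held in the VMCB save area
 *   rbx rcx rdx rdi rsi rbp            popped into the guest GPRs
 *   trapno = root_vmcb_phys            popped into rax after vmsave and
 *                                      consumed by the second vmload
 */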


@@ -29,7 +29,8 @@
#include <virtualization/svm.h>
#include <hw/spec/x86_64/x86_64.h>
using Genode::addr_t;
using namespace Genode;
using Kernel::Cpu;
using Kernel::Vm;
using Board::Vmcb;
@@ -37,7 +38,7 @@ using Board::Vmcb;
Vm::Vm(Irq::Pool & user_irq_pool,
Cpu & cpu,
Genode::Vcpu_data & data,
Vcpu_data & data,
Kernel::Signal_context & context,
Identity & id)
:
@@ -47,7 +48,7 @@ Vm::Vm(Irq::Pool & user_irq_pool,
_state(*data.vcpu_state),
_context(context),
_id(id),
_vcpu_context(id.id, data.virt_area, data.phys_addr)
_vcpu_context(id.id, data)
{
affinity(cpu);
}
@@ -73,13 +74,8 @@ void Vm::proceed(Cpu & cpu)
Cpu::Ia32_tsc_aux::write(
(Cpu::Ia32_tsc_aux::access_t)_vcpu_context.tsc_aux_guest);
/*
* We push the host context's physical address to trapno so that
* we can pop it later
*/
_vcpu_context.regs->trapno = _vcpu_context.vmcb.root_vmcb_phys;
Hypervisor::switch_world( _vcpu_context.vmcb_phys_addr,
(addr_t)&_vcpu_context.regs->r8, _vcpu_context.regs->fpu_context());
_vcpu_context.vmcb->switch_world(_vcpu_context.vcpu_data.phys_addr,
*_vcpu_context.regs);
/*
* This will fall into an interrupt or otherwise jump into
* _kernel_entry
@@ -90,7 +86,6 @@ void Vm::proceed(Cpu & cpu)
void Vm::exception(Cpu & cpu)
{
using namespace Board;
using Genode::Cpu_state;
switch (_vcpu_context.regs->trapno) {
case Cpu_state::INTERRUPTS_START ... Cpu_state::INTERRUPTS_END:
@@ -103,7 +98,7 @@ void Vm::exception(Cpu & cpu)
/* exception method was entered without exception */
break;
default:
Genode::error("VM: triggered unknown exception ",
error("VM: triggered unknown exception ",
_vcpu_context.regs->trapno,
" with error code ", _vcpu_context.regs->errcode,
" at ip=",
@@ -113,14 +108,10 @@ void Vm::exception(Cpu & cpu)
return;
};
enum Svm_exitcodes : Genode::uint64_t {
VMEXIT_INVALID = -1ULL,
VMEXIT_INTR = 0x60,
VMEXIT_NPF = 0x400,
};
if (_vcpu_context.exitcode == EXIT_INIT) {
_vcpu_context.initialize_svm(cpu, _id.table);
addr_t table_phys_addr =
reinterpret_cast<addr_t>(_id.table);
_vcpu_context.initialize(cpu, table_phys_addr);
_vcpu_context.tsc_aux_host = cpu.id();
_vcpu_context.exitcode = EXIT_STARTUP;
_pause_vcpu();
@@ -128,26 +119,11 @@ void Vm::exception(Cpu & cpu)
return;
}
_vcpu_context.exitcode = _vcpu_context.vmcb.read<Vmcb::Exitcode>();
_vcpu_context.exitcode = _vcpu_context.vmcb->get_exitcode();
switch (_vcpu_context.exitcode) {
case VMEXIT_INVALID:
Genode::error("Vm::exception: invalid SVM state!");
return;
case 0x40 ... 0x5f:
Genode::error("Vm::exception: unhandled SVM exception ",
Genode::Hex(_vcpu_context.exitcode));
return;
case VMEXIT_INTR:
_vcpu_context.exitcode = EXIT_PAUSED;
return;
case VMEXIT_NPF:
_vcpu_context.exitcode = EXIT_NPF;
[[fallthrough]];
default:
if (_vcpu_context.exitcode != EXIT_PAUSED) {
_pause_vcpu();
_context.submit(1);
return;
}
}
@@ -174,13 +150,94 @@ void Vm::_sync_from_vmm()
}
Board::Vcpu_context::Vcpu_context(unsigned id,
void *virt_area,
addr_t vmcb_phys_addr)
Board::Vcpu_context::Vcpu_context(unsigned id, Vcpu_data &vcpu_data)
:
vmcb(*Genode::construct_at<Vmcb>(virt_area, id)),
vmcb_phys_addr(vmcb_phys_addr),
regs(1)
regs(1),
vcpu_data(vcpu_data)
{
vmcb = construct_at<Vmcb>(vcpu_data.virt_area, id);
regs->trapno = TRAP_VMEXIT;
}
void Board::Vcpu_context::read_vcpu_state(Vcpu_state &state)
{
vmcb->read_vcpu_state(state);
if (state.cx.charged() || state.dx.charged() || state.bx.charged()) {
regs->rax = state.ax.value();
regs->rcx = state.cx.value();
regs->rdx = state.dx.value();
regs->rbx = state.bx.value();
}
if (state.bp.charged() || state.di.charged() || state.si.charged()) {
regs->rdi = state.di.value();
regs->rsi = state.si.value();
regs->rbp = state.bp.value();
}
if (state.r8 .charged() || state.r9 .charged() ||
state.r10.charged() || state.r11.charged() ||
state.r12.charged() || state.r13.charged() ||
state.r14.charged() || state.r15.charged()) {
regs->r8 = state.r8.value();
regs->r9 = state.r9.value();
regs->r10 = state.r10.value();
regs->r11 = state.r11.value();
regs->r12 = state.r12.value();
regs->r13 = state.r13.value();
regs->r14 = state.r14.value();
regs->r15 = state.r15.value();
}
if (state.fpu.charged()) {
state.fpu.with_state(
[&](Vcpu_state::Fpu::State const &fpu) {
memcpy((void *) regs->fpu_context(), &fpu, sizeof(fpu));
});
}
}
void Board::Vcpu_context::write_vcpu_state(Vcpu_state &state)
{
state.discharge();
state.exit_reason = (unsigned) exitcode;
state.fpu.charge([&](Vcpu_state::Fpu::State &fpu) {
memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu));
});
/* SVM will overwrite rax but VMX doesn't. */
state.ax.charge(regs->rax);
state.cx.charge(regs->rcx);
state.dx.charge(regs->rdx);
state.bx.charge(regs->rbx);
state.di.charge(regs->rdi);
state.si.charge(regs->rsi);
state.bp.charge(regs->rbp);
state.r8.charge(regs->r8);
state.r9.charge(regs->r9);
state.r10.charge(regs->r10);
state.r11.charge(regs->r11);
state.r12.charge(regs->r12);
state.r13.charge(regs->r13);
state.r14.charge(regs->r14);
state.r15.charge(regs->r15);
state.tsc.charge(Hw::Lapic::rdtsc());
tsc_aux_guest = Cpu::Ia32_tsc_aux::read();
state.tsc_aux.charge(tsc_aux_guest);
Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_host);
vmcb->write_vcpu_state(state);
}
void Board::Vcpu_context::initialize(Kernel::Cpu &cpu, addr_t table_phys_addr)
{
vmcb->initialize(cpu, table_phys_addr);
}


@@ -16,11 +16,28 @@
#include <base/internal/page_size.h>
#include <base/stdint.h>
#include <cpu.h>
#include <cpu/vcpu_state.h>
#include <cpu/vcpu_state_virtualization.h>
#include <util/mmio.h>
#include <util/string.h>
using Genode::addr_t;
using Genode::size_t;
using Genode::uint8_t;
using Genode::uint32_t;
using Genode::uint64_t;
using Genode::Mmio;
using Genode::Vcpu_data;
using Genode::Vcpu_state;
using Genode::get_page_size;
using Genode::memset;
namespace Kernel
{
class Cpu;
}
namespace Board
{
struct Msrpm;
@@ -32,18 +49,18 @@ namespace Board
}
struct alignas(Genode::get_page_size()) Board::Msrpm
struct alignas(get_page_size()) Board::Msrpm
{
Genode::uint8_t pad[8192];
uint8_t pad[8192];
Msrpm();
};
struct
alignas(Genode::get_page_size())
alignas(get_page_size())
Board::Iopm
{
Genode::uint8_t pad[12288];
uint8_t pad[12288];
Iopm();
};
@@ -55,17 +72,17 @@
*/
struct Board::Vmcb_control_area
{
enum : Genode::size_t {
enum : size_t {
total_size = 1024U,
used_guest_size = 0x3E0U
};
/* The control area is padded and used via Mmio-like accesses. */
Genode::uint8_t control_area[used_guest_size];
uint8_t control_area[used_guest_size];
Vmcb_control_area()
{
Genode::memset((void *) this, 0, sizeof(Vmcb_control_area));
memset((void *) this, 0, sizeof(Vmcb_control_area));
}
};
@@ -77,10 +94,10 @@ struct Board::Vmcb_control_area
struct Board::Vmcb_reserved_for_host
{
/* 64bit used by the inherited Mmio class here */
Genode::addr_t root_vmcb_phys = 0U;
addr_t root_vmcb_phys = 0U;
};
static_assert(Board::Vmcb_control_area::total_size -
sizeof(Board::Vmcb_control_area) - sizeof(Genode::Mmio<0>) -
sizeof(Board::Vmcb_control_area) - sizeof(Mmio<0>) -
sizeof(Board::Vmcb_reserved_for_host) ==
0);
@@ -89,28 +106,28 @@ static_assert(Board::Vmcb_control_area::total_size -
*/
struct Board::Vmcb_state_save_area
{
typedef Genode::Vcpu_state::Segment Segment;
typedef Vcpu_state::Segment Segment;
Segment es, cs, ss, ds, fs, gs, gdtr, ldtr, idtr, tr;
Genode::uint8_t reserved1[43];
Genode::uint8_t cpl;
Genode::uint8_t reserved2[4];
Genode::uint64_t efer;
Genode::uint8_t reserved3[112];
Genode::uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip;
Genode::uint8_t reserved4[88];
Genode::uint64_t rsp;
Genode::uint64_t s_cet, ssp, isst_addr;
Genode::uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base;
Genode::uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2;
Genode::uint8_t reserved5[32];
Genode::uint64_t g_pat;
Genode::uint64_t dbgctl;
Genode::uint64_t br_from;
Genode::uint64_t br_to;
Genode::uint64_t lastexcpfrom;
Genode::uint8_t reserved6[72];
Genode::uint64_t spec_ctrl;
uint8_t reserved1[43];
uint8_t cpl;
uint8_t reserved2[4];
uint64_t efer;
uint8_t reserved3[112];
uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip;
uint8_t reserved4[88];
uint64_t rsp;
uint64_t s_cet, ssp, isst_addr;
uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base;
uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2;
uint8_t reserved5[32];
uint64_t g_pat;
uint64_t dbgctl;
uint64_t br_from;
uint64_t br_to;
uint64_t lastexcpfrom;
uint8_t reserved6[72];
uint64_t spec_ctrl;
} __attribute__((packed));
@@ -132,10 +149,10 @@ struct Board::Vmcb_state_save_area
* In total, this allows Register type access to the VMCB control area and easy
* direct access to the VMCB state save area.
*/
struct alignas(Genode::get_page_size()) Board::Vmcb
struct alignas(get_page_size()) Board::Vmcb
:
Board::Vmcb_control_area,
public Genode::Mmio<Genode::get_page_size()>,
public Mmio<get_page_size()>,
Board::Vmcb_reserved_for_host,
Board::Vmcb_state_save_area
{
@@ -143,14 +160,19 @@ struct alignas(Genode::get_page_size()) Board::Vmcb
Asid_host = 0,
};
Vmcb(Genode::uint32_t id);
void init(Genode::size_t cpu_id, void * table_ptr);
static Vmcb & host_vmcb(Genode::size_t cpu_id);
static Genode::addr_t dummy_msrpm();
void enforce_intercepts(Genode::uint32_t desired_primary = 0U, Genode::uint32_t desired_secondary = 0U);
static Genode::addr_t dummy_iopm();
Vmcb(uint32_t id);
static Vmcb & host_vmcb(size_t cpu_id);
static addr_t dummy_msrpm();
void enforce_intercepts(uint32_t desired_primary = 0U, uint32_t desired_secondary = 0U);
static addr_t dummy_iopm();
Genode::uint8_t reserved[Genode::get_page_size() -
void initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr);
void write_vcpu_state(Vcpu_state &state);
void read_vcpu_state(Vcpu_state &state);
void switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context &regs);
uint64_t get_exitcode();
uint8_t reserved[get_page_size() -
sizeof(Board::Vmcb_state_save_area) -
Board::Vmcb_control_area::total_size];
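As a closing illustration of the mixed layout: control-area fields are
reached through the inherited Genode::Mmio register interface, while
state-save-area fields are ordinary struct members. Both styles appear in
this commit's svm.cc; the function below merely juxtaposes them and is not
part of the commit:

/* Illustration only: the two access styles offered by the Vmcb layout. */
void vmcb_access_example(Board::Vmcb &vmcb)
{
	/* control area: Mmio register accessors */
	vmcb.write<Board::Vmcb::Npt_control::Np_enable>(1);
	Genode::uint64_t const exitinfo = vmcb.read<Board::Vmcb::Exitinfo1>();

	/* state save area: direct members of the packed struct */
	Genode::uint64_t const guest_ip = vmcb.rip;
	vmcb.rflags = 2;   /* RFLAGS bit 1 is always set */

	(void)exitinfo; (void)guest_ip;
}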