hw: prepare SVM implementation for generic virtualization support

The SVM implementation did not lend itself to runtime selection of the
x86 virtualization technology.

Encapsulate functionality in the VMCB class to facilitate adding support
for Intel's VMX.

Issue #5113
Benjamin Lamowski, 2024-02-07 11:26:14 +01:00 (committed by Christian Helmuth)
commit 0d1716b07d, parent 3a88d133ed
5 changed files with 364 additions and 365 deletions
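To illustrate the direction of the change, here is a minimal, self-contained C++ sketch of the split the commit works toward. It is not code from the Genode tree: the names and signatures are simplified, hypothetical stand-ins (the real classes take Kernel::Cpu, Genode::Vcpu_data and the Genode base types). The generic Vcpu_context keeps what is common to both vendors (general-purpose registers, FPU state, TSC_AUX handling) and forwards everything technology-specific to the VMCB object, so an Intel VMCS class offering the same member functions could later be slotted in for VMX.

  /* hypothetical sketch, not the commit's code */
  #include <cstdint>

  struct Vcpu_state { /* architectural guest state, elided */ };

  struct Vmcb /* AMD SVM virtual-machine control block, simplified */
  {
      void     initialize(unsigned cpu_id, uint64_t page_table_phys) { /* enable SVME, nested paging, set N_CR3 */ }
      void     write_vcpu_state(Vcpu_state &) { /* copy VMCB fields into the Vcpu_state */ }
      void     read_vcpu_state (Vcpu_state &) { /* apply charged Vcpu_state fields to the VMCB */ }
      uint64_t get_exitcode()                 { return 0; /* would translate the VMCB exitcode */ }
  };

  struct Vcpu_context /* generic part, kept free of SVM details */
  {
      Vmcb *vmcb = nullptr; /* a VMX counterpart with the same member functions could be substituted */

      void initialize(unsigned cpu_id, uint64_t table_phys) { vmcb->initialize(cpu_id, table_phys); }
      void write_vcpu_state(Vcpu_state &s) { /* charge GPRs/FPU here */ vmcb->write_vcpu_state(s); }
      void read_vcpu_state (Vcpu_state &s) { vmcb->read_vcpu_state(s); /* then apply GPRs/FPU */ }
  };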


@@ -31,9 +31,7 @@ namespace Board {
 using Vm_page_table = Hw::Page_table;
 using Vm_page_table_array =
 Vm_page_table::Allocator::Array<Kernel::DEFAULT_TRANSLATION_TABLE_MAX>;
 struct Vcpu_context;
 using Vcpu_data = Genode::Vcpu_data;
 using Vcpu_state = Genode::Vcpu_state;
@@ -42,7 +40,7 @@
 };
 /* FIXME move into Vcpu_context as 'enum class' when we have C++20 */
-enum Platform_exitcodes : Genode::uint64_t {
+enum Platform_exitcodes : uint64_t {
 EXIT_NPF = 0xfc,
 EXIT_INIT = 0xfd,
 EXIT_STARTUP = 0xfe,
@@ -64,19 +62,21 @@ namespace Kernel {
 struct Board::Vcpu_context
 {
-Vcpu_context(unsigned id, void *vcpu_data_ptr);
-Vcpu_context(unsigned id, void *virt_area, addr_t vmcb_phys_addr);
-void initialize_svm(Kernel::Cpu &cpu, void *table);
+Vcpu_context(unsigned id, Vcpu_data &vcpu_data);
+void initialize(Kernel::Cpu &cpu, addr_t table_phys_addr);
 void read_vcpu_state(Vcpu_state &state);
 void write_vcpu_state(Vcpu_state &state);
-Vmcb &vmcb;
-addr_t vmcb_phys_addr;
+Vmcb *vmcb { nullptr };
 Genode::Align_at<Core::Cpu::Context> regs;
+Vcpu_data &vcpu_data;
 uint64_t tsc_aux_host = 0U;
 uint64_t tsc_aux_guest = 0U;
 uint64_t exitcode = EXIT_INIT;
+Vcpu_context(const Vcpu_context &) = delete;
+const Vcpu_context &operator=(Vcpu_context &) = delete;
 };
 #endif /* _CORE__SPEC__PC__VIRTUALIZATION__BOARD_H_ */


@@ -53,53 +53,6 @@ namespace Hypervisor {
 : "memory");
 };
-inline void switch_world(Call_arg guest_state, Call_arg regs,
-Call_arg fpu_context)
-{
-asm volatile(
-"fxrstor (%[fpu_context]);"
-"mov %[guest_state], %%rax;"
-"mov %[regs], %%rsp;"
-"popq %%r8;"
-"popq %%r9;"
-"popq %%r10;"
-"popq %%r11;"
-"popq %%r12;"
-"popq %%r13;"
-"popq %%r14;"
-"popq %%r15;"
-"add $8, %%rsp;" /* don't pop rax */
-"popq %%rbx;"
-"popq %%rcx;"
-"popq %%rdx;"
-"popq %%rdi;"
-"popq %%rsi;"
-"popq %%rbp;"
-"clgi;"
-"sti;"
-"vmload;"
-"vmrun;"
-"vmsave;"
-"popq %%rax;" /* get the physical address of the host VMCB from
-the stack */
-"vmload;"
-"stgi;" /* maybe enter the kernel to handle an external interrupt
-that occured ... */
-"nop;"
-"cli;" /* ... otherwise, just disable interrupts again */
-"pushq $256;" /* make the stack point to trapno, the right place
-to jump to _kernel_entry. We push 256 because
-this is outside of the valid range for interrupts
-*/
-"jmp _kernel_entry;" /* jump to _kernel_entry to save the
-GPRs without breaking any */
-:
-: [regs] "r"(regs), [fpu_context] "r"(fpu_context),
-[guest_state] "r"(guest_state)
-: "rax", "memory");
-}
 }
 #endif /* _SPEC__PC__VIRTUALIZATION_HYPERVISOR_H_ */


@@ -18,13 +18,14 @@
 #include <spec/x86_64/virtualization/svm.h>
 #include <util/mmio.h>
-using Genode::addr_t;
+using namespace Genode;
 using Kernel::Cpu;
 using Kernel::Vm;
 using Board::Vmcb;
-Vmcb::Vmcb(Genode::uint32_t id)
+Vmcb::Vmcb(uint32_t id)
 :
 Mmio({(char *)this, Mmio::SIZE})
 {
@@ -40,9 +41,9 @@ Vmcb::Vmcb(Genode::uint32_t id)
 }
-Vmcb & Vmcb::host_vmcb(Genode::size_t cpu_id)
+Vmcb & Vmcb::host_vmcb(size_t cpu_id)
 {
-static Genode::Constructible<Vmcb> host_vmcb[NR_OF_CPUS];
+static Constructible<Vmcb> host_vmcb[NR_OF_CPUS];
 if (!host_vmcb[cpu_id].constructed()) {
 host_vmcb[cpu_id].construct(Vmcb::Asid_host);
@@ -50,13 +51,21 @@ Vmcb & Vmcb::host_vmcb(Genode::size_t cpu_id)
 return *host_vmcb[cpu_id];
 }
-void Vmcb::init(Genode::size_t cpu_id, void * table_ptr)
+void Vmcb::initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr)
 {
 using Cpu = Hw::X86_64_cpu;
-root_vmcb_phys = Core::Platform::core_phys_addr((addr_t)
-&host_vmcb(cpu_id));
+Cpu::Ia32_efer::access_t ia32_efer_msr = Cpu::Ia32_efer::read();
+Cpu::Ia32_efer::Svme::set(ia32_efer_msr, 1);
+Cpu::Ia32_efer::write(ia32_efer_msr);
+Cpu::Amd_vm_syscvg::access_t amd_vm_syscvg_msr =
+Cpu::Amd_vm_syscvg::read();
+Cpu::Amd_vm_syscvg::Nested_paging::set(amd_vm_syscvg_msr, 1);
+Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr);
+root_vmcb_phys =
+Core::Platform::core_phys_addr((addr_t)&host_vmcb(cpu.id()));
 asm volatile ("vmsave" : : "a" (root_vmcb_phys) : "memory");
 Cpu::Amd_vm_hsavepa::write((Cpu::Amd_vm_hsavepa::access_t) root_vmcb_phys);
@@ -64,7 +73,7 @@ void Vmcb::init(Genode::size_t cpu_id, void * table_ptr)
 * enable nested paging
 */
 write<Npt_control::Np_enable>(1);
-write<N_cr3>((Genode::addr_t) table_ptr);
+write<N_cr3>(page_table_phys_addr);
 write<Int_control::V_intr_mask>(1); /* See 15.2 */
 write<Intercept_ex::Vectors>(17); /* AC */
@@ -76,7 +85,7 @@ void Vmcb::init(Genode::size_t cpu_id, void * table_ptr)
 /*
 * Enforce SVM intercepts
 */
-void Vmcb::enforce_intercepts(Genode::uint32_t desired_primary, Genode::uint32_t desired_secondary)
+void Vmcb::enforce_intercepts(uint32_t desired_primary, uint32_t desired_secondary)
 {
 write<Vmcb::Intercept_misc1>(
 desired_primary |
@@ -103,13 +112,11 @@ void Vmcb::enforce_intercepts(Genode::uint32_t desired_primary, Genode::uint32_t
 * AMD Vol.2 15.11: MSR Permissions Map
 * All set to 1 since we want all MSRs to be intercepted.
 */
-Genode::addr_t Vmcb::dummy_msrpm()
+addr_t Vmcb::dummy_msrpm()
 {
-static Genode::Constructible<Board::Msrpm> msrpm;
-if (!msrpm.constructed())
-msrpm.construct();
-return Core::Platform::core_phys_addr((addr_t) & *msrpm);
+static Board::Msrpm msrpm;
+return Core::Platform::core_phys_addr((addr_t) &msrpm);
 }
@@ -117,311 +124,271 @@ Genode::addr_t Vmcb::dummy_msrpm()
 * AMD Vol.2 15.10.1 I/O Permissions Map
 * All set to 1 since we want all IO port accesses to be intercepted.
 */
-Genode::addr_t Vmcb::dummy_iopm()
+addr_t Vmcb::dummy_iopm()
 {
-static Genode::Constructible<Board::Iopm> iopm;
-if (!iopm.constructed())
-iopm.construct();
-return Core::Platform::core_phys_addr((addr_t) &*iopm);
+static Board::Iopm iopm;
+return Core::Platform::core_phys_addr((addr_t) &iopm);
 }
 Board::Msrpm::Msrpm()
 {
-Genode::memset(this, 0xFF, sizeof(*this));
+memset(this, 0xFF, sizeof(*this));
 }
 Board::Iopm::Iopm()
 {
-Genode::memset(this, 0xFF, sizeof(*this));
+memset(this, 0xFF, sizeof(*this));
 }
-void Board::Vcpu_context::initialize_svm(Kernel::Cpu & cpu, void * table)
-{
-using Cpu = Hw::X86_64_cpu;
-Cpu::Ia32_efer::access_t ia32_efer_msr = Cpu::Ia32_efer::read();
-Cpu::Ia32_efer::Svme::set(ia32_efer_msr, 1);
-Cpu::Ia32_efer::write(ia32_efer_msr);
-Cpu::Amd_vm_syscvg::access_t amd_vm_syscvg_msr = Cpu::Amd_vm_syscvg::read();
-Cpu::Amd_vm_syscvg::Nested_paging::set(amd_vm_syscvg_msr, 1);
-Cpu::Amd_vm_syscvg::write(amd_vm_syscvg_msr);
-vmcb.init(cpu.id(), table);
-}
-void Board::Vcpu_context::write_vcpu_state(Genode::Vcpu_state &state)
-{
-typedef Genode::Vcpu_state::Range Range;
-state.discharge();
-state.exit_reason = (unsigned) exitcode;
-state.fpu.charge([&] (Genode::Vcpu_state::Fpu::State &fpu) {
-memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu));
-});
-state.ax.charge(vmcb.rax);
-state.cx.charge(regs->rcx);
-state.dx.charge(regs->rdx);
-state.bx.charge(regs->rbx);
-state.di.charge(regs->rdi);
-state.si.charge(regs->rsi);
-state.bp.charge(regs->rbp);
-state.ip.charge(vmcb.rip);
-/*
-* SVM doesn't use ip_len, so just leave the old value.
-* We still have to charge it when charging ip.
-*/
-state.ip_len.set_charged();
-state.flags.charge(vmcb.rflags);
-state.sp.charge(vmcb.rsp);
-state.dr7.charge(vmcb.dr7);
-state. r8.charge(regs->r8);
-state. r9.charge(regs->r9);
-state.r10.charge(regs->r10);
-state.r11.charge(regs->r11);
-state.r12.charge(regs->r12);
-state.r13.charge(regs->r13);
-state.r14.charge(regs->r14);
-state.r15.charge(regs->r15);
-state.cr0.charge(vmcb.cr0);
-state.cr2.charge(vmcb.cr2);
-state.cr3.charge(vmcb.cr3);
-state.cr4.charge(vmcb.cr4);
-state.cs.charge(vmcb.cs);
-state.ss.charge(vmcb.ss);
-state.es.charge(vmcb.es);
-state.ds.charge(vmcb.ds);
-state.fs.charge(vmcb.fs);
-state.gs.charge(vmcb.gs);
-state.tr.charge(vmcb.tr);
-state.ldtr.charge(vmcb.ldtr);
-state.gdtr.charge(Range { .limit = vmcb.gdtr.limit,
-.base = vmcb.gdtr.base });
-state.idtr.charge(Range { .limit = vmcb.idtr.limit,
-.base = vmcb.idtr.base });
-state.sysenter_cs.charge(vmcb.sysenter_cs);
-state.sysenter_sp.charge(vmcb.sysenter_esp);
-state.sysenter_ip.charge(vmcb.sysenter_eip);
-state.qual_primary.charge(vmcb.read<Vmcb::Exitinfo1>());
-state.qual_secondary.charge(vmcb.read<Vmcb::Exitinfo2>());
-state.ctrl_primary.charge(vmcb.read<Vmcb::Intercept_misc1>());
-state.ctrl_secondary.charge(vmcb.read<Vmcb::Intercept_misc2>());
-state.inj_info.charge(vmcb.read<Vmcb::Exitintinfo>()& 0xFFFFFFFF);
-state.inj_error.charge((Genode::uint32_t)
-(vmcb.read<Vmcb::Exitintinfo>() >> 32));
-/* Guest is in an interrupt shadow, see 15.21.5 */
-state.intr_state.charge((unsigned)
-vmcb.read<Vmcb::Int_control_ext::Int_shadow>());
-/* Guest activity state (actv) not used by SVM */
-state.actv_state.set_charged();
-state.tsc.charge(Hw::Lapic::rdtsc());
-state.tsc_offset.charge(vmcb.read<Vmcb::Tsc_offset>());
-tsc_aux_guest = Cpu::Ia32_tsc_aux::read();
-state.tsc_aux.charge(tsc_aux_guest);
-Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_host);
-state.efer.charge(vmcb.efer);
-/* pdpte not used by SVM */
-state.star.charge(vmcb.star);
-state.lstar.charge(vmcb.lstar);
-state.cstar.charge(vmcb.cstar);
-state.fmask.charge(vmcb.sfmask);
-state.kernel_gs_base.charge(vmcb.kernel_gs_base);
-/* Task Priority Register, see 15.24 */
-state.tpr.charge((unsigned) vmcb.read<Vmcb::Int_control::V_tpr>());
-/* TPR threshold not used by SVM */
-}
-void Board::Vcpu_context::read_vcpu_state(Genode::Vcpu_state &state)
-{
-if (state.ax.charged() || state.cx.charged() ||
-state.dx.charged() || state.bx.charged()) {
-vmcb.rax = state.ax.value();
-regs->rcx = state.cx.value();
-regs->rdx = state.dx.value();
-regs->rbx = state.bx.value();
-}
-if (state.bp.charged() || state.di.charged() || state.si.charged()) {
-regs->rdi = state.di.value();
-regs->rsi = state.si.value();
-regs->rbp = state.bp.value();
-}
-if (state.flags.charged()) {
-vmcb.rflags = state.flags.value();
-}
-if (state.sp.charged()) {
-vmcb.rsp = state.sp.value();
-}
-if (state.ip.charged()) {
-vmcb.rip = state.ip.value();
-/* ip_len not used by SVM */
-}
-if (state.dr7.charged()) {
-vmcb.dr7 = state.dr7.value();
-}
-if (state.r8 .charged() || state.r9 .charged() ||
-state.r10.charged() || state.r11.charged() ||
-state.r12.charged() || state.r13.charged() ||
-state.r14.charged() || state.r15.charged()) {
-regs->r8 = state.r8.value();
-regs->r9 = state.r9.value();
-regs->r10 = state.r10.value();
-regs->r11 = state.r11.value();
-regs->r12 = state.r12.value();
-regs->r13 = state.r13.value();
-regs->r14 = state.r14.value();
-regs->r15 = state.r15.value();
-}
-if (state.cr0.charged() || state.cr2.charged() ||
-state.cr3.charged() || state.cr4.charged()) {
-vmcb.cr0 = state.cr0.value();
-vmcb.cr2 = state.cr2.value();
-vmcb.cr3 = state.cr3.value();
-vmcb.cr4 = state.cr4.value();
-}
-if (state.cs.charged() || state.ss.charged()) {
-vmcb.cs = state.cs.value();
-vmcb.ss = state.ss.value();
-}
-if (state.es.charged() || state.ds.charged()) {
-vmcb.es = state.es.value();
-vmcb.ds = state.ds.value();
-}
-if (state.fs.charged() || state.gs.charged()) {
-vmcb.fs = state.fs.value();
-vmcb.gs = state.gs.value();
-}
-if (state.tr.charged()) {
-vmcb.tr = state.tr.value();
-}
-if (state.ldtr.charged()) {
-vmcb.ldtr = state.ldtr.value();
-}
-if (state.gdtr.charged()) {
-vmcb.gdtr.limit = state.gdtr.value().limit;
-vmcb.gdtr.base = state.gdtr.value().base;
-}
-if (state.idtr.charged()) {
-vmcb.idtr.limit = state.idtr.value().limit;
-vmcb.idtr.base = state.idtr.value().base;
-}
-if (state.sysenter_cs.charged() || state.sysenter_sp.charged() ||
-state.sysenter_ip.charged()) {
-vmcb.sysenter_cs = state.sysenter_cs.value();
-vmcb.sysenter_esp = state.sysenter_sp.value();
-vmcb.sysenter_eip = state.sysenter_ip.value();
-}
-if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) {
-vmcb.enforce_intercepts(state.ctrl_primary.value(),
-state.ctrl_secondary.value());
-}
-if (state.inj_info.charged() || state.inj_error.charged()) {
-/* Honor special signaling bit */
-if (state.inj_info.value() & 0x1000) {
-vmcb.write<Vmcb::Int_control::V_irq>(1);
-vmcb.write<Vmcb::Int_control::V_ign_tpr>(1);
-vmcb.write<Vmcb::Intercept_misc1::Vintr>(1);
-} else {
-vmcb.write<Vmcb::Int_control::V_irq>(0);
-vmcb.write<Vmcb::Int_control::V_ign_tpr>(0);
-vmcb.write<Vmcb::Intercept_misc1::Vintr>(0);
-}
-vmcb.write<Vmcb::Eventinj>(
-/* Filter out special signaling bits */
-(state.inj_info.value() &
-(Genode::uint32_t) ~0x3000) |
-(((Genode::uint64_t) state.inj_error.value()) << 32)
-);
-}
-if (state.intr_state.charged()) {
-vmcb.write<Vmcb::Int_control_ext::Int_shadow>(state.intr_state.value());
-}
-/* Guest activity state (actv) not used by SVM */
-if (state.tsc_offset.charged()) {
-/* state.tsc not used by SVM */
-vmcb.write<Vmcb::Tsc_offset>(vmcb.read<Vmcb::Tsc_offset>() +
-state.tsc_offset.value());
-}
-tsc_aux_host = Cpu::Ia32_tsc_aux::read();
-if (state.tsc_aux.charged()) {
-tsc_aux_guest = state.tsc_aux.value();
-}
-Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_guest);
-if (state.efer.charged()) {
-vmcb.efer = state.efer.value();
-}
-/* pdpte not used by SVM */
-if (state.star.charged() || state.lstar.charged() ||
-state.cstar.charged() || state.fmask.charged() ||
-state.kernel_gs_base.charged()) {
-vmcb.star = state.star.value();
-vmcb.cstar = state.cstar.value();
-vmcb.lstar = state.lstar.value();
-vmcb.sfmask = state.lstar.value();
-vmcb.kernel_gs_base = state.kernel_gs_base.value();
-}
-if (state.tpr.charged()) {
-vmcb.write<Vmcb::Int_control::V_tpr>(state.tpr.value());
-/* TPR threshold not used on AMD */
-}
-if (state.fpu.charged()) {
-state.fpu.with_state([&] (Genode::Vcpu_state::Fpu::State const &fpu) {
-memcpy((void *) regs->fpu_context(), &fpu, sizeof(fpu));
-});
-}
-}
+void Vmcb::write_vcpu_state(Vcpu_state &state)
+{
+typedef Vcpu_state::Range Range;
+state.ax.charge(rax);
+state.ip.charge(rip);
+/*
+* SVM doesn't use ip_len, so just leave the old value.
+* We still have to charge it when charging ip.
+*/
+state.ip_len.set_charged();
+state.flags.charge(rflags);
+state.sp.charge(rsp);
+state.dr7.charge(dr7);
+state.cr0.charge(cr0);
+state.cr2.charge(cr2);
+state.cr3.charge(cr3);
+state.cr4.charge(cr4);
+state.cs.charge(cs);
+state.ss.charge(ss);
+state.es.charge(es);
+state.ds.charge(ds);
+state.fs.charge(fs);
+state.gs.charge(gs);
+state.tr.charge(tr);
+state.ldtr.charge(ldtr);
+state.gdtr.charge(Range { .limit = gdtr.limit, .base = gdtr.base });
+state.idtr.charge(Range { .limit = idtr.limit, .base = idtr.base });
+state.sysenter_cs.charge(sysenter_cs);
+state.sysenter_sp.charge(sysenter_esp);
+state.sysenter_ip.charge(sysenter_eip);
+state.qual_primary.charge(read<Vmcb::Exitinfo1>());
+state.qual_secondary.charge(read<Vmcb::Exitinfo2>());
+/* Charging ctrl_primary and ctrl_secondary breaks Virtualbox 6 */
+state.inj_info.charge(read<Vmcb::Exitintinfo>() & 0xFFFFFFFF);
+state.inj_error.charge(
+(uint32_t)(read<Vmcb::Exitintinfo>() >> 32));
+/* Guest is in an interrupt shadow, see 15.21.5 */
+state.intr_state.charge(
+(unsigned)read<Vmcb::Int_control_ext::Int_shadow>());
+/* Guest activity state (actv) not used by SVM */
+state.actv_state.set_charged();
+state.tsc.charge(Hw::Lapic::rdtsc());
+state.tsc_offset.charge(read<Vmcb::Tsc_offset>());
+state.efer.charge(efer);
+/* pdpte not used by SVM */
+state.star.charge(star);
+state.lstar.charge(lstar);
+state.cstar.charge(cstar);
+state.fmask.charge(sfmask);
+state.kernel_gs_base.charge(kernel_gs_base);
+/* Task Priority Register, see 15.24 */
+state.tpr.charge((unsigned)read<Vmcb::Int_control::V_tpr>());
+/* TPR threshold not used by SVM */
+}
+void Vmcb::read_vcpu_state(Vcpu_state &state)
+{
+if (state.ax.charged()) rax = state.ax.value();
+if (state.flags.charged()) rflags = state.flags.value();
+if (state.sp.charged()) rsp = state.sp.value();
+if (state.ip.charged()) rip = state.ip.value();
+/* ip_len not used by SVM */
+if (state.dr7.charged()) dr7 = state.dr7.value();
+if (state.cr0.charged()) cr0 = state.cr0.value();
+if (state.cr2.charged()) cr2 = state.cr2.value();
+if (state.cr3.charged()) cr3 = state.cr3.value();
+if (state.cr4.charged()) cr4 = state.cr4.value();
+if (state.cs.charged()) cs = state.cs.value();
+if (state.ss.charged()) ss = state.ss.value();
+if (state.es.charged()) es = state.es.value();
+if (state.ds.charged()) ds = state.ds.value();
+if (state.fs.charged()) fs = state.fs.value();
+if (state.gs.charged()) gs = state.gs.value();
+if (state.tr.charged()) tr = state.tr.value();
+if (state.ldtr.charged()) ldtr = state.ldtr.value();
+if (state.gdtr.charged()) {
+gdtr.limit = state.gdtr.value().limit;
+gdtr.base = state.gdtr.value().base;
+}
+if (state.idtr.charged()) {
+idtr.limit = state.idtr.value().limit;
+idtr.base = state.idtr.value().base;
+}
+if (state.sysenter_cs.charged()) sysenter_cs = state.sysenter_cs.value();
+if (state.sysenter_sp.charged()) sysenter_esp = state.sysenter_sp.value();
+if (state.sysenter_ip.charged()) sysenter_eip = state.sysenter_ip.value();
+if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) {
+enforce_intercepts(state.ctrl_primary.value(),
+state.ctrl_secondary.value());
+}
+if (state.inj_info.charged() || state.inj_error.charged()) {
+/* Honor special signaling bit */
+if (state.inj_info.value() & 0x1000) {
+write<Vmcb::Int_control::V_irq>(1);
+write<Vmcb::Int_control::V_ign_tpr>(1);
+write<Vmcb::Intercept_misc1::Vintr>(1);
+} else {
+write<Vmcb::Int_control::V_irq>(0);
+write<Vmcb::Int_control::V_ign_tpr>(0);
+write<Vmcb::Intercept_misc1::Vintr>(0);
+}
+write<Vmcb::Eventinj>(
+/* Filter out special signaling bits */
+(state.inj_info.value() &
+(uint32_t) ~0x3000) |
+(((uint64_t) state.inj_error.value()) << 32)
+);
+}
+if (state.intr_state.charged()) {
+write<Vmcb::Int_control_ext::Int_shadow>(
+state.intr_state.value());
+}
+/* Guest activity state (actv) not used by SVM */
+if (state.tsc_offset.charged()) {
+/* state.tsc not used by SVM */
+write<Vmcb::Tsc_offset>(read<Vmcb::Tsc_offset>() +
+state.tsc_offset.value());
+}
+if (state.efer.charged()) {
+efer = state.efer.value();
+}
+/* pdpte not used by SVM */
+if (state.star.charged()) star = state.star.value();
+if (state.cstar.charged()) cstar = state.cstar.value();
+if (state.lstar.charged()) lstar = state.lstar.value();
+if (state.fmask.charged()) sfmask = state.fmask.value();
+if (state.kernel_gs_base.charged()) kernel_gs_base = state.kernel_gs_base.value();
+if (state.tpr.charged()) {
+write<Vmcb::Int_control::V_tpr>(state.tpr.value());
+/* TPR threshold not used on AMD */
+}
+}
+uint64_t Vmcb::get_exitcode()
+{
+enum Svm_exitcodes : uint64_t
+{
+SVM_EXIT_INVALID = -1ULL,
+SVM_VMEXIT_INTR = 0x60,
+SVM_VMEXIT_NPF = 0x400,
+};
+uint64_t exitcode = read<Vmcb::Exitcode>();
+switch (exitcode) {
+case SVM_EXIT_INVALID:
+error("VM: invalid SVM state!");
+break;
+case 0x40 ... 0x5f:
+error("VM: unhandled SVM exception ",
+Hex(exitcode));
+break;
+case SVM_VMEXIT_INTR:
+exitcode = EXIT_PAUSED;
+break;
+case SVM_VMEXIT_NPF:
+exitcode = EXIT_NPF;
+break;
+default:
+break;
+}
+return exitcode;
+}
+void Vmcb::switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context &regs)
+{
+/*
+* We push the host context's physical address to trapno so that
+* we can pop it later
+*/
+regs.trapno = root_vmcb_phys;
+asm volatile(
+"fxrstor (%[fpu_context]);"
+"mov %[guest_state], %%rax;"
+"mov %[regs], %%rsp;"
+"popq %%r8;"
+"popq %%r9;"
+"popq %%r10;"
+"popq %%r11;"
+"popq %%r12;"
+"popq %%r13;"
+"popq %%r14;"
+"popq %%r15;"
+"add $8, %%rsp;" /* don't pop rax */
+"popq %%rbx;"
+"popq %%rcx;"
+"popq %%rdx;"
+"popq %%rdi;"
+"popq %%rsi;"
+"popq %%rbp;"
+"clgi;"
+"sti;"
+"vmload;"
+"vmrun;"
+"vmsave;"
+"popq %%rax;" /* get the physical address of the host VMCB from
+the stack */
+"vmload;"
+"stgi;" /* maybe enter the kernel to handle an external interrupt
+that occured ... */
+"nop;"
+"cli;" /* ... otherwise, just disable interrupts again */
+"pushq %[trap_vmexit];" /* make the stack point to trapno, the right place
+to jump to _kernel_entry. We push 256 because
+this is outside of the valid range for interrupts
+*/
+"jmp _kernel_entry;" /* jump to _kernel_entry to save the
+GPRs without breaking any */
+:
+: [regs] "r"(&regs.r8), [fpu_context] "r"(regs.fpu_context()),
+[guest_state] "r"(vmcb_phys_addr),
+[trap_vmexit] "i"(TRAP_VMEXIT)
+: "rax", "memory");
+}


@@ -29,7 +29,8 @@
 #include <virtualization/svm.h>
 #include <hw/spec/x86_64/x86_64.h>
-using Genode::addr_t;
+using namespace Genode;
 using Kernel::Cpu;
 using Kernel::Vm;
 using Board::Vmcb;
@@ -37,7 +38,7 @@ using Board::Vmcb;
 Vm::Vm(Irq::Pool & user_irq_pool,
 Cpu & cpu,
-Genode::Vcpu_data & data,
+Vcpu_data & data,
 Kernel::Signal_context & context,
 Identity & id)
 :
@@ -47,7 +48,7 @@ Vm::Vm(Irq::Pool & user_irq_pool,
 _state(*data.vcpu_state),
 _context(context),
 _id(id),
-_vcpu_context(id.id, data.virt_area, data.phys_addr)
+_vcpu_context(id.id, data)
 {
 affinity(cpu);
 }
@@ -73,13 +74,8 @@ void Vm::proceed(Cpu & cpu)
 Cpu::Ia32_tsc_aux::write(
 (Cpu::Ia32_tsc_aux::access_t)_vcpu_context.tsc_aux_guest);
-/*
-* We push the host context's physical address to trapno so that
-* we can pop it later
-*/
-_vcpu_context.regs->trapno = _vcpu_context.vmcb.root_vmcb_phys;
-Hypervisor::switch_world( _vcpu_context.vmcb_phys_addr,
-(addr_t)&_vcpu_context.regs->r8, _vcpu_context.regs->fpu_context());
+_vcpu_context.vmcb->switch_world(_vcpu_context.vcpu_data.phys_addr,
+*_vcpu_context.regs);
 /*
 * This will fall into an interrupt or otherwise jump into
 * _kernel_entry
@@ -90,7 +86,6 @@ void Vm::proceed(Cpu & cpu)
 void Vm::exception(Cpu & cpu)
 {
 using namespace Board;
-using Genode::Cpu_state;
 switch (_vcpu_context.regs->trapno) {
 case Cpu_state::INTERRUPTS_START ... Cpu_state::INTERRUPTS_END:
@@ -103,7 +98,7 @@ void Vm::exception(Cpu & cpu)
 /* exception method was entered without exception */
 break;
 default:
-Genode::error("VM: triggered unknown exception ",
+error("VM: triggered unknown exception ",
 _vcpu_context.regs->trapno,
 " with error code ", _vcpu_context.regs->errcode,
 " at ip=",
@@ -113,14 +108,10 @@ void Vm::exception(Cpu & cpu)
 return;
 };
-enum Svm_exitcodes : Genode::uint64_t {
-VMEXIT_INVALID = -1ULL,
-VMEXIT_INTR = 0x60,
-VMEXIT_NPF = 0x400,
-};
 if (_vcpu_context.exitcode == EXIT_INIT) {
-_vcpu_context.initialize_svm(cpu, _id.table);
+addr_t table_phys_addr =
+reinterpret_cast<addr_t>(_id.table);
+_vcpu_context.initialize(cpu, table_phys_addr);
 _vcpu_context.tsc_aux_host = cpu.id();
 _vcpu_context.exitcode = EXIT_STARTUP;
 _pause_vcpu();
@@ -128,26 +119,11 @@ void Vm::exception(Cpu & cpu)
 return;
 }
-_vcpu_context.exitcode = _vcpu_context.vmcb.read<Vmcb::Exitcode>();
+_vcpu_context.exitcode = _vcpu_context.vmcb->get_exitcode();
-switch (_vcpu_context.exitcode) {
-case VMEXIT_INVALID:
-Genode::error("Vm::exception: invalid SVM state!");
-return;
-case 0x40 ... 0x5f:
-Genode::error("Vm::exception: unhandled SVM exception ",
-Genode::Hex(_vcpu_context.exitcode));
-return;
-case VMEXIT_INTR:
-_vcpu_context.exitcode = EXIT_PAUSED;
-return;
-case VMEXIT_NPF:
-_vcpu_context.exitcode = EXIT_NPF;
-[[fallthrough]];
-default:
+if (_vcpu_context.exitcode != EXIT_PAUSED) {
 _pause_vcpu();
 _context.submit(1);
-return;
 }
 }
@@ -174,13 +150,94 @@ void Vm::_sync_from_vmm()
 }
-Board::Vcpu_context::Vcpu_context(unsigned id,
-void *virt_area,
-addr_t vmcb_phys_addr)
+Board::Vcpu_context::Vcpu_context(unsigned id, Vcpu_data &vcpu_data)
 :
-vmcb(*Genode::construct_at<Vmcb>(virt_area, id)),
-vmcb_phys_addr(vmcb_phys_addr),
-regs(1)
+regs(1),
+vcpu_data(vcpu_data)
 {
+vmcb = construct_at<Vmcb>(vcpu_data.virt_area, id);
 regs->trapno = TRAP_VMEXIT;
 }
+void Board::Vcpu_context::read_vcpu_state(Vcpu_state &state)
+{
+vmcb->read_vcpu_state(state);
+if (state.cx.charged() || state.dx.charged() || state.bx.charged()) {
+regs->rax = state.ax.value();
+regs->rcx = state.cx.value();
+regs->rdx = state.dx.value();
+regs->rbx = state.bx.value();
+}
+if (state.bp.charged() || state.di.charged() || state.si.charged()) {
+regs->rdi = state.di.value();
+regs->rsi = state.si.value();
+regs->rbp = state.bp.value();
+}
+if (state.r8 .charged() || state.r9 .charged() ||
+state.r10.charged() || state.r11.charged() ||
+state.r12.charged() || state.r13.charged() ||
+state.r14.charged() || state.r15.charged()) {
+regs->r8 = state.r8.value();
+regs->r9 = state.r9.value();
+regs->r10 = state.r10.value();
+regs->r11 = state.r11.value();
+regs->r12 = state.r12.value();
+regs->r13 = state.r13.value();
+regs->r14 = state.r14.value();
+regs->r15 = state.r15.value();
+}
+if (state.fpu.charged()) {
+state.fpu.with_state(
+[&](Vcpu_state::Fpu::State const &fpu) {
+memcpy((void *) regs->fpu_context(), &fpu, sizeof(fpu));
+});
+}
+}
+void Board::Vcpu_context::write_vcpu_state(Vcpu_state &state)
+{
+state.discharge();
+state.exit_reason = (unsigned) exitcode;
+state.fpu.charge([&](Vcpu_state::Fpu::State &fpu) {
+memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu));
+});
+/* SVM will overwrite rax but VMX doesn't. */
+state.ax.charge(regs->rax);
+state.cx.charge(regs->rcx);
+state.dx.charge(regs->rdx);
+state.bx.charge(regs->rbx);
+state.di.charge(regs->rdi);
+state.si.charge(regs->rsi);
+state.bp.charge(regs->rbp);
+state.r8.charge(regs->r8);
+state.r9.charge(regs->r9);
+state.r10.charge(regs->r10);
+state.r11.charge(regs->r11);
+state.r12.charge(regs->r12);
+state.r13.charge(regs->r13);
+state.r14.charge(regs->r14);
+state.r15.charge(regs->r15);
+state.tsc.charge(Hw::Lapic::rdtsc());
+tsc_aux_guest = Cpu::Ia32_tsc_aux::read();
+state.tsc_aux.charge(tsc_aux_guest);
+Cpu::Ia32_tsc_aux::write((Cpu::Ia32_tsc_aux::access_t) tsc_aux_host);
+vmcb->write_vcpu_state(state);
+}
+void Board::Vcpu_context::initialize(Kernel::Cpu &cpu, addr_t table_phys_addr)
+{
+vmcb->initialize(cpu, table_phys_addr);
+}


@@ -16,11 +16,28 @@
 #include <base/internal/page_size.h>
 #include <base/stdint.h>
+#include <cpu.h>
 #include <cpu/vcpu_state.h>
 #include <cpu/vcpu_state_virtualization.h>
 #include <util/mmio.h>
 #include <util/string.h>
+using Genode::addr_t;
+using Genode::size_t;
+using Genode::uint8_t;
+using Genode::uint32_t;
+using Genode::uint64_t;
+using Genode::Mmio;
+using Genode::Vcpu_data;
+using Genode::Vcpu_state;
+using Genode::get_page_size;
+using Genode::memset;
+namespace Kernel
+{
+class Cpu;
+}
 namespace Board
 {
 struct Msrpm;
@@ -32,18 +49,18 @@ namespace Board
 }
-struct alignas(Genode::get_page_size()) Board::Msrpm
+struct alignas(get_page_size()) Board::Msrpm
 {
-Genode::uint8_t pad[8192];
+uint8_t pad[8192];
 Msrpm();
 };
 struct
-alignas(Genode::get_page_size())
+alignas(get_page_size())
 Board::Iopm
 {
-Genode::uint8_t pad[12288];
+uint8_t pad[12288];
 Iopm();
 };
@@ -55,17 +72,17 @@
 */
 struct Board::Vmcb_control_area
 {
-enum : Genode::size_t {
+enum : size_t {
 total_size = 1024U,
 used_guest_size = 0x3E0U
 };
 /* The control area is padded and used via Mmio-like accesses. */
-Genode::uint8_t control_area[used_guest_size];
+uint8_t control_area[used_guest_size];
 Vmcb_control_area()
 {
-Genode::memset((void *) this, 0, sizeof(Vmcb_control_area));
+memset((void *) this, 0, sizeof(Vmcb_control_area));
 }
 };
@@ -77,10 +94,10 @@ struct Board::Vmcb_control_area
 struct Board::Vmcb_reserved_for_host
 {
 /* 64bit used by the inherited Mmio class here */
-Genode::addr_t root_vmcb_phys = 0U;
+addr_t root_vmcb_phys = 0U;
 };
 static_assert(Board::Vmcb_control_area::total_size -
-sizeof(Board::Vmcb_control_area) - sizeof(Genode::Mmio<0>) -
+sizeof(Board::Vmcb_control_area) - sizeof(Mmio<0>) -
 sizeof(Board::Vmcb_reserved_for_host) ==
 0);
@@ -89,28 +106,28 @@ static_assert(Board::Vmcb_control_area::total_size -
 */
 struct Board::Vmcb_state_save_area
 {
-typedef Genode::Vcpu_state::Segment Segment;
+typedef Vcpu_state::Segment Segment;
 Segment es, cs, ss, ds, fs, gs, gdtr, ldtr, idtr, tr;
-Genode::uint8_t reserved1[43];
-Genode::uint8_t cpl;
-Genode::uint8_t reserved2[4];
-Genode::uint64_t efer;
-Genode::uint8_t reserved3[112];
-Genode::uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip;
-Genode::uint8_t reserved4[88];
-Genode::uint64_t rsp;
-Genode::uint64_t s_cet, ssp, isst_addr;
-Genode::uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base;
-Genode::uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2;
-Genode::uint8_t reserved5[32];
-Genode::uint64_t g_pat;
-Genode::uint64_t dbgctl;
-Genode::uint64_t br_from;
-Genode::uint64_t br_to;
-Genode::uint64_t lastexcpfrom;
-Genode::uint8_t reserved6[72];
-Genode::uint64_t spec_ctrl;
+uint8_t reserved1[43];
+uint8_t cpl;
+uint8_t reserved2[4];
+uint64_t efer;
+uint8_t reserved3[112];
+uint64_t cr4, cr3, cr0, dr7, dr6, rflags, rip;
+uint8_t reserved4[88];
+uint64_t rsp;
+uint64_t s_cet, ssp, isst_addr;
+uint64_t rax, star, lstar, cstar, sfmask, kernel_gs_base;
+uint64_t sysenter_cs, sysenter_esp, sysenter_eip, cr2;
+uint8_t reserved5[32];
+uint64_t g_pat;
+uint64_t dbgctl;
+uint64_t br_from;
+uint64_t br_to;
+uint64_t lastexcpfrom;
+uint8_t reserved6[72];
+uint64_t spec_ctrl;
 } __attribute__((packed));
@@ -132,10 +149,10 @@ struct Board::Vmcb_state_save_area
 * In total, this allows Register type access to the VMCB control area and easy
 * direct access to the VMCB state save area.
 */
-struct alignas(Genode::get_page_size()) Board::Vmcb
+struct alignas(get_page_size()) Board::Vmcb
 :
 Board::Vmcb_control_area,
-public Genode::Mmio<Genode::get_page_size()>,
+public Mmio<get_page_size()>,
 Board::Vmcb_reserved_for_host,
 Board::Vmcb_state_save_area
 {
@@ -143,14 +160,19 @@ struct alignas(Genode::get_page_size()) Board::Vmcb
 Asid_host = 0,
 };
-Vmcb(Genode::uint32_t id);
+Vmcb(uint32_t id);
-void init(Genode::size_t cpu_id, void * table_ptr);
-static Vmcb & host_vmcb(Genode::size_t cpu_id);
-static Genode::addr_t dummy_msrpm();
-void enforce_intercepts(Genode::uint32_t desired_primary = 0U, Genode::uint32_t desired_secondary = 0U);
-static Genode::addr_t dummy_iopm();
-Genode::uint8_t reserved[Genode::get_page_size() -
+static Vmcb & host_vmcb(size_t cpu_id);
+static addr_t dummy_msrpm();
+void enforce_intercepts(uint32_t desired_primary = 0U, uint32_t desired_secondary = 0U);
+static addr_t dummy_iopm();
+void initialize(Kernel::Cpu &cpu, addr_t page_table_phys_addr);
+void write_vcpu_state(Vcpu_state &state);
+void read_vcpu_state(Vcpu_state &state);
+void switch_world(addr_t vmcb_phys_addr, Core::Cpu::Context &regs);
+uint64_t get_exitcode();
+uint8_t reserved[get_page_size() -
 sizeof(Board::Vmcb_state_save_area) -
 Board::Vmcb_control_area::total_size];