diff --git a/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk b/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk index 0856786fbb..b7133939d5 100644 --- a/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk +++ b/repos/base-hw/lib/mk/spec/x86_64/core-hw-pc.mk @@ -21,6 +21,7 @@ SRC_CC += kernel/cpu_mp.cc SRC_CC += kernel/vm_thread_on.cc SRC_CC += spec/x86_64/virtualization/kernel/vm.cc SRC_CC += spec/x86_64/virtualization/kernel/svm.cc +SRC_CC += spec/x86_64/virtualization/kernel/vmx.cc SRC_CC += spec/x86_64/virtualization/vm_session_component.cc SRC_CC += vm_session_common.cc SRC_CC += vm_session_component.cc diff --git a/repos/base-hw/src/core/spec/x86_64/platform_support.cc b/repos/base-hw/src/core/spec/x86_64/platform_support.cc index 68dd4d3820..ef14783784 100644 --- a/repos/base-hw/src/core/spec/x86_64/platform_support.cc +++ b/repos/base-hw/src/core/spec/x86_64/platform_support.cc @@ -1,11 +1,12 @@ /* * \brief Platform implementations specific for x86_64 * \author Reto Buerki + * \author Benjamin Lamowski * \date 2015-05-04 */ /* - * Copyright (C) 2015-2017 Genode Labs GmbH + * Copyright (C) 2015-2024 Genode Labs GmbH * * This file is part of the Genode OS framework, which is distributed * under the terms of the GNU Affero General Public License version 3. @@ -55,7 +56,7 @@ void Platform::_init_additional_platform_info(Xml_generator &xml) xml.node("hardware", [&]() { xml.node("features", [&] () { xml.attribute("svm", Hw::Virtualization_support::has_svm()); - xml.attribute("vmx", false); + xml.attribute("vmx", Hw::Virtualization_support::has_vmx()); }); xml.node("tsc", [&]() { xml.attribute("invariant", Hw::Lapic::invariant_tsc()); diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/board.h b/repos/base-hw/src/core/spec/x86_64/virtualization/board.h index 265abae512..dadb7d8c60 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/board.h +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/board.h @@ -18,10 +18,12 @@ #include #include +#include #include #include +#include #include -#include +#include using Genode::addr_t; using Genode::uint64_t; @@ -46,7 +48,7 @@ namespace Board { EXIT_PAUSED = 0xff, }; - enum Custom_trapnos { + enum Custom_trapnos : uint64_t { TRAP_VMEXIT = 256, TRAP_VMSKIP = 257, }; @@ -72,14 +74,24 @@ struct Board::Vcpu_context uint64_t tsc_aux_host = 0U; uint64_t tsc_aux_guest = 0U; - uint64_t exitcode = EXIT_INIT; + uint64_t exit_reason = EXIT_INIT; static Virt_interface &detect_virtualization(Vcpu_data &vcpu_data, unsigned id) { - return *Genode::construct_at( - vcpu_data.virt_area, - vcpu_data, id); + if (Hw::Virtualization_support::has_svm()) + return *Genode::construct_at( + vcpu_data.virt_area, + vcpu_data, + id); + else if (Hw::Virtualization_support::has_vmx()) { + return *Genode::construct_at( + vcpu_data.virt_area, + vcpu_data); + } else { + Genode::error( "No virtualization support detected."); + throw Core::Service_denied(); + } } }; diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc index 30a0a1446d..750d8f2d5f 100644 --- a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc +++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vm.cc @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -24,9 +23,10 @@ #include #include +#include #include #include -#include +#include using namespace Genode; @@ -62,7 +62,7 @@ void Vm::proceed(Cpu & cpu) using namespace Board; cpu.switch_to(*_vcpu_context.regs); - if 
(_vcpu_context.exitcode == EXIT_INIT) {
+	if (_vcpu_context.exit_reason == EXIT_INIT) {
 		_vcpu_context.regs->trapno = TRAP_VMSKIP;
 		Hypervisor::restore_state_for_entry((addr_t)&_vcpu_context.regs->r8,
 		                                    _vcpu_context.regs->fpu_context());
@@ -75,8 +75,11 @@ void Vm::proceed(Cpu & cpu)
 	_vcpu_context.virt.switch_world(*_vcpu_context.regs);
 	/*
 	 * This will fall into an interrupt or otherwise jump into
-	 * _kernel_entry
+	 * _kernel_entry. If VMX encountered a severe error condition,
+	 * it will print an error message and return normally from the world
+	 * switch. In this case, just remove the vCPU thread from the scheduler.
 	 */
+	_pause_vcpu();
 }
 
@@ -84,15 +87,58 @@ void Vm::exception(Cpu & cpu)
 {
 	using namespace Board;
 
+	bool pause = false;
+
 	switch (_vcpu_context.regs->trapno) {
+	case TRAP_VMEXIT:
+		_vcpu_context.exit_reason =
+			_vcpu_context.virt.handle_vm_exit();
+		/*
+		 * If handle_vm_exit() returns EXIT_PAUSED, the vCPU has
+		 * exited due to a host interrupt. The exit reason is
+		 * set to EXIT_PAUSED so that if the VMM queries the
+		 * vCPU state while the vCPU is stopped, it is clear
+		 * that it does not need to handle a synchronous vCPU exit.
+		 *
+		 * VMX jumps directly to _kernel_entry when exiting
+		 * guest mode and skips the interrupt vectors, therefore
+		 * trapno will not be set to the host interrupt and we
+		 * have to explicitly handle interrupts here.
+		 *
+		 * SVM on the other hand will service the host interrupt
+		 * after the stgi instruction (see
+		 * AMD64 Architecture Programmer's Manual Vol 2
+		 * 15.17 Global Interrupt Flag, STGI and CLGI Instructions)
+		 * and will jump to the interrupt vector, setting trapno
+		 * to the host interrupt. This means the exception
+		 * handler should actually skip this case branch, which
+		 * is fine because _vcpu_context.exit_reason is set to
+		 * EXIT_PAUSED by default, so a VMM querying the vCPU
+		 * state will still get the right value.
+		 *
+		 * For any other exit reason, we exclude this vCPU
+		 * thread from being scheduled and signal the VMM that
+		 * it needs to handle an exit.
+		 */
+		if (_vcpu_context.exit_reason == EXIT_PAUSED)
+			_interrupt(_user_irq_pool, cpu.id());
+		else
+			pause = true;
+		break;
 	case Cpu_state::INTERRUPTS_START ... Cpu_state::INTERRUPTS_END:
 		_interrupt(_user_irq_pool, cpu.id());
 		break;
-	case TRAP_VMEXIT:
-		/* exception method was entered because of a VMEXIT */
-		break;
 	case TRAP_VMSKIP:
-		/* exception method was entered without exception */
+		/* vCPU is running for the first time */
+		_vcpu_context.initialize(cpu,
+		                         reinterpret_cast<addr_t>(_id.table));
+		_vcpu_context.tsc_aux_host = cpu.id();
+		/*
+		 * We set the artificial startup exit code, stop the
+		 * vCPU thread and ask the VMM to handle it.
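+		 * (Illustration, not part of this patch: after the
+		 * _context.submit(1) below has signalled the VMM, a VMM that
+		 * reads the vCPU state observes
+		 * Vcpu_state::exit_reason == EXIT_STARTUP before any guest
+		 * instruction has been executed, and can install the initial
+		 * guest register state in response.)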
+		 */
+		_vcpu_context.exit_reason = EXIT_STARTUP;
+		pause = true;
 		break;
 	default:
 		error("VM: triggered unknown exception ",
@@ -102,25 +148,12 @@ void Vm::exception(Cpu & cpu)
 		      (void *)_vcpu_context.regs->ip, " sp=",
 		      (void *)_vcpu_context.regs->sp);
 		_pause_vcpu();
-		return;
+		break;
 	};
 
-	if (_vcpu_context.exitcode == EXIT_INIT) {
-		addr_t table_phys_addr =
-			reinterpret_cast<addr_t>(_id.table);
-		_vcpu_context.initialize(cpu, table_phys_addr);
-		_vcpu_context.tsc_aux_host = cpu.id();
-		_vcpu_context.exitcode = EXIT_STARTUP;
-		_pause_vcpu();
-		_context.submit(1);
-		return;
-	}
-
-	_vcpu_context.exitcode = _vcpu_context.virt.handle_vm_exit();
-
-	if (_vcpu_context.exitcode != EXIT_PAUSED) {
-		_pause_vcpu();
-		_context.submit(1);
+	if (pause == true) {
+		_pause_vcpu();
+		_context.submit(1);
 	}
 }
 
@@ -133,14 +166,14 @@ void Vm::_sync_to_vmm()
 	 * Set exit code so that if _run() was not called after an exit, the
 	 * next exit due to a signal will be interpreted as PAUSE request.
 	 */
-	_vcpu_context.exitcode = Board::EXIT_PAUSED;
+	_vcpu_context.exit_reason = Board::EXIT_PAUSED;
 }
 
 
 void Vm::_sync_from_vmm()
 {
 	/* first run() will skip through to issue startup exit */
-	if (_vcpu_context.exitcode == Board::EXIT_INIT)
+	if (_vcpu_context.exit_reason == Board::EXIT_INIT)
 		return;
 
 	_vcpu_context.read_vcpu_state(_state);
@@ -197,7 +230,7 @@ void Board::Vcpu_context::read_vcpu_state(Vcpu_state &state)
 void Board::Vcpu_context::write_vcpu_state(Vcpu_state &state)
 {
 	state.discharge();
-	state.exit_reason = (unsigned) exitcode;
+	state.exit_reason = (unsigned) exit_reason;
 
 	state.fpu.charge([&](Vcpu_state::Fpu::State &fpu) {
 		memcpy(&fpu, (void *) regs->fpu_context(), sizeof(fpu));
diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc
new file mode 100644
index 0000000000..2807f346c2
--- /dev/null
+++ b/repos/base-hw/src/core/spec/x86_64/virtualization/kernel/vmx.cc
@@ -0,0 +1,924 @@
+/*
+ * \brief  VMX virtualization
+ * \author Benjamin Lamowski
+ * \date   2023-10-04
+ */
+
+/*
+ * Copyright (C) 2023-2024 Genode Labs GmbH
+ *
+ * This file is part of the Genode OS framework, which is distributed
+ * under the terms of the GNU Affero General Public License version 3.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using Genode::addr_t; +using Kernel::Cpu; +using Kernel::Vm; +using Board::Vmcs; +using Board::Vmcs_buf; + +extern "C" +{ + extern Genode::addr_t _kernel_entry; +} + +Genode::uint32_t Vmcs::system_rev = 0U; +Genode::uint32_t Vmcs::pinbased_allowed_0 = 0U; +Genode::uint32_t Vmcs::pinbased_allowed_1 = 0U; +Genode::uint32_t Vmcs::pri_exit_allowed_0 = 0U; +Genode::uint32_t Vmcs::pri_exit_allowed_1 = 0U; +Genode::uint32_t Vmcs::vm_entry_allowed_0 = 0U; +Genode::uint32_t Vmcs::vm_entry_allowed_1 = 0U; +Genode::uint32_t Vmcs::pri_procbased_allowed_0 = 0U; +Genode::uint32_t Vmcs::pri_procbased_allowed_1 = 0U; +Genode::uint32_t Vmcs::sec_procbased_allowed_0 = 0U; +Genode::uint32_t Vmcs::sec_procbased_allowed_1 = 0U; +Genode::uint64_t Vmcs::cr0_fixed0 = 0U; +Genode::uint64_t Vmcs::cr0_fixed1 = 0U; +Genode::uint64_t Vmcs::cr0_mask = 0U; +Genode::uint64_t Vmcs::cr4_fixed0 = 0U; +Genode::uint64_t Vmcs::cr4_fixed1 = 0U; +Genode::uint64_t Vmcs::cr4_mask = 0U; +extern int __idt; + +Vmcs * current_vmcs[Hw::Pc_board::NR_OF_CPUS] = { nullptr }; + +Vmcs_buf::Vmcs_buf(Genode::uint32_t system_rev) +{ + Genode::memset((void *) this, 0, sizeof(Vmcs_buf)); + rev = system_rev; +} + +Vmcs::Vmcs(Genode::Vcpu_data &vcpu_data) +: + Board::Virt_interface(vcpu_data) +{ + if (!system_rev) + setup_vmx_info(); + + Genode::construct_at((void *)(((addr_t) vcpu_data.virt_area) + + get_page_size()), system_rev); +} + + +void Vmcs::construct_host_vmcs() +{ + static Genode::Constructible host_vmcs_buf[NR_OF_CPUS]; + + if (!host_vmcs_buf[_cpu_id].constructed()) { + host_vmcs_buf[_cpu_id].construct(system_rev); + + Genode::addr_t host_vmcs_phys = + Core::Platform::core_phys_addr( + (addr_t)& host_vmcs_buf[_cpu_id]); + + vmxon(host_vmcs_phys); + } +} + +/* + * Setup static VMX information. This only works well as long as Intel's E and P + * cores report the same feature set. + */ +void Vmcs::setup_vmx_info() +{ + using Cpu = Hw::X86_64_cpu; + + + /* Get revision */ + Cpu::Ia32_vmx_basic::access_t vmx_basic = Cpu::Ia32_vmx_basic::read(); + system_rev = Cpu::Ia32_vmx_basic::Rev::get(vmx_basic); + + + /* Get pin-based controls */ + bool clear_controls = + Cpu::Ia32_vmx_basic::Clear_controls::get(vmx_basic); + + + Genode::uint64_t pinbased_ctls { }; + + if (clear_controls) + pinbased_ctls = Cpu::Ia32_vmx_true_pinbased_ctls::read(); + else + pinbased_ctls = Cpu::Ia32_vmx_pinbased_ctls::read(); + + pinbased_allowed_0 = + Cpu::Ia32_vmx_pinbased_ctls::Allowed_0_settings::get(pinbased_ctls); + + /* + * Vol. 3C of the Intel SDM (September 2023): + * 25.6.1 Pin-Based VM-Execution Controls + * "Logical processors that support the 0-settings of any of these bits + * will support the VMX capability MSR IA32_VMX_TRUE_PIN- BASED_CTLS + * MSR, and software should consult this MSR to discover support for the + * 0-settings of these bits. Software that is not aware of the + * functionality of any one of these bits should set that bit to 1. 
+	 */
+	Pin_based_execution_controls::Bit_1::set(pinbased_allowed_0);
+	Pin_based_execution_controls::Bit_2::set(pinbased_allowed_0);
+	Pin_based_execution_controls::Bit_4::set(pinbased_allowed_0);
+	pinbased_allowed_1 =
+		Cpu::Ia32_vmx_pinbased_ctls::Allowed_1_settings::get(pinbased_ctls);
+
+
+	/* Get entry controls */
+	Genode::uint64_t vm_entry_ctls { };
+
+	if (clear_controls)
+		vm_entry_ctls = Cpu::Ia32_vmx_true_entry_ctls::read();
+	else
+		vm_entry_ctls = Cpu::Ia32_vmx_entry_ctls::read();
+
+	vm_entry_allowed_0 =
+		Cpu::Ia32_vmx_entry_ctls::Allowed_0_settings::get(vm_entry_ctls);
+	vm_entry_allowed_1 =
+		Cpu::Ia32_vmx_entry_ctls::Allowed_1_settings::get(vm_entry_ctls);
+
+
+	/* Get primary exit controls */
+	Genode::uint64_t pri_exit_ctls { };
+
+	if (clear_controls)
+		pri_exit_ctls = Cpu::Ia32_vmx_true_exit_ctls::read();
+	else
+		pri_exit_ctls = Cpu::Ia32_vmx_exit_ctls::read();
+
+	pri_exit_allowed_0 =
+		Cpu::Ia32_vmx_exit_ctls::Allowed_0_settings::get(pri_exit_ctls);
+	pri_exit_allowed_1 =
+		Cpu::Ia32_vmx_exit_ctls::Allowed_1_settings::get(pri_exit_ctls);
+
+	/* Get primary proc-based exit controls */
+	Genode::uint64_t pri_procbased_ctls { };
+
+	if (clear_controls)
+		pri_procbased_ctls = Cpu::Ia32_vmx_true_procbased_ctls::read();
+	else
+		pri_procbased_ctls = Cpu::Ia32_vmx_procbased_ctls::read();
+
+	pri_procbased_allowed_0 =
+		Cpu::Ia32_vmx_procbased_ctls::Allowed_0_settings::get(
+			pri_procbased_ctls);
+	pri_procbased_allowed_1 =
+		Cpu::Ia32_vmx_procbased_ctls::Allowed_1_settings::get(
+			pri_procbased_ctls);
+
+	/*
+	 * Make sure that required features are available.
+	 * At this point the VM session is already established.
+	 * To our knowledge the required feature set should be available on any
+	 * x86_64 processor that indicated VMX support, so we resort to a panic
+	 * if the required feature set is not available.
+	 */
+	if (!Primary_proc_based_execution_controls::
+	     Activate_secondary_controls::get(pri_procbased_allowed_1)) {
+		Kernel::panic("Processor does not support secondary controls");
+	}
+
+	/* Get secondary proc-based exec controls */
+	Cpu::Ia32_vmx_procbased_ctls2::access_t sec_procbased_ctls =
+		Cpu::Ia32_vmx_procbased_ctls2::read();
+	sec_procbased_allowed_0 =
+		Cpu::Ia32_vmx_procbased_ctls::Allowed_0_settings::get(
+			sec_procbased_ctls);
+	sec_procbased_allowed_1 =
+		Cpu::Ia32_vmx_procbased_ctls::Allowed_1_settings::get(
+			sec_procbased_ctls);
+
+	if (!Secondary_proc_based_execution_controls::Enable_ept::get(
+		sec_procbased_allowed_1)) {
+		Kernel::panic("Processor does not support nested page tables");
+	}
+
+	if (!Secondary_proc_based_execution_controls::Unrestricted_guest::get(
+		sec_procbased_allowed_1)) {
+		Kernel::panic("Processor does not support Unrestricted guest mode");
+	}
+
+	if (!Primary_proc_based_execution_controls::Use_tpr_shadow::get(
+		pri_procbased_allowed_1)) {
+		Kernel::panic("Processor does not support VTPR");
+	}
+
+	/* CR0 and CR4 fixed values */
+	cr0_fixed0 = Cpu::Ia32_vmx_cr0_fixed0::read();
+	cr0_fixed1 = Cpu::Ia32_vmx_cr0_fixed1::read();
+
+	/*
+	 * We demand that unrestricted guest mode is used, hence don't force PE
+	 * and PG.
+	 * For details, see Vol. 3C of the Intel SDM (September 2023):
+	 * 24.8 Restrictions on VMX Operation
+	 * Yes, forced-to-one bits are in fact read from IA32_VMX_CR0_FIXED0.
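+	 *
+	 * (As an illustration with values typically reported on VMX-capable
+	 * parts, not taken from this code: IA32_VMX_CR0_FIXED0 = 0x80000021
+	 * (PG, NE, PE) and IA32_VMX_CR0_FIXED1 = 0xffffffff. After clearing
+	 * PE and PG, cr0_mask = ~0xffffffff | 0x20 = 0xffffffff00000020,
+	 * i.e. NE and the upper 32 reserved bits become host-owned before
+	 * CD and NW are added below.)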
+	 */
+	Cpu::Cr0::Pe::clear(cr0_fixed0);
+	Cpu::Cr0::Pg::clear(cr0_fixed0);
+
+	cr0_mask = ~cr0_fixed1 | cr0_fixed0;
+	Cpu::Cr0::Cd::set(cr0_mask);
+	Cpu::Cr0::Nw::set(cr0_mask);
+
+	cr4_fixed0 = Cpu::Ia32_vmx_cr4_fixed0::read();
+	cr4_fixed1 = Cpu::Ia32_vmx_cr4_fixed1::read();
+	cr4_mask   = ~cr4_fixed1 | cr4_fixed0;
+}
+
+void Vmcs::initialize(Kernel::Cpu &cpu, Genode::addr_t page_table_phys,
+                      Core::Cpu::Context &regs)
+{
+	using Cpu = Hw::X86_64_cpu;
+
+	/* Enable VMX */
+	Cpu::Ia32_feature_control::access_t feature_control =
+		Cpu::Ia32_feature_control::read();
+	if (!Cpu::Ia32_feature_control::Vmx_no_smx::get(feature_control)) {
+		Genode::log("Enabling VMX.");
+		if (!Cpu::Ia32_feature_control::Lock::get(feature_control)) {
+			Cpu::Ia32_feature_control::Vmx_no_smx::set(feature_control, 1);
+			Cpu::Ia32_feature_control::Lock::set(feature_control, 1);
+			Cpu::Ia32_feature_control::write(feature_control);
+		} else {
+			/*
+			 * Since the lock condition has been checked in
+			 * Hw::Virtualization_support::has_vmx(), this should never happen.
+			 */
+			Kernel::panic("VMX feature disabled");
+		}
+	}
+
+	Cpu::Cr4::access_t cr4 = Cpu::Cr4::read();
+	Cpu::Cr4::Vmxe::set(cr4);
+	Cpu::Cr4::write(cr4);
+
+	_cpu_id = cpu.id();
+
+	construct_host_vmcs();
+
+	Genode::construct_at<Virtual_apic_state>(
+		(void *)(((addr_t) vcpu_data.virt_area) +
+		2 * get_page_size()));
+
+
+	vmclear(vcpu_data.phys_addr + get_page_size());
+	_load_pointer();
+
+	prepare_vmcs();
+
+	/*
+	 * Set the VMCS link pointer to ~0UL according to spec
+	 * For details, see Vol. 3C of the Intel SDM (September 2023):
+	 * 25.4.2 Guest Non-Register State: vmcs link pointer
+	 */
+	write(E_VMCS_LINK_POINTER, ~0ULL);
+
+	/*
+	 * Set up the Extended-Page-Table Pointer (EPTP)
+	 * For details, see Vol. 3C of the Intel SDM (September 2023):
+	 * 25.6.11 Extended-Page-Table Pointer (EPTP)
+	 */
+	struct Ept_ptr : Genode::Register<64> {
+		struct Memory_type             : Bitfield< 0,3> { };
+		struct Ept_walk_length_minus_1 : Bitfield< 3,3> { };
+		struct Phys_addr_4k_aligned    : Bitfield<12,51> { };
+	};
+
+	enum Memory_type {
+		EPT_UNCACHEABLE = 0,
+		EPT_WRITEBACK   = 6,
+	};
+
+	/* We have a 4-level page table */
+	const Genode::uint8_t ept_length = 4;
+	Ept_ptr::access_t ept_ptr { 0 };
+	Ept_ptr::Memory_type::set(ept_ptr, EPT_WRITEBACK);
+	Ept_ptr::Ept_walk_length_minus_1::set(ept_ptr, ept_length - 1);
+	Ept_ptr::Phys_addr_4k_aligned::set(ept_ptr, page_table_phys >> 12);
+	write(E_EPT_POINTER, ept_ptr);
+
+
+	write(E_HOST_IA32_EFER, Cpu::Ia32_efer::read());
+
+	/*
+	 * If this looks the wrong way around then you are in good company.
+	 * For details, and a nonchalant explanation of this cursed interface,
+	 * see Vol. 3D of the Intel SDM (September 2023):
+	 * A.7 VMX-Fixed Bits in CR0
+	 */
+	Genode::uint64_t cr0 = Cpu::Cr0::read();
+
+	cr0 = (cr0 & cr0_fixed1) | cr0_fixed0;
+	/* NW and CD shouldn't be set by hw in the first place, but to be sure.
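+	   Since both bits are set in cr0_mask above, they are host-owned
+	   anyway: a guest write that flips them away from the read-shadow
+	   value causes a control-register-access VM exit instead of reaching
+	   the real CR0.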
*/ + Cpu::Cr0::Nw::clear(cr0); + Cpu::Cr0::Cd::clear(cr0); + Cpu::Cr0::write(cr0); + write(E_HOST_CR0, cr0); + write(E_CR0_GUEST_HOST_MASK, cr0_mask); + + write(E_HOST_CR3, Cpu::Cr3::read()); + + /* See above */ + cr4 = (cr4 & cr4_fixed1) | cr4_fixed0; + Cpu::Cr4::write(cr4); + write(E_HOST_CR4, cr4); + write(E_CR4_GUEST_HOST_MASK, cr4_mask); + + + /* offsets from GDT in src/core/spec/x86_64/cpu.h */ + write(E_HOST_CS_SELECTOR, 0x8); + + write(E_HOST_FS_SELECTOR, 0); + write(E_HOST_GS_SELECTOR, 0); + + write(E_HOST_TR_BASE, reinterpret_cast(&(cpu.tss))); + /* see Cpu::Tss::init() / the tss_descriptor is in slot 5 of the GDT */ + write(E_HOST_TR_SELECTOR, 0x28); + write(E_HOST_GDTR_BASE, reinterpret_cast(&(cpu.gdt))); + write(E_HOST_IDTR_BASE, reinterpret_cast(&__idt)); + + write(E_HOST_IA32_SYSENTER_ESP, reinterpret_cast(&(cpu.tss.rsp[0]))); + write(E_HOST_IA32_SYSENTER_CS, 0x8); + write(E_HOST_IA32_SYSENTER_EIP, reinterpret_cast(&_kernel_entry)); + + /* + * Set the RSP to trapno, so that _kernel_entry will save the registers + * into the right fields. + */ + write(E_HOST_RSP, reinterpret_cast(&(regs.trapno))); + write(E_HOST_RIP, reinterpret_cast(&_kernel_entry)); +} + + +/* + * Enforce VMX intercepts + */ +void Vmcs::enforce_execution_controls(Genode::uint32_t desired_primary, + Genode::uint32_t desired_secondary) +{ + /* + * Processor-Based VM-Execution Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 25.6.2 Processor-Based VM-Execution Controls + */ + + /* Exit on HLT instruction */ + Primary_proc_based_execution_controls::Hlt_exiting::set(desired_primary); + + /* Enforce use of nested paging */ + Primary_proc_based_execution_controls::Invlpg_exiting::clear(desired_primary); + Primary_proc_based_execution_controls::Cr3_load_exiting::clear(desired_primary); + Primary_proc_based_execution_controls::Cr3_store_exiting::clear(desired_primary); + Primary_proc_based_execution_controls::Activate_secondary_controls::set(desired_primary); + Secondary_proc_based_execution_controls::Enable_ept::set(desired_secondary); + Secondary_proc_based_execution_controls::Unrestricted_guest::set(desired_secondary); + Secondary_proc_based_execution_controls::Enable_vpid::clear(desired_secondary); + + /* + * Always exit on IO and MSR accesses. + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 26.1.3 Instructions That Cause VM Exits Conditionally + */ + Primary_proc_based_execution_controls::Unconditional_io_exiting::set(desired_primary); + Primary_proc_based_execution_controls::Use_io_bitmaps::clear(desired_primary); + Primary_proc_based_execution_controls::Use_msr_bitmaps::clear(desired_primary); + + /* + * Force a Virtual task-priority register (VTPR) + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 30.1.1 Virtualized APIC Registers + */ + Primary_proc_based_execution_controls::Use_tpr_shadow::set(desired_primary); + + Genode::uint32_t pri_procbased_set = + (desired_primary | pri_procbased_allowed_0) + & pri_procbased_allowed_1; + write(E_PRI_PROC_BASED_VM_EXEC_CTRL, pri_procbased_set); + + Genode::uint32_t sec_procbased_set = + (desired_secondary | sec_procbased_allowed_0) + & sec_procbased_allowed_1; + write(E_SEC_PROC_BASED_VM_EXEC_CTRL, sec_procbased_set); +} + +void Vmcs::prepare_vmcs() +{ + /* + * Pin-Based VM-Execution Controls + * + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 25.6.1 Pin-Based VM-Execution Controls + */ + Genode::uint32_t pinbased_want = 0U; + Pin_based_execution_controls::External_interrupt_exiting::set(pinbased_want); + Pin_based_execution_controls::Nmi_exiting::set(pinbased_want); + Pin_based_execution_controls::Virtual_nmis::set(pinbased_want); + Genode::uint32_t pinbased_set = (pinbased_want | pinbased_allowed_0) + & pinbased_allowed_1; + write(E_PIN_BASED_VM_EXECUTION_CTRL, pinbased_set); + + /* + * Primary VM-Exit Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table 25-13. Definitions of Primary VM-Exit Controls + */ + Genode::uint32_t primary_exit_want = 0U; + Primary_vm_exit_controls::Save_debug_controls::set(primary_exit_want); + Primary_vm_exit_controls::Host_address_space_size::set(primary_exit_want); + Primary_vm_exit_controls::Ack_interrupt_on_exit::set(primary_exit_want); + Primary_vm_exit_controls::Save_ia32_efer::set(primary_exit_want); + Primary_vm_exit_controls::Load_ia32_efer::set(primary_exit_want); + Genode::uint32_t primary_exit_set = + (primary_exit_want | pri_exit_allowed_0) & pri_exit_allowed_1; + write(E_PRIMARY_VM_EXIT_CONTROLS, primary_exit_set); + + /* + * VM-Entry Controls + * + * For details, see Vol. 3C of the Intel SDM (September 2023): + * Table 25-13. Definitions of Primary VM-Exit Controls + * 25.8.1 VM-Entry Controls + */ + Genode::uint32_t vm_entry_want = 0U; + Vm_entry_controls::Load_debug_controls::set(vm_entry_want); + Vm_entry_controls::Load_ia32_efer::set(vm_entry_want); + Genode::uint32_t vm_entry_set = + (vm_entry_want | vm_entry_allowed_0) & vm_entry_allowed_1; + write(E_VM_ENTRY_CONTROLS, vm_entry_set); + + + enforce_execution_controls(0U, 0U); + + write(E_VM_EXIT_MSR_STORE_ADDRESS, msr_phys_addr(&guest_msr_store_area)); + write(E_VM_EXIT_MSR_STORE_COUNT, Board::Msr_store_area::get_count()); + write(E_VM_ENTRY_MSR_LOAD_ADDRESS, msr_phys_addr(&guest_msr_store_area)); + write(E_VM_ENTRY_MSR_LOAD_COUNT, Board::Msr_store_area::get_count()); + + write(E_VM_EXIT_MSR_LOAD_ADDRESS, msr_phys_addr(&host_msr_store_area)); + write(E_VM_EXIT_MSR_LOAD_COUNT, Board::Msr_store_area::get_count()); + + write(E_VIRTUAL_APIC_ADDRESS, vcpu_data.phys_addr + 2 * get_page_size()); + + /* + * For details, see Vol. 3C of the Intel SDM (September 2023): + * 26.2 Other Causes Of VM Exits: Exceptions + */ + write(E_EXCEPTION_BITMAP, (1 << Genode::Cpu_state::ALIGNMENT_CHECK) | + (1 << Genode::Cpu_state::DEBUG)); + write(E_PAGE_FAULT_ERROR_CODE_MASK, 0U); + write(E_PAGE_FAULT_ERROR_CODE_MATCH, 0U); + + /* + * For now, don't use CR3 targets. + * For details, see Vol. 
3C of the Intel SDM (September 2023): + * 25.6.7 CR3-Target Controls + */ + write(E_CR3_TARGET_COUNT, 0U); +} + +void Vmcs::write_vcpu_state(Genode::Vcpu_state &state) +{ + typedef Genode::Vcpu_state::Range Range; + typedef Genode::Vcpu_state::Segment Segment; + + using Cpu = Hw::X86_64_cpu; + using Genode::uint16_t; + using Genode::uint32_t; + + _load_pointer(); + + state.ip.charge(read(E_GUEST_RIP)); + state.ip_len.charge(read(E_VM_EXIT_INSTRUCTION_LENGTH)); + + state.flags.charge(read(E_GUEST_RFLAGS)); + state.sp.charge(read(E_GUEST_RSP)); + + state.dr7.charge(read(E_GUEST_DR7)); + + state.cr0.charge(read(E_GUEST_CR0)); + state.cr2.charge(Cpu::Cr2::read()); + state.cr3.charge(read(E_GUEST_CR3)); + state.cr4.charge(read(E_GUEST_CR4)); + + state.cs.charge(Segment { + .sel = static_cast(read(E_GUEST_CS_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_CS_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_CS_LIMIT)), + .base = read(E_GUEST_CS_BASE) + }); + + state.ss.charge(Segment { + .sel = static_cast(read(E_GUEST_SS_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_SS_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_SS_LIMIT)), + .base = read(E_GUEST_SS_BASE) + }); + + state.es.charge(Segment { + .sel = static_cast(read(E_GUEST_ES_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_ES_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_ES_LIMIT)), + .base = read(E_GUEST_ES_BASE) + }); + + state.ds.charge(Segment { + .sel = static_cast(read(E_GUEST_DS_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_DS_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_DS_LIMIT)), + .base = read(E_GUEST_DS_BASE) + }); + + state.fs.charge(Segment { + .sel = static_cast(read(E_GUEST_FS_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_FS_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_FS_LIMIT)), + .base = read(E_GUEST_FS_BASE) + }); + + state.gs.charge(Segment { + .sel = static_cast(read(E_GUEST_GS_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_GS_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_GS_LIMIT)), + .base = read(E_GUEST_GS_BASE) + }); + + state.tr.charge(Segment { + .sel = static_cast(read(E_GUEST_TR_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_TR_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_TR_LIMIT)), + .base = read(E_GUEST_TR_BASE) + }); + + state.ldtr.charge(Segment { + .sel = static_cast(read(E_GUEST_LDTR_SELECTOR)), + .ar = _ar_convert_to_genode(read(E_GUEST_LDTR_ACCESS_RIGHTS)), + .limit = static_cast(read(E_GUEST_LDTR_LIMIT)), + .base = read(E_GUEST_LDTR_BASE) + }); + + state.gdtr.charge(Range { + .limit = static_cast(read(E_GUEST_GDTR_LIMIT)), + .base = read(E_GUEST_GDTR_BASE) + }); + + state.idtr.charge(Range { + .limit = static_cast(read(E_GUEST_IDTR_LIMIT)), + .base = read(E_GUEST_IDTR_BASE) + }); + + + state.sysenter_cs.charge(read(E_IA32_SYSENTER_CS)); + state.sysenter_sp.charge(read(E_GUEST_IA32_SYSENTER_ESP)); + state.sysenter_ip.charge(read(E_GUEST_IA32_SYSENTER_EIP)); + + state.qual_primary.charge(read(E_EXIT_QUALIFICATION)); + state.qual_secondary.charge(read(E_GUEST_PHYSICAL_ADDRESS)); + + /* Charging ctrl_primary and ctrl_secondary breaks Virtualbox 6 */ + + if (state.exit_reason == EXIT_PAUSED || state.exit_reason == VMX_EXIT_INVGUEST) { + state.inj_info.charge(static_cast(read(E_VM_ENTRY_INTERRUPT_INFO_FIELD))); + state.inj_error.charge(static_cast(read(E_VM_ENTRY_EXCEPTION_ERROR_CODE))); + + } else { + state.inj_info.charge(static_cast(read(E_IDT_VECTORING_INFORMATION_FIELD))); + 
state.inj_error.charge(static_cast(read(E_IDT_VECTORING_ERROR_CODE))); + } + + state.intr_state.charge( + static_cast(read(E_GUEST_INTERRUPTIBILITY_STATE))); + state.actv_state.charge( + static_cast(read(E_GUEST_ACTIVITY_STATE))); + + state.tsc.charge(Hw::Lapic::rdtsc()); + state.tsc_offset.charge(read(E_TSC_OFFSET)); + + state.efer.charge(read(E_GUEST_IA32_EFER)); + + state.pdpte_0.charge(read(E_GUEST_PDPTE0)); + state.pdpte_1.charge(read(E_GUEST_PDPTE1)); + state.pdpte_2.charge(read(E_GUEST_PDPTE2)); + state.pdpte_3.charge(read(E_GUEST_PDPTE3)); + + state.star.charge(guest_msr_store_area.star.get()); + state.lstar.charge(guest_msr_store_area.lstar.get()); + state.cstar.charge(guest_msr_store_area.cstar.get()); + state.fmask.charge(guest_msr_store_area.fmask.get()); + state.kernel_gs_base.charge(guest_msr_store_area.kernel_gs_base.get()); + + Virtual_apic_state *virtual_apic_state = + reinterpret_cast( + ((addr_t) vcpu_data.virt_area) + 2 * get_page_size()); + state.tpr.charge(virtual_apic_state->get_vtpr()); + state.tpr_threshold.charge( + static_cast(read(E_TPR_THRESHOLD))); +} + + +void Vmcs::read_vcpu_state(Genode::Vcpu_state &state) +{ + _load_pointer(); + + if (state.flags.charged()) { + write(E_GUEST_RFLAGS, state.flags.value()); + } + + if (state.sp.charged()) { + write(E_GUEST_RSP, state.sp.value()); + } + + if (state.ip.charged()) { + write(E_GUEST_RIP, state.ip.value()); + write(E_VM_ENTRY_INSTRUCTION_LENGTH, state.ip_len.value()); + } + + if (state.dr7.charged()) { + write(E_GUEST_DR7, state.dr7.value()); + } + + if (state.cr0.charged() || state.cr2.charged() || + state.cr3.charged() || state.cr4.charged()) { + write(E_GUEST_CR0, (state.cr0.value() & ~cr0_mask & cr0_fixed1) | cr0_fixed0); + write(E_CR0_READ_SHADOW, (state.cr0.value() & cr0_fixed1) | cr0_fixed0); + cr2 = state.cr2.value(); + write(E_GUEST_CR3, state.cr3.value()); + write(E_GUEST_CR4, (state.cr4.value() & cr4_fixed1) | cr4_fixed0); + write(E_CR4_READ_SHADOW, (state.cr4.value() & cr4_fixed1) | cr4_fixed0); + } + + if (state.cs.charged() || state.ss.charged()) { + write(E_GUEST_CS_SELECTOR, state.cs.value().sel); + write(E_GUEST_CS_ACCESS_RIGHTS, _ar_convert_to_intel(state.cs.value().ar)); + write(E_GUEST_CS_LIMIT, state.cs.value().limit); + write(E_GUEST_CS_BASE, state.cs.value().base); + + write(E_GUEST_SS_SELECTOR, state.ss.value().sel); + write(E_GUEST_SS_ACCESS_RIGHTS, _ar_convert_to_intel(state.ss.value().ar)); + write(E_GUEST_SS_LIMIT, state.ss.value().limit); + write(E_GUEST_SS_BASE, state.ss.value().base); + } + + if (state.es.charged() || state.ds.charged()) { + write(E_GUEST_ES_SELECTOR, state.es.value().sel); + write(E_GUEST_ES_ACCESS_RIGHTS, _ar_convert_to_intel(state.es.value().ar)); + write(E_GUEST_ES_LIMIT, state.es.value().limit); + write(E_GUEST_ES_BASE, state.es.value().base); + + write(E_GUEST_DS_SELECTOR, state.ds.value().sel); + write(E_GUEST_DS_ACCESS_RIGHTS, _ar_convert_to_intel(state.ds.value().ar)); + write(E_GUEST_DS_LIMIT, state.ds.value().limit); + write(E_GUEST_DS_BASE, state.ds.value().base); + } + + if (state.fs.charged() || state.gs.charged()) { + write(E_GUEST_FS_SELECTOR, state.fs.value().sel); + write(E_GUEST_FS_ACCESS_RIGHTS, _ar_convert_to_intel(state.fs.value().ar)); + write(E_GUEST_FS_LIMIT, state.fs.value().limit); + write(E_GUEST_FS_BASE, state.fs.value().base); + + write(E_GUEST_GS_SELECTOR, state.gs.value().sel); + write(E_GUEST_GS_ACCESS_RIGHTS, _ar_convert_to_intel(state.gs.value().ar)); + write(E_GUEST_GS_LIMIT, state.gs.value().limit); + write(E_GUEST_GS_BASE, 
state.gs.value().base); + } + + if (state.tr.charged()) { + write(E_GUEST_TR_SELECTOR, state.tr.value().sel); + write(E_GUEST_TR_ACCESS_RIGHTS, _ar_convert_to_intel(state.tr.value().ar)); + write(E_GUEST_TR_LIMIT, state.tr.value().limit); + write(E_GUEST_TR_BASE, state.tr.value().base); + } + + if (state.ldtr.charged()) { + write(E_GUEST_LDTR_SELECTOR, state.ldtr.value().sel); + write(E_GUEST_LDTR_ACCESS_RIGHTS, _ar_convert_to_intel(state.ldtr.value().ar)); + write(E_GUEST_LDTR_LIMIT, state.ldtr.value().limit); + write(E_GUEST_LDTR_BASE, state.ldtr.value().base); + } + + if (state.gdtr.charged()) { + write(E_GUEST_GDTR_LIMIT, state.gdtr.value().limit); + write(E_GUEST_GDTR_BASE, state.gdtr.value().base); + } + + if (state.idtr.charged()) { + write(E_GUEST_IDTR_LIMIT, state.idtr.value().limit); + write(E_GUEST_IDTR_BASE, state.idtr.value().base); + } + + if (state.sysenter_cs.charged() || state.sysenter_sp.charged() || + state.sysenter_ip.charged()) { + write(E_IA32_SYSENTER_CS, state.sysenter_cs.value()); + write(E_GUEST_IA32_SYSENTER_ESP, state.sysenter_sp.value()); + write(E_GUEST_IA32_SYSENTER_EIP, state.sysenter_ip.value()); + } + + if (state.ctrl_primary.charged() || state.ctrl_secondary.charged()) { + enforce_execution_controls(state.ctrl_primary.value(), + state.ctrl_secondary.value()); + } + + if (state.inj_info.charged() || state.inj_error.charged()) { + Genode::uint32_t pri_controls = static_cast (read(E_PRI_PROC_BASED_VM_EXEC_CTRL)); + Genode::uint32_t sec_controls = static_cast (read(E_SEC_PROC_BASED_VM_EXEC_CTRL)); + bool set_controls = false; + + if (state.inj_info.value() & 0x1000) { + if (!Primary_proc_based_execution_controls::Interrupt_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Interrupt_window_exiting::set(pri_controls); + set_controls = true; + } + } else { + if (Primary_proc_based_execution_controls::Interrupt_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Interrupt_window_exiting::clear(pri_controls); + set_controls = true; + } + } + + if (state.inj_info.value() & 0x2000) { + if (!Primary_proc_based_execution_controls::Nmi_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Nmi_window_exiting::set(pri_controls); + set_controls = true; + } + } else { + if (Primary_proc_based_execution_controls::Nmi_window_exiting::get(pri_controls)) { + Primary_proc_based_execution_controls::Nmi_window_exiting::clear(pri_controls); + set_controls = true; + } + } + + if (set_controls) + enforce_execution_controls(pri_controls, sec_controls); + + write(E_VM_ENTRY_INTERRUPT_INFO_FIELD, + /* Filter out special signaling bits */ + (state.inj_info.value() & + (Genode::uint32_t) ~0x3000)); + + write(E_VM_ENTRY_EXCEPTION_ERROR_CODE, state.inj_error.value()); + } + + if (state.intr_state.charged()) { + write(E_GUEST_INTERRUPTIBILITY_STATE, state.intr_state.value()); + } + + if (state.actv_state.charged()) { + write(E_GUEST_ACTIVITY_STATE, state.actv_state.value()); + } + + if (state.tsc_offset.charged()) { + /* state.tsc not used by SVM */ + write(E_TSC_OFFSET, state.tsc_offset.value()); + } + + if (state.efer.charged()) { + auto efer = state.efer.value(); + write(E_GUEST_IA32_EFER, efer); + + Vm_entry_controls::access_t entry_controls = static_cast(read(E_VM_ENTRY_CONTROLS)); + if (Cpu::Ia32_efer::Lma::get(efer)) + Vm_entry_controls::Ia32e_mode_guest::set(entry_controls); + else + Vm_entry_controls::Ia32e_mode_guest::clear(entry_controls); + + write(E_VM_ENTRY_CONTROLS, entry_controls); + } + + if 
(state.pdpte_0.charged() || state.pdpte_1.charged() ||
+	    state.pdpte_2.charged() || state.pdpte_3.charged()) {
+		write(E_GUEST_PDPTE0, state.pdpte_0.value());
+		write(E_GUEST_PDPTE1, state.pdpte_1.value());
+		write(E_GUEST_PDPTE2, state.pdpte_2.value());
+		write(E_GUEST_PDPTE3, state.pdpte_3.value());
+	}
+
+	if (state.star.charged() || state.lstar.charged() ||
+	    state.cstar.charged() || state.fmask.charged() ||
+	    state.kernel_gs_base.charged()) {
+		guest_msr_store_area.star.set(state.star.value());
+		guest_msr_store_area.lstar.set(state.lstar.value());
+		guest_msr_store_area.cstar.set(state.cstar.value());
+		guest_msr_store_area.fmask.set(state.fmask.value());
+		guest_msr_store_area.kernel_gs_base.set(state.kernel_gs_base.value());
+	}
+
+	Virtual_apic_state * virtual_apic_state =
+		reinterpret_cast<Virtual_apic_state *>(((addr_t) vcpu_data.virt_area) +
+		2 * get_page_size());
+
+	if (state.tpr.charged()) {
+		virtual_apic_state->set_vtpr(state.tpr.value());
+		write(E_TPR_THRESHOLD, state.tpr_threshold.value());
+	}
+}
+
+void Vmcs::switch_world(Core::Cpu::Context &regs)
+{
+	_load_pointer();
+
+	save_host_msrs();
+
+	Cpu::Cr2::write(cr2);
+
+	regs.trapno = TRAP_VMEXIT;
+	asm volatile(
+	    "fxrstor (%[fpu_context]);"
+	    "mov %[regs], %%rsp;"
+	    "popq %%r8;"
+	    "popq %%r9;"
+	    "popq %%r10;"
+	    "popq %%r11;"
+	    "popq %%r12;"
+	    "popq %%r13;"
+	    "popq %%r14;"
+	    "popq %%r15;"
+	    "popq %%rax;"
+	    "popq %%rbx;"
+	    "popq %%rcx;"
+	    "popq %%rdx;"
+	    "popq %%rdi;"
+	    "popq %%rsi;"
+	    "popq %%rbp;"
+	    "vmresume;"
+	    "vmlaunch;"
+	    :
+	    : [regs] "r"(&regs.r8),
+	      [fpu_context] "r"(regs.fpu_context())
+	    : "memory");
+	/*
+	 * Usually when exiting guest mode, VMX will jump to the address
+	 * provided in E_HOST_RIP; in our case: _kernel_entry.
+	 *
+	 * Execution continuing after the vmlaunch instruction indicates an
+	 * error in setting up VMX that should never happen. If we do return
+	 * from this method, the vCPU thread will be removed from the
+	 * scheduler.
+	 *
+	 * For error codes, see Intel SDM (September 2023) Vol. 3C
+	 * 31.4 VM Instruction Error Numbers
+	 */
+	error("VM: execution error: ",
+	      Genode::Hex(read(Vmcs::E_VM_INSTRUCTION_ERROR)));
+}
+
+/*
+ * Store MSRs to the Host MSR Store Area so that VMX restores them on VM exit
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * 28.6 Loading MSRs
+ */
+void Vmcs::save_host_msrs()
+{
+	using Cpu = Hw::X86_64_cpu;
+
+	host_msr_store_area.star.set(Cpu::Ia32_star::read());
+	host_msr_store_area.lstar.set(Cpu::Ia32_lstar::read());
+	host_msr_store_area.cstar.set(Cpu::Ia32_cstar::read());
+	host_msr_store_area.fmask.set(Cpu::Ia32_fmask::read());
+	host_msr_store_area.kernel_gs_base.set(
+		Cpu::Ia32_kernel_gs_base::read());
+}
+
+
+void Vmcs::_load_pointer()
+{
+	if (current_vmcs[_cpu_id] == this)
+		return;
+
+	current_vmcs[_cpu_id] = this;
+
+	vmptrld(vcpu_data.phys_addr + get_page_size());
+}
+
+
+uint64_t Vmcs::handle_vm_exit()
+{
+	cr2 = Cpu::Cr2::read();
+	uint64_t exitcode = read(E_EXIT_REASON) & 0xFFFF;
+
+	switch (exitcode) {
+	case VMX_EXIT_NMI:
+		/*
+		 * XXX We might need to handle host NMIs encoded in
+		 * the VM_EXIT_INTERRUPT_INFORMATION field, so let's
+		 * issue a warning.
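+		 * (For reference: that field encodes the vector in bits 7:0,
+		 * the interruption type in bits 10:8 — type 2 is NMI — and a
+		 * valid flag in bit 31; see the Intel SDM Vol. 3C section on
+		 * VM-exit interruption information.)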
+		 */
+		Genode::warning("VMX NMI exit occurred");
+		break;
+	case VMX_EXIT_INTR:
+		exitcode = EXIT_PAUSED;
+		break;
+	default:
+		break;
+	}
+
+	return exitcode;
+}
diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h b/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h
index d7c36d4acd..af0536bb78 100644
--- a/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h
+++ b/repos/base-hw/src/core/spec/x86_64/virtualization/virt_interface.h
@@ -30,6 +30,7 @@ namespace Board {
 
 	enum Virt_type {
 		SVM,
+		VMX
 	};
 
 	struct Virt_interface
diff --git a/repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h b/repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h
new file mode 100644
index 0000000000..01efa08fed
--- /dev/null
+++ b/repos/base-hw/src/core/spec/x86_64/virtualization/vmx.h
@@ -0,0 +1,516 @@
+/*
+ * \brief  VMX data structure
+ * \author Benjamin Lamowski
+ * \date   2023-09-26
+ */
+
+/*
+ * Copyright (C) 2023-2024 Genode Labs GmbH
+ *
+ * This file is part of the Genode OS framework, which is distributed
+ * under the terms of the GNU Affero General Public License version 3.
+ */
+
+#ifndef _INCLUDE__SPEC__PC__VMX_H_
+#define _INCLUDE__SPEC__PC__VMX_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using Genode::addr_t;
+using Genode::uint16_t;
+using Genode::uint32_t;
+using Genode::uint64_t;
+
+namespace Kernel { class Cpu; }
+
+namespace Board
+{
+	struct Vmcs;
+	struct Vmcs_buf;
+	struct Msr_store_area;
+	struct Virtual_apic_state;
+}
+
+/*
+ * VMX exit codes, incomplete list.
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * Table C-1. Basic Exit Reasons
+ */
+enum Vmx_exitcodes : uint32_t {
+	VMX_EXIT_NMI      =  0,
+	VMX_EXIT_INTR     =  1,
+	VMX_EXIT_INVGUEST = 33,
+};
+
+
+/*
+ * MSR-store area
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * 25.7.2 VM-Exit Controls for MSRs
+ */
+struct
+alignas(16)
+Board::Msr_store_area
+{
+	struct Msr_entry
+	{
+		uint32_t msr_index = 0U;
+		uint32_t _reserved = 0U;
+		uint64_t msr_data  = 0U;
+
+		void set(uint64_t data)
+		{
+			msr_data = data;
+		}
+
+		uint64_t get()
+		{
+			return msr_data;
+		}
+	} __attribute__((packed));
+
+	Msr_entry star           { 0xC0000081 };
+	Msr_entry lstar          { 0xC0000082 };
+	Msr_entry cstar          { 0xC0000083 };
+	Msr_entry fmask          { 0xC0000084 };
+	Msr_entry kernel_gs_base { 0xC0000102 };
+
+	static constexpr Core::size_t get_count()
+	{
+		return sizeof(Msr_store_area) / sizeof(Msr_entry);
+	}
+};
+
+
+/*
+ * Physical VMCS buffer
+ */
+struct
+alignas(Genode::get_page_size())
+Board::Vmcs_buf
+{
+	union {
+		uint32_t rev;
+		Genode::uint8_t pad[Genode::get_page_size()];
+	};
+
+	Vmcs_buf(uint32_t rev);
+};
+
+
+/*
+ * VMCS
+ *
+ * See Intel SDM (September 2023) Vol. 3C, section 24.2.
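+ *
+ * Fields are read and written through the vmread/vmwrite wrappers below
+ * together with the Field_encoding values, e.g. (illustrative only):
+ *
+ *   write(E_GUEST_RIP, 0xfff0);   /* set the guest instruction pointer */
+ *   uint64_t rip = read(E_GUEST_RIP);
+ *
+ * assuming a VMCS has been made current with vmptrld() beforehand.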
+ */ +struct +Board::Vmcs +: + public Board::Virt_interface +{ + static uint32_t system_rev; + static uint32_t pinbased_allowed_0; + static uint32_t pinbased_allowed_1; + static uint32_t vm_entry_allowed_0; + static uint32_t vm_entry_allowed_1; + static uint32_t pri_exit_allowed_0; + static uint32_t pri_exit_allowed_1; + static uint32_t pri_procbased_allowed_0; + static uint32_t pri_procbased_allowed_1; + static uint32_t sec_procbased_allowed_0; + static uint32_t sec_procbased_allowed_1; + static uint64_t cr0_fixed0; + static uint64_t cr0_fixed1; + static uint64_t cr0_mask; + static uint64_t cr4_fixed0; + static uint64_t cr4_fixed1; + static uint64_t cr4_mask; + static uint64_t vpid; + + Msr_store_area guest_msr_store_area { }; + /* XXX only needed per vCPU */ + Msr_store_area host_msr_store_area { }; + uint64_t cr2 { 0 }; + + Genode::size_t _cpu_id { }; + + addr_t msr_phys_addr(Msr_store_area *msr_ptr) + { + Genode::size_t offset = + (Genode::size_t)msr_ptr - (Genode::size_t)this; + return vcpu_data.phys_addr + offset; + } + + /* + * VMCS field encodings + * + * See Intel SDM (September 2023) Vol. 3D, appendix B. + */ + enum Field_encoding : uint64_t { + /* + * B.1 16-Bit Fields + */ + + /* B.1.2 16-Bit Guest-State Fields */ + E_GUEST_ES_SELECTOR = 0x00000800, + E_GUEST_CS_SELECTOR = 0x00000802, + E_GUEST_SS_SELECTOR = 0x00000804, + E_GUEST_DS_SELECTOR = 0x00000806, + E_GUEST_FS_SELECTOR = 0x00000808, + E_GUEST_GS_SELECTOR = 0x0000080A, + E_GUEST_LDTR_SELECTOR = 0x0000080C, + E_GUEST_TR_SELECTOR = 0x0000080E, + + /* B.1.3 16-Bit Host-State Fields */ + E_HOST_CS_SELECTOR = 0x00000C02, + E_HOST_FS_SELECTOR = 0x00000C08, + E_HOST_GS_SELECTOR = 0x00000C0A, + E_HOST_TR_SELECTOR = 0x00000C0C, + + + /* + * B.2 64-Bit Fields + */ + + /* B.2.1 64-Bit Control Fields */ + E_VM_EXIT_MSR_STORE_ADDRESS = 0x00002006, + E_VM_EXIT_MSR_LOAD_ADDRESS = 0x00002008, + E_VM_ENTRY_MSR_LOAD_ADDRESS = 0x0000200A, + E_TSC_OFFSET = 0x00002010, + E_VIRTUAL_APIC_ADDRESS = 0x00002012, + E_EPT_POINTER = 0x0000201A, + + /* B.2.2 64-Bit Read-Only Data Field */ + E_GUEST_PHYSICAL_ADDRESS = 0x00002400, + + /* B.2.3 64-Bit Guest-State Fields */ + E_VMCS_LINK_POINTER = 0x00002800, + E_GUEST_IA32_EFER = 0x00002806, + E_GUEST_PDPTE0 = 0x0000280A, + E_GUEST_PDPTE1 = 0x0000280C, + E_GUEST_PDPTE2 = 0x0000280E, + E_GUEST_PDPTE3 = 0x00002810, + + /* B.2.4 64-Bit Host-State Fields */ + E_HOST_IA32_EFER = 0x00002C02, + + + /* + * B.3 32-Bit Fields + */ + + /* B.3.1 32-Bit Control Fields */ + E_PIN_BASED_VM_EXECUTION_CTRL = 0x00004000, + E_PRI_PROC_BASED_VM_EXEC_CTRL = 0x00004002, + E_EXCEPTION_BITMAP = 0x00004004, + E_PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + E_PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + E_CR3_TARGET_COUNT = 0x0000400A, + E_PRIMARY_VM_EXIT_CONTROLS = 0x0000400C, + E_VM_EXIT_MSR_STORE_COUNT = 0x0000400E, + E_VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + E_VM_ENTRY_CONTROLS = 0x00004012, + E_VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + E_VM_ENTRY_INTERRUPT_INFO_FIELD = 0x00004016, + E_VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + E_VM_ENTRY_INSTRUCTION_LENGTH = 0x0000401A, + E_TPR_THRESHOLD = 0x0000401C, + E_SEC_PROC_BASED_VM_EXEC_CTRL = 0x0000401E, + + /* B.3.2 32-Bit Read-Only Data Fields */ + E_VM_INSTRUCTION_ERROR = 0x00004400, + E_EXIT_REASON = 0x00004402, + E_IDT_VECTORING_INFORMATION_FIELD = 0x00004408, + E_IDT_VECTORING_ERROR_CODE = 0x0000440A, + E_VM_EXIT_INSTRUCTION_LENGTH = 0x0000440C, + + /* B.3.3 32-Bit Guest-State Fields */ + E_GUEST_ES_LIMIT = 0x00004800, + E_GUEST_CS_LIMIT = 0x00004802, + E_GUEST_SS_LIMIT = 
0x00004804, + E_GUEST_DS_LIMIT = 0x00004806, + E_GUEST_FS_LIMIT = 0x00004808, + E_GUEST_GS_LIMIT = 0x0000480A, + E_GUEST_LDTR_LIMIT = 0x0000480C, + E_GUEST_TR_LIMIT = 0x0000480E, + E_GUEST_GDTR_LIMIT = 0x00004810, + E_GUEST_IDTR_LIMIT = 0x00004812, + E_GUEST_ES_ACCESS_RIGHTS = 0x00004814, + E_GUEST_CS_ACCESS_RIGHTS = 0x00004816, + E_GUEST_SS_ACCESS_RIGHTS = 0x00004818, + E_GUEST_DS_ACCESS_RIGHTS = 0x0000481A, + E_GUEST_FS_ACCESS_RIGHTS = 0x0000481C, + E_GUEST_GS_ACCESS_RIGHTS = 0x0000481E, + E_GUEST_LDTR_ACCESS_RIGHTS = 0x00004820, + E_GUEST_TR_ACCESS_RIGHTS = 0x00004822, + E_GUEST_INTERRUPTIBILITY_STATE = 0x00004824, + E_GUEST_ACTIVITY_STATE = 0x00004826, + E_IA32_SYSENTER_CS = 0x0000482A, + + /* B.3.3 32-Bit Host-State Field */ + E_HOST_IA32_SYSENTER_CS = 0x00004C00, + + + /* + * B.4 Natural-Width Fields + */ + + /* B.4.1 Natural-Width Control Fields */ + E_CR0_GUEST_HOST_MASK = 0x00006000, + E_CR4_GUEST_HOST_MASK = 0x00006002, + E_CR0_READ_SHADOW = 0x00006004, + E_CR4_READ_SHADOW = 0x00006006, + + /* B.4.2 Natural-Width Read-Only Data Fields */ + E_EXIT_QUALIFICATION = 0x00006400, + + /* B.4.3 Natural-Width Guest-State Fields */ + E_GUEST_CR0 = 0x00006800, + E_GUEST_CR3 = 0x00006802, + E_GUEST_CR4 = 0x00006804, + E_GUEST_ES_BASE = 0x00006806, + E_GUEST_CS_BASE = 0x00006808, + E_GUEST_SS_BASE = 0x0000680A, + E_GUEST_DS_BASE = 0x0000680C, + E_GUEST_FS_BASE = 0x0000680E, + E_GUEST_GS_BASE = 0x00006810, + E_GUEST_LDTR_BASE = 0x00006812, + E_GUEST_TR_BASE = 0x00006814, + E_GUEST_GDTR_BASE = 0x00006816, + E_GUEST_IDTR_BASE = 0x00006818, + E_GUEST_DR7 = 0x0000681A, + E_GUEST_RSP = 0x0000681C, + E_GUEST_RIP = 0x0000681E, + E_GUEST_RFLAGS = 0x00006820, + E_GUEST_IA32_SYSENTER_ESP = 0x00006824, + E_GUEST_IA32_SYSENTER_EIP = 0x00006826, + + /* B.4.4 Natural-Width Host-State Fields */ + E_HOST_CR0 = 0x00006C00, + E_HOST_CR3 = 0x00006C02, + E_HOST_CR4 = 0x00006C04, + E_HOST_TR_BASE = 0x00006C0A, + E_HOST_GDTR_BASE = 0x00006C0C, + E_HOST_IDTR_BASE = 0x00006C0E, + E_HOST_IA32_SYSENTER_ESP = 0x00006C10, + E_HOST_IA32_SYSENTER_EIP = 0x00006C12, + E_HOST_RSP = 0x00006C14, + E_HOST_RIP = 0x00006C16, + }; + + static void vmxon(addr_t phys_addr) + { + bool success = false; + asm volatile( + "vmxon %[vmcs];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [vmcs] "m"(phys_addr) + : "cc"); + assert(success && "vmxon failed"); + } + + static void vmptrld(addr_t phys_addr) + { + bool success = false; + asm volatile( + "vmptrld %[vmcs];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [vmcs] "m"(phys_addr) + : "cc"); + assert(success && "vmptrld failed"); + } + + static uint64_t read(uint32_t enc) + { + uint64_t val; + asm volatile( + "vmread %[enc], %[val];" + : [val] "=rm"(val) + : [enc] "rm"(static_cast(enc)) + : "cc"); + return val; + } + + static void vmclear(addr_t phys_addr) + { + bool success = false; + asm volatile( + "vmclear %[vmcs];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [vmcs] "m"(phys_addr) + : "cc"); + assert(success && "vmclear failed"); + } + + static void write(uint32_t enc, uint64_t val) + { + /* Genode::raw("VMWRITE: ", Genode::Hex(enc), " val: ", Genode::Hex(val)); */ + bool success = false; + asm volatile( + "vmwrite %[val], %[enc];" + /* the command succeeded if CF = 0 and ZF = 0 */ + : "=@cca"(success) + : [enc]"rm"(static_cast(enc)), [val] "r"(val) + : "cc"); + assert(success && "vmwrite failed"); + } + + Vmcs(Genode::Vcpu_data &vcpu_data); + Virt_type virt_type() override + { + return 
Virt_type::VMX;
+	}
+
+	static inline uint32_t _ar_convert_to_intel(uint16_t ar) {
+		return ((ar << 4) & 0x1F000) | (ar & 0xFF);
+	}
+
+	static inline uint16_t _ar_convert_to_genode(uint64_t ar) {
+		return ((ar >> 4) & 0x1F00) | (ar & 0xFF);
+	}
+
+	void initialize(Kernel::Cpu &cpu, addr_t page_table_phys,
+	                Core::Cpu::Context &regs) override;
+	void write_vcpu_state(Genode::Vcpu_state &state) override;
+	void read_vcpu_state(Genode::Vcpu_state &state) override;
+	void switch_world(Core::Cpu::Context &regs) override;
+	uint64_t handle_vm_exit() override;
+
+	void save_host_msrs();
+	void prepare_vmcs();
+	void setup_vmx_info();
+	static void enforce_execution_controls(uint32_t desired_primary,
+	                                       uint32_t desired_secondary);
+	void _load_pointer();
+	void construct_host_vmcs();
+};
+
+
+/*
+ * Access controls
+ */
+
+/*
+ * Pin-Based VM-Execution Controls
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * 25.6.1 Pin-Based VM-Execution Controls
+ */
+
+/* 25-5. Definitions of Pin-Based VM-Execution Controls */
+struct Pin_based_execution_controls : Genode::Register<32>
+{
+	struct External_interrupt_exiting : Bitfield<0,1> { };
+	struct Bit_1                      : Bitfield<1,1> { };
+	struct Bit_2                      : Bitfield<2,1> { };
+	struct Nmi_exiting                : Bitfield<3,1> { };
+	struct Bit_4                      : Bitfield<4,1> { };
+	struct Virtual_nmis               : Bitfield<5,1> { };
+};
+
+/*
+ * Primary VM-Exit Controls
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * Table 25-13. Definitions of Primary VM-Exit Controls
+ */
+struct Primary_vm_exit_controls : Genode::Register<32>
+{
+	struct Save_debug_controls     : Bitfield< 2,1> { };
+	struct Host_address_space_size : Bitfield< 9,1> { };
+	struct Ack_interrupt_on_exit   : Bitfield<15,1> { };
+	struct Save_ia32_efer          : Bitfield<20,1> { };
+	struct Load_ia32_efer          : Bitfield<21,1> { };
+};
+
+
+
+/*
+ * VM-Entry Controls
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * Table 25-13. Definitions of Primary VM-Exit Controls
+ * 25.8.1 VM-Entry Controls
+ */
+struct Vm_entry_controls : Genode::Register<32>
+{
+	struct Load_debug_controls : Bitfield< 2,1> { };
+	struct Ia32e_mode_guest    : Bitfield< 9,1> { };
+	struct Load_ia32_efer      : Bitfield<15,1> { };
+};
+
+
+/*
+ * Processor-Based VM-Execution Controls
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * 25.6.2 Processor-Based VM-Execution Controls
+ */
+
+/* Table 25-6. Definitions of Primary Processor-Based VM-Execution Controls */
+struct Primary_proc_based_execution_controls : Genode::Register<32>
+{
+	struct Interrupt_window_exiting    : Bitfield< 2,1> { };
+	struct Hlt_exiting                 : Bitfield< 7,1> { };
+	struct Invlpg_exiting              : Bitfield< 9,1> { };
+	struct Cr3_load_exiting            : Bitfield<15,1> { };
+	struct Cr3_store_exiting           : Bitfield<16,1> { };
+	struct Use_tpr_shadow              : Bitfield<21,1> { };
+	struct Nmi_window_exiting          : Bitfield<22,1> { };
+	struct Unconditional_io_exiting    : Bitfield<24,1> { };
+	struct Use_io_bitmaps              : Bitfield<25,1> { };
+	struct Use_msr_bitmaps             : Bitfield<28,1> { };
+	struct Activate_secondary_controls : Bitfield<31,1> { };
+};
+
+/* Table 25-7. Definitions of Secondary Processor-Based VM-Execution Controls */
+struct Secondary_proc_based_execution_controls : Genode::Register<32>
+{
+	struct Enable_ept          : Bitfield< 1,1> { };
+	struct Enable_vpid         : Bitfield< 5,1> { };
+	struct Unrestricted_guest  : Bitfield< 7,1> { };
+	struct Enable_vm_functions : Bitfield<13,1> { };
+};
+
+
+/*
+ * Virtual APIC State
+ *
+ * For details, see Vol. 3C of the Intel SDM (September 2023):
+ * 30.1 Virtual APIC State
+ */
+struct Board::Virtual_apic_state
+{
+	enum {
+		VTPR_OFFSET = 0x80,
+	};
+
+	Genode::uint8_t pad[4096];
+
+	uint32_t get_vtpr()
+	{
+		return static_cast<uint32_t>(*(pad + VTPR_OFFSET));
+	}
+
+	void set_vtpr(uint32_t vtpr)
+	{
+		uint32_t *tpr =
+			reinterpret_cast<uint32_t *>(pad + VTPR_OFFSET);
+		*tpr = vtpr;
+	}
+};
+
+#endif /* _INCLUDE__SPEC__PC__VMX_H_ */
diff --git a/repos/base-hw/src/include/hw/spec/x86_64/cpu.h b/repos/base-hw/src/include/hw/spec/x86_64/cpu.h
index 5de0df8137..0b888eff0d 100644
--- a/repos/base-hw/src/include/hw/spec/x86_64/cpu.h
+++ b/repos/base-hw/src/include/hw/spec/x86_64/cpu.h
@@ -1,11 +1,12 @@
 /*
  * \brief  x86_64 CPU definitions
  * \author Stefan Kalkowski
+ * \author Benjamin Lamowski
  * \date   2017-04-07
  */
 
 /*
- * Copyright (C) 2017 Genode Labs GmbH
+ * Copyright (C) 2017-2024 Genode Labs GmbH
  *
  * This file is part of the Genode OS framework, which is distributed
  * under the terms of the GNU Affero General Public License version 3.
@@ -130,16 +131,178 @@ struct Hw::X86_64_cpu
 	);
 
 	X86_64_MSR_REGISTER(Ia32_efer, 0xC0000080,
-		struct Svme : Bitfield< 12, 1> { }; /* Secure Virtual Machine Enable */
+		struct Lme  : Bitfield< 8, 1> { }; /* Long Mode Enable */
+		struct Lma  : Bitfield<10, 1> { }; /* Long Mode Active */
+		struct Svme : Bitfield<12, 1> { }; /* Secure Virtual Machine Enable */
+	);
+
+	/* Map of BASE Address of FS */
+	X86_64_MSR_REGISTER(Ia32_fs_base, 0xC0000100);
+
+	/* Map of BASE Address of GS */
+	X86_64_MSR_REGISTER(Ia32_gs_base, 0xC0000101);
+
+	/* System Call Target Address */
+	X86_64_MSR_REGISTER(Ia32_star, 0xC0000081);
+
+	/* IA-32e Mode System Call Target Address */
+	X86_64_MSR_REGISTER(Ia32_lstar, 0xC0000082);
+
+	/* IA-32e Mode System Call Target Address */
+	X86_64_MSR_REGISTER(Ia32_cstar, 0xC0000083);
+
+	/* System Call Flag Mask */
+	X86_64_MSR_REGISTER(Ia32_fmask, 0xC0000084);
+
+	/* Swap Target of BASE Address of GS */
+	X86_64_MSR_REGISTER(Ia32_kernel_gs_base, 0xC0000102);
+
+	/* See Vol. 4, Table 2-2 of the Intel SDM */
+	X86_64_MSR_REGISTER(Ia32_feature_control, 0x3A,
+		struct Lock       : Bitfield< 0, 1> { }; /* VMX Lock */
+		struct Vmx_no_smx : Bitfield< 2, 1> { }; /* Enable VMX outside SMX */
 	);
 
 	/*
 	 * Auxiliary TSC register
-	 * For details, see Vol. 3B of the Intel SDM:
-	 * 17.17.2 IA32_TSC_AUX Register and RDTSCP Support
+	 * For details, see Vol. 3B of the Intel SDM (September 2023):
+	 * 18.17.2 IA32_TSC_AUX Register and RDTSCP Support
 	 */
 	X86_64_MSR_REGISTER(Ia32_tsc_aux, 0xc0000103);
 
+	/*
+	 * Reporting Register of Basic VMX Capabilities
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.1 Basic VMX Information
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_basic, 0x480,
+		struct Rev            : Bitfield< 0,31> { }; /* VMCS revision */
+		struct Clear_controls : Bitfield<55, 1> { }; /* VMCS controls may be cleared, see A.2 */
+	);
+
+	/*
+	 * Capability Reporting Register of Pin-Based VM-Execution Controls
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.3.1 Pin-Based VM-Execution Controls
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_pinbased_ctls, 0x481,
+		struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */
+		struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */
+	);
+
+	/*
+	 * Capability Reporting Register of Pin-Based VM-Execution Flex Controls
+	 * For details, see Vol.
3D of the Intel SDM (September 2023): + * A.3.1 Pin-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_pinbased_ctls, 0x48D, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Primary Processor-Based VM-Execution Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.2 Primary Processor-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_procbased_ctls, 0x482, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Primary Processor-Based VM-Execution Flex Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.2 Primary Processor-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_procbased_ctls, 0x48E, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Primary VM-Exit Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.4.1 Primary VM-Exit Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_exit_ctls, 0x483, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of VM-Exit Flex Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.4.1 Primary VM-Exit Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_exit_ctls, 0x48F, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of VM-Entry Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.5 VM-Entry Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_entry_ctls, 0x484, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of VM-Entry Flex Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.5 VM-Entry Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_true_entry_ctls, 0x490, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of Secondary Processor-Based VM-Execution Controls + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.3.3 Secondary Processor-Based VM-Execution Controls + */ + X86_64_MSR_REGISTER(Ia32_vmx_procbased_ctls2, 0x48B, + struct Allowed_0_settings : Bitfield< 0,32> { }; /* allowed 0-settings */ + struct Allowed_1_settings : Bitfield<32,32> { }; /* allowed 1-settings */ + ); + + /* + * Capability Reporting Register of CR0 Bits Fixed to 0 + * [sic] in fact, bits reported here need to be 1 + * For details, see Vol. 3D of the Intel SDM (September 2023): + * A.7 VMX-Fixed Bits in CR0 + */ + X86_64_MSR_REGISTER(Ia32_vmx_cr0_fixed0, 0x486); + + /* + * Capability Reporting Register of CR0 Bits Fixed to 1 + * [sic] in fact, bits *NOT* reported here need to be 0 + * For details, see Vol. 
3D of the Intel SDM (September 2023):
+	 * A.7 VMX-Fixed Bits in CR0
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_cr0_fixed1, 0x487);
+
+	/*
+	 * Capability Reporting Register of CR4 Bits Fixed to 0
+	 * [sic] in fact, bits reported here need to be 1
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.8 VMX-Fixed Bits in CR4
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_cr4_fixed0, 0x488);
+
+	/*
+	 * Capability Reporting Register of CR4 Bits Fixed to 1
+	 * [sic] in fact, bits *NOT* reported here need to be 0
+	 * For details, see Vol. 3D of the Intel SDM (September 2023):
+	 * A.8 VMX-Fixed Bits in CR4
+	 */
+	X86_64_MSR_REGISTER(Ia32_vmx_cr4_fixed1, 0x489);
+
+
 	X86_64_CPUID_REGISTER(Cpuid_0_eax, 0, eax);
 	X86_64_CPUID_REGISTER(Cpuid_0_ebx, 0, ebx);
 	X86_64_CPUID_REGISTER(Cpuid_0_ecx, 0, ecx);
@@ -148,6 +311,7 @@ struct Hw::X86_64_cpu
 
 	X86_64_CPUID_REGISTER(Cpuid_1_eax, 1, eax);
 	X86_64_CPUID_REGISTER(Cpuid_1_ecx, 1, ecx,
+		struct Vmx          : Bitfield< 5, 1> { };
 		struct Tsc_deadline : Bitfield<24, 1> { };
 	);
 
 		struct Pat : Bitfield<16, 1> { };
 	);
 
-	/* Number of address space identifiers (ASID) */
-	X86_64_CPUID_REGISTER(Amd_nasid, 0x8000000A, ebx);
-
 	X86_64_CPUID_REGISTER(Cpuid_15_eax, 15, eax);
 	X86_64_CPUID_REGISTER(Cpuid_15_ebx, 15, ebx);
 	X86_64_CPUID_REGISTER(Cpuid_15_ecx, 15, ecx);
diff --git a/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h b/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h
index 822edca490..c3a53eacb9 100644
--- a/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h
+++ b/repos/base-hw/src/include/hw/spec/x86_64/x86_64.h
@@ -1,11 +1,12 @@
 /*
  * \brief  Definitions common to all x86_64 CPUs
  * \author Stefan Kalkowski
+ * \author Benjamin Lamowski
  * \date   2017-04-10
  */
 
 /*
- * Copyright (C) 2017 Genode Labs GmbH
+ * Copyright (C) 2017-2024 Genode Labs GmbH
  *
  * This file is part of the Genode OS framework, which is distributed
  * under the terms of the GNU Affero General Public License version 3.
@@ -299,6 +300,25 @@ struct Hw::Virtualization_support
 		return false;
 	}
+
+	static bool has_vmx()
+	{
+		if (Hw::Vendor::get_vendor_id() != Hw::Vendor::INTEL)
+			return false;
+
+		Cpu::Cpuid_1_ecx::access_t ecx = Cpu::Cpuid_1_ecx::read();
+		if (!Cpu::Cpuid_1_ecx::Vmx::get(ecx))
+			return false;
+
+		/* Check if VMX feature is off and locked */
+		Cpu::Ia32_feature_control::access_t feature_control =
+			Cpu::Ia32_feature_control::read();
+		if (!Cpu::Ia32_feature_control::Vmx_no_smx::get(feature_control) &&
+		    Cpu::Ia32_feature_control::Lock::get(feature_control))
+			return false;
+
+		return true;
+	}
 };
 
 #endif /* _SRC__LIB__HW__SPEC__X86_64__X86_64_H_ */
diff --git a/repos/base/include/spec/x86_64/cpu/cpu_state.h b/repos/base/include/spec/x86_64/cpu/cpu_state.h
index 5002e93d3e..99ca5c8901 100644
--- a/repos/base/include/spec/x86_64/cpu/cpu_state.h
+++ b/repos/base/include/spec/x86_64/cpu/cpu_state.h
@@ -4,13 +4,14 @@
  * \author Christian Prochaska
  * \author Reto Buerki
  * \author Stefan Kalkowski
+ * \author Benjamin Lamowski
  * \date   2011-04-15
  *
  * This file contains the x86_64-specific part of the CPU state.
  */
 
 /*
- * Copyright (C) 2011-2017 Genode Labs GmbH
+ * Copyright (C) 2011-2024 Genode Labs GmbH
  *
  * This file is part of the Genode OS framework, which is distributed
  * under the terms of the GNU Affero General Public License version 3.
@@ -33,6 +34,7 @@ struct Genode::Cpu_state
 		NO_MATH_COPROC     = 0x07,
 		GENERAL_PROTECTION = 0x0d,
 		PAGE_FAULT         = 0x0e,
+		ALIGNMENT_CHECK    = 0x11,
 		SUPERVISOR_CALL    = 0x80,
 		INTERRUPTS_START   = 0x20,
 		RESET              = 0xfe,