sel4/x86: implement vm_session interface

Issue #3111

- enable vt-x in kernel configuration

Kernel patches:

- add unrestricted guest support
- avoid kernel boot failure when vt-x is not available
- avoid nullpointer in kernel when vcpu is not fully setup
- avoid vcpu scheduling bug which causes starvation on same/below prio level
- save efer register correctly from guest
This commit is contained in:
Alexander Boettcher 2018-09-26 11:00:01 +02:00 committed by Norman Feske
parent b75307b070
commit ef130a3bf9
35 changed files with 1759 additions and 65 deletions

View File

@ -0,0 +1,14 @@
#
# \brief Portions of base library shared by core and non-core processes
# \author Norman Feske
# \date 2013-02-14
#
include $(BASE_DIR)/lib/mk/base-common.inc
LIBS += startup-sel4 syscall-sel4
SRC_CC += rpc_dispatch_loop.cc
SRC_CC += thread.cc thread_myself.cc thread_bootstrap.cc
SRC_CC += capability.cc capability_raw.cc
SRC_CC += stack_area_addr.cc

View File

@ -4,11 +4,4 @@
# \date 2013-02-14 # \date 2013-02-14
# #
include $(BASE_DIR)/lib/mk/base-common.inc include $(REP_DIR)/lib/mk/base-sel4-common.inc
LIBS += startup-sel4 syscall-sel4
SRC_CC += rpc_dispatch_loop.cc
SRC_CC += thread.cc thread_myself.cc thread_bootstrap.cc
SRC_CC += capability.cc capability_raw.cc
SRC_CC += stack_area_addr.cc

View File

@ -0,0 +1,9 @@
#
# \brief Portions of base library shared by core and non-core processes
# \author Norman Feske
# \date 2013-02-14
#
vpath vm_session.cc $(REP_DIR)/src/lib/base/x86
include $(REP_DIR)/lib/mk/base-sel4-common.inc

View File

@ -1,18 +1,20 @@
GEN_SRC_CC = \ GEN_SRC_CC = \
spec/x86/platform_services.cc \
spec/x86/io_port_session_component.cc spec/x86/io_port_session_component.cc
REP_SRC_CC = \ REP_SRC_CC = \
spec/x86/io_port_session_support.cc \ spec/x86/io_port_session_support.cc \
spec/x86/irq.cc \ spec/x86/irq.cc \
spec/x86/platform_services.cc \
spec/x86/platform_thread.cc \ spec/x86/platform_thread.cc \
spec/x86/vm_space.cc \ spec/x86/vm_space.cc \
spec/x86/vm_session_component.cc \
spec/x86_32/boot_info.cc \ spec/x86_32/boot_info.cc \
spec/x86_32/platform.cc \ spec/x86_32/platform.cc \
spec/x86_32/platform_pd.cc \ spec/x86_32/platform_pd.cc \
spec/x86_32/thread.cc \ spec/x86_32/thread.cc \
spec/x86_32/vm_space.cc spec/x86_32/vm_space.cc
INC_DIR += $(REP_DIR)/src/core/spec/x86_32
INC_DIR += $(REP_DIR)/src/core/spec/x86 INC_DIR += $(REP_DIR)/src/core/spec/x86
include $(REP_DIR)/lib/mk/core-sel4.inc include $(REP_DIR)/lib/mk/core-sel4.inc

View File

@ -5,6 +5,6 @@ SEL4_ARCH := ia32
PLAT_BOARD := /$(SEL4_ARCH) PLAT_BOARD := /$(SEL4_ARCH)
SEL4_WORDBITS := 32 SEL4_WORDBITS := 32
ARCH_INCLUDES := exIPC.h ARCH_INCLUDES := exIPC.h vmenter.h
include $(REP_DIR)/lib/mk/syscall-sel4.inc include $(REP_DIR)/lib/mk/syscall-sel4.inc

View File

@ -1,18 +1,20 @@
GEN_SRC_CC = \ GEN_SRC_CC = \
spec/x86/platform_services.cc \
spec/x86/io_port_session_component.cc spec/x86/io_port_session_component.cc
REP_SRC_CC = \ REP_SRC_CC = \
spec/x86/io_port_session_support.cc \ spec/x86/io_port_session_support.cc \
spec/x86/irq.cc \ spec/x86/irq.cc \
spec/x86/platform_services.cc \
spec/x86/platform_thread.cc \ spec/x86/platform_thread.cc \
spec/x86/vm_space.cc \ spec/x86/vm_space.cc \
spec/x86/vm_session_component.cc \
spec/x86_64/boot_info.cc \ spec/x86_64/boot_info.cc \
spec/x86_64/platform.cc \ spec/x86_64/platform.cc \
spec/x86_64/platform_pd.cc \ spec/x86_64/platform_pd.cc \
spec/x86_64/thread.cc \ spec/x86_64/thread.cc \
spec/x86_64/vm_space.cc spec/x86_64/vm_space.cc
INC_DIR += $(REP_DIR)/src/core/spec/x86_64
INC_DIR += $(REP_DIR)/src/core/spec/x86 INC_DIR += $(REP_DIR)/src/core/spec/x86
include $(REP_DIR)/lib/mk/core-sel4.inc include $(REP_DIR)/lib/mk/core-sel4.inc

View File

@ -5,7 +5,7 @@ SEL4_ARCH := x86_64
PLAT_BOARD := /$(SEL4_ARCH) PLAT_BOARD := /$(SEL4_ARCH)
SEL4_WORDBITS := 64 SEL4_WORDBITS := 64
ARCH_INCLUDES := exIPC.h ARCH_INCLUDES := exIPC.h vmenter.h
SEL4_ARCH_INCLUDES := syscalls_syscall.h SEL4_ARCH_INCLUDES := syscalls_syscall.h
include $(REP_DIR)/lib/mk/syscall-sel4.inc include $(REP_DIR)/lib/mk/syscall-sel4.inc

View File

@ -32,7 +32,7 @@
#define CONFIG_NUM_PRIORITIES 256 #define CONFIG_NUM_PRIORITIES 256
#define CONFIG_TESTPRINTER_REGEX ".*" #define CONFIG_TESTPRINTER_REGEX ".*"
#define CONFIG_APP_SEL4TEST 1 #define CONFIG_APP_SEL4TEST 1
@@ -96,6 +96,11 @@ @@ -96,6 +96,13 @@
#define CONFIG_CACHE_LN_SZ 64 #define CONFIG_CACHE_LN_SZ 64
#define CONFIG_LIB_SEL4_MUSLC_SYS_MORECORE_BYTES 1048576 #define CONFIG_LIB_SEL4_MUSLC_SYS_MORECORE_BYTES 1048576
#define CONFIG_BUILDSYS_USE_CCACHE 1 #define CONFIG_BUILDSYS_USE_CCACHE 1
@ -41,6 +41,8 @@
+#define CONFIG_PRINTING 1 +#define CONFIG_PRINTING 1
+#define CONFIG_ENABLE_BENCHMARKS 1 +#define CONFIG_ENABLE_BENCHMARKS 1
+#define CONFIG_BENCHMARK_TRACK_UTILISATION 1 +#define CONFIG_BENCHMARK_TRACK_UTILISATION 1
+#define CONFIG_VTX 1
+#define CONFIG_MAX_VPIDS 64
#else #else
#define AUTOCONF_INCLUDED #define AUTOCONF_INCLUDED
#define CONFIG_LIB_SEL4_SIMPLE 1 #define CONFIG_LIB_SEL4_SIMPLE 1

View File

@ -42,7 +42,7 @@
#define CONFIG_NUM_PRIORITIES 256 #define CONFIG_NUM_PRIORITIES 256
#define CONFIG_TESTPRINTER_REGEX ".*" #define CONFIG_TESTPRINTER_REGEX ".*"
#define CONFIG_APP_SEL4TEST 1 #define CONFIG_APP_SEL4TEST 1
@@ -189,9 +187,14 @@ @@ -189,9 +187,16 @@
#define CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES 128 #define CONFIG_LIBSEL4DEBUG_ALLOC_BUFFER_ENTRIES 128
#define CONFIG_CACHE_LN_SZ 64 #define CONFIG_CACHE_LN_SZ 64
#define CONFIG_ARCH_X86_64 1 #define CONFIG_ARCH_X86_64 1
@ -58,4 +58,6 @@
+#define CONFIG_PRINTING 1 +#define CONFIG_PRINTING 1
+#define CONFIG_ENABLE_BENCHMARKS 1 +#define CONFIG_ENABLE_BENCHMARKS 1
+#define CONFIG_BENCHMARK_TRACK_UTILISATION 1 +#define CONFIG_BENCHMARK_TRACK_UTILISATION 1
+#define CONFIG_VTX 1
+#define CONFIG_MAX_VPIDS 64
#endif /* ARCH_IA32 */ #endif /* ARCH_IA32 */

View File

@ -0,0 +1,9 @@
--- src/kernel/sel4/src/arch/x86/object/vcpu.c
+++ src/kernel/sel4/src/arch/x86/object/vcpu.c
@@ -340,6 +340,7 @@ init_vtx_fixed_values(bool_t useTrueMsrs)
secondary_control_high |= secondary_control_mask;
exit_control_high |= exit_control_mask;
+ entry_control_high |= BIT(15);
return true;
}

View File

@ -0,0 +1,28 @@
--- src/kernel/sel4/src/arch/x86/object/vcpu.c
+++ src/kernel/sel4/src/arch/x86/object/vcpu.c
@@ -377,6 +379,12 @@ check_vtx_fixed_values(bool_t useTrueMsrs)
uint32_t local_cr4_high = x86_rdmsr_low(IA32_VMX_CR4_FIXED0_MSR);
uint32_t local_cr4_low = x86_rdmsr_low(IA32_VMX_CR4_FIXED1_MSR);
+ /* if UG was switched on on boot CPU, do it also on all other CPUs */
+ if (secondary_control_low & BIT(7)) {
+ local_secondary_control_high &= BIT(7);
+ local_cr0_high &= ~(BIT(31) | BIT(0));
+ }
+
/* We want to check that any bits that there are no bits that this core
* requires to be high, that the BSP did not require to be high. This can
* be checked with 'local_high & high == local_high'.
@@ -1067,6 +1075,12 @@ vtx_init(void)
printf("vt-x: lack of required features\n");
return false;
}
+
+ /* enable unrestricted guest support if available */
+ if (secondary_control_low & BIT(7)) {
+ secondary_control_high |= BIT(7);
+ cr0_high &= ~(BIT(31) | BIT(0));
+ }
}
if (!check_vtx_fixed_values(vmx_basic_msr_get_true_msrs(vmx_basic))) {
printf("vt-x: cores have inconsistent features\n");

View File

@ -0,0 +1,85 @@
--- src/kernel/sel4/include/arch/x86/arch/object/vcpu.h
+++ src/kernel/sel4/include/arch/x86/arch/object/vcpu.h
@@ -322,7 +322,7 @@ unverified_compile_assert(vcpu_fpu_state_alignment_valid,
/* Initializes a VCPU object with default values. A VCPU object that is not inititlized
* must not be run/loaded with vmptrld */
-void vcpu_init(vcpu_t *vcpu);
+bool_t vcpu_init(vcpu_t *vcpu);
/* Cleans up the VCPU object such that its memory can be freed */
void vcpu_finalise(vcpu_t *vcpu);
--- src/kernel/sel4/src/arch/x86/kernel/boot.c
+++ src/kernel/sel4/src/arch/x86/kernel/boot.c
@@ -582,9 +582,7 @@ init_cpu(
#ifdef CONFIG_VTX
/* initialise Intel VT-x extensions */
- if (!vtx_init()) {
- return false;
- }
+ vtx_init();
#endif
return true;
--- src/kernel/sel4/src/arch/x86/object/objecttype.c
+++ src/kernel/sel4/src/arch/x86/object/objecttype.c
@@ -476,7 +476,8 @@ Arch_createObject(object_t t, void *regionBase, word_t userSize, bool_t deviceMe
case seL4_X86_VCPUObject: {
vcpu_t *vcpu;
vcpu = VCPU_PTR((word_t)regionBase);
- vcpu_init(vcpu);
+ if (!vcpu_init(vcpu))
+ return cap_null_cap_new();
return cap_vcpu_cap_new(VCPU_REF(vcpu));
}
case seL4_X86_EPTPML4Object:
--- src/kernel/sel4/src/arch/x86/object/vcpu.c
+++ src/kernel/sel4/src/arch/x86/object/vcpu.c
@@ -409,9 +416,14 @@ applyFixedBits(uint32_t original, uint32_t high, uint32_t low)
return original;
}
-void
+static bool_t vcpu_support_available = false;
+
+bool_t
vcpu_init(vcpu_t *vcpu)
{
+ if (!vcpu_support_available)
+ return false;
+
vcpu->vcpuTCB = NULL;
vcpu->launched = false;
@@ -481,6 +493,8 @@ vcpu_init(vcpu_t *vcpu)
memset(vcpu->io, ~(word_t)0, VCPU_IOBITMAP_SIZE);
vmwrite(VMX_CONTROL_IOA_ADDRESS, pptr_to_paddr(vcpu->io));
vmwrite(VMX_CONTROL_IOB_ADDRESS, pptr_to_paddr((char *)vcpu->io + (VCPU_IOBITMAP_SIZE / 2)));
+
+ return true;
}
static void
@@ -1067,6 +1081,12 @@ vtx_init(void)
printf("vt-x: lack of required features\n");
return false;
}
+
+ /* enable unrestricted guest support if available */
+ if (secondary_control_low & BIT(7)) {
+ secondary_control_high |= BIT(7);
+ cr0_high &= ~(BIT(31) | BIT(0));
+ }
}
if (!check_vtx_fixed_values(vmx_basic_msr_get_true_msrs(vmx_basic))) {
printf("vt-x: cores have inconsistent features\n");
@@ -1109,6 +1129,8 @@ vtx_init(void)
return false;
}
+ vcpu_support_available = true;
+
return true;
}

View File

@ -0,0 +1,27 @@
--- src/kernel/sel4/src/kernel/thread.c
+++ src/kernel/sel4/src/kernel/thread.c
@@ -437,8 +437,11 @@ scheduleTCB(tcb_t *tptr)
void
timerTick(void)
{
- if (likely(thread_state_get_tsType(NODE_STATE(ksCurThread)->tcbState) ==
- ThreadState_Running)) {
+ switch (thread_state_get_tsType(NODE_STATE(ksCurThread)->tcbState)) {
+ case ThreadState_Running:
+#ifdef CONFIG_VTX
+ case ThreadState_RunningVM:
+#endif
if (NODE_STATE(ksCurThread)->tcbTimeSlice > 1) {
NODE_STATE(ksCurThread)->tcbTimeSlice--;
} else {
@@ -446,6 +449,10 @@ timerTick(void)
SCHED_APPEND_CURRENT_TCB;
rescheduleRequired();
}
+ break;
+ default:
+ /* no tick updates */
+ break;
}
if (CONFIG_NUM_DOMAINS > 1) {

View File

@ -0,0 +1,13 @@
Bug detected by Genode and patch provided by Anna Lyons:
http://sel4.systems/pipermail/devel/2018-September/002161.html
--- src/kernel/sel4/src/arch/x86/c_traps.c
+++ src/kernel/sel4/src/arch/x86/c_traps.c
@@ -111,7 +111,7 @@ slowpath(syscall_t syscall)
{
#ifdef CONFIG_VTX
- if (syscall == SysVMEnter) {
+ if (syscall == SysVMEnter && NODE_STATE(ksCurThread)->tcbArch.tcbVCPU) {
vcpu_update_state_sysvmenter(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU);
if (NODE_STATE(ksCurThread)->tcbBoundNotification && notification_ptr_get_state(NODE_STATE(ksCurThread)->tcbBoundNotification) == NtfnState_Active) {
completeSignal(NODE_STATE(ksCurThread)->tcbBoundNotification, NODE_STATE(ksCurThread));

View File

@ -1 +1 @@
876d7aaf232c1b7dd548dd2b18474bb91f19ac23 8518d37c4b819daba60648ca23a739ccf1f55460

View File

@ -123,6 +123,8 @@ class Genode::Platform_pd : public Address_space
size_t cspace_size_log2() { return CSPACE_SIZE_LOG2; } size_t cspace_size_log2() { return CSPACE_SIZE_LOG2; }
bool install_mapping(Mapping const &mapping, const char * thread_name); bool install_mapping(Mapping const &mapping, const char * thread_name);
static Bit_allocator<1024> &pd_id_alloc();
}; };
#endif /* _CORE__INCLUDE__PLATFORM_PD_H_ */ #endif /* _CORE__INCLUDE__PLATFORM_PD_H_ */

View File

@ -66,6 +66,8 @@ class Genode::Platform_thread : public List<Platform_thread>::Element
Cap_sel _fault_handler_sel { 0 }; Cap_sel _fault_handler_sel { 0 };
Cap_sel _ep_sel { 0 }; Cap_sel _ep_sel { 0 };
Cap_sel _lock_sel { 0 }; Cap_sel _lock_sel { 0 };
Cap_sel _vcpu_sel { 0 };
Cap_sel _vcpu_notify_sel { 0 };
friend class Platform_pd; friend class Platform_pd;
@ -188,6 +190,8 @@ class Genode::Platform_thread : public List<Platform_thread>::Element
Cap_sel tcb_sel() const { return _info.tcb_sel; } Cap_sel tcb_sel() const { return _info.tcb_sel; }
bool install_mapping(Mapping const &mapping); bool install_mapping(Mapping const &mapping);
void setup_vcpu(Cap_sel ept, Cap_sel notification);
}; };
#endif /* _CORE__INCLUDE__PLATFORM_THREAD_H_ */ #endif /* _CORE__INCLUDE__PLATFORM_THREAD_H_ */

View File

@ -32,8 +32,10 @@ namespace Genode {
Cap_sel tcb_sel { 0 }; Cap_sel tcb_sel { 0 };
Cap_sel ep_sel { 0 }; Cap_sel ep_sel { 0 };
Cap_sel lock_sel { 0 }; Cap_sel lock_sel { 0 };
Cap_sel vcpu_sel { 0 };
addr_t ipc_buffer_phys = 0; addr_t ipc_buffer_phys { 0 };
addr_t vcpu_state_phys { 0 };
inline void write_thread_info_to_ipc_buffer(Cap_sel pd_ep_sel); inline void write_thread_info_to_ipc_buffer(Cap_sel pd_ep_sel);
@ -44,6 +46,7 @@ namespace Genode {
inline void init(addr_t const utcb_virt_addr, unsigned const prio); inline void init(addr_t const utcb_virt_addr, unsigned const prio);
inline void destruct(); inline void destruct();
bool init_vcpu(Platform &, Cap_sel ept);
}; };
/** /**
@ -129,6 +132,11 @@ void Genode::Thread_info::destruct()
seL4_CNode_Delete(seL4_CapInitThreadCNode, tcb_sel.value(), 32); seL4_CNode_Delete(seL4_CapInitThreadCNode, tcb_sel.value(), 32);
platform_specific().core_sel_alloc().free(tcb_sel); platform_specific().core_sel_alloc().free(tcb_sel);
} }
if (vcpu_sel.value()) {
/* XXX free 16k memory */
seL4_CNode_Delete(seL4_CapInitThreadCNode, vcpu_sel.value(), 32);
platform_specific().core_sel_alloc().free(vcpu_sel);
}
if (ipc_buffer_phys) { if (ipc_buffer_phys) {
Platform &platform = platform_specific(); Platform &platform = platform_specific();

View File

@ -0,0 +1,110 @@
/*
* \brief Core-specific instance of the VM session interface
* \author Alexander Boettcher
* \date 2018-08-26
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
#ifndef _CORE__VM_SESSION_COMPONENT_H_
#define _CORE__VM_SESSION_COMPONENT_H_
/* Genode includes */
#include <base/allocator_guard.h>
#include <base/rpc_server.h>
#include <vm_session/vm_session.h>
namespace Genode { class Vm_session_component; }
class Genode::Vm_session_component
:
private Ram_quota_guard,
private Cap_quota_guard,
public Rpc_object<Vm_session, Vm_session_component>
{
private:
class Vcpu : public Genode::List<Vcpu>::Element
{
private:
Constrained_ram_allocator &_ram_alloc;
Ram_dataspace_capability _ds_cap;
Cap_sel _notification { 0 };
Vm_session::Vcpu_id _vcpu_id;
void _free_up();
public:
Vcpu(Constrained_ram_allocator &, Cap_quota_guard &, Vcpu_id,
seL4_Untyped);
~Vcpu() { _free_up(); }
Dataspace_capability ds_cap() const { return _ds_cap; }
bool match(Vcpu_id id) const { return id.id == _vcpu_id.id; }
void signal() const { seL4_Signal(_notification.value()); }
Cap_sel notification_cap() const { return _notification; }
};
Rpc_entrypoint &_ep;
Constrained_ram_allocator _constrained_md_ram_alloc;
Sliced_heap _sliced_heap;
List<Vcpu> _vcpus { };
unsigned _id_alloc { 0 };
unsigned _pd_id { 0 };
Cap_sel _vm_page_table;
Page_table_registry _page_table_registry { _sliced_heap };
Vm_space _vm_space;
struct {
addr_t _phys;
seL4_Untyped _service;
} _ept { 0, 0 };
struct {
addr_t _phys;
seL4_Untyped _service;
} _notifications { 0, 0 };
Vcpu * _lookup(Vcpu_id const vcpu_id)
{
for (Vcpu * vcpu = _vcpus.first(); vcpu; vcpu = vcpu->next())
if (vcpu->match(vcpu_id)) return vcpu;
return nullptr;
}
protected:
Ram_quota_guard &_ram_quota_guard() { return *this; }
Cap_quota_guard &_cap_quota_guard() { return *this; }
public:
using Ram_quota_guard::upgrade;
using Cap_quota_guard::upgrade;
Vm_session_component(Rpc_entrypoint &, Resources, Label const &,
Diag, Ram_allocator &ram, Region_map &);
~Vm_session_component();
/**************************
** Vm session interface **
**************************/
Dataspace_capability _cpu_state(Vcpu_id);
void _exception_handler(Signal_context_capability, Vcpu_id) {}
void _run(Vcpu_id) {}
void _pause(Vcpu_id);
void attach(Dataspace_capability, addr_t) override;
void attach_pic(addr_t) override {}
void detach(addr_t, size_t) override {}
void _create_vcpu(Thread_capability);
};
#endif /* _CORE__VM_SESSION_COMPONENT_H_ */

View File

@ -157,7 +157,8 @@ class Genode::Vm_space
addr_t _idx_to_sel(addr_t idx) const { return (_id << 20) | idx; } addr_t _idx_to_sel(addr_t idx) const { return (_id << 20) | idx; }
void _flush(bool const flush_support) template <typename FN>
void _flush(bool const flush_support, FN const &fn)
{ {
if (!flush_support) { if (!flush_support) {
warning("mapping cache full, but can't flush"); warning("mapping cache full, but can't flush");
@ -167,38 +168,18 @@ class Genode::Vm_space
warning("flush page table entries - mapping cache full - PD: ", warning("flush page table entries - mapping cache full - PD: ",
_pd_label.string()); _pd_label.string());
_page_table_registry.flush_pages([&] (Cap_sel const &idx, _page_table_registry.flush_pages(fn);
addr_t const v_addr)
{
/* XXX - INITIAL_IPC_BUFFER can't be re-mapped currently */
if (v_addr == 0x1000)
return false;
/* XXX - UTCB can't be re-mapped currently */
if (stack_area_virtual_base() <= v_addr
&& (v_addr < stack_area_virtual_base() +
stack_area_virtual_size())
&& !((v_addr + 0x1000) & (stack_virtual_size() - 1)))
return false;
long err = _unmap_page(idx);
if (err != seL4_NoError)
error("unmap failed, idx=", idx, " res=", err);
_leaf_cnode(idx.value()).remove(_leaf_cnode_entry(idx.value()));
_sel_alloc.free(idx.value());
return true;
});
} }
bool _map_frame(addr_t const from_phys, addr_t const to_virt, template <typename FN>
bool _map_frame(addr_t const from_phys, addr_t const to_dest,
Cache_attribute const cacheability, Cache_attribute const cacheability,
bool const writable, bool const executable, bool const writable, bool const executable,
bool const flush_support) bool const flush_support, bool guest, FN const &fn)
{ {
if (_page_table_registry.page_frame_at(to_virt)) { if (_page_table_registry.page_frame_at(to_dest)) {
/* /*
* Valid behaviour if multiple threads concurrently * Valid behaviour if multiple threads concurrently
* causing the same page-fault. For the first thread the * causing the same page-fault. For the first thread the
@ -215,7 +196,7 @@ class Genode::Vm_space
catch (Selector_allocator::Out_of_indices) { catch (Selector_allocator::Out_of_indices) {
/* free all page-table-entry selectors and retry once */ /* free all page-table-entry selectors and retry once */
_flush(flush_support); _flush(flush_support, fn);
pte_idx = _sel_alloc.alloc(); pte_idx = _sel_alloc.alloc();
} }
@ -230,22 +211,22 @@ class Genode::Vm_space
Cnode_index(_leaf_cnode_entry(pte_idx))); Cnode_index(_leaf_cnode_entry(pte_idx)));
/* remember relationship between pte_sel and the virtual address */ /* remember relationship between pte_sel and the virtual address */
try { _page_table_registry.insert_page_frame(to_virt, Cap_sel(pte_idx)); } try { _page_table_registry.insert_page_frame(to_dest, Cap_sel(pte_idx)); }
catch (Page_table_registry::Mapping_cache_full) { catch (Page_table_registry::Mapping_cache_full) {
/* free all entries of mapping cache and re-try once */ /* free all entries of mapping cache and re-try once */
_flush(flush_support); _flush(flush_support, fn);
_page_table_registry.insert_page_frame(to_virt, Cap_sel(pte_idx)); _page_table_registry.insert_page_frame(to_dest, Cap_sel(pte_idx));
} }
/* /*
* Insert copy of page-frame selector into page table * Insert copy of page-frame selector into page table
*/ */
long ret = _map_page(Cap_sel(pte_idx), to_virt, cacheability, long ret = _map_page(Cap_sel(pte_idx), to_dest, cacheability,
writable, executable); writable, executable, guest);
if (ret != seL4_NoError) { if (ret != seL4_NoError) {
error("seL4_*_Page_Map ", Hex(from_phys), "->", error("seL4_*_Page_Map ", Hex(from_phys), "->",
Hex(to_virt), " returned ", ret); Hex(to_dest), " returned ", ret);
return false; return false;
} }
return true; return true;
@ -256,7 +237,7 @@ class Genode::Vm_space
*/ */
long _map_page(Genode::Cap_sel const &idx, Genode::addr_t const virt, long _map_page(Genode::Cap_sel const &idx, Genode::addr_t const virt,
Cache_attribute const cacheability, bool const write, Cache_attribute const cacheability, bool const write,
bool const writable); bool const writable, bool guest);
long _unmap_page(Genode::Cap_sel const &idx); long _unmap_page(Genode::Cap_sel const &idx);
long _invalidate_page(Genode::Cap_sel const &, seL4_Word const, long _invalidate_page(Genode::Cap_sel const &, seL4_Word const,
seL4_Word const); seL4_Word const);
@ -393,6 +374,29 @@ class Genode::Vm_space
size_t const num_pages, Cache_attribute const cacheability, size_t const num_pages, Cache_attribute const cacheability,
bool const writable, bool const executable, bool flush_support) bool const writable, bool const executable, bool flush_support)
{ {
auto fn_unmap = [&] (Cap_sel const &idx, addr_t const v_addr)
{
/* XXX - INITIAL_IPC_BUFFER can't be re-mapped currently */
if (v_addr == 0x1000)
return false;
/* XXX - UTCB can't be re-mapped currently */
if (stack_area_virtual_base() <= v_addr
&& (v_addr < stack_area_virtual_base() +
stack_area_virtual_size())
&& !((v_addr + 0x1000) & (stack_virtual_size() - 1)))
return false;
long err = _unmap_page(idx);
if (err != seL4_NoError)
error("unmap failed, idx=", idx, " res=", err);
_leaf_cnode(idx.value()).remove(_leaf_cnode_entry(idx.value()));
_sel_alloc.free(idx.value());
return true;
};
Lock::Guard guard(_lock); Lock::Guard guard(_lock);
bool ok = true; bool ok = true;
@ -402,7 +406,8 @@ class Genode::Vm_space
if (_map_frame(from_phys + offset, to_virt + offset, if (_map_frame(from_phys + offset, to_virt + offset,
cacheability, writable, executable, cacheability, writable, executable,
flush_support)) flush_support, false /* host page table */,
fn_unmap))
continue; continue;
ok = false; ok = false;
@ -415,6 +420,34 @@ class Genode::Vm_space
return ok; return ok;
} }
void map_guest(addr_t const from_phys, addr_t const guest_phys,
size_t const num_pages, Cache_attribute const cacheability,
bool const writable, bool const executable, bool flush_support)
{
auto fn_unmap = [&] (Cap_sel const &idx, addr_t const) {
long err = _unmap_page(idx);
if (err != seL4_NoError)
error("unmap failed, idx=", idx, " res=", err);
_leaf_cnode(idx.value()).remove(_leaf_cnode_entry(idx.value()));
_sel_alloc.free(idx.value());
return true;
};
Lock::Guard guard(_lock);
for (size_t i = 0; i < num_pages; i++) {
off_t const offset = i << get_page_size_log2();
_map_frame(from_phys + offset, guest_phys + offset,
cacheability, writable, executable,
flush_support, true /* guest page table */,
fn_unmap);
}
}
bool unmap(addr_t const virt, size_t const num_pages, bool unmap(addr_t const virt, size_t const num_pages,
bool const invalidate = false) bool const invalidate = false)
{ {
@ -453,12 +486,20 @@ class Genode::Vm_space
void unsynchronized_alloc_page_tables(addr_t const start, void unsynchronized_alloc_page_tables(addr_t const start,
addr_t const size); addr_t const size);
void unsynchronized_alloc_guest_page_tables(addr_t, addr_t);
void alloc_page_tables(addr_t const start, addr_t const size) void alloc_page_tables(addr_t const start, addr_t const size)
{ {
Lock::Guard guard(_lock); Lock::Guard guard(_lock);
unsynchronized_alloc_page_tables(start, size); unsynchronized_alloc_page_tables(start, size);
} }
void alloc_guest_page_tables(addr_t const start, addr_t const size)
{
Lock::Guard guard(_lock);
unsynchronized_alloc_guest_page_tables(start, size);
}
Session_label const & pd_label() const { return _pd_label; } Session_label const & pd_label() const { return _pd_label; }
}; };

View File

@ -406,8 +406,11 @@ void Platform::_init_rom_modules()
xml.node("hardware", [&] () { xml.node("hardware", [&] () {
xml.node("features", [&] () { xml.node("features", [&] () {
xml.attribute("svm", false); #ifdef CONFIG_VTX
xml.attribute("vmx", true);
#else
xml.attribute("vmx", false); xml.attribute("vmx", false);
#endif
}); });
xml.node("tsc", [&] () { xml.node("tsc", [&] () {
xml.attribute("freq_khz" , boot_freq->freq_mhz * 1000UL); xml.attribute("freq_khz" , boot_freq->freq_mhz * 1000UL);

View File

@ -45,7 +45,7 @@ struct Pd_id_alloc : Bit_allocator<1024>
}; };
static Pd_id_alloc &pd_id_alloc() Bit_allocator<1024> &Platform_pd::pd_id_alloc()
{ {
static Pd_id_alloc inst; static Pd_id_alloc inst;
return inst; return inst;
@ -59,8 +59,10 @@ bool Platform_pd::bind_thread(Platform_thread &thread)
thread._fault_handler_sel = alloc_sel(); thread._fault_handler_sel = alloc_sel();
/* allocate endpoint selector in the PD's CSpace */ /* allocate endpoint selector in the PD's CSpace */
thread._ep_sel = alloc_sel(); thread._ep_sel = alloc_sel();
thread._vcpu_sel = alloc_sel();
/* allocate asynchronous selector used for locks in the PD's CSpace */ /* allocate asynchronous selector used for locks in the PD's CSpace */
thread._lock_sel = thread._utcb ? alloc_sel() : Cap_sel(INITIAL_SEL_LOCK); thread._lock_sel = thread._utcb ? alloc_sel() : Cap_sel(INITIAL_SEL_LOCK);
thread._vcpu_notify_sel = alloc_sel();
} catch (Platform_pd::Sel_bit_alloc::Out_of_indices) { } catch (Platform_pd::Sel_bit_alloc::Out_of_indices) {
if (thread._fault_handler_sel.value()) { if (thread._fault_handler_sel.value()) {
free_sel(thread._fault_handler_sel); free_sel(thread._fault_handler_sel);
@ -70,6 +72,14 @@ bool Platform_pd::bind_thread(Platform_thread &thread)
free_sel(thread._ep_sel); free_sel(thread._ep_sel);
thread._ep_sel = Cap_sel(0); thread._ep_sel = Cap_sel(0);
} }
if (thread._vcpu_sel.value()) {
free_sel(thread._vcpu_sel);
thread._vcpu_sel = Cap_sel(0);
}
if (thread._vcpu_notify_sel.value()) {
free_sel(thread._vcpu_notify_sel);
thread._vcpu_notify_sel = Cap_sel(0);
}
return false; return false;
} }
@ -103,6 +113,8 @@ void Platform_pd::unbind_thread(Platform_thread &thread)
free_sel(thread._fault_handler_sel); free_sel(thread._fault_handler_sel);
free_sel(thread._ep_sel); free_sel(thread._ep_sel);
free_sel(thread._vcpu_sel);
free_sel(thread._vcpu_notify_sel);
if (thread._utcb) if (thread._utcb)
_vm_space.unmap(thread._utcb, 1); _vm_space.unmap(thread._utcb, 1);

View File

@ -275,3 +275,19 @@ unsigned long long Platform_thread::execution_time() const
uint64_t const execution_time = values[BENCHMARK_TCB_UTILISATION]; uint64_t const execution_time = values[BENCHMARK_TCB_UTILISATION];
return execution_time; return execution_time;
} }
void Platform_thread::setup_vcpu(Cap_sel ept, Cap_sel notification)
{
if (!_info.init_vcpu(platform_specific(), ept)) {
Genode::error("creating vCPU failed");
return;
}
/* install the thread's endpoint selector to the PD's CSpace */
_pd->cspace_cnode(_vcpu_sel).copy(platform_specific().core_cnode(),
_info.vcpu_sel, _vcpu_sel);
_pd->cspace_cnode(_vcpu_notify_sel).copy(platform_specific().core_cnode(),
notification, _vcpu_notify_sel);
prepopulate_ipc_buffer(_info.ipc_buffer_phys, _vcpu_sel, _vcpu_notify_sel);
}

View File

@ -28,7 +28,8 @@ long Genode::Vm_space::_map_page(Genode::Cap_sel const &idx,
Genode::addr_t const virt, Genode::addr_t const virt,
Cache_attribute const cacheability, Cache_attribute const cacheability,
bool const writable, bool const writable,
bool const executable) bool const executable,
bool)
{ {
seL4_ARM_Page const service = _idx_to_sel(idx.value()); seL4_ARM_Page const service = _idx_to_sel(idx.value());
seL4_ARM_PageDirectory const pd = _pd_sel.value(); seL4_ARM_PageDirectory const pd = _pd_sel.value();

View File

@ -0,0 +1,35 @@
/*
* \brief Platform specific services for seL4 x86
* \author Alexander Boettcher
* \date 2018-08-26
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* core includes */
#include <core_env.h>
#include <platform_services.h>
#include <vm_root.h>
#include <io_port_root.h>
/*
* Add x86 specific services
*/
void Genode::platform_add_local_services(Rpc_entrypoint &ep,
Sliced_heap &heap,
Registry<Service> &services)
{
static Vm_root vm_root(ep, heap, core_env().ram_allocator(),
core_env().local_rm());
static Core_service<Vm_session_component> vm(services, vm_root);
static Io_port_root io_root(*core_env().pd_session(),
platform().io_port_alloc(), heap);
static Core_service<Io_port_session_component> io_port(services, io_root);
}

View File

@ -12,9 +12,40 @@
*/ */
#include <platform_thread.h> #include <platform_thread.h>
#include <arch_kernel_object.h>
using Genode::Phys_allocator;
using Genode::Allocator;
Phys_allocator& Genode::phys_alloc_16k(Allocator * core_mem_alloc)
{
static Genode::Phys_allocator phys_alloc_16k(core_mem_alloc);
return phys_alloc_16k;
}
void Genode::Platform_thread::affinity(Affinity::Location location) void Genode::Platform_thread::affinity(Affinity::Location location)
{ {
_location = location; _location = location;
seL4_TCB_SetAffinity(tcb_sel().value(), location.xpos()); seL4_TCB_SetAffinity(tcb_sel().value(), location.xpos());
} }
bool Genode::Thread_info::init_vcpu(Platform &platform, Cap_sel ept)
{
enum { PAGES_16K = (1UL << Vcpu_kobj::SIZE_LOG2) / 4096 };
this->vcpu_state_phys = Untyped_memory::alloc_pages(phys_alloc_16k(), PAGES_16K);
this->vcpu_sel = platform.core_sel_alloc().alloc();
seL4_Untyped const service = Untyped_memory::_core_local_sel(Core_cspace::TOP_CNODE_UNTYPED_16K, vcpu_state_phys, Vcpu_kobj::SIZE_LOG2).value();
create<Vcpu_kobj>(service, platform.core_cnode().sel(), vcpu_sel);
seL4_Error res = seL4_X86_VCPU_SetTCB(vcpu_sel.value(), tcb_sel.value());
if (res != seL4_NoError)
return false;
int error = seL4_TCB_SetEPTRoot(tcb_sel.value(), ept.value());
if (error != seL4_NoError)
return false;
return true;
}

View File

@ -0,0 +1,247 @@
/*
* \brief Core-specific instance of the VM session interface
* \author Alexander Boettcher
* \date 2018-08-26
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* core includes */
#include <core_env.h>
#include <vm_session_component.h>
#include <pd_session_component.h>
#include <cpu_thread_component.h>
#include <arch_kernel_object.h>
using namespace Genode;
void Vm_session_component::Vcpu::_free_up()
{
if (_ds_cap.valid())
_ram_alloc.free(_ds_cap);
if (_notification.value()) {
int ret = seL4_CNode_Delete(seL4_CapInitThreadCNode,
_notification.value(), 32);
if (ret == seL4_NoError)
platform_specific().core_sel_alloc().free(_notification);
else
Genode::error(__func__, " cnode delete error ", ret);
}
}
Vm_session_component::Vcpu::Vcpu(Constrained_ram_allocator &ram_alloc,
Cap_quota_guard &cap_alloc,
Vcpu_id const vcpu_id,
seL4_Untyped const service)
:
_ram_alloc(ram_alloc),
_ds_cap (_ram_alloc.alloc(4096, Cache_attribute::CACHED)),
_vcpu_id(vcpu_id)
{
try {
/* notification cap */
Cap_quota_guard::Reservation caps(cap_alloc, Cap_quota{1});
_notification = platform_specific().core_sel_alloc().alloc();
create<Notification_kobj>(service,
platform_specific().core_cnode().sel(),
_notification);
caps.acknowledge();
} catch (...) {
_free_up();
throw;
}
}
Vm_session_component::Vm_session_component(Rpc_entrypoint &ep,
Resources resources,
Label const &,
Diag,
Ram_allocator &ram,
Region_map &local_rm)
try
:
Ram_quota_guard(resources.ram_quota),
Cap_quota_guard(resources.cap_quota),
_ep(ep),
_constrained_md_ram_alloc(ram, _ram_quota_guard(), _cap_quota_guard()),
_sliced_heap(_constrained_md_ram_alloc, local_rm),
_pd_id(Platform_pd::pd_id_alloc().alloc()),
_vm_page_table(platform_specific().core_sel_alloc().alloc()),
_vm_space(_vm_page_table,
platform_specific().core_sel_alloc(),
platform().ram_alloc(),
platform_specific().top_cnode(),
platform_specific().core_cnode(),
platform_specific().phys_cnode(),
_pd_id, _page_table_registry, "VM")
{
Platform &platform = platform_specific();
Range_allocator &phys_alloc = platform.ram_alloc();
/* _pd_id && _vm_page_table */
Cap_quota_guard::Reservation caps(_cap_quota_guard(), Cap_quota{2});
/* ept object requires a page taken directly from core's phys_alloc */
/* notifications requires a page taken directly from core's phys_alloc */
Ram_quota_guard::Reservation ram(_ram_quota_guard(), Ram_quota{2 * 4096});
try {
_ept._phys = Untyped_memory::alloc_page(phys_alloc);
_ept._service = Untyped_memory::untyped_sel(_ept._phys).value();
create<Ept_kobj>(_ept._service, platform.core_cnode().sel(),
_vm_page_table);
} catch (...) {
throw Service_denied();
}
long ret = seL4_X86_ASIDPool_Assign(platform.asid_pool().value(),
_vm_page_table.value());
if (ret != seL4_NoError)
throw Service_denied();
try {
_notifications._phys = Untyped_memory::alloc_page(phys_alloc);
_notifications._service = Untyped_memory::untyped_sel(_notifications._phys).value();
} catch (...) {
throw Service_denied();
}
caps.acknowledge();
ram.acknowledge();
} catch (...) {
if (_notifications._service)
Untyped_memory::free_page(platform().ram_alloc(), _notifications._phys);
if (_ept._service) {
int ret = seL4_CNode_Delete(seL4_CapInitThreadCNode,
_vm_page_table.value(), 32);
if (ret == seL4_NoError)
Untyped_memory::free_page(platform().ram_alloc(), _ept._phys);
if (ret != seL4_NoError)
error(__FUNCTION__, ": could not free ASID entry, "
"leaking physical memory ", ret);
}
if (_vm_page_table.value())
platform_specific().core_sel_alloc().free(_vm_page_table);
if (_pd_id)
Platform_pd::pd_id_alloc().free(_pd_id);
throw;
}
Vm_session_component::~Vm_session_component()
{
for (;Vcpu * vcpu = _vcpus.first();) {
_vcpus.remove(vcpu);
destroy(_sliced_heap, vcpu);
}
if (_vm_page_table.value())
platform_specific().core_sel_alloc().free(_vm_page_table);
if (_pd_id)
Platform_pd::pd_id_alloc().free(_pd_id);
}
void Vm_session_component::_create_vcpu(Thread_capability cap)
{
if (!cap.valid())
return;
auto lambda = [&] (Cpu_thread_component *thread) {
if (!thread)
return;
/* allocate vCPU object */
Vcpu * vcpu = nullptr;
/* code to revert partial allocations in case of Out_of_ram/_quota */
auto free_up = [&] () { if (vcpu) destroy(_sliced_heap, vcpu); };
try {
vcpu = new (_sliced_heap) Vcpu(_constrained_md_ram_alloc,
_cap_quota_guard(),
Vcpu_id{_id_alloc},
_notifications._service);
Platform_thread &pthread = thread->platform_thread();
pthread.setup_vcpu(_vm_page_table, vcpu->notification_cap());
int ret = seL4_TCB_BindNotification(pthread.tcb_sel().value(),
vcpu->notification_cap().value());
if (ret != seL4_NoError)
throw 0;
} catch (Out_of_ram) {
free_up();
throw;
} catch (Out_of_caps) {
free_up();
throw;
} catch (...) {
Genode::error("unexpected exception occurred");
free_up();
return;
}
_vcpus.insert(vcpu);
_id_alloc++;
};
_ep.apply(cap, lambda);
}
Dataspace_capability Vm_session_component::_cpu_state(Vcpu_id const vcpu_id)
{
Vcpu * vcpu = _lookup(vcpu_id);
if (!vcpu)
return Dataspace_capability();
return vcpu->ds_cap();
}
void Vm_session_component::attach(Dataspace_capability cap, addr_t guest_phys)
{
if (!cap.valid())
throw Invalid_dataspace();
/* check dataspace validity */
_ep.apply(cap, [&] (Dataspace_component *ptr) {
if (!ptr)
throw Invalid_dataspace();
Dataspace_component &dsc = *ptr;
/* unsupported - deny otherwise arbitrary physical memory can be mapped to a VM */
if (dsc.managed())
throw Invalid_dataspace();
_vm_space.alloc_guest_page_tables(guest_phys, dsc.size());
enum { FLUSHABLE = true, EXECUTABLE = true };
_vm_space.map_guest(dsc.phys_addr(), guest_phys, dsc.size() >> 12,
dsc.cacheability(),
dsc.writable(), EXECUTABLE, FLUSHABLE);
});
}
void Vm_session_component::_pause(Vcpu_id const vcpu_id)
{
Vcpu * vcpu = _lookup(vcpu_id);
if (!vcpu)
return;
vcpu->signal();
}

View File

@ -18,7 +18,7 @@ long Genode::Vm_space::_map_page(Genode::Cap_sel const &idx,
Genode::addr_t const virt, Genode::addr_t const virt,
Cache_attribute const cacheability, Cache_attribute const cacheability,
bool const writable, bool const writable,
bool const) bool const, bool ept)
{ {
seL4_X86_Page const service = _idx_to_sel(idx.value()); seL4_X86_Page const service = _idx_to_sel(idx.value());
seL4_X86_PageDirectory const pd = _pd_sel.value(); seL4_X86_PageDirectory const pd = _pd_sel.value();
@ -31,7 +31,10 @@ long Genode::Vm_space::_map_page(Genode::Cap_sel const &idx,
if (cacheability == WRITE_COMBINED) if (cacheability == WRITE_COMBINED)
attr = seL4_X86_WriteCombining; attr = seL4_X86_WriteCombining;
return seL4_X86_Page_Map(service, pd, virt, rights, attr); if (ept)
return seL4_X86_Page_MapEPT(service, pd, virt, rights, attr);
else
return seL4_X86_Page_Map(service, pd, virt, rights, attr);
} }
long Genode::Vm_space::_unmap_page(Genode::Cap_sel const &idx) long Genode::Vm_space::_unmap_page(Genode::Cap_sel const &idx)
@ -45,3 +48,92 @@ long Genode::Vm_space::_invalidate_page(Genode::Cap_sel const &,
{ {
return seL4_NoError; return seL4_NoError;
} }
/*******
* EPT *
*******/
enum {
EPT_PAGE_TABLE_LOG2_SIZE = 21, /* 2M region */
EPT_PAGE_DIR_LOG2_SIZE = 30, /* 1GB region */
EPT_PAGE_PDPT_LOG2_SIZE = 39 /* 512GB region */
};
struct Ept_page_table_kobj
{
enum { SEL4_TYPE = seL4_X86_EPTPTObject, SIZE_LOG2 = 12 };
static char const *name() { return "ept page table"; }
};
struct Ept_page_directory_kobj
{
enum { SEL4_TYPE = seL4_X86_EPTPDObject, SIZE_LOG2 = 12 };
static char const *name() { return "ept page directory"; }
};
struct Ept_page_pointer_table_kobj
{
enum { SEL4_TYPE = seL4_X86_EPTPDPTObject, SIZE_LOG2 = 12 };
static char const *name() { return "ept page directory pointer table"; }
};
struct Ept_page_map_kobj
{
enum { SEL4_TYPE = seL4_X86_EPTPML4Object, SIZE_LOG2 = 12 };
static char const *name() { return "ept page-map level-4 table"; }
};
static long map_page_table(Genode::Cap_sel const pagetable,
Genode::Cap_sel const vroot,
Genode::addr_t const virt)
{
return seL4_X86_EPTPT_Map(pagetable.value(), vroot.value(), virt,
seL4_X86_Default_VMAttributes);
}
static long map_pdpt(Genode::Cap_sel const pdpt,
Genode::Cap_sel const vroot,
Genode::addr_t const virt)
{
return seL4_X86_EPTPDPT_Map(pdpt.value(), vroot.value(), virt,
seL4_X86_Default_VMAttributes);
}
static long map_directory(Genode::Cap_sel const pd,
Genode::Cap_sel const vroot,
Genode::addr_t const virt)
{
return seL4_X86_EPTPD_Map(pd.value(), vroot.value(), virt,
seL4_X86_Default_VMAttributes);
}
void Genode::Vm_space::unsynchronized_alloc_guest_page_tables(addr_t const start,
addr_t size)
{
addr_t constexpr PAGE_TABLE_AREA = 1UL << EPT_PAGE_TABLE_LOG2_SIZE;
addr_t virt = start & ~(PAGE_TABLE_AREA - 1);
for (; size != 0; size -= min(size, PAGE_TABLE_AREA), virt += PAGE_TABLE_AREA) {
addr_t phys = 0;
if (!_page_table_registry.page_level3_at(virt, EPT_PAGE_PDPT_LOG2_SIZE)) {
/* 512 GB range - page directory pointer table */
Cap_sel const pd = _alloc_and_map<Ept_page_pointer_table_kobj>(virt, map_pdpt, phys);
_page_table_registry.insert_page_level3(virt, pd, phys, EPT_PAGE_PDPT_LOG2_SIZE);
}
if (!_page_table_registry.page_directory_at(virt, EPT_PAGE_DIR_LOG2_SIZE)) {
/* 1 GB range - page directory */
Cap_sel const pd = _alloc_and_map<Ept_page_directory_kobj>(virt, map_directory, phys);
_page_table_registry.insert_page_directory(virt, pd, phys,
EPT_PAGE_DIR_LOG2_SIZE);
}
if (!_page_table_registry.page_table_at(virt, EPT_PAGE_TABLE_LOG2_SIZE)) {
/* 2 MB range - page table */
Cap_sel const pt = _alloc_and_map<Ept_page_table_kobj>(virt, map_page_table, phys);
_page_table_registry.insert_page_table(virt, pt, phys,
EPT_PAGE_TABLE_LOG2_SIZE);
}
}
}

View File

@ -15,9 +15,12 @@
#define _CORE__X86_32_ARCH_KERNEL_OBJECT_H_ #define _CORE__X86_32_ARCH_KERNEL_OBJECT_H_
#include <sel4/objecttype.h> #include <sel4/objecttype.h>
#include <platform.h>
namespace Genode { namespace Genode {
Phys_allocator &phys_alloc_16k(Allocator * core_mem_alloc = nullptr);
enum { enum {
PAGE_TABLE_LOG2_SIZE = 22 /* 4M region */ PAGE_TABLE_LOG2_SIZE = 22 /* 4M region */
}; };
@ -35,6 +38,17 @@ namespace Genode {
static char const *name() { return "page directory"; } static char const *name() { return "page directory"; }
}; };
struct Vcpu_kobj
{
enum { SEL4_TYPE = seL4_X86_VCPUObject, SIZE_LOG2 = 14 };
static char const *name() { return "vcpu"; }
};
struct Ept_kobj
{
enum { SEL4_TYPE = seL4_X86_EPTPML4Object, SIZE_LOG = 12 };
static char const *name() { return "ept pml4"; }
};
}; };
#endif /* _CORE__X86_32_ARCH_KERNEL_OBJECT_H_ */ #endif /* _CORE__X86_32_ARCH_KERNEL_OBJECT_H_ */

View File

@ -18,6 +18,7 @@
#include <boot_modules.h> #include <boot_modules.h>
#include <platform.h> #include <platform.h>
#include <thread_sel4.h>
#include "arch_kernel_object.h" #include "arch_kernel_object.h"
seL4_Word Genode::Untyped_memory::smallest_page_type() { return seL4_X86_4K; } seL4_Word Genode::Untyped_memory::smallest_page_type() { return seL4_X86_4K; }
@ -50,6 +51,21 @@ void Genode::Platform::_init_core_page_table_registry()
virt_addr += 1024 * get_page_size(); virt_addr += 1024 * get_page_size();
} }
/* initialize 16k memory allocator */
phys_alloc_16k(&core_mem_alloc());
/* reserve some memory for VCPUs - must be 16k */
enum { MAX_VCPU_COUNT = 16 };
addr_t const max_pd_mem = MAX_VCPU_COUNT * (1UL << Vcpu_kobj::SIZE_LOG2);
_initial_untyped_pool.turn_into_untyped_object(Core_cspace::TOP_CNODE_UNTYPED_16K,
[&] (addr_t const phys, addr_t const size, bool) {
phys_alloc_16k().add_range(phys, size);
_unused_phys_alloc.remove_range(phys, size);
},
Vcpu_kobj::SIZE_LOG2, max_pd_mem);
log(":phys_mem_16k: ", phys_alloc_16k());
/* /*
* Register initial page frames * Register initial page frames
* - actually we don't use them in core -> skip * - actually we don't use them in core -> skip
@ -70,5 +86,3 @@ void Genode::Platform::_init_core_page_table_registry()
} }
#endif #endif
} }

View File

@ -15,9 +15,12 @@
#define _CORE__X86_64_ARCH_KERNEL_OBJECT_H_ #define _CORE__X86_64_ARCH_KERNEL_OBJECT_H_
#include <sel4/objecttype.h> #include <sel4/objecttype.h>
#include <platform.h>
namespace Genode { namespace Genode {
Phys_allocator &phys_alloc_16k(Allocator * core_mem_alloc = nullptr);
enum { enum {
PAGE_TABLE_LOG2_SIZE = 21, /* 2M region */ PAGE_TABLE_LOG2_SIZE = 21, /* 2M region */
PAGE_DIR_LOG2_SIZE = 30, /* 1GB region */ PAGE_DIR_LOG2_SIZE = 30, /* 1GB region */
@ -48,6 +51,18 @@ namespace Genode {
enum { SEL4_TYPE = seL4_X64_PML4Object, SIZE_LOG2 = 12 }; enum { SEL4_TYPE = seL4_X64_PML4Object, SIZE_LOG2 = 12 };
static char const *name() { return "page-map level-4 table"; } static char const *name() { return "page-map level-4 table"; }
}; };
struct Vcpu_kobj
{
enum { SEL4_TYPE = seL4_X86_VCPUObject, SIZE_LOG2 = 14 };
static char const *name() { return "vcpu"; }
};
struct Ept_kobj
{
enum { SEL4_TYPE = seL4_X86_EPTPML4Object, SIZE_LOG = 12 };
static char const *name() { return "ept pml4"; }
};
}; };
#endif /* _CORE__X86_64_ARCH_KERNEL_OBJECT_H_ */ #endif /* _CORE__X86_64_ARCH_KERNEL_OBJECT_H_ */

View File

@ -18,8 +18,10 @@
#include <boot_modules.h> #include <boot_modules.h>
#include <platform.h> #include <platform.h>
#include <thread_sel4.h>
#include "arch_kernel_object.h" #include "arch_kernel_object.h"
seL4_Word Genode::Untyped_memory::smallest_page_type() { return seL4_X86_4K; } seL4_Word Genode::Untyped_memory::smallest_page_type() { return seL4_X86_4K; }
void Genode::Platform::init_sel4_ipc_buffer() { } void Genode::Platform::init_sel4_ipc_buffer() { }
@ -59,6 +61,22 @@ void Genode::Platform::_init_core_page_table_registry()
virt_addr += 512 * get_page_size(); virt_addr += 512 * get_page_size();
} }
/* initialize 16k memory allocator */
phys_alloc_16k(&core_mem_alloc());
/* reserve some memory for VCPUs - must be 16k */
enum { MAX_VCPU_COUNT = 16 };
addr_t const max_pd_mem = MAX_VCPU_COUNT * (1UL << Vcpu_kobj::SIZE_LOG2);
_initial_untyped_pool.turn_into_untyped_object(Core_cspace::TOP_CNODE_UNTYPED_16K,
[&] (addr_t const phys, addr_t const size, bool) {
phys_alloc_16k().add_range(phys, size);
_unused_phys_alloc.remove_range(phys, size);
},
Vcpu_kobj::SIZE_LOG2, max_pd_mem);
log(":phys_mem_16k: ", phys_alloc_16k());
/* /*
* Register initial page frames * Register initial page frames
* - actually we don't use them in core -> skip * - actually we don't use them in core -> skip

View File

@ -24,17 +24,17 @@ struct Genode::Native_utcb
* On seL4, the UTCB is called IPC buffer. We use one page * On seL4, the UTCB is called IPC buffer. We use one page
* for each IPC buffer. * for each IPC buffer.
*/ */
enum { IPC_BUFFER_SIZE = 4096 }; enum { IPC_BUFFER_SIZE = 4096, ELEMENTS = IPC_BUFFER_SIZE/sizeof(addr_t) };
addr_t _raw[IPC_BUFFER_SIZE/sizeof(addr_t)]; addr_t _raw[ELEMENTS];
Native_utcb() { }; Native_utcb() { };
addr_t ep_sel() const { return _raw[0]; } addr_t ep_sel() const { return _raw[ELEMENTS - 1]; }
addr_t lock_sel() const { return _raw[1]; } addr_t lock_sel() const { return _raw[ELEMENTS - 2]; }
void ep_sel (addr_t sel) { _raw[0] = sel; } void ep_sel (addr_t sel) { _raw[ELEMENTS - 1] = sel; }
void lock_sel(addr_t sel) { _raw[1] = sel; } void lock_sel(addr_t sel) { _raw[ELEMENTS - 2] = sel; }
}; };
#endif /* _INCLUDE__BASE__INTERNAL__NATIVE_UTCB_H_ */ #endif /* _INCLUDE__BASE__INTERNAL__NATIVE_UTCB_H_ */

View File

@ -0,0 +1,845 @@
/*
* \brief Client-side VM session interface
* \author Alexander Boettcher
* \date 2018-08-27
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
#include <base/allocator.h>
#include <base/env.h>
#include <base/registry.h>
#include <vm_session/client.h>
#include <cpu/vm_state.h>
#include <trace/timestamp.h>
#include <base/internal/native_thread.h>
#include <base/internal/native_utcb.h>
#include <sel4/sel4.h>
#include <sel4/arch/vmenter.h>
#include <base/internal/stack.h>
using namespace Genode;
struct Vcpu;
static Genode::Registry<Genode::Registered<Vcpu> > vcpus;
static unsigned vcpu_id = 0;
struct Vcpu : Genode::Thread
{
private:
Signal_context_capability &_signal;
Semaphore _wake_up { 0 };
Semaphore &_handler_ready;
Lock _startup { Genode::Lock::LOCKED };
Vm_session_client::Vcpu_id _id;
addr_t _state { 0 };
addr_t _recall { 0 };
uint64_t _tsc_offset { 0 };
bool _show_error_unsupported_r { true };
bool _show_error_unsupported_tpr { true };
bool _show_error_unsupported_star { true };
enum { EXIT_ON_HLT = 1U << 7, EXIT_ON_RDTSC = 1U << 12 };
addr_t const _vmcs_ctrl0 = EXIT_ON_HLT; // | EXIT_ON_RDTSC;
enum { STACK_SIZE = 0x3000 };
enum State {
NONE = 0,
PAUSE = 1,
RUN = 2
} _remote { NONE };
Lock _remote_lock { Lock::UNLOCKED };
enum {
VMEXIT_INVALID = 0x21,
VMEXIT_STARTUP = 0xfe,
VMEXIT_RECALL = 0xff
};
enum {
CR0_PE = 0, /* 1U << 0 - not needed in case of UG */
CR0_CP = 1U << 1,
CR0_NE = 1U << 5,
CR0_NM = 1U << 29,
CR0_CD = 1U << 30,
CR0_PG = 0, /* 1U << 31 - not needed in case of UG */
CR4_VMX = 1 << 13,
};
addr_t const cr0_mask = CR0_PE | CR0_CP | CR0_NE | CR0_NM | CR0_CD | CR0_PG;
addr_t const cr0_set = 0;
addr_t const cr4_mask = CR4_VMX;
addr_t const cr4_set = CR4_VMX;
void entry() override
{
/* trigger that thread is up */
_startup.unlock();
/* wait until vcpu is assigned to us */
_wake_up.down();
/* get selector to read/write VMCS */
addr_t const service = _stack->utcb().ep_sel();
/* get selector to call back a vCPU into VMM */
_recall = _stack->utcb().lock_sel();
Vm_state &state = *reinterpret_cast<Vm_state *>(_state);
state = Vm_state {};
/* wait for first user resume() */
_wake_up.down();
{
Lock::Guard guard(_remote_lock);
_remote = NONE;
}
/* initial startup VM exit to get valid VM state */
state.exit_reason = VMEXIT_STARTUP;
_read_sel4_state_async(service, state);
Genode::Signal_transmitter(_signal).submit();
_handler_ready.down();
_wake_up.down();
State local_state { NONE };
_write_vmcs(service, Vmcs::CR0_MASK, cr0_mask);
_write_vmcs(service, Vmcs::CR4_MASK, cr4_mask);
while (true) {
/* read in requested state from remote threads */
{
Lock::Guard guard(_remote_lock);
local_state = _remote;
_remote = NONE;
}
if (local_state == NONE) {
_wake_up.down();
continue;
}
if (local_state == PAUSE) {
_write_sel4_state(service, state);
seL4_Word badge = 0;
/* consume spurious notification - XXX better way ? */
seL4_SetMR(0, state.ip.value());
seL4_SetMR(1, _vmcs_ctrl0 | state.ctrl_primary.value());
seL4_SetMR(2, state.inj_info.value() & ~0x3000);
if (seL4_VMEnter(&badge) == SEL4_VMENTER_RESULT_FAULT)
Genode::error("invalid state ahead ", badge);
state = Vm_state {};
state.ip.value(seL4_GetMR(SEL4_VMENTER_CALL_EIP_MR));
state.ctrl_primary.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_PPC_MR));
state.inj_info.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_ENTRY_MR));
state.exit_reason = VMEXIT_RECALL;
_read_sel4_state_async(service, state);
/* notify VM handler */
Genode::Signal_transmitter(_signal).submit();
/*
* Wait until VM handler is really really done,
* otherwise we lose state.
*/
_handler_ready.down();
continue;
}
if (local_state != RUN) {
Genode::error("unknown vcpu state ", (int)local_state);
while (true) { _remote_lock.lock(); }
}
_write_sel4_state(service, state);
seL4_SetMR(0, state.ip.value());
seL4_SetMR(1, _vmcs_ctrl0 | state.ctrl_primary.value());
seL4_SetMR(2, state.inj_info.value() & ~0x3000);
seL4_Word badge = 0;
seL4_Word res = seL4_VMEnter(&badge);
state = Vm_state {};
if (res == SEL4_VMENTER_RESULT_FAULT) {
state.ip.value(seL4_GetMR(SEL4_VMENTER_CALL_EIP_MR));
state.ctrl_primary.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_PPC_MR));
state.exit_reason = seL4_GetMR(SEL4_VMENTER_FAULT_REASON_MR);
state.ip_len.value(seL4_GetMR(SEL4_VMENTER_FAULT_INSTRUCTION_LEN_MR));
state.qual_primary.value(seL4_GetMR(SEL4_VMENTER_FAULT_QUALIFICATION_MR));
state.qual_secondary.value(seL4_GetMR(SEL4_VMENTER_FAULT_GUEST_PHYSICAL_MR));
state.flags.value(seL4_GetMR(SEL4_VMENTER_FAULT_RFLAGS_MR));
state.intr_state.value(seL4_GetMR(SEL4_VMENTER_FAULT_GUEST_INT_MR));
state.cr3.value(seL4_GetMR(SEL4_VMENTER_FAULT_CR3_MR));
state.ax.value(seL4_GetMR(SEL4_VMENTER_FAULT_EAX));
state.bx.value(seL4_GetMR(SEL4_VMENTER_FAULT_EBX));
state.cx.value(seL4_GetMR(SEL4_VMENTER_FAULT_ECX));
state.dx.value(seL4_GetMR(SEL4_VMENTER_FAULT_EDX));
state.si.value(seL4_GetMR(SEL4_VMENTER_FAULT_ESI));
state.di.value(seL4_GetMR(SEL4_VMENTER_FAULT_EDI));
state.bp.value(seL4_GetMR(SEL4_VMENTER_FAULT_EBP));
_read_sel4_state(service, state);
} else {
state.ip.value(seL4_GetMR(SEL4_VMENTER_CALL_EIP_MR));
state.ctrl_primary.value(seL4_GetMR(SEL4_VMENTER_CALL_CONTROL_PPC_MR));
/* what about the other GPR stuff ? XXX */
state.exit_reason = VMEXIT_RECALL;
_read_sel4_state_async(service, state);
Lock::Guard guard(_remote_lock);
if (_remote == PAUSE) {
_remote = NONE;
_wake_up.down();
}
}
/* notify VM handler */
Genode::Signal_transmitter(_signal).submit();
/*
* Wait until VM handler is really really done,
* otherwise we lose state.
*/
_handler_ready.down();
}
}
enum Vmcs {
IRQ_WINDOW = 1U << 2,
CR0 = 0x6800,
CR0_MASK = 0x6000,
CR0_SHADOW = 0x6004,
CR3 = 0x6802,
CR4 = 0x6804,
CR4_MASK = 0x6002,
CR4_SHADOW = 0x6006,
DR7 = 0x681a,
RFLAGS = 0x6820,
RSP = 0x681c,
RIP = 0x681e,
INST_LEN = 0x440c,
EFER = 0x2806,
CTRL_0 = 0x4002,
CTRL_1 = 0x401e,
CS_SEL = 0x0802,
CS_LIMIT = 0x4802,
CS_AR = 0x4816,
CS_BASE = 0x6808,
SS_SEL = 0x0804,
SS_LIMIT = 0x4804,
SS_AR = 0x4818,
SS_BASE = 0x680a,
ES_SEL = 0x0800,
ES_LIMIT = 0x4800,
ES_AR = 0x4814,
ES_BASE = 0x6806,
DS_SEL = 0x0806,
DS_LIMIT = 0x4806,
DS_AR = 0x481a,
DS_BASE = 0x680c,
FS_SEL = 0x0808,
FS_LIMIT = 0x4808,
FS_AR = 0x481c,
FS_BASE = 0x680e,
GS_SEL = 0x080a,
GS_LIMIT = 0x480a,
GS_AR = 0x481e,
GS_BASE = 0x6810,
LDTR_SEL = 0x080c,
LDTR_LIMIT = 0x480c,
LDTR_AR = 0x4820,
LDTR_BASE = 0x6812,
TR_SEL = 0x080e,
TR_LIMIT = 0x480e,
TR_AR = 0x4822,
TR_BASE = 0x6814,
IDTR_LIMIT = 0x4812,
IDTR_BASE = 0x6818,
GDTR_LIMIT = 0x4810,
GDTR_BASE = 0x6816,
PDPTE_0 = 0x280a,
PDPTE_1 = 0x280c,
PDPTE_2 = 0x280e,
PDPTE_3 = 0x2810,
SYSENTER_CS = 0x482a,
SYSENTER_SP = 0x6824,
SYSENTER_IP = 0x6826,
STATE_INTR = 0x4824,
STATE_ACTV = 0x4826,
INTR_INFO = 0x4016,
INTR_ERROR = 0x4018,
IDT_INFO = 0x4408,
IDT_ERROR = 0x440a,
TSC_OFF_LO = 0x2010,
TSC_OFF_HI = 0x2011,
};
void _write_vmcs(seL4_X86_VCPU const service, enum Vmcs const field,
seL4_Word const value)
{
seL4_X86_VCPU_WriteVMCS_t res;
res = seL4_X86_VCPU_WriteVMCS(service, field, value);
if (res.error != seL4_NoError)
Genode::error("field ", Hex(field), " - ", res.error,
" ", res.written);
}
void _write_gpr(seL4_X86_VCPU const service, Vm_state &state)
{
seL4_VCPUContext regs;
regs.eax = state.ax.value();
regs.ebx = state.bx.value();
regs.ecx = state.cx.value();
regs.edx = state.dx.value();
regs.esi = state.si.value();
regs.edi = state.di.value();
regs.ebp = state.bp.value();
seL4_Error res = seL4_X86_VCPU_WriteRegisters(service, &regs);
if (res != seL4_NoError)
Genode::error("setting general purpose register failed ",
(int)res);
}
/*
* Convert to Intel format comprising 32 bits.
*/
addr_t _convert_ar(addr_t value) {
return ((value << 4) & 0x1f000) | (value & 0xff); }
/*
* Convert to AMD (and Genode) format comprising 16 bits.
*/
uint16_t _convert_ar_16(addr_t value) {
return ((value & 0x1f000) >> 4) | (value & 0xff); }
void _write_sel4_state(seL4_X86_VCPU const service, Vm_state &state)
{
if (state.ax.valid() || state.cx.valid() ||
state.dx.valid() || state.bx.valid() ||
state.bp.valid() || state.di.valid() ||
state.si.valid()) {
/* XXX read first all values and write back only the changed ones ... */
_write_gpr(service, state);
}
if (state.r8.valid() || state.r9.valid() ||
state.r10.valid() || state.r11.valid() ||
state.r12.valid() || state.r13.valid() ||
state.r14.valid() || state.r15.valid())
{
if (_show_error_unsupported_r)
{
_show_error_unsupported_r = false;
Genode::error("registers r8-15 not supported by seL4");
}
}
if (state.tsc.valid() || state.tsc_offset.valid())
{
_tsc_offset += state.tsc_offset.value();
/* not supported by seL4 */
#if 0
_write_vmcs(service, Vmcs::TSC_OFF_LO, _tsc_offset & 0xffffffffu);
_write_vmcs(service, Vmcs::TSC_OFF_HI, (_tsc_offset >> 32) & 0xffffffffu);
#endif
}
if (state.star.valid() || state.lstar.valid() ||
state.fmask.valid() || state.kernel_gs_base.valid())
{
if (_show_error_unsupported_star) {
_show_error_unsupported_star = false;
Genode::error("star, lstar, fmask, gs_base not supported by seL4");
}
}
if (state.tpr.valid() || state.tpr_threshold.valid())
{
if (_show_error_unsupported_tpr)
{
_show_error_unsupported_tpr = false;
Genode::error("tpr* not supported by seL4");
}
}
if (state.dr7.valid())
_write_vmcs(service, Vmcs::DR7, state.dr7.value());
if (state.cr0.valid()) {
_write_vmcs(service, Vmcs::CR0, cr0_set | (~cr0_mask & state.cr0.value()));
_write_vmcs(service, Vmcs::CR0_SHADOW, state.cr0.value());
}
/* not supported on seL4 - state.cr2.valid() */
if (state.cr3.valid())
_write_vmcs(service, Vmcs::CR3, state.cr3.value());
if (state.cr4.valid()) {
_write_vmcs(service, Vmcs::CR4, cr4_set | (~cr4_mask & state.cr4.value()));
_write_vmcs(service, Vmcs::CR4_SHADOW, state.cr4.value());
}
if (state.inj_info.valid()) {
addr_t ctrl_0 = _read_vmcs(service, Vmcs::CTRL_0);
if (state.inj_info.value() & 0x2000)
Genode::warning("inj_info for NMI not supported");
if (state.inj_info.value() & 0x1000)
ctrl_0 |= Vmcs::IRQ_WINDOW;
else
ctrl_0 &= ~Vmcs::IRQ_WINDOW;
state.ctrl_primary.value(ctrl_0);
}
if (state.inj_error.valid()) {
/* not supported by seL4 */
//_write_vmcs(service, Vmcs::INTR_ERROR, state.inj_error.value());
}
if (state.flags.valid())
_write_vmcs(service, Vmcs::RFLAGS, state.flags.value());
if (state.sp.valid())
_write_vmcs(service, Vmcs::RSP, state.sp.value());
if (state.ip.valid())
_write_vmcs(service, Vmcs::RIP, state.ip.value());
if (state.efer.valid())
_write_vmcs(service, Vmcs::EFER, state.efer.value());
/* state.ctrl_primary.valid() update on vmenter - see above */
if (state.ctrl_secondary.valid())
_write_vmcs(service, Vmcs::CTRL_1, state.ctrl_secondary.value());
if (state.intr_state.valid())
_write_vmcs(service, Vmcs::STATE_INTR, state.intr_state.value());
if (state.actv_state.valid())
_write_vmcs(service, Vmcs::STATE_ACTV, state.actv_state.value());
if (state.cs.valid()) {
_write_vmcs(service, Vmcs::CS_SEL, state.cs.value().sel);
_write_vmcs(service, Vmcs::CS_LIMIT, state.cs.value().limit);
_write_vmcs(service, Vmcs::CS_AR, _convert_ar(state.cs.value().ar));
_write_vmcs(service, Vmcs::CS_BASE, state.cs.value().base);
}
if (state.ss.valid()) {
_write_vmcs(service, Vmcs::SS_SEL, state.ss.value().sel);
_write_vmcs(service, Vmcs::SS_LIMIT, state.ss.value().limit);
_write_vmcs(service, Vmcs::SS_AR, _convert_ar(state.ss.value().ar));
_write_vmcs(service, Vmcs::SS_BASE, state.ss.value().base);
}
if (state.es.valid()) {
_write_vmcs(service, Vmcs::ES_SEL, state.es.value().sel);
_write_vmcs(service, Vmcs::ES_LIMIT, state.es.value().limit);
_write_vmcs(service, Vmcs::ES_AR, _convert_ar(state.es.value().ar));
_write_vmcs(service, Vmcs::ES_BASE, state.es.value().base);
}
if (state.ds.valid()) {
_write_vmcs(service, Vmcs::DS_SEL, state.ds.value().sel);
_write_vmcs(service, Vmcs::DS_LIMIT, state.ds.value().limit);
_write_vmcs(service, Vmcs::DS_AR, _convert_ar(state.ds.value().ar));
_write_vmcs(service, Vmcs::DS_BASE, state.ds.value().base);
}
if (state.fs.valid()) {
_write_vmcs(service, Vmcs::FS_SEL, state.fs.value().sel);
_write_vmcs(service, Vmcs::FS_LIMIT, state.fs.value().limit);
_write_vmcs(service, Vmcs::FS_AR, _convert_ar(state.fs.value().ar));
_write_vmcs(service, Vmcs::FS_BASE, state.fs.value().base);
}
if (state.gs.valid()) {
_write_vmcs(service, Vmcs::GS_SEL, state.gs.value().sel);
_write_vmcs(service, Vmcs::GS_LIMIT, state.gs.value().limit);
_write_vmcs(service, Vmcs::GS_AR, _convert_ar(state.gs.value().ar));
_write_vmcs(service, Vmcs::GS_BASE, state.gs.value().base);
}
if (state.tr.valid()) {
_write_vmcs(service, Vmcs::TR_SEL, state.tr.value().sel);
_write_vmcs(service, Vmcs::TR_LIMIT, state.tr.value().limit);
_write_vmcs(service, Vmcs::TR_AR, _convert_ar(state.tr.value().ar));
_write_vmcs(service, Vmcs::TR_BASE, state.tr.value().base);
}
if (state.ldtr.valid()) {
_write_vmcs(service, Vmcs::LDTR_SEL, state.ldtr.value().sel);
_write_vmcs(service, Vmcs::LDTR_LIMIT, state.ldtr.value().limit);
_write_vmcs(service, Vmcs::LDTR_AR, _convert_ar(state.ldtr.value().ar));
_write_vmcs(service, Vmcs::LDTR_BASE, state.ldtr.value().base);
}
if (state.idtr.valid()) {
_write_vmcs(service, Vmcs::IDTR_BASE, state.idtr.value().base);
_write_vmcs(service, Vmcs::IDTR_LIMIT, state.idtr.value().limit);
}
if (state.gdtr.valid()) {
_write_vmcs(service, Vmcs::GDTR_BASE, state.gdtr.value().base);
_write_vmcs(service, Vmcs::GDTR_LIMIT, state.gdtr.value().limit);
}
if (state.pdpte_0.valid())
_write_vmcs(service, Vmcs::PDPTE_0, state.pdpte_0.value());
if (state.pdpte_1.valid())
_write_vmcs(service, Vmcs::PDPTE_1, state.pdpte_1.value());
if (state.pdpte_2.valid())
_write_vmcs(service, Vmcs::PDPTE_2, state.pdpte_2.value());
if (state.pdpte_3.valid())
_write_vmcs(service, Vmcs::PDPTE_3, state.pdpte_3.value());
if (state.sysenter_cs.valid())
_write_vmcs(service, Vmcs::SYSENTER_CS, state.sysenter_cs.value());
if (state.sysenter_sp.valid())
_write_vmcs(service, Vmcs::SYSENTER_SP, state.sysenter_sp.value());
if (state.sysenter_ip.valid())
_write_vmcs(service, Vmcs::SYSENTER_IP, state.sysenter_ip.value());
}
seL4_Word _read_vmcs(seL4_X86_VCPU const service, enum Vmcs const field)
{
seL4_X86_VCPU_ReadVMCS_t res;
res = seL4_X86_VCPU_ReadVMCS(service, field);
if (res.error != seL4_NoError)
Genode::error("field ", Hex(field), " - ", res.error);
return res.value;
}
template <typename T>
T _read_vmcsX(seL4_X86_VCPU const service, enum Vmcs const field)
{
seL4_X86_VCPU_ReadVMCS_t res;
res = seL4_X86_VCPU_ReadVMCS(service, field);
if (res.error != seL4_NoError)
Genode::error("field ", Hex(field), " - ", res.error);
return (T)(res.value);
}
uint16_t _read_vmcs_16(seL4_X86_VCPU const service, enum Vmcs const field) {
return _read_vmcsX<uint16_t>(service, field); }
uint16_t _read_vmcs_32(seL4_X86_VCPU const service, enum Vmcs const field) {
return _read_vmcsX<uint32_t>(service, field); }
void _read_sel4_state_async(seL4_X86_VCPU const service, Vm_state &state)
{
state.ax.value(state.ax.value()); /* XXX ? */
state.cx.value(state.cx.value());
state.dx.value(state.dx.value());
state.bx.value(state.bx.value());
state.di.value(state.di.value()); /* XXX ? */
state.si.value(state.si.value());
state.bp.value(state.bp.value());
state.flags.value(_read_vmcs(service, Vmcs::RFLAGS));
state.ip.value(_read_vmcs(service, Vmcs::RIP));
state.ip_len.value(_read_vmcs(service, Vmcs::INST_LEN));
state.cr3.value(_read_vmcs(service, Vmcs::CR3));
state.qual_primary.value(state.qual_primary.value()); /* XXX ? */
state.qual_secondary.value(state.qual_secondary.value()); /* XXX ? */
state.ctrl_primary.value(_read_vmcs(service, Vmcs::CTRL_0));
_read_sel4_state(service, state);
}
void _read_sel4_state(seL4_X86_VCPU const service, Vm_state &state)
{
state.sp.value(_read_vmcs(service, Vmcs::RSP));
state.dr7.value(_read_vmcs(service, Vmcs::DR7));
/* r8 - r15 not supported on seL4 */
{
addr_t const cr0 = _read_vmcs(service, Vmcs::CR0);
addr_t const cr0_shadow = _read_vmcs(service, Vmcs::CR0_SHADOW);
state.cr0.value((cr0 & ~cr0_mask) | (cr0_shadow & cr0_mask));
if (state.cr0.value() != cr0_shadow) {
Genode::error("reset cr0_shadow to cr0 ", Genode::Hex(cr0), " ", Genode::Hex(cr0_shadow), "->", Genode::Hex(state.cr0.value()));
_write_vmcs(service, Vmcs::CR0_SHADOW, state.cr0.value());
}
}
/* cr2 not supported on seL4 */
state.cr2.value(state.cr2.value());
{
addr_t const cr4 = _read_vmcs(service, Vmcs::CR4);
addr_t const cr4_shadow = _read_vmcs(service, Vmcs::CR4_SHADOW);
state.cr4.value((cr4 & ~cr4_mask) | (cr4_shadow & cr4_mask));
if (state.cr4.value() != cr4_shadow) {
Genode::error("reset cr4_shadow to cr4 ", Genode::Hex(cr4), " ", Genode::Hex(cr4_shadow), "->", Genode::Hex(state.cr4.value()));
_write_vmcs(service, Vmcs::CR4_SHADOW, state.cr4.value());
}
}
typedef Genode::Vm_state::Segment Segment;
typedef Genode::Vm_state::Range Range;
state.cs.value(Segment{_read_vmcs_16(service, Vmcs::CS_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::CS_AR)),
_read_vmcs_32(service, Vmcs::CS_LIMIT),
_read_vmcs(service, Vmcs::CS_BASE)});
state.ss.value(Segment{_read_vmcs_16(service, Vmcs::SS_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::SS_AR)),
_read_vmcs_32(service, Vmcs::SS_LIMIT),
_read_vmcs(service, Vmcs::SS_BASE)});
state.es.value(Segment{_read_vmcs_16(service, Vmcs::ES_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::ES_AR)),
_read_vmcs_32(service, Vmcs::ES_LIMIT),
_read_vmcs(service, Vmcs::ES_BASE)});
state.ds.value(Segment{_read_vmcs_16(service, Vmcs::DS_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::DS_AR)),
_read_vmcs_32(service, Vmcs::DS_LIMIT),
_read_vmcs(service, Vmcs::DS_BASE)});
state.fs.value(Segment{_read_vmcs_16(service, Vmcs::FS_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::FS_AR)),
_read_vmcs_32(service, Vmcs::FS_LIMIT),
_read_vmcs(service, Vmcs::FS_BASE)});
state.gs.value(Segment{_read_vmcs_16(service, Vmcs::GS_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::GS_AR)),
_read_vmcs_32(service, Vmcs::GS_LIMIT),
_read_vmcs(service, Vmcs::GS_BASE)});
state.tr.value(Segment{_read_vmcs_16(service, Vmcs::TR_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::TR_AR)),
_read_vmcs_32(service, Vmcs::TR_LIMIT),
_read_vmcs(service, Vmcs::TR_BASE)});
state.ldtr.value(Segment{_read_vmcs_16(service, Vmcs::LDTR_SEL),
_convert_ar_16(_read_vmcs(service, Vmcs::LDTR_AR)),
_read_vmcs_32(service, Vmcs::LDTR_LIMIT),
_read_vmcs(service, Vmcs::LDTR_BASE)});
state.idtr.value(Range{_read_vmcs(service, Vmcs::IDTR_BASE),
_read_vmcs_32(service, Vmcs::IDTR_LIMIT)});
state.gdtr.value(Range{_read_vmcs(service, Vmcs::GDTR_BASE),
_read_vmcs_32(service, Vmcs::GDTR_LIMIT)});
state.sysenter_cs.value(_read_vmcs(service, Vmcs::SYSENTER_CS));
state.sysenter_sp.value(_read_vmcs(service, Vmcs::SYSENTER_SP));
state.sysenter_ip.value(_read_vmcs(service, Vmcs::SYSENTER_IP));
/* no support by seL4 to read this value */
state.ctrl_secondary.value(state.ctrl_secondary.value());
//state.ctrl_secondary.value(_read_vmcs(service, Vmcs::CTRL_1));
if (state.exit_reason == VMEXIT_INVALID ||
state.exit_reason == VMEXIT_RECALL)
{
state.inj_info.value(_read_vmcs(service, Vmcs::INTR_INFO));
/* no support by seL4 to read this value */
state.inj_error.value(0);
//state.inj_error.value(_read_vmcs(service, Vmcs::INTR_ERROR));
} else {
state.inj_info.value(_read_vmcs(service, Vmcs::IDT_INFO));
state.inj_error.value(_read_vmcs(service, Vmcs::IDT_ERROR));
}
state.intr_state.value(_read_vmcs(service, Vmcs::STATE_INTR));
state.actv_state.value(_read_vmcs(service, Vmcs::STATE_ACTV));
state.pdpte_0.value(_read_vmcs(service, Vmcs::PDPTE_0));
state.pdpte_1.value(_read_vmcs(service, Vmcs::PDPTE_1));
state.pdpte_2.value(_read_vmcs(service, Vmcs::PDPTE_2));
state.pdpte_3.value(_read_vmcs(service, Vmcs::PDPTE_3));
/* tsc and tsc_offset not supported by seL4 */
state.tsc.value(Trace::timestamp());
state.tsc_offset.value(_tsc_offset);
state.efer.value(_read_vmcs(service, Vmcs::EFER));
/* XXX star, lstar, fmask, kernel_gs_base not supported by seL4 */
/* XXX tpr and tpr_threshold not supported by seL4 */
}
public:
Vcpu(Genode::Env &env, Genode::Signal_context_capability &cap,
Semaphore &handler_ready, unsigned id)
:
Thread(env, "vcpu_thread", STACK_SIZE), _signal(cap),
_handler_ready(handler_ready), _id({id})
{ }
void start() override {
Thread::start();
_startup.lock();
}
Genode::Vm_session_client::Vcpu_id id() const { return _id; }
void assign_ds_state(Region_map &rm, Dataspace_capability cap) {
_state = rm.attach(cap); }
void initial_resume()
{
_wake_up.up();
}
void resume()
{
Lock::Guard guard(_remote_lock);
if (_remote == RUN)
return;
_remote = RUN;
_wake_up.up();
}
void pause()
{
Lock::Guard guard(_remote_lock);
if (_remote == PAUSE)
return;
_remote = PAUSE;
seL4_Signal(_recall);
_wake_up.up();
}
};
Genode::Vm_session_client::Vcpu_id
Genode::Vm_session_client::create_vcpu(Allocator &alloc, Env &env,
Vm_handler_base &handler)
{
/* create thread that switches modes between thread/cpu */
Vcpu * vcpu = new (alloc) Genode::Registered<Vcpu> (vcpus, env,
handler._cap,
handler._done,
vcpu_id);
try {
/* now it gets actually valid - vcpu->cap() becomes valid */
vcpu->start();
/* instruct core to let it become a vCPU */
call<Rpc_exception_handler>(handler._cap, vcpu->id());
call<Rpc_create_vcpu>(vcpu->cap());
vcpu->assign_ds_state(env.rm(), call<Rpc_cpu_state>(vcpu->id()));
} catch (...) {
destroy(alloc, vcpu);
throw;
}
vcpu->initial_resume();
vcpu_id++;
return vcpu->id();
}
void Genode::Vm_session_client::run(Genode::Vm_session_client::Vcpu_id id)
{
vcpus.for_each([&] (Vcpu &vcpu) {
if (vcpu.id().id == id.id)
vcpu.resume();
});
}
void Vm_session_client::pause(Vm_session_client::Vcpu_id vcpu_id)
{
vcpus.for_each([&] (Vcpu &vcpu) {
if (vcpu.id().id != vcpu_id.id)
return;
vcpu.pause();
});
}
Genode::Dataspace_capability Genode::Vm_session_client::cpu_state(Vcpu_id vcpu_id)
{
Dataspace_capability cap;
cap = call<Rpc_cpu_state>(vcpu_id);
return cap;
}

View File

@ -18,7 +18,7 @@ if { [get_cmd_switch --autopilot] } {
exit 0 exit 0
} }
if {[have_spec nova] || [have_spec foc]} { if {[have_spec nova] || [have_spec foc] || [have_spec sel4]} {
} else { } else {
puts "\n Run script is not supported on this platform. \n"; puts "\n Run script is not supported on this platform. \n";
exit 0 exit 0