nova: transfer guest fpu state via utcb

instead via the hardware registers of the FPU. On Genode all components and so VMMs are built such, that the compiler may generate optimized code by using the FPU at any time. We had to make sure to save the FPU state as early as possible before the VMM component touches the FPU, to avoid corrupting & losing guest FPU state. This caused headache again and again. To avoid the uncertainty, we remove this feature and explicitly transfer the FPU state via the UTCB.
2025-06-23 17:17:38 +00:00 · 2023-09-21 14:36:20 +02:00
parent 37a6669e71
commit 5c27993884
7 changed files with 25 additions and 73 deletions
--- a/repos/base-nova/include/nova/syscall-generic.h
+++ b/repos/base-nova/include/nova/syscall-generic.h
@ -665,6 +665,7 @@ namespace Nova {
 #endif
 				} gdtr, idtr;
 				unsigned long long tsc_val, tsc_off, tsc_aux;
+				uint8_t fpu[512];
 			} __attribute__((packed));
 			mword_t mr[(4096 - 4 * sizeof(mword_t)) / sizeof(mword_t)];
 		};
--- a/repos/base-nova/ports/nova.hash
+++ b/repos/base-nova/ports/nova.hash
@ -1 +1 @@
-1a32b53643e8df78d1998c198587f5fa6180ab60
+a9f11fdc132f027ed1d58da86fe39679da98699c
--- a/repos/base-nova/ports/nova.port
+++ b/repos/base-nova/ports/nova.port
@ -4,7 +4,7 @@ DOWNLOADS := nova.git

 # r10 branch
 URL(nova) := https://github.com/alex-ab/NOVA.git
-REV(nova) := af3a087934e88f43ec2b8547d7195cf29b319cfa
+REV(nova) := 1dff7b1311016b111ce71365ffe7ba625fa4708c
 DIR(nova) := src/kernel/nova

 PATCHES   := $(sort $(wildcard $(REP_DIR)/patches/*.patch))
--- a/repos/base-nova/src/lib/base/vm.cc
+++ b/repos/base-nova/src/lib/base/vm.cc
@ -60,7 +60,6 @@ struct Nova_vcpu : Rpc_client<Vm_session::Native_vcpu>, Noncopyable
 		void                   *_ep_handler    { nullptr };
 		void                   *_dispatching   { nullptr };
 		bool                    _block         { true };
-		bool                    _use_guest_fpu { false };

 		Vcpu_state              _vcpu_state __attribute__((aligned(0x10))) { };

@ -70,8 +69,7 @@ struct Nova_vcpu : Rpc_client<Vm_session::Native_vcpu>, Noncopyable
 			RUN = 2
 		} _remote { NONE };

-		inline void _read_nova_state(Nova::Utcb &, unsigned exit_reason,
-		                             uint8_t const &);
+		inline void _read_nova_state(Nova::Utcb &, unsigned exit_reason);

 		inline void _write_nova_state(Nova::Utcb &);

@ -81,8 +79,7 @@ struct Nova_vcpu : Rpc_client<Vm_session::Native_vcpu>, Noncopyable
 		addr_t _ec_sel() const { return _sm_sel() + 1; }

 		/**
-		 * NOVA badge with 15-bit exit reason, 1-bit fpu usage and
-		 * 16-bit artificial vCPU I
+		 * NOVA badge with 16-bit exit reason and 16-bit artificial vCPU ID
 		 */
 		struct Badge
 		{
@ -91,19 +88,15 @@ struct Nova_vcpu : Rpc_client<Vm_session::Native_vcpu>, Noncopyable
 			Badge(unsigned long value)
 			: _value((uint32_t)value) { }

-			Badge(uint16_t vcpu_id, uint16_t exit_reason, bool fpu_usage)
-			: _value((uint32_t)(vcpu_id << 16) |
-			         (exit_reason & 0x7fffu)   |
-			         (fpu_usage ? 0x8000u : 0u)
-			        ) { }
+			Badge(uint16_t vcpu_id, uint16_t exit_reason)
+			: _value((uint32_t)(vcpu_id << 16) | (exit_reason & 0xffffu)) { }

-			uint16_t exit_reason() const { return (uint16_t)( _value        & 0x7fff); }
+			uint16_t exit_reason() const { return (uint16_t)( _value        & 0xffff); }
 			uint16_t vcpu_id()     const { return (uint16_t)((_value >> 16) & 0xffff); }
-			bool     fpu_usage()   const { return _value & 0x8000; }
 			uint32_t value()       const { return _value; }
 		};

-		bool _handle_exit(Nova::Utcb &, uint16_t exit_reason, uint8_t const &);
+		bool _handle_exit(Nova::Utcb &, uint16_t exit_reason);

 		__attribute__((regparm(1))) static void _exit_entry(addr_t badge);

@ -141,8 +134,6 @@ struct Nova_vcpu : Rpc_client<Vm_session::Native_vcpu>, Noncopyable
 			mtd |= Nova::Mtd::SYSCALL_SWAPGS;
 			mtd |= Nova::Mtd::TPR;
 			mtd |= Nova::Mtd::QUAL;
-
-			_use_guest_fpu = true;
 			mtd |= Nova::Mtd::FPU;

 			return Nova::Mtd(mtd);
@ -175,8 +166,7 @@ struct Nova_vcpu : Rpc_client<Vm_session::Native_vcpu>, Noncopyable
 };


-void Nova_vcpu::_read_nova_state(Nova::Utcb &utcb, unsigned exit_reason,
-                                 uint8_t const &fpu_at_exit)
+void Nova_vcpu::_read_nova_state(Nova::Utcb &utcb, unsigned exit_reason)
 {
 	typedef Genode::Vcpu_state::Segment Segment;
 	typedef Genode::Vcpu_state::Range Range;
@ -186,7 +176,7 @@ void Nova_vcpu::_read_nova_state(Nova::Utcb &utcb, unsigned exit_reason,

 	if (utcb.mtd & Nova::Mtd::FPU) {
 		state().fpu.charge([&] (Vcpu_state::Fpu::State &fpu) {
-			memcpy(&fpu, &fpu_at_exit, sizeof(fpu));
+			memcpy(&fpu, utcb.fpu, sizeof(fpu));
 		});
 	}

@ -553,8 +543,8 @@ void Nova_vcpu::_write_nova_state(Nova::Utcb &utcb)

 	if (state().fpu.charged()) {
 		utcb.mtd |= Nova::Mtd::FPU;
-		state().fpu.with_state([] (Vcpu_state::Fpu::State const &fpu) {
-			asm volatile ("fxrstor %0" : : "m" (fpu) : "memory");
+		state().fpu.with_state([&] (Vcpu_state::Fpu::State const &fpu) {
+			memcpy(utcb.fpu, &fpu, sizeof(fpu));
 		});
 	}
 }
@ -590,7 +580,7 @@ void Nova_vcpu::run()
 * Do not touch the UTCB before _read_nova_state() and after
 * _write_nova_state(), particularly not by logging diagnostics.
 */
-bool Nova_vcpu::_handle_exit(Nova::Utcb &utcb, uint16_t exit_reason, uint8_t const &fpu)
+bool Nova_vcpu::_handle_exit(Nova::Utcb &utcb, uint16_t exit_reason)
 {
 	/* reset blocking state */
 	bool const previous_blocked = _block;
@ -604,7 +594,7 @@ bool Nova_vcpu::_handle_exit(Nova::Utcb &utcb, uint16_t exit_reason, uint8_t con

 	/* transform state from NOVA to Genode */
 	if (exit_reason != VM_EXIT_RECALL || !previous_blocked)
-		_read_nova_state(utcb, exit_reason, fpu);
+		_read_nova_state(utcb, exit_reason);

 	if (exit_reason == VM_EXIT_RECALL) {
 		if (previous_blocked)
@ -662,10 +652,6 @@ bool Nova_vcpu::_handle_exit(Nova::Utcb &utcb, uint16_t exit_reason, uint8_t con

 void Nova_vcpu::_exit_entry(addr_t badge)
 {
-	uint8_t _fpu_at_exit[512] __attribute__((aligned(0x10)));
-	if (Badge(badge).fpu_usage())
-		asm volatile ("fxsave %0" : "=m" (*_fpu_at_exit) :: "memory");
-
 	Thread     &myself = *Thread::myself();
 	Nova::Utcb &utcb   = *reinterpret_cast<Nova::Utcb *>(myself.utcb());

@ -675,8 +661,7 @@ void Nova_vcpu::_exit_entry(addr_t badge)
 	try {
 		_vcpu_space().apply<Nova_vcpu>(vcpu_id, [&] (Nova_vcpu &vcpu)
 		{
-			bool const block = vcpu._handle_exit(utcb, exit_reason,
-			                                     *_fpu_at_exit);
+			bool const block = vcpu._handle_exit(utcb, exit_reason);

 			if (block) {
 				Nova::reply(myself.stack_top(), vcpu._sm_sel());
@ -743,7 +728,7 @@ Signal_context_capability Nova_vcpu::_create_exit_handler(Pd_session        &pd,
 	Native_capability vm_exit_cap =
 		native_pd.alloc_rpc_cap(thread_cap, (addr_t)Nova_vcpu::_exit_entry, mtd.value());

-	Badge const badge { vcpu_id, exit_reason, !!(mtd.value() & Nova::Mtd::FPU) };
+	Badge const badge { vcpu_id, exit_reason };
 	native_pd.imprint_rpc_cap(vm_exit_cap, badge.value());

 	return reinterpret_cap_cast<Signal_context>(vm_exit_cap);
--- a/repos/ports/src/virtualbox5/spec/nova/vcpu.h
+++ b/repos/ports/src/virtualbox5/spec/nova/vcpu.h
@ -85,7 +85,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,

 	private:

-		X86FXSTATE _guest_fpu_state __attribute__((aligned(0x10)));
 		X86FXSTATE _emt_fpu_state __attribute__((aligned(0x10)));

 		pthread         _pthread;
@ -152,16 +151,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,

 	protected:

-		void _fpu_save()
-		{
-			fpu_save(reinterpret_cast<char *>(&_guest_fpu_state));
-		}
-
-		void _fpu_load()
-		{
-			fpu_load(reinterpret_cast<char *>(&_guest_fpu_state));
-		}
-
 		__attribute__((noreturn)) void _longjmp()
 		{
 			longjmp(_env, 1);
@ -173,7 +162,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 		 */
 		__attribute__((noreturn)) void _fpu_save_and_longjmp()
 		{
-			_fpu_save();
 			_longjmp();
 		}

@ -211,7 +199,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 			if (!setjmp(_env)) {
 				_stack_reply = reinterpret_cast<void *>(
 					Abi::stack_align(reinterpret_cast<Genode::addr_t>(&value)));
-				_fpu_load();
 				Nova::reply(_stack_reply);
 			}
 		}
@ -253,7 +240,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,

 				/* got recall during irq injection and the guest is ready for
 				 * delivery of IRQ - just continue */
-				_fpu_load();
 				Nova::reply(_stack_reply);
 			}

@ -264,10 +250,8 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 			}

 			/* check whether we have to request irq injection window */
-			utcb->mtd = Nova::Mtd::FPU;
 			if (check_to_request_irq_window(utcb, _current_vcpu)) {
 				_irq_win = true;
-				_fpu_load();
 				Nova::reply(_stack_reply);
 			}

@ -281,10 +265,8 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 			if (interrupt_pending) {
 				PDMApicSetTPR(_current_vcpu, utcb_tpr);

-				utcb->mtd = Nova::Mtd::FPU;
 				_irq_win = check_to_request_irq_window(utcb, _current_vcpu);
 				if (_irq_win) {
-					_fpu_load();
 					Nova::reply(_stack_reply);
 				}
 			}
@ -298,7 +280,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 			 * by a recall request, but we haven't verified this for each flag
 			 * yet.
 			 */
-			utcb->mtd = Nova::Mtd::FPU;
 			continue_hw_accelerated(utcb, true);

 			if (_irq_win) {
@ -308,7 +289,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 				utcb->mtd      |= Nova::Mtd::INJ;
 			}

-			_fpu_load();
 			Nova::reply(_stack_reply);
 		}

@ -328,8 +308,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 				Nova::reply(_stack_reply);
 			}

-			_fpu_save();
-
 			enum { MAP_SIZE = 0x1000UL };

 			bool writeable = true;
@ -352,7 +330,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,

 			/* fault region can be mapped - prepare utcb */
 			utcb->set_msg_word(0);
-			utcb->mtd = Mtd::FPU;

 			if (utcb->inj_info & IRQ_INJ_VALID_MASK) {
 				/*
@ -396,8 +373,6 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 					         "guestf fault at ", Genode::Hex(guest_fault));
 			} while (res);

-			_fpu_load();
-
 			Nova::reply(_stack_reply);
 		}

@ -720,9 +695,8 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 					_irq_drop++;
 					/* happens if PDMApicSetTPR (see above) mask IRQ */
 					utcb->inj_info = IRQ_INJ_NONE;
-					utcb->mtd      = Nova::Mtd::INJ | Nova::Mtd::FPU;
+					utcb->mtd      = Nova::Mtd::INJ;

-					_fpu_load();
 					Nova::reply(_stack_reply);
 				}
 			}
@ -773,8 +747,7 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 			         Genode::Hex(utcb->actv_state), " mtd ",
 			         Genode::Hex(utcb->mtd));
 */
-			utcb->mtd = Nova::Mtd::INJ | Nova::Mtd::FPU;
-			_fpu_load();
+			utcb->mtd = Nova::Mtd::INJ;
 			Nova::reply(_stack_reply);
 		}

@ -992,8 +965,8 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,

 			/* save current FPU state */
 			fpu_save(reinterpret_cast<char *>(&_emt_fpu_state));
-			/* write FPU state from pCtx to FPU registers */
-			memcpy(&_guest_fpu_state, pCtx->pXStateR3, sizeof(_guest_fpu_state));
+			/* write FPU state from pCtx to utcb */
+			memcpy(utcb->fpu, pCtx->pXStateR3, sizeof(utcb->fpu));
 			/* tell kernel to transfer current fpu registers to vCPU */
 			utcb->mtd |= Mtd::FPU;

@ -1008,8 +981,9 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
 			_current_vm   = 0;
 			_current_vcpu = 0;

-			/* write FPU state of vCPU (in current FPU registers) to pCtx */
-			Genode::memcpy(pCtx->pXStateR3, &_guest_fpu_state, sizeof(X86FXSTATE));
+			/* write FPU state of vCPU in utcb to pCtx */
+			static_assert(sizeof(X86FXSTATE) == sizeof(utcb->fpu), "fpu size mismatch");
+			Genode::memcpy(pCtx->pXStateR3, utcb->fpu, sizeof(X86FXSTATE));

 			/* load saved FPU state of EMT thread */
 			fpu_load(reinterpret_cast<char *>(&_emt_fpu_state));
--- a/repos/ports/src/virtualbox5/spec/nova/vcpu_svm.h
+++ b/repos/ports/src/virtualbox5/spec/nova/vcpu_svm.h
@ -26,7 +26,6 @@ class Vcpu_handler_svm : public Vcpu_handler

 		__attribute__((noreturn)) void _svm_vintr()
 		{
-			_fpu_save();
 			_irq_window();
 		}

@ -103,7 +102,6 @@ class Vcpu_handler_svm : public Vcpu_handler

 		__attribute__((noreturn)) void _svm_recall()
 		{
-			_fpu_save();
 			Vcpu_handler::_recall_handler();
 		}

--- a/repos/ports/src/virtualbox5/spec/nova/vcpu_vmx.h
+++ b/repos/ports/src/virtualbox5/spec/nova/vcpu_vmx.h
@ -107,13 +107,11 @@ class Vcpu_handler_vmx : public Vcpu_handler

 		__attribute__((noreturn)) void _vmx_irqwin()
 		{
-			_fpu_save();
 			_irq_window();
 		}

 		__attribute__((noreturn)) void _vmx_recall()
 		{
-			_fpu_save();
 			Vcpu_handler::_recall_handler();
 		}

@ -145,8 +143,6 @@ class Vcpu_handler_vmx : public Vcpu_handler
 		 */
 		__attribute__((noreturn)) void _vmx_mov_crx()
 		{
-			_fpu_save();
-
 			Genode::Thread *myself = Genode::Thread::myself();
 			Nova::Utcb *utcb = reinterpret_cast<Nova::Utcb *>(myself->utcb());

@ -166,9 +162,7 @@ class Vcpu_handler_vmx : public Vcpu_handler
 			utcb->pdpte[2] = pdpte[2];
 			utcb->pdpte[3] = pdpte[3];

-			utcb->mtd = Nova::Mtd::PDPTE | Nova::Mtd::FPU;
-
-			_fpu_load();
+			utcb->mtd = Nova::Mtd::PDPTE;

 			Nova::reply(_stack_reply);
 		}