From f8c97e71891b1a398e693097bfd7df9d8773b81b Mon Sep 17 00:00:00 2001 From: Alexander Boettcher Date: Tue, 3 Jul 2012 10:47:03 +0200 Subject: [PATCH] NOVA: Deadlock fix in pager/rm_session interaction Following deadlock happens when a Rm_client/Pager_object handles a page-fault and concurrently the same object is dissolved (triggered by parent killing the client). The situation is as follows: Page fault handling : base-nova/src/base/pager/pager.cc : pf_handler() - lock pf_lock base/.../core/rm_session_component.cc: pager() - lock rm_session (in reverse_lookup()) Dissolve of Rm_client: base/src/core/rm_session_component.cc: dissolve() - lock rm_session base-nova/src/base/pager/pager.cc : dissolve() - lock pf_lock The pf_lock is not required here during normal page fault handling, since this pager object @NOVA is executed only by one and the same thread and all critical operations inside the rm_session_object itself are locked anyway. The only critical point is the destruction of the Pager_object which is already handled in the both dissolve functions of the rm-session_component (locking) and the pager_object (finalize in-flight page faults). --- base-nova/src/base/pager/pager.cc | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/base-nova/src/base/pager/pager.cc b/base-nova/src/base/pager/pager.cc index b633858c18..def0b0cda5 100644 --- a/base-nova/src/base/pager/pager.cc +++ b/base-nova/src/base/pager/pager.cc @@ -25,18 +25,12 @@ using namespace Nova; enum { PF_HANDLER_STACK_SIZE = 4096 }; - -static Lock *pf_lock() { static Lock inst; return &inst; } - - void Pager_object::_page_fault_handler() { Ipc_pager ipc_pager; ipc_pager.wait_for_fault(); /* serialize page-fault handling */ - pf_lock()->lock(); - Thread_base *myself = Thread_base::myself(); if (!myself) { PWRN("unexpected page-fault for non-existing pager object, going to sleep forever"); @@ -45,7 +39,6 @@ void Pager_object::_page_fault_handler() Pager_object *obj = static_cast(myself); int ret = obj->pager(ipc_pager); - pf_lock()->unlock(); if (ret) { PWRN("page-fault resolution for address 0x%lx, ip=0x%lx failed", @@ -159,14 +152,17 @@ Pager_object::~Pager_object() revoke(Obj_crd(_pt_sel, 0), true); /* Make sure nobody is in the handler anymore by doing an IPC to a - * local cap pointing to same serving thread. When the call returns + * local cap pointing to same serving thread (if not running in the + * context of the serving thread). When the call returns * we know that nobody is handled by this object anymore, because * all remotely available portals had been revoked beforehand. */ Utcb *utcb = (Utcb *)Thread_base::myself()->utcb(); - utcb->set_msg_word(0); - if (uint8_t res = call(_pt_cleanup)) - PERR("failure - cleanup call failed res=%d", res); + if (reinterpret_cast(&_context->utcb) != utcb) { + utcb->set_msg_word(0); + if (uint8_t res = call(_pt_cleanup)) + PERR("failure - cleanup call failed res=%d", res); + } /* Revoke portal used for the cleanup call */ revoke(Obj_crd(_pt_cleanup, 0), true); @@ -199,8 +195,6 @@ void Pager_entrypoint::dissolve(Pager_object *obj) revoke(Obj_crd(obj->Object_pool::Entry::cap().dst(), 0), true); } - pf_lock()->lock(); remove(obj); - pf_lock()->unlock(); }