libc: pthread TLS optimizations

Fixes #4024
This commit is contained in:
Christian Prochaska 2021-02-18 17:19:12 +01:00 committed by Norman Feske
parent 8d13121e84
commit 59459e60e7
11 changed files with 262 additions and 285 deletions

View File

@ -50,9 +50,8 @@ class Genode::Thread
typedef Cpu_session::Name Name;
typedef Cpu_session::Weight Weight;
struct Stack_info { addr_t base; addr_t top; };
struct Tls { struct Base; Base *ptr; };
struct Stack_info { addr_t base; addr_t top;
addr_t libc_tls_pointer_offset; };
private:
@ -152,17 +151,6 @@ class Genode::Thread
*/
static Trace::Logger *_logger();
/**
* Base pointer to thread-local storage
*
* The opaque pointer allows higher-level thread libraries (i.e.,
* pthread) to implement TLS. It should never be used outside such
* libraries.
*/
Tls _tls { };
friend class Tls::Base;
/**
* Hook for platform-specific constructor supplements
*

View File

@ -86,6 +86,11 @@ class Genode::Stack
*/
addr_t _stack[1];
/*
* TLS pointer for libc pthreads
*/
addr_t _libc_tls_pointer { };
/**
* Thread name, used for debugging
*/
@ -152,6 +157,15 @@ class Genode::Stack
*/
addr_t base() const { return _base; }
/**
 * Return libc TLS pointer offset relative to end of stack
 *
 * The result is the distance in bytes from the '_libc_tls_pointer'
 * member to the end of this 'Stack' object. The method only reads
 * addresses and is therefore const-qualified like 'base()'.
 */
addr_t libc_tls_pointer_offset() const
{
	addr_t const end_of_stack_object = (addr_t)this + sizeof(Stack);

	return end_of_stack_object - (addr_t)&_libc_tls_pointer;
}
/**
* Ensure that the stack has a given minimum size
*

View File

@ -175,7 +175,8 @@ Thread::Stack_info Thread::mystack()
{
addr_t base = Stack_allocator::addr_to_base(&base);
Stack *stack = Stack_allocator::base_to_stack(base);
return { stack->base(), stack->top() };
return { stack->base(), stack->top(),
stack_virtual_size() - stack->libc_tls_pointer_offset() };
}

View File

@ -973,14 +973,10 @@ _ZN4Libc19Select_handler_baseC2Ev T
_ZN4Libc19Select_handler_baseD1Ev T
_ZN4Libc19Select_handler_baseD2Ev T
_ZN4Libc10resume_allEv T
_ZN4Libc7PthreadC2ERN6Genode6ThreadEPv T
_ZN4Libc7suspendERNS_15Suspend_functorEm T
_Z16pthread_registryv T
_ZN4Libc16Pthread_registry6insertERNS_7PthreadE T
_ZN4Libc16Pthread_registry6removeERNS_7PthreadE T
_ZN4Libc16Pthread_registry7cleanupEPNS_7PthreadE T
_ZN4Libc16Pthread_registry8containsERNS_7PthreadE T
_ZN4Libc14pthread_createEPP7pthreadPFPvS3_ES3_mPKcPN6Genode11Cpu_sessionENS8_8Affinity8LocationE T
_ZN4Libc14pthread_createEPP7pthreadRN6Genode6ThreadE T
_ZN4Libc14pthread_createEPP7pthreadRN6Genode6ThreadEPv T
#
# Libc plugin interface

View File

@ -34,42 +34,28 @@
namespace Libc {
struct Pthread;
struct Pthread_registry;
struct Pthread_blockade;
struct Pthread_cleanup;
struct Pthread_job;
struct Pthread_mutex;
}
/*
* Used by 'pthread_self()' to find out if the current thread is an alien
* thread.
*/
class Libc::Pthread_registry
class Libc::Pthread_cleanup
{
private:
enum { MAX_NUM_PTHREADS = 128 };
Pthread *_array[MAX_NUM_PTHREADS] = { 0 };
/* thread to be destroyed on next 'cleanup()' call */
Pthread *_cleanup_thread { nullptr };
public:
void insert(Pthread &thread);
void remove(Pthread &thread);
bool contains(Pthread &thread);
/* destroy '_cleanup_thread' and register another one if given */
void cleanup(Pthread *new_cleanup_thread = nullptr);
};
Libc::Pthread_registry &pthread_registry();
Libc::Pthread_cleanup &pthread_cleanup();
extern "C" {
@ -89,36 +75,7 @@ extern "C" {
}
struct Genode::Thread::Tls::Base
{
/**
* Register thread-local-storage object at Genode thread
*/
static void tls(Thread &thread, Tls::Base &tls)
{
thread._tls = Tls { &tls };
}
struct Undefined : Exception { };
/**
* Obtain thread-local-storage object for the calling thread
*
* \throw Undefined
*/
static Tls::Base &tls()
{
Thread &myself = *Thread::myself();
if (!myself._tls.ptr)
throw Undefined();
return *myself._tls.ptr;
}
};
struct Libc::Pthread : Noncopyable, Thread::Tls::Base
struct Libc::Pthread : Noncopyable
{
typedef void *(*start_routine_t) (void *);
@ -132,6 +89,8 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
void *&_stack_addr;
size_t &_stack_size;
Pthread *_pthread;
enum { WEIGHT = Cpu_session::Weight::DEFAULT_WEIGHT };
/* 'stack_addr_out' and 'stack_size_out' are written when the thread starts */
@ -139,11 +98,13 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
Cpu_session *cpu,
Affinity::Location location,
start_routine_t start_routine, void *arg,
void *&stack_addr_out, size_t &stack_size_out)
void *&stack_addr_out, size_t &stack_size_out,
Pthread *pthread)
:
Thread(WEIGHT, name, stack_size, Type::NORMAL, cpu, location),
_start_routine(start_routine), _arg(arg),
_stack_addr(stack_addr_out), _stack_size(stack_size_out)
_stack_addr(stack_addr_out), _stack_size(stack_size_out),
_pthread(pthread)
{ }
void entry() override;
@ -166,13 +127,6 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
*/
Thread &_thread;
void _associate_thread_with_pthread()
{
Thread::Tls::Base::tls(_thread, *this);
pthread_registry().cleanup();
pthread_registry().insert(*this);
}
bool _exiting = false;
/*
@ -212,6 +166,22 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
List<Cleanup_handler> _cleanup_handlers;
/* TLS support */
/* mask to obtain stack virtual base from address of stack variable */
static size_t _stack_virtual_base_mask;
/*
* Offset of TLS pointer relative to base address of a thread's
* virtual stack area.
*/
static size_t _tls_pointer_offset;
/* initialize TLS pointer on given stack */
static void _tls_pointer(void *stack_address, Pthread *pthread);
void const *_tls_data[PTHREAD_KEYS_MAX] { };
public:
int thread_local_errno = 0;
@ -225,31 +195,32 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
:
_thread(_construct_thread_object(name, stack_size, cpu, location,
start_routine, arg,
_stack_addr, _stack_size))
_stack_addr, _stack_size, this))
{
_associate_thread_with_pthread();
pthread_cleanup().cleanup();
}
/**
* Constructor to create pthread object out of existing thread,
* i.e., the main thread
* i.e., the main thread or a VirtualBox thread
*
* The 'stack_address' argument can be any address on the stack
* of 'existing_thread'. It is needed to locate the correct
* TLS pointer to initialize, because
*
* - the main thread uses a secondary stack, so
* 'existing_thread.stack_top()' would be the
* wrong stack for the main thread
*
* - VirtualBox EMT threads have this constructor called
* from a different thread than 'existing_thread', so
* the address of a local stack variable would belong to
* the wrong stack for those threads
*
*/
Pthread(Thread &existing_thread)
:
_thread(existing_thread)
{
/* obtain stack attributes of main thread */
Thread::Stack_info info = Thread::mystack();
_stack_addr = (void *)info.base;
_stack_size = info.top - info.base;
Pthread(Thread &existing_thread, void *stack_address);
_associate_thread_with_pthread();
}
~Pthread()
{
pthread_registry().remove(*this);
}
static void init_tls_support();
void start() { _thread.start(); }
@ -276,13 +247,15 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
_detach_blockade.block();
pthread_registry().cleanup(this);
pthread_cleanup().cleanup(this);
sleep_forever();
}
void *stack_addr() const { return _stack_addr; }
size_t stack_size() const { return _stack_size; }
static Pthread *myself();
/*
* Push a cleanup handler to the cancellation cleanup stack.
*/
@ -312,6 +285,16 @@ struct Libc::Pthread : Noncopyable, Thread::Tls::Base
return true;
}
/**
 * Store 'value' in this pthread's TLS slot for 'key'
 *
 * No range check is performed here, 'key' is used directly as index
 * into '_tls_data'.
 */
void setspecific(pthread_key_t key, void const *value)
{
	_tls_data[key] = value;
}
void const *getspecific(pthread_key_t key)
{
return _tls_data[key];
}
};

View File

@ -32,7 +32,7 @@ namespace Libc {
size_t stack_size, char const * name,
Cpu_session * cpu, Affinity::Location location);
int pthread_create(pthread_t *, Thread &);
int pthread_create(pthread_t *, Thread &, void *stack_address);
}
#endif /* _LIBC__INTERNAL__THREAD_CREATE_H_ */

View File

@ -49,6 +49,8 @@ void Libc::init_pthread_support(Monitor &monitor, Timer_accessor &timer_accessor
_main_thread_ptr = Thread::myself();
_monitor_ptr = &monitor;
_timer_accessor_ptr = &timer_accessor;
Pthread::init_tls_support();
}
@ -66,17 +68,73 @@ namespace { using Fn = Libc::Monitor::Function_result; }
** Pthread **
*************/
size_t Pthread::_stack_virtual_base_mask;
size_t Pthread::_tls_pointer_offset;
void Libc::Pthread::Thread_object::entry()
{
/* obtain stack attributes of new thread */
/*
* Obtain stack attributes of new thread for
* 'pthread_attr_get_np()'
*/
Thread::Stack_info info = Thread::mystack();
_stack_addr = (void *)info.base;
_stack_size = info.top - info.base;
_tls_pointer(&info, _pthread);
pthread_exit(_start_routine(_arg));
}
void Libc::Pthread::_tls_pointer(void *stack_address, Pthread *pthread)
{
addr_t stack_virtual_base = (addr_t)stack_address &
_stack_virtual_base_mask;
*(Pthread**)(stack_virtual_base + _tls_pointer_offset) = pthread;
}
/**
 * Create a pthread object for an already existing thread
 *
 * \param existing_thread  thread to wrap
 * \param stack_address    any address on the stack whose TLS pointer
 *                         should refer to this object
 */
Libc::Pthread::Pthread(Thread &existing_thread, void *stack_address)
:
	_thread(existing_thread)
{
	/*
	 * Remember the stack attributes for 'pthread_attr_get_np()'
	 *
	 * Note: 'Thread::mystack()' describes the stack of the calling
	 *       thread. For VirtualBox EMT threads, this constructor is
	 *       called from a different thread than 'existing_thread',
	 *       so the values might be incorrect in that case.
	 */
	Thread::Stack_info const stack = Thread::mystack();

	_stack_addr = (void *)stack.base;
	_stack_size = stack.top - stack.base;

	_tls_pointer(stack_address, this);

	pthread_cleanup().cleanup();
}
/**
 * Initialize the static values used to locate a stack's TLS pointer
 */
void Libc::Pthread::init_tls_support()
{
	/* clearing the low bits of a stack address yields the virtual stack base */
	_stack_virtual_base_mask = ~(Thread::stack_virtual_size() - 1);

	/* offset of the TLS pointer as reported for the calling thread's stack */
	_tls_pointer_offset = Thread::mystack().libc_tls_pointer_offset;
}
/**
 * Return the pthread object registered in the TLS-pointer slot of the
 * calling thread's stack
 */
Pthread *Libc::Pthread::myself()
{
	/* the address of a local variable identifies the current stack */
	int anchor;

	addr_t const base = (addr_t)&anchor & _stack_virtual_base_mask;

	Pthread **slot = (Pthread**)(base + _tls_pointer_offset);

	return *slot;
}
void Libc::Pthread::join(void **retval)
{
monitor().monitor([&] {
@ -110,50 +168,10 @@ void Libc::Pthread::cancel()
/*
* Registry
* Cleanup
*/
/**
 * Store 'thread' in the first unused registry slot
 *
 * If every slot is occupied, an error is logged and the thread is not
 * registered.
 */
void Libc::Pthread_registry::insert(Pthread &thread)
{
	/* prevent multiple insertions at the same location */
	static Mutex insert_mutex;
	Mutex::Guard guard(insert_mutex);

	for (Pthread *&slot : _array) {

		if (slot != nullptr)
			continue;

		slot = &thread;
		return;
	}

	error("pthread registry overflow, pthread_self() might fail");
}
/**
 * Clear the registry slot that refers to 'thread'
 *
 * If no slot refers to 'thread', an error is logged.
 */
void Libc::Pthread_registry::remove(Pthread &thread)
{
	for (Pthread *&slot : _array) {

		if (slot != &thread)
			continue;

		slot = nullptr;
		return;
	}

	error("could not remove unknown pthread from registry");
}
/**
 * Return true if any registry slot refers to 'thread'
 */
bool Libc::Pthread_registry::contains(Pthread &thread)
{
	for (Pthread * const entry : _array) {
		if (entry == &thread)
			return true;
	}

	return false;
}
void Libc::Pthread_registry::cleanup(Pthread *new_cleanup_thread)
void Libc::Pthread_cleanup::cleanup(Pthread *new_cleanup_thread)
{
static Mutex cleanup_mutex;
Mutex::Guard guard(cleanup_mutex);
@ -167,9 +185,9 @@ void Libc::Pthread_registry::cleanup(Pthread *new_cleanup_thread)
}
Libc::Pthread_registry &pthread_registry()
Libc::Pthread_cleanup &pthread_cleanup()
{
static Libc::Pthread_registry instance;
static Libc::Pthread_cleanup instance;
return instance;
}
@ -519,8 +537,100 @@ struct Libc::Pthread_mutex_recursive : pthread_mutex
extern "C" int sem_set_clock(sem_t *sem, clockid_t clock_id);
/* TLS */
/**
 * Bit allocator for pthread keys whose 'alloc_key' and 'free_key'
 * operations are serialized by a mutex
 */
class Key_allocator : public Genode::Bit_allocator<PTHREAD_KEYS_MAX>
{
	private:

		Mutex _mutex;

	public:

		/**
		 * Allocate a key index while holding the mutex
		 */
		addr_t alloc_key()
		{
			Mutex::Guard lock(_mutex);

			addr_t const key = alloc();
			return key;
		}

		/**
		 * Release a key index while holding the mutex
		 */
		void free_key(addr_t key)
		{
			Mutex::Guard lock(_mutex);

			free(key);
		}
};
/**
 * Return the key allocator, constructed on first use
 */
static Key_allocator &key_allocator()
{
	static Key_allocator instance;

	return instance;
}
typedef void (*key_destructor_func)(void*);
static key_destructor_func key_destructors[PTHREAD_KEYS_MAX];
extern "C" {
/**
 * Allocate a new TLS key and remember its destructor
 *
 * \return 0 on success, EINVAL if 'key' is a null pointer, EAGAIN if
 *         the key allocator reports 'Out_of_indices'
 */
int pthread_key_create(pthread_key_t *key, void (*destructor)(void*))
{
	if (key == nullptr)
		return EINVAL;

	try {
		*key = key_allocator().alloc_key();
	}
	catch (Key_allocator::Out_of_indices) {
		return EAGAIN;
	}

	/* '*key' was written only after a successful allocation */
	key_destructors[*key] = destructor;

	return 0;
}
typeof(pthread_key_create) _pthread_key_create
__attribute__((alias("pthread_key_create")));
/**
 * Forget the destructor of 'key' and return the key to the allocator
 *
 * \return 0 on success, EINVAL if 'key' is out of range
 */
int pthread_key_delete(pthread_key_t key)
{
	bool const in_range = (key >= 0) && (key < PTHREAD_KEYS_MAX);

	if (!in_range)
		return EINVAL;

	key_destructors[key] = nullptr;
	key_allocator().free_key(key);

	return 0;
}
typeof(pthread_key_delete) _pthread_key_delete
__attribute__((alias("pthread_key_delete")));
/**
 * Associate 'value' with 'key' for the calling thread
 *
 * \return 0 on success, EINVAL if 'key' is out of range
 */
int pthread_setspecific(pthread_key_t key, const void *value)
{
	bool const in_range = (key >= 0) && (key < PTHREAD_KEYS_MAX);

	if (!in_range)
		return EINVAL;

	pthread_self()->setspecific(key, value);

	return 0;
}
typeof(pthread_setspecific) _pthread_setspecific
__attribute__((alias("pthread_setspecific")));
/**
 * Return the value the calling thread associated with 'key'
 *
 * \return stored value, or a null pointer if 'key' is out of range
 */
void *pthread_getspecific(pthread_key_t key)
{
	bool const in_range = (key >= 0) && (key < PTHREAD_KEYS_MAX);

	if (!in_range)
		return nullptr;

	return (void*)pthread_self()->getspecific(key);
}
typeof(pthread_getspecific) _pthread_getspecific
__attribute__((alias("pthread_getspecific")));
/* Thread */
int pthread_join(pthread_t thread, void **retval)
@ -577,6 +687,24 @@ extern "C" {
void pthread_exit(void *value_ptr)
{
/* call TLS key destructors */
bool at_least_one_destructor_called;
do {
at_least_one_destructor_called = false;
for (pthread_key_t key = 0; key < PTHREAD_KEYS_MAX; key++) {
if (key_destructors[key]) {
void *value = pthread_getspecific(key);
if (value) {
pthread_setspecific(key, nullptr);
key_destructors[key](value);
at_least_one_destructor_called = true;
}
}
}
} while (at_least_one_destructor_called);
pthread_self()->exit(value_ptr);
}
@ -593,14 +721,10 @@ extern "C" {
pthread_t pthread_self(void)
{
try {
pthread_t pthread_myself =
static_cast<pthread_t>(&Thread::Tls::Base::tls());
pthread_t pthread_myself = static_cast<pthread_t>(Pthread::myself());
if (pthread_registry().contains(*pthread_myself))
return pthread_myself;
}
catch (Thread::Tls::Base::Undefined) { }
if (pthread_myself)
return pthread_myself;
/*
* We pass here if the main thread or an alien thread calls
@ -620,7 +744,7 @@ extern "C" {
* destruction of the pthread object would also destruct the 'Thread'
* of the main thread.
*/
return unmanaged_singleton<pthread>(*Thread::myself());
return unmanaged_singleton<pthread>(*Thread::myself(), &pthread_myself);
}
typeof(pthread_self) _pthread_self
@ -1168,136 +1292,6 @@ extern "C" {
__attribute__((alias("pthread_cond_broadcast")));
/* TLS */
/**
 * List element holding the value one thread stored for a key
 */
struct Key_element : List<Key_element>::Element
{
	const void *thread_base;  /* identifies the thread the value belongs to */
	const void *value;        /* value stored for that thread */

	Key_element(const void *owner, const void *initial_value)
	:
		thread_base(owner), value(initial_value)
	{ }
};
/**
 * Return the mutex used to guard the key lists, constructed on first use
 */
static Mutex &key_list_mutex()
{
	static Mutex instance { };

	return instance;
}
/**
 * Table of per-key element lists, indexed by key
 */
struct Keys
{
	/* one list of 'Key_element' objects for each of the PTHREAD_KEYS_MAX keys */
	List<Key_element> key[PTHREAD_KEYS_MAX];
};
/**
 * Return the key table, constructed on first use
 */
static Keys &keys()
{
	static Keys instance { };

	return instance;
}
int pthread_key_create(pthread_key_t *key, void (*destructor)(void*))
{
if (!key)
return EINVAL;
Mutex::Guard guard(key_list_mutex());
for (int k = 0; k < PTHREAD_KEYS_MAX; k++) {
/*
* Find an empty key slot and insert an element for the current
* thread to mark the key slot as used.
*/
if (!keys().key[k].first()) {
Libc::Allocator alloc { };
Key_element *key_element = new (alloc) Key_element(Thread::myself(), 0);
keys().key[k].insert(key_element);
*key = k;
return 0;
}
}
return EAGAIN;
}
typeof(pthread_key_create) _pthread_key_create
__attribute__((alias("pthread_key_create")));
int pthread_key_delete(pthread_key_t key)
{
if (key < 0 || key >= PTHREAD_KEYS_MAX || !keys().key[key].first())
return EINVAL;
Mutex::Guard guard(key_list_mutex());
while (Key_element * element = keys().key[key].first()) {
keys().key[key].remove(element);
Libc::Allocator alloc { };
destroy(alloc, element);
}
return 0;
}
typeof(pthread_key_delete) _pthread_key_delete
__attribute__((alias("pthread_key_delete")));
int pthread_setspecific(pthread_key_t key, const void *value)
{
if (key < 0 || key >= PTHREAD_KEYS_MAX)
return EINVAL;
void *myself = Thread::myself();
Mutex::Guard guard(key_list_mutex());
for (Key_element *key_element = keys().key[key].first(); key_element;
key_element = key_element->next())
if (key_element->thread_base == myself) {
key_element->value = value;
return 0;
}
/* key element does not exist yet - create a new one */
Libc::Allocator alloc { };
Key_element *key_element = new (alloc) Key_element(Thread::myself(), value);
keys().key[key].insert(key_element);
return 0;
}
typeof(pthread_setspecific) _pthread_setspecific
__attribute__((alias("pthread_setspecific")));
void *pthread_getspecific(pthread_key_t key)
{
if (key < 0 || key >= PTHREAD_KEYS_MAX)
return nullptr;
void *myself = Thread::myself();
Mutex::Guard guard(key_list_mutex());
for (Key_element *key_element = keys().key[key].first(); key_element;
key_element = key_element->next())
if (key_element->thread_base == myself)
return (void*)(key_element->value);
return 0;
}
typeof(pthread_getspecific) _pthread_getspecific
__attribute__((alias("pthread_getspecific")));
int pthread_once(pthread_once_t *once, void (*init_once)(void))
{
if (!once || ((once->state != PTHREAD_NEEDS_INIT) &&

View File

@ -104,10 +104,11 @@ int Libc::pthread_create(pthread_t *thread,
}
int Libc::pthread_create(pthread_t *thread, Thread &t)
int Libc::pthread_create(pthread_t *thread, Thread &t, void *stack_address)
{
Libc::Allocator alloc { };
pthread_t thread_obj = new (alloc) pthread(t);
pthread_t thread_obj = new (alloc) pthread(t, stack_address);
if (!thread_obj)
return EAGAIN;

View File

@ -456,7 +456,7 @@ struct Usb_ep : Genode::Entrypoint
void _handle_pthread_registration()
{
Genode::Thread *myself = Genode::Thread::myself();
if (!myself || Libc::pthread_create(&_pthread, *myself)) {
if (!myself || Libc::pthread_create(&_pthread, *myself, &myself)) {
Genode::error("USB passthough will not work - thread for "
"pthread registration invalid");
}

View File

@ -186,7 +186,7 @@ class Nic_client
void _handle_pthread_registration()
{
Genode::Thread *myself = Genode::Thread::myself();
if (!myself || Libc::pthread_create(&_pthread, *myself)) {
if (!myself || Libc::pthread_create(&_pthread, *myself, &myself)) {
Genode::error("network will not work - thread for pthread "
"registration invalid");
return;

View File

@ -857,7 +857,7 @@ class Vcpu_handler : public Vmm::Vcpu_dispatcher<Genode::Thread>,
:
Vmm::Vcpu_dispatcher<Genode::Thread>(env, stack_size, cpu_connection,
location, name),
_pthread(*this),
_pthread(*this, stack_top()),
_start_routine(start_routine),
_start_routine_arg(arg),
_vcpu(cpu_connection, location, pd_vcpu),