From 75934c83423c482682618cb1f70c456caaa4699f Mon Sep 17 00:00:00 2001
From: Joel Dice
Date: Sat, 28 Nov 2009 15:01:54 -0700
Subject: [PATCH] provide fast paths for common thread state transitions

These paths reduce contention among threads by using atomic operations and
memory barriers instead of mutexes where possible.  This is especially
important for JNI calls, since each such call involves two state
transitions: from "active" to "idle" and back.
---
 src/machine.cpp | 104 ++++++++++++++++++++++++++++++++++++------------
 src/powerpc.h   |   8 +++-
 src/x86.h       |  30 ++++++++++----
 3 files changed, 108 insertions(+), 34 deletions(-)

diff --git a/src/machine.cpp b/src/machine.cpp
index 30a4e6d220..20c3b43d50 100644
--- a/src/machine.cpp
+++ b/src/machine.cpp
@@ -14,6 +14,7 @@
 #include "stream.h"
 #include "constants.h"
 #include "processor.h"
+#include "arch.h"
 
 using namespace vm;
 
@@ -21,6 +22,14 @@ namespace {
 
 const unsigned NoByte = 0xFFFF;
 
+#ifdef USE_ATOMIC_OPERATIONS
+void
+atomicIncrement(unsigned* p, int v)
+{
+  for (unsigned old = *p; not atomicCompareAndSwap32(p, old, old + v); old = *p) { }
+}
+#endif
+
 bool
 find(Thread* t, Thread* o)
 {
@@ -2319,10 +2328,22 @@ enter(Thread* t, Thread::State s)
     return;
   }
 
+#ifdef USE_ATOMIC_OPERATIONS
+# define INCREMENT atomicIncrement
+# define ACQUIRE_LOCK ACQUIRE_RAW(t, t->m->stateLock)
+# define BARRIER memoryBarrier()
+#else
+# define INCREMENT(pointer, value) *(pointer) += value;
+# define ACQUIRE_LOCK
+# define BARRIER
+
   ACQUIRE_RAW(t, t->m->stateLock);
+#endif // not USE_ATOMIC_OPERATIONS
 
   switch (s) {
   case Thread::ExclusiveState: {
+    ACQUIRE_LOCK;
+
     while (t->m->exclusive) {
       // another thread got here first.
       ENTER(t, Thread::IdleState);
@@ -2332,7 +2353,7 @@ enter(Thread* t, Thread::State s)
     case Thread::ActiveState: break;
 
     case Thread::IdleState: {
-      ++ t->m->activeCount;
+      INCREMENT(&(t->m->activeCount), 1);
     } break;
 
     default: abort(t);
@@ -2340,14 +2361,35 @@ enter(Thread* t, Thread::State s)
 
     t->state = Thread::ExclusiveState;
     t->m->exclusive = t;
-
+
+    BARRIER;
+
     while (t->m->activeCount > 1) {
       t->m->stateLock->wait(t->systemThread, 0);
     }
   } break;
 
   case Thread::IdleState:
+    if (t->state == Thread::ActiveState) {
+      // fast path
+      assert(t, t->m->activeCount > 0);
+      INCREMENT(&(t->m->activeCount), -1);
+
+      t->state = s;
+
+      if (t->m->exclusive) {
+        ACQUIRE_LOCK;
+
+        t->m->stateLock->notifyAll(t->systemThread);
+      }
+      break;
+    } else {
+      // fall through to slow path
+    }
+
   case Thread::ZombieState: {
+    ACQUIRE_LOCK;
+
     switch (t->state) {
     case Thread::ExclusiveState: {
       assert(t, t->m->exclusive == t);
@@ -2360,7 +2402,7 @@ enter(Thread* t, Thread::State s)
     }
 
     assert(t, t->m->activeCount > 0);
-    -- t->m->activeCount;
+    INCREMENT(&(t->m->activeCount), -1);
 
     if (s == Thread::ZombieState) {
       assert(t, t->m->liveCount > 0);
@@ -2375,35 +2417,45 @@ enter(Thread* t, Thread::State s)
     t->m->stateLock->notifyAll(t->systemThread);
   } break;
 
-  case Thread::ActiveState: {
-    switch (t->state) {
-    case Thread::ExclusiveState: {
-      assert(t, t->m->exclusive == t);
-
+  case Thread::ActiveState:
+    if (t->state == Thread::IdleState and t->m->exclusive == 0) {
+      // fast path
+      INCREMENT(&(t->m->activeCount), 1);
       t->state = s;
-      t->m->exclusive = 0;
+      break;
+    } else {
+      ACQUIRE_LOCK;
 
-      t->m->stateLock->notifyAll(t->systemThread);
-    } break;
+      switch (t->state) {
+      case Thread::ExclusiveState: {
+        assert(t, t->m->exclusive == t);
 
-    case Thread::NoState:
-    case Thread::IdleState: {
-      while (t->m->exclusive) {
-        t->m->stateLock->wait(t->systemThread, 0);
+        t->state = s;
+        t->m->exclusive = 0;
+
+        t->m->stateLock->notifyAll(t->systemThread);
+      } break;
+
+      case Thread::NoState:
+      case Thread::IdleState: {
+        while (t->m->exclusive) {
+          t->m->stateLock->wait(t->systemThread, 0);
+        }
+
+        INCREMENT(&(t->m->activeCount), 1);
+        if (t->state == Thread::NoState) {
+          ++ t->m->liveCount;
+        }
+        t->state = s;
+      } break;
+
+      default: abort(t);
       }
-
-      ++ t->m->activeCount;
-      if (t->state == Thread::NoState) {
-        ++ t->m->liveCount;
-      }
-      t->state = s;
     } break;
 
-    default: abort(t);
-    }
-  } break;
-
   case Thread::ExitState: {
+    ACQUIRE_LOCK;
+
     switch (t->state) {
     case Thread::ExclusiveState: {
       assert(t, t->m->exclusive == t);
@@ -2418,7 +2470,7 @@ enter(Thread* t, Thread::State s)
     }
 
     assert(t, t->m->activeCount > 0);
-    -- t->m->activeCount;
+    INCREMENT(&(t->m->activeCount), -1);
 
     t->state = s;
 
diff --git a/src/powerpc.h b/src/powerpc.h
index 1b906537cc..1ef1437e0f 100644
--- a/src/powerpc.h
+++ b/src/powerpc.h
@@ -92,7 +92,7 @@ syncInstructionCache(const void* start, unsigned size)
 
 #ifdef USE_ATOMIC_OPERATIONS
 inline bool
-atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
+atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
 {
 #if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1)
   return __sync_bool_compare_and_swap(p, old, new_);
@@ -118,6 +118,12 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
   return result;
 #endif // not GCC >= 4.1
 }
+
+inline bool
+atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
+{
+  return atomicCompareAndSwap32(reinterpret_cast<uint32_t*>(p), old, new_);
+}
 #endif // USE_ATOMIC_OPERATIONS
 
 inline uint64_t
diff --git a/src/x86.h b/src/x86.h
index f9f3c038a1..807fbcecbd 100644
--- a/src/x86.h
+++ b/src/x86.h
@@ -190,17 +190,13 @@ syncInstructionCache(const void*, unsigned)
 
 #ifdef USE_ATOMIC_OPERATIONS
 inline bool
-atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
+atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
 {
 #ifdef _MSC_VER
-# ifdef ARCH_x86_32
-  InterlockedCompareExchange(p, new_, old);
-# elif defined ARCH_x86_64
-  InterlockedCompareExchange64(p, new_, old);
-# endif // ARCH_x86_64
+  return old == InterlockedCompareExchange(reinterpret_cast<LONG volatile*>(p), new_, old);
 #elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1)
   return __sync_bool_compare_and_swap(p, old, new_);
-#elif defined ARCH_x86_32
+#else
   uint8_t result;
 
   __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1"
@@ -209,7 +205,17 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
                        : "memory");
 
   return result != 0;
-#elif defined ARCH_x86_64
+#endif
+}
+
+inline bool
+atomicCompareAndSwap64(uint64_t* p, uint64_t old, uint64_t new_)
+{
+#ifdef _MSC_VER
+  return old == InterlockedCompareExchange64(reinterpret_cast<LONGLONG volatile*>(p), new_, old);
+#elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1)
+  return __sync_bool_compare_and_swap(p, old, new_);
+#else
   uint8_t result;
 
   __asm__ __volatile__("lock; cmpxchgq %2, %0; setz %1"
@@ -218,6 +224,16 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
                        : "memory");
 
   return result != 0;
+#endif
+}
+
+inline bool
+atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
+{
+#ifdef ARCH_x86_32
+  return atomicCompareAndSwap32(reinterpret_cast<uint32_t*>(p), old, new_);
+#elif defined ARCH_x86_64
+  return atomicCompareAndSwap64(reinterpret_cast<uint64_t*>(p), old, new_);
 #endif // ARCH_x86_64
 }
 #endif // USE_ATOMIC_OPERATIONS
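The lock-free counter update used above hinges on one detail: each retry must snapshot the current counter value into a local, compute the new value from that snapshot, and hand the same snapshot to the compare-and-swap, so an increment by another thread between two separate reads cannot be silently overwritten. The following is a minimal stand-alone sketch of that retry loop built on std::atomic rather than Avian's atomicCompareAndSwap32; it is illustrative only, not the code in src/machine.cpp.

```cpp
// Stand-alone sketch of the atomicIncrement retry loop, using std::atomic in
// place of Avian's platform-specific compare-and-swap primitives.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

void atomicIncrement(std::atomic<unsigned>* p, int v)
{
  // Snapshot the counter, then try to install snapshot + v.  On failure,
  // compare_exchange_weak refreshes `old` with the current value, so the next
  // attempt is computed from fresh data and no concurrent update is lost.
  unsigned old = p->load();
  while (not p->compare_exchange_weak(old, old + v)) { }
}

int main()
{
  std::atomic<unsigned> activeCount(0);

  // Hammer the counter from several threads; every increment must survive.
  std::vector<std::thread> threads;
  for (int i = 0; i < 4; ++i) {
    threads.emplace_back([&] {
      for (int j = 0; j < 100000; ++j) atomicIncrement(&activeCount, 1);
    });
  }
  for (std::thread& t : threads) t.join();

  std::printf("activeCount: %u\n", activeCount.load());  // expect 400000
  return 0;
}
```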
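The shape of the fast paths in enter() is: a thread leaving the active state decrements activeCount atomically and only touches stateLock when an exclusive request is pending (so the waiter gets notified); a thread re-entering the active state increments activeCount atomically as long as no exclusive request is pending, and otherwise falls back to waiting under the lock. The exclusive side publishes its request, issues a memory barrier, and then waits under stateLock until activeCount drains. Below is a condensed, hypothetical sketch of that structure using std::atomic, std::mutex and std::condition_variable in place of Avian's Machine fields and System locks; it shows the shape of the fast and slow paths, not the actual implementation.

```cpp
// Hypothetical sketch of the idle <-> active fast paths; the names and fields
// here are illustrative, not Avian's.
#include <atomic>
#include <condition_variable>
#include <mutex>

struct Machine {
  std::atomic<unsigned> activeCount{0};
  std::atomic<bool> exclusivePending{false};  // set by a thread requesting exclusive access
  std::mutex stateLock;
  std::condition_variable stateCond;
};

// Active -> idle (e.g. just before an outgoing JNI call): lock-free unless an
// exclusive request is pending, in which case the waiter must be notified.
void enterIdle(Machine& m)
{
  m.activeCount.fetch_sub(1);
  if (m.exclusivePending.load()) {
    std::lock_guard<std::mutex> lock(m.stateLock);
    m.stateCond.notify_all();  // the exclusive thread re-checks activeCount
  }
}

// Idle -> active (e.g. on return from a JNI call): lock-free while nobody
// wants exclusive access; otherwise wait for the exclusive section to end.
void enterActive(Machine& m)
{
  if (not m.exclusivePending.load()) {
    m.activeCount.fetch_add(1);  // fast path: no mutex, no wakeups
    return;
  }
  std::unique_lock<std::mutex> lock(m.stateLock);
  m.stateCond.wait(lock, [&m] { return not m.exclusivePending.load(); });
  m.activeCount.fetch_add(1);
}
```

Per the commit message, the payoff is largest for JNI-heavy workloads: every native call crosses active-to-idle on the way out and idle-to-active on the way back, so both crossings stay off the mutex in the common case where no exclusive operation is in progress.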