diff --git a/src/compile.cpp b/src/compile.cpp
index 29eb342760..4888778e7b 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -235,7 +235,7 @@ methodForIp(MyThread* t, void* ip)
   // we must use a version of the method tree at least as recent as the
   // compiled form of the method containing the specified address (see
   // compile(MyThread*, Allocator*, BootContext*, object)):
-  memoryBarrier();
+  loadMemoryBarrier();
 
   return treeQuery(t, methodTree(t), reinterpret_cast(ip),
                    methodTreeSentinal(t), compareIpToMethodBounds);
@@ -5878,7 +5878,7 @@ resolveNative(MyThread* t, object method)
    // methodCompiled, since we don't want them using the slow calling
    // convention on a function that expects the fast calling
    // convention:
-    memoryBarrier();
+    storeStoreMemoryBarrier();
 
    methodCompiled(t, method) = reinterpret_cast(function);
  }
@@ -7469,7 +7469,7 @@ findCallNode(MyThread* t, void* address)
   // we must use a version of the call table at least as recent as the
   // compiled form of the method containing the specified address (see
   // compile(MyThread*, Allocator*, BootContext*, object)):
-  memoryBarrier();
+  loadMemoryBarrier();
 
   MyProcessor* p = processor(t);
   object table = p->callTable;
@@ -8229,7 +8229,7 @@ compile(MyThread* t, Allocator* allocator, BootContext* bootContext,
         reinterpret_cast(compiled), clone,
         methodTreeSentinal(t), compareIpToMethodBounds);
 
-      memoryBarrier();
+      storeStoreMemoryBarrier();
 
      methodCompiled(t, method) = reinterpret_cast(compiled);
diff --git a/src/machine.cpp b/src/machine.cpp
index 9abe61301d..4fa9aaf9b5 100644
--- a/src/machine.cpp
+++ b/src/machine.cpp
@@ -2334,11 +2334,11 @@ enter(Thread* t, Thread::State s)
 #ifdef USE_ATOMIC_OPERATIONS
 # define INCREMENT atomicIncrement
 # define ACQUIRE_LOCK ACQUIRE_RAW(t, t->m->stateLock)
-# define BARRIER memoryBarrier()
+# define STORE_LOAD_MEMORY_BARRIER storeLoadMemoryBarrier()
 #else
 # define INCREMENT(pointer, value) *(pointer) += value;
 # define ACQUIRE_LOCK
-# define BARRIER
+# define STORE_LOAD_MEMORY_BARRIER
 
   ACQUIRE_RAW(t, t->m->stateLock);
 #endif // not USE_ATOMIC_OPERATIONS
@@ -2366,7 +2366,7 @@ enter(Thread* t, Thread::State s)
       t->state = Thread::ExclusiveState;
       t->m->exclusive = t;
 
-      BARRIER;
+      STORE_LOAD_MEMORY_BARRIER;
 
       while (t->m->activeCount > 1) {
         t->m->stateLock->wait(t->systemThread, 0);
diff --git a/src/x86.cpp b/src/x86.cpp
index 7e565dc179..530333c1d6 100644
--- a/src/x86.cpp
+++ b/src/x86.cpp
@@ -113,23 +113,7 @@ class MyBlock: public Assembler::Block {
   unsigned size;
 };
 
-class Context {
- public:
-  Context(System* s, Allocator* a, Zone* zone):
-    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
-    firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)),
-    lastBlock(firstBlock)
-  { }
-
-  System* s;
-  Zone* zone;
-  Assembler::Client* client;
-  Vector code;
-  Task* tasks;
-  uint8_t* result;
-  MyBlock* firstBlock;
-  MyBlock* lastBlock;
-};
+class Context;
 
 typedef void (*OperationType)(Context*);
 
@@ -163,6 +147,25 @@ class ArchitectureContext {
    * OperandTypeCount];
 };
 
+class Context {
+ public:
+  Context(System* s, Allocator* a, Zone* zone, ArchitectureContext* ac):
+    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
+    firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)),
+    lastBlock(firstBlock), ac(ac)
+  { }
+
+  System* s;
+  Zone* zone;
+  Assembler::Client* client;
+  Vector code;
+  Task* tasks;
+  uint8_t* result;
+  MyBlock* firstBlock;
+  MyBlock* lastBlock;
+  ArchitectureContext* ac;
+};
+
 void NO_RETURN
 abort(Context* c)
 {
@@ -620,6 +623,27 @@ void
 ignore(Context*)
 { }
 
+void
+storeLoadBarrier(Context* c)
+{
+  if (useSSE(c->ac)) {
+    // mfence:
+    c->code.append(0x0f);
+    c->code.append(0xae);
+    c->code.append(0xf0);
+  } else {
+    // lock addq $0x0,(%rsp):
+    c->code.append(0xf0);
+    if (BytesPerWord == 8) {
+      c->code.append(0x48);
+    }
+    c->code.append(0x83);
+    c->code.append(0x04);
+    c->code.append(0x24);
+    c->code.append(0x00);
+  }
+}
+
 void
 unconditional(Context* c, unsigned jump, Assembler::Constant* a)
 {
@@ -2503,7 +2527,7 @@ populateTables(ArchitectureContext* c)
   zo[Return] = return_;
   zo[LoadBarrier] = ignore;
   zo[StoreStoreBarrier] = ignore;
-  zo[StoreLoadBarrier] = ignore;
+  zo[StoreLoadBarrier] = storeLoadBarrier;
 
   uo[index(c, Call, C)] = CAST1(callC);
   uo[index(c, Call, R)] = CAST1(callR);
@@ -3225,7 +3249,7 @@ class MyArchitecture: public Assembler::Architecture {
 class MyAssembler: public Assembler {
  public:
   MyAssembler(System* s, Allocator* a, Zone* zone, MyArchitecture* arch):
-    c(s, a, zone), arch_(arch)
+    c(s, a, zone, &(arch->c)), arch_(arch)
   { }
 
   virtual void setClient(Client* client) {
diff --git a/src/x86.h b/src/x86.h
index 0528d33695..2ca377c86c 100644
--- a/src/x86.h
+++ b/src/x86.h
@@ -169,7 +169,7 @@ memoryBarrier()
 inline void
 storeStoreMemoryBarrier()
 {
-  memoryBarrier();
+  __asm__ __volatile__("": : :"memory");
 }
 
 inline void
@@ -181,13 +181,13 @@ storeLoadMemoryBarrier()
 inline void
 loadMemoryBarrier()
 {
-  memoryBarrier();
+  __asm__ __volatile__("": : :"memory");
 }
 
 inline void
 syncInstructionCache(const void*, unsigned)
 {
-  // ignore
+  __asm__ __volatile__("": : :"memory");
 }
 
 #ifdef USE_ATOMIC_OPERATIONS
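
Note on the pairing these call sites rely on (a reviewer-style sketch, not part of the patch): the writers in compile() and resolveNative() update the method tree / compiled code first and only then store the methodCompiled pointer, separated by storeStoreMemoryBarrier(), while the readers in methodForIp() and findCallNode() issue loadMemoryBarrier() before consulting the published structure. On x86 both of those barriers reduce to the pure compiler fences added to x86.h, since the hardware already preserves store-store and load-load order. Below is a minimal, self-contained illustration of that publish/consume idiom; only the two barrier bodies are taken from this patch (src/x86.h), and publishCode, findCode, publishedCode, and codeVisible are hypothetical names used purely for illustration.

// Hypothetical publish/consume sketch (GCC-style inline asm, x86 only);
// only the two barrier bodies below come from the patch, everything else
// is an illustrative stand-in, not code from the tree.

inline void
storeStoreMemoryBarrier()
{
  __asm__ __volatile__("": : :"memory");
}

inline void
loadMemoryBarrier()
{
  __asm__ __volatile__("": : :"memory");
}

void* publishedCode = 0;   // stand-in for the freshly compiled code
bool codeVisible = false;  // stand-in for the pointer/flag readers test

// Writer side (cf. compile() and resolveNative()): make the data visible
// strictly before the flag that announces it.
void
publishCode(void* code)
{
  publishedCode = code;
  storeStoreMemoryBarrier(); // compiler fence; x86 keeps store-store order
  codeVisible = true;
}

// Reader side (cf. methodForIp() and findCallNode()): once the flag is
// observed, data read after the barrier is at least as recent as the flag.
void*
findCode()
{
  if (codeVisible) {
    loadMemoryBarrier();     // compiler fence; x86 keeps load-load order
    return publishedCode;
  }
  return 0;
}

The one ordering that still needs a real instruction on x86 is the store-load case used by enter() when a thread transitions to ExclusiveState, which is why storeLoadBarrier() in x86.cpp emits mfence when useSSE() is true and otherwise a locked add of zero to the top-of-stack word, and why the call sites now name the ordering they actually need instead of the catch-all memoryBarrier().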