diff --git a/src/compile.cpp b/src/compile.cpp
index 29eb342760..93f5ca543d 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -235,7 +235,7 @@ methodForIp(MyThread* t, void* ip)
   // we must use a version of the method tree at least as recent as the
   // compiled form of the method containing the specified address (see
   // compile(MyThread*, Allocator*, BootContext*, object)):
-  memoryBarrier();
+  loadMemoryBarrier();
 
   return treeQuery(t, methodTree(t), reinterpret_cast<intptr_t>(ip),
                    methodTreeSentinal(t), compareIpToMethodBounds);
@@ -785,6 +785,10 @@ class Context {
   {
     if (size == 8) {
       switch(op) {
+      case Absolute:
+        assert(t, resultSize == 8);
+        return local::getThunk(t, absoluteLongThunk);
+
       case FloatNegate:
         assert(t, resultSize == 8);
         return local::getThunk(t, negateDoubleThunk);
@@ -819,12 +823,16 @@ class Context {
       assert(t, size == 4);
 
       switch(op) {
+      case Absolute:
+        assert(t, resultSize == 4);
+        return local::getThunk(t, absoluteIntThunk);
+
       case FloatNegate:
-        assert(t, size == 4);
+        assert(t, resultSize == 4);
         return local::getThunk(t, negateFloatThunk);
 
       case FloatAbsolute:
-        assert(t, size == 4);
+        assert(t, resultSize == 4);
         return local::getThunk(t, absoluteFloatThunk);
 
       case Float2Float:
@@ -2160,6 +2168,18 @@ absoluteFloat(uint32_t a)
   return floatToBits(fabsf(bitsToFloat(a)));
 }
 
+int64_t
+absoluteLong(int64_t a)
+{
+  return a > 0 ? a : -a;
+}
+
+int64_t
+absoluteInt(int32_t a)
+{
+  return a > 0 ? a : -a;
+}
+
 int64_t
 divideLong(int64_t b, int64_t a)
 {
@@ -5878,7 +5898,7 @@ resolveNative(MyThread* t, object method)
   // methodCompiled, since we don't want them using the slow calling
   // convention on a function that expects the fast calling
   // convention:
-  memoryBarrier();
+  storeStoreMemoryBarrier();
 
   methodCompiled(t, method) = reinterpret_cast<intptr_t>(function);
 }
@@ -7469,7 +7489,7 @@ findCallNode(MyThread* t, void* address)
   // we must use a version of the call table at least as recent as the
   // compiled form of the method containing the specified address (see
   // compile(MyThread*, Allocator*, BootContext*, object)):
-  memoryBarrier();
+  loadMemoryBarrier();
 
   MyProcessor* p = processor(t);
   object table = p->callTable;
@@ -8229,7 +8249,7 @@ compile(MyThread* t, Allocator* allocator, BootContext* bootContext,
        reinterpret_cast<intptr_t>(compiled), clone, methodTreeSentinal(t),
        compareIpToMethodBounds);
 
-    memoryBarrier();
+    storeStoreMemoryBarrier();
 
     methodCompiled(t, method) = reinterpret_cast<intptr_t>(compiled);
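For context, the barrier split above (storeStoreMemoryBarrier() where the method tree and methodCompiled are updated, loadMemoryBarrier() where they are read) is the standard publish/consume pairing. A minimal standalone sketch of the same pattern in C++11 atomics follows; Method, publishedMethod, and compiledCode are illustrative names, not Avian's:

    #include <atomic>
    #include <cstdio>
    #include <thread>

    struct Method { int compiledCode; };

    std::atomic<Method*> publishedMethod(0);

    void writer() {
      Method* m = new Method;
      m->compiledCode = 42;  // initialize completely before publishing
      // the release store plays the role of the store-store barrier: the
      // initialization above cannot be reordered past the publishing store
      publishedMethod.store(m, std::memory_order_release);
    }

    void reader() {
      Method* m;
      while ((m = publishedMethod.load(std::memory_order_acquire)) == 0) { }
      // the acquire load plays the role of the load barrier: everything
      // stored before the publish is visible when we dereference
      std::printf("%d\n", m->compiledCode);
    }

    int main() {
      std::thread t1(writer), t2(reader);
      t1.join();
      t2.join();
      return 0;
    }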
diff --git a/src/compiler.cpp b/src/compiler.cpp
index a4e9540b1b..311d4a288c 100644
--- a/src/compiler.cpp
+++ b/src/compiler.cpp
@@ -1476,12 +1476,19 @@ pickTarget(Context* c, Read* read, bool intersectRead,
 
   Value* value = read->value;
 
-  uint32_t registerMask = (value->type == ValueGeneral
-                           ? c->arch->generalRegisterMask() : ~0);
+  uint32_t registerMask
+    = (value->type == ValueFloat ? ~0 : c->arch->generalRegisterMask());
 
   SiteMask mask(~0, registerMask, AnyFrameIndex);
   read->intersect(&mask);
 
+  if (value->type == ValueFloat) {
+    uint32_t floatMask = mask.registerMask & c->arch->floatRegisterMask();
+    if (floatMask) {
+      mask.registerMask = floatMask;
+    }
+  }
+
   Target best;
 
   Value* successor = read->successor();
@@ -1792,8 +1799,21 @@ class RegisterSite: public Site {
   }
 
   virtual bool matchNextWord(Context* c, Site* s, unsigned) {
-    return s->type(c) == RegisterOperand
-      and s->registerSize(c) == BytesPerWord;
+    assert(c, number != NoRegister);
+
+    if (s->type(c) != RegisterOperand) {
+      return false;
+    }
+
+    RegisterSite* rs = static_cast<RegisterSite*>(s);
+    unsigned size = rs->registerSize(c);
+    if (size > BytesPerWord) {
+      assert(c, number != NoRegister);
+      return number == rs->number;
+    } else {
+      uint32_t mask = c->arch->generalRegisterMask();
+      return ((1 << number) & mask) and ((1 << rs->number) & mask);
+    }
   }
 
   virtual void acquire(Context* c, Value* v) {
@@ -1876,6 +1896,9 @@ class RegisterSite: public Site {
   }
 
   virtual Site* makeNextWord(Context* c, unsigned) {
+    assert(c, number != NoRegister);
+    assert(c, ((1 << number) & c->arch->generalRegisterMask()));
+
     return freeRegisterSite(c, c->arch->generalRegisterMask());
   }
 
@@ -1883,8 +1906,16 @@ class RegisterSite: public Site {
     return SiteMask(1 << RegisterOperand, mask_, NoFrameIndex);
   }
 
-  virtual SiteMask nextWordMask(Context*, unsigned) {
-    return SiteMask(1 << RegisterOperand, ~0, NoFrameIndex);
+  virtual SiteMask nextWordMask(Context* c, unsigned) {
+    assert(c, number != NoRegister);
+
+    if (registerSize(c) > BytesPerWord) {
+      return SiteMask
+        (1 << RegisterOperand, number, NoFrameIndex);
+    } else {
+      return SiteMask
+        (1 << RegisterOperand, c->arch->generalRegisterMask(), NoFrameIndex);
+    }
   }
 
   virtual unsigned registerSize(Context* c) {
@@ -1965,12 +1996,14 @@ class MemorySite: public Site {
     assert(c, acquired);
 
     if (mask.typeMask & (1 << MemoryOperand)) {
-      if (base == c->arch->stack()) {
-        assert(c, index == NoRegister);
-        return mask.frameIndex == AnyFrameIndex
-          or (mask.frameIndex != NoFrameIndex
-              and static_cast<int>(frameIndexToOffset(c, mask.frameIndex))
-              == offset);
+      if (mask.frameIndex >= 0) {
+        if (base == c->arch->stack()) {
+          assert(c, index == NoRegister);
+          return static_cast<int>(frameIndexToOffset(c, mask.frameIndex))
+            == offset;
+        } else {
+          return false;
+        }
       } else {
         return true;
       }
@@ -2287,13 +2320,17 @@ acceptMatch(Context* c, Site* s, Read*, const SiteMask& mask)
 
 Site*
 pickSourceSite(Context* c, Read* read, Site* target = 0,
-               unsigned* cost = 0, uint8_t typeMask = ~0,
+               unsigned* cost = 0, SiteMask* extraMask = 0,
                bool intersectRead = true, bool includeBuddies = true,
                bool includeNextWord = true,
                bool (*accept)(Context*, Site*, Read*, const SiteMask&)
                = acceptMatch)
 {
-  SiteMask mask(typeMask, ~0, AnyFrameIndex);
+  SiteMask mask;
+
+  if (extraMask) {
+    mask = intersect(mask, *extraMask);
+  }
 
   if (intersectRead) {
     read->intersect(&mask);
@@ -2444,7 +2481,7 @@ pickSiteOrMove(Context* c, Read* read, bool intersectRead,
                bool includeNextWord, unsigned registerReserveCount = 0)
 {
   Site* s = pickSourceSite
-    (c, read, 0, 0, ~0, intersectRead, true, includeNextWord);
+    (c, read, 0, 0, 0, intersectRead, true, includeNextWord);
 
   if (s) {
     return s;
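An aside on the pickTarget() hunk above: the register mask is narrowed to the float register file only when the intersection is non-empty, so a float value that cannot get a float register still has candidates. A self-contained sketch of that narrowing; the mask constants are invented here, where Avian would query the Architecture:

    #include <cstdint>
    #include <cstdio>

    // hypothetical layout: low bits general registers, high bits float
    const uint32_t GeneralRegisterMask = 0x0000ffff;
    const uint32_t FloatRegisterMask   = 0xffff0000;

    uint32_t narrowForFloat(uint32_t registerMask) {
      uint32_t floatMask = registerMask & FloatRegisterMask;
      return floatMask ? floatMask : registerMask;  // keep fallback if empty
    }

    int main() {
      std::printf("%08x\n", narrowForFloat(0xffffffffu));  // ffff0000
      std::printf("%08x\n", narrowForFloat(0x000000ffu));  // 000000ff
      return 0;
    }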
@@ -3533,22 +3570,51 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize,
   }
 }
 
-void
-pickSiteOrMove(Context* c, Value* src, Value* dst)
+Site*
+pickMatchOrMove(Context* c, Read* r, Site* nextWord, unsigned index,
+                bool intersectRead)
+{
+  Site* s = pickSite(c, r->value, nextWord, index, true);
+  SiteMask mask;
+  if (intersectRead) {
+    r->intersect(&mask);
+  }
+  if (s and s->match(c, mask)) {
+    return s;
+  }
+
+  return pickSiteOrMove
+    (c, r->value, intersect(mask, nextWord->nextWordMask(c, index)),
+     true, true);
+}
+
+Site*
+pickSiteOrMove(Context* c, Value* src, Value* dst, Site* nextWord,
+               unsigned index)
 {
   if (live(dst)) {
     Read* read = live(src);
-    Site* s = pickSourceSite(c, read, 0, 0, ~0, false, true, true);
+    Site* s;
+    if (nextWord) {
+      s = pickMatchOrMove(c, read, nextWord, index, false);
+    } else {
+      s = pickSourceSite(c, read, 0, 0, 0, false, true, true);
 
-    if (s == 0 or s->isVolatile(c)) {
-      maybeMove(c, read, false, true);
+      if (s == 0 or s->isVolatile(c)) {
+        s = maybeMove(c, read, false, true);
+      }
     }
+    assert(c, s);
 
     addBuddy(src, dst);
 
     if (src->source->isVolatile(c)) {
       removeSite(c, src, src->source);
     }
+
+    return s;
+  } else {
+    return 0;
   }
 }
@@ -3645,6 +3711,13 @@ class MoveEvent: public Event {
         {
           apply(c, Move, srcSelectSize, src->source, src->source,
                 dstSize, dst->target, dst->target);
+
+          if (live(dst) == 0) {
+            removeSite(c, dst, dst->target);
+            if (dstSize > BytesPerWord) {
+              removeSite(c, dst->nextWord, dst->nextWord->target);
+            }
+          }
         } else {
           maybeMove(c, Move, BytesPerWord, BytesPerWord, src,
                     BytesPerWord, dst, dstLowMask);
@@ -3654,9 +3727,9 @@ class MoveEvent: public Event {
         }
       }
     } else {
-      pickSiteOrMove(c, src, dst);
+      Site* low = pickSiteOrMove(c, src, dst, 0, 0);
       if (dstSize > BytesPerWord) {
-        pickSiteOrMove(c, src->nextWord, dst->nextWord);
+        pickSiteOrMove(c, src->nextWord, dst->nextWord, low, 1);
       }
     }
   } else if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) {
@@ -3714,7 +3787,7 @@ class MoveEvent: public Event {
 
         low->thaw(c, dst);
       } else {
-        pickSiteOrMove(c, src, dst);
+        pickSiteOrMove(c, src, dst, 0, 0);
       }
     }
@@ -5065,16 +5138,7 @@ readSource(Context* c, Read* r)
 
   Value* high = r->high(c);
   if (high) {
-    Site* s = pickSite(c, r->value, high->source, 0, true);
-    SiteMask mask;
-    r->intersect(&mask);
-    if (s and s->match(c, mask)) {
-      return s;
-    } else {
-      return pickSiteOrMove
-        (c, r->value, intersect(mask, high->source->nextWordMask(c, 0)),
-         true, true);
-    }
+    return pickMatchOrMove(c, r, high->source, 0, true);
   } else {
     return pickSiteOrMove(c, r, true, true);
   }
@@ -5210,13 +5274,14 @@ resolveSourceSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites)
     Read* r = live(v);
 
     if (r and sites[el.localIndex] == 0) {
-      const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand);
+      SiteMask mask((1 << RegisterOperand) | (1 << MemoryOperand),
+                    c->arch->generalRegisterMask(), AnyFrameIndex);
 
       Site* s = pickSourceSite
-        (c, r, 0, 0, mask, true, false, true, acceptForResolve);
+        (c, r, 0, 0, &mask, true, false, true, acceptForResolve);
 
       if (s == 0) {
         s = pickSourceSite
-          (c, r, 0, 0, mask, false, false, true, acceptForResolve);
+          (c, r, 0, 0, &mask, false, false, true, acceptForResolve);
       }
 
       if (s) {
@@ -5247,15 +5312,16 @@ resolveTargetSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites)
     Read* r = live(v);
 
     if (r and sites[el.localIndex] == 0) {
-      const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand);
+      SiteMask mask((1 << RegisterOperand) | (1 << MemoryOperand),
+                    c->arch->generalRegisterMask(), AnyFrameIndex);
 
       Site* s = pickSourceSite
-        (c, r, 0, 0, mask, true, true, true, acceptForResolve);
+        (c, r, 0, 0, &mask, true, true, true, acceptForResolve);
 
       if (s == 0) {
         s = pickSourceSite
-          (c, r, 0, 0, mask, false, true, true, acceptForResolve);
+          (c, r, 0, 0, &mask, false, true, true, acceptForResolve);
 
         if (s == 0) {
-          s = maybeMove(c, r, false, true, ResolveRegisterReserveCount);
+          s = maybeMove(c, v, mask, false, true, ResolveRegisterReserveCount);
         }
       }
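The pickMatchOrMove()/pickSiteOrMove() changes thread the site chosen for the low word through to the high word, so both halves of a multi-word value land in compatible sites. A toy illustration of the constraint matchNextWord() enforces, with hypothetical register numbering: once the low word is placed, the high word must either share a double-width register or fall anywhere in the general-purpose set:

    #include <cstdint>
    #include <cstdio>

    const uint32_t GeneralRegisterMask = 0xff;  // r0-r7, hypothetical

    // candidate registers for the high word, given where the low word is
    uint32_t nextWordCandidates(unsigned lowRegister, bool doubleWidth) {
      if (doubleWidth) {
        return 1u << lowRegister;    // must stay paired with the low word
      } else {
        return GeneralRegisterMask;  // any general-purpose register
      }
    }

    int main() {
      std::printf("%02x\n", nextWordCandidates(3, true));   // 08: only r3
      std::printf("%02x\n", nextWordCandidates(3, false));  // ff: any GPR
      return 0;
    }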
diff --git a/src/machine.cpp b/src/machine.cpp
index 9abe61301d..4fa9aaf9b5 100644
--- a/src/machine.cpp
+++ b/src/machine.cpp
@@ -2334,11 +2334,11 @@ enter(Thread* t, Thread::State s)
 #ifdef USE_ATOMIC_OPERATIONS
 #  define INCREMENT atomicIncrement
 #  define ACQUIRE_LOCK ACQUIRE_RAW(t, t->m->stateLock)
-#  define BARRIER memoryBarrier()
+#  define STORE_LOAD_MEMORY_BARRIER storeLoadMemoryBarrier()
 #else
 #  define INCREMENT(pointer, value) *(pointer) += value;
 #  define ACQUIRE_LOCK
-#  define BARRIER
+#  define STORE_LOAD_MEMORY_BARRIER
 
   ACQUIRE_RAW(t, t->m->stateLock);
 #endif // not USE_ATOMIC_OPERATIONS
@@ -2366,7 +2366,7 @@ enter(Thread* t, Thread::State s)
       t->state = Thread::ExclusiveState;
       t->m->exclusive = t;
 
-      BARRIER;
+      STORE_LOAD_MEMORY_BARRIER;
 
       while (t->m->activeCount > 1) {
         t->m->stateLock->wait(t->systemThread, 0);
diff --git a/src/powerpc.cpp b/src/powerpc.cpp
index 63ee6da3b4..9cec9a1cdd 100644
--- a/src/powerpc.cpp
+++ b/src/powerpc.cpp
@@ -2065,6 +2065,9 @@ class MyArchitecture: public Assembler::Architecture {
       *aTypeMask = (1 << RegisterOperand);
       break;
 
+    case Absolute:
+    case FloatAbsolute:
+    case FloatSquareRoot:
     case FloatNegate:
     case Float2Float:
     case Float2Int:
@@ -2096,9 +2099,9 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual void planMove
-  (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
+  (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
    uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-   uint8_t dstTypeMask, uint64_t dstRegisterMask)
+   uint8_t dstTypeMask, uint64_t)
   {
     *srcTypeMask = ~0;
     *srcRegisterMask = ~static_cast<uint64_t>(0);
diff --git a/src/powerpc.h b/src/powerpc.h
index 1ef1437e0f..9a55c13266 100644
--- a/src/powerpc.h
+++ b/src/powerpc.h
@@ -122,7 +122,7 @@ atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
 inline bool
 atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
 {
-  return atomicCompareAndSwap32(p, old, new_);
+  return atomicCompareAndSwap32(reinterpret_cast<uint32_t*>(p), old, new_);
 }
 
 #endif // USE_ATOMIC_OPERATIONS
diff --git a/src/thunks.cpp b/src/thunks.cpp
index bab06da664..f55c0018a9 100644
--- a/src/thunks.cpp
+++ b/src/thunks.cpp
@@ -22,6 +22,8 @@ THUNK(divideFloat)
 THUNK(moduloFloat)
 THUNK(negateFloat)
 THUNK(absoluteFloat)
+THUNK(absoluteLong)
+THUNK(absoluteInt)
 THUNK(divideLong)
 THUNK(divideInt)
 THUNK(moduloLong)
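The BARRIER to STORE_LOAD_MEMORY_BARRIER rename in enter() names the exact ordering required: the thread stores t->m->exclusive and then loads t->m->activeCount, which is the one reordering x86 permits without a fence. A standalone sketch of that Dekker-style handshake using C++11 fences instead of Avian's primitives, with illustrative variable names:

    #include <atomic>
    #include <cstdio>

    std::atomic<int> exclusiveFlag(0);
    std::atomic<int> activeCount(1);

    void becomeExclusive() {
      exclusiveFlag.store(1, std::memory_order_relaxed);
      // store-load barrier: without it, the load below could be satisfied
      // before the store above becomes visible to other threads
      std::atomic_thread_fence(std::memory_order_seq_cst);
      while (activeCount.load(std::memory_order_relaxed) > 1) {
        // wait for other threads to check in
      }
    }

    int main() {
      becomeExclusive();
      std::printf("exclusive\n");
      return 0;
    }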
diff --git a/src/x86.cpp b/src/x86.cpp
index 7e565dc179..b8290389e4 100644
--- a/src/x86.cpp
+++ b/src/x86.cpp
@@ -113,23 +113,7 @@ class MyBlock: public Assembler::Block {
   unsigned size;
 };
 
-class Context {
- public:
-  Context(System* s, Allocator* a, Zone* zone):
-    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
-    firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)),
-    lastBlock(firstBlock)
-  { }
-
-  System* s;
-  Zone* zone;
-  Assembler::Client* client;
-  Vector code;
-  Task* tasks;
-  uint8_t* result;
-  MyBlock* firstBlock;
-  MyBlock* lastBlock;
-};
+class Context;
 
 typedef void (*OperationType)(Context*);
@@ -163,6 +147,25 @@ class ArchitectureContext {
    * OperandTypeCount];
 };
 
+class Context {
+ public:
+  Context(System* s, Allocator* a, Zone* zone, ArchitectureContext* ac):
+    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
+    firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)),
+    lastBlock(firstBlock), ac(ac)
+  { }
+
+  System* s;
+  Zone* zone;
+  Assembler::Client* client;
+  Vector code;
+  Task* tasks;
+  uint8_t* result;
+  MyBlock* firstBlock;
+  MyBlock* lastBlock;
+  ArchitectureContext* ac;
+};
+
 void NO_RETURN
 abort(Context* c)
 {
@@ -620,6 +623,27 @@ void
 ignore(Context*)
 { }
 
+void
+storeLoadBarrier(Context* c)
+{
+  if (useSSE(c->ac)) {
+    // mfence:
+    c->code.append(0x0f);
+    c->code.append(0xae);
+    c->code.append(0xf0);
+  } else {
+    // lock addq $0x0,(%rsp):
+    c->code.append(0xf0);
+    if (BytesPerWord == 8) {
+      c->code.append(0x48);
+    }
+    c->code.append(0x83);
+    c->code.append(0x04);
+    c->code.append(0x24);
+    c->code.append(0x00);
+  }
+}
+
 void
 unconditional(Context* c, unsigned jump, Assembler::Constant* a)
 {
@@ -954,12 +978,12 @@ sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a,
       opcode(c, 0xf3);
       maybeRex(c, 4, a, b);
       opcode(c, 0x0f, 0x10);
-      modrm(c, 0xc0, b, a);
+      modrm(c, 0xc0, a, b);
     } else {
       opcode(c, 0xf2);
-      maybeRex(c, 4, a, b);
+      maybeRex(c, 8, a, b);
       opcode(c, 0x0f, 0x10);
-      modrm(c, 0xc0, b, a);
+      modrm(c, 0xc0, a, b);
     }
   } else if (floatReg(a)) {
     opcode(c, 0x66);
@@ -1094,7 +1118,6 @@ sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a,
           unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, aSize >= 4);
-  assert(c, aSize == bSize);
 
   if (BytesPerWord == 4 and aSize == 8) {
     opcode(c, 0xf3);
@@ -2503,7 +2526,7 @@ populateTables(ArchitectureContext* c)
   zo[Return] = return_;
   zo[LoadBarrier] = ignore;
   zo[StoreStoreBarrier] = ignore;
-  zo[StoreLoadBarrier] = ignore;
+  zo[StoreLoadBarrier] = storeLoadBarrier;
 
   uo[index(c, Call, C)] = CAST1(callC);
   uo[index(c, Call, R)] = CAST1(callR);
@@ -2896,9 +2919,13 @@ class MyArchitecture: public Assembler::Architecture {
       break;
 
     case FloatAbsolute:
-      *aTypeMask = (1 << RegisterOperand);
-      *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
-        | FloatRegisterMask;
+      if (useSSE(&c)) {
+        *aTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+          | FloatRegisterMask;
+      } else {
+        *thunk = true;
+      }
       break;
 
     case FloatNegate:
@@ -2912,9 +2939,13 @@ class MyArchitecture: public Assembler::Architecture {
       break;
 
     case FloatSquareRoot:
-      *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
-      *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
-        | FloatRegisterMask;
+      if (useSSE(&c)) {
+        *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+          | FloatRegisterMask;
+      } else {
+        *thunk = true;
+      }
       break;
 
     case Float2Float:
@@ -3225,7 +3256,7 @@ class MyArchitecture: public Assembler::Architecture {
 class MyAssembler: public Assembler {
  public:
   MyAssembler(System* s, Allocator* a, Zone* zone, MyArchitecture* arch):
-    c(s, a, zone), arch_(arch)
+    c(s, a, zone, &(arch->c)), arch_(arch)
   { }
 
   virtual void setClient(Client* client) {
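For reference, the sequences emitted by storeLoadBarrier() above decode as mfence (0f ae f0, SSE2 and later), lock addq $0x0,(%rsp) (f0 48 83 04 24 00, 64-bit without SSE2), and lock addl $0x0,(%esp) (f0 83 04 24 00, 32-bit). A small self-contained mirror of that encoding logic, for checking lengths; this is not Avian code:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> storeLoadBarrierBytes(bool sse, bool wordSize8) {
      if (sse) {
        return std::vector<uint8_t>{0x0f, 0xae, 0xf0};  // mfence
      }
      std::vector<uint8_t> v;
      v.push_back(0xf0);                 // lock prefix
      if (wordSize8) v.push_back(0x48);  // REX.W on 64-bit
      v.push_back(0x83);                 // add r/m, imm8
      v.push_back(0x04);                 // modrm: SIB follows
      v.push_back(0x24);                 // SIB: (%rsp)/(%esp)
      v.push_back(0x00);                 // imm8: 0
      return v;
    }

    int main() {
      assert(storeLoadBarrierBytes(true, true).size() == 3);   // mfence
      assert(storeLoadBarrierBytes(false, true).size() == 6);  // lock addq
      assert(storeLoadBarrierBytes(false, false).size() == 5); // lock addl
      return 0;
    }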
diff --git a/src/x86.h b/src/x86.h
index 0528d33695..4ae08f6859 100644
--- a/src/x86.h
+++ b/src/x86.h
@@ -169,7 +169,7 @@ memoryBarrier()
 inline void
 storeStoreMemoryBarrier()
 {
-  memoryBarrier();
+  __asm__ __volatile__("": : :"memory");
 }
 
 inline void
@@ -181,13 +181,13 @@ storeLoadMemoryBarrier()
 inline void
 loadMemoryBarrier()
 {
-  memoryBarrier();
+  __asm__ __volatile__("": : :"memory");
 }
 
 inline void
 syncInstructionCache(const void*, unsigned)
 {
-  // ignore
+  __asm__ __volatile__("": : :"memory");
 }
 
 #ifdef USE_ATOMIC_OPERATIONS
@@ -233,9 +233,9 @@ inline bool
 atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
 {
 #ifdef ARCH_x86_32
-  return atomicCompareAndSwap32(p, old, new_);
+  return atomicCompareAndSwap32(reinterpret_cast<uint32_t*>(p), old, new_);
 #elif defined ARCH_x86_64
-  return atomicCompareAndSwap64(p, old, new_);
+  return atomicCompareAndSwap64(reinterpret_cast<uint64_t*>(p), old, new_);
 #endif // ARCH_x86_64
 }
 #endif // USE_ATOMIC_OPERATIONS
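The x86.h hunks lean on x86's total store order: ordinary loads are not reordered against loads, and stores are not reordered against stores, so loadMemoryBarrier() and storeStoreMemoryBarrier() only need to defeat compiler reordering, which an empty asm with a "memory" clobber does; only store-then-load keeps a real fence in storeLoadMemoryBarrier(). A sketch of the compiler-barrier idiom in isolation (single-threaded here, purely illustrative):

    #include <cstdio>

    int data = 0;
    int ready = 0;

    void publish() {
      data = 42;
      // compiler-only store-store barrier: stops the compiler from sinking
      // the store to 'data' below the store to 'ready'; on x86 the hardware
      // already keeps stores in program order
      __asm__ __volatile__("" : : : "memory");
      ready = 1;
    }

    int main() {
      publish();
      if (ready) std::printf("%d\n", data);
      return 0;
    }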