From bd72745ff9364729b1013916b5bfe2fd506240c6 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Fri, 27 Nov 2009 21:01:27 -0700 Subject: [PATCH 01/19] fix off-by-one error in intrinsic() --- src/compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compile.cpp b/src/compile.cpp index aeb5e3722b..f224d9f5e1 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -2934,7 +2934,7 @@ bool intrinsic(MyThread* t, Frame* frame, object target) { #define MATCH(name, constant) \ - (byteArrayLength(t, name) - 1 == sizeof(constant) \ + (byteArrayLength(t, name) == sizeof(constant) \ and strcmp(reinterpret_cast(&byteArrayBody(t, name, 0)), \ constant) == 0) From 5ead8fab1714e17c8511abc429cfc14af9bce6a1 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Fri, 27 Nov 2009 21:15:12 -0700 Subject: [PATCH 02/19] refactor code responsible for moving data in the compiler This is partially to address incorrect code generation for 64-bit floating-point values on x86_32 and partially to reduce unnecessary moves. 
--- src/assembler.h | 7 +- src/compiler.cpp | 622 +++++++++++++++++++++++--------------------- src/jnienv.cpp | 5 +- src/powerpc.cpp | 23 +- src/x86.cpp | 60 +++-- test/AllFloats.java | 3 + test/Floats.java | 31 +++ 7 files changed, 414 insertions(+), 337 deletions(-) diff --git a/src/assembler.h b/src/assembler.h index 248d8272a8..918c548acb 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -361,10 +361,9 @@ class Assembler { unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask) = 0; virtual void planMove - (unsigned size, - uint8_t srcTypeMask, uint64_t srcRegisterMask, - uint8_t dstTypeMask, uint64_t dstRegisterMask, - uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask) = 0; + (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t dstRegisterMask) = 0; virtual void planSource (TernaryOperation op, diff --git a/src/compiler.cpp b/src/compiler.cpp index 88402a5e77..e3a90faf32 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -133,12 +133,16 @@ class Site { virtual Site* makeNextWord(Context*, unsigned) = 0; + virtual SiteMask mask(Context*) = 0; + virtual SiteMask nextWordMask(Context*, unsigned) = 0; virtual unsigned registerSize(Context*) { return BytesPerWord; } virtual unsigned registerMask(Context*) { return 0; } + virtual bool isVolatile(Context*) { return false; } + Site* next; }; @@ -1091,6 +1095,23 @@ buddies(Value* a, Value* b) return false; } +void +addBuddy(Value* original, Value* buddy) +{ + buddy->buddy = original; + Value* p = original; + while (p->buddy != original) p = p->buddy; + p->buddy = buddy; + + if (DebugBuddies) { + fprintf(stderr, "add buddy %p to", buddy); + for (Value* p = buddy->buddy; p != buddy; p = p->buddy) { + fprintf(stderr, " %p", p); + } + fprintf(stderr, "\n"); + } +} + void decrementAvailableGeneralRegisterCount(Context* c) { @@ -1582,6 +1603,10 @@ class ConstantSite: public Site { abort(c); } + virtual SiteMask 
mask(Context*) { + return SiteMask(1 << ConstantOperand, 0, NoFrameIndex); + } + virtual SiteMask nextWordMask(Context*, unsigned) { return SiteMask(1 << ConstantOperand, 0, NoFrameIndex); } @@ -1668,6 +1693,10 @@ class AddressSite: public Site { abort(c); } + virtual SiteMask mask(Context*) { + return SiteMask(1 << AddressOperand, 0, NoFrameIndex); + } + virtual SiteMask nextWordMask(Context* c, unsigned) { abort(c); } @@ -1687,7 +1716,7 @@ freeRegisterSite(Context* c, uint32_t mask); class RegisterSite: public Site { public: RegisterSite(uint32_t mask, int number): - mask(mask), number(number) + mask_(mask), number(number) { } virtual unsigned toString(Context*, char* buffer, unsigned bufferSize) { @@ -1695,7 +1724,7 @@ class RegisterSite: public Site { return vm::snprintf(buffer, bufferSize, "%p register %d", this, number); } else { return vm::snprintf(buffer, bufferSize, - "%p register unacquired (mask %d)", this, mask); + "%p register unacquired (mask %d)", this, mask_); } } @@ -1705,7 +1734,7 @@ class RegisterSite: public Site { if (s and (this == s or (s->type(c) == RegisterOperand - and (static_cast(s)->mask & (1 << number))))) + and (static_cast(s)->mask_ & (1 << number))))) { return 0; } else { @@ -1743,7 +1772,7 @@ class RegisterSite: public Site { if (number != NoRegister) { target = Target(number, RegisterOperand, 0); } else { - target = pickRegisterTarget(c, v, mask); + target = pickRegisterTarget(c, v, mask_); expect(c, target.cost < Target::Impossible); } @@ -1803,7 +1832,7 @@ class RegisterSite: public Site { if (number != NoRegister) { mask = 1 << number; } else { - mask = this->mask; + mask = mask_; } return freeRegisterSite(c, mask); @@ -1821,6 +1850,10 @@ class RegisterSite: public Site { return freeRegisterSite(c, c->arch->generalRegisterMask()); } + virtual SiteMask mask(Context* c UNUSED) { + return SiteMask(1 << RegisterOperand, mask_, NoFrameIndex); + } + virtual SiteMask nextWordMask(Context*, unsigned) { return SiteMask(1 << 
RegisterOperand, ~0, NoFrameIndex); } @@ -1841,7 +1874,7 @@ class RegisterSite: public Site { return 1 << number; } - uint32_t mask; + uint32_t mask_; int number; }; @@ -2059,6 +2092,11 @@ class MemorySite: public Site { this->index, scale); } + virtual SiteMask mask(Context* c) { + return SiteMask(1 << MemoryOperand, 0, (base == c->arch->stack()) + ? offsetToFrameIndex(c, offset) : NoFrameIndex); + } + virtual SiteMask nextWordMask(Context* c, unsigned index) { // todo: endianness? int frameIndex; @@ -2072,6 +2110,10 @@ class MemorySite: public Site { return SiteMask(1 << MemoryOperand, 0, frameIndex); } + virtual bool isVolatile(Context* c) { + return base != c->arch->stack(); + } + bool acquired; int base; int offset; @@ -2155,74 +2197,6 @@ pickTargetSite(Context* c, Read* read, bool intersectRead = false, } } -void -steal(Context* c, Resource* r, Value* thief) -{ - if (DebugResources) { - char resourceBuffer[256]; r->toString(c, resourceBuffer, 256); - char siteBuffer[1024]; sitesToString(c, r->value, siteBuffer, 1024); - fprintf(stderr, "%p steal %s from %p (%s)\n", - thief, resourceBuffer, r->value, siteBuffer); - } - - if ((not (thief and buddies(thief, r->value)) - and uniqueSite(c, r->value, r->site))) - { - r->site->freeze(c, r->value); - - move(c, r->value, r->site, pickTargetSite - (c, live(r->value), false, StealRegisterReserveCount)); - - r->site->thaw(c, r->value); - } - - removeSite(c, r->value, r->site); -} - -void -acquire(Context* c, Resource* resource, Value* value, Site* site) -{ - assert(c, value); - assert(c, site); - - if (not resource->reserved) { - if (DebugResources) { - char buffer[256]; resource->toString(c, buffer, 256); - fprintf(stderr, "%p acquire %s\n", value, buffer); - } - - if (resource->value) { - assert(c, findSite(c, resource->value, resource->site)); - assert(c, not findSite(c, value, resource->site)); - - steal(c, resource, value); - } - - resource->value = value; - resource->site = site; - } -} - -void -release(Context* c, 
Resource* resource, Value* value UNUSED, Site* site UNUSED) -{ - if (not resource->reserved) { - if (DebugResources) { - char buffer[256]; resource->toString(c, buffer, 256); - fprintf(stderr, "%p release %s\n", resource->value, buffer); - } - - assert(c, resource->value); - assert(c, resource->site); - - assert(c, buddies(resource->value, value)); - assert(c, site == resource->site); - - resource->value = 0; - resource->site = 0; - } -} - class SingleRead: public Read { public: SingleRead(const SiteMask& mask, Value* successor): @@ -2274,6 +2248,212 @@ read(Context* c, const SiteMask& mask, Value* successor = 0) SingleRead(mask, successor); } +bool +acceptMatch(Context* c, Site* s, Read*, const SiteMask& mask) +{ + return s->match(c, mask); +} + +Site* +pickSourceSite(Context* c, Read* read, Site* target = 0, + unsigned* cost = 0, uint8_t typeMask = ~0, + bool intersectRead = true, bool includeBuddies = true, + bool includeNextWord = true, + bool (*accept)(Context*, Site*, Read*, const SiteMask&) + = acceptMatch) +{ + SiteMask mask(typeMask, ~0, AnyFrameIndex); + + if (intersectRead) { + read->intersect(&mask); + } + + Site* site = 0; + unsigned copyCost = 0xFFFFFFFF; + for (SiteIterator it(c, read->value, includeBuddies, includeNextWord); + it.hasMore();) + { + Site* s = it.next(); + if (accept(c, s, read, mask)) { + unsigned v = s->copyCost(c, target); + if (v < copyCost) { + site = s; + copyCost = v; + } + } + } + + if (DebugMoves and site and target) { + char srcb[256]; site->toString(c, srcb, 256); + char dstb[256]; target->toString(c, dstb, 256); + fprintf(stderr, "pick source %s to %s for %p cost %d\n", + srcb, dstb, read->value, copyCost); + } + + if (cost) *cost = copyCost; + return site; +} + +Site* +maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, + unsigned registerReserveCount = 0) +{ + Site* dst = pickTargetSite(c, read, intersectRead, registerReserveCount); + + Value* value = read->value; + unsigned size = value == 
value->nextWord ? BytesPerWord : 8; + + uint8_t srcTypeMask; + uint64_t srcRegisterMask; + uint8_t tmpTypeMask; + uint64_t tmpRegisterMask; + c->arch->planMove + (size, &srcTypeMask, &srcRegisterMask, + &tmpTypeMask, &tmpRegisterMask, + 1 << dst->type(c), dst->registerMask(c)); + + SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex); + SingleRead srcRead(srcMask, 0); + srcRead.value = value; + + unsigned cost; + Site* src = pickSourceSite + (c, &srcRead, dst, &cost, ~0, true, true, includeNextWord); + + if (src == 0 or cost) { + unsigned cost2; + Site* src2 = pickSourceSite + (c, &srcRead, dst, &cost2, ~0, false, true, includeNextWord); + + if (src == 0 or cost2 == 0) { + src = src2; + cost = cost2; + } + } + + if (cost) { + if (not src->match(c, srcMask)) { + src->freeze(c, value); + dst->freeze(c, value); + + SiteMask tmpMask(tmpTypeMask, tmpRegisterMask, AnyFrameIndex); + SingleRead tmpRead(tmpMask, 0); + tmpRead.value = value; + + Site* tmp = pickTargetSite(c, &tmpRead, true); + + move(c, value, src, tmp); + + dst->thaw(c, value); + src->thaw(c, value); + + src = tmp; + } + + move(c, value, src, dst); + } + + return dst; +} + +Site* +pickSiteOrMove(Context* c, Read* read, bool intersectRead, + bool includeNextWord, unsigned registerReserveCount = 0) +{ + Site* s = pickSourceSite + (c, read, 0, 0, ~0, intersectRead, true, includeNextWord); + + if (s) { + return s; + } else { + return maybeMove + (c, read, intersectRead, includeNextWord, registerReserveCount); + } +} + +Site* +pickSiteOrMove(Context* c, Value* v, const SiteMask& mask, bool intersectMask, + bool includeNextWord, unsigned registerReserveCount = 0) +{ + SingleRead read(mask, 0); + read.value = v; + return pickSiteOrMove + (c, &read, intersectMask, includeNextWord, registerReserveCount); +} + +Site* +pickSiteOrMove(Context* c, Value* v, Site* s, unsigned index) +{ + return pickSiteOrMove(c, v, s->nextWordMask(c, index), true, false); +} + +void +steal(Context* c, Resource* r, Value* thief) 
+{ + if (DebugResources) { + char resourceBuffer[256]; r->toString(c, resourceBuffer, 256); + char siteBuffer[1024]; sitesToString(c, r->value, siteBuffer, 1024); + fprintf(stderr, "%p steal %s from %p (%s)\n", + thief, resourceBuffer, r->value, siteBuffer); + } + + if ((not (thief and buddies(thief, r->value)) + and uniqueSite(c, r->value, r->site))) + { + r->site->freeze(c, r->value); + + maybeMove(c, live(r->value), false, true, StealRegisterReserveCount); + + r->site->thaw(c, r->value); + } + + removeSite(c, r->value, r->site); +} + +void +acquire(Context* c, Resource* resource, Value* value, Site* site) +{ + assert(c, value); + assert(c, site); + + if (not resource->reserved) { + if (DebugResources) { + char buffer[256]; resource->toString(c, buffer, 256); + fprintf(stderr, "%p acquire %s\n", value, buffer); + } + + if (resource->value) { + assert(c, findSite(c, resource->value, resource->site)); + assert(c, not findSite(c, value, resource->site)); + + steal(c, resource, value); + } + + resource->value = value; + resource->site = site; + } +} + +void +release(Context* c, Resource* resource, Value* value UNUSED, Site* site UNUSED) +{ + if (not resource->reserved) { + if (DebugResources) { + char buffer[256]; resource->toString(c, buffer, 256); + fprintf(stderr, "%p release %s\n", resource->value, buffer); + } + + assert(c, resource->value); + assert(c, resource->site); + + assert(c, buddies(resource->value, value)); + assert(c, site == resource->site); + + resource->value = 0; + resource->site = 0; + } +} + SiteMask generalRegisterMask(Context* c) { @@ -2463,47 +2643,21 @@ pickSite(Context* c, Value* v, Site* s, unsigned index) } Site* -pickOrMoveSite(Context* c, Value* v, Site* s, unsigned index) -{ - Site* n = pickSite(c, v, s, index); - if (n) { - return n; - } - - n = s->makeNextWord(c, index); - - Site* src = 0; - unsigned copyCost = 0xFFFFFFFF; - for (SiteIterator it(c, v, true, false); it.hasMore();) { - Site* candidate = it.next(); - unsigned cost = 
candidate->copyCost(c, n); - if (cost < copyCost) { - src = candidate; - copyCost = cost; - } - } - - move(c, v, src, n); - - return n; -} - -Site* -pickOrMoveSite(Context* c, Value* v, Site* s, Site** low, Site** high) +pickSiteOrMove(Context* c, Value* v, Site* s, Site** low, Site** high) { if (v->wordIndex == 0) { *low = s; - *high = pickOrMoveSite(c, v->nextWord, s, 1); + *high = pickSiteOrMove(c, v->nextWord, s, 1); return *high; } else { - *low = pickOrMoveSite(c, v->nextWord, s, 0); + *low = pickSiteOrMove(c, v->nextWord, s, 0); *high = s; return *low; } } Site* -pickOrGrowSite(Context* c, Value* v, Site* s, unsigned index) +pickSiteOrGrow(Context* c, Value* v, Site* s, unsigned index) { Site* n = pickSite(c, v, s, index); if (n) { @@ -2516,25 +2670,19 @@ pickOrGrowSite(Context* c, Value* v, Site* s, unsigned index) } Site* -pickOrGrowSite(Context* c, Value* v, Site* s, Site** low, Site** high) +pickSiteOrGrow(Context* c, Value* v, Site* s, Site** low, Site** high) { if (v->wordIndex == 0) { *low = s; - *high = pickOrGrowSite(c, v->nextWord, s, 1); + *high = pickSiteOrGrow(c, v->nextWord, s, 1); return *high; } else { - *low = pickOrGrowSite(c, v->nextWord, s, 0); + *low = pickSiteOrGrow(c, v->nextWord, s, 0); *high = s; return *low; } } -bool -acceptMatch(Context* c, Site* s, Read*, const SiteMask& mask) -{ - return s->match(c, mask); -} - bool isHome(Value* v, int frameIndex) { @@ -2566,97 +2714,20 @@ acceptForResolve(Context* c, Site* s, Read* read, const SiteMask& mask) } } -Site* -pickSourceSite(Context* c, Read* read, Site* target = 0, - unsigned* cost = 0, uint8_t typeMask = ~0, - bool intersectRead = true, bool includeBuddies = true, - bool (*accept)(Context*, Site*, Read*, const SiteMask&) - = acceptMatch) -{ - SiteMask mask(typeMask, ~0, AnyFrameIndex); - - if (intersectRead) { - read->intersect(&mask); - } - - Site* site = 0; - unsigned copyCost = 0xFFFFFFFF; - for (SiteIterator it(c, read->value, includeBuddies); it.hasMore();) { - Site* s = 
it.next(); - if (accept(c, s, read, mask)) { - unsigned v = s->copyCost(c, target); - if (v < copyCost) { - site = s; - copyCost = v; - } - } - } - - if (DebugMoves and site and target) { - char srcb[256]; site->toString(c, srcb, 256); - char dstb[256]; target->toString(c, dstb, 256); - fprintf(stderr, "pick source %s to %s for %p cost %d\n", - srcb, dstb, read->value, copyCost); - } - - if (cost) *cost = copyCost; - return site; -} - void move(Context* c, Value* value, Site* src, Site* dst) { + if (DebugMoves) { + char srcb[256]; src->toString(c, srcb, 256); + char dstb[256]; dst->toString(c, dstb, 256); + fprintf(stderr, "move %s to %s for %p to %p\n", + srcb, dstb, value, value); + } + src->freeze(c, value); addSite(c, value, dst); - src->thaw(c, value); - - uint8_t tmpTypeMask; - uint64_t tmpRegisterMask; - c->arch->planMove - (value->nextWord == value ? BytesPerWord : 8, - 1 << src->type(c), src->registerMask(c), - 1 << dst->type(c), dst->registerMask(c), - &tmpTypeMask, &tmpRegisterMask); - - SiteMask mask(tmpTypeMask, tmpRegisterMask, AnyFrameIndex); - if (not src->match(c, mask)) { - // we can't move directly from src to dst on this architecture, so - // we need to either pick a difference source or use a temporary - - removeSite(c, value, dst); - - SingleRead read(mask, 0); - read.value = value; - Site* newSrc = pickSourceSite(c, &read); - - if (newSrc) { - src = newSrc; - } else { - src->freeze(c, value); - dst->freeze(c, value); - - Site* tmp = pickTargetSite(c, &read, true); - - move(c, value, src, tmp); - - dst->thaw(c, value); - src->thaw(c, value); - - src = tmp; - } - - addSite(c, value, dst); - } - - if (DebugMoves) { - char srcb[256]; src->toString(c, srcb, 256); - char dstb[256]; dst->toString(c, dstb, 256); - fprintf(stderr, "move %s to %s for %p\n", srcb, dstb, value); - } - - src->freeze(c, value); dst->freeze(c, value); unsigned srcSize; @@ -2672,14 +2743,14 @@ move(Context* c, Value* value, Site* src, Site* dst) if (srcSize == dstSize) { 
apply(c, Move, srcSize, src, src, dstSize, dst, dst); } else if (srcSize > BytesPerWord) { - Site* low, *high, *other = pickOrGrowSite(c, value, dst, &low, &high); + Site* low, *high, *other = pickSiteOrGrow(c, value, dst, &low, &high); other->freeze(c, value->nextWord); apply(c, Move, srcSize, src, src, srcSize, low, high); other->thaw(c, value->nextWord); } else { - Site* low, *high, *other = pickOrMoveSite(c, value, src, &low, &high); + Site* low, *high, *other = pickSiteOrMove(c, value, src, &low, &high); other->freeze(c, value->nextWord); apply(c, Move, dstSize, low, high, dstSize, dst, dst); @@ -3228,24 +3299,6 @@ appendReturn(Context* c, unsigned size, Value* value) ReturnEvent(c, size, value)); } -void -addBuddy(Value* original, Value* buddy) -{ - buddy->buddy = original; - Value* p = original; - while (p->buddy != original) p = p->buddy; - p->buddy = buddy; - //buddy->type = original->type; - - if (DebugBuddies) { - fprintf(stderr, "add buddy %p to", buddy); - for (Value* p = buddy->buddy; p != buddy; p = p->buddy) { - fprintf(stderr, " %p", p); - } - fprintf(stderr, "\n"); - } -} - void maybeMove(Context* c, BinaryOperation type, unsigned srcSize, unsigned srcSelectSize, Value* src, unsigned dstSize, Value* dst, @@ -3375,11 +3428,6 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, } else { target = src->source; - assert(c, src); - assert(c, dst); - - addBuddy(src, dst); - if (DebugMoves) { char dstb[256]; target->toString(c, dstb, 256); fprintf(stderr, "null move in %s for %p to %p\n", dstb, src, dst); @@ -3391,6 +3439,19 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, } } +void +maybeMove(Context* c, Value* src, Value* dst) +{ + if (live(dst)) { + maybeMove(c, live(src), false, true); + addBuddy(src, dst); + + if (src->source->isVolatile(c)) { + removeSite(c, src, src->source); + } + } +} + Value* value(Context* c, ValueType type, Site* site = 0, Site* target = 0) { @@ -3474,19 +3535,26 @@ class MoveEvent: public Event { 
SiteMask dstLowMask(dstTypeMask, dstRegisterMask, AnyFrameIndex); SiteMask dstHighMask(dstTypeMask, dstRegisterMask >> 32, AnyFrameIndex); - if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) { + if (srcSelectSize >= BytesPerWord + and dstSize >= BytesPerWord + and srcSelectSize >= dstSize) + { + if (dst->target) { + maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, + dstLowMask); + if (dstSize > BytesPerWord) { + maybeMove(c, Move, BytesPerWord, BytesPerWord, src->nextWord, + BytesPerWord, dst->nextWord, dstHighMask); + } + } else { + maybeMove(c, src, dst); + if (dstSize > BytesPerWord) { + maybeMove(c, src->nextWord, dst->nextWord); + } + } + } else if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) { maybeMove(c, type, srcSize, srcSelectSize, src, dstSize, dst, dstLowMask); - } else if (srcSelectSize == dstSize) { - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); - maybeMove(c, Move, BytesPerWord, BytesPerWord, src->nextWord, - BytesPerWord, dst->nextWord, dstHighMask); - } else if (srcSize > BytesPerWord) { - assert(c, dstSize == BytesPerWord); - - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); } else { assert(c, srcSize == BytesPerWord); assert(c, srcSelectSize == BytesPerWord); @@ -3539,8 +3607,7 @@ class MoveEvent: public Event { low->thaw(c, dst); } else { - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); + maybeMove(c, src, dst); } } @@ -3590,11 +3657,11 @@ findConstantSite(Context* c, Value* v) } void -preserve(Context* c, Value* v, Site* s, Read* r) +preserve(Context* c, Value* v, Read* r, Site* s) { s->freeze(c, v); - move(c, v, s, pickTargetSite(c, r)); + maybeMove(c, r, false, true, 0); s->thaw(c, v); } @@ -3613,7 +3680,7 @@ getTarget(Context* c, Value* value, Value* result, const SiteMask& resultMask) s = value->source; v = value; if (r and uniqueSite(c, v, s)) { - preserve(c, v, s, r); + 
preserve(c, v, r, s); } } else { SingleRead r(resultMask, 0); @@ -4723,7 +4790,10 @@ class BuddyEvent: public Event { } virtual void compile(Context* c) { -// fprintf(stderr, "original %p buddy %p\n", original, buddy); + if (DebugBuddies) { + fprintf(stderr, "original %p buddy %p\n", original, buddy); + } + assert(c, hasSite(c, original)); assert(c, original); @@ -4887,18 +4957,7 @@ readSource(Context* c, Read* r) r->maybeIntersectWithHighSource(c); - Site* site = pickSourceSite(c, r); - - if (site) { - return site; - } else { - Site* target = pickTargetSite(c, r, true); - unsigned copyCost; - site = pickSourceSite(c, r, target, &copyCost, ~0, false); - assert(c, copyCost); - move(c, v, site, target); - return target; - } + return pickSiteOrMove(c, r, true, true); } void @@ -4974,33 +5033,6 @@ thaw(Context* c, SiteRecordList* frozen) } } -Site* -acquireSite(Context* c, SiteRecordList* frozen, Site* target, Value* v, - Read* r, bool pickSource) -{ - assert(c, hasSite(c, v)); - - unsigned copyCost; - Site* source; - if (pickSource) { - source = pickSourceSite(c, r, target, &copyCost, ~0, false); - } else { - copyCost = 0; - source = target; - } - - if (copyCost) { - target = target->copy(c); - move(c, v, source, target); - } else { - target = source; - } - - freeze(c, frozen, target, v); - - return target; -} - bool resolveOriginalSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) @@ -5023,7 +5055,10 @@ resolveOriginalSites(Context* c, Event* e, SiteRecordList* frozen, buffer, v, el.localIndex, frameIndex(c, &el)); } - acquireSite(c, frozen, s, v, r, true); + Site* target = pickSiteOrMove + (c, v, s->mask(c), true, true, ResolveRegisterReserveCount); + + freeze(c, frozen, target, v); } else { complete = false; } @@ -5058,9 +5093,10 @@ resolveSourceSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand); Site* s = pickSourceSite - (c, r, 0, 0, mask, true, false, acceptForResolve); + 
(c, r, 0, 0, mask, true, false, true, acceptForResolve); if (s == 0) { - s = pickSourceSite(c, r, 0, 0, mask, false, false, acceptForResolve); + s = pickSourceSite + (c, r, 0, 0, mask, false, false, true, acceptForResolve); } if (s) { @@ -5070,7 +5106,9 @@ resolveSourceSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) buffer, v, el.localIndex, frameIndex(c, &el)); } - sites[el.localIndex] = acquireSite(c, frozen, s, v, r, false)->copy(c); + freeze(c, frozen, s, v); + + sites[el.localIndex] = s->copy(c); } else { complete = false; } @@ -5091,25 +5129,25 @@ resolveTargetSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) if (r and sites[el.localIndex] == 0) { const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand); - bool useTarget = false; - Site* s = pickSourceSite(c, r, 0, 0, mask, true, true, acceptForResolve); + Site* s = pickSourceSite + (c, r, 0, 0, mask, true, true, true, acceptForResolve); if (s == 0) { - s = pickSourceSite(c, r, 0, 0, mask, false, true, acceptForResolve); + s = pickSourceSite + (c, r, 0, 0, mask, false, true, true, acceptForResolve); if (s == 0) { - s = pickTargetSite(c, r, false, ResolveRegisterReserveCount); - useTarget = true; + s = maybeMove(c, r, false, true, ResolveRegisterReserveCount); } } + freeze(c, frozen, s, v); + + sites[el.localIndex] = s->copy(c); + if (DebugControl) { - char buffer[256]; s->toString(c, buffer, 256); + char buffer[256]; sites[el.localIndex]->toString(c, buffer, 256); fprintf(stderr, "resolve target %s for %p local %d frame %d\n", buffer, el.value, el.localIndex, frameIndex(c, &el)); } - - Site* acquired = acquireSite(c, frozen, s, v, r, useTarget)->copy(c); - - sites[el.localIndex] = (useTarget ? 
s : acquired->copy(c)); } } } diff --git a/src/jnienv.cpp b/src/jnienv.cpp index 533039b066..391938180a 100644 --- a/src/jnienv.cpp +++ b/src/jnienv.cpp @@ -2169,10 +2169,7 @@ JNI_CreateJavaVM(Machine** m, Thread** t, void* args) System* s = makeSystem(crashDumpDirectory); Heap* h = makeHeap(s, heapLimit); Finder* f = makeFinder(s, RUNTIME_ARRAY_BODY(classpathBuffer), bootLibrary); - Processor* p = makeProcessor(s, h, false); // change back to true - // once use of SSE is - // fixed on 32-bit - // systems + Processor* p = makeProcessor(s, h, true); const char** properties = static_cast (h->allocate(sizeof(const char*) * propertyCount)); diff --git a/src/powerpc.cpp b/src/powerpc.cpp index b4092aca6c..63ee6da3b4 100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -2096,19 +2096,20 @@ class MyArchitecture: public Assembler::Architecture { } virtual void planMove - (unsigned, - uint8_t srcTypeMask, uint64_t srcRegisterMask, - uint8_t dstTypeMask, uint64_t, - uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask) + (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t dstRegisterMask) { - *tmpTypeMask = srcTypeMask; - *tmpRegisterMask = srcRegisterMask; + *srcTypeMask = ~0; + *srcRegisterMask = ~static_cast(0); - if ((dstTypeMask & (1 << MemoryOperand)) - and (srcTypeMask & ((1 << MemoryOperand) | 1 << AddressOperand))) - { - // can't move directly from memory to memory - *tmpTypeMask = (1 << RegisterOperand); + *tmpTypeMask = 0; + *tmpRegisterMask = 0; + + if (dstTypeMask & (1 << MemoryOperand)) { + // can't move directly from memory or constant to memory + *srcTypeMask = 1 << RegisterOperand; + *tmpTypeMask = 1 << RegisterOperand; *tmpRegisterMask = ~static_cast(0); } } diff --git a/src/x86.cpp b/src/x86.cpp index 310aa8350f..b492f299f6 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -2921,7 +2921,7 @@ class MyArchitecture: public Assembler::Architecture { break; case Float2Int: - 
if (useSSE(&c) and (bSize <= BytesPerWord)) { + if (useSSE(&c) and bSize <= BytesPerWord) { *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); *aRegisterMask = (static_cast(FloatRegisterMask) << 32) | FloatRegisterMask; @@ -2931,7 +2931,7 @@ class MyArchitecture: public Assembler::Architecture { break; case Int2Float: - if (useSSE(&c) and (aSize <= BytesPerWord)) { + if (useSSE(&c) and aSize <= BytesPerWord) { *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); *aRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); @@ -3039,38 +3039,46 @@ class MyArchitecture: public Assembler::Architecture { } virtual void planMove - (unsigned size, - uint8_t srcTypeMask, uint64_t srcRegisterMask, - uint8_t dstTypeMask, uint64_t dstRegisterMask, - uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask) + (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t dstRegisterMask) { - *tmpTypeMask = srcTypeMask; - *tmpRegisterMask = srcRegisterMask; + *srcTypeMask = ~0; + *srcRegisterMask = ~static_cast(0); - if ((dstTypeMask & (1 << MemoryOperand)) - and (srcTypeMask & ((1 << MemoryOperand) | 1 << AddressOperand))) - { + *tmpTypeMask = 0; + *tmpRegisterMask = 0; + + if (dstTypeMask & (1 << MemoryOperand)) { // can't move directly from memory to memory - *tmpTypeMask = (1 << RegisterOperand); + *srcTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); + *tmpTypeMask = 1 << RegisterOperand; *tmpRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); } else if (dstTypeMask & (1 << RegisterOperand)) { - if (srcTypeMask & (1 << RegisterOperand)) { - if (size != BytesPerWord - and (((dstRegisterMask & FloatRegisterMask) == 0) - xor ((srcRegisterMask & FloatRegisterMask) == 0))) - { - // can't move directly from FPR to GPR or vice-versa for - // values larger than the GPR size - *tmpTypeMask = (1 << MemoryOperand); - *tmpRegisterMask = 
0; + if (size > BytesPerWord) { + // can't move directly from FPR to GPR or vice-versa for + // values larger than the GPR size + if (dstRegisterMask & FloatRegisterMask) { + *srcRegisterMask = FloatRegisterMask + | (static_cast(FloatRegisterMask) << 32); + *tmpTypeMask = 1 << MemoryOperand; + } else if (dstRegisterMask & GeneralRegisterMask) { + *srcRegisterMask = GeneralRegisterMask + | (static_cast(GeneralRegisterMask) << 32); + *tmpTypeMask = 1 << MemoryOperand; } - } else if ((dstRegisterMask & FloatRegisterMask) - and (srcTypeMask & (1 << ConstantOperand))) - { + } + if (dstRegisterMask & FloatRegisterMask) { // can't move directly from constant to FPR - *tmpTypeMask = (1 << MemoryOperand); - *tmpRegisterMask = 0; + *srcTypeMask &= ~(1 << ConstantOperand); + if (size > BytesPerWord) { + *tmpTypeMask = 1 << MemoryOperand; + } else { + *tmpTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *tmpRegisterMask = GeneralRegisterMask + | (static_cast(GeneralRegisterMask) << 32); + } } } } diff --git a/test/AllFloats.java b/test/AllFloats.java index ca68df4396..0bba388b9b 100644 --- a/test/AllFloats.java +++ b/test/AllFloats.java @@ -19,6 +19,8 @@ public class AllFloats { private static float complex(float a, float b) {return (a - b) / (a * b) + (float)Math.sqrt(a);} private static double complex(double a, double b) {return (a - b) / (a * b) + Math.sqrt(a);} private static double complex(float a, double b) {return (a - b) / (a * b) + Math.sqrt(a);} + private static double sqrt(double a) {return Math.sqrt(a);} + private static float complexNoIntrinsic(float a, float b) {return (a - b) / (a * b) + (float)sqrt(a);} private static int f2i(float a) {return (int)a;} private static long f2l(float a) {return (long)a;} private static float i2f(int a) {return (float)a;} @@ -59,6 +61,7 @@ public class AllFloats { expect(complex(4f, 3f) == (4f-3f)/(4f*3f) + 2f); expect(complex(4d, 3d) == (4d-3d)/(4d*3d) + 2d); expect(complex(4f, 3d) == (4f-3d)/(4f*3d) + 2f); + 
expect(complexNoIntrinsic(4f, 3f) == (4f-3f)/(4f*3f) + 2f); expect(f2i(4f) == 4); expect(f2l(4f) == 4); diff --git a/test/Floats.java b/test/Floats.java index bd85a74d10..9b23b4c2d3 100644 --- a/test/Floats.java +++ b/test/Floats.java @@ -19,6 +19,12 @@ public class Floats { return a - b; } + private double field = 100d; + + private static int doubleToInt(Floats f) { + return (int) f.field; + } + public static void main(String[] args) { expect(multiply(0.5d, 0.5d) == 0.25d); expect(multiply(0.5f, 0.5f) == 0.25f); @@ -50,10 +56,20 @@ public class Floats { expect(((int) d) == 1); } + { double d = 12345d; + expect(((int) d) == 12345); + } + + expect(doubleToInt(new Floats()) == 100); + { float f = 1f; expect(((int) f) == 1); } + { float f = 1f; + expect(((long) f) == 1); + } + expect(Math.round(0.4f) == 0); expect(Math.round(0.5f) == 1); expect(Math.round(1.0f) == 1); @@ -73,5 +89,20 @@ public class Floats { double d = (double) z; expect(d == 6553311036568663.0); } + + { long z = 12345L; + float f = (float) z; + expect(f == 12345.0); + } + + { int z = 12345; + float f = (float) z; + expect(f == 12345.0); + } + + { int z = 12345; + double d = (double) z; + expect(d == 12345.0); + } } } From c615db31fb31c818a241f6ae45b3fcccf58303cd Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 28 Nov 2009 18:17:17 +0000 Subject: [PATCH 03/19] refine move cost calculation to avoid indirect moves (e.g. memory to memory) --- src/compiler.cpp | 197 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 137 insertions(+), 60 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index e3a90faf32..ec243184b6 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -36,6 +36,12 @@ const unsigned StealRegisterReserveCount = 2; // compare instruction: const unsigned ResolveRegisterReserveCount = (BytesPerWord == 8 ? 
2 : 4); +const unsigned RegisterCopyCost = 1; +const unsigned AddressCopyCost = 2; +const unsigned ConstantCopyCost = 3; +const unsigned MemoryCopyCost = 4; +const unsigned CopyPenalty = 10; + class Context; class Value; class Stack; @@ -1247,6 +1253,7 @@ class Target { static const unsigned MinimumFrameCost = 1; static const unsigned StealPenalty = 2; static const unsigned StealUniquePenalty = 4; + static const unsigned IndirectMovePenalty = 4; static const unsigned LowRegisterPenalty = 10; static const unsigned Impossible = 20; @@ -1277,37 +1284,49 @@ valueType(Context* c, Compiler::OperandType type) } } -Target -pickTarget(Context* c, Read* r, bool intersectRead, - unsigned registerReserveCount); +class CostCalculator { + public: + virtual unsigned cost(Context* c, uint8_t typeMask, uint32_t registerMask, + int frameIndex) = 0; +}; unsigned -resourceCost(Context* c UNUSED, Value* v, Resource* r) +resourceCost(Context* c, Value* v, Resource* r, uint8_t typeMask, + uint32_t registerMask, int frameIndex, + CostCalculator* costCalculator) { if (r->reserved or r->freezeCount or r->referenceCount) { return Target::Impossible; - } else if (r->value) { - assert(c, findSite(c, r->value, r->site)); + } else { + unsigned baseCost = costCalculator ? 
costCalculator->cost + (c, typeMask, registerMask, frameIndex) : 0; - if (v and buddies(r->value, v)) { - return 0; - } else if (uniqueSite(c, r->value, r->site)) { - return Target::StealUniquePenalty; + if (r->value) { + assert(c, findSite(c, r->value, r->site)); + + if (v and buddies(r->value, v)) { + return baseCost; + } else if (uniqueSite(c, r->value, r->site)) { + return baseCost + Target::StealUniquePenalty; + } else { + return baseCost + Target::StealPenalty; + } } else { - return Target::StealPenalty; + return baseCost; } - } else { - return 0; } } bool pickRegisterTarget(Context* c, int i, Value* v, uint32_t mask, int* target, - unsigned* cost) + unsigned* cost, CostCalculator* costCalculator = 0) { if ((1 << i) & mask) { RegisterResource* r = c->registerResources + i; - unsigned myCost = resourceCost(c, v, r) + Target::MinimumRegisterCost; + unsigned myCost = resourceCost + (c, v, r, 1 << RegisterOperand, 1 << i, NoFrameIndex, costCalculator) + + Target::MinimumRegisterCost; + if ((static_cast<uint32_t>(1) << i) == mask) { *cost = myCost; return true; @@ -1320,7 +1339,8 @@ pickRegisterTarget(Context* c, int i, Value* v, uint32_t mask, int* target, } int -pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) +pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost, + CostCalculator* costCalculator = 0) { int target = NoRegister; *cost = Target::Impossible; @@ -1329,7 +1349,7 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) for (int i = c->generalRegisterLimit - 1; i >= c->generalRegisterStart; --i) { - if (pickRegisterTarget(c, i, v, mask, &target, cost)) { + if (pickRegisterTarget(c, i, v, mask, &target, cost, costCalculator)) { return i; } } @@ -1339,7 +1359,7 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) for (int i = c->floatRegisterStart; i < static_cast<int>(c->floatRegisterLimit); ++i) { - if (pickRegisterTarget(c, i, v, mask, &target, cost)) { + if (pickRegisterTarget(c, i, v, 
mask, &target, cost, costCalculator)) { return i; } } @@ -1349,29 +1369,34 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) } Target -pickRegisterTarget(Context* c, Value* v, uint32_t mask) +pickRegisterTarget(Context* c, Value* v, uint32_t mask, + CostCalculator* costCalculator = 0) { unsigned cost; - int number = pickRegisterTarget(c, v, mask, &cost); + int number = pickRegisterTarget(c, v, mask, &cost, costCalculator); return Target(number, RegisterOperand, cost); } unsigned -frameCost(Context* c, Value* v, int frameIndex) +frameCost(Context* c, Value* v, int frameIndex, CostCalculator* costCalculator) { - return resourceCost(c, v, c->frameResources + frameIndex) + return resourceCost + (c, v, c->frameResources + frameIndex, 1 << MemoryOperand, 0, frameIndex, + costCalculator) + Target::MinimumFrameCost; } Target -pickFrameTarget(Context* c, Value* v) +pickFrameTarget(Context* c, Value* v, CostCalculator* costCalculator) { Target best; Value* p = v; do { if (p->home >= 0) { - Target mine(p->home, MemoryOperand, frameCost(c, v, p->home)); + Target mine + (p->home, MemoryOperand, frameCost(c, v, p->home, costCalculator)); + if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1385,13 +1410,13 @@ pickFrameTarget(Context* c, Value* v) } Target -pickAnyFrameTarget(Context* c, Value* v) +pickAnyFrameTarget(Context* c, Value* v, CostCalculator* costCalculator) { Target best; unsigned count = totalFrameSize(c); for (unsigned i = 0; i < count; ++i) { - Target mine(i, MemoryOperand, frameCost(c, v, i)); + Target mine(i, MemoryOperand, frameCost(c, v, i, costCalculator)); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1404,10 +1429,12 @@ pickAnyFrameTarget(Context* c, Value* v) Target pickTarget(Context* c, Value* value, const SiteMask& mask, - unsigned registerPenalty, Target best) + unsigned registerPenalty, Target best, + CostCalculator* 
costCalculator) { if (mask.typeMask & (1 << RegisterOperand)) { - Target mine = pickRegisterTarget(c, value, mask.registerMask); + Target mine = pickRegisterTarget + (c, value, mask.registerMask, costCalculator); mine.cost += registerPenalty; if (mine.cost == Target::MinimumRegisterCost) { @@ -1420,14 +1447,14 @@ pickTarget(Context* c, Value* value, const SiteMask& mask, if (mask.typeMask & (1 << MemoryOperand)) { if (mask.frameIndex >= 0) { Target mine(mask.frameIndex, MemoryOperand, - frameCost(c, value, mask.frameIndex)); + frameCost(c, value, mask.frameIndex, costCalculator)); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { best = mine; } } else if (mask.frameIndex == AnyFrameIndex) { - Target mine = pickFrameTarget(c, value); + Target mine = pickFrameTarget(c, value, costCalculator); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1441,7 +1468,7 @@ pickTarget(Context* c, Value* value, const SiteMask& mask, Target pickTarget(Context* c, Read* read, bool intersectRead, - unsigned registerReserveCount) + unsigned registerReserveCount, CostCalculator* costCalculator) { unsigned registerPenalty = (c->availableGeneralRegisterCount > registerReserveCount @@ -1463,7 +1490,9 @@ pickTarget(Context* c, Read* read, bool intersectRead, if (r) { SiteMask intersection = mask; if (r->intersect(&intersection)) { - best = pickTarget(c, value, intersection, registerPenalty, best); + best = pickTarget + (c, value, intersection, registerPenalty, best, costCalculator); + if (best.cost <= Target::MinimumFrameCost) { return best; } @@ -1471,7 +1500,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, } } - best = pickTarget(c, value, mask, registerPenalty, best); + best = pickTarget(c, value, mask, registerPenalty, best, costCalculator); if (best.cost <= Target::MinimumFrameCost) { return best; } @@ -1485,7 +1514,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, return 
best; } - { Target mine = pickRegisterTarget(c, value, registerMask); + { Target mine = pickRegisterTarget(c, value, registerMask, costCalculator); mine.cost += registerPenalty; @@ -1496,7 +1525,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, } } - { Target mine = pickFrameTarget(c, value); + { Target mine = pickFrameTarget(c, value, costCalculator); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1510,7 +1539,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, // there are no free registers left, so moving from memory to // memory isn't an option - try harder to find an available frame // site: - best = pickAnyFrameTarget(c, value); + best = pickAnyFrameTarget(c, value, costCalculator); assert(c, best.cost <= 3); } @@ -1558,7 +1587,7 @@ class ConstantSite: public Site { } virtual unsigned copyCost(Context*, Site* s) { - return (s == this ? 0 : 3); + return (s == this ? 0 : ConstantCopyCost); } virtual bool match(Context*, const SiteMask& mask) { @@ -1650,7 +1679,7 @@ class AddressSite: public Site { } virtual unsigned copyCost(Context*, Site* s) { - return (s == this ? 0 : 2); + return (s == this ? 
0 : AddressCopyCost); } virtual bool match(Context*, const SiteMask& mask) { @@ -1738,7 +1767,7 @@ class RegisterSite: public Site { { return 0; } else { - return 1; + return RegisterCopyCost; } } @@ -1928,7 +1957,7 @@ class MemorySite: public Site { { return 0; } else { - return 4; + return MemoryCopyCost; } } @@ -2186,10 +2215,15 @@ sitesToString(Context* c, Value* v, char* buffer, unsigned size) Site* pickTargetSite(Context* c, Read* read, bool intersectRead = false, - unsigned registerReserveCount = 0) + unsigned registerReserveCount = 0, + CostCalculator* costCalculator = 0) { - Target target(pickTarget(c, read, intersectRead, registerReserveCount)); + Target target + (pickTarget + (c, read, intersectRead, registerReserveCount, costCalculator)); + expect(c, target.cost < Target::Impossible); + if (target.type == MemoryOperand) { return frameSite(c, target.index); } else { @@ -2298,11 +2332,49 @@ Site* maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, unsigned registerReserveCount = 0) { - Site* dst = pickTargetSite(c, read, intersectRead, registerReserveCount); - Value* value = read->value; unsigned size = value == value->nextWord ? 
BytesPerWord : 8; + class MyCostCalculator: public CostCalculator { + public: + MyCostCalculator(Value* value, unsigned size, bool includeNextWord): + value(value), + size(size), + includeNextWord(includeNextWord) + { } + + virtual unsigned cost(Context* c, uint8_t typeMask, uint32_t registerMask, + int frameIndex) + { + uint8_t srcTypeMask; + uint64_t srcRegisterMask; + uint8_t tmpTypeMask; + uint64_t tmpRegisterMask; + c->arch->planMove + (size, &srcTypeMask, &srcRegisterMask, + &tmpTypeMask, &tmpRegisterMask, + typeMask, registerMask); + + SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex); + SiteMask dstMask(typeMask, registerMask, frameIndex); + for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) { + Site* s = it.next(); + if (s->match(c, srcMask) or s->match(c, dstMask)) { + return 0; + } + } + + return Target::IndirectMovePenalty; + } + + Value* value; + unsigned size; + bool includeNextWord; + } costCalculator(value, size, includeNextWord); + + Site* dst = pickTargetSite + (c, read, intersectRead, registerReserveCount, &costCalculator); + uint8_t srcTypeMask; uint64_t srcRegisterMask; uint8_t tmpTypeMask; @@ -2313,24 +2385,25 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, 1 << dst->type(c), dst->registerMask(c)); SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex); - SingleRead srcRead(srcMask, 0); - srcRead.value = value; - - unsigned cost; - Site* src = pickSourceSite - (c, &srcRead, dst, &cost, ~0, true, true, includeNextWord); - - if (src == 0 or cost) { - unsigned cost2; - Site* src2 = pickSourceSite - (c, &srcRead, dst, &cost2, ~0, false, true, includeNextWord); - - if (src == 0 or cost2 == 0) { - src = src2; - cost = cost2; + unsigned cost = 0xFFFFFFFF; + Site* src = 0; + for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) { + Site* s = it.next(); + unsigned v = s->copyCost(c, dst); + if (v == 0) { + src = s; + cost = 0; + break; + } + if (not s->match(c, 
srcMask)) { + v += CopyPenalty; + } + if (v < cost) { + src = s; + cost = v; } } - + if (cost) { if (not src->match(c, srcMask)) { src->freeze(c, value); @@ -2339,6 +2412,7 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, SiteMask tmpMask(tmpTypeMask, tmpRegisterMask, AnyFrameIndex); SingleRead tmpRead(tmpMask, 0); tmpRead.value = value; + tmpRead.successor_ = value; Site* tmp = pickTargetSite(c, &tmpRead, true); @@ -2377,6 +2451,8 @@ pickSiteOrMove(Context* c, Value* v, const SiteMask& mask, bool intersectMask, { SingleRead read(mask, 0); read.value = v; + read.successor_ = v; + return pickSiteOrMove (c, &read, intersectMask, includeNextWord, registerReserveCount); } @@ -3685,6 +3761,7 @@ getTarget(Context* c, Value* value, Value* result, const SiteMask& resultMask) } else { SingleRead r(resultMask, 0); r.value = result; + r.successor_ = result; s = pickTargetSite(c, &r, true); v = result; addSite(c, result, s); From 75934c83423c482682618cb1f70c456caaa4699f Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 28 Nov 2009 15:01:54 -0700 Subject: [PATCH 04/19] provide fast paths for common thread state transitions These paths reduce contention among threads by using atomic operations and memory barriers instead of mutexes where possible. This is especially important for JNI calls, since each such call involves two state transitions: from "active" to "idle" and back. 
--- src/machine.cpp | 104 ++++++++++++++++++++++++++++++++++++------------ src/powerpc.h | 8 +++- src/x86.h | 30 ++++++++++---- 3 files changed, 108 insertions(+), 34 deletions(-) diff --git a/src/machine.cpp b/src/machine.cpp index 30a4e6d220..20c3b43d50 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -14,6 +14,7 @@ #include "stream.h" #include "constants.h" #include "processor.h" +#include "arch.h" using namespace vm; @@ -21,6 +22,14 @@ namespace { const unsigned NoByte = 0xFFFF; +#ifdef USE_ATOMIC_OPERATIONS +void +atomicIncrement(unsigned* p, int v) +{ + while (not atomicCompareAndSwap32(p, *p, *p + v)) { } +} +#endif + bool find(Thread* t, Thread* o) { @@ -2319,10 +2328,22 @@ enter(Thread* t, Thread::State s) return; } +#ifdef USE_ATOMIC_OPERATIONS +# define INCREMENT atomicIncrement +# define ACQUIRE_LOCK ACQUIRE_RAW(t, t->m->stateLock) +# define BARRIER memoryBarrier() +#else +# define INCREMENT(pointer, value) *(pointer) += value; +# define ACQUIRE_LOCK +# define BARRIER + ACQUIRE_RAW(t, t->m->stateLock); +#endif // not USE_ATOMIC_OPERATIONS switch (s) { case Thread::ExclusiveState: { + ACQUIRE_LOCK; + while (t->m->exclusive) { // another thread got here first. 
ENTER(t, Thread::IdleState); @@ -2332,7 +2353,7 @@ enter(Thread* t, Thread::State s) case Thread::ActiveState: break; case Thread::IdleState: { - ++ t->m->activeCount; + INCREMENT(&(t->m->activeCount), 1); } break; default: abort(t); @@ -2340,14 +2361,35 @@ enter(Thread* t, Thread::State s) t->state = Thread::ExclusiveState; t->m->exclusive = t; - + + BARRIER; + while (t->m->activeCount > 1) { t->m->stateLock->wait(t->systemThread, 0); } } break; case Thread::IdleState: + if (t->state == Thread::ActiveState) { + // fast path + assert(t, t->m->activeCount > 0); + INCREMENT(&(t->m->activeCount), -1); + + t->state = s; + + if (t->m->exclusive) { + ACQUIRE_LOCK; + + t->m->stateLock->notifyAll(t->systemThread); + } + break; + } else { + // fall through to slow path + } + case Thread::ZombieState: { + ACQUIRE_LOCK; + switch (t->state) { case Thread::ExclusiveState: { assert(t, t->m->exclusive == t); @@ -2360,7 +2402,7 @@ enter(Thread* t, Thread::State s) } assert(t, t->m->activeCount > 0); - -- t->m->activeCount; + INCREMENT(&(t->m->activeCount), -1); if (s == Thread::ZombieState) { assert(t, t->m->liveCount > 0); @@ -2375,35 +2417,45 @@ enter(Thread* t, Thread::State s) t->m->stateLock->notifyAll(t->systemThread); } break; - case Thread::ActiveState: { - switch (t->state) { - case Thread::ExclusiveState: { - assert(t, t->m->exclusive == t); - + case Thread::ActiveState: + if (t->state == Thread::IdleState and t->m->exclusive == 0) { + // fast path + INCREMENT(&(t->m->activeCount), 1); t->state = s; - t->m->exclusive = 0; + break; + } else { + ACQUIRE_LOCK; - t->m->stateLock->notifyAll(t->systemThread); - } break; + switch (t->state) { + case Thread::ExclusiveState: { + assert(t, t->m->exclusive == t); - case Thread::NoState: - case Thread::IdleState: { - while (t->m->exclusive) { - t->m->stateLock->wait(t->systemThread, 0); + t->state = s; + t->m->exclusive = 0; + + t->m->stateLock->notifyAll(t->systemThread); + } break; + + case Thread::NoState: + case 
Thread::IdleState: { + while (t->m->exclusive) { + t->m->stateLock->wait(t->systemThread, 0); + } + + INCREMENT(&(t->m->activeCount), 1); + if (t->state == Thread::NoState) { + ++ t->m->liveCount; + } + t->state = s; + } break; + + default: abort(t); } - - ++ t->m->activeCount; - if (t->state == Thread::NoState) { - ++ t->m->liveCount; - } - t->state = s; } break; - default: abort(t); - } - } break; - case Thread::ExitState: { + ACQUIRE_LOCK; + switch (t->state) { case Thread::ExclusiveState: { assert(t, t->m->exclusive == t); @@ -2418,7 +2470,7 @@ enter(Thread* t, Thread::State s) } assert(t, t->m->activeCount > 0); - -- t->m->activeCount; + INCREMENT(&(t->m->activeCount), -1); t->state = s; diff --git a/src/powerpc.h b/src/powerpc.h index 1b906537cc..1ef1437e0f 100644 --- a/src/powerpc.h +++ b/src/powerpc.h @@ -92,7 +92,7 @@ syncInstructionCache(const void* start, unsigned size) #ifdef USE_ATOMIC_OPERATIONS inline bool -atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) +atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_) { #if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) return __sync_bool_compare_and_swap(p, old, new_); @@ -118,6 +118,12 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) return result; #endif // not GCC >= 4.1 } + +inline bool +atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) +{ + return atomicCompareAndSwap32(p, old, new_); +} #endif // USE_ATOMIC_OPERATIONS inline uint64_t diff --git a/src/x86.h b/src/x86.h index f9f3c038a1..807fbcecbd 100644 --- a/src/x86.h +++ b/src/x86.h @@ -190,17 +190,13 @@ syncInstructionCache(const void*, unsigned) #ifdef USE_ATOMIC_OPERATIONS inline bool -atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) +atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_) { #ifdef _MSC_VER -# ifdef ARCH_x86_32 InterlockedCompareExchange(p, new_, old); -# elif defined ARCH_x86_64 - InterlockedCompareExchange64(p, new_, old); -# endif // ARCH_x86_64 
#elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) return __sync_bool_compare_and_swap(p, old, new_); -#elif defined ARCH_x86_32 +#else uint8_t result; __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1" @@ -209,7 +205,17 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) : "memory"); return result != 0; -#elif defined ARCH_x86_64 +#endif +} + +inline bool +atomicCompareAndSwap64(uint64_t* p, uint64_t old, uint64_t new_) +{ +#ifdef _MSC_VER + InterlockedCompareExchange64(p, new_, old); +#elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) + return __sync_bool_compare_and_swap(p, old, new_); +#else uint8_t result; __asm__ __volatile__("lock; cmpxchgq %2, %0; setz %1" @@ -218,6 +224,16 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) : "memory"); return result != 0; +#endif +} + +inline bool +atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) +{ +#ifdef ARCH_x86_32 + return atomicCompareAndSwap32(p, old, new_); +#elif defined ARCH_x86_64 + return atomicCompareAndSwap64(p, old, new_); #endif // ARCH_x86_64 } #endif // USE_ATOMIC_OPERATIONS From 3418a8bcbe3b6c61f38e1b5721e644f94e7bb6ba Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 28 Nov 2009 15:24:02 -0700 Subject: [PATCH 05/19] fix race condition introduced in previous commit --- src/machine.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/machine.cpp b/src/machine.cpp index 20c3b43d50..cdb12a5219 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -2421,10 +2421,18 @@ enter(Thread* t, Thread::State s) if (t->state == Thread::IdleState and t->m->exclusive == 0) { // fast path INCREMENT(&(t->m->activeCount), 1); - t->state = s; - break; - } else { - ACQUIRE_LOCK; + + if (t->m->exclusive) { + // a thread has entered exclusive mode - switch to slow path + assert(t, t->m->activeCount > 0); + INCREMENT(&(t->m->activeCount), -1); + } else { + t->state = s; + break; + } + } + + { ACQUIRE_LOCK; switch (t->state) { case 
Thread::ExclusiveState: { From 1558b85acf6704049c50c44b9f9f92966294b171 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 28 Nov 2009 15:35:15 -0700 Subject: [PATCH 06/19] second attempt to fix "idle to active" fast path If another thread succeeds in entering the "exclusive" state while we use the fast path to transition the current thread to "active", we must switch back to "idle" temporarily to allow the exclusive thread a chance to continue, and then retry the transition to "active" via the slow path. --- src/machine.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/machine.cpp b/src/machine.cpp index cdb12a5219..62c4649659 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -2422,12 +2422,13 @@ enter(Thread* t, Thread::State s) // fast path INCREMENT(&(t->m->activeCount), 1); + t->state = s; + if (t->m->exclusive) { - // a thread has entered exclusive mode - switch to slow path - assert(t, t->m->activeCount > 0); - INCREMENT(&(t->m->activeCount), -1); + // another thread has entered the exclusive state, so we + // return to idle and use the slow path to become active + enter(t, Thread::IdleState); } else { - t->state = s; break; } } From 6d9e1270cadfb7850c8bc2e50168db8f0a82efef Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sun, 29 Nov 2009 09:08:07 -0700 Subject: [PATCH 07/19] fix race conditions in atomic operations --- src/heap.cpp | 5 ++++- src/machine.cpp | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/heap.cpp b/src/heap.cpp index 740b9985af..5dcd75f7c2 100644 --- a/src/heap.cpp +++ b/src/heap.cpp @@ -76,7 +76,10 @@ markBitAtomic(uintptr_t* map, unsigned i) { uintptr_t* p = map + wordOf(i); uintptr_t v = static_cast<uintptr_t>(1) << bitOf(i); - while (not atomicCompareAndSwap(p, *p, *p | v)) { } + for (uintptr_t old = *p; + not atomicCompareAndSwap(p, old, old | v); + old = *p) + { } } #endif // USE_ATOMIC_OPERATIONS diff --git a/src/machine.cpp b/src/machine.cpp index 62c4649659..b9ae880225 100644 
--- a/src/machine.cpp +++ b/src/machine.cpp @@ -24,9 +24,12 @@ const unsigned NoByte = 0xFFFF; #ifdef USE_ATOMIC_OPERATIONS void -atomicIncrement(unsigned* p, int v) +atomicIncrement(uint32_t* p, int v) { - while (not atomicCompareAndSwap32(p, *p, *p + v)) { } + for (uint32_t old = *p; + not atomicCompareAndSwap32(p, old, old + v); + old = *p) + { } } #endif From 0b09c6aa30ed5708effa5b5400957e152bda4aed Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sun, 29 Nov 2009 16:53:05 -0700 Subject: [PATCH 08/19] avoid busy wait when entering "exclusive" state --- src/machine.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/machine.cpp b/src/machine.cpp index b9ae880225..9abe61301d 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -2350,6 +2350,7 @@ enter(Thread* t, Thread::State s) while (t->m->exclusive) { // another thread got here first. ENTER(t, Thread::IdleState); + t->m->stateLock->wait(t->systemThread, 0); } switch (t->state) { From 1c61c1f421fdcbd5de6465eee73efd266c86dfa3 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sun, 29 Nov 2009 16:53:46 -0700 Subject: [PATCH 09/19] fix x86 memoryBarrier implementation --- src/x86.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/x86.h b/src/x86.h index 807fbcecbd..0528d33695 100644 --- a/src/x86.h +++ b/src/x86.h @@ -159,9 +159,11 @@ memoryBarrier() { #ifdef _MSC_VER MemoryBarrier(); -#else - __asm__ __volatile__("": : :"memory"); -#endif +#elif defined ARCH_x86_32 + __asm__ __volatile__("lock; addl $0,0(%%esp)": : :"memory"); +#elif defined ARCH_x86_64 + __asm__ __volatile__("mfence": : :"memory"); +#endif // ARCH_x86_64 } inline void From 79d281f7fa3b6cf7ce2cf572dc6d0ab9b8498d73 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 02:17:08 +0000 Subject: [PATCH 10/19] encourage loads from memory directly into SSE registers where appropriate --- src/compiler.cpp | 6 +++++- src/x86.cpp | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git 
a/src/compiler.cpp b/src/compiler.cpp index ec243184b6..4223a30741 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -3447,6 +3447,10 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, c->arch->planSource(type, dstSize, &srcTypeMask, &srcRegisterMask, dstSize, &thunk); + if (src->type == ValueGeneral) { + srcRegisterMask &= c->arch->generalRegisterMask(); + } + assert(c, thunk == 0); assert(c, dstMask.typeMask & srcTypeMask & (1 << RegisterOperand)); @@ -6013,7 +6017,7 @@ class MyCompiler: public Compiler { virtual void push(unsigned footprint UNUSED) { assert(&c, footprint == 1); - Value* v = value(&c, ValueFloat); + Value* v = value(&c, ValueGeneral); Stack* s = local::stack(&c, v, c.stack); v->home = frameIndex(&c, s->index + c.localFootprint); diff --git a/src/x86.cpp b/src/x86.cpp index b492f299f6..13d9673c9b 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -946,6 +946,9 @@ void sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a, unsigned bSize UNUSED, Assembler::Register* b) { + assert(c, aSize >= 4); + assert(c, aSize == bSize); + if (floatReg(a) and floatReg(b)) { if (aSize == 4) { opcode(c, 0xf3); @@ -1090,6 +1093,9 @@ void sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a, unsigned bSize UNUSED, Assembler::Register* b) { + assert(c, aSize >= 4); + assert(c, aSize == bSize); + if (BytesPerWord == 4 and aSize == 8) { opcode(c, 0xf3); opcode(c, 0x0f, 0x7e); @@ -1165,6 +1171,7 @@ void sseMoveRM(Context* c, unsigned aSize, Assembler::Register* a, UNUSED unsigned bSize, Assembler::Memory* b) { + assert(c, aSize >= 4); assert(c, aSize == bSize); if (BytesPerWord == 4 and aSize == 8) { @@ -2942,8 +2949,7 @@ class MyArchitecture: public Assembler::Architecture { case Move: *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); - *aRegisterMask = GeneralRegisterMask - | (static_cast<uint64_t>(GeneralRegisterMask) << 32); + *aRegisterMask = ~static_cast<uint64_t>(0); if (BytesPerWord == 4) { if (aSize == 4 and bSize == 8) { From 
ec701b9994ca373c72594b495c554215f7804c56 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 15:08:45 +0000 Subject: [PATCH 11/19] whitespace tweaks --- src/compile.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compile.cpp b/src/compile.cpp index f0dfada33b..29eb342760 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -3040,6 +3040,7 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, (4, 4, c->memory (array, Compiler::FloatType, ArrayBody, index, 4), BytesPerWord)); break; + case iaload: frame->pushInt (c->load @@ -5621,6 +5622,7 @@ compile(MyThread* t, Allocator* allocator, Context* context) frame.set(--index, Frame::Long); c->initLocal(2, index, Compiler::IntegerType); break; + case 'D': frame.set(--index, Frame::Long); frame.set(--index, Frame::Long); From d9de4c607c0f436fe88af6fbdf60ea22d7895ca4 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 15:09:43 +0000 Subject: [PATCH 12/19] allow source operand of any type for move operations --- src/x86.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86.cpp b/src/x86.cpp index 13d9673c9b..7e565dc179 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -2948,7 +2948,7 @@ class MyArchitecture: public Assembler::Architecture { break; case Move: - *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *aTypeMask = ~0; *aRegisterMask = ~static_cast<uint64_t>(0); if (BytesPerWord == 4) { From 04454960ec5573a3683a463745c02e2ab5ae3b00 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 15:10:34 +0000 Subject: [PATCH 13/19] various bugfixes for handling 64-bit floating point values on 32-bit systems --- src/compiler.cpp | 101 ++++++++++++++++++++++++++++++++--------------- test/Floats.java | 23 +++++++++++ 2 files changed, 93 insertions(+), 31 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index 4223a30741..a4e9540b1b 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -280,7 +280,7 @@ class Read { virtual bool intersect(SiteMask* 
mask, unsigned depth = 0) = 0; - virtual void maybeIntersectWithHighSource(Context* c) { abort(c); } + virtual Value* high(Context* c) { abort(c); } virtual Value* successor() = 0; @@ -2234,7 +2234,7 @@ pickTargetSite(Context* c, Read* read, bool intersectRead = false, class SingleRead: public Read { public: SingleRead(const SiteMask& mask, Value* successor): - next_(0), mask(mask), high(0), successor_(successor) + next_(0), mask(mask), high_(0), successor_(successor) { } virtual bool intersect(SiteMask* mask, unsigned) { @@ -2243,11 +2243,8 @@ class SingleRead: public Read { return true; } - virtual void maybeIntersectWithHighSource(Context* c) { - if (high) { - Site* s = high->source; - this->mask = local::intersect(s->nextWordMask(c, 0), this->mask); - } + virtual Value* high(Context*) { + return high_; } virtual Value* successor() { @@ -2269,7 +2266,7 @@ class SingleRead: public Read { Read* next_; SiteMask mask; - Value* high; + Value* high_; Value* successor_; }; @@ -2430,6 +2427,18 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, return dst; } +Site* +maybeMove(Context* c, Value* v, const SiteMask& mask, bool intersectMask, + bool includeNextWord, unsigned registerReserveCount = 0) +{ + SingleRead read(mask, 0); + read.value = v; + read.successor_ = v; + + return maybeMove + (c, &read, intersectMask, includeNextWord, registerReserveCount); +} + Site* pickSiteOrMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, unsigned registerReserveCount = 0) @@ -2457,12 +2466,6 @@ pickSiteOrMove(Context* c, Value* v, const SiteMask& mask, bool intersectMask, (c, &read, intersectMask, includeNextWord, registerReserveCount); } -Site* -pickSiteOrMove(Context* c, Value* v, Site* s, unsigned index) -{ - return pickSiteOrMove(c, v, s->nextWordMask(c, index), true, false); -} - void steal(Context* c, Resource* r, Value* thief) { @@ -2706,9 +2709,9 @@ stubRead(Context* c) } Site* -pickSite(Context* c, Value* v, Site* s, unsigned 
index) +pickSite(Context* c, Value* v, Site* s, unsigned index, bool includeNextWord) { - for (SiteIterator it(c, v, true, false); it.hasMore();) { + for (SiteIterator it(c, v, true, includeNextWord); it.hasMore();) { Site* candidate = it.next(); if (s->matchNextWord(c, candidate, index)) { return candidate; @@ -2718,6 +2721,17 @@ pickSite(Context* c, Value* v, Site* s, unsigned index) return 0; } +Site* +pickSiteOrMove(Context* c, Value* v, Site* s, unsigned index) +{ + Site* n = pickSite(c, v, s, index, false); + if (n) { + return n; + } + + return maybeMove(c, v, s->nextWordMask(c, index), true, false); +} + Site* pickSiteOrMove(Context* c, Value* v, Site* s, Site** low, Site** high) { @@ -2735,7 +2749,7 @@ pickSiteOrMove(Context* c, Value* v, Site* s, Site** low, Site** high) Site* pickSiteOrGrow(Context* c, Value* v, Site* s, unsigned index) { - Site* n = pickSite(c, v, s, index); + Site* n = pickSite(c, v, s, index, false); if (n) { return n; } @@ -2949,7 +2963,7 @@ addReads(Context* c, Event* e, Value* v, unsigned size, SingleRead* r = read(c, lowMask, lowSuccessor); addRead(c, e, v, r); if (size > BytesPerWord) { - r->high = v->nextWord; + r->high_ = v->nextWord; addRead(c, e, v->nextWord, highMask, highSuccessor); } } @@ -3520,10 +3534,16 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, } void -maybeMove(Context* c, Value* src, Value* dst) +pickSiteOrMove(Context* c, Value* src, Value* dst) { if (live(dst)) { - maybeMove(c, live(src), false, true); + Read* read = live(src); + Site* s = pickSourceSite(c, read, 0, 0, ~0, false, true, true); + + if (s == 0 or s->isVolatile(c)) { + maybeMove(c, read, false, true); + } + addBuddy(src, dst); if (src->source->isVolatile(c)) { @@ -3620,16 +3640,23 @@ class MoveEvent: public Event { and srcSelectSize >= dstSize) { if (dst->target) { - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); - if (dstSize > BytesPerWord) { - maybeMove(c, Move, BytesPerWord, BytesPerWord, 
src->nextWord, - BytesPerWord, dst->nextWord, dstHighMask); + if (dstSize > BytesPerWord + and src->source->registerSize(c) > BytesPerWord) + { + apply(c, Move, srcSelectSize, src->source, src->source, + dstSize, dst->target, dst->target); + } else { + maybeMove(c, Move, BytesPerWord, BytesPerWord, src, + BytesPerWord, dst, dstLowMask); + if (dstSize > BytesPerWord) { + maybeMove(c, Move, BytesPerWord, BytesPerWord, src->nextWord, + BytesPerWord, dst->nextWord, dstHighMask); + } } } else { - maybeMove(c, src, dst); + pickSiteOrMove(c, src, dst); if (dstSize > BytesPerWord) { - maybeMove(c, src->nextWord, dst->nextWord); + pickSiteOrMove(c, src->nextWord, dst->nextWord); } } } else if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) { @@ -3687,7 +3714,7 @@ class MoveEvent: public Event { low->thaw(c, dst); } else { - maybeMove(c, src, dst); + pickSiteOrMove(c, src, dst); } } @@ -5036,9 +5063,21 @@ readSource(Context* c, Read* r) return 0; } - r->maybeIntersectWithHighSource(c); - - return pickSiteOrMove(c, r, true, true); + Value* high = r->high(c); + if (high) { + Site* s = pickSite(c, r->value, high->source, 0, true); + SiteMask mask; + r->intersect(&mask); + if (s and s->match(c, mask)) { + return s; + } else { + return pickSiteOrMove + (c, r->value, intersect(mask, high->source->nextWordMask(c, 0)), + true, true); + } + } else { + return pickSiteOrMove(c, r, true, true); + } } void diff --git a/test/Floats.java b/test/Floats.java index 9b23b4c2d3..412bd5cd6c 100644 --- a/test/Floats.java +++ b/test/Floats.java @@ -25,6 +25,14 @@ public class Floats { return (int) f.field; } + private static void multiplyAndStore(double a, double b, Floats f) { + f.field = a * b; + } + + private static double loadAndMultiply(double a, Floats f) { + return f.field * a; + } + public static void main(String[] args) { expect(multiply(0.5d, 0.5d) == 0.25d); expect(multiply(0.5f, 0.5f) == 0.25f); @@ -62,6 +70,21 @@ public class Floats { expect(doubleToInt(new Floats()) == 
100); + { Floats f = new Floats(); + f.field = 32.0d; + expect(loadAndMultiply(2.0d, f) == 64.0d); + } + + { Floats f = new Floats(); + f.field = 32.0d; + expect(multiply(2.0d, f.field) == 64.0d); + } + + { Floats f = new Floats(); + multiplyAndStore(32.0d, 0.5d, f); + expect(f.field == 16.0d); + } + { float f = 1f; expect(((int) f) == 1); } From 851187f0ce959dc0f0ed941b5209bbc4595d5c57 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 15:38:16 +0000 Subject: [PATCH 14/19] refine memory barrier implementation and usage --- src/compile.cpp | 8 +++---- src/machine.cpp | 6 ++--- src/x86.cpp | 62 ++++++++++++++++++++++++++++++++++--------------- src/x86.h | 6 ++--- 4 files changed, 53 insertions(+), 29 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index 29eb342760..4888778e7b 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -235,7 +235,7 @@ methodForIp(MyThread* t, void* ip) // we must use a version of the method tree at least as recent as the // compiled form of the method containing the specified address (see // compile(MyThread*, Allocator*, BootContext*, object)): - memoryBarrier(); + loadMemoryBarrier(); return treeQuery(t, methodTree(t), reinterpret_cast(ip), methodTreeSentinal(t), compareIpToMethodBounds); @@ -5878,7 +5878,7 @@ resolveNative(MyThread* t, object method) // methodCompiled, since we don't want them using the slow calling // convention on a function that expects the fast calling // convention: - memoryBarrier(); + storeStoreMemoryBarrier(); methodCompiled(t, method) = reinterpret_cast(function); } @@ -7469,7 +7469,7 @@ findCallNode(MyThread* t, void* address) // we must use a version of the call table at least as recent as the // compiled form of the method containing the specified address (see // compile(MyThread*, Allocator*, BootContext*, object)): - memoryBarrier(); + loadMemoryBarrier(); MyProcessor* p = processor(t); object table = p->callTable; @@ -8229,7 +8229,7 @@ compile(MyThread* t, Allocator* allocator, 
BootContext* bootContext, reinterpret_cast(compiled), clone, methodTreeSentinal(t), compareIpToMethodBounds); - memoryBarrier(); + storeStoreMemoryBarrier(); methodCompiled(t, method) = reinterpret_cast(compiled); diff --git a/src/machine.cpp b/src/machine.cpp index 9abe61301d..4fa9aaf9b5 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -2334,11 +2334,11 @@ enter(Thread* t, Thread::State s) #ifdef USE_ATOMIC_OPERATIONS # define INCREMENT atomicIncrement # define ACQUIRE_LOCK ACQUIRE_RAW(t, t->m->stateLock) -# define BARRIER memoryBarrier() +# define STORE_LOAD_MEMORY_BARRIER storeLoadMemoryBarrier() #else # define INCREMENT(pointer, value) *(pointer) += value; # define ACQUIRE_LOCK -# define BARRIER +# define STORE_LOAD_MEMORY_BARRIER ACQUIRE_RAW(t, t->m->stateLock); #endif // not USE_ATOMIC_OPERATIONS @@ -2366,7 +2366,7 @@ enter(Thread* t, Thread::State s) t->state = Thread::ExclusiveState; t->m->exclusive = t; - BARRIER; + STORE_LOAD_MEMORY_BARRIER; while (t->m->activeCount > 1) { t->m->stateLock->wait(t->systemThread, 0); diff --git a/src/x86.cpp b/src/x86.cpp index 7e565dc179..530333c1d6 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -113,23 +113,7 @@ class MyBlock: public Assembler::Block { unsigned size; }; -class Context { - public: - Context(System* s, Allocator* a, Zone* zone): - s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0), - firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)), - lastBlock(firstBlock) - { } - - System* s; - Zone* zone; - Assembler::Client* client; - Vector code; - Task* tasks; - uint8_t* result; - MyBlock* firstBlock; - MyBlock* lastBlock; -}; +class Context; typedef void (*OperationType)(Context*); @@ -163,6 +147,25 @@ class ArchitectureContext { * OperandTypeCount]; }; +class Context { + public: + Context(System* s, Allocator* a, Zone* zone, ArchitectureContext* ac): + s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0), + firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)), + 
lastBlock(firstBlock), ac(ac) + { } + + System* s; + Zone* zone; + Assembler::Client* client; + Vector code; + Task* tasks; + uint8_t* result; + MyBlock* firstBlock; + MyBlock* lastBlock; + ArchitectureContext* ac; +}; + void NO_RETURN abort(Context* c) { @@ -620,6 +623,27 @@ void ignore(Context*) { } +void +storeLoadBarrier(Context* c) +{ + if (useSSE(c->ac)) { + // mfence: + c->code.append(0x0f); + c->code.append(0xae); + c->code.append(0xf0); + } else { + // lock addq $0x0,(%rsp): + c->code.append(0xf0); + if (BytesPerWord == 8) { + c->code.append(0x48); + } + c->code.append(0x83); + c->code.append(0x04); + c->code.append(0x24); + c->code.append(0x00); + } +} + void unconditional(Context* c, unsigned jump, Assembler::Constant* a) { @@ -2503,7 +2527,7 @@ populateTables(ArchitectureContext* c) zo[Return] = return_; zo[LoadBarrier] = ignore; zo[StoreStoreBarrier] = ignore; - zo[StoreLoadBarrier] = ignore; + zo[StoreLoadBarrier] = storeLoadBarrier; uo[index(c, Call, C)] = CAST1(callC); uo[index(c, Call, R)] = CAST1(callR); @@ -3225,7 +3249,7 @@ class MyArchitecture: public Assembler::Architecture { class MyAssembler: public Assembler { public: MyAssembler(System* s, Allocator* a, Zone* zone, MyArchitecture* arch): - c(s, a, zone), arch_(arch) + c(s, a, zone, &(arch->c)), arch_(arch) { } virtual void setClient(Client* client) { diff --git a/src/x86.h b/src/x86.h index 0528d33695..2ca377c86c 100644 --- a/src/x86.h +++ b/src/x86.h @@ -169,7 +169,7 @@ memoryBarrier() inline void storeStoreMemoryBarrier() { - memoryBarrier(); + __asm__ __volatile__("": : :"memory"); } inline void @@ -181,13 +181,13 @@ storeLoadMemoryBarrier() inline void loadMemoryBarrier() { - memoryBarrier(); + __asm__ __volatile__("": : :"memory"); } inline void syncInstructionCache(const void*, unsigned) { - // ignore + __asm__ __volatile__("": : :"memory"); } #ifdef USE_ATOMIC_OPERATIONS From 7fa10909f484a29fdcd29d484b2a14f6f5128fe3 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 
22:08:59 +0000 Subject: [PATCH 15/19] more bugfixes for handling 64-bit floats on 32-bit systems --- src/compiler.cpp | 120 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 30 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index a4e9540b1b..fbdd80b372 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -1476,12 +1476,19 @@ pickTarget(Context* c, Read* read, bool intersectRead, Value* value = read->value; - uint32_t registerMask = (value->type == ValueGeneral - ? c->arch->generalRegisterMask() : ~0); + uint32_t registerMask + = (value->type == ValueFloat ? ~0 : c->arch->generalRegisterMask()); SiteMask mask(~0, registerMask, AnyFrameIndex); read->intersect(&mask); + if (value->type == ValueFloat) { + uint32_t floatMask = mask.registerMask & c->arch->floatRegisterMask(); + if (floatMask) { + mask.registerMask = floatMask; + } + } + Target best; Value* successor = read->successor(); @@ -1792,8 +1799,21 @@ class RegisterSite: public Site { } virtual bool matchNextWord(Context* c, Site* s, unsigned) { - return s->type(c) == RegisterOperand - and s->registerSize(c) == BytesPerWord; + assert(c, number != NoRegister); + + if (s->type(c) != RegisterOperand) { + return false; + } + + RegisterSite* rs = static_cast(s); + unsigned size = rs->registerSize(c); + if (size > BytesPerWord) { + assert(c, number != NoRegister); + return number == rs->number; + } else { + uint32_t mask = c->arch->generalRegisterMask(); + return ((1 << number) & mask) and ((1 << rs->number) & mask); + } } virtual void acquire(Context* c, Value* v) { @@ -1876,6 +1896,9 @@ class RegisterSite: public Site { } virtual Site* makeNextWord(Context* c, unsigned) { + assert(c, number != NoRegister); + assert(c, ((1 << number) & c->arch->generalRegisterMask())); + return freeRegisterSite(c, c->arch->generalRegisterMask()); } @@ -1883,8 +1906,16 @@ class RegisterSite: public Site { return SiteMask(1 << RegisterOperand, mask_, NoFrameIndex); } - virtual SiteMask 
nextWordMask(Context*, unsigned) { - return SiteMask(1 << RegisterOperand, ~0, NoFrameIndex); + virtual SiteMask nextWordMask(Context* c, unsigned) { + assert(c, number != NoRegister); + + if (registerSize(c) > BytesPerWord) { + return SiteMask + (1 << RegisterOperand, number, NoFrameIndex); + } else { + return SiteMask + (1 << RegisterOperand, c->arch->generalRegisterMask(), NoFrameIndex); + } } virtual unsigned registerSize(Context* c) { @@ -1965,12 +1996,14 @@ class MemorySite: public Site { assert(c, acquired); if (mask.typeMask & (1 << MemoryOperand)) { - if (base == c->arch->stack()) { - assert(c, index == NoRegister); - return mask.frameIndex == AnyFrameIndex - or (mask.frameIndex != NoFrameIndex - and static_cast(frameIndexToOffset(c, mask.frameIndex)) - == offset); + if (mask.frameIndex >= 0) { + if (base == c->arch->stack()) { + assert(c, index == NoRegister); + return static_cast(frameIndexToOffset(c, mask.frameIndex)) + == offset; + } else { + return false; + } } else { return true; } @@ -3533,22 +3566,51 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, } } -void -pickSiteOrMove(Context* c, Value* src, Value* dst) +Site* +pickMatchOrMove(Context* c, Read* r, Site* nextWord, unsigned index, + bool intersectRead) +{ + Site* s = pickSite(c, r->value, nextWord, index, true); + SiteMask mask; + if (intersectRead) { + r->intersect(&mask); + } + if (s and s->match(c, mask)) { + return s; + } + + return pickSiteOrMove + (c, r->value, intersect(mask, nextWord->nextWordMask(c, index)), + true, true); +} + +Site* +pickSiteOrMove(Context* c, Value* src, Value* dst, Site* nextWord, + unsigned index) { if (live(dst)) { Read* read = live(src); - Site* s = pickSourceSite(c, read, 0, 0, ~0, false, true, true); + Site* s; + if (nextWord) { + s = pickMatchOrMove(c, read, nextWord, index, false); + } else { + s = pickSourceSite(c, read, 0, 0, ~0, false, true, true); - if (s == 0 or s->isVolatile(c)) { - maybeMove(c, read, false, true); + if (s == 0 or 
s->isVolatile(c)) { + s = maybeMove(c, read, false, true); + } } + assert(c, s); addBuddy(src, dst); if (src->source->isVolatile(c)) { removeSite(c, src, src->source); } + + return s; + } else { + return 0; } } @@ -3645,6 +3707,13 @@ class MoveEvent: public Event { { apply(c, Move, srcSelectSize, src->source, src->source, dstSize, dst->target, dst->target); + + if (live(dst) == 0) { + removeSite(c, dst, dst->target); + if (dstSize > BytesPerWord) { + removeSite(c, dst->nextWord, dst->nextWord->target); + } + } } else { maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, dstLowMask); @@ -3654,9 +3723,9 @@ class MoveEvent: public Event { } } } else { - pickSiteOrMove(c, src, dst); + Site* low = pickSiteOrMove(c, src, dst, 0, 0); if (dstSize > BytesPerWord) { - pickSiteOrMove(c, src->nextWord, dst->nextWord); + pickSiteOrMove(c, src->nextWord, dst->nextWord, low, 1); } } } else if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) { @@ -3714,7 +3783,7 @@ class MoveEvent: public Event { low->thaw(c, dst); } else { - pickSiteOrMove(c, src, dst); + pickSiteOrMove(c, src, dst, 0, 0); } } @@ -5065,16 +5134,7 @@ readSource(Context* c, Read* r) Value* high = r->high(c); if (high) { - Site* s = pickSite(c, r->value, high->source, 0, true); - SiteMask mask; - r->intersect(&mask); - if (s and s->match(c, mask)) { - return s; - } else { - return pickSiteOrMove - (c, r->value, intersect(mask, high->source->nextWordMask(c, 0)), - true, true); - } + return pickMatchOrMove(c, r, high->source, 0, true); } else { return pickSiteOrMove(c, r, true, true); } From 175cb8e89ba8901039168386a2b8d601a7ad8418 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 1 Dec 2009 02:06:01 +0000 Subject: [PATCH 16/19] more floating point bugfixes --- src/compiler.cpp | 28 +++++++++++++++++----------- src/x86.cpp | 7 +++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index fbdd80b372..311d4a288c 100644 --- 
a/src/compiler.cpp +++ b/src/compiler.cpp @@ -2320,13 +2320,17 @@ acceptMatch(Context* c, Site* s, Read*, const SiteMask& mask) Site* pickSourceSite(Context* c, Read* read, Site* target = 0, - unsigned* cost = 0, uint8_t typeMask = ~0, + unsigned* cost = 0, SiteMask* extraMask = 0, bool intersectRead = true, bool includeBuddies = true, bool includeNextWord = true, bool (*accept)(Context*, Site*, Read*, const SiteMask&) = acceptMatch) { - SiteMask mask(typeMask, ~0, AnyFrameIndex); + SiteMask mask; + + if (extraMask) { + mask = intersect(mask, *extraMask); + } if (intersectRead) { read->intersect(&mask); @@ -2477,7 +2481,7 @@ pickSiteOrMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, unsigned registerReserveCount = 0) { Site* s = pickSourceSite - (c, read, 0, 0, ~0, intersectRead, true, includeNextWord); + (c, read, 0, 0, 0, intersectRead, true, includeNextWord); if (s) { return s; @@ -3594,7 +3598,7 @@ pickSiteOrMove(Context* c, Value* src, Value* dst, Site* nextWord, if (nextWord) { s = pickMatchOrMove(c, read, nextWord, index, false); } else { - s = pickSourceSite(c, read, 0, 0, ~0, false, true, true); + s = pickSourceSite(c, read, 0, 0, 0, false, true, true); if (s == 0 or s->isVolatile(c)) { s = maybeMove(c, read, false, true); @@ -5270,13 +5274,14 @@ resolveSourceSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) Read* r = live(v); if (r and sites[el.localIndex] == 0) { - const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand); + SiteMask mask((1 << RegisterOperand) | (1 << MemoryOperand), + c->arch->generalRegisterMask(), AnyFrameIndex); Site* s = pickSourceSite - (c, r, 0, 0, mask, true, false, true, acceptForResolve); + (c, r, 0, 0, &mask, true, false, true, acceptForResolve); if (s == 0) { s = pickSourceSite - (c, r, 0, 0, mask, false, false, true, acceptForResolve); + (c, r, 0, 0, &mask, false, false, true, acceptForResolve); } if (s) { @@ -5307,15 +5312,16 @@ resolveTargetSites(Context* c, Event* e, 
SiteRecordList* frozen, Site** sites) Read* r = live(v); if (r and sites[el.localIndex] == 0) { - const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand); + SiteMask mask((1 << RegisterOperand) | (1 << MemoryOperand), + c->arch->generalRegisterMask(), AnyFrameIndex); Site* s = pickSourceSite - (c, r, 0, 0, mask, true, true, true, acceptForResolve); + (c, r, 0, 0, &mask, true, true, true, acceptForResolve); if (s == 0) { s = pickSourceSite - (c, r, 0, 0, mask, false, true, true, acceptForResolve); + (c, r, 0, 0, &mask, false, true, true, acceptForResolve); if (s == 0) { - s = maybeMove(c, r, false, true, ResolveRegisterReserveCount); + s = maybeMove(c, v, mask, false, true, ResolveRegisterReserveCount); } } diff --git a/src/x86.cpp b/src/x86.cpp index 530333c1d6..8ca7b9d614 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -978,12 +978,12 @@ sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a, opcode(c, 0xf3); maybeRex(c, 4, a, b); opcode(c, 0x0f, 0x10); - modrm(c, 0xc0, b, a); + modrm(c, 0xc0, a, b); } else { opcode(c, 0xf2); - maybeRex(c, 4, a, b); + maybeRex(c, 8, a, b); opcode(c, 0x0f, 0x10); - modrm(c, 0xc0, b, a); + modrm(c, 0xc0, a, b); } } else if (floatReg(a)) { opcode(c, 0x66); @@ -1118,7 +1118,6 @@ sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a, unsigned bSize UNUSED, Assembler::Register* b) { assert(c, aSize >= 4); - assert(c, aSize == bSize); if (BytesPerWord == 4 and aSize == 8) { opcode(c, 0xf3); From 0bdf1d8e82843cfcf5e2f2b1b7834278e3f62b2e Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Nov 2009 22:02:26 -0700 Subject: [PATCH 17/19] use thunks for floating point ops if SSE is not available --- src/x86.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/x86.cpp b/src/x86.cpp index 8ca7b9d614..b8290389e4 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -2919,9 +2919,13 @@ class MyArchitecture: public Assembler::Architecture { break; case FloatAbsolute: - *aTypeMask = (1 << 
RegisterOperand); - *aRegisterMask = (static_cast(FloatRegisterMask) << 32) - | FloatRegisterMask; + if (useSSE(&c)) { + *aTypeMask = (1 << RegisterOperand); + *aRegisterMask = (static_cast(FloatRegisterMask) << 32) + | FloatRegisterMask; + } else { + *thunk = true; + } break; case FloatNegate: @@ -2935,9 +2939,13 @@ class MyArchitecture: public Assembler::Architecture { break; case FloatSquareRoot: - *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); - *aRegisterMask = (static_cast(FloatRegisterMask) << 32) - | FloatRegisterMask; + if (useSSE(&c)) { + *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *aRegisterMask = (static_cast(FloatRegisterMask) << 32) + | FloatRegisterMask; + } else { + *thunk = true; + } break; case Float2Float: From 9ba71cf508f807bf734854bc6958c022ada5d903 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 1 Dec 2009 08:23:11 -0700 Subject: [PATCH 18/19] fix Darwin build --- src/x86.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/x86.h b/src/x86.h index 2ca377c86c..4ae08f6859 100644 --- a/src/x86.h +++ b/src/x86.h @@ -233,9 +233,9 @@ inline bool atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) { #ifdef ARCH_x86_32 - return atomicCompareAndSwap32(p, old, new_); + return atomicCompareAndSwap32(reinterpret_cast(p), old, new_); #elif defined ARCH_x86_64 - return atomicCompareAndSwap64(p, old, new_); + return atomicCompareAndSwap64(reinterpret_cast(p), old, new_); #endif // ARCH_x86_64 } #endif // USE_ATOMIC_OPERATIONS From 98275e175e93cf36a23d21df4d4f6e78b1e5b050 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 1 Dec 2009 09:21:33 -0700 Subject: [PATCH 19/19] powerpc bugfixes --- src/compile.cpp | 24 ++++++++++++++++++++++-- src/powerpc.cpp | 7 +++++-- src/powerpc.h | 2 +- src/thunks.cpp | 2 ++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index 4888778e7b..93f5ca543d 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ 
-785,6 +785,10 @@ class Context { { if (size == 8) { switch(op) { + case Absolute: + assert(t, resultSize == 8); + return local::getThunk(t, absoluteLongThunk); + case FloatNegate: assert(t, resultSize == 8); return local::getThunk(t, negateDoubleThunk); @@ -819,12 +823,16 @@ class Context { assert(t, size == 4); switch(op) { + case Absolute: + assert(t, resultSize == 4); + return local::getThunk(t, absoluteIntThunk); + case FloatNegate: - assert(t, size == 4); + assert(t, resultSize == 4); return local::getThunk(t, negateFloatThunk); case FloatAbsolute: - assert(t, size == 4); + assert(t, resultSize == 4); return local::getThunk(t, absoluteFloatThunk); case Float2Float: @@ -2160,6 +2168,18 @@ absoluteFloat(uint32_t a) return floatToBits(fabsf(bitsToFloat(a))); } +int64_t +absoluteLong(int64_t a) +{ + return a > 0 ? a : -a; +} + +int64_t +absoluteInt(int32_t a) +{ + return a > 0 ? a : -a; +} + int64_t divideLong(int64_t b, int64_t a) { diff --git a/src/powerpc.cpp b/src/powerpc.cpp index 63ee6da3b4..9cec9a1cdd 100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -2065,6 +2065,9 @@ class MyArchitecture: public Assembler::Architecture { *aTypeMask = (1 << RegisterOperand); break; + case Absolute: + case FloatAbsolute: + case FloatSquareRoot: case FloatNegate: case Float2Float: case Float2Int: @@ -2096,9 +2099,9 @@ class MyArchitecture: public Assembler::Architecture { } virtual void planMove - (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, - uint8_t dstTypeMask, uint64_t dstRegisterMask) + uint8_t dstTypeMask, uint64_t) { *srcTypeMask = ~0; *srcRegisterMask = ~static_cast(0); diff --git a/src/powerpc.h b/src/powerpc.h index 1ef1437e0f..9a55c13266 100644 --- a/src/powerpc.h +++ b/src/powerpc.h @@ -122,7 +122,7 @@ atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_) inline bool atomicCompareAndSwap(uintptr_t* p, uintptr_t old, 
uintptr_t new_) { - return atomicCompareAndSwap32(p, old, new_); + return atomicCompareAndSwap32(reinterpret_cast(p), old, new_); } #endif // USE_ATOMIC_OPERATIONS diff --git a/src/thunks.cpp b/src/thunks.cpp index bab06da664..f55c0018a9 100644 --- a/src/thunks.cpp +++ b/src/thunks.cpp @@ -22,6 +22,8 @@ THUNK(divideFloat) THUNK(moduloFloat) THUNK(negateFloat) THUNK(absoluteFloat) +THUNK(absoluteLong) +THUNK(absoluteInt) THUNK(divideLong) THUNK(divideInt) THUNK(moduloLong)