diff --git a/makefile b/makefile index 7006194a35..9376afadef 100644 --- a/makefile +++ b/makefile @@ -101,7 +101,7 @@ warnings = -Wall -Wextra -Werror -Wunused-parameter -Winit-self \ common-cflags = $(warnings) -fno-rtti -fno-exceptions -fno-omit-frame-pointer \ "-I$(JAVA_HOME)/include" -idirafter $(src) -I$(native-build) \ -D__STDC_LIMIT_MACROS -D_JNI_IMPLEMENTATION_ -DAVIAN_VERSION=\"$(version)\" \ - $(gnu-cflags) + -DUSE_ATOMIC_OPERATIONS $(gnu-cflags) build-cflags = $(common-cflags) -fPIC -fvisibility=hidden \ "-I$(JAVA_HOME)/include/linux" -I$(src) -pthread @@ -240,10 +240,12 @@ ifeq ($(mode),small) cflags += -Os -g3 -DNDEBUG endif +ifneq ($(platform),darwin) ifeq ($(arch),i386) # this is necessary to support __sync_bool_compare_and_swap: cflags += -march=i486 endif +endif output = -o $(1) as := $(cc) diff --git a/src/assembler.h b/src/assembler.h index 248d8272a8..918c548acb 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -361,10 +361,9 @@ class Assembler { unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask) = 0; virtual void planMove - (unsigned size, - uint8_t srcTypeMask, uint64_t srcRegisterMask, - uint8_t dstTypeMask, uint64_t dstRegisterMask, - uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask) = 0; + (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t dstRegisterMask) = 0; virtual void planSource (TernaryOperation op, diff --git a/src/binaryToObject/main.cpp b/src/binaryToObject/main.cpp index a6fcbd6cce..c95f193d3d 100644 --- a/src/binaryToObject/main.cpp +++ b/src/binaryToObject/main.cpp @@ -14,7 +14,11 @@ #include "string.h" #include "sys/stat.h" +#ifdef WIN32 +#include <windows.h> +#else #include "sys/mman.h" +#endif #include "fcntl.h" #include "unistd.h" @@ -153,8 +157,29 @@ main(int argc, const char** argv) struct stat s; int r = fstat(fd, &s); if (r != -1) { +#ifdef WIN32 + HANDLE fm; + HANDLE h = (HANDLE) _get_osfhandle (fd); + + fm = 
CreateFileMapping( + h, + NULL, + PAGE_READONLY, + 0, + 0, + NULL); + data = static_cast<uint8_t*>(MapViewOfFile( + fm, + FILE_MAP_READ, + 0, + 0, + s.st_size)); + + CloseHandle(fm); +#else data = static_cast<uint8_t*> (mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); +#endif size = s.st_size; } close(fd); @@ -174,7 +199,11 @@ main(int argc, const char** argv) fprintf(stderr, "unable to open %s\n", argv[2]); } +#ifdef WIN32 + UnmapViewOfFile(data); +#else munmap(data, size); +#endif } else { perror(argv[0]); } diff --git a/src/common.h b/src/common.h index 535e231b6c..b9990a674f 100644 --- a/src/common.h +++ b/src/common.h @@ -325,14 +325,6 @@ markBit(uintptr_t* map, unsigned i) map[wordOf(i)] |= static_cast<uintptr_t>(1) << bitOf(i); } -inline void -markBitAtomic(uintptr_t* map, unsigned i) -{ - uintptr_t* p = map + wordOf(i); - uintptr_t v = static_cast<uintptr_t>(1) << bitOf(i); - while (not __sync_bool_compare_and_swap(p, *p, *p | v)) { } -} - inline void clearBit(uintptr_t* map, unsigned i) { diff --git a/src/compile.cpp b/src/compile.cpp index aeb5e3722b..29eb342760 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -2934,7 +2934,7 @@ bool intrinsic(MyThread* t, Frame* frame, object target) { #define MATCH(name, constant) \ - (byteArrayLength(t, name) - 1 == sizeof(constant) \ + (byteArrayLength(t, name) == sizeof(constant) \ and strcmp(reinterpret_cast<char*>(&byteArrayBody(t, name, 0)), \ constant) == 0) @@ -3040,6 +3040,7 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, (4, 4, c->memory (array, Compiler::FloatType, ArrayBody, index, 4), BytesPerWord)); break; + case iaload: frame->pushInt (c->load @@ -5621,6 +5622,7 @@ compile(MyThread* t, Allocator* allocator, Context* context) frame.set(--index, Frame::Long); c->initLocal(2, index, Compiler::IntegerType); break; + case 'D': frame.set(--index, Frame::Long); frame.set(--index, Frame::Long); @@ -7260,7 +7262,7 @@ class MyProcessor: public Processor { class Visitor: public System::ThreadVisitor { public: Visitor(MyThread* t, MyProcessor* p, 
MyThread* target): - t(t), p(p), target(target) + t(t), p(p), target(target), trace(0) { } virtual void visit(void* ip, void* base, void* stack) { diff --git a/src/compiler.cpp b/src/compiler.cpp index 88402a5e77..a4e9540b1b 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -36,6 +36,12 @@ const unsigned StealRegisterReserveCount = 2; // compare instruction: const unsigned ResolveRegisterReserveCount = (BytesPerWord == 8 ? 2 : 4); +const unsigned RegisterCopyCost = 1; +const unsigned AddressCopyCost = 2; +const unsigned ConstantCopyCost = 3; +const unsigned MemoryCopyCost = 4; +const unsigned CopyPenalty = 10; + class Context; class Value; class Stack; @@ -133,12 +139,16 @@ class Site { virtual Site* makeNextWord(Context*, unsigned) = 0; + virtual SiteMask mask(Context*) = 0; + virtual SiteMask nextWordMask(Context*, unsigned) = 0; virtual unsigned registerSize(Context*) { return BytesPerWord; } virtual unsigned registerMask(Context*) { return 0; } + virtual bool isVolatile(Context*) { return false; } + Site* next; }; @@ -270,7 +280,7 @@ class Read { virtual bool intersect(SiteMask* mask, unsigned depth = 0) = 0; - virtual void maybeIntersectWithHighSource(Context* c) { abort(c); } + virtual Value* high(Context* c) { abort(c); } virtual Value* successor() = 0; @@ -1091,6 +1101,23 @@ buddies(Value* a, Value* b) return false; } +void +addBuddy(Value* original, Value* buddy) +{ + buddy->buddy = original; + Value* p = original; + while (p->buddy != original) p = p->buddy; + p->buddy = buddy; + + if (DebugBuddies) { + fprintf(stderr, "add buddy %p to", buddy); + for (Value* p = buddy->buddy; p != buddy; p = p->buddy) { + fprintf(stderr, " %p", p); + } + fprintf(stderr, "\n"); + } +} + void decrementAvailableGeneralRegisterCount(Context* c) { @@ -1226,6 +1253,7 @@ class Target { static const unsigned MinimumFrameCost = 1; static const unsigned StealPenalty = 2; static const unsigned StealUniquePenalty = 4; + static const unsigned IndirectMovePenalty = 4; static 
const unsigned LowRegisterPenalty = 10; static const unsigned Impossible = 20; @@ -1256,37 +1284,49 @@ valueType(Context* c, Compiler::OperandType type) } } -Target -pickTarget(Context* c, Read* r, bool intersectRead, - unsigned registerReserveCount); +class CostCalculator { + public: + virtual unsigned cost(Context* c, uint8_t typeMask, uint32_t registerMask, + int frameIndex) = 0; +}; unsigned -resourceCost(Context* c UNUSED, Value* v, Resource* r) +resourceCost(Context* c, Value* v, Resource* r, uint8_t typeMask, + uint32_t registerMask, int frameIndex, + CostCalculator* costCalculator) { if (r->reserved or r->freezeCount or r->referenceCount) { return Target::Impossible; - } else if (r->value) { - assert(c, findSite(c, r->value, r->site)); + } else { + unsigned baseCost = costCalculator ? costCalculator->cost + (c, typeMask, registerMask, frameIndex) : 0; - if (v and buddies(r->value, v)) { - return 0; - } else if (uniqueSite(c, r->value, r->site)) { - return Target::StealUniquePenalty; + if (r->value) { + assert(c, findSite(c, r->value, r->site)); + + if (v and buddies(r->value, v)) { + return baseCost; + } else if (uniqueSite(c, r->value, r->site)) { + return baseCost + Target::StealUniquePenalty; + } else { + return baseCost + Target::StealPenalty; + } } else { - return Target::StealPenalty; + return baseCost; } - } else { - return 0; } } bool pickRegisterTarget(Context* c, int i, Value* v, uint32_t mask, int* target, - unsigned* cost) + unsigned* cost, CostCalculator* costCalculator = 0) { if ((1 << i) & mask) { RegisterResource* r = c->registerResources + i; - unsigned myCost = resourceCost(c, v, r) + Target::MinimumRegisterCost; + unsigned myCost = resourceCost + (c, v, r, 1 << RegisterOperand, 1 << i, NoFrameIndex, costCalculator) + + Target::MinimumRegisterCost; + if ((static_cast<uint32_t>(1) << i) == mask) { *cost = myCost; return true; @@ -1299,7 +1339,8 @@ pickRegisterTarget(Context* c, int i, Value* v, uint32_t mask, int* target, } int 
-pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) +pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost, + CostCalculator* costCalculator = 0) { int target = NoRegister; *cost = Target::Impossible; @@ -1308,7 +1349,7 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) for (int i = c->generalRegisterLimit - 1; i >= c->generalRegisterStart; --i) { - if (pickRegisterTarget(c, i, v, mask, &target, cost)) { + if (pickRegisterTarget(c, i, v, mask, &target, cost, costCalculator)) { return i; } } @@ -1318,7 +1359,7 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) for (int i = c->floatRegisterStart; i < static_cast<int>(c->floatRegisterLimit); ++i) { - if (pickRegisterTarget(c, i, v, mask, &target, cost)) { + if (pickRegisterTarget(c, i, v, mask, &target, cost, costCalculator)) { return i; } } @@ -1328,29 +1369,34 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) } Target -pickRegisterTarget(Context* c, Value* v, uint32_t mask) +pickRegisterTarget(Context* c, Value* v, uint32_t mask, + CostCalculator* costCalculator = 0) { unsigned cost; - int number = pickRegisterTarget(c, v, mask, &cost); + int number = pickRegisterTarget(c, v, mask, &cost, costCalculator); return Target(number, RegisterOperand, cost); } unsigned -frameCost(Context* c, Value* v, int frameIndex) +frameCost(Context* c, Value* v, int frameIndex, CostCalculator* costCalculator) { - return resourceCost(c, v, c->frameResources + frameIndex) + return resourceCost + (c, v, c->frameResources + frameIndex, 1 << MemoryOperand, 0, frameIndex, + costCalculator) + Target::MinimumFrameCost; } Target -pickFrameTarget(Context* c, Value* v) +pickFrameTarget(Context* c, Value* v, CostCalculator* costCalculator) { Target best; Value* p = v; do { if (p->home >= 0) { - Target mine(p->home, MemoryOperand, frameCost(c, v, p->home)); + Target mine + (p->home, MemoryOperand, frameCost(c, v, p->home, costCalculator)); + if 
(mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1364,13 +1410,13 @@ pickFrameTarget(Context* c, Value* v) } Target -pickAnyFrameTarget(Context* c, Value* v) +pickAnyFrameTarget(Context* c, Value* v, CostCalculator* costCalculator) { Target best; unsigned count = totalFrameSize(c); for (unsigned i = 0; i < count; ++i) { - Target mine(i, MemoryOperand, frameCost(c, v, i)); + Target mine(i, MemoryOperand, frameCost(c, v, i, costCalculator)); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1383,10 +1429,12 @@ pickAnyFrameTarget(Context* c, Value* v) Target pickTarget(Context* c, Value* value, const SiteMask& mask, - unsigned registerPenalty, Target best) + unsigned registerPenalty, Target best, + CostCalculator* costCalculator) { if (mask.typeMask & (1 << RegisterOperand)) { - Target mine = pickRegisterTarget(c, value, mask.registerMask); + Target mine = pickRegisterTarget + (c, value, mask.registerMask, costCalculator); mine.cost += registerPenalty; if (mine.cost == Target::MinimumRegisterCost) { @@ -1399,14 +1447,14 @@ pickTarget(Context* c, Value* value, const SiteMask& mask, if (mask.typeMask & (1 << MemoryOperand)) { if (mask.frameIndex >= 0) { Target mine(mask.frameIndex, MemoryOperand, - frameCost(c, value, mask.frameIndex)); + frameCost(c, value, mask.frameIndex, costCalculator)); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { best = mine; } } else if (mask.frameIndex == AnyFrameIndex) { - Target mine = pickFrameTarget(c, value); + Target mine = pickFrameTarget(c, value, costCalculator); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1420,7 +1468,7 @@ pickTarget(Context* c, Value* value, const SiteMask& mask, Target pickTarget(Context* c, Read* read, bool intersectRead, - unsigned registerReserveCount) + unsigned registerReserveCount, CostCalculator* costCalculator) { 
unsigned registerPenalty = (c->availableGeneralRegisterCount > registerReserveCount @@ -1442,7 +1490,9 @@ pickTarget(Context* c, Read* read, bool intersectRead, if (r) { SiteMask intersection = mask; if (r->intersect(&intersection)) { - best = pickTarget(c, value, intersection, registerPenalty, best); + best = pickTarget + (c, value, intersection, registerPenalty, best, costCalculator); + if (best.cost <= Target::MinimumFrameCost) { return best; } @@ -1450,7 +1500,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, } } - best = pickTarget(c, value, mask, registerPenalty, best); + best = pickTarget(c, value, mask, registerPenalty, best, costCalculator); if (best.cost <= Target::MinimumFrameCost) { return best; } @@ -1464,7 +1514,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, return best; } - { Target mine = pickRegisterTarget(c, value, registerMask); + { Target mine = pickRegisterTarget(c, value, registerMask, costCalculator); mine.cost += registerPenalty; @@ -1475,7 +1525,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, } } - { Target mine = pickFrameTarget(c, value); + { Target mine = pickFrameTarget(c, value, costCalculator); if (mine.cost == Target::MinimumFrameCost) { return mine; } else if (mine.cost < best.cost) { @@ -1489,7 +1539,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, // there are no free registers left, so moving from memory to // memory isn't an option - try harder to find an available frame // site: - best = pickAnyFrameTarget(c, value); + best = pickAnyFrameTarget(c, value, costCalculator); assert(c, best.cost <= 3); } @@ -1537,7 +1587,7 @@ class ConstantSite: public Site { } virtual unsigned copyCost(Context*, Site* s) { - return (s == this ? 0 : 3); + return (s == this ? 
0 : ConstantCopyCost); } virtual bool match(Context*, const SiteMask& mask) { @@ -1582,6 +1632,10 @@ class ConstantSite: public Site { abort(c); } + virtual SiteMask mask(Context*) { + return SiteMask(1 << ConstantOperand, 0, NoFrameIndex); + } + virtual SiteMask nextWordMask(Context*, unsigned) { return SiteMask(1 << ConstantOperand, 0, NoFrameIndex); } @@ -1625,7 +1679,7 @@ class AddressSite: public Site { } virtual unsigned copyCost(Context*, Site* s) { - return (s == this ? 0 : 2); + return (s == this ? 0 : AddressCopyCost); } virtual bool match(Context*, const SiteMask& mask) { @@ -1668,6 +1722,10 @@ class AddressSite: public Site { abort(c); } + virtual SiteMask mask(Context*) { + return SiteMask(1 << AddressOperand, 0, NoFrameIndex); + } + virtual SiteMask nextWordMask(Context* c, unsigned) { abort(c); } @@ -1687,7 +1745,7 @@ freeRegisterSite(Context* c, uint32_t mask); class RegisterSite: public Site { public: RegisterSite(uint32_t mask, int number): - mask(mask), number(number) + mask_(mask), number(number) { } virtual unsigned toString(Context*, char* buffer, unsigned bufferSize) { @@ -1695,7 +1753,7 @@ class RegisterSite: public Site { return vm::snprintf(buffer, bufferSize, "%p register %d", this, number); } else { return vm::snprintf(buffer, bufferSize, - "%p register unacquired (mask %d)", this, mask); + "%p register unacquired (mask %d)", this, mask_); } } @@ -1705,11 +1763,11 @@ class RegisterSite: public Site { if (s and (this == s or (s->type(c) == RegisterOperand - and (static_cast<RegisterSite*>(s)->mask & (1 << number))))) + and (static_cast<RegisterSite*>(s)->mask_ & (1 << number))))) { return 0; } else { - return 1; + return RegisterCopyCost; } } @@ -1743,7 +1801,7 @@ class RegisterSite: public Site { if (number != NoRegister) { target = Target(number, RegisterOperand, 0); } else { - target = pickRegisterTarget(c, v, mask); + target = pickRegisterTarget(c, v, mask_); expect(c, target.cost < Target::Impossible); } @@ -1803,7 +1861,7 @@ class RegisterSite: public Site { if 
(number != NoRegister) { mask = 1 << number; } else { - mask = this->mask; + mask = mask_; } return freeRegisterSite(c, mask); @@ -1821,6 +1879,10 @@ class RegisterSite: public Site { return freeRegisterSite(c, c->arch->generalRegisterMask()); } + virtual SiteMask mask(Context* c UNUSED) { + return SiteMask(1 << RegisterOperand, mask_, NoFrameIndex); + } + virtual SiteMask nextWordMask(Context*, unsigned) { return SiteMask(1 << RegisterOperand, ~0, NoFrameIndex); } @@ -1841,7 +1903,7 @@ class RegisterSite: public Site { return 1 << number; } - uint32_t mask; + uint32_t mask_; int number; }; @@ -1895,7 +1957,7 @@ class MemorySite: public Site { { return 0; } else { - return 4; + return MemoryCopyCost; } } @@ -2059,6 +2121,11 @@ class MemorySite: public Site { this->index, scale); } + virtual SiteMask mask(Context* c) { + return SiteMask(1 << MemoryOperand, 0, (base == c->arch->stack()) + ? offsetToFrameIndex(c, offset) : NoFrameIndex); + } + virtual SiteMask nextWordMask(Context* c, unsigned index) { // todo: endianness? 
int frameIndex; @@ -2072,6 +2139,10 @@ class MemorySite: public Site { return SiteMask(1 << MemoryOperand, 0, frameIndex); } + virtual bool isVolatile(Context* c) { + return base != c->arch->stack(); + } + bool acquired; int base; int offset; @@ -2144,10 +2215,15 @@ sitesToString(Context* c, Value* v, char* buffer, unsigned size) Site* pickTargetSite(Context* c, Read* read, bool intersectRead = false, - unsigned registerReserveCount = 0) + unsigned registerReserveCount = 0, + CostCalculator* costCalculator = 0) { - Target target(pickTarget(c, read, intersectRead, registerReserveCount)); + Target target + (pickTarget + (c, read, intersectRead, registerReserveCount, costCalculator)); + expect(c, target.cost < Target::Impossible); + if (target.type == MemoryOperand) { return frameSite(c, target.index); } else { @@ -2155,6 +2231,241 @@ pickTargetSite(Context* c, Read* read, bool intersectRead = false, } } +class SingleRead: public Read { + public: + SingleRead(const SiteMask& mask, Value* successor): + next_(0), mask(mask), high_(0), successor_(successor) + { } + + virtual bool intersect(SiteMask* mask, unsigned) { + *mask = local::intersect(*mask, this->mask); + + return true; + } + + virtual Value* high(Context*) { + return high_; + } + + virtual Value* successor() { + return successor_; + } + + virtual bool valid() { + return true; + } + + virtual void append(Context* c UNUSED, Read* r) { + assert(c, next_ == 0); + next_ = r; + } + + virtual Read* next(Context*) { + return next_; + } + + Read* next_; + SiteMask mask; + Value* high_; + Value* successor_; +}; + +SingleRead* +read(Context* c, const SiteMask& mask, Value* successor = 0) +{ + assert(c, (mask.typeMask != 1 << MemoryOperand) or mask.frameIndex >= 0); + + return new (c->zone->allocate(sizeof(SingleRead))) + SingleRead(mask, successor); +} + +bool +acceptMatch(Context* c, Site* s, Read*, const SiteMask& mask) +{ + return s->match(c, mask); +} + +Site* +pickSourceSite(Context* c, Read* read, Site* target = 0, 
+ unsigned* cost = 0, uint8_t typeMask = ~0, + bool intersectRead = true, bool includeBuddies = true, + bool includeNextWord = true, + bool (*accept)(Context*, Site*, Read*, const SiteMask&) + = acceptMatch) +{ + SiteMask mask(typeMask, ~0, AnyFrameIndex); + + if (intersectRead) { + read->intersect(&mask); + } + + Site* site = 0; + unsigned copyCost = 0xFFFFFFFF; + for (SiteIterator it(c, read->value, includeBuddies, includeNextWord); + it.hasMore();) + { + Site* s = it.next(); + if (accept(c, s, read, mask)) { + unsigned v = s->copyCost(c, target); + if (v < copyCost) { + site = s; + copyCost = v; + } + } + } + + if (DebugMoves and site and target) { + char srcb[256]; site->toString(c, srcb, 256); + char dstb[256]; target->toString(c, dstb, 256); + fprintf(stderr, "pick source %s to %s for %p cost %d\n", + srcb, dstb, read->value, copyCost); + } + + if (cost) *cost = copyCost; + return site; +} + +Site* +maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord, + unsigned registerReserveCount = 0) +{ + Value* value = read->value; + unsigned size = value == value->nextWord ? 
BytesPerWord : 8; + + class MyCostCalculator: public CostCalculator { + public: + MyCostCalculator(Value* value, unsigned size, bool includeNextWord): + value(value), + size(size), + includeNextWord(includeNextWord) + { } + + virtual unsigned cost(Context* c, uint8_t typeMask, uint32_t registerMask, + int frameIndex) + { + uint8_t srcTypeMask; + uint64_t srcRegisterMask; + uint8_t tmpTypeMask; + uint64_t tmpRegisterMask; + c->arch->planMove + (size, &srcTypeMask, &srcRegisterMask, + &tmpTypeMask, &tmpRegisterMask, + typeMask, registerMask); + + SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex); + SiteMask dstMask(typeMask, registerMask, frameIndex); + for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) { + Site* s = it.next(); + if (s->match(c, srcMask) or s->match(c, dstMask)) { + return 0; + } + } + + return Target::IndirectMovePenalty; + } + + Value* value; + unsigned size; + bool includeNextWord; + } costCalculator(value, size, includeNextWord); + + Site* dst = pickTargetSite + (c, read, intersectRead, registerReserveCount, &costCalculator); + + uint8_t srcTypeMask; + uint64_t srcRegisterMask; + uint8_t tmpTypeMask; + uint64_t tmpRegisterMask; + c->arch->planMove + (size, &srcTypeMask, &srcRegisterMask, + &tmpTypeMask, &tmpRegisterMask, + 1 << dst->type(c), dst->registerMask(c)); + + SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex); + unsigned cost = 0xFFFFFFFF; + Site* src = 0; + for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) { + Site* s = it.next(); + unsigned v = s->copyCost(c, dst); + if (v == 0) { + src = s; + cost = 0; + break; + } + if (not s->match(c, srcMask)) { + v += CopyPenalty; + } + if (v < cost) { + src = s; + cost = v; + } + } + + if (cost) { + if (not src->match(c, srcMask)) { + src->freeze(c, value); + dst->freeze(c, value); + + SiteMask tmpMask(tmpTypeMask, tmpRegisterMask, AnyFrameIndex); + SingleRead tmpRead(tmpMask, 0); + tmpRead.value = value; + tmpRead.successor_ = value; 
+ + Site* tmp = pickTargetSite(c, &tmpRead, true); + + move(c, value, src, tmp); + + dst->thaw(c, value); + src->thaw(c, value); + + src = tmp; + } + + move(c, value, src, dst); + } + + return dst; +} + +Site* +maybeMove(Context* c, Value* v, const SiteMask& mask, bool intersectMask, + bool includeNextWord, unsigned registerReserveCount = 0) +{ + SingleRead read(mask, 0); + read.value = v; + read.successor_ = v; + + return maybeMove + (c, &read, intersectMask, includeNextWord, registerReserveCount); +} + +Site* +pickSiteOrMove(Context* c, Read* read, bool intersectRead, + bool includeNextWord, unsigned registerReserveCount = 0) +{ + Site* s = pickSourceSite + (c, read, 0, 0, ~0, intersectRead, true, includeNextWord); + + if (s) { + return s; + } else { + return maybeMove + (c, read, intersectRead, includeNextWord, registerReserveCount); + } +} + +Site* +pickSiteOrMove(Context* c, Value* v, const SiteMask& mask, bool intersectMask, + bool includeNextWord, unsigned registerReserveCount = 0) +{ + SingleRead read(mask, 0); + read.value = v; + read.successor_ = v; + + return pickSiteOrMove + (c, &read, intersectMask, includeNextWord, registerReserveCount); +} + void steal(Context* c, Resource* r, Value* thief) { @@ -2170,8 +2481,7 @@ steal(Context* c, Resource* r, Value* thief) { r->site->freeze(c, r->value); - move(c, r->value, r->site, pickTargetSite - (c, live(r->value), false, StealRegisterReserveCount)); + maybeMove(c, live(r->value), false, true, StealRegisterReserveCount); r->site->thaw(c, r->value); } @@ -2223,57 +2533,6 @@ release(Context* c, Resource* resource, Value* value UNUSED, Site* site UNUSED) } } -class SingleRead: public Read { - public: - SingleRead(const SiteMask& mask, Value* successor): - next_(0), mask(mask), high(0), successor_(successor) - { } - - virtual bool intersect(SiteMask* mask, unsigned) { - *mask = local::intersect(*mask, this->mask); - - return true; - } - - virtual void maybeIntersectWithHighSource(Context* c) { - if (high) { - Site* 
s = high->source; - this->mask = local::intersect(s->nextWordMask(c, 0), this->mask); - } - } - - virtual Value* successor() { - return successor_; - } - - virtual bool valid() { - return true; - } - - virtual void append(Context* c UNUSED, Read* r) { - assert(c, next_ == 0); - next_ = r; - } - - virtual Read* next(Context*) { - return next_; - } - - Read* next_; - SiteMask mask; - Value* high; - Value* successor_; -}; - -SingleRead* -read(Context* c, const SiteMask& mask, Value* successor = 0) -{ - assert(c, (mask.typeMask != 1 << MemoryOperand) or mask.frameIndex >= 0); - - return new (c->zone->allocate(sizeof(SingleRead))) - SingleRead(mask, successor); -} - SiteMask generalRegisterMask(Context* c) { @@ -2450,9 +2709,9 @@ stubRead(Context* c) } Site* -pickSite(Context* c, Value* v, Site* s, unsigned index) +pickSite(Context* c, Value* v, Site* s, unsigned index, bool includeNextWord) { - for (SiteIterator it(c, v, true, false); it.hasMore();) { + for (SiteIterator it(c, v, true, includeNextWord); it.hasMore();) { Site* candidate = it.next(); if (s->matchNextWord(c, candidate, index)) { return candidate; @@ -2463,49 +2722,34 @@ pickSite(Context* c, Value* v, Site* s, unsigned index) } Site* -pickOrMoveSite(Context* c, Value* v, Site* s, unsigned index) +pickSiteOrMove(Context* c, Value* v, Site* s, unsigned index) { - Site* n = pickSite(c, v, s, index); + Site* n = pickSite(c, v, s, index, false); if (n) { return n; } - n = s->makeNextWord(c, index); - - Site* src = 0; - unsigned copyCost = 0xFFFFFFFF; - for (SiteIterator it(c, v, true, false); it.hasMore();) { - Site* candidate = it.next(); - unsigned cost = candidate->copyCost(c, n); - if (cost < copyCost) { - src = candidate; - copyCost = cost; - } - } - - move(c, v, src, n); - - return n; + return maybeMove(c, v, s->nextWordMask(c, index), true, false); } Site* -pickOrMoveSite(Context* c, Value* v, Site* s, Site** low, Site** high) +pickSiteOrMove(Context* c, Value* v, Site* s, Site** low, Site** high) { if 
(v->wordIndex == 0) { *low = s; - *high = pickOrMoveSite(c, v->nextWord, s, 1); + *high = pickSiteOrMove(c, v->nextWord, s, 1); return *high; } else { - *low = pickOrMoveSite(c, v->nextWord, s, 0); + *low = pickSiteOrMove(c, v->nextWord, s, 0); *high = s; return *low; } } Site* -pickOrGrowSite(Context* c, Value* v, Site* s, unsigned index) +pickSiteOrGrow(Context* c, Value* v, Site* s, unsigned index) { - Site* n = pickSite(c, v, s, index); + Site* n = pickSite(c, v, s, index, false); if (n) { return n; } @@ -2516,25 +2760,19 @@ pickOrGrowSite(Context* c, Value* v, Site* s, unsigned index) } Site* -pickOrGrowSite(Context* c, Value* v, Site* s, Site** low, Site** high) +pickSiteOrGrow(Context* c, Value* v, Site* s, Site** low, Site** high) { if (v->wordIndex == 0) { *low = s; - *high = pickOrGrowSite(c, v->nextWord, s, 1); + *high = pickSiteOrGrow(c, v->nextWord, s, 1); return *high; } else { - *low = pickOrGrowSite(c, v->nextWord, s, 0); + *low = pickSiteOrGrow(c, v->nextWord, s, 0); *high = s; return *low; } } -bool -acceptMatch(Context* c, Site* s, Read*, const SiteMask& mask) -{ - return s->match(c, mask); -} - bool isHome(Value* v, int frameIndex) { @@ -2566,97 +2804,20 @@ acceptForResolve(Context* c, Site* s, Read* read, const SiteMask& mask) } } -Site* -pickSourceSite(Context* c, Read* read, Site* target = 0, - unsigned* cost = 0, uint8_t typeMask = ~0, - bool intersectRead = true, bool includeBuddies = true, - bool (*accept)(Context*, Site*, Read*, const SiteMask&) - = acceptMatch) -{ - SiteMask mask(typeMask, ~0, AnyFrameIndex); - - if (intersectRead) { - read->intersect(&mask); - } - - Site* site = 0; - unsigned copyCost = 0xFFFFFFFF; - for (SiteIterator it(c, read->value, includeBuddies); it.hasMore();) { - Site* s = it.next(); - if (accept(c, s, read, mask)) { - unsigned v = s->copyCost(c, target); - if (v < copyCost) { - site = s; - copyCost = v; - } - } - } - - if (DebugMoves and site and target) { - char srcb[256]; site->toString(c, srcb, 256); - char 
dstb[256]; target->toString(c, dstb, 256); - fprintf(stderr, "pick source %s to %s for %p cost %d\n", - srcb, dstb, read->value, copyCost); - } - - if (cost) *cost = copyCost; - return site; -} - void move(Context* c, Value* value, Site* src, Site* dst) { + if (DebugMoves) { + char srcb[256]; src->toString(c, srcb, 256); + char dstb[256]; dst->toString(c, dstb, 256); + fprintf(stderr, "move %s to %s for %p to %p\n", + srcb, dstb, value, value); + } + src->freeze(c, value); addSite(c, value, dst); - src->thaw(c, value); - - uint8_t tmpTypeMask; - uint64_t tmpRegisterMask; - c->arch->planMove - (value->nextWord == value ? BytesPerWord : 8, - 1 << src->type(c), src->registerMask(c), - 1 << dst->type(c), dst->registerMask(c), - &tmpTypeMask, &tmpRegisterMask); - - SiteMask mask(tmpTypeMask, tmpRegisterMask, AnyFrameIndex); - if (not src->match(c, mask)) { - // we can't move directly from src to dst on this architecture, so - // we need to either pick a difference source or use a temporary - - removeSite(c, value, dst); - - SingleRead read(mask, 0); - read.value = value; - Site* newSrc = pickSourceSite(c, &read); - - if (newSrc) { - src = newSrc; - } else { - src->freeze(c, value); - dst->freeze(c, value); - - Site* tmp = pickTargetSite(c, &read, true); - - move(c, value, src, tmp); - - dst->thaw(c, value); - src->thaw(c, value); - - src = tmp; - } - - addSite(c, value, dst); - } - - if (DebugMoves) { - char srcb[256]; src->toString(c, srcb, 256); - char dstb[256]; dst->toString(c, dstb, 256); - fprintf(stderr, "move %s to %s for %p\n", srcb, dstb, value); - } - - src->freeze(c, value); dst->freeze(c, value); unsigned srcSize; @@ -2672,14 +2833,14 @@ move(Context* c, Value* value, Site* src, Site* dst) if (srcSize == dstSize) { apply(c, Move, srcSize, src, src, dstSize, dst, dst); } else if (srcSize > BytesPerWord) { - Site* low, *high, *other = pickOrGrowSite(c, value, dst, &low, &high); + Site* low, *high, *other = pickSiteOrGrow(c, value, dst, &low, &high); 
other->freeze(c, value->nextWord); apply(c, Move, srcSize, src, src, srcSize, low, high); other->thaw(c, value->nextWord); } else { - Site* low, *high, *other = pickOrMoveSite(c, value, src, &low, &high); + Site* low, *high, *other = pickSiteOrMove(c, value, src, &low, &high); other->freeze(c, value->nextWord); apply(c, Move, dstSize, low, high, dstSize, dst, dst); @@ -2802,7 +2963,7 @@ addReads(Context* c, Event* e, Value* v, unsigned size, SingleRead* r = read(c, lowMask, lowSuccessor); addRead(c, e, v, r); if (size > BytesPerWord) { - r->high = v->nextWord; + r->high_ = v->nextWord; addRead(c, e, v->nextWord, highMask, highSuccessor); } } @@ -3228,24 +3389,6 @@ appendReturn(Context* c, unsigned size, Value* value) ReturnEvent(c, size, value)); } -void -addBuddy(Value* original, Value* buddy) -{ - buddy->buddy = original; - Value* p = original; - while (p->buddy != original) p = p->buddy; - p->buddy = buddy; - //buddy->type = original->type; - - if (DebugBuddies) { - fprintf(stderr, "add buddy %p to", buddy); - for (Value* p = buddy->buddy; p != buddy; p = p->buddy) { - fprintf(stderr, " %p", p); - } - fprintf(stderr, "\n"); - } -} - void maybeMove(Context* c, BinaryOperation type, unsigned srcSize, unsigned srcSelectSize, Value* src, unsigned dstSize, Value* dst, @@ -3318,6 +3461,10 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, c->arch->planSource(type, dstSize, &srcTypeMask, &srcRegisterMask, dstSize, &thunk); + if (src->type == ValueGeneral) { + srcRegisterMask &= c->arch->generalRegisterMask(); + } + assert(c, thunk == 0); assert(c, dstMask.typeMask & srcTypeMask & (1 << RegisterOperand)); @@ -3375,11 +3522,6 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, } else { target = src->source; - assert(c, src); - assert(c, dst); - - addBuddy(src, dst); - if (DebugMoves) { char dstb[256]; target->toString(c, dstb, 256); fprintf(stderr, "null move in %s for %p to %p\n", dstb, src, dst); @@ -3391,6 +3533,25 @@ maybeMove(Context* c, 
BinaryOperation type, unsigned srcSize, } } +void +pickSiteOrMove(Context* c, Value* src, Value* dst) +{ + if (live(dst)) { + Read* read = live(src); + Site* s = pickSourceSite(c, read, 0, 0, ~0, false, true, true); + + if (s == 0 or s->isVolatile(c)) { + maybeMove(c, read, false, true); + } + + addBuddy(src, dst); + + if (src->source->isVolatile(c)) { + removeSite(c, src, src->source); + } + } +} + Value* value(Context* c, ValueType type, Site* site = 0, Site* target = 0) { @@ -3474,19 +3635,33 @@ class MoveEvent: public Event { SiteMask dstLowMask(dstTypeMask, dstRegisterMask, AnyFrameIndex); SiteMask dstHighMask(dstTypeMask, dstRegisterMask >> 32, AnyFrameIndex); - if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) { + if (srcSelectSize >= BytesPerWord + and dstSize >= BytesPerWord + and srcSelectSize >= dstSize) + { + if (dst->target) { + if (dstSize > BytesPerWord + and src->source->registerSize(c) > BytesPerWord) + { + apply(c, Move, srcSelectSize, src->source, src->source, + dstSize, dst->target, dst->target); + } else { + maybeMove(c, Move, BytesPerWord, BytesPerWord, src, + BytesPerWord, dst, dstLowMask); + if (dstSize > BytesPerWord) { + maybeMove(c, Move, BytesPerWord, BytesPerWord, src->nextWord, + BytesPerWord, dst->nextWord, dstHighMask); + } + } + } else { + pickSiteOrMove(c, src, dst); + if (dstSize > BytesPerWord) { + pickSiteOrMove(c, src->nextWord, dst->nextWord); + } + } + } else if (srcSelectSize <= BytesPerWord and dstSize <= BytesPerWord) { maybeMove(c, type, srcSize, srcSelectSize, src, dstSize, dst, dstLowMask); - } else if (srcSelectSize == dstSize) { - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); - maybeMove(c, Move, BytesPerWord, BytesPerWord, src->nextWord, - BytesPerWord, dst->nextWord, dstHighMask); - } else if (srcSize > BytesPerWord) { - assert(c, dstSize == BytesPerWord); - - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); } else { assert(c, 
srcSize == BytesPerWord); assert(c, srcSelectSize == BytesPerWord); @@ -3539,8 +3714,7 @@ class MoveEvent: public Event { low->thaw(c, dst); } else { - maybeMove(c, Move, BytesPerWord, BytesPerWord, src, BytesPerWord, dst, - dstLowMask); + pickSiteOrMove(c, src, dst); } } @@ -3590,11 +3764,11 @@ findConstantSite(Context* c, Value* v) } void -preserve(Context* c, Value* v, Site* s, Read* r) +preserve(Context* c, Value* v, Read* r, Site* s) { s->freeze(c, v); - move(c, v, s, pickTargetSite(c, r)); + maybeMove(c, r, false, true, 0); s->thaw(c, v); } @@ -3613,11 +3787,12 @@ getTarget(Context* c, Value* value, Value* result, const SiteMask& resultMask) s = value->source; v = value; if (r and uniqueSite(c, v, s)) { - preserve(c, v, s, r); + preserve(c, v, r, s); } } else { SingleRead r(resultMask, 0); r.value = result; + r.successor_ = result; s = pickTargetSite(c, &r, true); v = result; addSite(c, result, s); @@ -4723,7 +4898,10 @@ class BuddyEvent: public Event { } virtual void compile(Context* c) { -// fprintf(stderr, "original %p buddy %p\n", original, buddy); + if (DebugBuddies) { + fprintf(stderr, "original %p buddy %p\n", original, buddy); + } + assert(c, hasSite(c, original)); assert(c, original); @@ -4885,19 +5063,20 @@ readSource(Context* c, Read* r) return 0; } - r->maybeIntersectWithHighSource(c); - - Site* site = pickSourceSite(c, r); - - if (site) { - return site; + Value* high = r->high(c); + if (high) { + Site* s = pickSite(c, r->value, high->source, 0, true); + SiteMask mask; + r->intersect(&mask); + if (s and s->match(c, mask)) { + return s; + } else { + return pickSiteOrMove + (c, r->value, intersect(mask, high->source->nextWordMask(c, 0)), + true, true); + } } else { - Site* target = pickTargetSite(c, r, true); - unsigned copyCost; - site = pickSourceSite(c, r, target, ©Cost, ~0, false); - assert(c, copyCost); - move(c, v, site, target); - return target; + return pickSiteOrMove(c, r, true, true); } } @@ -4974,33 +5153,6 @@ thaw(Context* c, 
SiteRecordList* frozen) } } -Site* -acquireSite(Context* c, SiteRecordList* frozen, Site* target, Value* v, - Read* r, bool pickSource) -{ - assert(c, hasSite(c, v)); - - unsigned copyCost; - Site* source; - if (pickSource) { - source = pickSourceSite(c, r, target, ©Cost, ~0, false); - } else { - copyCost = 0; - source = target; - } - - if (copyCost) { - target = target->copy(c); - move(c, v, source, target); - } else { - target = source; - } - - freeze(c, frozen, target, v); - - return target; -} - bool resolveOriginalSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) @@ -5023,7 +5175,10 @@ resolveOriginalSites(Context* c, Event* e, SiteRecordList* frozen, buffer, v, el.localIndex, frameIndex(c, &el)); } - acquireSite(c, frozen, s, v, r, true); + Site* target = pickSiteOrMove + (c, v, s->mask(c), true, true, ResolveRegisterReserveCount); + + freeze(c, frozen, target, v); } else { complete = false; } @@ -5058,9 +5213,10 @@ resolveSourceSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand); Site* s = pickSourceSite - (c, r, 0, 0, mask, true, false, acceptForResolve); + (c, r, 0, 0, mask, true, false, true, acceptForResolve); if (s == 0) { - s = pickSourceSite(c, r, 0, 0, mask, false, false, acceptForResolve); + s = pickSourceSite + (c, r, 0, 0, mask, false, false, true, acceptForResolve); } if (s) { @@ -5070,7 +5226,9 @@ resolveSourceSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) buffer, v, el.localIndex, frameIndex(c, &el)); } - sites[el.localIndex] = acquireSite(c, frozen, s, v, r, false)->copy(c); + freeze(c, frozen, s, v); + + sites[el.localIndex] = s->copy(c); } else { complete = false; } @@ -5091,25 +5249,25 @@ resolveTargetSites(Context* c, Event* e, SiteRecordList* frozen, Site** sites) if (r and sites[el.localIndex] == 0) { const uint32_t mask = (1 << RegisterOperand) | (1 << MemoryOperand); - bool useTarget = false; - Site* s = 
pickSourceSite(c, r, 0, 0, mask, true, true, acceptForResolve); + Site* s = pickSourceSite + (c, r, 0, 0, mask, true, true, true, acceptForResolve); if (s == 0) { - s = pickSourceSite(c, r, 0, 0, mask, false, true, acceptForResolve); + s = pickSourceSite + (c, r, 0, 0, mask, false, true, true, acceptForResolve); if (s == 0) { - s = pickTargetSite(c, r, false, ResolveRegisterReserveCount); - useTarget = true; + s = maybeMove(c, r, false, true, ResolveRegisterReserveCount); } } + freeze(c, frozen, s, v); + + sites[el.localIndex] = s->copy(c); + if (DebugControl) { - char buffer[256]; s->toString(c, buffer, 256); + char buffer[256]; sites[el.localIndex]->toString(c, buffer, 256); fprintf(stderr, "resolve target %s for %p local %d frame %d\n", buffer, el.value, el.localIndex, frameIndex(c, &el)); } - - Site* acquired = acquireSite(c, frozen, s, v, r, useTarget)->copy(c); - - sites[el.localIndex] = (useTarget ? s : acquired->copy(c)); } } } @@ -5898,7 +6056,7 @@ class MyCompiler: public Compiler { virtual void push(unsigned footprint UNUSED) { assert(&c, footprint == 1); - Value* v = value(&c, ValueFloat); + Value* v = value(&c, ValueGeneral); Stack* s = local::stack(&c, v, c.stack); v->home = frameIndex(&c, s->index + c.localFootprint); diff --git a/src/heap.cpp b/src/heap.cpp index 11c3c4ceda..5dcd75f7c2 100644 --- a/src/heap.cpp +++ b/src/heap.cpp @@ -11,6 +11,7 @@ #include "heap.h" #include "system.h" #include "common.h" +#include "arch.h" using namespace vm; @@ -69,6 +70,19 @@ System* system(Context*); void* tryAllocate(Context* c, unsigned size); void free(Context* c, const void* p, unsigned size); +#ifdef USE_ATOMIC_OPERATIONS +inline void +markBitAtomic(uintptr_t* map, unsigned i) +{ + uintptr_t* p = map + wordOf(i); + uintptr_t v = static_cast(1) << bitOf(i); + for (uintptr_t old = *p; + not atomicCompareAndSwap(p, old, old | v); + old = *p) + { } +} +#endif // USE_ATOMIC_OPERATIONS + inline void* get(void* o, unsigned offsetInWords) { @@ -303,12 +317,14 @@ 
class Segment { if (child) child->set(p, v); } +#ifdef USE_ATOMIC_OPERATIONS void markAtomic(void* p) { assert(segment->context, bitsPerRecord == 1); markBitAtomic(data, indexOf(p)); assert(segment->context, getBit(data, indexOf(p))); if (child) child->markAtomic(p); } +#endif unsigned get(void* p) { return getBits(data, bitsPerRecord, indexOf(p)); @@ -1020,7 +1036,9 @@ void markDirty(Context* c, Fixie* f) { if (not f->dirty) { +#ifdef USE_ATOMIC_OPERATIONS ACQUIRE(c->lock); +#endif if (not f->dirty) { f->dirty = true; @@ -1816,6 +1834,10 @@ class MyHeap: public Heap { virtual void mark(void* p, unsigned offset, unsigned count) { if (needsMark(p)) { +#ifndef USE_ATOMIC_OPERATIONS + ACQUIRE(c.lock); +#endif + if (c.client->isFixed(p)) { Fixie* f = fixie(p); assert(&c, offset == 0 or f->hasMask); @@ -1830,7 +1852,11 @@ class MyHeap: public Heap { } dirty = true; +#ifdef USE_ATOMIC_OPERATIONS markBitAtomic(f->mask(), offset + i); +#else + markBit(f->mask(), offset + i); +#endif assert(&c, getBit(f->mask(), offset + i)); } } @@ -1848,7 +1874,11 @@ class MyHeap: public Heap { for (unsigned i = 0; i < count; ++i) { void** target = static_cast(p) + offset + i; if (targetNeedsMark(mask(*target))) { +#ifdef USE_ATOMIC_OPERATIONS map->markAtomic(target); +#else + map->set(target); +#endif } } } diff --git a/src/jnienv.cpp b/src/jnienv.cpp index d0e7aebaa5..b28b089244 100644 --- a/src/jnienv.cpp +++ b/src/jnienv.cpp @@ -2173,10 +2173,7 @@ JNI_CreateJavaVM(Machine** m, Thread** t, void* args) System* s = makeSystem(crashDumpDirectory); Heap* h = makeHeap(s, heapLimit); Finder* f = makeFinder(s, RUNTIME_ARRAY_BODY(classpathBuffer), bootLibrary); - Processor* p = makeProcessor(s, h, false); // change back to true - // once use of SSE is - // fixed on 32-bit - // systems + Processor* p = makeProcessor(s, h, true); const char** properties = static_cast (h->allocate(sizeof(const char*) * propertyCount)); diff --git a/src/machine.cpp b/src/machine.cpp index 30a4e6d220..9abe61301d 
100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -14,6 +14,7 @@ #include "stream.h" #include "constants.h" #include "processor.h" +#include "arch.h" using namespace vm; @@ -21,6 +22,17 @@ namespace { const unsigned NoByte = 0xFFFF; +#ifdef USE_ATOMIC_OPERATIONS +void +atomicIncrement(uint32_t* p, int v) +{ + for (uint32_t old = *p; + not atomicCompareAndSwap32(p, old, old + v); + old = *p) + { } +} +#endif + bool find(Thread* t, Thread* o) { @@ -2319,20 +2331,33 @@ enter(Thread* t, Thread::State s) return; } +#ifdef USE_ATOMIC_OPERATIONS +# define INCREMENT atomicIncrement +# define ACQUIRE_LOCK ACQUIRE_RAW(t, t->m->stateLock) +# define BARRIER memoryBarrier() +#else +# define INCREMENT(pointer, value) *(pointer) += value; +# define ACQUIRE_LOCK +# define BARRIER + ACQUIRE_RAW(t, t->m->stateLock); +#endif // not USE_ATOMIC_OPERATIONS switch (s) { case Thread::ExclusiveState: { + ACQUIRE_LOCK; + while (t->m->exclusive) { // another thread got here first. ENTER(t, Thread::IdleState); + t->m->stateLock->wait(t->systemThread, 0); } switch (t->state) { case Thread::ActiveState: break; case Thread::IdleState: { - ++ t->m->activeCount; + INCREMENT(&(t->m->activeCount), 1); } break; default: abort(t); @@ -2340,14 +2365,35 @@ enter(Thread* t, Thread::State s) t->state = Thread::ExclusiveState; t->m->exclusive = t; - + + BARRIER; + while (t->m->activeCount > 1) { t->m->stateLock->wait(t->systemThread, 0); } } break; case Thread::IdleState: + if (t->state == Thread::ActiveState) { + // fast path + assert(t, t->m->activeCount > 0); + INCREMENT(&(t->m->activeCount), -1); + + t->state = s; + + if (t->m->exclusive) { + ACQUIRE_LOCK; + + t->m->stateLock->notifyAll(t->systemThread); + } + break; + } else { + // fall through to slow path + } + case Thread::ZombieState: { + ACQUIRE_LOCK; + switch (t->state) { case Thread::ExclusiveState: { assert(t, t->m->exclusive == t); @@ -2360,7 +2406,7 @@ enter(Thread* t, Thread::State s) } assert(t, t->m->activeCount > 0); - -- 
t->m->activeCount; + INCREMENT(&(t->m->activeCount), -1); if (s == Thread::ZombieState) { assert(t, t->m->liveCount > 0); @@ -2375,35 +2421,54 @@ enter(Thread* t, Thread::State s) t->m->stateLock->notifyAll(t->systemThread); } break; - case Thread::ActiveState: { - switch (t->state) { - case Thread::ExclusiveState: { - assert(t, t->m->exclusive == t); + case Thread::ActiveState: + if (t->state == Thread::IdleState and t->m->exclusive == 0) { + // fast path + INCREMENT(&(t->m->activeCount), 1); t->state = s; - t->m->exclusive = 0; - t->m->stateLock->notifyAll(t->systemThread); - } break; - - case Thread::NoState: - case Thread::IdleState: { - while (t->m->exclusive) { - t->m->stateLock->wait(t->systemThread, 0); + if (t->m->exclusive) { + // another thread has entered the exclusive state, so we + // return to idle and use the slow path to become active + enter(t, Thread::IdleState); + } else { + break; } - - ++ t->m->activeCount; - if (t->state == Thread::NoState) { - ++ t->m->liveCount; - } - t->state = s; - } break; - - default: abort(t); } - } break; + + { ACQUIRE_LOCK; + + switch (t->state) { + case Thread::ExclusiveState: { + assert(t, t->m->exclusive == t); + + t->state = s; + t->m->exclusive = 0; + + t->m->stateLock->notifyAll(t->systemThread); + } break; + + case Thread::NoState: + case Thread::IdleState: { + while (t->m->exclusive) { + t->m->stateLock->wait(t->systemThread, 0); + } + + INCREMENT(&(t->m->activeCount), 1); + if (t->state == Thread::NoState) { + ++ t->m->liveCount; + } + t->state = s; + } break; + + default: abort(t); + } + } break; case Thread::ExitState: { + ACQUIRE_LOCK; + switch (t->state) { case Thread::ExclusiveState: { assert(t, t->m->exclusive == t); @@ -2418,7 +2483,7 @@ enter(Thread* t, Thread::State s) } assert(t, t->m->activeCount > 0); - -- t->m->activeCount; + INCREMENT(&(t->m->activeCount), -1); t->state = s; diff --git a/src/powerpc.cpp b/src/powerpc.cpp index b4092aca6c..63ee6da3b4 100644 --- a/src/powerpc.cpp +++ 
b/src/powerpc.cpp @@ -2096,19 +2096,20 @@ class MyArchitecture: public Assembler::Architecture { } virtual void planMove - (unsigned, - uint8_t srcTypeMask, uint64_t srcRegisterMask, - uint8_t dstTypeMask, uint64_t, - uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask) + (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t dstRegisterMask) { - *tmpTypeMask = srcTypeMask; - *tmpRegisterMask = srcRegisterMask; + *srcTypeMask = ~0; + *srcRegisterMask = ~static_cast(0); - if ((dstTypeMask & (1 << MemoryOperand)) - and (srcTypeMask & ((1 << MemoryOperand) | 1 << AddressOperand))) - { - // can't move directly from memory to memory - *tmpTypeMask = (1 << RegisterOperand); + *tmpTypeMask = 0; + *tmpRegisterMask = 0; + + if (dstTypeMask & (1 << MemoryOperand)) { + // can't move directly from memory or constant to memory + *srcTypeMask = 1 << RegisterOperand; + *tmpTypeMask = 1 << RegisterOperand; *tmpRegisterMask = ~static_cast(0); } } diff --git a/src/powerpc.h b/src/powerpc.h index e60d27bfe5..1ef1437e0f 100644 --- a/src/powerpc.h +++ b/src/powerpc.h @@ -90,6 +90,42 @@ syncInstructionCache(const void* start, unsigned size) __asm__ __volatile__("isync"); } +#ifdef USE_ATOMIC_OPERATIONS +inline bool +atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_) +{ +#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) + return __sync_bool_compare_and_swap(p, old, new_); +#else // not GCC >= 4.1 + bool result; + + __asm__ __volatile__(" sync\n" + "1:\n" + " lwarx %0,0,%2\n" + " cmpw %0,%3\n" + " bne- 2f\n" + " stwcx. 
%4,0,%2\n" + " bne- 1b\n" + " isync \n" + "2:\n" + " xor %0,%0,%3\n" + " cntlzw %0,%0\n" + " srwi %0,%0,5\n" + : "=&r"(result), "+m"(*p) + : "r"(p), "r"(old), "r"(new_) + : "cc", "memory"); + + return result; +#endif // not GCC >= 4.1 +} + +inline bool +atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) +{ + return atomicCompareAndSwap32(p, old, new_); +} +#endif // USE_ATOMIC_OPERATIONS + inline uint64_t dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes, unsigned argumentCount, unsigned argumentsSize, diff --git a/src/windows.cpp b/src/windows.cpp index f15c2996d2..4ac44e4d05 100644 --- a/src/windows.cpp +++ b/src/windows.cpp @@ -616,26 +616,32 @@ class MySystem: public System { ACQUIRE(this, mutex); + bool success = false; int rv = SuspendThread(target->thread); - expect(this, rv != -1); + if (rv != -1) { + CONTEXT context; + memset(&context, 0, sizeof(CONTEXT)); + context.ContextFlags = CONTEXT_CONTROL; + rv = GetThreadContext(target->thread, &context); - CONTEXT context; - rv = GetThreadContext(target->thread, &context); - expect(this, rv); + if (rv) { #ifdef ARCH_x86_32 - visitor->visit(reinterpret_cast(context.Eip), - reinterpret_cast(context.Ebp), - reinterpret_cast(context.Esp)); + visitor->visit(reinterpret_cast(context.Eip), + reinterpret_cast(context.Ebp), + reinterpret_cast(context.Esp)); #elif defined ARCH_x86_64 - visitor->visit(reinterpret_cast(context.Rip), - reinterpret_cast(context.Rbp), - reinterpret_cast(context.Rsp)); + visitor->visit(reinterpret_cast(context.Rip), + reinterpret_cast(context.Rbp), + reinterpret_cast(context.Rsp)); #endif + success = true; + } - rv = ResumeThread(target->thread); - expect(this, rv != -1); + rv = ResumeThread(target->thread); + expect(this, rv != -1); + } - return 0; + return (success ? 
0 : 1); } virtual uint64_t call(void* function, uintptr_t* arguments, uint8_t* types, diff --git a/src/x86.cpp b/src/x86.cpp index 310aa8350f..7e565dc179 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -946,6 +946,9 @@ void sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a, unsigned bSize UNUSED, Assembler::Register* b) { + assert(c, aSize >= 4); + assert(c, aSize == bSize); + if (floatReg(a) and floatReg(b)) { if (aSize == 4) { opcode(c, 0xf3); @@ -1090,6 +1093,9 @@ void sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a, unsigned bSize UNUSED, Assembler::Register* b) { + assert(c, aSize >= 4); + assert(c, aSize == bSize); + if (BytesPerWord == 4 and aSize == 8) { opcode(c, 0xf3); opcode(c, 0x0f, 0x7e); @@ -1165,6 +1171,7 @@ void sseMoveRM(Context* c, unsigned aSize, Assembler::Register* a, UNUSED unsigned bSize, Assembler::Memory* b) { + assert(c, aSize >= 4); assert(c, aSize == bSize); if (BytesPerWord == 4 and aSize == 8) { @@ -2921,7 +2928,7 @@ class MyArchitecture: public Assembler::Architecture { break; case Float2Int: - if (useSSE(&c) and (bSize <= BytesPerWord)) { + if (useSSE(&c) and bSize <= BytesPerWord) { *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); *aRegisterMask = (static_cast(FloatRegisterMask) << 32) | FloatRegisterMask; @@ -2931,7 +2938,7 @@ class MyArchitecture: public Assembler::Architecture { break; case Int2Float: - if (useSSE(&c) and (aSize <= BytesPerWord)) { + if (useSSE(&c) and aSize <= BytesPerWord) { *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); *aRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); @@ -2941,9 +2948,8 @@ class MyArchitecture: public Assembler::Architecture { break; case Move: - *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); - *aRegisterMask = GeneralRegisterMask - | (static_cast(GeneralRegisterMask) << 32); + *aTypeMask = ~0; + *aRegisterMask = ~static_cast(0); if (BytesPerWord == 4) { if (aSize == 4 and bSize == 8) { @@ -3039,38 
+3045,46 @@ class MyArchitecture: public Assembler::Architecture { } virtual void planMove - (unsigned size, - uint8_t srcTypeMask, uint64_t srcRegisterMask, - uint8_t dstTypeMask, uint64_t dstRegisterMask, - uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask) + (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t dstRegisterMask) { - *tmpTypeMask = srcTypeMask; - *tmpRegisterMask = srcRegisterMask; + *srcTypeMask = ~0; + *srcRegisterMask = ~static_cast(0); - if ((dstTypeMask & (1 << MemoryOperand)) - and (srcTypeMask & ((1 << MemoryOperand) | 1 << AddressOperand))) - { + *tmpTypeMask = 0; + *tmpRegisterMask = 0; + + if (dstTypeMask & (1 << MemoryOperand)) { // can't move directly from memory to memory - *tmpTypeMask = (1 << RegisterOperand); + *srcTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); + *tmpTypeMask = 1 << RegisterOperand; *tmpRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); } else if (dstTypeMask & (1 << RegisterOperand)) { - if (srcTypeMask & (1 << RegisterOperand)) { - if (size != BytesPerWord - and (((dstRegisterMask & FloatRegisterMask) == 0) - xor ((srcRegisterMask & FloatRegisterMask) == 0))) - { - // can't move directly from FPR to GPR or vice-versa for - // values larger than the GPR size - *tmpTypeMask = (1 << MemoryOperand); - *tmpRegisterMask = 0; + if (size > BytesPerWord) { + // can't move directly from FPR to GPR or vice-versa for + // values larger than the GPR size + if (dstRegisterMask & FloatRegisterMask) { + *srcRegisterMask = FloatRegisterMask + | (static_cast(FloatRegisterMask) << 32); + *tmpTypeMask = 1 << MemoryOperand; + } else if (dstRegisterMask & GeneralRegisterMask) { + *srcRegisterMask = GeneralRegisterMask + | (static_cast(GeneralRegisterMask) << 32); + *tmpTypeMask = 1 << MemoryOperand; } - } else if ((dstRegisterMask & FloatRegisterMask) - and (srcTypeMask & (1 << ConstantOperand))) - { + } 
+ if (dstRegisterMask & FloatRegisterMask) { // can't move directly from constant to FPR - *tmpTypeMask = (1 << MemoryOperand); - *tmpRegisterMask = 0; + *srcTypeMask &= ~(1 << ConstantOperand); + if (size > BytesPerWord) { + *tmpTypeMask = 1 << MemoryOperand; + } else { + *tmpTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *tmpRegisterMask = GeneralRegisterMask + | (static_cast(GeneralRegisterMask) << 32); + } } } } diff --git a/src/x86.h b/src/x86.h index 7dd86c482b..0528d33695 100644 --- a/src/x86.h +++ b/src/x86.h @@ -159,9 +159,11 @@ memoryBarrier() { #ifdef _MSC_VER MemoryBarrier(); -#else - __asm__ __volatile__("": : :"memory"); -#endif +#elif defined ARCH_x86_32 + __asm__ __volatile__("lock; addl $0,0(%%esp)": : :"memory"); +#elif defined ARCH_x86_64 + __asm__ __volatile__("mfence": : :"memory"); +#endif // ARCH_x86_64 } inline void @@ -188,6 +190,56 @@ syncInstructionCache(const void*, unsigned) // ignore } +#ifdef USE_ATOMIC_OPERATIONS +inline bool +atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_) +{ +#ifdef _MSC_VER + InterlockedCompareExchange(p, new_, old); +#elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) + return __sync_bool_compare_and_swap(p, old, new_); +#else + uint8_t result; + + __asm__ __volatile__("lock; cmpxchgl %2, %0; setz %1" + : "=m"(*p), "=q"(result) + : "r"(new_), "a"(old), "m"(*p) + : "memory"); + + return result != 0; +#endif +} + +inline bool +atomicCompareAndSwap64(uint64_t* p, uint64_t old, uint64_t new_) +{ +#ifdef _MSC_VER + InterlockedCompareExchange64(p, new_, old); +#elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1) + return __sync_bool_compare_and_swap(p, old, new_); +#else + uint8_t result; + + __asm__ __volatile__("lock; cmpxchgq %2, %0; setz %1" + : "=m"(*p), "=q"(result) + : "r"(new_), "a"(old), "m"(*p) + : "memory"); + + return result != 0; +#endif +} + +inline bool +atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_) +{ +#ifdef ARCH_x86_32 + return atomicCompareAndSwap32(p, old, 
new_); +#elif defined ARCH_x86_64 + return atomicCompareAndSwap64(p, old, new_); +#endif // ARCH_x86_64 +} +#endif // USE_ATOMIC_OPERATIONS + } // namespace vm #endif//X86_H diff --git a/test/AllFloats.java b/test/AllFloats.java index ca68df4396..0bba388b9b 100644 --- a/test/AllFloats.java +++ b/test/AllFloats.java @@ -19,6 +19,8 @@ public class AllFloats { private static float complex(float a, float b) {return (a - b) / (a * b) + (float)Math.sqrt(a);} private static double complex(double a, double b) {return (a - b) / (a * b) + Math.sqrt(a);} private static double complex(float a, double b) {return (a - b) / (a * b) + Math.sqrt(a);} + private static double sqrt(double a) {return Math.sqrt(a);} + private static float complexNoIntrinsic(float a, float b) {return (a - b) / (a * b) + (float)sqrt(a);} private static int f2i(float a) {return (int)a;} private static long f2l(float a) {return (long)a;} private static float i2f(int a) {return (float)a;} @@ -59,6 +61,7 @@ public class AllFloats { expect(complex(4f, 3f) == (4f-3f)/(4f*3f) + 2f); expect(complex(4d, 3d) == (4d-3d)/(4d*3d) + 2d); expect(complex(4f, 3d) == (4f-3d)/(4f*3d) + 2f); + expect(complexNoIntrinsic(4f, 3f) == (4f-3f)/(4f*3f) + 2f); expect(f2i(4f) == 4); expect(f2l(4f) == 4); diff --git a/test/Floats.java b/test/Floats.java index bd85a74d10..412bd5cd6c 100644 --- a/test/Floats.java +++ b/test/Floats.java @@ -19,6 +19,20 @@ public class Floats { return a - b; } + private double field = 100d; + + private static int doubleToInt(Floats f) { + return (int) f.field; + } + + private static void multiplyAndStore(double a, double b, Floats f) { + f.field = a * b; + } + + private static double loadAndMultiply(double a, Floats f) { + return f.field * a; + } + public static void main(String[] args) { expect(multiply(0.5d, 0.5d) == 0.25d); expect(multiply(0.5f, 0.5f) == 0.25f); @@ -50,10 +64,35 @@ public class Floats { expect(((int) d) == 1); } + { double d = 12345d; + expect(((int) d) == 12345); + } + + 
expect(doubleToInt(new Floats()) == 100); + + { Floats f = new Floats(); + f.field = 32.0d; + expect(loadAndMultiply(2.0d, f) == 64.0d); + } + + { Floats f = new Floats(); + f.field = 32.0d; + expect(multiply(2.0d, f.field) == 64.0d); + } + + { Floats f = new Floats(); + multiplyAndStore(32.0d, 0.5d, f); + expect(f.field == 16.0d); + } + { float f = 1f; expect(((int) f) == 1); } + { float f = 1f; + expect(((long) f) == 1); + } + expect(Math.round(0.4f) == 0); expect(Math.round(0.5f) == 1); expect(Math.round(1.0f) == 1); @@ -73,5 +112,20 @@ public class Floats { double d = (double) z; expect(d == 6553311036568663.0); } + + { long z = 12345L; + float f = (float) z; + expect(f == 12345.0); + } + + { int z = 12345; + float f = (float) z; + expect(f == 12345.0); + } + + { int z = 12345; + double d = (double) z; + expect(d == 12345.0); + } } }