From cec6444911abb60f83892d6a32885fa61fce8812 Mon Sep 17 00:00:00 2001
From: Joel Dice
Date: Sat, 17 Oct 2009 18:18:03 -0600
Subject: [PATCH] fix bootimage build for case where the JIT code area is too
 far from the AOT code area to do immediate-offset jumps between them

---
 makefile         |   2 +-
 src/assembler.h  |   8 ++-
 src/compile.cpp  | 149 ++++++++++++++++++++++++++++++++++-------------
 src/compiler.cpp |  14 ++++-
 src/compiler.h   |   1 +
 src/x86.cpp      | 107 +++++++++++++++++++++++++++++-----
 6 files changed, 221 insertions(+), 60 deletions(-)

diff --git a/makefile b/makefile
index 4a72aa1d7e..a3a9b30f58 100644
--- a/makefile
+++ b/makefile
@@ -1,4 +1,4 @@
-#MAKEFLAGS = -s
+MAKEFLAGS = -s
 
 name = avian
 version = 0.2
diff --git a/src/assembler.h b/src/assembler.h
index f135fb7c3b..0334c716ea 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -34,9 +34,11 @@ const unsigned OperationCount = StoreLoadBarrier + 1;
 enum UnaryOperation {
   Call,
   LongCall,
+  AlignedLongCall,
   AlignedCall,
   Jump,
   LongJump,
+  AlignedLongJump,
   AlignedJump,
 
   NoUnaryOperation = -1
@@ -308,6 +310,8 @@ class Assembler {
 
   virtual bool bigEndian() = 0;
 
+  virtual uintptr_t maximumImmediateJump() = 0;
+
   virtual unsigned registerSize(ValueType type) = 0;
 
   virtual bool alwaysCondensed(BinaryOperation op) = 0;
@@ -324,8 +328,8 @@ class Assembler {
 
   virtual bool matchCall(void* returnAddress, void* target) = 0;
 
-  virtual void updateCall(UnaryOperation op, bool assertAlignment,
-                          void* returnAddress, void* newTarget) = 0;
+  virtual void updateCall(UnaryOperation op, void* returnAddress,
+                          void* newTarget) = 0;
 
   virtual uintptr_t getConstant(const void* src) = 0;
   virtual void setConstant(void* dst, uintptr_t constant) = 0;
diff --git a/src/compile.cpp b/src/compile.cpp
index f623615f4d..a3a93fb8c0 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -617,6 +617,7 @@ class TraceElement: public TraceHandler {
  public:
   static const unsigned VirtualCall = 1 << 0;
   static const unsigned TailCall = 1 << 1;
+  static const unsigned LongCall = 1 << 2;
 
   TraceElement(Context* context, object target, unsigned flags,
                TraceElement* next):
@@ -1876,12 +1877,18 @@ defaultThunk(MyThread* t);
 uintptr_t
 nativeThunk(MyThread* t);
 
+uintptr_t
+bootNativeThunk(MyThread* t);
+
 uintptr_t
 aioobThunk(MyThread* t);
 
 uintptr_t
 virtualThunk(MyThread* t, unsigned index);
 
+bool
+unresolved(MyThread* t, uintptr_t methodAddress);
+
 uintptr_t
 methodAddress(Thread* t, object method)
 {
@@ -1912,7 +1919,7 @@ findInterfaceMethodFromInstance(MyThread* t, object method, object instance)
   if (instance) {
     object target = findInterfaceMethod(t, method, objectClass(t, instance));
 
-    if (methodAddress(t, target) == defaultThunk(t)) {
+    if (unresolved(t, methodAddress(t, target))) {
       PROTECT(t, target);
 
       compile(t, codeAllocator(t), 0, target);
@@ -2478,24 +2485,51 @@ operandTypeForFieldCode(Thread* t, unsigned code)
   }
 }
 
+bool
+useLongJump(MyThread* t, uintptr_t target)
+{
+  uintptr_t reach = t->arch->maximumImmediateJump();
+  FixedAllocator* a = codeAllocator(t);
+  uintptr_t start = reinterpret_cast<uintptr_t>(a->base);
+  uintptr_t end = reinterpret_cast<uintptr_t>(a->base) + a->capacity;
+  assert(t, end - start < reach);
+
+  return (target > end && (target - start) > reach)
+    or (target < start && (end - target) > reach);
+}
+
 Compiler::Operand*
 compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall,
                     bool useThunk, unsigned rSize, Promise* addressPromise)
 {
   Compiler* c = frame->c;
 
-  unsigned flags = (tailCall ? Compiler::TailJump : 0);
+  unsigned flags = (TailCalls and tailCall ? Compiler::TailJump : 0);
+  unsigned traceFlags;
+
+  if (addressPromise == 0 and useLongJump(t, methodAddress(t, target))) {
+    flags |= Compiler::LongJumpOrCall;
+    traceFlags = TraceElement::LongCall;
+  } else {
+    traceFlags = 0;
+  }
+
+  if (useThunk
+      or (TailCalls and tailCall and (methodFlags(t, target) & ACC_NATIVE)))
+  {
+    flags |= Compiler::Aligned;
 
-  if (useThunk or (tailCall and (methodFlags(t, target) & ACC_NATIVE))) {
     if (TailCalls and tailCall) {
-      TraceElement* trace = frame->trace(target, TraceElement::TailCall);
+      traceFlags |= TraceElement::TailCall;
+
+      TraceElement* trace = frame->trace(target, traceFlags);
       Compiler::Operand* returnAddress = c->promiseConstant
         (new (frame->context->zone.allocate(sizeof(TraceElementPromise)))
          TraceElementPromise(t->m->system, trace), Compiler::AddressType);
 
       Compiler::Operand* result = c->stackCall
         (returnAddress,
-         flags | Compiler::Aligned,
+         flags,
          trace,
         rSize,
         operandTypeForFieldCode(t, methodReturnCode(t, target)),
@@ -2506,18 +2540,18 @@ compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall,
         (c->register_(t->arch->thread()), Compiler::AddressType,
          difference(&(t->tailAddress), t)));
 
-      if (methodFlags(t, target) & ACC_NATIVE) {
-        c->exit(c->constant(nativeThunk(t), Compiler::AddressType));
-      } else {
-        c->exit(c->constant(defaultThunk(t), Compiler::AddressType));
-      }
+      c->exit
+        (c->constant
+         ((methodFlags(t, target) & ACC_NATIVE)
+          ? nativeThunk(t) : defaultThunk(t),
+          Compiler::AddressType));
 
       return result;
     } else {
       return c->stackCall
        (c->constant(defaultThunk(t), Compiler::AddressType),
-        flags | Compiler::Aligned,
-        frame->trace(target, 0),
+        flags,
+        frame->trace(target, traceFlags),
         rSize,
         operandTypeForFieldCode(t, methodReturnCode(t, target)),
         methodParameterFootprint(t, target));
@@ -2567,7 +2601,7 @@ compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall)
       result = compileDirectInvoke
         (t, frame, target, tailCall, true, rSize, 0);
     }
-  } else if (methodAddress(t, target) == defaultThunk(t)
+  } else if (unresolved(t, methodAddress(t, target))
              or classNeedsInit(t, methodClass(t, target)))
   {
     result = compileDirectInvoke
@@ -5607,10 +5641,9 @@ compile(MyThread* t, Allocator* allocator, Context* context)
 }
 
 void
-updateCall(MyThread* t, UnaryOperation op, bool assertAlignment,
-           void* returnAddress, void* target)
+updateCall(MyThread* t, UnaryOperation op, void* returnAddress, void* target)
 {
-  t->arch->updateCall(op, assertAlignment, returnAddress, target);
+  t->arch->updateCall(op, returnAddress, target);
 }
 
 void*
@@ -5633,13 +5666,32 @@ compileMethod2(MyThread* t, void* ip)
   if (UNLIKELY(t->exception)) {
     return 0;
   } else {
-    void* address = reinterpret_cast<void*>(methodAddress(t, target));
+    uintptr_t address;
+    if ((methodFlags(t, target) & ACC_NATIVE)
+        and useLongJump(t, reinterpret_cast<uintptr_t>(ip)))
+    {
+      address = bootNativeThunk(t);
+    } else {
+      address = methodAddress(t, target);
+    }
 
     uint8_t* updateIp = static_cast<uint8_t*>(ip);
-
-    updateCall(t, (callNodeFlags(t, node) & TraceElement::TailCall)
-               ? Jump : Call, true, updateIp, address);
-    return address;
+    UnaryOperation op;
+    if (callNodeFlags(t, node) & TraceElement::LongCall) {
+      if (callNodeFlags(t, node) & TraceElement::TailCall) {
+        op = AlignedLongJump;
+      } else {
+        op = AlignedLongCall;
+      }
+    } else if (callNodeFlags(t, node) & TraceElement::TailCall) {
+      op = AlignedJump;
+    } else {
+      op = AlignedCall;
+    }
+
+    updateCall(t, op, updateIp, reinterpret_cast<void*>(address));
+
+    return reinterpret_cast<void*>(address);
   }
 }
 
@@ -5730,7 +5782,7 @@ resolveNative(MyThread* t, object method)
   initClass(t, methodClass(t, method));
 
   if (LIKELY(t->exception == 0)
-      and methodCompiled(t, method) == defaultThunk(t))
+      and unresolved(t, methodCompiled(t, method)))
   {
     void* function = resolveNativeMethod(t, method);
     if (UNLIKELY(function == 0)) {
@@ -6817,8 +6869,10 @@ class MyProcessor: public Processor {
     s(s),
     allocator(allocator),
     defaultThunk(0),
+    bootDefaultThunk(0),
     defaultVirtualThunk(0),
     nativeThunk(0),
+    bootNativeThunk(0),
     aioobThunk(0),
     callTable(0),
     methodTree(0),
@@ -7243,10 +7297,10 @@ class MyProcessor: public Processor {
       methodTree = methodTreeSentinal = makeTreeNode(t, 0, 0, 0);
       set(t, methodTree, TreeNodeLeft, methodTreeSentinal);
       set(t, methodTree, TreeNodeRight, methodTreeSentinal);
-
-      local::compileThunks(static_cast<MyThread*>(t), &codeAllocator, this);
     }
 
+    local::compileThunks(static_cast<MyThread*>(t), &codeAllocator, this);
+
     segFaultHandler.m = t->m;
     expect(t, t->m->system->success
            (t->m->system->handleSegFault(&segFaultHandler)));
@@ -7303,8 +7357,10 @@ class MyProcessor: public Processor {
   System* s;
   Allocator* allocator;
   uint8_t* defaultThunk;
+  uint8_t* bootDefaultThunk;
   uint8_t* defaultVirtualThunk;
   uint8_t* nativeThunk;
+  uint8_t* bootNativeThunk;
   uint8_t* aioobThunk;
   uint8_t* thunkTable;
   object callTable;
@@ -7340,8 +7396,7 @@ findCallNode(MyThread* t, void* address)
   object table = p->callTable;
 
   intptr_t key = reinterpret_cast<intptr_t>(address);
-  unsigned index = static_cast<unsigned>(key)
-    & (arrayLength(t, table) - 1);
+  unsigned index = static_cast<unsigned>(key) & (arrayLength(t, table) - 1);
 
   for (object n = arrayBody(t, table, index);
        n; n = callNodeNext(t, n))
@@ -7592,32 +7647,23 @@ fixupThunks(MyThread* t, BootImage* image, uint8_t* code)
 {
   MyProcessor* p = processor(t);
 
-  p->defaultThunk = code + image->defaultThunk;
+  p->bootDefaultThunk = code + image->defaultThunk;
+  p->bootNativeThunk = code + image->nativeThunk;
 
-  updateCall(t, LongCall, false, code + image->compileMethodCall,
+  updateCall(t, LongCall, code + image->compileMethodCall,
              voidPointer(local::compileMethod));
 
-  p->defaultVirtualThunk = code + image->defaultVirtualThunk;
-
-  updateCall(t, LongCall, false, code + image->compileVirtualMethodCall,
+  updateCall(t, LongCall, code + image->compileVirtualMethodCall,
              voidPointer(local::compileVirtualMethod));
 
-  p->nativeThunk = code + image->nativeThunk;
-
-  updateCall(t, LongCall, false, code + image->invokeNativeCall,
+  updateCall(t, LongCall, code + image->invokeNativeCall,
              voidPointer(invokeNative));
 
-  p->aioobThunk = code + image->aioobThunk;
-
-  updateCall(t, LongCall, false,
-             code + image->throwArrayIndexOutOfBoundsCall,
+  updateCall(t, LongCall, code + image->throwArrayIndexOutOfBoundsCall,
              voidPointer(throwArrayIndexOutOfBounds));
 
-  p->thunkTable = code + image->thunkTable;
-  p->thunkSize = image->thunkSize;
-
 #define THUNK(s) \
-  updateCall(t, LongJump, false, code + image->s##Call, voidPointer(s));
+  updateCall(t, LongJump, code + image->s##Call, voidPointer(s));
 
 #include "thunks.cpp"
 
@@ -7957,6 +8003,12 @@ defaultThunk(MyThread* t)
   return reinterpret_cast<uintptr_t>(processor(t)->defaultThunk);
 }
 
+uintptr_t
+bootDefaultThunk(MyThread* t)
+{
+  return reinterpret_cast<uintptr_t>(processor(t)->bootDefaultThunk);
+}
+
 uintptr_t
 defaultVirtualThunk(MyThread* t)
 {
@@ -7969,12 +8021,25 @@ nativeThunk(MyThread* t)
   return reinterpret_cast<uintptr_t>(processor(t)->nativeThunk);
 }
 
+uintptr_t
+bootNativeThunk(MyThread* t)
+{
+  return reinterpret_cast<uintptr_t>(processor(t)->bootNativeThunk);
+}
+
 uintptr_t
 aioobThunk(MyThread* t)
 {
   return reinterpret_cast<uintptr_t>(processor(t)->aioobThunk);
 }
 
+bool
+unresolved(MyThread* t, uintptr_t methodAddress)
+{
+  return methodAddress == defaultThunk(t)
+    or methodAddress == bootDefaultThunk(t);
+}
+
 uintptr_t
 compileVirtualThunk(MyThread* t, unsigned index)
 {
diff --git a/src/compiler.cpp b/src/compiler.cpp
index 9c1e989923..c832ebb28d 100644
--- a/src/compiler.cpp
+++ b/src/compiler.cpp
@@ -2957,7 +2957,13 @@ class CallEvent: public Event {
     UnaryOperation op;
 
     if (TailCalls and (flags & Compiler::TailJump)) {
-      if (flags & Compiler::Aligned) {
+      if (flags & Compiler::LongJumpOrCall) {
+        if (flags & Compiler::Aligned) {
+          op = AlignedLongJump;
+        } else {
+          op = LongJump;
+        }
+      } else if (flags & Compiler::Aligned) {
         op = AlignedJump;
       } else {
         op = Jump;
@@ -2993,6 +2999,12 @@ class CallEvent: public Event {
         - static_cast<int>(c->arch->argumentFootprint(c->parameterFootprint));
 
       c->assembler->popFrameForTailCall(c->alignedFrameSize, offset, ras, fps);
+    } else if (flags & Compiler::LongJumpOrCall) {
+      if (flags & Compiler::Aligned) {
+        op = AlignedLongCall;
+      } else {
+        op = LongCall;
+      }
     } else if (flags & Compiler::Aligned) {
       op = AlignedCall;
     } else {
diff --git a/src/compiler.h b/src/compiler.h
index 2a1b197cf4..71d53d6622 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -36,6 +36,7 @@ class Compiler {
   static const unsigned Aligned = 1 << 0;
   static const unsigned NoReturn = 1 << 1;
   static const unsigned TailJump = 1 << 2;
+  static const unsigned LongJumpOrCall = 1 << 3;
 
   enum OperandType {
     ObjectType,
diff --git a/src/x86.cpp b/src/x86.cpp
index 9d49ad9392..71287544a7 100644
--- a/src/x86.cpp
+++ b/src/x86.cpp
@@ -67,6 +67,8 @@ const unsigned FloatRegisterMask
 
 const unsigned FrameHeaderSize = 2;
 
+const int LongJumpRegister = r10;
+
 const unsigned StackAlignmentInBytes = 16;
 const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord;
 
@@ -410,7 +412,12 @@ appendImmediateTask(Context* c, Promise* promise, Promise* offset,
 
 class AlignmentPadding {
  public:
-  AlignmentPadding(Context* c): offset(c->code.length()), next(0) {
+  AlignmentPadding(Context* c, unsigned instructionOffset, unsigned alignment):
+    offset(c->code.length()),
+    instructionOffset(instructionOffset),
+    alignment(alignment),
+    next(0)
+  {
     if (c->lastBlock->firstPadding) {
       c->lastBlock->lastPadding->next = this;
     } else {
@@ -420,6 +427,8 @@ class AlignmentPadding {
   }
 
   unsigned offset;
+  unsigned instructionOffset;
+  unsigned alignment;
   AlignmentPadding* next;
 };
 
@@ -432,7 +441,7 @@ padding(AlignmentPadding* p, unsigned start, unsigned offset,
   unsigned index = 0;
   for (; p; p = p->next) {
     index = p->offset - offset;
-    while ((start + index + padding + 1) % 4) {
+    while ((start + index + padding + p->instructionOffset) % p->alignment) {
       ++ padding;
     }
 
@@ -448,7 +457,10 @@ detectFeature(unsigned ecx, unsigned edx);
 bool
 useSSE(ArchitectureContext* c)
 {
-  if (c->useNativeFeatures) {
+  if (BytesPerWord == 8) {
+    // amd64 implies SSE2 support
+    return true;
+  } else if (c->useNativeFeatures) {
     static int supported = -1;
     if (supported == -1) {
       supported = detectFeature(0, 0x2000000) // SSE 1
@@ -703,7 +715,7 @@ longCallC(Context* c, unsigned size, Assembler::Constant* a)
   assert(c, size == BytesPerWord);
 
   if (BytesPerWord == 8) {
-    Assembler::Register r(r10);
+    Assembler::Register r(LongJumpRegister);
     moveCR2(c, size, a, size, &r, 11);
     callR(c, size, &r);
   } else {
@@ -744,7 +756,7 @@ longJumpC(Context* c, unsigned size, Assembler::Constant* a)
   assert(c, size == BytesPerWord);
 
   if (BytesPerWord == 8) {
-    Assembler::Register r(r10);
+    Assembler::Register r(LongJumpRegister);
     moveCR2(c, size, a, size, &r, 11);
     jumpR(c, size, &r);
   } else {
@@ -775,17 +787,45 @@ callM(Context* c, unsigned size UNUSED, Assembler::Memory* a)
 void
 alignedCallC(Context* c, unsigned size, Assembler::Constant* a)
 {
-  new (c->zone->allocate(sizeof(AlignmentPadding))) AlignmentPadding(c);
+  new (c->zone->allocate(sizeof(AlignmentPadding))) AlignmentPadding(c, 1, 4);
   callC(c, size, a);
 }
 
+void
+alignedLongCallC(Context* c, unsigned size, Assembler::Constant* a)
+{
+  assert(c, size == BytesPerWord);
+
+  if (BytesPerWord == 8) {
+    new (c->zone->allocate(sizeof(AlignmentPadding)))
+      AlignmentPadding(c, 2, 8);
+    longCallC(c, size, a);
+  } else {
+    alignedCallC(c, size, a);
+  }
+}
+
 void
 alignedJumpC(Context* c, unsigned size, Assembler::Constant* a)
 {
-  new (c->zone->allocate(sizeof(AlignmentPadding))) AlignmentPadding(c);
+  new (c->zone->allocate(sizeof(AlignmentPadding))) AlignmentPadding(c, 1, 4);
   jumpC(c, size, a);
 }
 
+void
+alignedLongJumpC(Context* c, unsigned size, Assembler::Constant* a)
+{
+  assert(c, size == BytesPerWord);
+
+  if (BytesPerWord == 8) {
+    new (c->zone->allocate(sizeof(AlignmentPadding)))
+      AlignmentPadding(c, 2, 8);
+    longJumpC(c, size, a);
+  } else {
+    alignedJumpC(c, size, a);
+  }
+}
+
 void
 pushR(Context* c, unsigned size, Assembler::Register* a)
 {
@@ -1054,8 +1094,6 @@ void
 sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a,
           unsigned bSize UNUSED, Assembler::Register* b)
 {
-  assert(c, aSize == bSize);
-
   if (BytesPerWord == 4 and aSize == 8) {
     opcode(c, 0xf3);
     opcode(c, 0x0f, 0x7e);
@@ -2472,6 +2510,8 @@ populateTables(ArchitectureContext* c)
 
   uo[index(c, LongCall, C)] = CAST1(longCallC);
 
+  uo[index(c, AlignedLongCall, C)] = CAST1(alignedLongCallC);
+
   uo[index(c, Jump, R)] = CAST1(jumpR);
   uo[index(c, Jump, C)] = CAST1(jumpC);
   uo[index(c, Jump, M)] = CAST1(jumpM);
@@ -2480,6 +2520,8 @@ populateTables(ArchitectureContext* c)
 
   uo[index(c, LongJump, C)] = CAST1(longJumpC);
 
+  uo[index(c, AlignedLongJump, C)] = CAST1(alignedLongJumpC);
+
   bo[index(c, Negate, R, R)] = CAST2(negateRR);
   bo[index(c, FloatNegate, R, R)] = CAST2(floatNegateRR);
 
@@ -2610,6 +2652,10 @@ class MyArchitecture: public Assembler::Architecture {
     return false;
   }
 
+  virtual uintptr_t maximumImmediateJump() {
+    return 0x7FFFFFFF;
+  }
+
   virtual unsigned registerSize(ValueType type) {
     switch (type) {
     case ValueGeneral: return BytesPerWord;
@@ -2696,9 +2742,35 @@ class MyArchitecture: public Assembler::Architecture {
     return *instruction == 0xE8 and actualTarget == target;
   }
 
-  virtual void updateCall(UnaryOperation op, bool assertAlignment UNUSED,
-                          void* returnAddress, void* newTarget)
+  virtual void updateCall(UnaryOperation op, void* returnAddress,
+                          void* newTarget)
   {
+    bool assertAlignment;
+    switch (op) {
+    case AlignedCall:
+      op = Call;
+      assertAlignment = true;
+      break;
+
+    case AlignedJump:
+      op = Jump;
+      assertAlignment = true;
+      break;
+
+    case AlignedLongCall:
+      op = LongCall;
+      assertAlignment = true;
+      break;
+
+    case AlignedLongJump:
+      op = LongJump;
+      assertAlignment = true;
+      break;
+
+    default:
+      assertAlignment = false;
+    }
+
     if (BytesPerWord == 4 or op == Call or op == Jump) {
       uint8_t* instruction = static_cast<uint8_t*>(returnAddress) - 5;
 
@@ -2708,9 +2780,14 @@ class MyArchitecture: public Assembler::Architecture {
       assert(&c, (not assertAlignment)
              or reinterpret_cast<uintptr_t>(instruction + 1) % 4 == 0);
 
-      int32_t v = static_cast<uint8_t*>(newTarget)
+      intptr_t v = static_cast<uint8_t*>(newTarget)
         - static_cast<uint8_t*>(returnAddress);
-      memcpy(instruction + 1, &v, 4);
+
+      assert(&c, isInt32(v));
+
+      int32_t v32 = v;
+
+      memcpy(instruction + 1, &v32, 4);
     } else {
       uint8_t* instruction = static_cast<uint8_t*>(returnAddress) - 13;
 
@@ -3422,7 +3499,9 @@ class MyAssembler: public Assembler {
 
         index += size;
 
-        while ((b->start + index + padding + 1) % 4) {
+        while ((b->start + index + padding + p->instructionOffset)
+               % p->alignment)
+        {
           *(dst + b->start + index + padding) = 0x90;
           ++ padding;
         }
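
Note (not part of the patch): below is a minimal standalone sketch of the reach test that useLongJump() in src/compile.cpp performs -- deciding whether a target address lies within immediate-offset range (a signed 32-bit displacement, roughly +/- 2 GB on x86-64) of the code area. The names needsLongJump, reach, start, and end are illustrative only, not Avian APIs, and the example assumes a 64-bit build where uintptr_t is 64 bits wide.

// sketch.cpp -- illustrative only; compile with any C++ compiler on a 64-bit host.
#include <stdint.h>
#include <assert.h>
#include <stdio.h>

// An immediate-offset call/jump on x86-64 encodes a signed 32-bit
// displacement, so it can only reach targets within about +/- 2 GB.
const uintptr_t reach = 0x7FFFFFFF;

// Returns true when 'target' is too far from the code area [start, end)
// for an immediate-offset branch, i.e. a register-indirect "long"
// call or jump must be emitted instead.
bool needsLongJump(uintptr_t start, uintptr_t end, uintptr_t target)
{
  assert(end - start < reach); // the code area itself must fit within reach

  return (target > end && target - start > reach)
    || (target < start && end - target > reach);
}

int main()
{
  uintptr_t start = 0x100000000, end = 0x100100000; // hypothetical code area

  printf("%d\n", needsLongJump(start, end, 0x100050000)); // 0: reachable
  printf("%d\n", needsLongJump(start, end, 0x200000000)); // 1: needs long jump
  return 0;
}

The check measures the distance from both ends of the code area rather than from a single call site: a JIT-compiled caller may live anywhere in [start, end), so the patch conservatively requires the entire area to be within immediate reach of the target before it emits a short call or jump.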