diff --git a/src/assembler.h b/src/assembler.h
index e208a5bc6e..0075ed1c8e 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -264,6 +264,8 @@ class Assembler {
   virtual int thread() = 0;
   virtual int returnLow() = 0;
   virtual int returnHigh() = 0;
+  virtual int virtualCallClass() = 0;
+  virtual int virtualCallIndex() = 0;
 
   virtual bool condensedAddressing() = 0;
 
@@ -291,6 +293,8 @@ class Assembler {
   virtual unsigned frameHeaderSize() = 0;
   virtual unsigned frameReturnAddressSize() = 0;
   virtual unsigned frameFooterSize() = 0;
+  virtual unsigned returnAddressOffset() = 0;
+  virtual unsigned framePointerOffset() = 0;
   virtual void nextFrame(void** stack, void** base) = 0;
 
   virtual void plan
@@ -319,12 +323,16 @@ class Assembler {
 
   virtual Architecture* arch() = 0;
 
-  virtual void popReturnAddress(unsigned addressOffset) = 0;
 
   virtual void saveFrame(unsigned stackOffset, unsigned baseOffset) = 0;
   virtual void restoreFrame(unsigned stackOffset, unsigned baseOffset) = 0;
   virtual void pushFrame(unsigned argumentCount, ...) = 0;
   virtual void allocateFrame(unsigned footprint) = 0;
   virtual void popFrame() = 0;
+  virtual void popFrameForTailCall(unsigned footprint, int offset,
+                                   int returnAddressSurrogate,
+                                   int framePointerSurrogate) = 0;
+  virtual void popFrameAndPopArgumentsAndReturn(unsigned argumentFootprint)
+  = 0;
 
   virtual void apply(Operation op) = 0;
diff --git a/src/bootimage.h b/src/bootimage.h
index c8df51c0c6..4472ac28ae 100644
--- a/src/bootimage.h
+++ b/src/bootimage.h
@@ -44,11 +44,8 @@ class BootImage {
   uintptr_t codeBase;
 
   unsigned defaultThunk;
-  unsigned defaultTailThunk;
   unsigned defaultVirtualThunk;
-  unsigned tailHelperThunk;
   unsigned nativeThunk;
-  unsigned nativeTailThunk;
   unsigned aioobThunk;
 
   unsigned thunkTable;
@@ -56,7 +53,6 @@ class BootImage {
 
   unsigned compileMethodCall;
   unsigned compileVirtualMethodCall;
-  unsigned tailCallCall;
   unsigned invokeNativeCall;
   unsigned throwArrayIndexOutOfBoundsCall;
 
diff --git a/src/common.h b/src/common.h
index 0b5fc040a3..8ab02e4041 100644
--- a/src/common.h
+++ b/src/common.h
@@ -103,12 +103,19 @@ avg(unsigned a, unsigned b)
   return (a + b) / 2;
 }
 
+inline unsigned
+pad(unsigned n, unsigned alignment)
+{
+  return (n + (alignment - 1)) & ~(alignment - 1);
+}
+
 inline unsigned
 pad(unsigned n)
 {
-  return (n + (BytesPerWord - 1)) & ~(BytesPerWord - 1);
+  return pad(n, BytesPerWord);
 }
 
+
 inline unsigned
 ceiling(unsigned n, unsigned d)
 {
diff --git a/src/compile.cpp b/src/compile.cpp
index 979463fc7a..881ff5d30b 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -27,7 +27,7 @@ vmCall();
 
 namespace {
 
-const bool DebugCompile = false;
+const bool DebugCompile = true;
 const bool DebugNatives = false;
 const bool DebugCallTable = false;
 const bool DebugMethodTree = false;
@@ -1358,18 +1358,9 @@ objectPools(MyThread* t);
 uintptr_t
 defaultThunk(MyThread* t);
 
-uintptr_t
-defaultTailThunk(MyThread* t);
-
 uintptr_t
 nativeThunk(MyThread* t);
 
-uintptr_t
-nativeTailThunk(MyThread* t);
-
-uintptr_t
-tailHelperThunk(MyThread* t);
-
 uintptr_t
 aioobThunk(MyThread* t);
 
@@ -1940,65 +1931,54 @@ compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall,
 {
   Compiler* c = frame->c;
 
-  if (tailCall and methodParameterFootprint(t, target)
-      > methodParameterFootprint(t, frame->context->method))
-  {
-    return c->stackCall
-      (c->constant(tailHelperThunk(t)),
-       0,
-       frame->trace(target, 0),
-       rSize,
-       methodParameterFootprint(t, target));
-  } else if (tailCall and (methodFlags(t, target) & ACC_NATIVE)) {
-    return c->stackCall
-      (c->constant(nativeTailThunk(t)),
-       Compiler::TailCall,
-       frame->trace(target, TraceElement::TailCall),
-       rSize,
-       methodParameterFootprint(t, target));
-  } else {
-    unsigned flags = (tailCall ? Compiler::TailJump : 0);
+  unsigned flags = (tailCall ? Compiler::TailJump : 0);
 
-    if (useThunk) {
-      if (tailCall) {
-        Compiler::Operand* result = c->stackCall
-          (c->promiseConstant
-           (new (frame->context->zone.allocate(sizeof(TraceElementPromise)))
-            TraceElementPromise(t->m->system, frame->trace(0, 0))),
-           flags | Compiler::Aligned,
-           0,
-           rSize,
-           methodParameterFootprint(t, target));
+  if (useThunk or (tailCall and (methodFlags(t, target) & ACC_NATIVE))) {
+    if (tailCall) {
+      TraceElement* trace = frame->trace(target, TraceElement::TailCall);
+      Compiler::Operand* returnAddress = c->promiseConstant
+        (new (frame->context->zone.allocate(sizeof(TraceElementPromise)))
+         TraceElementPromise(t->m->system, trace));
 
-        c->call(c->constant(defaultTailThunk(t)),
-                0,
-                frame->trace(target, TraceElement::TailCall),
-                0,
-                0);
+      Compiler::Operand* result = c->stackCall
+        (returnAddress,
+         flags | Compiler::Aligned,
+         0,
+         rSize,
+         methodParameterFootprint(t, target));
 
-        return result;
+      c->store(BytesPerWord, returnAddress, BytesPerWord,
+               c->memory(c->register_(t->arch->thread()),
+                         difference(&(t->tailAddress), t)));
+
+      if (methodFlags(t, target) & ACC_NATIVE) {
+        c->jmp(c->constant(nativeThunk(t)));
       } else {
-        return c->stackCall
-          (c->constant(defaultThunk(t)),
-           flags | Compiler::Aligned,
-           frame->trace(target, 0),
-           rSize,
-           methodParameterFootprint(t, target));
+        c->jmp(c->constant(defaultThunk(t)));
       }
-    } else {
-      Compiler::Operand* address =
-        (addressPromise
-         ? c->promiseConstant(addressPromise)
-         : c->constant(methodAddress(t, target)));
 
+      return result;
+    } else {
       return c->stackCall
-        (address,
-         flags,
-         tailCall ? 0 : frame->trace
-         ((methodFlags(t, target) & ACC_NATIVE) ? target : 0, 0),
+        (c->constant(defaultThunk(t)),
+         flags | Compiler::Aligned,
+         frame->trace(target, 0),
          rSize,
          methodParameterFootprint(t, target));
     }
+  } else {
+    Compiler::Operand* address =
+      (addressPromise
+       ? c->promiseConstant(addressPromise)
+       : c->constant(methodAddress(t, target)));
+
+    return c->stackCall
+      (address,
+       flags,
+       tailCall ? 0 : frame->trace
+       ((methodFlags(t, target) & ACC_NATIVE) ? target : 0, 0),
+       rSize,
+       methodParameterFootprint(t, target));
   }
 }
@@ -2225,6 +2205,7 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
   Frame* frame = &myFrame;
   Compiler* c = frame->c;
   Context* context = frame->context;
+  bool tailCall = false;
 
   object code = methodCode(t, context->method);
   PROTECT(t, code);
@@ -2422,8 +2403,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
     } break;
 
     case areturn: {
-      handleExit(t, frame);
-      c->return_(BytesPerWord, frame->popObject());
+      Compiler::Operand* value = frame->popObject();
+      if (not tailCall) {
+        handleExit(t, frame);
+        c->return_(BytesPerWord, value);
+      }
     } return;
 
    case arraylength: {
@@ -3171,8 +3155,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
 
       assert(t, (methodFlags(t, target) & ACC_STATIC) == 0);
 
-      compileDirectInvoke
-        (t, frame, target, isTailCall(t, code, ip, context->method, target));
+      tailCall = isTailCall(t, code, ip, context->method, target);
+
+      compileDirectInvoke(t, frame, target, tailCall);
     } break;
 
    case invokestatic: {
@@ -3183,8 +3168,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
 
       assert(t, methodFlags(t, target) & ACC_STATIC);
 
-      compileDirectInvoke
-        (t, frame, target, isTailCall(t, code, ip, context->method, target));
+      tailCall = isTailCall(t, code, ip, context->method, target);
+
+      compileDirectInvoke(t, frame, target, tailCall);
     } break;
 
    case invokevirtual: {
@@ -3203,32 +3189,22 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
 
       unsigned rSize = resultSize(t, methodReturnCode(t, target));
 
-      bool tailCall = isTailCall(t, code, ip, context->method, target);
+      tailCall = isTailCall(t, code, ip, context->method, target);
 
-      Compiler::Operand* result;
-      if (tailCall and methodParameterFootprint(t, target)
-          > methodParameterFootprint(t, context->method))
-      {
-        result = c->stackCall
-          (c->constant(tailHelperThunk(t)),
-           0,
-           frame->trace(target, TraceElement::VirtualCall),
-           rSize,
-           parameterFootprint);
-      } else {
-        c->freezeRegister(t->arch->returnLow(),
-                          c->and_(BytesPerWord, c->constant(PointerMask),
-                                  c->memory(instance, 0, 0, 1)));
+      Compiler::Operand* classOperand = c->and_
+        (BytesPerWord, c->constant(PointerMask),
+         c->memory(instance, 0, 0, 1));
 
-        result = c->stackCall
-          (c->memory(c->register_(t->arch->returnLow()), offset, 0, 1),
-           tailCall ? Compiler::TailCall : 0,
-           frame->trace(0, 0),
-           rSize,
-           parameterFootprint);
+      c->freezeRegister(t->arch->virtualCallClass(), classOperand);
 
-        c->thawRegister(t->arch->returnLow());
-      }
+      Compiler::Operand* result = c->stackCall
+        (c->memory(classOperand, offset, 0, 1),
+         tailCall ? Compiler::TailCall : 0,
+         frame->trace(0, 0),
+         rSize,
+         parameterFootprint);
+
+      c->thawRegister(t->arch->virtualCallClass());
 
       frame->pop(parameterFootprint);
@@ -3251,8 +3227,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
 
     case ireturn:
     case freturn: {
-      handleExit(t, frame);
-      c->return_(4, frame->popInt());
+      Compiler::Operand* value = frame->popInt();
+      if (not tailCall) {
+        handleExit(t, frame);
+        c->return_(4, value);
+      }
     } return;
 
    case ishl: {
@@ -3545,8 +3524,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
 
     case lreturn:
     case dreturn: {
-      handleExit(t, frame);
-      c->return_(8, frame->popLong());
+      Compiler::Operand* value = frame->popLong();
+      if (not tailCall) {
+        handleExit(t, frame);
+        c->return_(8, value);
+      }
     } return;
 
    case lshl: {
@@ -3841,12 +3823,14 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
      return;
 
    case return_:
-      if (needsReturnBarrier(t, context->method)) {
-        c->storeStoreBarrier();
-      }
+      if (not tailCall) {
+        if (needsReturnBarrier(t, context->method)) {
+          c->storeStoreBarrier();
+        }
 
-      handleExit(t, frame);
-      c->return_(0, 0);
+        handleExit(t, frame);
+        c->return_(0, 0);
+      }
      return;
 
    case sipush:
@@ -4680,66 +4664,6 @@ compileVirtualMethod(MyThread* t)
   }
 }
 
-void*
-tailCall2(MyThread* t, void* ip)
-{
-  object node = findCallNode(t, ip);
-  PROTECT(t, node);
-
-  object target = callNodeTarget(t, node);
-  PROTECT(t, target);
-
-  if (callNodeFlags(t, node) & TraceElement::VirtualCall) {
-    target = resolveTarget(t, t->stack, target);
-  }
-
-  if (LIKELY(t->exception == 0)) {
-    compile(t, codeAllocator(t), 0, target);
-  }
-
-  if (UNLIKELY(t->exception)) {
-    return 0;
-  } else {
-    void* base = t->base;
-    void* stack = t->stack;
-    t->arch->nextFrame(&stack, &base);
-
-    if (t->arch->matchCall(t->arch->frameIp(stack),
-                           reinterpret_cast<void*>(tailHelperThunk(t))))
-    {
-      void* nextBase = base;
-      void* nextStack = stack;
-      t->arch->nextFrame(&nextStack, &nextBase);
-
-      if (((reinterpret_cast<uintptr_t>(nextStack)
-            - reinterpret_cast<uintptr_t>(stack))
-           / BytesPerWord)
-          - t->arch->frameFooterSize()
-          - t->arch->frameHeaderSize()
-          >= methodParameterFootprint(t, target))
-      {
-        // there's room for the parameters in the previous frame, so use it
-        t->base = base;
-        t->stack = stack;
-      }
-    }
-
-    return reinterpret_cast<void*>(methodAddress(t, target));
-  }
-}
-
-uint64_t
-tailCall(MyThread* t)
-{
-  void* r = tailCall2(t, t->arch->frameIp(t->stack));
-
-  if (UNLIKELY(t->exception)) {
-    unwind(t);
-  } else {
-    return reinterpret_cast<uintptr_t>(r);
-  }
-}
-
 uint64_t
 invokeNative2(MyThread* t, object method)
 {
@@ -5304,11 +5228,8 @@ class MyProcessor: public Processor {
     s(s),
     allocator(allocator),
     defaultThunk(0),
-    defaultTailThunk(0),
    defaultVirtualThunk(0),
-    tailHelperThunk(0),
    nativeThunk(0),
-    nativeTailThunk(0),
    aioobThunk(0),
    callTable(0),
    callTableSize(0),
@@ -5318,9 +5239,7 @@ class MyProcessor: public Processor {
    staticTableArray(0),
    virtualThunks(0),
    codeAllocator(s, 0, 0)
-  {
-    expect(s, codeAllocator.base);
-  }
+  { }
 
   virtual Thread*
   makeThread(Machine* m, object javaThread, Thread* parent)
@@ -5377,7 +5296,7 @@ class MyProcessor: public Processor {
   virtual void
   initVtable(Thread* t, object c)
   {
-    for (unsigned i = 0; i < classLength(t, c); ++i) {
+    for (int i = classLength(t, c) - 1; i >= 0; --i) {
      classVtable(t, c, i) = reinterpret_cast<void*>
        (virtualThunk(static_cast<MyThread*>(t), i));
    }
@@ -5715,11 +5634,8 @@ class MyProcessor: public Processor {
   System* s;
   Allocator* allocator;
   uint8_t* defaultThunk;
-  uint8_t* defaultTailThunk;
   uint8_t* defaultVirtualThunk;
-  uint8_t* tailHelperThunk;
   uint8_t* nativeThunk;
-  uint8_t* nativeTailThunk;
   uint8_t* aioobThunk;
   uint8_t* thunkTable;
   unsigned thunkSize;
@@ -6003,7 +5919,6 @@ fixupThunks(MyThread* t, BootImage* image, uint8_t* code)
   MyProcessor* p = processor(t);
 
   p->defaultThunk = code + image->defaultThunk;
-  p->defaultTailThunk = code + image->defaultTailThunk;
 
   updateCall(t, LongCall, false, code + image->compileMethodCall,
             voidPointer(::compileMethod));
@@ -6013,13 +5928,7 @@ fixupThunks(MyThread* t, BootImage* image, uint8_t* code)
   updateCall(t, LongCall, false, code + image->compileVirtualMethodCall,
             voidPointer(::compileVirtualMethod));
 
-  p->tailHelperThunk = code + image->tailHelperThunk;
-
-  updateCall(t, LongCall, false, code + image->tailCallCall,
-             voidPointer(::tailCall));
-
   p->nativeThunk = code + image->nativeThunk;
-  p->nativeTailThunk = code + image->nativeTailThunk;
 
   updateCall(t, LongCall, false, code + image->invokeNativeCall,
             voidPointer(invokeNative));
@@ -6145,13 +6054,8 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p,
   Zone zone(t->m->system, t->m->heap, 1024);
 
   ThunkContext defaultContext(t, &zone);
-  unsigned defaultTailOffset;
 
   { Assembler* a = defaultContext.context.assembler;
-
-    a->popReturnAddress(difference(&(t->tailAddress), t));
-
-    defaultTailOffset = a->length();
 
     a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t));
 
@@ -6173,13 +6077,13 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p,
 
   { Assembler* a = defaultVirtualContext.context.assembler;
 
-    Assembler::Register class_(t->arch->returnLow());
+    Assembler::Register class_(t->arch->virtualCallClass());
    Assembler::Memory virtualCallClass
      (t->arch->thread(), difference(&(t->virtualCallClass), t));
    a->apply(Move, BytesPerWord, RegisterOperand, &class_,
             BytesPerWord, MemoryOperand, &virtualCallClass);
 
-    Assembler::Register index(t->arch->returnHigh());
+    Assembler::Register index(t->arch->virtualCallIndex());
    Assembler::Memory virtualCallIndex
      (t->arch->thread(), difference(&(t->virtualCallIndex), t));
    a->apply(Move, BytesPerWord, RegisterOperand, &index,
@@ -6201,35 +6105,10 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p,
    a->endBlock(false)->resolve(0, 0);
   }
 
-  ThunkContext tailHelperContext(t, &zone);
-
-  { Assembler* a = tailHelperContext.context.assembler;
-
-    a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t));
-
-    Assembler::Register thread(t->arch->thread());
-    a->pushFrame(1, BytesPerWord, RegisterOperand, &thread);
-
-    Assembler::Constant proc(&(tailHelperContext.promise));
-    a->apply(LongCall, BytesPerWord, ConstantOperand, &proc);
-
-    a->restoreFrame(difference(&(t->stack), t), difference(&(t->base), t));
-
-    Assembler::Register result(t->arch->returnLow());
-    a->apply(Jump, BytesPerWord, RegisterOperand, &result);
-
-    a->endBlock(false)->resolve(0, 0);
-  }
-
   ThunkContext nativeContext(t, &zone);
-  unsigned nativeTailOffset;
 
   { Assembler* a = nativeContext.context.assembler;
 
-    a->popReturnAddress(difference(&(t->tailAddress), t));
-
-    nativeTailOffset = a->length();
-
    a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t));
 
    Assembler::Register thread(t->arch->thread());
@@ -6274,17 +6153,14 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p,
 
   p->thunkSize = pad(tableContext.context.assembler->length());
 
-  p->defaultTailThunk = finish
+  p->defaultThunk = finish
    (t, allocator, defaultContext.context.assembler, "default");
 
-  p->defaultThunk = p->defaultTailThunk + defaultTailOffset;
-
   { void* call;
    defaultContext.promise.listener->resolve
      (reinterpret_cast<intptr_t>(voidPointer(compileMethod)), &call);
 
    if (image) {
-      image->defaultTailThunk = p->defaultTailThunk - imageBase;
      image->defaultThunk = p->defaultThunk - imageBase;
      image->compileMethodCall = static_cast<uint8_t*>(call) - imageBase;
    }
@@ -6304,30 +6180,14 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p,
    }
   }
 
-  p->tailHelperThunk = finish
-    (t, allocator, defaultContext.context.assembler, "tailHelper");
-
-  { void* call;
-    tailHelperContext.promise.listener->resolve
-      (reinterpret_cast<intptr_t>(voidPointer(tailCall)), &call);
-
-    if (image) {
-      image->tailHelperThunk = p->tailHelperThunk - imageBase;
-      image->tailCallCall = static_cast<uint8_t*>(call) - imageBase;
-    }
-  }
-
-  p->nativeTailThunk = finish
+  p->nativeThunk = finish
    (t, allocator, nativeContext.context.assembler, "native");
 
-  p->nativeThunk = p->nativeTailThunk + nativeTailOffset;
-
   { void* call;
    nativeContext.promise.listener->resolve
      (reinterpret_cast<intptr_t>(voidPointer(invokeNative)), &call);
 
    if (image) {
-      image->nativeTailThunk = p->nativeTailThunk - imageBase;
      image->nativeThunk = p->nativeThunk - imageBase;
      image->invokeNativeCall = static_cast<uint8_t*>(call) - imageBase;
    }
   }
@@ -6394,36 +6254,18 @@ defaultThunk(MyThread* t)
   return reinterpret_cast<uintptr_t>(processor(t)->defaultThunk);
 }
 
-uintptr_t
-defaultTailThunk(MyThread* t)
-{
-  return reinterpret_cast<uintptr_t>(processor(t)->defaultTailThunk);
-}
-
 uintptr_t
 defaultVirtualThunk(MyThread* t)
 {
   return reinterpret_cast<uintptr_t>(processor(t)->defaultVirtualThunk);
 }
 
-uintptr_t
-tailHelperThunk(MyThread* t)
-{
-  return reinterpret_cast<uintptr_t>(processor(t)->tailHelperThunk);
-}
-
 uintptr_t
 nativeThunk(MyThread* t)
 {
   return reinterpret_cast<uintptr_t>(processor(t)->nativeThunk);
 }
 
-uintptr_t
-nativeTailThunk(MyThread* t)
-{
-  return reinterpret_cast<uintptr_t>(processor(t)->nativeTailThunk);
-}
-
 uintptr_t
 aioobThunk(MyThread* t)
 {
@@ -6438,19 +6280,23 @@ compileVirtualThunk(MyThread* t, unsigned index)
 
   ResolvedPromise indexPromise(index);
   Assembler::Constant indexConstant(&indexPromise);
-  Assembler::Register indexRegister(t->arch->returnHigh());
+  Assembler::Register indexRegister(t->arch->virtualCallIndex());
   a->apply(Move, BytesPerWord, ConstantOperand, &indexConstant,
-           BytesPerWord, ConstantOperand, &indexRegister);
+           BytesPerWord, RegisterOperand, &indexRegister);
 
   ResolvedPromise defaultVirtualThunkPromise(defaultVirtualThunk(t));
   Assembler::Constant thunk(&defaultVirtualThunkPromise);
   a->apply(Jump, BytesPerWord, ConstantOperand, &thunk);
 
+  a->endBlock(false)->resolve(0, 0);
+
   uint8_t* start = static_cast<uint8_t*>
    (codeAllocator(t)->allocate(a->length()));
 
   a->writeTo(start);
 
+  logCompile(t, start, a->length(), 0, "virtualThunk", 0);
+
   return reinterpret_cast<uintptr_t>(start);
 }
 
@@ -6460,7 +6306,7 @@ virtualThunk(MyThread* t, unsigned index)
   MyProcessor* p = processor(t);
 
   if (p->virtualThunks == 0 or wordArrayLength(t, p->virtualThunks) <= index) {
-    object newArray = makeWordArray(t, nextPowerOfTwo(index));
+    object newArray = makeWordArray(t, nextPowerOfTwo(index + 1));
    if (p->virtualThunks) {
      memcpy(&wordArrayBody(t, newArray, 0),
             &wordArrayBody(t, p->virtualThunks, 0),
diff --git a/src/compiler.cpp b/src/compiler.cpp
index edcc82d3a2..0a90033ce9 100644
--- a/src/compiler.cpp
+++ b/src/compiler.cpp
@@ -638,42 +638,43 @@ class Event {
 };
 
 unsigned
-usableFrameSize(Context* c)
+totalFrameSize(Context* c)
 {
-  return c->alignedFrameSize - c->arch->frameFooterSize();
+  return c->alignedFrameSize
+    + c->arch->frameHeaderSize()
+    + c->arch->argumentFootprint(c->parameterFootprint);
 }
 
 int
-frameIndex(Context* c, int index)
+frameIndex(Context* c, int localIndex)
 {
-  assert(c, static_cast<int>
-         (usableFrameSize(c) + c->parameterFootprint - index - 1) >= 0);
+  assert(c, localIndex >= 0);
+  assert(c, localIndex < static_cast<int>
+         (c->parameterFootprint + c->localFootprint));
 
-  return usableFrameSize(c) + c->parameterFootprint - index - 1;
+  int index = c->alignedFrameSize + c->parameterFootprint - localIndex;
+
+  if (localIndex < static_cast<int>(c->parameterFootprint)) {
+    index += c->arch->frameHeaderSize();
+  } else {
+    index -= c->arch->frameFooterSize();
+  }
+
+  assert(c, index >= 0);
+
+  return index;
 }
 
 unsigned
 frameIndexToOffset(Context* c, unsigned frameIndex)
 {
-  return ((frameIndex >= usableFrameSize(c)) ?
-          (frameIndex
-           + (c->arch->frameFooterSize() * 2)
-           + c->arch->frameHeaderSize()) :
-          (frameIndex
-           + c->arch->frameFooterSize())) * BytesPerWord;
+  return (frameIndex + c->arch->frameFooterSize()) * BytesPerWord;
 }
 
 unsigned
 offsetToFrameIndex(Context* c, unsigned offset)
 {
-  unsigned normalizedOffset = offset / BytesPerWord;
-
-  return ((normalizedOffset >= c->alignedFrameSize) ?
-          (normalizedOffset
-           - (c->arch->frameFooterSize() * 2)
-           - c->arch->frameHeaderSize()) :
-          (normalizedOffset
-           - c->arch->frameFooterSize()));
+  return (offset / BytesPerWord) - c->arch->frameFooterSize();
 }
 
 class FrameIterator {
@@ -1172,7 +1173,7 @@ pickAnyFrameTarget(Context* c, Value* v)
 {
   Target best;
 
-  unsigned count = usableFrameSize(c) + c->parameterFootprint;
+  unsigned count = totalFrameSize(c);
   for (unsigned i = 0; i < count; ++i) {
    Target mine(i, MemoryOperand, frameCost(c, v, i));
    if (mine.cost == 1) {
@@ -2271,8 +2272,7 @@ saveLocals(Context* c, Event* e)
    if (local->value) {
      if (DebugReads) {
        fprintf(stderr, "local save read %p at %d of %d\n",
-                local->value, ::frameIndex(c, li),
-                usableFrameSize(c) + c->parameterFootprint);
+                local->value, ::frameIndex(c, li), totalFrameSize(c));
      }
 
      addRead(c, e, local->value, read
@@ -2291,48 +2291,61 @@ class CallEvent: public Event {
    address(address),
    traceHandler(traceHandler),
    result(result),
+    returnAddressSurrogate(0),
+    framePointerSurrogate(0),
    popIndex(0),
    padIndex(0),
    padding(0),
    flags(flags),
-    resultSize(resultSize)
+    resultSize(resultSize),
+    stackArgumentFootprint(stackArgumentFootprint)
   {
    uint32_t registerMask = ~0;
    Stack* s = argumentStack;
    unsigned index = 0;
    unsigned frameIndex;
 
+    int returnAddressIndex = -1;
+    int framePointerIndex = -1;
+
    if (flags & (Compiler::TailJump | Compiler::TailCall)) {
-      frameIndex = usableFrameSize(c);
+      assert(c, argumentCount == 0);
+
+      unsigned base = c->alignedFrameSize - c->arch->frameFooterSize();
+      returnAddressIndex = base + c->arch->returnAddressOffset();
+      framePointerIndex = base + c->arch->framePointerOffset();
+
+      frameIndex = totalFrameSize(c) - c->arch->argumentFootprint
+        (stackArgumentFootprint);
    } else {
      frameIndex = 0;
-    }
 
-    if (argumentCount) {
-      while (true) {
-        Read* target;
-        if (index < c->arch->argumentRegisterCount()) {
-          int number = c->arch->argumentRegister(index);
+      if (argumentCount) {
+        while (true) {
+          Read* target;
+          if (index < c->arch->argumentRegisterCount()) {
+            int number = c->arch->argumentRegister(index);
 
-          if (DebugReads) {
-            fprintf(stderr, "reg %d arg read %p\n", number, s->value);
+            if (DebugReads) {
+              fprintf(stderr, "reg %d arg read %p\n", number, s->value);
+            }
+
+            target = fixedRegisterRead(c, number);
+            registerMask &= ~(1 << number);
+          } else {
+            if (DebugReads) {
+              fprintf(stderr, "stack %d arg read %p\n", frameIndex, s->value);
+            }
+
+            target = read(c, SiteMask(1 << MemoryOperand, 0, frameIndex));
+            ++ frameIndex;
          }
+          addRead(c, this, s->value, target);
 
-          target = fixedRegisterRead(c, number);
-          registerMask &= ~(1 << number);
-        } else {
-          if (DebugReads) {
-            fprintf(stderr, "stack %d arg read %p\n", frameIndex, s->value);
+          if ((++ index) < argumentCount) {
+            s = s->next;
+          } else {
+            break;
          }
-
-          target = read(c, SiteMask(1 << MemoryOperand, 0, frameIndex));
-          ++ frameIndex;
-        }
-        addRead(c, this, s->value, target);
-
-        if ((++ index) < argumentCount) {
-          s = s->next;
-        } else {
-          break;
        }
      }
    }
  }
@@ -2355,52 +2368,60 @@ class CallEvent: public Event {
         (typeMask, registerMask & planRegisterMask, AnyFrameIndex)));
    }
 
-    if ((flags & (Compiler::TailJump | Compiler::TailCall)) == 0) {
-      int footprint = stackArgumentFootprint;
-      for (Stack* s = stackBefore; s; s = s->next) {
-        if (s->value) {
-          if (footprint > 0) {
-            if (DebugReads) {
-              fprintf(stderr, "stack arg read %p at %d of %d\n",
-                      s->value, frameIndex,
-                      usableFrameSize(c) + c->parameterFootprint);
-            }
+    int footprint = stackArgumentFootprint;
+    for (Stack* s = stackBefore; s; s = s->next) {
+      if (s->value) {
+        if (footprint > 0) {
+          if (DebugReads) {
+            fprintf(stderr, "stack arg read %p at %d of %d\n",
+                    s->value, frameIndex, totalFrameSize(c));
+          }
+          if (static_cast<int>(frameIndex) == returnAddressIndex) {
+            returnAddressSurrogate = s->value;
+            addRead(c, this, s->value, anyRegisterRead(c));
+          } else if (static_cast<int>(frameIndex) == framePointerIndex) {
+            framePointerSurrogate = s->value;
+            addRead(c, this, s->value, anyRegisterRead(c));
+          } else {
            addRead(c, this, s->value, read
                    (c, SiteMask(1 << MemoryOperand, 0, frameIndex)));
-          } else {
-            unsigned logicalIndex = ::frameIndex
-              (c, s->index + c->localFootprint);
-
-            if (DebugReads) {
-              fprintf(stderr, "stack save read %p at %d of %d\n",
-                      s->value, logicalIndex,
-                      usableFrameSize(c) + c->parameterFootprint);
-            }
-
-            addRead(c, this, s->value, read
-                    (c, SiteMask(1 << MemoryOperand, 0, logicalIndex)));
          }
        }
-
-        -- footprint;
-
-        if (footprint == 0) {
+        else if ((flags & (Compiler::TailJump | Compiler::TailCall)) == 0)
+        {
          unsigned logicalIndex = ::frameIndex
            (c, s->index + c->localFootprint);
 
-          assert(c, logicalIndex >= frameIndex);
+          if (DebugReads) {
+            fprintf(stderr, "stack save read %p at %d of %d\n",
+                    s->value, logicalIndex, totalFrameSize(c));
+          }
 
-          padding = logicalIndex - frameIndex;
-          padIndex = s->index + c->localFootprint;
+          addRead(c, this, s->value, read
+                  (c, SiteMask(1 << MemoryOperand, 0, logicalIndex)));
        }
-
-        ++ frameIndex;
      }
+      -- footprint;
+
+      if (footprint == 0) {
+        unsigned logicalIndex = ::frameIndex
+          (c, s->index + c->localFootprint);
+
+        assert(c, logicalIndex >= frameIndex);
+
+        padding = logicalIndex - frameIndex;
+        padIndex = s->index + c->localFootprint;
+      }
+
+      ++ frameIndex;
+    }
+
+    if ((flags & (Compiler::TailJump | Compiler::TailCall)) == 0) {
      popIndex
-        = usableFrameSize(c)
-        + c->parameterFootprint
+        = c->alignedFrameSize
+        - c->arch->frameFooterSize()
        - (stackBefore ? stackBefore->index + 1 - stackArgumentFootprint : 0)
        - c->localFootprint;
@@ -2417,28 +2438,40 @@ class CallEvent: public Event {
 
   virtual void compile(Context* c) {
    UnaryOperation op;
-    if (flags & Compiler::TailJump) {
-      c->assembler->popFrame();
-
-      if (flags & Compiler::Aligned) {
-        op = AlignedJump;
+    if (flags & (Compiler::TailJump | Compiler::TailCall)) {
+      if (flags & Compiler::TailJump) {
+        if (flags & Compiler::Aligned) {
+          op = AlignedJump;
+        } else {
+          op = Jump;
+        }
      } else {
-        op = Jump;
+        if (flags & Compiler::Aligned) {
+          op = AlignedCall;
+        } else {
+          op = Call;
+        }
      }
-    } else if (flags & Compiler::TailCall) {
-      c->assembler->popFrame();
-      if (flags & Compiler::Aligned) {
-        op = AlignedCall;
-      } else {
-        op = Call;
-      }
+      assert(c, returnAddressSurrogate == 0
+             or returnAddressSurrogate->source->type(c) == RegisterOperand);
+      assert(c, framePointerSurrogate == 0
+             or framePointerSurrogate->source->type(c) == RegisterOperand);
+
+      int ras = (returnAddressSurrogate ? static_cast<RegisterSite*>
+                 (returnAddressSurrogate->source)->number : NoRegister);
+      int fps = (framePointerSurrogate ? static_cast<RegisterSite*>
+                 (framePointerSurrogate->source)->number : NoRegister);
+
+      int offset
+        = static_cast<int>(c->arch->argumentFootprint(c->parameterFootprint))
+        - static_cast<int>(c->arch->argumentFootprint(stackArgumentFootprint));
+
+      c->assembler->popFrameForTailCall(c->alignedFrameSize, offset, ras, fps);
+    } else if (flags & Compiler::Aligned) {
+      op = AlignedCall;
    } else {
-      if (flags & Compiler::Aligned) {
-        op = AlignedCall;
-      } else {
-        op = Call;
-      }
+      op = Call;
    }
 
    apply(c, op, BytesPerWord, address->source, 0);
@@ -2461,11 +2494,14 @@ class CallEvent: public Event {
   Value* address;
   TraceHandler* traceHandler;
   Value* result;
+  Value* returnAddressSurrogate;
+  Value* framePointerSurrogate;
   unsigned popIndex;
   unsigned padIndex;
   unsigned padding;
   unsigned flags;
   unsigned resultSize;
+  unsigned stackArgumentFootprint;
 };
 
 void
@@ -2503,8 +2539,8 @@ class ReturnEvent: public Event {
      popRead(c, this, r->value);
    }
 
-    c->assembler->popFrame();
-    c->assembler->apply(Return);
+    c->assembler->popFrameAndPopArgumentsAndReturn
+      (c->arch->argumentFootprint(c->parameterFootprint));
   }
 
   Value* value;
@@ -4875,7 +4911,7 @@ class MyCompiler: public Compiler {
    c.localFootprint = localFootprint;
    c.alignedFrameSize = alignedFrameSize;
 
-    unsigned frameResourceCount = usableFrameSize(&c) + parameterFootprint;
+    unsigned frameResourceCount = totalFrameSize(&c);
 
    c.frameResources = static_cast<FrameResource*>
      (c.zone->allocate(sizeof(FrameResource) * frameResourceCount));
@@ -4884,9 +4920,12 @@ class MyCompiler: public Compiler {
      new (c.frameResources + i) FrameResource;
    }
 
+    unsigned base = alignedFrameSize - c.arch->frameFooterSize();
+    c.frameResources[base + c.arch->returnAddressOffset()].reserved = true;
+    c.frameResources[base + c.arch->framePointerOffset()].reserved = true;
+
    // leave room for logical instruction -1
-    unsigned codeSize = sizeof(LogicalInstruction*)
-      * (logicalCodeLength + 1);
+    unsigned codeSize = sizeof(LogicalInstruction*) * (logicalCodeLength + 1);
    c.logicalCode = static_cast<LogicalInstruction**>
      (c.zone->allocate(codeSize));
    memset(c.logicalCode, 0, codeSize);
diff --git a/src/powerpc.cpp b/src/powerpc.cpp
index 13d0ab819c..19c217a3e8 100644
--- a/src/powerpc.cpp
+++ b/src/powerpc.cpp
@@ -158,6 +158,8 @@ carry16(intptr_t v)
 
 const unsigned FrameFooterSize = 6;
 
+const unsigned StackAlignmentInBytes = 16;
+
 const int StackRegister = 1;
 const int ThreadRegister = 13;
 
@@ -1699,8 +1701,12 @@ class MyArchitecture: public Assembler::Architecture {
    }
   }
 
+  virtual unsigned stackPadding(unsigned footprint) {
+    return max(footprint, StackAlignmentInWords);
+  }
+
   virtual unsigned argumentFootprint(unsigned footprint) {
-    return footprint;
+    return max(pad(footprint, StackAlignmentInWords), StackAlignmentInWords);
   }
 
   virtual unsigned argumentRegisterCount() {
@@ -1755,7 +1761,7 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual unsigned alignFrameSize(unsigned sizeInWords) {
-    const unsigned alignment = 16 / BytesPerWord;
+    const unsigned alignment = StackAlignmentInBytes / BytesPerWord;
    return (ceiling(sizeInWords + FrameFooterSize, alignment) * alignment);
   }
 
@@ -1963,13 +1969,71 @@ class MyAssembler: public Assembler {
    Memory stackSrc(StackRegister, 0);
    moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &stack);
 
-    Assembler::Register returnAddress(0);
-    Assembler::Memory returnAddressSrc(StackRegister, 8);
+    Register returnAddress(0);
+    Memory returnAddressSrc(StackRegister, 8);
    moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &returnAddress);
 
    issue(&c, mtlr(returnAddress.low));
   }
 
+  virtual void popFrameForTailCall(unsigned footprint,
+                                   int offset,
+                                   int returnAddressSurrogate,
+                                   int framePointerSurrogate)
+  {
+    if (offset) {
+      Register tmp(0);
+      Memory returnAddressSrc(StackRegister, 8 + (footprint * BytesPerWord));
+      moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &tmp);
+
+      issue(&c, mtlr(tmp.low));
+
+      Memory stackSrc(StackRegister, footprint * BytesPerWord);
+      moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp);
+
+      Memory stackDst(StackRegister, (footprint - offset) * BytesPerWord);
+      moveAndUpdateRM(&c, BytesPerWord, &tmp, BytesPerWord, &stackDst);
+
+      if (returnAddressSurrogate != NoRegister) {
+        assert(&c, offset > 0);
+
+        Register ras(returnAddressSurrogate);
+        Memory dst(StackRegister, 8 + (offset * BytesPerWord));
+        moveRM(&c, BytesPerWord, &ras, BytesPerWord, &dst);
+      }
+
+      if (framePointerSurrogate != NoRegister) {
+        assert(&c, offset > 0);
+
+        Register fps(framePointerSurrogate);
+        Memory dst(StackRegister, offset * BytesPerWord);
+        moveRM(&c, BytesPerWord, &fps, BytesPerWord, &dst);
+      }
+    } else {
+      popFrame();
+    }
+  }
+
+  virtual void popFrameAndPopArgumentsAndReturn(unsigned argumentFootprint) {
+    popFrame();
+
+    assert(&c, argumentFootprint >= StackAlignmentInWords);
+    assert(&c, (argumentFootprint % StackAlignmentInWords) == 0);
+
+    if (argumentFootprint > StackAlignmentInWords) {
+      Register tmp(0);
+      Memory stackSrc(StackRegister, 0);
+      moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp);
+
+      Memory stackDst(StackRegister,
+                      (argumentFootprint - StackAlignmentInWords)
+                      * BytesPerWord);
+      moveAndUpdateRM(&c, BytesPerWord, &tmp, BytesPerWord, &stackDst);
+    }
+
+    return_(&c);
+  }
+
   virtual void apply(Operation op) {
    arch_->c.operations[op](&c);
   }
diff --git a/src/x86.cpp b/src/x86.cpp
index 05ec897bfb..53bce312d5 100644
--- a/src/x86.cpp
+++ b/src/x86.cpp
@@ -39,6 +39,9 @@ enum {
 
 const unsigned FrameHeaderSize = 2;
 
+const unsigned StackAlignmentInBytes = 16;
+const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord;
+
 inline bool
 isInt8(intptr_t v)
 {
@@ -2022,6 +2025,14 @@ class MyArchitecture: public Assembler::Architecture {
    return (BytesPerWord == 4 ? rdx : NoRegister);
   }
 
+  virtual int virtualCallClass() {
+    return rax;
+  }
+
+  virtual int virtualCallIndex() {
+    return rdx;
+  }
+
   virtual bool condensedAddressing() {
    return true;
   }
@@ -2042,9 +2053,14 @@ class MyArchitecture: public Assembler::Architecture {
    }
   }
 
+  virtual unsigned stackPadding(unsigned footprint) {
+    return max(footprint > argumentRegisterCount() ?
+               footprint - argumentRegisterCount() : 0,
+               StackAlignmentInWords);
+  }
+
   virtual unsigned argumentFootprint(unsigned footprint) {
-    return footprint > argumentRegisterCount() ?
-      footprint - argumentRegisterCount() : 0;
+    return max(pad(footprint, StackAlignmentInWords), StackAlignmentInWords);
   }
 
   virtual unsigned argumentRegisterCount() {
@@ -2125,7 +2141,7 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual unsigned alignFrameSize(unsigned sizeInWords) {
-    const unsigned alignment = 16 / BytesPerWord;
+    const unsigned alignment = StackAlignmentInBytes / BytesPerWord;
    return (ceiling(sizeInWords + FrameHeaderSize, alignment) * alignment)
      - FrameHeaderSize;
   }
@@ -2146,6 +2162,14 @@ class MyArchitecture: public Assembler::Architecture {
    return 0;
   }
 
+  virtual unsigned returnAddressOffset() {
+    return 1;
+  }
+
+  virtual unsigned framePointerOffset() {
+    return 0;
+  }
+
   virtual void nextFrame(void** stack, void** base) {
    assert(&c, *static_cast<void**>(*base) != *base);
 
@@ -2305,11 +2329,6 @@ class MyAssembler: public Assembler {
    return arch_;
   }
 
-  virtual void popReturnAddress(unsigned addressOffset) {
-    Memory addressDst(rbx, addressOffset);
-    popM(&c, BytesPerWord, &addressDst);
-  }
-
   virtual void saveFrame(unsigned stackOffset, unsigned baseOffset) {
    Register stack(rsp);
    Memory stackDst(rbx, stackOffset);
@@ -2392,6 +2411,72 @@ class MyAssembler: public Assembler {
    popR(&c, BytesPerWord, &base);
   }
 
+  virtual void popFrameForTailCall(unsigned footprint,
+                                   int offset,
+                                   int returnAddressSurrogate,
+                                   int framePointerSurrogate)
+  {
+    if (offset) {
+      Register tmp(c.client->acquireTemporary());
+
+      Memory returnAddressSrc(rsp, (footprint + 1) * BytesPerWord);
+      moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &tmp);
+
+      Memory returnAddressDst(rsp, (footprint - offset + 1) * BytesPerWord);
+      moveRM(&c, BytesPerWord, &tmp, BytesPerWord, &returnAddressDst);
+
+      c.client->releaseTemporary(tmp.low);
+
+      Memory baseSrc(rbp, footprint * BytesPerWord);
+      Register base(rbp);
+      moveMR(&c, BytesPerWord, &baseSrc, BytesPerWord, &base);
+
+      Register stack(rsp);
+      Constant footprintConstant(resolved(&c, footprint * BytesPerWord));
+      addCR(&c, BytesPerWord, &footprintConstant, BytesPerWord, &stack);
+
+      if (returnAddressSurrogate != NoRegister) {
+        assert(&c, offset > 0);
+
+        Register ras(returnAddressSurrogate);
+        Memory dst(rsp, offset * BytesPerWord);
+        moveRM(&c, BytesPerWord, &ras, BytesPerWord, &dst);
+      }
+
+      if (framePointerSurrogate != NoRegister) {
+        assert(&c, offset > 0);
+
+        Register fps(framePointerSurrogate);
+        Memory dst(rsp, (offset - 1) * BytesPerWord);
+        moveRM(&c, BytesPerWord, &fps, BytesPerWord, &dst);
+      }
+    } else {
+      popFrame();
+    }
+  }
+
+  virtual void popFrameAndPopArgumentsAndReturn(unsigned argumentFootprint) {
+    popFrame();
+
+    assert(&c, argumentFootprint >= StackAlignmentInWords);
+    assert(&c, (argumentFootprint % StackAlignmentInWords) == 0);
+
+    if (argumentFootprint > StackAlignmentInWords) {
+      Register returnAddress(rcx);
+      popR(&c, BytesPerWord, &returnAddress);
+
+      Register stack(rsp);
+      Constant adjustment
+        (resolved(&c, (argumentFootprint - StackAlignmentInWords)
+                  * BytesPerWord));
+      addCR(&c, BytesPerWord, &adjustment, BytesPerWord, &stack);
+
+      jumpR(&c, BytesPerWord, &returnAddress);
+    } else {
+      return_(&c);
+    }
+  }
+
   virtual void apply(Operation op) {
    arch_->c.operations[op](&c);
   }