From 2608a2ee433a993ee634dddf14bdef2a0dd2aa88 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 26 May 2009 19:02:39 -0600 Subject: [PATCH] progress towards powerpc continuation and tail call support --- src/assembler.h | 3 +- src/common.h | 2 +- src/compile-powerpc.S | 163 +++++++++++++++++++++++++++++++++++++----- src/compile-x86.S | 23 +++--- src/compile.cpp | 17 ++--- src/compiler.cpp | 22 +++--- src/powerpc.S | 2 + src/powerpc.cpp | 49 ++++++++++--- src/x86.cpp | 8 +++ 9 files changed, 231 insertions(+), 58 deletions(-) diff --git a/src/assembler.h b/src/assembler.h index c89aee8f67..26b1f9b509 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -232,7 +232,7 @@ class Assembler { class Memory: public Operand { public: - Memory(int base, int offset, int index = NoRegister, unsigned scale = 0): + Memory(int base, int offset, int index = NoRegister, unsigned scale = 1): base(base), offset(offset), index(index), scale(scale) { } @@ -327,6 +327,7 @@ class Assembler { virtual void saveFrame(unsigned stackOffset, unsigned baseOffset) = 0; virtual void pushFrame(unsigned argumentCount, ...) = 0; virtual void allocateFrame(unsigned footprint) = 0; + virtual void adjustFrame(unsigned footprint) = 0; virtual void popFrame() = 0; virtual void popFrameForTailCall(unsigned footprint, int offset, int returnAddressSurrogate, diff --git a/src/common.h b/src/common.h index 8ab02e4041..94d351d5d3 100644 --- a/src/common.h +++ b/src/common.h @@ -30,7 +30,7 @@ #endif #if (defined __i386__) || (defined __POWERPC__) -# define LD "d" +# define LD "ld" # define LLD "lld" #ifdef __APPLE__ # define ULD "lu" diff --git a/src/compile-powerpc.S b/src/compile-powerpc.S index 3a72334fcf..034ca9321e 100644 --- a/src/compile-powerpc.S +++ b/src/compile-powerpc.S @@ -17,14 +17,28 @@ #define ARGUMENT_BASE BYTES_PER_WORD * LINKAGE_AREA #define LOCAL(x) L##x - + #ifdef __APPLE__ -.globl _vmInvoke -_vmInvoke: +# define GLOBAL(x) _##x #else -.globl vmInvoke -vmInvoke: +# define GLOBAL(x) x #endif + +#define THREAD_CONTINUATION 96 +#define THREAD_EXCEPTION 36 +#define THREAD_EXCEPTION_STACK_ADJUSTMENT 100 +#define THREAD_EXCEPTION_OFFSET 104 +#define THREAD_EXCEPTION_HANDLER 108 + +#define CONTINUATION_NEXT 4 +#define CONTINUATION_ADDRESS 16 +#define CONTINUATION_RETURN_ADDRESS_OFFSET 20 +#define CONTINUATION_FRAME_POINTER_OFFSET 24 +#define CONTINUATION_LENGTH 28 +#define CONTINUATION_BODY 32 + +.globl GLOBAL(vmInvoke) +GLOBAL(vmInvoke): // save return address mflr r0 stw r0,8(r1) @@ -73,24 +87,95 @@ vmInvoke: // copy arguments into place li r16,0 - b LOCAL(test) + b LOCAL(vmInvoke_argumentTest) -LOCAL(loop): +LOCAL(vmInvoke_argumentLoop): lwzx r17,r16,r5 addi r18,r16,ARGUMENT_BASE stwx r17,r18,r1 addi r16,r16,BYTES_PER_WORD -LOCAL(test): +LOCAL(vmInvoke_argumentTest): cmplw r16,r6 - blt LOCAL(loop) + blt LOCAL(vmInvoke_argumentLoop) // load and call function address mtctr r4 bctrl +.globl GLOBAL(vmInvoke_returnAddress) +GLOBAL(vmInvoke_returnAddress): // restore stack pointer lwz r1,0(r1) + +#ifdef AVIAN_CONTINUATIONS + // call the next continuation, if any + lwz r5,THREAD_CONTINUATION(r13) + cmplwi r5,0 + beq LOCAL(vmInvoke_exit) + + lwz r6,CONTINUATION_LENGTH(r5) + slwi r6,r6,2 + subfic r6,r6,-80 + stwux r1,r1,r6 + + addi r7,r5,CONTINUATION_BODY + + li r8,0 + b LOCAL(vmInvoke_continuationTest) + +LOCAL(vmInvoke_continuationLoop): + lwzx r9,r7,r8 + stwx r9,r1,r8 + addi r8,r8,4 + +LOCAL(vmInvoke_continuationTest): + cmplw r8,r6 + ble LOCAL(vmInvoke_continuationLoop) + + lwz r7,CONTINUATION_RETURN_ADDRESS_OFFSET(r5) + bl LOCAL(vmInvoke_getPC) + +LOCAL(vmInvoke_getPC): + mflr r10 + addis r10,r10,ha16(GLOBAL(vmInvoke_returnAddress)-LOCAL(vmInvoke_getPC)) + la r10,lo16(GLOBAL(vmInvoke_returnAddress)-LOCAL(vmInvoke_getPC))(r10) + stwx r10,r1,r7 + + lwz r7,CONTINUATION_FRAME_POINTER_OFFSET(r5) + lwz r8,0(r1) + add r7,r7,r1 + stw r8,0(r7) + stw r7,0(r1) + + lwz r7,CONTINUATION_NEXT(r5) + stw r7,THREAD_CONTINUATION(r13) + + // call the continuation unless we're handling an exception + lwz r7,THREAD_EXCEPTION(r13) + cmpwi r7,0 + bne LOCAL(vmInvoke_handleException) + addi r7,r5,CONTINUATION_ADDRESS + mtctr r7 + bctr + +LOCAL(vmInvoke_handleException): + // we're handling an exception - call the exception handler instead + li r8,0 + stw r8,THREAD_EXCEPTION(r13) + lwz r8,THREAD_EXCEPTION_STACK_ADJUSTMENT(r13) + lwz r9,0(r1) + subfic r8,r8,0 + stwux r9,r1,r8 + lwz r8,THREAD_EXCEPTION_OFFSET(r13) + stwx r7,r1,r8 + + addi r7,r13,THREAD_EXCEPTION_HANDLER + mtctr r7 + bctr + +LOCAL(vmInvoke_exit): +#endif // AVIAN_CONTINUATIONS // restore callee-saved registers subi r9,r1,80 @@ -118,23 +203,67 @@ LOCAL(test): // handle return value based on expected type lwz r8,44(r1) -LOCAL(void): +LOCAL(vmInvoke_void): cmplwi r8,VOID_TYPE - bne LOCAL(int64) - b LOCAL(exit) + bne LOCAL(vmInvoke_int64) + b LOCAL(vmInvoke_return) -LOCAL(int64): +LOCAL(vmInvoke_int64): cmplwi r8,INT64_TYPE - bne LOCAL(int32) - b LOCAL(exit) + bne LOCAL(vmInvoke_int32) + b LOCAL(vmInvoke_return) -LOCAL(int32): +LOCAL(vmInvoke_int32): li r3,0 -LOCAL(exit): +LOCAL(vmInvoke_return): // load return address lwz r0,8(r1) mtlr r0 // return blr + +.globl GLOBAL(vmJumpAndInvoke) +GLOBAL(vmJumpAndInvoke): + // r3: thread + // r4: address + // r5: (unused) + // r6: stack + // r7: argumentFootprint + // r8: arguments + // r9: frameSize + + stw r6,0(r1) + + mr r13,r3 + + // copy arguments into place + li r9,0 + addi r10,r6,ARGUMENT_BASE + b LOCAL(vmJumpAndInvoke_argumentTest) + +LOCAL(vmJumpAndInvoke_argumentLoop): + lwzx r11,r8,r9 + stwx r11,r10,r9 + addi r9,r9,4 + +LOCAL(vmJumpAndInvoke_argumentTest): + cmplw r9,r7 + ble LOCAL(vmJumpAndInvoke_argumentLoop) + + subf r7,r6,r9 + stw r6,0(r7) + mr r1,r7 + + // set return address + bl LOCAL(vmJumpAndInvoke_getPC) + +LOCAL(vmJumpAndInvoke_getPC): + mflr r10 + addis r10,r10,ha16(GLOBAL(vmInvoke_returnAddress)-LOCAL(vmJumpAndInvoke_getPC)) + la r10,lo16(GLOBAL(vmInvoke_returnAddress)-LOCAL(vmJumpAndInvoke_getPC))(r10) + mtlr r10 + + mtctr r4 + bctr diff --git a/src/compile-x86.S b/src/compile-x86.S index ccd402d3b8..cfc104c463 100644 --- a/src/compile-x86.S +++ b/src/compile-x86.S @@ -148,15 +148,20 @@ LOCAL(vmInvoke_exit): .globl vmJumpAndInvoke vmJumpAndInvoke: - // %rdi: thread - // %rsi: address - // %rdx: base - // %rcx: stack - // %r8 : argumentFootprint - // %r9 : arguments + // %rdi: thread + // %rsi: address + // %rdx: base + // %rcx: stack + // %r8 : argumentFootprint + // %r9 : arguments + // 8(%rsp): frameSize + movq %rdx,%rbp + movq %rdi,%rbx + subq 8(%rsp),%rcx + // set return address movq vmInvoke_returnAddress@GOTPCREL(%rip),%r10 movq %r10,(%rcx) @@ -174,7 +179,6 @@ LOCAL(vmJumpAndInvoke_argumentTest): cmpq %r8,%r11 jb LOCAL(vmJumpAndInvoke_argumentLoop) - movq %rdx,%rbp movq %rcx,%rsp jmp *%rsi @@ -351,8 +355,12 @@ vmJumpAndInvoke: // 16(%esp): stack // 20(%esp): argumentFootprint // 24(%esp): arguments + // 28(%esp): frameSize + movl 12(%esp),%ebp + movl 16(%esp),%ecx + subl 28(%esp),%ecx // set return address call LOCAL(getPC) @@ -377,7 +385,6 @@ LOCAL(vmJumpAndInvoke_argumentTest): movl 4(%esp),%ebx movl 8(%esp),%esi - movl 12(%esp),%ebp movl %ecx,%esp jmp *%esi diff --git a/src/compile.cpp b/src/compile.cpp index 5496779fb8..fe5a05d3ea 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -24,14 +24,15 @@ vmInvoke(void* thread, void* function, void* arguments, extern "C" void vmJumpAndInvoke(void* thread, void* function, void* base, void* stack, - unsigned argumentFootprint, uintptr_t* arguments); + unsigned argumentFootprint, uintptr_t* arguments, + unsigned frameSize); extern "C" void vmCall(); namespace { -const bool DebugCompile = false; +const bool DebugCompile = true; const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; @@ -5413,13 +5414,13 @@ jumpAndInvoke(MyThread* t, object method, void* base, void* stack, vmJumpAndInvoke (t, reinterpret_cast(methodAddress(t, method)), base, - static_cast(stack) - + oldArgumentFootprint - - t->arch->argumentFootprint(argumentCount) - - t->arch->frameFooterSize() - - t->arch->frameReturnAddressSize(), + stack, argumentCount * BytesPerWord, - arguments); + arguments, + (oldArgumentFootprint + - t->arch->argumentFootprint(argumentCount) + - t->arch->frameFooterSize() + - t->arch->frameReturnAddressSize()) * BytesPerWord); } void diff --git a/src/compiler.cpp b/src/compiler.cpp index 6742dbb459..e594e64717 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -384,13 +384,13 @@ unsigned RegisterResource::toString(Context* c, char* buffer, unsigned bufferSize) { return snprintf - (buffer, bufferSize, "register %"LD, this - c->registerResources); + (buffer, bufferSize, "register %d", this - c->registerResources); } unsigned FrameResource::toString(Context* c, char* buffer, unsigned bufferSize) { - return snprintf(buffer, bufferSize, "frame %"LD, this - c->frameResources); + return snprintf(buffer, bufferSize, "frame %d", this - c->frameResources); } class PoolPromise: public Promise { @@ -679,8 +679,7 @@ offsetToFrameIndex(Context* c, unsigned offset) { assert(c, static_cast ((offset / BytesPerWord) - c->arch->frameFooterSize()) >= 0); - - assert(c, (offset / BytesPerWord) - c->arch->frameFooterSize() + assert(c, ((offset / BytesPerWord) - c->arch->frameFooterSize()) < totalFrameSize(c)); return (offset / BytesPerWord) - c->arch->frameFooterSize(); @@ -689,7 +688,8 @@ offsetToFrameIndex(Context* c, unsigned offset) unsigned frameBase(Context* c) { - return c->alignedFrameSize - 1 + return c->alignedFrameSize + - c->arch->frameReturnAddressSize() - c->arch->frameFooterSize() + c->arch->frameHeaderSize(); } @@ -1262,7 +1262,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, } } } - + best = pickTarget(c, read->value, mask, registerPenalty, best); if (best.cost <= Target::MinimumFrameCost) { return best; @@ -2539,14 +2539,8 @@ class CallEvent: public Event { (stackArgumentFootprint); if (footprint > c->arch->stackAlignmentInWords()) { - Assembler::Register stack(c->arch->stack()); - ResolvedPromise adjustmentPromise - ((footprint - c->arch->stackAlignmentInWords()) * BytesPerWord); - Assembler::Constant adjustmentConstant(&adjustmentPromise); - c->assembler->apply - (Subtract, BytesPerWord, ConstantOperand, &adjustmentConstant, - BytesPerWord, RegisterOperand, &stack, - BytesPerWord, RegisterOperand, &stack); + c->assembler->adjustFrame + (footprint - c->arch->stackAlignmentInWords()); } } } diff --git a/src/powerpc.S b/src/powerpc.S index fba6f52ec0..fce2079af7 100644 --- a/src/powerpc.S +++ b/src/powerpc.S @@ -161,4 +161,6 @@ vmJump: mtlr r3 mr r1,r5 mr r13,r6 + mr r4,r7 + mr r3,r8 blr diff --git a/src/powerpc.cpp b/src/powerpc.cpp index dc1e315ebb..2477cc5af0 100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -65,7 +65,7 @@ inline int sth(int rs, int ra, int i) { return D(44, rs, ra, i); } inline int sthx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 407, 0); } inline int stw(int rs, int ra, int i) { return D(36, rs, ra, i); } inline int stwu(int rs, int ra, int i) { return D(37, rs, ra, i); } -inline int stwux(int rs, int ra, int i) { return X(31, rs, ra, rb, 183, 0); } +inline int stwux(int rs, int ra, int rb) { return X(31, rs, ra, rb, 183, 0); } inline int stwx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 151, 0); } inline int add(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 266, 0); } inline int addc(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 10, 0); } @@ -137,7 +137,7 @@ inline int blt(int i) { return bc(12, 0, i, 0); } inline int bgt(int i) { return bc(12, 1, i, 0); } inline int bge(int i) { return bc(4, 0, i, 0); } inline int ble(int i) { return bc(4, 1, i, 0); } -inline int be(int i) { return bc(12, 2, i, 0); } +inline int beq(int i) { return bc(12, 2, i, 0); } inline int bne(int i) { return bc(4, 2, i, 0); } inline int cmpw(int ra, int rb) { return cmp(0, ra, rb); } inline int cmplw(int ra, int rb) { return cmpl(0, ra, rb); } @@ -160,6 +160,7 @@ carry16(intptr_t v) const unsigned FrameFooterSize = 6; const unsigned StackAlignmentInBytes = 16; +const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord; const int StackRegister = 1; const int ThreadRegister = 13; @@ -1507,7 +1508,7 @@ jumpIfEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* target) assert(c, size == BytesPerWord); appendOffsetTask(c, target->value, offset(c), true); - issue(c, be(0)); + issue(c, beq(0)); } void @@ -1688,6 +1689,14 @@ class MyArchitecture: public Assembler::Architecture { return (BytesPerWord == 4 ? 3 : NoRegister); } + virtual int virtualCallTarget() { + return 4; + } + + virtual int virtualCallIndex() { + return 3; + } + virtual bool condensedAddressing() { return false; } @@ -1708,7 +1717,7 @@ class MyArchitecture: public Assembler::Architecture { } } - virtual unsigned stackPadding(unsigned footprint) { + virtual unsigned frameFootprint(unsigned footprint) { return max(footprint, StackAlignmentInWords); } @@ -1726,11 +1735,16 @@ class MyArchitecture: public Assembler::Architecture { return index + 3; } + virtual unsigned stackAlignmentInWords() { + return StackAlignmentInWords; + } + virtual bool matchCall(void* returnAddress, void* target) { uint32_t* instruction = static_cast(returnAddress) - 1; - return *instruction == bl(static_cast(target) - - reinterpret_cast(instruction)); + return *instruction == static_cast + (bl(static_cast(target) + - reinterpret_cast(instruction))); } virtual void updateCall(UnaryOperation op UNUSED, @@ -1788,6 +1802,14 @@ class MyArchitecture: public Assembler::Architecture { return FrameFooterSize; } + virtual int returnAddressOffset() { + return 8 / BytesPerWord; + } + + virtual int framePointerOffset() { + return 0; + } + virtual void nextFrame(void** stack, void**) { assert(&c, *static_cast(*stack) != *stack); @@ -1971,6 +1993,15 @@ class MyAssembler: public Assembler { moveAndUpdateRM(&c, BytesPerWord, &stack, BytesPerWord, &stackDst); } + virtual void adjustFrame(unsigned footprint) { + Register nextStack(0); + Memory stackSrc(StackRegister, 0); + moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &nextStack); + + Memory stackDst(StackRegister, -footprint * BytesPerWord); + moveAndUpdateRM(&c, BytesPerWord, &nextStack, BytesPerWord, &stackDst); + } + virtual void popFrame() { Register stack(StackRegister); Memory stackSrc(StackRegister, 0); @@ -2024,8 +2055,8 @@ class MyAssembler: public Assembler { virtual void popFrameAndPopArgumentsAndReturn(unsigned argumentFootprint) { popFrame(); - assert(c, argumentFootprint >= StackAlignmentInWords); - assert(c, (argumentFootprint % StackAlignmentInWords) == 0); + assert(&c, argumentFootprint >= StackAlignmentInWords); + assert(&c, (argumentFootprint % StackAlignmentInWords) == 0); if (argumentFootprint > StackAlignmentInWords) { Register tmp(0); @@ -2049,7 +2080,7 @@ class MyAssembler: public Assembler { moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp1); Register tmp2(arch_->returnLow()); - Memory newStackSrc(ThreadRegister, stackOffsetFromThread); + Memory newStackSrc(ThreadRegister, stackOffset); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp2); Register stack(StackRegister); diff --git a/src/x86.cpp b/src/x86.cpp index 3fd93af296..50c8383862 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -2398,6 +2398,14 @@ class MyAssembler: public Assembler { BytesPerWord, RegisterOperand, &stack); } + virtual void adjustFrame(unsigned footprint) { + Register stack(rsp); + Constant footprintConstant(resolved(&c, footprint * BytesPerWord)); + apply(Subtract, BytesPerWord, ConstantOperand, &footprintConstant, + BytesPerWord, RegisterOperand, &stack, + BytesPerWord, RegisterOperand, &stack); + } + virtual void popFrame() { Register base(rbp); Register stack(rsp);