From 43cbfd3f3aba6c59c7f6a20ff79060c7572806a0 Mon Sep 17 00:00:00 2001
From: Joel Dice
Date: Sun, 16 Jan 2011 19:05:05 -0700
Subject: [PATCH] support stack unwinding without using a frame pointer

Previously, we unwound the stack by following the chain of frame
pointers for normal returns, stack trace creation, and exception
unwinding. On x86, this required reserving EBP/RBP for frame pointer
duties, making it unavailable for general computation and requiring
that it be explicitly saved and restored on entry and exit,
respectively.

On PowerPC, we use an ABI that makes the stack pointer double as a
frame pointer, so it doesn't cost us anything. We've been using the
same convention on ARM, but it doesn't match the native calling
convention, which makes it unusable when we want to call native code
from Java and pass arguments on the stack.

So far, the ARM calling convention mismatch hasn't been an issue
because we've never passed more arguments from Java to native code
than would fit in registers. However, we must now pass an extra
argument (the thread pointer) to e.g. divideLong so it can throw an
exception on divide by zero, which means the last argument must be
passed on the stack. This will clobber the linkage area we've been
using to hold the frame pointer, so we need to stop using it.

One solution would be to use the same convention on ARM as we do on
x86, but this would introduce the same overhead of making a register
unavailable for general use and extra code at method entry and exit.

Instead, this commit removes the need for a frame pointer. Unwinding
involves consulting a map of instruction offsets to frame sizes which
is generated at compile time. This is necessary because stack trace
creation can happen at any time due to Thread.getStackTrace being
called by another thread, and the frame size varies during the
execution of a method.

So far, only x86(_64) is working, and continuations and tail call
optimization are probably broken. More to come.
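To illustrate the scheme described above: each compiled method carries
a table, sorted by instruction offset, giving the frame size in effect
at that offset, and the unwinder reaches the caller's frame by adding
that size (plus the return address slot) to the current stack pointer.
The sketch below uses a plain array of (offset, size) pairs and the
hypothetical names FrameSizeEntry, frameSizeAt, and nextFrameSketch;
the patch itself bit-packs the table into an intArray (see
makeFrameSizeTable and local::frameSize/nextFrame in src/compile.cpp),
but the lookup follows the same idea.

    #include <cstdint>
    #include <cstddef>

    // Hypothetical, simplified stand-in for the bit-packed frame size
    // table the patch stores per method: one entry for each point where
    // the frame size changes, sorted by offset into the compiled code.
    struct FrameSizeEntry {
      uint32_t offset;     // byte offset from the start of the compiled code
      uint32_t sizeWords;  // frame size, in words, in effect from this offset
    };

    // Find the frame size in effect at ipOffset: the entry with the
    // largest offset <= ipOffset (binary search, as in local::frameSize).
    unsigned frameSizeAt(const FrameSizeEntry* table, size_t count,
                         uint32_t ipOffset)
    {
      size_t bottom = 0, top = count;
      while (bottom < top) {
        size_t middle = bottom + (top - bottom) / 2;
        if (table[middle].offset <= ipOffset) {
          bottom = middle + 1;
        } else {
          top = middle;
        }
      }
      // bottom is now one past the last entry at or before ipOffset;
      // before the first recorded change the frame size is zero.
      return bottom == 0 ? 0 : table[bottom - 1].sizeWords;
    }

    // Walk to the caller's frame without a frame pointer: skip the
    // callee's frame plus its return address slot (cf. local::nextFrame).
    void** nextFrameSketch(void** sp, const FrameSizeEntry* table,
                           size_t count, uint32_t ipOffset,
                           unsigned returnAddressSizeWords = 1)
    {
      return sp + frameSizeAt(table, count, ipOffset)
        + returnAddressSizeWords;
    }

At assembly time the table is built from frame size events: each
instruction that moves the stack pointer (frame allocation, adjustment,
and teardown) records a signed delta, and the running sum of those
deltas at each offset is the frame size stored in the table.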
--- src/arch.h | 4 +- src/arm.S | 22 +++ src/arm.cpp | 4 + src/arm.h | 2 + src/assembler.h | 31 ++++- src/common.h | 63 ++++++--- src/compile-arm.S | 12 +- src/compile-x86.S | 225 ++++++++++++++---------------- src/compile.cpp | 346 +++++++++++++++++++++++++++++----------------- src/compiler.cpp | 40 +++++- src/jnienv.cpp | 42 +++--- src/machine.cpp | 4 +- src/machine.h | 6 +- src/posix.cpp | 14 +- src/system.h | 5 +- src/types.def | 1 + src/x86.S | 31 ++--- src/x86.cpp | 230 +++++++++++++++++++++--------- src/x86.h | 2 + 19 files changed, 675 insertions(+), 409 deletions(-) diff --git a/src/arch.h b/src/arch.h index f06dcef0f9..56afe1d5c3 100644 --- a/src/arch.h +++ b/src/arch.h @@ -22,8 +22,8 @@ #include "common.h" extern "C" void NO_RETURN -vmJump(void* address, void* base, void* stack, void* thread, - uintptr_t returnLow, uintptr_t returnHigh); +vmJump(void* address, void* stack, void* thread, uintptr_t returnLow, + uintptr_t returnHigh); namespace vm { diff --git a/src/arm.S b/src/arm.S index 6536017d60..458ece75cb 100644 --- a/src/arm.S +++ b/src/arm.S @@ -56,3 +56,25 @@ vmJump: mov sp, r2 mov r8, r3 bx lr + +#define CHECKPOINT_THREAD 4 +#define CHECKPOINT_STACK 24 + +.globl vmRun +vmRun: + // r0: function + // r1: arguments + // r2: checkpoint + stmfd sp!, {r4-r11, lr} + + str sp, [r2, #CHECKPOINT_STACK] + + mov r12, r0 + ldr r0, [r2, #CHECKPOINT_THREAD] + + blx r12 + +.globl vmRun_returnAddress +vmRun_returnAddress: + ldmfd sp!, {r4-r11, lr} + bx lr diff --git a/src/arm.cpp b/src/arm.cpp index bac3bc10a0..14528b5c59 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -1720,6 +1720,10 @@ class MyArchitecture: public Assembler::Architecture { return max(pad(footprint, StackAlignmentInWords), StackAlignmentInWords); } + virtual bool argumentAlignment() { + return true; + } + virtual unsigned argumentRegisterCount() { return 4; } diff --git a/src/arm.h b/src/arm.h index e66377e7f6..198a76945f 100644 --- a/src/arm.h +++ b/src/arm.h @@ -14,6 +14,8 @@ #include "types.h" #include "common.h" +#define VA_LIST(x) (&(x)) + #define IP_REGISTER(context) (context->uc_mcontext.arm_pc) #define STACK_REGISTER(context) (context->uc_mcontext.arm_sp) #define THREAD_REGISTER(context) (context->uc_mcontext.arm_ip) diff --git a/src/assembler.h b/src/assembler.h index 4c5f67b1a9..0cdc788ad1 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -22,6 +22,12 @@ const bool TailCalls = true; const bool TailCalls = false; #endif +#ifdef AVIAN_USE_FRAME_POINTER +const bool UseFramePointer = true; +#else +const bool UseFramePointer = false; +#endif + enum Operation { Return, LoadBarrier, @@ -298,6 +304,13 @@ class Assembler { virtual unsigned resolve(unsigned start, Block* next) = 0; }; + class FrameSizeEvent { + public: + virtual unsigned offset() = 0; + virtual int change() = 0; + virtual FrameSizeEvent* next() = 0; + }; + class Architecture { public: virtual unsigned floatRegisterSize() = 0; @@ -323,6 +336,7 @@ class Assembler { virtual unsigned frameFootprint(unsigned footprint) = 0; virtual unsigned argumentFootprint(unsigned footprint) = 0; + virtual bool argumentAlignment() = 0; virtual unsigned argumentRegisterCount() = 0; virtual int argumentRegister(unsigned index) = 0; @@ -343,7 +357,6 @@ class Assembler { virtual unsigned frameFooterSize() = 0; virtual int returnAddressOffset() = 0; virtual int framePointerOffset() = 0; - virtual void nextFrame(void** stack, void** base) = 0; virtual void plan (UnaryOperation op, @@ -385,17 +398,19 @@ class Assembler { virtual Architecture* arch() = 0; - virtual void 
saveFrame(unsigned stackOffset, unsigned baseOffset) = 0; + virtual void saveFrame(unsigned stackOffset) = 0; virtual void pushFrame(unsigned argumentCount, ...) = 0; virtual void allocateFrame(unsigned footprint) = 0; - virtual void adjustFrame(unsigned footprint) = 0; - virtual void popFrame() = 0; + virtual void adjustFrame(unsigned difference) = 0; + virtual void popFrame(unsigned footprint) = 0; virtual void popFrameForTailCall(unsigned footprint, int offset, int returnAddressSurrogate, int framePointerSurrogate) = 0; - virtual void popFrameAndPopArgumentsAndReturn(unsigned argumentFootprint) + virtual void popFrameAndPopArgumentsAndReturn(unsigned frameFootprint, + unsigned argumentFootprint) = 0; - virtual void popFrameAndUpdateStackAndReturn(unsigned stackOffsetFromThread) + virtual void popFrameAndUpdateStackAndReturn(unsigned frameFootprint, + unsigned stackOffsetFromThread) = 0; virtual void apply(Operation op) = 0; @@ -422,6 +437,10 @@ class Assembler { virtual unsigned length() = 0; + virtual unsigned frameSizeEventCount() = 0; + + virtual FrameSizeEvent* firstFrameSizeEvent() = 0; + virtual void dispose() = 0; }; diff --git a/src/common.h b/src/common.h index 57dea834ef..1514369d15 100644 --- a/src/common.h +++ b/src/common.h @@ -311,67 +311,98 @@ log(unsigned n) return r; } +template inline unsigned wordOf(unsigned i) { - return i / BitsPerWord; + return i / (sizeof(T) * 8); +} + +inline unsigned +wordOf(unsigned i) +{ + return wordOf(i); +} + +template +inline unsigned +bitOf(unsigned i) +{ + return i % (sizeof(T) * 8); } inline unsigned bitOf(unsigned i) { - return i % BitsPerWord; + return bitOf(i); +} + +template +inline unsigned +indexOf(unsigned word, unsigned bit) +{ + return (word * (sizeof(T) * 8)) + bit; } inline unsigned indexOf(unsigned word, unsigned bit) { - return (word * BitsPerWord) + bit; + return indexOf(word, bit); } +template inline void -markBit(uintptr_t* map, unsigned i) +markBit(T* map, unsigned i) { - map[wordOf(i)] |= static_cast(1) << bitOf(i); + map[wordOf(i)] |= static_cast(1) << bitOf(i); } +template inline void -clearBit(uintptr_t* map, unsigned i) +clearBit(T* map, unsigned i) { - map[wordOf(i)] &= ~(static_cast(1) << bitOf(i)); + map[wordOf(i)] &= ~(static_cast(1) << bitOf(i)); } +template inline unsigned -getBit(uintptr_t* map, unsigned i) +getBit(T* map, unsigned i) { - return (map[wordOf(i)] & (static_cast(1) << bitOf(i))) - >> bitOf(i); + return (map[wordOf(i)] & (static_cast(1) << bitOf(i))) + >> bitOf(i); } +// todo: the following (clearBits, setBits, and getBits) could be made +// more efficient by operating on a word at a time instead of a bit at +// a time: + +template inline void -clearBits(uintptr_t* map, unsigned bitsPerRecord, unsigned index) +clearBits(T* map, unsigned bitsPerRecord, unsigned index) { for (unsigned i = index, limit = index + bitsPerRecord; i < limit; ++i) { - clearBit(map, i); + clearBit(map, i); } } +template inline void -setBits(uintptr_t* map, unsigned bitsPerRecord, int index, unsigned v) +setBits(T* map, unsigned bitsPerRecord, int index, unsigned v) { for (int i = index + bitsPerRecord - 1; i >= index; --i) { - if (v & 1) markBit(map, i); else clearBit(map, i); + if (v & 1) markBit(map, i); else clearBit(map, i); v >>= 1; } } +template inline unsigned -getBits(uintptr_t* map, unsigned bitsPerRecord, unsigned index) +getBits(T* map, unsigned bitsPerRecord, unsigned index) { unsigned v = 0; for (unsigned i = index, limit = index + bitsPerRecord; i < limit; ++i) { v <<= 1; - v |= getBit(map, i); + v |= 
getBit(map, i); } return v; } diff --git a/src/compile-arm.S b/src/compile-arm.S index c61ebaff37..ed66b222a1 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -22,12 +22,12 @@ # define GLOBAL(x) x #endif -#define THREAD_STACK 2144 -#define THREAD_CONTINUATION 2148 +#define THREAD_STACK 2152 +#define THREAD_CONTINUATION 2156 #define THREAD_EXCEPTION 44 -#define THREAD_EXCEPTION_STACK_ADJUSTMENT 2152 -#define THREAD_EXCEPTION_OFFSET 2156 -#define THREAD_EXCEPTION_HANDLER 2160 +#define THREAD_EXCEPTION_STACK_ADJUSTMENT 2160 +#define THREAD_EXCEPTION_OFFSET 2164 +#define THREAD_EXCEPTION_HANDLER 2168 #define CONTINUATION_NEXT 4 #define CONTINUATION_ADDRESS 16 @@ -121,7 +121,7 @@ GLOBAL(vmInvoke_safeStack): mov r4,sp str r4,[sp,r7]! - add r7,r5,#CONTINUATION_BODY + add r7,r5,#CONTINUATION_BODY mov r11,#0 add r10,sp,#ARGUMENT_BASE diff --git a/src/compile-x86.S b/src/compile-x86.S index 4b79aa4e38..080b3ba5da 100644 --- a/src/compile-x86.S +++ b/src/compile-x86.S @@ -23,7 +23,8 @@ #ifdef __x86_64__ -#define THREAD_STACK 2232 +#define THREAD_STACK 2224 +#define THREAD_SCRATCH 2232 #if defined __MINGW32__ || defined __CYGWIN32__ @@ -41,22 +42,25 @@ GLOBAL(vmInvoke): // 48(%rbp) : frameSize // 56(%rbp) : returnType (ignored) - // allocate stack space, adding room for callee-saved registers - movl 48(%rbp),%eax - subq %rax,%rsp + // allocate stack space for callee-saved registers subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%rsp + + // remember this stack position, since we won't be able to rely on + // %rbp being restored when the call returns + movq %rsp,THREAD_SCRATCH(%rcx) // save callee-saved registers - movq %rsp,%r11 - addq %rax,%r11 - - movq %rbx,0(%r11) - movq %r12,8(%r11) - movq %r13,16(%r11) - movq %r14,24(%r11) - movq %r15,32(%r11) - movq %rsi,40(%r11) - movq %rdi,48(%r11) + movq %rbx,0(%rsp) + movq %r12,8(%rsp) + movq %r13,16(%rsp) + movq %r14,24(%rsp) + movq %r15,32(%rsp) + movq %rsi,40(%rsp) + movq %rdi,48(%rsp) + + // allocate stack space for arguments + movl 48(%rbp),%eax + subq %rax,%rsp // we use rbx to hold the thread pointer, by convention mov %rcx,%rbx @@ -80,7 +84,7 @@ LOCAL(vmInvoke_argumentTest): .globl GLOBAL(vmInvoke_returnAddress) GLOBAL(vmInvoke_returnAddress): // restore stack pointer - movq %rbp,%rsp + movq THREAD_SCRATCH(%rbx),%rsp // clear MyThread::stack to avoid confusing another thread calling // java.lang.Thread.getStackTrace on this one. 
See @@ -96,19 +100,17 @@ GLOBAL(vmInvoke_safeStack): # include "continuations-x86.S" #endif // AVIAN_CONTINUATIONS - // restore callee-saved registers (below the stack pointer, but in - // the red zone) - movq %rsp,%r11 - subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%r11 - - movq 0(%r11),%rbx - movq 8(%r11),%r12 - movq 16(%r11),%r13 - movq 24(%r11),%r14 - movq 32(%r11),%r15 - movq 40(%r11),%rsi - movq 48(%r11),%rdi + // restore callee-saved registers + movq 0(%rsp),%rbx + movq 8(%rsp),%r12 + movq 16(%rsp),%r13 + movq 24(%rsp),%r14 + movq 32(%rsp),%r15 + movq 40(%rsp),%rsi + movq 48(%rsp),%rdi + addq $CALLEE_SAVED_REGISTER_FOOTPRINT,%rsp + // return popq %rbp ret @@ -118,47 +120,39 @@ GLOBAL(vmJumpAndInvoke): #ifdef AVIAN_CONTINUATIONS // %rcx: thread // %rdx: address - // %r8 : base - // %r9 : (unused) - // 40(%rsp): argumentFootprint - // 48(%rsp): arguments - // 56(%rsp): frameSize - - movq %r8,%rbp + // %r8 : stack + // %r9 : argumentFootprint + // 40(%rsp): arguments + // 48(%rsp): frameSize - // restore (pseudo)-stack pointer (we don't want to touch the real - // stack pointer, since we haven't copied the arguments yet) - movq %rbp,%r9 - // allocate new frame, adding room for callee-saved registers - movl 56(%rsp),%eax - subq %rax,%r9 - subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%r9 + movl 48(%rsp),%eax + subq %rax,%r8 + subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%r8 movq %rcx,%rbx // set return address leaq GLOBAL(vmInvoke_returnAddress)(%rip),%r10 - movq %r10,(%r9) + movq %r10,(%r8) // copy arguments into place movq $0,%r11 - movl 48(%rsp),%r8d movl 40(%rsp),%eax jmp LOCAL(vmJumpAndInvoke_argumentTest) LOCAL(vmJumpAndInvoke_argumentLoop): - movq (%r8,%r11,1),%r10 - movq %r10,8(%r9,%r11,1) + movq (%rax,%r11,1),%r10 + movq %r10,8(%r8,%r11,1) addq $8,%r11 LOCAL(vmJumpAndInvoke_argumentTest): - cmpq %rax,%r11 + cmpq %9,%r11 jb LOCAL(vmJumpAndInvoke_argumentLoop) // the arguments have been copied, so we can set the real stack // pointer now - movq %r9,%rsp + movq %r8,%rsp jmp *%rdx #else // not AVIAN_CONTINUATIONS @@ -183,20 +177,23 @@ GLOBAL(vmInvoke): // %r8 : frameSize // %r9 : returnType (ignored) - // allocate stack space, adding room for callee-saved registers - subq %r8,%rsp + // allocate stack space for callee-saved registers subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%rsp + // remember this stack position, since we won't be able to rely on + // %rbp being restored when the call returns + movq %rsp,THREAD_SCRATCH(%rdi) + // save callee-saved registers - movq %rsp,%r9 - addq %r8,%r9 - - movq %rbx,0(%r9) - movq %r12,8(%r9) - movq %r13,16(%r9) - movq %r14,24(%r9) - movq %r15,32(%r9) - + movq %rbx,0(%rsp) + movq %r12,8(%rsp) + movq %r13,16(%rsp) + movq %r14,24(%rsp) + movq %r15,32(%rsp) + + // allocate stack space for arguments + subq %r8,%rsp + // we use rbx to hold the thread pointer, by convention mov %rdi,%rbx @@ -219,7 +216,7 @@ LOCAL(vmInvoke_argumentTest): .globl GLOBAL(vmInvoke_returnAddress) GLOBAL(vmInvoke_returnAddress): // restore stack pointer - movq %rbp,%rsp + movq THREAD_SCRATCH(%rbx),%rsp // clear MyThread::stack to avoid confusing another thread calling // java.lang.Thread.getStackTrace on this one. 
See @@ -234,18 +231,16 @@ GLOBAL(vmInvoke_safeStack): #ifdef AVIAN_CONTINUATIONS # include "continuations-x86.S" #endif // AVIAN_CONTINUATIONS + + // restore callee-saved registers + movq 0(%rsp),%rbx + movq 8(%rsp),%r12 + movq 16(%rsp),%r13 + movq 24(%rsp),%r14 + movq 32(%rsp),%r15 + + addq $CALLEE_SAVED_REGISTER_FOOTPRINT,%rsp - // restore callee-saved registers (below the stack pointer, but in - // the red zone) - movq %rsp,%r9 - subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%r9 - - movq 0(%r9),%rbx - movq 8(%r9),%r12 - movq 16(%r9),%r13 - movq 24(%r9),%r14 - movq 32(%r9),%r15 - // return popq %rbp ret @@ -255,45 +250,37 @@ GLOBAL(vmJumpAndInvoke): #ifdef AVIAN_CONTINUATIONS // %rdi: thread // %rsi: address - // %rdx: base - // %rcx: (unused) - // %r8 : argumentFootprint - // %r9 : arguments - // 8(%rsp): frameSize - - movq %rdx,%rbp - - // restore (pseudo)-stack pointer (we don't want to touch the real - // stack pointer, since we haven't copied the arguments yet) - movq %rbp,%rcx + // %rdx: stack + // %rcx: argumentFootprint + // %r8 : arguments + // %r9 : frameSize // allocate new frame, adding room for callee-saved registers - movl 8(%rsp),%eax - subq %rax,%rcx - subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%rcx + subq %r9,%rdx + subq $CALLEE_SAVED_REGISTER_FOOTPRINT,%rdx movq %rdi,%rbx // set return address movq GLOBAL(vmInvoke_returnAddress)@GOTPCREL(%rip),%r10 - movq %r10,(%rcx) + movq %r10,(%rdx) // copy arguments into place movq $0,%r11 jmp LOCAL(vmJumpAndInvoke_argumentTest) LOCAL(vmJumpAndInvoke_argumentLoop): - movq (%r9,%r11,1),%r10 - movq %r10,8(%rcx,%r11,1) + movq (%r8,%r11,1),%r10 + movq %r10,8(%rdx,%r11,1) addq $8,%r11 LOCAL(vmJumpAndInvoke_argumentTest): - cmpq %r8,%r11 + cmpq %rcx,%r11 jb LOCAL(vmJumpAndInvoke_argumentLoop) // the arguments have been copied, so we can set the real stack // pointer now - movq %rcx,%rsp + movq %rdx,%rsp jmp *%rsi #else // not AVIAN_CONTINUATIONS @@ -306,7 +293,8 @@ LOCAL(vmJumpAndInvoke_argumentTest): #elif defined __i386__ -#define THREAD_STACK 2152 +#define THREAD_STACK 2148 +#define THREAD_SCRATCH 2152 #define CALLEE_SAVED_REGISTER_FOOTPRINT 16 @@ -321,21 +309,24 @@ GLOBAL(vmInvoke): // 20(%ebp): argumentFootprint // 24(%ebp): frameSize // 28(%ebp): returnType - - // allocate stack space, adding room for callee-saved registers - subl 24(%ebp),%esp + + // allocate stack space for callee-saved registers subl $CALLEE_SAVED_REGISTER_FOOTPRINT,%esp - // save callee-saved registers - movl %esp,%ecx - addl 24(%ebp),%ecx + // remember this stack position, since we won't be able to rely on + // %rbp being restored when the call returns + movl 8(%ebp),%eax + movl %esp,THREAD_SCRATCH(%eax) - movl %ebx,0(%ecx) - movl %esi,4(%ecx) - movl %edi,8(%ecx) + movl %ebx,0(%esp) + movl %esi,4(%esp) + movl %edi,8(%esp) + + // allocate stack space for arguments + subl 24(%ebp),%esp // we use ebx to hold the thread pointer, by convention - mov 8(%ebp),%ebx + mov %eax,%ebx // copy arguments into place movl $0,%ecx @@ -356,11 +347,8 @@ LOCAL(vmInvoke_argumentTest): .globl GLOBAL(vmInvoke_returnAddress) GLOBAL(vmInvoke_returnAddress): - // restore stack pointer, preserving the area containing saved - // registers - movl %ebp,%ecx - subl $CALLEE_SAVED_REGISTER_FOOTPRINT,%ecx - movl %ecx,%esp + // restore stack pointer + movl THREAD_SCRATCH(%ebx),%esp // clear MyThread::stack to avoid confusing another thread calling // java.lang.Thread.getStackTrace on this one. 
See @@ -380,11 +368,11 @@ GLOBAL(vmInvoke_safeStack): movl 0(%esp),%ebx movl 4(%esp),%esi movl 8(%esp),%edi - - // handle return value based on expected type - movl 28(%ebp),%ecx addl $CALLEE_SAVED_REGISTER_FOOTPRINT,%esp + + // handle return value based on expected type + movl 28(%esp),%ecx LOCAL(vmInvoke_void): cmpl $VOID_TYPE,%ecx @@ -412,20 +400,15 @@ GLOBAL(vmJumpAndInvoke): #ifdef AVIAN_CONTINUATIONS // 4(%esp): thread // 8(%esp): address - // 12(%esp): base - // 16(%esp): (unused) - // 20(%esp): argumentFootprint - // 24(%esp): arguments - // 28(%esp): frameSize + // 12(%esp): stack + // 16(%esp): argumentFootprint + // 20(%esp): arguments + // 24(%esp): frameSize - movl 12(%esp),%ebp - - // restore (pseudo)-stack pointer (we don't want to touch the real - // stack pointer, since we haven't copied the arguments yet) - movl %ebp,%ecx + movl 12(%esp),%ecx // allocate new frame, adding room for callee-saved registers - subl 28(%esp),%ecx + subl 24(%esp),%ecx subl $CALLEE_SAVED_REGISTER_FOOTPRINT,%ecx movl 4(%esp),%ebx @@ -447,8 +430,8 @@ LOCAL(vmJumpAndInvoke_offset): // copy arguments into place movl $0,%esi - movl 20(%esp),%edx - movl 24(%esp),%eax + movl 16(%esp),%edx + movl 20(%esp),%eax jmp LOCAL(vmJumpAndInvoke_argumentTest) LOCAL(vmJumpAndInvoke_argumentLoop): diff --git a/src/compile.cpp b/src/compile.cpp index 6069a0dc13..42791ca8b4 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -29,18 +29,15 @@ extern "C" void vmInvoke_safeStack(); extern "C" void -vmJumpAndInvoke(void* thread, void* function, void* base, void* stack, +vmJumpAndInvoke(void* thread, void* function, void* stack, unsigned argumentFootprint, uintptr_t* arguments, unsigned frameSize); -extern "C" void -vmCall(); - namespace { namespace local { -const bool DebugCompile = false; +const bool DebugCompile = true; const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; @@ -90,26 +87,28 @@ class MyThread: public Thread { public: CallTrace(MyThread* t, object method): t(t), - base(t->base), stack(t->stack), + scratch(t->scratch), continuation(t->continuation), nativeMethod((methodFlags(t, method) & ACC_NATIVE) ? 
method : 0), targetMethod(0), originalMethod(method), next(t->trace) { - doTransition(t, 0, 0, 0, 0, this); + doTransition(t, 0, 0, 0, this); } ~CallTrace() { assert(t, t->stack == 0); - doTransition(t, 0, stack, base, continuation, next); + t->scratch = scratch; + + doTransition(t, 0, stack, continuation, next); } MyThread* t; - void* base; void* stack; + void* scratch; object continuation; object nativeMethod; object targetMethod; @@ -132,11 +131,10 @@ class MyThread: public Thread { Context* context; }; - Context(MyThread* t, void* ip, void* stack, void* base, - object continuation, CallTrace* trace): + Context(MyThread* t, void* ip, void* stack, object continuation, + CallTrace* trace): ip(ip), stack(stack), - base(base), continuation(continuation), trace(trace), protector(t, this) @@ -144,7 +142,6 @@ class MyThread: public Thread { void* ip; void* stack; - void* base; object continuation; CallTrace* trace; MyProtector protector; @@ -152,9 +149,9 @@ class MyThread: public Thread { class TraceContext: public Context { public: - TraceContext(MyThread* t, void* ip, void* stack, void* base, - object continuation, CallTrace* trace): - Context(t, ip, stack, base, continuation, trace), + TraceContext(MyThread* t, void* ip, void* stack, object continuation, + CallTrace* trace): + Context(t, ip, stack, continuation, trace), t(t), next(t->traceContext) { @@ -162,7 +159,7 @@ class MyThread: public Thread { } TraceContext(MyThread* t): - Context(t, t->ip, t->stack, t->base, t->continuation, t->trace), + Context(t, t->ip, t->stack, t->continuation, t->trace), t(t), next(t->traceContext) { @@ -177,7 +174,7 @@ class MyThread: public Thread { TraceContext* next; }; - static void doTransition(MyThread* t, void* ip, void* stack, void* base, + static void doTransition(MyThread* t, void* ip, void* stack, object continuation, MyThread::CallTrace* trace) { // in this function, we "atomically" update the thread context @@ -187,7 +184,7 @@ class MyThread: public Thread { assert(t, t->transition == 0); - Context c(t, ip, stack, base, continuation, trace); + Context c(t, ip, stack, continuation, trace); compileTimeMemoryBarrier(); @@ -196,7 +193,6 @@ class MyThread: public Thread { compileTimeMemoryBarrier(); t->ip = ip; - t->base = base; t->stack = stack; t->continuation = continuation; t->trace = trace; @@ -210,8 +206,8 @@ class MyThread: public Thread { bool useNativeFeatures): Thread(m, javaThread, parent), ip(0), - base(0), stack(0), + scratch(0), continuation(0), exceptionStackAdjustment(0), exceptionOffset(0), @@ -232,8 +228,8 @@ class MyThread: public Thread { } void* ip; - void* base; void* stack; + void* scratch; object continuation; uintptr_t exceptionStackAdjustment; uintptr_t exceptionOffset; @@ -250,10 +246,10 @@ class MyThread: public Thread { }; void -transition(MyThread* t, void* ip, void* stack, void* base, object continuation, +transition(MyThread* t, void* ip, void* stack, object continuation, MyThread::CallTrace* trace) { - MyThread::doTransition(t, ip, stack, base, continuation, trace); + MyThread::doTransition(t, ip, stack, continuation, trace); } unsigned @@ -372,6 +368,104 @@ methodForIp(MyThread* t, void* ip) root(t, MethodTreeSentinal), compareIpToMethodBounds); } +unsigned +localSize(MyThread* t, object method) +{ + unsigned size = codeMaxLocals(t, methodCode(t, method)); + if ((methodFlags(t, method) & (ACC_SYNCHRONIZED | ACC_STATIC)) + == ACC_SYNCHRONIZED) + { + ++ size; + } + return size; +} + +unsigned +alignedFrameSize(MyThread* t, object method) +{ + return 
t->arch->alignFrameSize + (localSize(t, method) + - methodParameterFootprint(t, method) + + codeMaxStack(t, methodCode(t, method)) + + t->arch->frameFootprint(MaxNativeCallFootprint)); +} + +unsigned +bitsNeeded(unsigned v) +{ + return log(v + 1); +} + +void +setTableValue(Thread* t, object table, unsigned base, unsigned max, + unsigned index, unsigned value) +{ + unsigned bits = bitsNeeded(max); + setBits + (&intArrayBody(t, table, base), bits, index * bits, value); +} + +unsigned +getTableValue(Thread* t, object table, unsigned base, unsigned max, + unsigned index) +{ + unsigned bits = bitsNeeded(max); + return getBits + (&intArrayBody(t, table, base), bits, index * bits); +} + +unsigned +frameSize(MyThread* t, intptr_t ip, object method) +{ + object code = methodCode(t, method); + object table = codeFrameSizeTable(t, code); + unsigned count = intArrayBody(t, table, 0); + unsigned max = alignedFrameSize(t, method); + intptr_t start = codeCompiled(t, code); + int codeSize = compiledSize(start); + unsigned indexSize = ceiling(count * bitsNeeded(codeSize), 32); + + assert(t, ip >= start); + assert(t, ip <= start + codeSize); + + unsigned offset = ip - start; + unsigned bottom = 0; + unsigned top = count; + for (unsigned span = top - bottom; span; span = top - bottom) { + unsigned middle = bottom + (span / 2); + unsigned candidate = getTableValue(t, table, 1, codeSize, middle); + + if (offset >= candidate + and (middle + 1 == count + or offset < getTableValue(t, table, 1, codeSize, middle + 1))) + { + return getTableValue(t, table, 1 + indexSize, max, middle); + } else if (offset < candidate) { + top = middle; + } else if (offset > candidate) { + bottom = middle + 1; + } + } + + if (top == 0) { + return 0; + } else if (top < count) { + return getTableValue(t, table, 1 + indexSize, max, top); + } else if (top == count && count > 0) { + return getTableValue(t, table, 1 + indexSize, max, top - 1); + } else { + abort(t); + } +} + +void* +nextFrame(MyThread* t, void* ip, void* sp, object method) +{ + return reinterpret_cast(sp) + local::frameSize + (t, reinterpret_cast(ip), method) + + t->arch->frameReturnAddressSize(); +} + class MyStackWalker: public Processor::StackWalker { public: enum State { @@ -406,13 +500,11 @@ class MyStackWalker: public Processor::StackWalker { { if (t->traceContext) { ip_ = t->traceContext->ip; - base = t->traceContext->base; stack = t->traceContext->stack; trace = t->traceContext->trace; continuation = t->traceContext->continuation; } else { ip_ = 0; - base = t->base; stack = t->stack; trace = t->trace; continuation = t->continuation; @@ -423,7 +515,6 @@ class MyStackWalker: public Processor::StackWalker { t(w->t), state(w->state), ip_(w->ip_), - base(w->base), stack(w->stack), trace(w->trace), method_(w->method_), @@ -478,7 +569,6 @@ class MyStackWalker: public Processor::StackWalker { if (trace) { continuation = trace->continuation; stack = trace->stack; - base = trace->base; ip_ = t->arch->frameIp(stack); trace = trace->next; @@ -509,7 +599,7 @@ class MyStackWalker: public Processor::StackWalker { break; case Method: - t->arch->nextFrame(&stack, &base); + stack = nextFrame(t, ip_, stack, method_); ip_ = t->arch->frameIp(stack); break; @@ -561,7 +651,6 @@ class MyStackWalker: public Processor::StackWalker { MyThread* t; State state; void* ip_; - void* base; void* stack; MyThread::CallTrace* trace; object method_; @@ -569,28 +658,6 @@ class MyStackWalker: public Processor::StackWalker { MyProtector protector; }; -unsigned -localSize(MyThread* t, object method) -{ - 
unsigned size = codeMaxLocals(t, methodCode(t, method)); - if ((methodFlags(t, method) & (ACC_SYNCHRONIZED | ACC_STATIC)) - == ACC_SYNCHRONIZED) - { - ++ size; - } - return size; -} - -unsigned -alignedFrameSize(MyThread* t, object method) -{ - return t->arch->alignFrameSize - (localSize(t, method) - - methodParameterFootprint(t, method) - + codeMaxStack(t, methodCode(t, method)) - + t->arch->frameFootprint(MaxNativeCallFootprint)); -} - int localOffset(MyThread* t, int v, object method) { @@ -1929,22 +1996,19 @@ releaseLock(MyThread* t, object method, void* stack) } void -findUnwindTarget(MyThread* t, void** targetIp, void** targetBase, - void** targetStack, object* targetContinuation) +findUnwindTarget(MyThread* t, void** targetIp, void** targetStack, + object* targetContinuation) { void* ip; - void* base; void* stack; object continuation; if (t->traceContext) { ip = t->traceContext->ip; - base = t->traceContext->base; stack = t->traceContext->stack; continuation = t->traceContext->continuation; } else { ip = 0; - base = t->base; stack = t->stack; continuation = t->continuation; } @@ -1963,9 +2027,8 @@ findUnwindTarget(MyThread* t, void** targetIp, void** targetBase, if (handler) { *targetIp = handler; - *targetBase = base; - t->arch->nextFrame(&stack, &base); + stack = nextFrame(t, ip, stack, method); void** sp = static_cast(stackForFrame(t, stack, method)) + t->arch->frameReturnAddressSize(); @@ -1977,7 +2040,7 @@ findUnwindTarget(MyThread* t, void** targetIp, void** targetBase, t->exception = 0; } else { - t->arch->nextFrame(&stack, &base); + stack = nextFrame(t, ip, stack, method); ip = t->arch->frameIp(stack); if (t->exception) { @@ -1988,7 +2051,6 @@ findUnwindTarget(MyThread* t, void** targetIp, void** targetBase, } } else { *targetIp = ip; - *targetBase = base; *targetStack = static_cast(stack) + t->arch->frameReturnAddressSize(); *targetContinuation = continuation; @@ -2029,11 +2091,9 @@ findUnwindTarget(MyThread* t, void** targetIp, void** targetBase, } object -makeCurrentContinuation(MyThread* t, void** targetIp, void** targetBase, - void** targetStack) +makeCurrentContinuation(MyThread* t, void** targetIp, void** targetStack) { void* ip = t->arch->frameIp(t->stack); - void* base = t->base; void* stack = t->stack; object context = t->continuation @@ -2066,7 +2126,7 @@ makeCurrentContinuation(MyThread* t, void** targetIp, void** targetBase, top += argumentFootprint - alignment; } - t->arch->nextFrame(&stack, &base); + stack = nextFrame(t, ip, stack, method); void** bottom = static_cast(stack) + t->arch->frameReturnAddressSize(); @@ -2101,7 +2161,6 @@ makeCurrentContinuation(MyThread* t, void** targetIp, void** targetBase, target = method; } else { *targetIp = ip; - *targetBase = base; *targetStack = static_cast(stack) + t->arch->frameReturnAddressSize(); } @@ -2117,14 +2176,13 @@ void NO_RETURN unwind(MyThread* t) { void* ip; - void* base; void* stack; object continuation; - findUnwindTarget(t, &ip, &base, &stack, &continuation); + findUnwindTarget(t, &ip, &stack, &continuation); - transition(t, ip, stack, base, continuation, t->trace); + transition(t, ip, stack, continuation, t->trace); - vmJump(ip, base, stack, t, 0, 0); + vmJump(ip, stack, t, 0, 0); } class MyCheckpoint: public Thread::Checkpoint { @@ -5165,6 +5223,45 @@ translateLineNumberTable(MyThread* t, Compiler* c, object code, intptr_t start) } } +object +makeFrameSizeTable(MyThread* t, Context* c, unsigned codeSize) +{ + Assembler* a = c->assembler; + unsigned count = a->frameSizeEventCount(); + int max = 
alignedFrameSize(t, c->method); + unsigned indexSize = ceiling(count * bitsNeeded(codeSize), 32); + unsigned tableSize = ceiling(count * bitsNeeded(max), 32); + object table = makeIntArray(t, 1 + indexSize + tableSize); + + intArrayBody(t, table, 0) = count; + + unsigned index = 0; + int value = 0; + for (Assembler::FrameSizeEvent* e = a->firstFrameSizeEvent(); + e; e = e->next()) + { + assert(t, index < count); + + unsigned offset = e->offset(); + assert(t, offset <= codeSize); + + value += e->change(); + + fprintf(stderr, "offset %d change %d value %d\n", + offset, e->change(), value); + + assert(t, value >= 0); + assert(t, value <= max); + + setTableValue(t, table, 1, codeSize, index, offset); + setTableValue(t, table, 1 + indexSize, max, index, value); + + ++ index; + } + + return table; +} + void printSet(uintptr_t m, unsigned limit) { @@ -5842,18 +5939,23 @@ finish(MyThread* t, Allocator* allocator, Context* context) } } - object newExceptionHandlerTable = translateExceptionHandlerTable - (t, c, context->method, reinterpret_cast(start)); + { object newExceptionHandlerTable = translateExceptionHandlerTable + (t, c, context->method, reinterpret_cast(start)); - PROTECT(t, newExceptionHandlerTable); + PROTECT(t, newExceptionHandlerTable); - object newLineNumberTable = translateLineNumberTable - (t, c, methodCode(t, context->method), reinterpret_cast(start)); + object newLineNumberTable = translateLineNumberTable + (t, c, methodCode(t, context->method), + reinterpret_cast(start)); - { object code = methodCode(t, context->method); + PROTECT(t, newLineNumberTable); + + object frameSizeTable = makeFrameSizeTable(t, context, codeSize); + + object code = methodCode(t, context->method); code = makeCode - (t, 0, newExceptionHandlerTable, newLineNumberTable, + (t, 0, newExceptionHandlerTable, newLineNumberTable, frameSizeTable, reinterpret_cast(start), codeMaxStack(t, code), codeMaxLocals(t, code), 0); @@ -6423,8 +6525,7 @@ invokeNative(MyThread* t) stack += t->arch->frameReturnAddressSize(); - transition(t, t->arch->frameIp(t->stack), stack, t->base, t->continuation, - t->trace); + transition(t, t->arch->frameIp(t->stack), stack, t->continuation, t->trace); return result; } @@ -6595,7 +6696,6 @@ void visitStack(MyThread* t, Heap::Visitor* v) { void* ip = t->arch->frameIp(t->stack); - void* base = t->base; void* stack = t->stack; MyThread::CallTrace* trace = t->trace; @@ -6611,14 +6711,13 @@ visitStack(MyThread* t, Heap::Visitor* v) if (method) { PROTECT(t, method); - t->arch->nextFrame(&stack, &base); + stack = nextFrame(t, ip, stack, method); visitStackAndLocals(t, v, stack, method, ip); ip = t->arch->frameIp(stack); } else if (trace) { stack = trace->stack; - base = trace->base; ip = t->arch->frameIp(stack); trace = trace->next; @@ -6671,24 +6770,24 @@ walkContinuationBody(MyThread* t, Heap::Walker* w, object c, int start) void callContinuation(MyThread* t, object continuation, object result, - object exception, void* ip, void* base, void* stack) + object exception, void* ip, void* stack) { assert(t, t->exception == 0); if (exception) { t->exception = exception; - MyThread::TraceContext c(t, ip, stack, base, continuation, t->trace); + MyThread::TraceContext c(t, ip, stack, continuation, t->trace); - findUnwindTarget(t, &ip, &base, &stack, &continuation); + findUnwindTarget(t, &ip, &stack, &continuation); } t->trace->nativeMethod = 0; t->trace->targetMethod = 0; - transition(t, ip, stack, base, continuation, t->trace); + transition(t, ip, stack, continuation, t->trace); - vmJump(ip, base, 
stack, t, reinterpret_cast(result), 0); + vmJump(ip, stack, t, reinterpret_cast(result), 0); } int8_t* @@ -6746,7 +6845,7 @@ compatibleReturnType(MyThread* t, object oldMethod, object newMethod) } void -jumpAndInvoke(MyThread* t, object method, void* base, void* stack, ...) +jumpAndInvoke(MyThread* t, object method, void* stack, ...) { t->trace->targetMethod = 0; @@ -6766,7 +6865,6 @@ jumpAndInvoke(MyThread* t, object method, void* base, void* stack, ...) vmJumpAndInvoke (t, reinterpret_cast(methodAddress(t, method)), - base, stack, argumentCount * BytesPerWord, RUNTIME_ARRAY_BODY(arguments), @@ -6858,25 +6956,24 @@ callContinuation(MyThread* t, object continuation, object result, } void* ip; - void* base; void* stack; object threadContinuation; - findUnwindTarget(t, &ip, &base, &stack, &threadContinuation); + findUnwindTarget(t, &ip, &stack, &threadContinuation); switch (action) { case Call: { - callContinuation(t, continuation, result, exception, ip, base, stack); + callContinuation(t, continuation, result, exception, ip, stack); } break; case Unwind: { - callContinuation(t, nextContinuation, result, 0, ip, base, stack); + callContinuation(t, nextContinuation, result, 0, ip, stack); } break; case Rewind: { - transition(t, 0, 0, 0, nextContinuation, t->trace); + transition(t, 0, 0, nextContinuation, t->trace); jumpAndInvoke - (t, root(t, RewindMethod), base, stack, + (t, root(t, RewindMethod), stack, continuationContextBefore(t, continuationContext(t, nextContinuation)), continuation, result, exception); } break; @@ -6891,7 +6988,6 @@ callWithCurrentContinuation(MyThread* t, object receiver) { object method = 0; void* ip = 0; - void* base = 0; void* stack = 0; { PROTECT(t, receiver); @@ -6920,17 +7016,16 @@ callWithCurrentContinuation(MyThread* t, object receiver) compile(t, local::codeAllocator(t), 0, method); - t->continuation = makeCurrentContinuation(t, &ip, &base, &stack); + t->continuation = makeCurrentContinuation(t, &ip, &stack); } - jumpAndInvoke(t, method, base, stack, receiver, t->continuation); + jumpAndInvoke(t, method, stack, receiver, t->continuation); } void dynamicWind(MyThread* t, object before, object thunk, object after) { void* ip = 0; - void* base = 0; void* stack = 0; { PROTECT(t, before); @@ -6949,7 +7044,7 @@ dynamicWind(MyThread* t, object before, object thunk, object after) } } - t->continuation = makeCurrentContinuation(t, &ip, &base, &stack); + t->continuation = makeCurrentContinuation(t, &ip, &stack); object newContext = makeContinuationContext (t, continuationContext(t, t->continuation), before, after, @@ -6958,7 +7053,7 @@ dynamicWind(MyThread* t, object before, object thunk, object after) set(t, t->continuation, ContinuationContext, newContext); } - jumpAndInvoke(t, root(t, WindMethod), base, stack, before, thunk, after); + jumpAndInvoke(t, root(t, WindMethod), stack, before, thunk, after); } class ArgumentList { @@ -7168,9 +7263,7 @@ class SignalHandler: public System::SignalHandler { SignalHandler(Machine::Type type, Machine::Root root, unsigned fixedSize): m(0), type(type), root(root), fixedSize(fixedSize) { } - virtual bool handleSignal(void** ip, void** base, void** stack, - void** thread) - { + virtual bool handleSignal(void** ip, void** stack, void** thread) { MyThread* t = static_cast(m->localThread->get()); if (t and t->state == Thread::ActiveState) { object node = methodForIp(t, *ip); @@ -7180,7 +7273,7 @@ class SignalHandler: public System::SignalHandler { MyThread::TraceContext context (t, static_cast(*ip) + 1, static_cast(*stack) - 
t->arch->frameReturnAddressSize(), - *base, t->continuation, t->trace); + t->continuation, t->trace); if (ensure(t, fixedSize + traceSize(t))) { atomicOr(&(t->flags), Thread::TracingFlag); @@ -7195,9 +7288,9 @@ class SignalHandler: public System::SignalHandler { // printTrace(t, t->exception); object continuation; - findUnwindTarget(t, ip, base, stack, &continuation); + findUnwindTarget(t, ip, stack, &continuation); - transition(t, ip, stack, base, continuation, t->trace); + transition(t, ip, stack, continuation, t->trace); *thread = t; @@ -7291,6 +7384,7 @@ class MyProcessor: public Processor { if (false) { fprintf(stderr, "%d\n", difference(&(t->stack), t)); + fprintf(stderr, "%d\n", difference(&(t->scratch), t)); fprintf(stderr, "%d\n", difference(&(t->continuation), t)); fprintf(stderr, "%d\n", difference(&(t->exception), t)); fprintf(stderr, "%d\n", difference(&(t->exceptionStackAdjustment), t)); @@ -7571,18 +7665,16 @@ class MyProcessor: public Processor { t(t), p(p), target(target), trace(0) { } - virtual void visit(void* ip, void* base, void* stack) { + virtual void visit(void* ip, void* stack) { MyThread::TraceContext c(target); if (methodForIp(t, ip)) { // we caught the thread in Java code - use the register values c.ip = ip; - c.base = base; c.stack = stack; } else if (target->transition) { // we caught the thread in native code while in the middle - // of updating the context fields (MyThread::stack, - // MyThread::base, etc.) + // of updating the context fields (MyThread::stack, etc.) static_cast(c) = *(target->transition); } else if (isVmInvokeUnsafeStack(ip)) { // we caught the thread in native code just after returning @@ -7591,31 +7683,26 @@ class MyProcessor: public Processor { // Java frame, if any, can be found in // MyThread::continuation or MyThread::trace c.ip = 0; - c.base = 0; c.stack = 0; } else if (target->stack and (not isThunkUnsafeStack(t, ip)) and (not isVirtualThunk(t, ip))) { // we caught the thread in a thunk or native code, and the - // saved stack and base pointers indicate the most recent - // Java frame on the stack + // saved stack pointer indicates the most recent Java frame + // on the stack c.ip = t->arch->frameIp(target->stack); - c.base = target->base; c.stack = target->stack; } else if (isThunk(t, ip) or isVirtualThunk(t, ip)) { - // we caught the thread in a thunk where the stack and base - // registers indicate the most recent Java frame on the - // stack + // we caught the thread in a thunk where the stack register + // indicates the most recent Java frame on the stack c.ip = t->arch->frameIp(stack); - c.base = base; c.stack = stack; } else { // we caught the thread in native code, and the most recent // Java frame, if any, can be found in // MyThread::continuation or MyThread::trace c.ip = 0; - c.base = 0; c.stack = 0; } @@ -8316,7 +8403,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) { Assembler* a = defaultContext.context.assembler; - a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t)); + a->saveFrame(difference(&(t->stack), t)); p->thunks.default_.frameSavedOffset = a->length(); @@ -8326,7 +8413,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Constant proc(&(defaultContext.promise)); a->apply(LongCall, BytesPerWord, ConstantOperand, &proc); - a->popFrame(); + a->popFrame(t->arch->alignFrameSize(1)); Assembler::Register result(t->arch->returnLow()); a->apply(Jump, BytesPerWord, RegisterOperand, &result); @@ -8360,7 +8447,7 @@ compileThunks(MyThread* t, Allocator* 
allocator, MyProcessor* p) a->apply(Move, BytesPerWord, RegisterOperand, &index, BytesPerWord, MemoryOperand, &virtualCallIndex); - a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t)); + a->saveFrame(difference(&(t->stack), t)); p->thunks.defaultVirtual.frameSavedOffset = a->length(); @@ -8370,7 +8457,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Constant proc(&(defaultVirtualContext.promise)); a->apply(LongCall, BytesPerWord, ConstantOperand, &proc); - a->popFrame(); + a->popFrame(t->arch->alignFrameSize(1)); Assembler::Register result(t->arch->returnLow()); a->apply(Jump, BytesPerWord, RegisterOperand, &result); @@ -8382,7 +8469,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) { Assembler* a = nativeContext.context.assembler; - a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t)); + a->saveFrame(difference(&(t->stack), t)); p->thunks.native.frameSavedOffset = a->length(); @@ -8392,7 +8479,8 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Constant proc(&(nativeContext.promise)); a->apply(LongCall, BytesPerWord, ConstantOperand, &proc); - a->popFrameAndUpdateStackAndReturn(difference(&(t->stack), t)); + a->popFrameAndUpdateStackAndReturn + (t->arch->alignFrameSize(1), difference(&(t->stack), t)); p->thunks.native.length = a->endBlock(false)->resolve(0, 0); } @@ -8401,7 +8489,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) { Assembler* a = aioobContext.context.assembler; - a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t)); + a->saveFrame(difference(&(t->stack), t)); p->thunks.aioob.frameSavedOffset = a->length(); @@ -8418,7 +8506,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) { Assembler* a = stackOverflowContext.context.assembler; - a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t)); + a->saveFrame(difference(&(t->stack), t)); p->thunks.stackOverflow.frameSavedOffset = a->length(); @@ -8435,7 +8523,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) { Assembler* a = tableContext.context.assembler; - a->saveFrame(difference(&(t->stack), t), difference(&(t->base), t)); + a->saveFrame(difference(&(t->stack), t)); p->thunks.table.frameSavedOffset = a->length(); diff --git a/src/compiler.cpp b/src/compiler.cpp index e25dfafb50..65b5437cad 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -3147,12 +3147,25 @@ class CallEvent: public Event { assert(c, stackArgumentFootprint == 0); Stack* s = argumentStack; - unsigned frameIndex = 0; unsigned index = 0; + unsigned argumentIndex = 0; while (true) { + unsigned footprint; + if (argumentIndex + 1 < argumentCount + and s->value->nextWord == s->next->value) + { + footprint = 2; + } else { + footprint = 1; + } + + if (footprint > 1 and index & 1 and c->arch->argumentAlignment()) { + ++ index; + } + SiteMask targetMask; - if (index < c->arch->argumentRegisterCount()) { + if (index + footprint <= c->arch->argumentRegisterCount()) { int number = c->arch->argumentRegister(index); if (DebugReads) { @@ -3162,17 +3175,24 @@ class CallEvent: public Event { targetMask = fixedRegisterMask(number); registerMask &= ~(1 << number); } else { + if (index < c->arch->argumentRegisterCount()) { + index = c->arch->argumentRegisterCount(); + } + + unsigned frameIndex = index - c->arch->argumentRegisterCount(); + if (DebugReads) { fprintf(stderr, "stack %d arg read %p\n", frameIndex, s->value); } targetMask = SiteMask(1 << MemoryOperand, 0, frameIndex); - 
++ frameIndex; } addRead(c, this, s->value, targetMask); - if ((++ index) < argumentCount) { + ++ index; + + if ((++ argumentIndex) < argumentCount) { s = s->next; } else { break; @@ -3225,7 +3245,11 @@ class CallEvent: public Event { int base = frameBase(c); returnAddressIndex = base + c->arch->returnAddressOffset(); - framePointerIndex = base + c->arch->framePointerOffset(); + if (UseFramePointer) { + framePointerIndex = base + c->arch->framePointerOffset(); + } else { + framePointerIndex = -1; + } frameOffset = totalFrameSize(c) - c->arch->argumentFootprint(stackArgumentFootprint); @@ -3451,7 +3475,8 @@ class ReturnEvent: public Event { if (not unreachable(this)) { c->assembler->popFrameAndPopArgumentsAndReturn - (c->arch->argumentFootprint(c->parameterFootprint)); + (c->alignedFrameSize, + c->arch->argumentFootprint(c->parameterFootprint)); } } @@ -6042,7 +6067,8 @@ class MyCompiler: public Compiler { unsigned base = frameBase(&c); c.frameResources[base + c.arch->returnAddressOffset()].reserved = true; - c.frameResources[base + c.arch->framePointerOffset()].reserved = true; + c.frameResources[base + c.arch->framePointerOffset()].reserved + = UseFramePointer; // leave room for logical instruction -1 unsigned codeSize = sizeof(LogicalInstruction*) * (logicalCodeLength + 1); diff --git a/src/jnienv.cpp b/src/jnienv.cpp index 5a80e8d216..027025b1e3 100644 --- a/src/jnienv.cpp +++ b/src/jnienv.cpp @@ -561,7 +561,7 @@ NewObjectV(Thread* t, jclass c, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(c), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return reinterpret_cast(run(t, newObjectV, arguments)); } @@ -597,7 +597,7 @@ CallObjectMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return reinterpret_cast(run(t, callObjectMethodV, arguments)); } @@ -631,7 +631,7 @@ CallBooleanMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return run(t, callIntMethodV, arguments) != 0; } @@ -654,7 +654,7 @@ CallByteMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return run(t, callIntMethodV, arguments); } @@ -677,7 +677,7 @@ CallCharMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return run(t, callIntMethodV, arguments); } @@ -700,7 +700,7 @@ CallShortMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return run(t, callIntMethodV, arguments); } @@ -723,7 +723,7 @@ CallIntMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return run(t, callIntMethodV, arguments); } @@ -757,7 +757,7 @@ CallLongMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return run(t, callLongMethodV, arguments); } @@ -780,7 +780,7 @@ CallFloatMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; 
return bitsToFloat(run(t, callIntMethodV, arguments)); } @@ -803,7 +803,7 @@ CallDoubleMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; return bitsToDouble(run(t, callLongMethodV, arguments)); } @@ -839,7 +839,7 @@ CallVoidMethodV(Thread* t, jobject o, jmethodID m, va_list a) { uintptr_t arguments[] = { reinterpret_cast(o), m, - reinterpret_cast(a) }; + reinterpret_cast(VA_LIST(a)) }; run(t, callVoidMethodV, arguments); } @@ -879,7 +879,7 @@ callStaticObjectMethodV(Thread* t, uintptr_t* arguments) jobject JNICALL CallStaticObjectMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return reinterpret_cast(run(t, callStaticObjectMethodV, arguments)); } @@ -910,7 +910,7 @@ callStaticIntMethodV(Thread* t, uintptr_t* arguments) jboolean JNICALL CallStaticBooleanMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return run(t, callStaticIntMethodV, arguments) != 0; } @@ -931,7 +931,7 @@ CallStaticBooleanMethod(Thread* t, jclass c, jmethodID m, ...) jbyte JNICALL CallStaticByteMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return run(t, callStaticIntMethodV, arguments); } @@ -952,7 +952,7 @@ CallStaticByteMethod(Thread* t, jclass c, jmethodID m, ...) jchar JNICALL CallStaticCharMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return run(t, callStaticIntMethodV, arguments); } @@ -973,7 +973,7 @@ CallStaticCharMethod(Thread* t, jclass c, jmethodID m, ...) jshort JNICALL CallStaticShortMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return run(t, callStaticIntMethodV, arguments); } @@ -994,7 +994,7 @@ CallStaticShortMethod(Thread* t, jclass c, jmethodID m, ...) jint JNICALL CallStaticIntMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return run(t, callStaticIntMethodV, arguments); } @@ -1025,7 +1025,7 @@ callStaticLongMethodV(Thread* t, uintptr_t* arguments) jlong JNICALL CallStaticLongMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return run(t, callStaticLongMethodV, arguments); } @@ -1046,7 +1046,7 @@ CallStaticLongMethod(Thread* t, jclass c, jmethodID m, ...) jfloat JNICALL CallStaticFloatMethodV(Thread* t, jclass, jmethodID m, va_list a) { - uintptr_t arguments[] = { m, reinterpret_cast(a) }; + uintptr_t arguments[] = { m, reinterpret_cast(VA_LIST(a)) }; return bitsToFloat(run(t, callStaticIntMethodV, arguments)); } @@ -1067,7 +1067,7 @@ CallStaticFloatMethod(Thread* t, jclass c, jmethodID m, ...) 
 jdouble JNICALL
 CallStaticDoubleMethodV(Thread* t, jclass, jmethodID m, va_list a)
 {
-  uintptr_t arguments[] = { m, reinterpret_cast<uintptr_t>(a) };
+  uintptr_t arguments[] = { m, reinterpret_cast<uintptr_t>(VA_LIST(a)) };
 
   return bitsToDouble(run(t, callStaticLongMethodV, arguments));
 }
@@ -1099,7 +1099,7 @@ callStaticVoidMethodV(Thread* t, uintptr_t* arguments)
 void JNICALL
 CallStaticVoidMethodV(Thread* t, jclass, jmethodID m, va_list a)
 {
-  uintptr_t arguments[] = { m, reinterpret_cast<uintptr_t>(a) };
+  uintptr_t arguments[] = { m, reinterpret_cast<uintptr_t>(VA_LIST(a)) };
 
   run(t, callStaticVoidMethodV, arguments);
 }
diff --git a/src/machine.cpp b/src/machine.cpp
index 6a93f39fb2..9a8116d165 100644
--- a/src/machine.cpp
+++ b/src/machine.cpp
@@ -1227,7 +1227,7 @@ parseCode(Thread* t, Stream& s, object pool)
   unsigned maxLocals = s.read2();
   unsigned length = s.read4();
 
-  object code = makeCode(t, pool, 0, 0, 0, maxStack, maxLocals, length);
+  object code = makeCode(t, pool, 0, 0, 0, 0, maxStack, maxLocals, length);
   s.read(&codeBody(t, code, 0), length);
   PROTECT(t, code);
 
@@ -2056,7 +2056,7 @@ boot(Thread* t)
 
   m->processor->boot(t, 0);
 
-  { object bootCode = makeCode(t, 0, 0, 0, 0, 0, 0, 1);
+  { object bootCode = makeCode(t, 0, 0, 0, 0, 0, 0, 0, 1);
     codeBody(t, bootCode, 0) = impdep1;
     object bootMethod = makeMethod
       (t, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, bootCode);
diff --git a/src/machine.h b/src/machine.h
index 187dc46e21..f721277b44 100644
--- a/src/machine.h
+++ b/src/machine.h
@@ -1465,19 +1465,17 @@ class Thread {
    public:
     RunCheckpoint(Thread* t):
       Checkpoint(t),
-      stack(0),
-      base(0)
+      stack(0)
     { }
 
     virtual void unwind() {
       void* stack = this->stack;
       this->stack = 0;
       expect(t->m->system, stack);
-      vmJump(voidPointer(vmRun_returnAddress), base, stack, t, 0, 0);
+      vmJump(voidPointer(vmRun_returnAddress), stack, t, 0, 0);
     }
 
     void* stack;
-    void* base;
   };
 
   class Runnable: public System::Runnable {
diff --git a/src/posix.cpp b/src/posix.cpp
index 4439feea01..590d8e65ca 100644
--- a/src/posix.cpp
+++ b/src/posix.cpp
@@ -815,7 +815,7 @@ class MySystem: public System {
   }
 
   class NullSignalHandler: public SignalHandler {
-    virtual bool handleSignal(void**, void**, void**, void**) { return false; }
+    virtual bool handleSignal(void**, void**, void**) { return false; }
   } nullHandler;
 
   SignalHandler* handlers[SignalCount];
@@ -831,12 +831,7 @@ handleSignal(int signal, siginfo_t* info, void* context)
 {
   ucontext_t* c = static_cast<ucontext_t*>(context);
 
-#ifndef BASE_REGISTER
-# define BASE_REGISTER(x) 0
-#endif
-
   void* ip = reinterpret_cast<void*>(IP_REGISTER(c));
-  void* base = reinterpret_cast<void*>(BASE_REGISTER(c));
   void* stack = reinterpret_cast<void*>(STACK_REGISTER(c));
   void* thread = reinterpret_cast<void*>(THREAD_REGISTER(c));
 
@@ -846,7 +841,7 @@ handleSignal(int signal, siginfo_t* info, void* context)
   case VisitSignal: {
     index = VisitSignalIndex;
 
-    system->threadVisitor->visit(ip, base, stack);
+    system->threadVisitor->visit(ip, stack);
 
     System::Thread* t = system->visitTarget;
     system->visitTarget = 0;
@@ -875,8 +870,7 @@ handleSignal(int signal, siginfo_t* info, void* context)
     abort();
   }
 
-  bool jump = system->handlers[index]->handleSignal
-    (&ip, &base, &stack, &thread);
+  bool jump = system->handlers[index]->handleSignal(&ip, &stack, &thread);
 
   if (jump) {
     // I'd like to use setcontext here (and get rid of the
@@ -890,7 +884,7 @@ handleSignal(int signal, siginfo_t* info, void* context)
     sigaddset(&set, signal);
     sigprocmask(SIG_UNBLOCK, &set, 0);
 
-    vmJump(ip, base, stack, thread, 0, 0);
+    vmJump(ip, stack, thread, 0, 0);
   }
 } break;
 
diff --git a/src/system.h b/src/system.h
index 315f7151d2..8cf08ea88a 100644
--- a/src/system.h
+++ b/src/system.h
@@ -35,7 +35,7 @@ class System {
 
   class ThreadVisitor {
    public:
-    virtual void visit(void* ip, void* base, void* stack) = 0;
+    virtual void visit(void* ip, void* stack) = 0;
   };
 
   class Runnable {
@@ -96,8 +96,7 @@ class System {
 
   class SignalHandler {
    public:
-    virtual bool handleSignal(void** ip, void** base, void** stack,
-                              void** thread) = 0;
+    virtual bool handleSignal(void** ip, void** stack, void** thread) = 0;
   };
 
   class MonitorResource {
diff --git a/src/types.def b/src/types.def
index 8b8d980ffc..7509c431a9 100644
--- a/src/types.def
+++ b/src/types.def
@@ -86,6 +86,7 @@
    (object pool)
    (object exceptionHandlerTable)
    (object lineNumberTable)
+   (object frameSizeTable)
    (intptr_t compiled)
    (uint16_t maxStack)
    (uint16_t maxLocals)
diff --git a/src/x86.S b/src/x86.S
index c7c6e5c504..08e05e4aaf 100644
--- a/src/x86.S
+++ b/src/x86.S
@@ -25,7 +25,6 @@
 #define CHECKPOINT_THREAD 8
 #define CHECKPOINT_STACK 48
-#define CHECKPOINT_BASE 56
 
 #ifdef __MINGW32__
 
 .globl GLOBAL(detectFeature)
@@ -171,11 +170,10 @@ LOCAL(exit):
 
 .globl GLOBAL(vmJump)
 GLOBAL(vmJump):
-   movq %rdx,%rbp
-   movq 40(%rsp),%rax
-   movq 48(%rsp),%rdx
-   movq %r8,%rsp
-   movq %r9,%rbx
+   movq %r9,%rax
+   movq 40(%rsp),%rdx
+   movq %rdx,%rsp
+   movq %r8,%rbx
    jmp *%rcx
 
 #define VMRUN_FRAME_SIZE 80
@@ -198,7 +196,6 @@ GLOBAL(vmRun):
    movq %rdi,64(%rsp)
 
    movq %rsp,CHECKPOINT_STACK(%rcx)
-   movq %rbp,CHECKPOINT_BASE(%rcx)
 
    movq %rcx,%r11
    movq CHECKPOINT_THREAD(%rdx),%rcx
@@ -353,11 +350,10 @@ LOCAL(exit):
 
 .globl GLOBAL(vmJump)
 GLOBAL(vmJump):
-   movq %rsi,%rbp
-   movq %rdx,%rsp
-   movq %rcx,%rbx
-   movq %r8,%rax
-   movq %r9,%rdx
+   movq %rsi,%rsp
+   movq %rdx,%rbx
+   movq %rcx,%rax
+   movq %r8,%rdx
    jmp *%rdi
 
 #define VMRUN_FRAME_SIZE 64
@@ -378,7 +374,6 @@ GLOBAL(vmRun):
    movq %r15,48(%rsp)
 
    movq %rsp,CHECKPOINT_STACK(%rdx)
-   movq %rbp,CHECKPOINT_BASE(%rdx)
 
    movq %rdi,%r11
    movq CHECKPOINT_THREAD(%rdx),%rdi
@@ -513,11 +508,10 @@ LOCAL(exit):
 .globl GLOBAL(vmJump)
 GLOBAL(vmJump):
    movl 4(%esp),%esi
-   movl 8(%esp),%ebp
-   movl 16(%esp),%ebx
-   movl 20(%esp),%eax
-   movl 24(%esp),%edx
-   movl 12(%esp),%esp
+   movl 12(%esp),%ebx
+   movl 16(%esp),%eax
+   movl 20(%esp),%edx
+   movl 8(%esp),%esp
    jmp *%esi
 
 #define VMRUN_FRAME_SIZE 32
@@ -543,7 +537,6 @@ GLOBAL(vmRun):
    movl %eax,0(%esp)
 
    movl %esp,CHECKPOINT_STACK(%ecx)
-   movl %ebp,CHECKPOINT_BASE(%ecx)
 
    call *8(%ebp)
 
diff --git a/src/x86.cpp b/src/x86.cpp
index 4c11e8adfe..676c011da8 100644
--- a/src/x86.cpp
+++ b/src/x86.cpp
@@ -65,13 +65,15 @@ const unsigned GeneralRegisterMask
 const unsigned FloatRegisterMask
   = BytesPerWord == 4 ? 0x00ff0000 : 0xffff0000;
 
-const unsigned FrameHeaderSize = 2;
+const unsigned FrameHeaderSize = (UseFramePointer ? 2 : 1);
 
 const int LongJumpRegister = r10;
 
 const unsigned StackAlignmentInBytes = 16;
 const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord;
 
+const int FrameSizePoison = -2147483647;
+
 bool
 isInt8(intptr_t v)
 {
@@ -86,16 +88,26 @@ isInt32(intptr_t v)
 class Task;
 class AlignmentPadding;
+class MyFrameSizeEvent;
 
 unsigned
 padding(AlignmentPadding* p, unsigned index, unsigned offset,
         AlignmentPadding* limit);
 
+class Context;
+class MyBlock;
+
+void
+appendFrameSizeEvent(Context* c, MyBlock* b, Promise* offset, int change);
+
+ResolvedPromise*
+resolved(Context* c, int64_t value);
+
 class MyBlock: public Assembler::Block {
  public:
   MyBlock(unsigned offset):
-    next(0), firstPadding(0), lastPadding(0), offset(offset), start(~0),
-    size(0)
+    next(0), firstPadding(0), lastPadding(0), firstFrameSizeEvent(0),
+    lastFrameSizeEvent(0), offset(offset), start(~0), size(0)
   { }
 
   virtual unsigned resolve(unsigned start, Assembler::Block* next) {
@@ -108,13 +120,13 @@ class MyBlock: public Assembler::Block {
   MyBlock* next;
   AlignmentPadding* firstPadding;
   AlignmentPadding* lastPadding;
+  MyFrameSizeEvent* firstFrameSizeEvent;
+  MyFrameSizeEvent* lastFrameSizeEvent;
   unsigned offset;
   unsigned start;
   unsigned size;
 };
 
-class Context;
-
 typedef void (*OperationType)(Context*);
 
 typedef void (*UnaryOperationType)(Context*, unsigned, Assembler::Operand*);
@@ -152,7 +164,8 @@ class Context {
   Context(System* s, Allocator* a, Zone* zone, ArchitectureContext* ac):
     s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
     firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)),
-    lastBlock(firstBlock), ac(ac)
+    lastBlock(firstBlock), firstFrameSizeEvent(0), lastFrameSizeEvent(0),
+    ac(ac), frameSizeEventCount(0)
   { }
 
   System* s;
@@ -163,7 +176,10 @@ class Context {
   uint8_t* result;
   MyBlock* firstBlock;
   MyBlock* lastBlock;
+  MyFrameSizeEvent* firstFrameSizeEvent;
+  MyFrameSizeEvent* lastFrameSizeEvent;
   ArchitectureContext* ac;
+  unsigned frameSizeEventCount;
 };
 
 void NO_RETURN
@@ -450,6 +466,54 @@ padding(AlignmentPadding* p, unsigned start, unsigned offset,
   return padding;
 }
 
+class MyFrameSizeEvent: public Assembler::FrameSizeEvent {
+ public:
+  MyFrameSizeEvent(Context* c, Promise* offset, int change):
+    c(c), next_(0), offset_(offset), change_(change)
+  { }
+
+  virtual unsigned offset() {
+    return offset_->value();
+  }
+
+  virtual int change() {
+    expect(c, change_ != FrameSizePoison);
+
+    return change_;
+  }
+
+  virtual Assembler::FrameSizeEvent* next() {
+    return next_;
+  }
+
+  Context* c;
+  MyFrameSizeEvent* next_;
+  Promise* offset_;
+  int change_;
+};
+
+void
+appendFrameSizeEvent(Context* c, MyBlock* b, Promise* offset, int change)
+{
+  MyFrameSizeEvent* e = new (c->zone->allocate(sizeof(MyFrameSizeEvent)))
+    MyFrameSizeEvent(c, offset, change);
+
+  if (b->firstFrameSizeEvent) {
+    b->lastFrameSizeEvent->next_ = e;
+  } else {
+    b->firstFrameSizeEvent = e;
+  }
+  b->lastFrameSizeEvent = e;
+
+  ++ c->frameSizeEventCount;
+}
+
+void
+appendFrameSizeEvent(Context* c, int change)
+{
+  appendFrameSizeEvent(c, c->lastBlock, offset(c), change);
+}
+
 extern "C" bool
 detectFeature(unsigned ecx, unsigned edx);
 
@@ -881,22 +945,6 @@ popR(Context* c, unsigned size, Assembler::Register* a)
   }
 }
 
-void
-popM(Context* c, unsigned size, Assembler::Memory* a)
-{
-  if (BytesPerWord == 4 and size == 8) {
-    Assembler::Memory ah(a->base, a->offset + 4, a->index, a->scale);
-
-    popM(c, 4, a);
-    popM(c, 4, &ah);
-  } else {
-    assert(c, BytesPerWord == 4 or size == 8);
-
-    opcode(c, 0x8f);
-    modrmSibImm(c, 0, a->scale, a->index, a->base, a->offset);
-  }
-}
-
 void
 addCarryCR(Context* c, unsigned size, Assembler::Constant* a,
            Assembler::Register* b);
@@ -2701,6 +2749,8 @@ class MyArchitecture: public Assembler::Architecture {
   virtual bool reserved(int register_) {
     switch (register_) {
     case rbp:
+      return UseFramePointer;
+
     case rsp:
     case rbx:
       return true;
@@ -2724,6 +2774,10 @@ class MyArchitecture: public Assembler::Architecture {
     return max(pad(footprint, StackAlignmentInWords), StackAlignmentInWords);
   }
 
+  virtual bool argumentAlignment() {
+    return false;
+  }
+
   virtual unsigned argumentRegisterCount() {
 #ifdef PLATFORM_WINDOWS
     if (BytesPerWord == 8) return 4; else
@@ -2893,13 +2947,6 @@ class MyArchitecture: public Assembler::Architecture {
     return -1;
   }
 
-  virtual void nextFrame(void** stack, void** base) {
-    assert(&c, *static_cast<void**>(*base) != *base);
-
-    *stack = static_cast<void**>(*base) + 1;
-    *base = *static_cast<void**>(*base);
-  }
-
   virtual void plan
   (UnaryOperation,
    unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
@@ -3288,16 +3335,11 @@ class MyAssembler: public Assembler {
     return arch_;
   }
 
-  virtual void saveFrame(unsigned stackOffset, unsigned baseOffset) {
+  virtual void saveFrame(unsigned stackOffset) {
     Register stack(rsp);
     Memory stackDst(rbx, stackOffset);
     apply(Move, BytesPerWord, RegisterOperand, &stack,
           BytesPerWord, MemoryOperand, &stackDst);
-
-    Register base(rbp);
-    Memory baseDst(rbx, baseOffset);
-    apply(Move, BytesPerWord, RegisterOperand, &base,
-          BytesPerWord, MemoryOperand, &baseDst);
   }
 
   virtual void pushFrame(unsigned argumentCount, ...) {
@@ -3347,37 +3389,62 @@
   }
 
   virtual void allocateFrame(unsigned footprint) {
-    Register base(rbp);
-    pushR(&c, BytesPerWord, &base);
-
     Register stack(rsp);
-    apply(Move, BytesPerWord, RegisterOperand, &stack,
-          BytesPerWord, RegisterOperand, &base);
+
+    if (UseFramePointer) {
+      Register base(rbp);
+      pushR(&c, BytesPerWord, &base);
+
+      apply(Move, BytesPerWord, RegisterOperand, &stack,
+            BytesPerWord, RegisterOperand, &base);
+
+      appendFrameSizeEvent(&c, 1);
+    }
 
     Constant footprintConstant(resolved(&c, footprint * BytesPerWord));
     apply(Subtract, BytesPerWord, ConstantOperand, &footprintConstant,
           BytesPerWord, RegisterOperand, &stack,
           BytesPerWord, RegisterOperand, &stack);
+
+    appendFrameSizeEvent(&c, footprint);
   }
 
-  virtual void adjustFrame(unsigned footprint) {
+  virtual void adjustFrame(unsigned difference) {
+    appendFrameSizeEvent(&c, - difference);
+
     Register stack(rsp);
-    Constant footprintConstant(resolved(&c, footprint * BytesPerWord));
-    apply(Subtract, BytesPerWord, ConstantOperand, &footprintConstant,
+    Constant differenceConstant(resolved(&c, difference * BytesPerWord));
+    apply(Subtract, BytesPerWord, ConstantOperand, &differenceConstant,
           BytesPerWord, RegisterOperand, &stack,
          BytesPerWord, RegisterOperand, &stack);
+
+    appendFrameSizeEvent(&c, difference);
   }
 
-  virtual void popFrame() {
-    Register base(rbp);
-    Register stack(rsp);
-    apply(Move, BytesPerWord, RegisterOperand, &base,
-          BytesPerWord, RegisterOperand, &stack);
+  virtual void popFrame(unsigned frameFootprint) {
+    if (UseFramePointer) {
+      Register base(rbp);
+      Register stack(rsp);
+      apply(Move, BytesPerWord, RegisterOperand, &base,
+            BytesPerWord, RegisterOperand, &stack);
 
-    popR(&c, BytesPerWord, &base);
+      appendFrameSizeEvent(&c, - frameFootprint);
+
+      popR(&c, BytesPerWord, &base);
+
+      appendFrameSizeEvent(&c, - 1);
+    } else {
+      Register stack(rsp);
+      Constant footprint(resolved(&c, frameFootprint * BytesPerWord));
+      apply(Add, BytesPerWord, ConstantOperand, &footprint,
+            BytesPerWord, RegisterOperand, &stack,
+            BytesPerWord, RegisterOperand, &stack);
+
+      appendFrameSizeEvent(&c, - frameFootprint);
+    }
   }
 
-  virtual void popFrameForTailCall(unsigned footprint,
+  virtual void popFrameForTailCall(unsigned frameFootprint,
                                    int offset,
                                    int returnAddressSurrogate,
                                    int framePointerSurrogate)
@@ -3386,22 +3453,27 @@
     if (offset) {
       Register tmp(c.client->acquireTemporary());
 
-      Memory returnAddressSrc(rsp, (footprint + 1) * BytesPerWord);
+      Memory returnAddressSrc(rsp, (frameFootprint + 1) * BytesPerWord);
       moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &tmp);
 
-      Memory returnAddressDst(rsp, (footprint - offset + 1) * BytesPerWord);
+      Memory returnAddressDst
+        (rsp, (frameFootprint - offset + 1) * BytesPerWord);
       moveRM(&c, BytesPerWord, &tmp, BytesPerWord, &returnAddressDst);
 
       c.client->releaseTemporary(tmp.low);
 
-      Memory baseSrc(rsp, footprint * BytesPerWord);
-      Register base(rbp);
-      moveMR(&c, BytesPerWord, &baseSrc, BytesPerWord, &base);
+      if (UseFramePointer) {
+        Memory baseSrc(rsp, frameFootprint * BytesPerWord);
+        Register base(rbp);
+        moveMR(&c, BytesPerWord, &baseSrc, BytesPerWord, &base);
+      }
 
       Register stack(rsp);
-      Constant footprintConstant
-        (resolved(&c, (footprint - offset + 1) * BytesPerWord));
-      addCR(&c, BytesPerWord, &footprintConstant, BytesPerWord, &stack);
+      Constant footprint
+        (resolved(&c, (frameFootprint - offset + 1) * BytesPerWord));
+      addCR(&c, BytesPerWord, &footprint, BytesPerWord, &stack);
+
+      appendFrameSizeEvent(&c, - (frameFootprint - offset + 1));
 
       if (returnAddressSurrogate != NoRegister) {
         assert(&c, offset > 0);
@@ -3419,15 +3491,17 @@
         moveRM(&c, BytesPerWord, &fps, BytesPerWord, &dst);
       }
     } else {
-      popFrame();
+      popFrame(frameFootprint);
     }
   } else {
     abort(&c);
   }
   }
 
-  virtual void popFrameAndPopArgumentsAndReturn(unsigned argumentFootprint) {
-    popFrame();
+  virtual void popFrameAndPopArgumentsAndReturn(unsigned frameFootprint,
+                                                unsigned argumentFootprint)
+  {
+    popFrame(frameFootprint);
 
     assert(&c, argumentFootprint >= StackAlignmentInWords);
     assert(&c, (argumentFootprint % StackAlignmentInWords) == 0);
@@ -3442,23 +3516,36 @@
          * BytesPerWord));
       addCR(&c, BytesPerWord, &adjustment, BytesPerWord, &stack);
 
+      appendFrameSizeEvent(&c, - (argumentFootprint - StackAlignmentInWords));
+
       jumpR(&c, BytesPerWord, &returnAddress);
     } else {
       return_(&c);
     }
+
+    // todo: this is not necessary if there are no instructions to
+    // follow:
+    appendFrameSizeEvent(&c, frameFootprint);
   }
 
-  virtual void popFrameAndUpdateStackAndReturn(unsigned stackOffsetFromThread)
+  virtual void popFrameAndUpdateStackAndReturn(unsigned frameFootprint,
+                                               unsigned stackOffsetFromThread)
   {
-    popFrame();
+    popFrame(frameFootprint);
 
     Register returnAddress(rcx);
     popR(&c, BytesPerWord, &returnAddress);
 
+    appendFrameSizeEvent(&c, -1);
+
     Register stack(rsp);
     Memory stackSrc(rbx, stackOffsetFromThread);
     moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &stack);
 
+    // we can't statically determine the frame size at this point, so
+    // we poison any attempt to query for it:
+    appendFrameSizeEvent(&c, FrameSizePoison);
+
     jumpR(&c, BytesPerWord, &returnAddress);
   }
 
@@ -3507,6 +3594,15 @@
     c.result = dst;
 
     for (MyBlock* b = c.firstBlock; b; b = b->next) {
+      if (b->firstFrameSizeEvent) {
+        if (c.firstFrameSizeEvent) {
+          c.lastFrameSizeEvent->next_ = b->firstFrameSizeEvent;
+        } else {
+          c.firstFrameSizeEvent = b->firstFrameSizeEvent;
+        }
+        c.lastFrameSizeEvent = b->lastFrameSizeEvent;
+      }
+
       unsigned index = 0;
       unsigned padding = 0;
       for (AlignmentPadding* p = b->firstPadding; p; p = p->next) {
@@ -3560,6 +3656,14 @@
     return c.code.length();
   }
 
+  virtual unsigned frameSizeEventCount() {
+    return c.frameSizeEventCount;
+  }
+
+  virtual FrameSizeEvent* firstFrameSizeEvent() {
+    return c.firstFrameSizeEvent;
+  }
+
   virtual void dispose() {
     c.code.dispose();
   }
diff --git a/src/x86.h b/src/x86.h
index a125e4425c..3073d1f46c 100644
--- a/src/x86.h
+++ b/src/x86.h
@@ -22,6 +22,8 @@
 #  undef interface
 #endif
 
+#define VA_LIST(x) x
+
 #ifdef ARCH_x86_32
 #  ifdef __APPLE__