From c9026a6053d015f6b37af3dbfa6a66ec6f80e417 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 30 Dec 2014 15:30:04 -0700 Subject: [PATCH] add continuations support for ARM64 Also, replace some preprocessor conditionals with C++ conditionals and add some todo comments and sample code for future work towards better ABI compatibility in the JIT compiled code. --- src/arm64.S | 2 + src/codegen/target/arm/assembler.cpp | 142 +++++++++++++++++---------- src/codegen/target/arm/fixup.cpp | 10 +- src/codegen/target/arm/fixup.h | 6 +- src/codegen/target/arm/registers.h | 3 +- src/compile-arm.S | 4 +- src/compile-arm64.S | 110 +++++++++++++++++++-- src/compile.cpp | 2 + 8 files changed, 204 insertions(+), 75 deletions(-) diff --git a/src/arm64.S b/src/arm64.S index 6953ea0cf6..b5ce9a5000 100644 --- a/src/arm64.S +++ b/src/arm64.S @@ -35,6 +35,7 @@ GLOBAL(vmNativeCall): // allocate frame stp x29, x30, [sp,#-64]! + mov x29, sp // save callee-saved register values so we can clobber them stp x19, x20, [sp,#16] @@ -118,6 +119,7 @@ GLOBAL(vmRun): // allocate frame stp x29, x30, [sp,#-96]! + mov x29, sp // save callee-saved register values stp x19, x20, [sp,#16] diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index cb9f871f7e..3130662073 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -232,6 +232,7 @@ class MyArchitecture : public Architecture { { switch (register_.index()) { case LinkRegister.index(): + case FrameRegister.index(): case StackRegister.index(): case ThreadRegister.index(): case ProgramCounter.index(): @@ -320,13 +321,13 @@ class MyArchitecture : public Architecture { case lir::AlignedLongCall: case lir::AlignedLongJump: { uint32_t* p = static_cast(returnAddress) - 2; -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 - const int32_t mask = (PoolOffsetMask >> 2) << 5; - *reinterpret_cast(p + ((*p & mask) >> 5)) = newTarget; -#else - *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) - = newTarget; -#endif + if (TargetBytesPerWord == 8) { + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *reinterpret_cast(p + ((*p & mask) >> 5)) = newTarget; + } else { + *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) + = newTarget; + } } break; default: @@ -769,24 +770,45 @@ class MyAssembler : public Assembler { // how to handle them: assertT(&con, footprint < 256); - // todo: ARM64 frame allocation should be of the form: - // stp x29, x30, [sp,#size]! - // and deallocation should be of the form: - // ldp x29, x30, [sp],#size + // todo: the ARM ABI says the frame preamble should be of the form + // + // stp x29, x30, [sp,#-footprint]! + // mov x29, sp + // + // and the frame should be popped with e.g. + // + // ldp x29, x30, [sp],#footprint + // br x30 + // + // However, that will invalidate a lot of assumptions elsewhere + // about the return address being stored at the opposite end of + // the frame, so lots of other code will need to change before we + // can do that. The code below can be enabled as a starting point + // when we're ready to tackle that. + if (false and TargetBytesPerWord == 8) { + // stp x29, x30, [sp,#-footprint]! + con.code.append4(0xa9800000 | ((-footprint & 0x7f) << 15) + | (StackRegister.index() << 5) + | (LinkRegister.index() << 10) | FrameRegister.index()); - lir::RegisterPair stack(StackRegister); - ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); - lir::Constant footprintConstant(&footprintPromise); - subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + lir::RegisterPair stack(StackRegister); + lir::RegisterPair frame(FrameRegister); + moveRR(&con, TargetBytesPerWord, &stack, TargetBytesPerWord, &frame); + } else { + lir::RegisterPair stack(StackRegister); + ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); + lir::Constant footprintConstant(&footprintPromise); + subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressDst(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveRM(&con, - TargetBytesPerWord, - &returnAddress, - TargetBytesPerWord, - &returnAddressDst); + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressDst(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveRM(&con, + TargetBytesPerWord, + &returnAddress, + TargetBytesPerWord, + &returnAddressDst); + } } virtual void adjustFrame(unsigned difference) @@ -801,19 +823,26 @@ class MyAssembler : public Assembler { { footprint += FrameHeaderSize; - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressSrc(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveMR(&con, - TargetBytesPerWord, - &returnAddressSrc, - TargetBytesPerWord, - &returnAddress); + // see comment regarding the ARM64 ABI in allocateFrame + if (false and TargetBytesPerWord == 8) { + // ldp x29, x30, [sp],#footprint + con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10) + | 29); + } else { + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); - lir::RegisterPair stack(StackRegister); - ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); - lir::Constant footprintConstant(&footprintPromise); - addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + lir::RegisterPair stack(StackRegister); + ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); + lir::Constant footprintConstant(&footprintPromise); + addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + } } virtual void popFrameForTailCall(unsigned footprint, @@ -885,14 +914,21 @@ class MyAssembler : public Assembler { { footprint += FrameHeaderSize; - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressSrc(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveMR(&con, - TargetBytesPerWord, - &returnAddressSrc, - TargetBytesPerWord, - &returnAddress); + // see comment regarding the ARM64 ABI in allocateFrame + if (false and TargetBytesPerWord == 8) { + // ldp x29, x30, [sp],#footprint + con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10) + | 29); + } else { + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); + } lir::RegisterPair stack(StackRegister); lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread); @@ -986,18 +1022,18 @@ class MyAssembler : public Assembler { int32_t* p = reinterpret_cast(dst + instruction); -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 - int32_t v = entry - instruction; - expect(&con, v == (v & PoolOffsetMask)); + if (TargetBytesPerWord == 8) { + int32_t v = entry - instruction; + expect(&con, v == (v & PoolOffsetMask)); - const int32_t mask = (PoolOffsetMask >> 2) << 5; - *p = (((v >> 2) << 5) & mask) | ((~mask) & *p); -#else - int32_t v = (entry - 8) - instruction; - expect(&con, v == (v & PoolOffsetMask)); + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *p = (((v >> 2) << 5) & mask) | ((~mask) & *p); + } else { + int32_t v = (entry - 8) - instruction; + expect(&con, v == (v & PoolOffsetMask)); - *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); -#endif + *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); + } poolSize += TargetBytesPerWord; } diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp index 4413a399db..3117688b15 100644 --- a/src/codegen/target/arm/fixup.cpp +++ b/src/codegen/target/arm/fixup.cpp @@ -99,9 +99,9 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) { int32_t* p = reinterpret_cast(instruction); -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 int32_t v; int32_t mask; + if (vm::TargetBytesPerWord == 8) { if ((*p >> 24) == 0x54) { // conditional branch v = ((reinterpret_cast(value) - instruction) >> 2) << 5; @@ -111,10 +111,10 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) v = (reinterpret_cast(value) - instruction) >> 2; mask = 0x3FFFFFF; } -#else - int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; - const int32_t mask = 0xFFFFFF; -#endif + } else { + v = (reinterpret_cast(value) - (instruction + 8)) >> 2; + mask = 0xFFFFFF; + } expect(s, bounded(0, 8, v)); diff --git a/src/codegen/target/arm/fixup.h b/src/codegen/target/arm/fixup.h index 2e9c0aca01..cce2b59dce 100644 --- a/src/codegen/target/arm/fixup.h +++ b/src/codegen/target/arm/fixup.h @@ -27,11 +27,7 @@ namespace arm { const bool DebugPool = false; -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 -const int32_t PoolOffsetMask = 0x1FFFFF; -#else -const int32_t PoolOffsetMask = 0xFFF; -#endif +const int32_t PoolOffsetMask = vm::TargetBytesPerWord == 8 ? 0x1FFFFF : 0xFFF; class Task { public: diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index d439ddc8ba..3bf4dc4041 100644 --- a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -23,10 +23,11 @@ namespace arm { const uint64_t MASK_LO32 = 0xffffffff; const unsigned MASK_LO8 = 0xff; -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +#if TARGET_BYTES_PER_WORD == 8 constexpr Register ThreadRegister(19); constexpr Register StackRegister(31); constexpr Register LinkRegister(30); +constexpr Register FrameRegister(29); constexpr Register ProgramCounter(0xFE); // i.e. unaddressable const int N_GPRS = 32; diff --git a/src/compile-arm.S b/src/compile-arm.S index 2f566f9558..83703af607 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -109,7 +109,7 @@ GLOBAL(vmInvoke_safeStack): ldr r6,[r5,#CONTINUATION_LENGTH] lsl r6,r6,#2 neg r7,r6 - add r7,r7,#-80 + add r7,r7,#-80 // 80 bytes for callee-saved register values mov r4,sp str r4,[sp,r7]! @@ -167,10 +167,10 @@ LOCAL(vmInvoke_handleException): bx r7 LOCAL(vmInvoke_exit): -#endif // AVIAN_CONTINUATIONS mov ip, #0 str ip, [r8, #TARGET_THREAD_STACK] +#endif // AVIAN_CONTINUATIONS // restore return type ldr ip, [sp], #4 diff --git a/src/compile-arm64.S b/src/compile-arm64.S index 62816ccf9f..c1c9c942b2 100644 --- a/src/compile-arm64.S +++ b/src/compile-arm64.S @@ -23,12 +23,12 @@ # define GLOBAL(x) x #endif -#define CONTINUATION_NEXT 4 -#define CONTINUATION_ADDRESS 16 -#define CONTINUATION_RETURN_ADDRESS_OFFSET 20 -#define CONTINUATION_FRAME_POINTER_OFFSET 24 -#define CONTINUATION_LENGTH 28 -#define CONTINUATION_BODY 32 +#define CONTINUATION_NEXT 8 +#define CONTINUATION_ADDRESS 32 +#define CONTINUATION_RETURN_ADDRESS_OFFSET 40 +#define CONTINUATION_FRAME_POINTER_OFFSET 48 +#define CONTINUATION_LENGTH 56 +#define CONTINUATION_BODY 64 .globl GLOBAL(vmInvoke) .align 2 @@ -43,6 +43,7 @@ GLOBAL(vmInvoke): // allocate frame stp x29, x30, [sp,#-96]! + mov x29, sp // save callee-saved register values stp x19, x20, [sp,#16] @@ -96,11 +97,65 @@ GLOBAL(vmInvoke_returnAddress): GLOBAL(vmInvoke_safeStack): #ifdef AVIAN_CONTINUATIONS -#error todo -#endif // AVIAN_CONTINUATIONS + // call the next continuation, if any + ldr x5, [x19,#TARGET_THREAD_CONTINUATION] + cmp x5, xzr + b.eq LOCAL(vmInvoke_exit) + ldr x6, [x5,#CONTINUATION_LENGTH] + lsl x6, x6, #3 + neg x7, x6 + add x7, x7, #-128 // 128 bytes for callee-saved register values + mov x4, sp + add sp, sp, x7 + str x4, [sp] + + add x7, x5, #CONTINUATION_BODY + mov x11, xzr + b LOCAL(vmInvoke_continuationTest) + +LOCAL(vmInvoke_continuationLoop): + ldr x9, [x7,x11] + str x9, [sp,x11] + add x11, x11, #8 + +LOCAL(vmInvoke_continuationTest): + cmp x11, x6 + b.le LOCAL(vmInvoke_continuationLoop) + + ldr x7, [x5,#CONTINUATION_RETURN_ADDRESS_OFFSET] + adr x11, GLOBAL(vmInvoke_returnAddress) + str x11, [sp,x7] + + ldr x7, [x5,#CONTINUATION_NEXT] + str x7, [x19,#TARGET_THREAD_CONTINUATION] + + // call the continuation unless we're handling an exception + ldr x7, [x19,#TARGET_THREAD_EXCEPTION] + cmp x7, xzr + b.ne LOCAL(vmInvoke_handleException) + ldr x7, [x5,#CONTINUATION_ADDRESS] + br x7 + +LOCAL(vmInvoke_handleException): + // we're handling an exception - call the exception handler instead + str xzr, [x19,#TARGET_THREAD_EXCEPTION] + ldr x11, [x19,#TARGET_THREAD_EXCEPTIONSTACKADJUSTMENT] + ldr x9, [sp] + neg x11, x11 + add sp, sp, x11 + str x9, [sp] + ldr x11, [x19,#TARGET_THREAD_EXCEPTIONOFFSET] + str x7, [sp,x11] + + ldr x7, [x19,#TARGET_THREAD_EXCEPTIONHANDLER] + br x7 + +LOCAL(vmInvoke_exit): str xzr, [x19, #TARGET_THREAD_STACK] +#endif // AVIAN_CONTINUATIONS + // restore return type ldr w5, [sp],#16 @@ -119,7 +174,44 @@ LOCAL(vmInvoke_return): .align 2 GLOBAL(vmJumpAndInvoke): #ifdef AVIAN_CONTINUATIONS -#error todo + // x0: thread + // x1: address + // x2: stack + // x3: argumentFootprint + // x4: arguments + // x5: frameSize + + // allocate new frame, adding room for callee-saved registers, plus + // 8 bytes of padding since the calculation of frameSize assumes 8 + // bytes have already been allocated to save the return address, + // which is not true in this case + sub x2, x2, x5 + sub x2, x2, #136 + + mov x19, x0 + + // copy arguments into place + mov x6, xzr + b LOCAL(vmJumpAndInvoke_argumentTest) + +LOCAL(vmJumpAndInvoke_argumentLoop): + ldr x12, [x4,x6] + str x12, [x2,x6] + add x6, x6, #4 + +LOCAL(vmJumpAndInvoke_argumentTest): + cmp x6, x3 + ble LOCAL(vmJumpAndInvoke_argumentLoop) + + // the arguments have been copied, so we can set the real stack + // pointer now + mov sp, x2 + + // set return address to vmInvoke_returnAddress + adr x30, GLOBAL(vmInvoke_returnAddress) + + br x1 + #else // not AVIAN_CONTINUATIONS // vmJumpAndInvoke should only be called when continuations are // enabled, so we force a crash if we reach here: diff --git a/src/compile.cpp b/src/compile.cpp index 47b55574e7..51790bb0b3 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -2189,6 +2189,8 @@ GcContinuation* makeCurrentContinuation(MyThread* t, *targetIp = 0; while (*targetIp == 0) { + assertT(t, ip); + GcMethod* method = methodForIp(t, ip); if (method) { PROTECT(t, method);