add continuations support for ARM64

Also, replace some preprocessor conditionals with C++ conditionals and
add some TODO comments and sample code for future work towards better
ABI compatibility in the JIT-compiled code.
This commit is contained in:
Joel Dice 2014-12-30 15:30:04 -07:00
parent e3ea60fc31
commit c9026a6053
8 changed files with 204 additions and 75 deletions

View File

@ -35,6 +35,7 @@ GLOBAL(vmNativeCall):
// allocate frame // allocate frame
stp x29, x30, [sp,#-64]! stp x29, x30, [sp,#-64]!
mov x29, sp
// save callee-saved register values so we can clobber them // save callee-saved register values so we can clobber them
stp x19, x20, [sp,#16] stp x19, x20, [sp,#16]
@ -118,6 +119,7 @@ GLOBAL(vmRun):
// allocate frame // allocate frame
stp x29, x30, [sp,#-96]! stp x29, x30, [sp,#-96]!
mov x29, sp
// save callee-saved register values // save callee-saved register values
stp x19, x20, [sp,#16] stp x19, x20, [sp,#16]

View File

@ -232,6 +232,7 @@ class MyArchitecture : public Architecture {
{ {
switch (register_.index()) { switch (register_.index()) {
case LinkRegister.index(): case LinkRegister.index():
case FrameRegister.index():
case StackRegister.index(): case StackRegister.index():
case ThreadRegister.index(): case ThreadRegister.index():
case ProgramCounter.index(): case ProgramCounter.index():
@ -320,13 +321,13 @@ class MyArchitecture : public Architecture {
case lir::AlignedLongCall: case lir::AlignedLongCall:
case lir::AlignedLongJump: { case lir::AlignedLongJump: {
uint32_t* p = static_cast<uint32_t*>(returnAddress) - 2; uint32_t* p = static_cast<uint32_t*>(returnAddress) - 2;
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 if (TargetBytesPerWord == 8) {
const int32_t mask = (PoolOffsetMask >> 2) << 5; const int32_t mask = (PoolOffsetMask >> 2) << 5;
*reinterpret_cast<void**>(p + ((*p & mask) >> 5)) = newTarget; *reinterpret_cast<void**>(p + ((*p & mask) >> 5)) = newTarget;
#else } else {
*reinterpret_cast<void**>(p + (((*p & PoolOffsetMask) + 8) / 4)) *reinterpret_cast<void**>(p + (((*p & PoolOffsetMask) + 8) / 4))
= newTarget; = newTarget;
#endif }
} break; } break;
default: default:
@ -769,11 +770,31 @@ class MyAssembler : public Assembler {
// how to handle them: // how to handle them:
assertT(&con, footprint < 256); assertT(&con, footprint < 256);
// todo: ARM64 frame allocation should be of the form: // todo: the ARM ABI says the frame preamble should be of the form
// stp x29, x30, [sp,#size]! //
// and deallocation should be of the form: // stp x29, x30, [sp,#-footprint]!
// ldp x29, x30, [sp],#size // mov x29, sp
//
// and the frame should be popped with e.g.
//
// ldp x29, x30, [sp],#footprint
// br x30
//
// However, that will invalidate a lot of assumptions elsewhere
// about the return address being stored at the opposite end of
// the frame, so lots of other code will need to change before we
// can do that. The code below can be enabled as a starting point
// when we're ready to tackle that.
if (false and TargetBytesPerWord == 8) {
// stp x29, x30, [sp,#-footprint]!
con.code.append4(0xa9800000 | ((-footprint & 0x7f) << 15)
| (StackRegister.index() << 5)
| (LinkRegister.index() << 10) | FrameRegister.index());
lir::RegisterPair stack(StackRegister);
lir::RegisterPair frame(FrameRegister);
moveRR(&con, TargetBytesPerWord, &stack, TargetBytesPerWord, &frame);
} else {
lir::RegisterPair stack(StackRegister); lir::RegisterPair stack(StackRegister);
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
lir::Constant footprintConstant(&footprintPromise); lir::Constant footprintConstant(&footprintPromise);
@ -788,6 +809,7 @@ class MyAssembler : public Assembler {
TargetBytesPerWord, TargetBytesPerWord,
&returnAddressDst); &returnAddressDst);
} }
}
virtual void adjustFrame(unsigned difference) virtual void adjustFrame(unsigned difference)
{ {
@ -801,6 +823,12 @@ class MyAssembler : public Assembler {
{ {
footprint += FrameHeaderSize; footprint += FrameHeaderSize;
// see comment regarding the ARM64 ABI in allocateFrame
if (false and TargetBytesPerWord == 8) {
// ldp x29, x30, [sp],#footprint
con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10)
| 29);
} else {
lir::RegisterPair returnAddress(LinkRegister); lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister, lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord); (footprint - 1) * TargetBytesPerWord);
@ -815,6 +843,7 @@ class MyAssembler : public Assembler {
lir::Constant footprintConstant(&footprintPromise); lir::Constant footprintConstant(&footprintPromise);
addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
} }
}
virtual void popFrameForTailCall(unsigned footprint, virtual void popFrameForTailCall(unsigned footprint,
int offset, int offset,
@ -885,6 +914,12 @@ class MyAssembler : public Assembler {
{ {
footprint += FrameHeaderSize; footprint += FrameHeaderSize;
// see comment regarding the ARM64 ABI in allocateFrame
if (false and TargetBytesPerWord == 8) {
// ldp x29, x30, [sp],#footprint
con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10)
| 29);
} else {
lir::RegisterPair returnAddress(LinkRegister); lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister, lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord); (footprint - 1) * TargetBytesPerWord);
@ -893,6 +928,7 @@ class MyAssembler : public Assembler {
&returnAddressSrc, &returnAddressSrc,
TargetBytesPerWord, TargetBytesPerWord,
&returnAddress); &returnAddress);
}
lir::RegisterPair stack(StackRegister); lir::RegisterPair stack(StackRegister);
lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread); lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread);
@ -986,18 +1022,18 @@ class MyAssembler : public Assembler {
int32_t* p = reinterpret_cast<int32_t*>(dst + instruction); int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 if (TargetBytesPerWord == 8) {
int32_t v = entry - instruction; int32_t v = entry - instruction;
expect(&con, v == (v & PoolOffsetMask)); expect(&con, v == (v & PoolOffsetMask));
const int32_t mask = (PoolOffsetMask >> 2) << 5; const int32_t mask = (PoolOffsetMask >> 2) << 5;
*p = (((v >> 2) << 5) & mask) | ((~mask) & *p); *p = (((v >> 2) << 5) & mask) | ((~mask) & *p);
#else } else {
int32_t v = (entry - 8) - instruction; int32_t v = (entry - 8) - instruction;
expect(&con, v == (v & PoolOffsetMask)); expect(&con, v == (v & PoolOffsetMask));
*p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
#endif }
poolSize += TargetBytesPerWord; poolSize += TargetBytesPerWord;
} }

View File

@ -99,9 +99,9 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
{ {
int32_t* p = reinterpret_cast<int32_t*>(instruction); int32_t* p = reinterpret_cast<int32_t*>(instruction);
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
int32_t v; int32_t v;
int32_t mask; int32_t mask;
if (vm::TargetBytesPerWord == 8) {
if ((*p >> 24) == 0x54) { if ((*p >> 24) == 0x54) {
// conditional branch // conditional branch
v = ((reinterpret_cast<uint8_t*>(value) - instruction) >> 2) << 5; v = ((reinterpret_cast<uint8_t*>(value) - instruction) >> 2) << 5;
@ -111,10 +111,10 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
v = (reinterpret_cast<uint8_t*>(value) - instruction) >> 2; v = (reinterpret_cast<uint8_t*>(value) - instruction) >> 2;
mask = 0x3FFFFFF; mask = 0x3FFFFFF;
} }
#else } else {
int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2; v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
const int32_t mask = 0xFFFFFF; mask = 0xFFFFFF;
#endif }
expect(s, bounded(0, 8, v)); expect(s, bounded(0, 8, v));

View File

@ -27,11 +27,7 @@ namespace arm {
const bool DebugPool = false; const bool DebugPool = false;
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 const int32_t PoolOffsetMask = vm::TargetBytesPerWord == 8 ? 0x1FFFFF : 0xFFF;
const int32_t PoolOffsetMask = 0x1FFFFF;
#else
const int32_t PoolOffsetMask = 0xFFF;
#endif
class Task { class Task {
public: public:

View File

@ -23,10 +23,11 @@ namespace arm {
const uint64_t MASK_LO32 = 0xffffffff; const uint64_t MASK_LO32 = 0xffffffff;
const unsigned MASK_LO8 = 0xff; const unsigned MASK_LO8 = 0xff;
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 #if TARGET_BYTES_PER_WORD == 8
constexpr Register ThreadRegister(19); constexpr Register ThreadRegister(19);
constexpr Register StackRegister(31); constexpr Register StackRegister(31);
constexpr Register LinkRegister(30); constexpr Register LinkRegister(30);
constexpr Register FrameRegister(29);
constexpr Register ProgramCounter(0xFE); // i.e. unaddressable constexpr Register ProgramCounter(0xFE); // i.e. unaddressable
const int N_GPRS = 32; const int N_GPRS = 32;

View File

@ -109,7 +109,7 @@ GLOBAL(vmInvoke_safeStack):
ldr r6,[r5,#CONTINUATION_LENGTH] ldr r6,[r5,#CONTINUATION_LENGTH]
lsl r6,r6,#2 lsl r6,r6,#2
neg r7,r6 neg r7,r6
add r7,r7,#-80 add r7,r7,#-80 // 80 bytes for callee-saved register values
mov r4,sp mov r4,sp
str r4,[sp,r7]! str r4,[sp,r7]!
@ -167,10 +167,10 @@ LOCAL(vmInvoke_handleException):
bx r7 bx r7
LOCAL(vmInvoke_exit): LOCAL(vmInvoke_exit):
#endif // AVIAN_CONTINUATIONS
mov ip, #0 mov ip, #0
str ip, [r8, #TARGET_THREAD_STACK] str ip, [r8, #TARGET_THREAD_STACK]
#endif // AVIAN_CONTINUATIONS
// restore return type // restore return type
ldr ip, [sp], #4 ldr ip, [sp], #4

View File

@ -23,12 +23,12 @@
# define GLOBAL(x) x # define GLOBAL(x) x
#endif #endif
#define CONTINUATION_NEXT 4 #define CONTINUATION_NEXT 8
#define CONTINUATION_ADDRESS 16 #define CONTINUATION_ADDRESS 32
#define CONTINUATION_RETURN_ADDRESS_OFFSET 20 #define CONTINUATION_RETURN_ADDRESS_OFFSET 40
#define CONTINUATION_FRAME_POINTER_OFFSET 24 #define CONTINUATION_FRAME_POINTER_OFFSET 48
#define CONTINUATION_LENGTH 28 #define CONTINUATION_LENGTH 56
#define CONTINUATION_BODY 32 #define CONTINUATION_BODY 64
.globl GLOBAL(vmInvoke) .globl GLOBAL(vmInvoke)
.align 2 .align 2
@ -43,6 +43,7 @@ GLOBAL(vmInvoke):
// allocate frame // allocate frame
stp x29, x30, [sp,#-96]! stp x29, x30, [sp,#-96]!
mov x29, sp
// save callee-saved register values // save callee-saved register values
stp x19, x20, [sp,#16] stp x19, x20, [sp,#16]
@ -96,11 +97,65 @@ GLOBAL(vmInvoke_returnAddress):
GLOBAL(vmInvoke_safeStack): GLOBAL(vmInvoke_safeStack):
#ifdef AVIAN_CONTINUATIONS #ifdef AVIAN_CONTINUATIONS
#error todo // call the next continuation, if any
#endif // AVIAN_CONTINUATIONS ldr x5, [x19,#TARGET_THREAD_CONTINUATION]
cmp x5, xzr
b.eq LOCAL(vmInvoke_exit)
ldr x6, [x5,#CONTINUATION_LENGTH]
lsl x6, x6, #3
neg x7, x6
add x7, x7, #-128 // 128 bytes for callee-saved register values
mov x4, sp
add sp, sp, x7
str x4, [sp]
add x7, x5, #CONTINUATION_BODY
mov x11, xzr
b LOCAL(vmInvoke_continuationTest)
LOCAL(vmInvoke_continuationLoop):
ldr x9, [x7,x11]
str x9, [sp,x11]
add x11, x11, #8
LOCAL(vmInvoke_continuationTest):
cmp x11, x6
b.le LOCAL(vmInvoke_continuationLoop)
ldr x7, [x5,#CONTINUATION_RETURN_ADDRESS_OFFSET]
adr x11, GLOBAL(vmInvoke_returnAddress)
str x11, [sp,x7]
ldr x7, [x5,#CONTINUATION_NEXT]
str x7, [x19,#TARGET_THREAD_CONTINUATION]
// call the continuation unless we're handling an exception
ldr x7, [x19,#TARGET_THREAD_EXCEPTION]
cmp x7, xzr
b.ne LOCAL(vmInvoke_handleException)
ldr x7, [x5,#CONTINUATION_ADDRESS]
br x7
LOCAL(vmInvoke_handleException):
// we're handling an exception - call the exception handler instead
str xzr, [x19,#TARGET_THREAD_EXCEPTION]
ldr x11, [x19,#TARGET_THREAD_EXCEPTIONSTACKADJUSTMENT]
ldr x9, [sp]
neg x11, x11
add sp, sp, x11
str x9, [sp]
ldr x11, [x19,#TARGET_THREAD_EXCEPTIONOFFSET]
str x7, [sp,x11]
ldr x7, [x19,#TARGET_THREAD_EXCEPTIONHANDLER]
br x7
LOCAL(vmInvoke_exit):
str xzr, [x19, #TARGET_THREAD_STACK] str xzr, [x19, #TARGET_THREAD_STACK]
#endif // AVIAN_CONTINUATIONS
// restore return type // restore return type
ldr w5, [sp],#16 ldr w5, [sp],#16
@ -119,7 +174,44 @@ LOCAL(vmInvoke_return):
.align 2 .align 2
GLOBAL(vmJumpAndInvoke): GLOBAL(vmJumpAndInvoke):
#ifdef AVIAN_CONTINUATIONS #ifdef AVIAN_CONTINUATIONS
#error todo // x0: thread
// x1: address
// x2: stack
// x3: argumentFootprint
// x4: arguments
// x5: frameSize
// allocate new frame, adding room for callee-saved registers, plus
// 8 bytes of padding since the calculation of frameSize assumes 8
// bytes have already been allocated to save the return address,
// which is not true in this case
sub x2, x2, x5
sub x2, x2, #136
mov x19, x0
// copy arguments into place
mov x6, xzr
b LOCAL(vmJumpAndInvoke_argumentTest)
LOCAL(vmJumpAndInvoke_argumentLoop):
ldr x12, [x4,x6]
str x12, [x2,x6]
add x6, x6, #4
LOCAL(vmJumpAndInvoke_argumentTest):
cmp x6, x3
ble LOCAL(vmJumpAndInvoke_argumentLoop)
// the arguments have been copied, so we can set the real stack
// pointer now
mov sp, x2
// set return address to vmInvoke_returnAddress
adr x30, GLOBAL(vmInvoke_returnAddress)
br x1
#else // not AVIAN_CONTINUATIONS #else // not AVIAN_CONTINUATIONS
// vmJumpAndInvoke should only be called when continuations are // vmJumpAndInvoke should only be called when continuations are
// enabled, so we force a crash if we reach here: // enabled, so we force a crash if we reach here:

View File

@ -2189,6 +2189,8 @@ GcContinuation* makeCurrentContinuation(MyThread* t,
*targetIp = 0; *targetIp = 0;
while (*targetIp == 0) { while (*targetIp == 0) {
assertT(t, ip);
GcMethod* method = methodForIp(t, ip); GcMethod* method = methodForIp(t, ip);
if (method) { if (method) {
PROTECT(t, method); PROTECT(t, method);