Add continuations support for ARM64

Also, replace some preprocessor conditionals with C++ conditionals, and
add some TODO comments and sample code for future work toward better
ABI compatibility in the JIT-compiled code.
This commit is contained in:
Joel Dice 2014-12-30 15:30:04 -07:00
parent e3ea60fc31
commit c9026a6053
8 changed files with 204 additions and 75 deletions

View File

@ -35,6 +35,7 @@ GLOBAL(vmNativeCall):
// allocate frame
stp x29, x30, [sp,#-64]!
mov x29, sp
// save callee-saved register values so we can clobber them
stp x19, x20, [sp,#16]
@ -118,6 +119,7 @@ GLOBAL(vmRun):
// allocate frame
stp x29, x30, [sp,#-96]!
mov x29, sp
// save callee-saved register values
stp x19, x20, [sp,#16]

View File

@ -232,6 +232,7 @@ class MyArchitecture : public Architecture {
{
switch (register_.index()) {
case LinkRegister.index():
case FrameRegister.index():
case StackRegister.index():
case ThreadRegister.index():
case ProgramCounter.index():
@ -320,13 +321,13 @@ class MyArchitecture : public Architecture {
case lir::AlignedLongCall:
case lir::AlignedLongJump: {
uint32_t* p = static_cast<uint32_t*>(returnAddress) - 2;
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
const int32_t mask = (PoolOffsetMask >> 2) << 5;
*reinterpret_cast<void**>(p + ((*p & mask) >> 5)) = newTarget;
#else
*reinterpret_cast<void**>(p + (((*p & PoolOffsetMask) + 8) / 4))
= newTarget;
#endif
if (TargetBytesPerWord == 8) {
const int32_t mask = (PoolOffsetMask >> 2) << 5;
*reinterpret_cast<void**>(p + ((*p & mask) >> 5)) = newTarget;
} else {
*reinterpret_cast<void**>(p + (((*p & PoolOffsetMask) + 8) / 4))
= newTarget;
}
} break;
default:
@ -769,24 +770,45 @@ class MyAssembler : public Assembler {
// how to handle them:
assertT(&con, footprint < 256);
// todo: ARM64 frame allocation should be of the form:
// stp x29, x30, [sp,#size]!
// and deallocation should be of the form:
// ldp x29, x30, [sp],#size
// todo: the ARM ABI says the frame preamble should be of the form
//
// stp x29, x30, [sp,#-footprint]!
// mov x29, sp
//
// and the frame should be popped with e.g.
//
// ldp x29, x30, [sp],#footprint
// br x30
//
// However, that will invalidate a lot of assumptions elsewhere
// about the return address being stored at the opposite end of
// the frame, so lots of other code will need to change before we
// can do that. The code below can be enabled as a starting point
// when we're ready to tackle that.
if (false and TargetBytesPerWord == 8) {
// stp x29, x30, [sp,#-footprint]!
con.code.append4(0xa9800000 | ((-footprint & 0x7f) << 15)
| (StackRegister.index() << 5)
| (LinkRegister.index() << 10) | FrameRegister.index());
lir::RegisterPair stack(StackRegister);
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
lir::Constant footprintConstant(&footprintPromise);
subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
lir::RegisterPair stack(StackRegister);
lir::RegisterPair frame(FrameRegister);
moveRR(&con, TargetBytesPerWord, &stack, TargetBytesPerWord, &frame);
} else {
lir::RegisterPair stack(StackRegister);
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
lir::Constant footprintConstant(&footprintPromise);
subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressDst(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveRM(&con,
TargetBytesPerWord,
&returnAddress,
TargetBytesPerWord,
&returnAddressDst);
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressDst(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveRM(&con,
TargetBytesPerWord,
&returnAddress,
TargetBytesPerWord,
&returnAddressDst);
}
}
virtual void adjustFrame(unsigned difference)
@ -801,19 +823,26 @@ class MyAssembler : public Assembler {
{
footprint += FrameHeaderSize;
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveMR(&con,
TargetBytesPerWord,
&returnAddressSrc,
TargetBytesPerWord,
&returnAddress);
// see comment regarding the ARM64 ABI in allocateFrame
if (false and TargetBytesPerWord == 8) {
// ldp x29, x30, [sp],#footprint
con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10)
| 29);
} else {
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveMR(&con,
TargetBytesPerWord,
&returnAddressSrc,
TargetBytesPerWord,
&returnAddress);
lir::RegisterPair stack(StackRegister);
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
lir::Constant footprintConstant(&footprintPromise);
addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
lir::RegisterPair stack(StackRegister);
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
lir::Constant footprintConstant(&footprintPromise);
addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
}
}
virtual void popFrameForTailCall(unsigned footprint,
@ -885,14 +914,21 @@ class MyAssembler : public Assembler {
{
footprint += FrameHeaderSize;
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveMR(&con,
TargetBytesPerWord,
&returnAddressSrc,
TargetBytesPerWord,
&returnAddress);
// see comment regarding the ARM64 ABI in allocateFrame
if (false and TargetBytesPerWord == 8) {
// ldp x29, x30, [sp],#footprint
con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10)
| 29);
} else {
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveMR(&con,
TargetBytesPerWord,
&returnAddressSrc,
TargetBytesPerWord,
&returnAddress);
}
lir::RegisterPair stack(StackRegister);
lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread);
@ -986,18 +1022,18 @@ class MyAssembler : public Assembler {
int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
int32_t v = entry - instruction;
expect(&con, v == (v & PoolOffsetMask));
if (TargetBytesPerWord == 8) {
int32_t v = entry - instruction;
expect(&con, v == (v & PoolOffsetMask));
const int32_t mask = (PoolOffsetMask >> 2) << 5;
*p = (((v >> 2) << 5) & mask) | ((~mask) & *p);
#else
int32_t v = (entry - 8) - instruction;
expect(&con, v == (v & PoolOffsetMask));
const int32_t mask = (PoolOffsetMask >> 2) << 5;
*p = (((v >> 2) << 5) & mask) | ((~mask) & *p);
} else {
int32_t v = (entry - 8) - instruction;
expect(&con, v == (v & PoolOffsetMask));
*p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
#endif
*p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
}
poolSize += TargetBytesPerWord;
}

View File

@ -99,9 +99,9 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
{
int32_t* p = reinterpret_cast<int32_t*>(instruction);
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
int32_t v;
int32_t mask;
if (vm::TargetBytesPerWord == 8) {
if ((*p >> 24) == 0x54) {
// conditional branch
v = ((reinterpret_cast<uint8_t*>(value) - instruction) >> 2) << 5;
@ -111,10 +111,10 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
v = (reinterpret_cast<uint8_t*>(value) - instruction) >> 2;
mask = 0x3FFFFFF;
}
#else
int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
const int32_t mask = 0xFFFFFF;
#endif
} else {
v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
mask = 0xFFFFFF;
}
expect(s, bounded(0, 8, v));

View File

@ -27,11 +27,7 @@ namespace arm {
const bool DebugPool = false;
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
const int32_t PoolOffsetMask = 0x1FFFFF;
#else
const int32_t PoolOffsetMask = 0xFFF;
#endif
const int32_t PoolOffsetMask = vm::TargetBytesPerWord == 8 ? 0x1FFFFF : 0xFFF;
class Task {
public:

View File

@ -23,10 +23,11 @@ namespace arm {
const uint64_t MASK_LO32 = 0xffffffff;
const unsigned MASK_LO8 = 0xff;
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
#if TARGET_BYTES_PER_WORD == 8
constexpr Register ThreadRegister(19);
constexpr Register StackRegister(31);
constexpr Register LinkRegister(30);
constexpr Register FrameRegister(29);
constexpr Register ProgramCounter(0xFE); // i.e. unaddressable
const int N_GPRS = 32;

View File

@ -109,7 +109,7 @@ GLOBAL(vmInvoke_safeStack):
ldr r6,[r5,#CONTINUATION_LENGTH]
lsl r6,r6,#2
neg r7,r6
add r7,r7,#-80
add r7,r7,#-80 // 80 bytes for callee-saved register values
mov r4,sp
str r4,[sp,r7]!
@ -167,10 +167,10 @@ LOCAL(vmInvoke_handleException):
bx r7
LOCAL(vmInvoke_exit):
#endif // AVIAN_CONTINUATIONS
mov ip, #0
str ip, [r8, #TARGET_THREAD_STACK]
#endif // AVIAN_CONTINUATIONS
// restore return type
ldr ip, [sp], #4

View File

@ -23,12 +23,12 @@
# define GLOBAL(x) x
#endif
#define CONTINUATION_NEXT 4
#define CONTINUATION_ADDRESS 16
#define CONTINUATION_RETURN_ADDRESS_OFFSET 20
#define CONTINUATION_FRAME_POINTER_OFFSET 24
#define CONTINUATION_LENGTH 28
#define CONTINUATION_BODY 32
#define CONTINUATION_NEXT 8
#define CONTINUATION_ADDRESS 32
#define CONTINUATION_RETURN_ADDRESS_OFFSET 40
#define CONTINUATION_FRAME_POINTER_OFFSET 48
#define CONTINUATION_LENGTH 56
#define CONTINUATION_BODY 64
.globl GLOBAL(vmInvoke)
.align 2
@ -43,6 +43,7 @@ GLOBAL(vmInvoke):
// allocate frame
stp x29, x30, [sp,#-96]!
mov x29, sp
// save callee-saved register values
stp x19, x20, [sp,#16]
@ -96,11 +97,65 @@ GLOBAL(vmInvoke_returnAddress):
GLOBAL(vmInvoke_safeStack):
#ifdef AVIAN_CONTINUATIONS
#error todo
#endif // AVIAN_CONTINUATIONS
// call the next continuation, if any
ldr x5, [x19,#TARGET_THREAD_CONTINUATION]
cmp x5, xzr
b.eq LOCAL(vmInvoke_exit)
ldr x6, [x5,#CONTINUATION_LENGTH]
lsl x6, x6, #3
neg x7, x6
add x7, x7, #-128 // 128 bytes for callee-saved register values
mov x4, sp
add sp, sp, x7
str x4, [sp]
add x7, x5, #CONTINUATION_BODY
mov x11, xzr
b LOCAL(vmInvoke_continuationTest)
LOCAL(vmInvoke_continuationLoop):
ldr x9, [x7,x11]
str x9, [sp,x11]
add x11, x11, #8
LOCAL(vmInvoke_continuationTest):
cmp x11, x6
b.le LOCAL(vmInvoke_continuationLoop)
ldr x7, [x5,#CONTINUATION_RETURN_ADDRESS_OFFSET]
adr x11, GLOBAL(vmInvoke_returnAddress)
str x11, [sp,x7]
ldr x7, [x5,#CONTINUATION_NEXT]
str x7, [x19,#TARGET_THREAD_CONTINUATION]
// call the continuation unless we're handling an exception
ldr x7, [x19,#TARGET_THREAD_EXCEPTION]
cmp x7, xzr
b.ne LOCAL(vmInvoke_handleException)
ldr x7, [x5,#CONTINUATION_ADDRESS]
br x7
LOCAL(vmInvoke_handleException):
// we're handling an exception - call the exception handler instead
str xzr, [x19,#TARGET_THREAD_EXCEPTION]
ldr x11, [x19,#TARGET_THREAD_EXCEPTIONSTACKADJUSTMENT]
ldr x9, [sp]
neg x11, x11
add sp, sp, x11
str x9, [sp]
ldr x11, [x19,#TARGET_THREAD_EXCEPTIONOFFSET]
str x7, [sp,x11]
ldr x7, [x19,#TARGET_THREAD_EXCEPTIONHANDLER]
br x7
LOCAL(vmInvoke_exit):
str xzr, [x19, #TARGET_THREAD_STACK]
#endif // AVIAN_CONTINUATIONS
// restore return type
ldr w5, [sp],#16
@ -119,7 +174,44 @@ LOCAL(vmInvoke_return):
.align 2
GLOBAL(vmJumpAndInvoke):
#ifdef AVIAN_CONTINUATIONS
#error todo
// x0: thread
// x1: address
// x2: stack
// x3: argumentFootprint
// x4: arguments
// x5: frameSize
// allocate new frame, adding room for callee-saved registers, plus
// 8 bytes of padding since the calculation of frameSize assumes 8
// bytes have already been allocated to save the return address,
// which is not true in this case
sub x2, x2, x5
sub x2, x2, #136
mov x19, x0
// copy arguments into place
mov x6, xzr
b LOCAL(vmJumpAndInvoke_argumentTest)
LOCAL(vmJumpAndInvoke_argumentLoop):
ldr x12, [x4,x6]
str x12, [x2,x6]
add x6, x6, #4
LOCAL(vmJumpAndInvoke_argumentTest):
cmp x6, x3
ble LOCAL(vmJumpAndInvoke_argumentLoop)
// the arguments have been copied, so we can set the real stack
// pointer now
mov sp, x2
// set return address to vmInvoke_returnAddress
adr x30, GLOBAL(vmInvoke_returnAddress)
br x1
#else // not AVIAN_CONTINUATIONS
// vmJumpAndInvoke should only be called when continuations are
// enabled, so we force a crash if we reach here:

View File

@ -2189,6 +2189,8 @@ GcContinuation* makeCurrentContinuation(MyThread* t,
*targetIp = 0;
while (*targetIp == 0) {
assertT(t, ip);
GcMethod* method = methodForIp(t, ip);
if (method) {
PROTECT(t, method);