/* corda/src/compile-arm.S */
/* Copyright (c) 2010-2012, Avian Contributors
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
#include "types.h"

.text

#define BYTES_PER_WORD 4

/* LOCAL(x) yields a .L-prefixed label name; GLOBAL(x) prepends an
   underscore to the symbol name when __APPLE__ is defined and leaves
   it unchanged otherwise. */
#define LOCAL(x) .L##x

#ifdef __APPLE__
#  define GLOBAL(x) _##x
#else
#  define GLOBAL(x) x
#endif

/* Byte offsets applied to the thread pointer (r0/r8) in the code below.
   NOTE(review): presumably these mirror the layout of the thread object
   declared on the C++ side -- confirm they are kept in sync. */
#define THREAD_STACK 2148
#define THREAD_SCRATCH 2152
#define THREAD_CONTINUATION 2156
#define THREAD_EXCEPTION 44
#define THREAD_EXCEPTION_STACK_ADJUSTMENT 2160
#define THREAD_EXCEPTION_OFFSET 2164
#define THREAD_EXCEPTION_HANDLER 2168

/* Byte offsets applied to a continuation pointer (r5) in the code below.
   NOTE(review): presumably these mirror the continuation object layout
   declared elsewhere -- confirm. */
#define CONTINUATION_NEXT 4
#define CONTINUATION_ADDRESS 16
#define CONTINUATION_RETURN_ADDRESS_OFFSET 20
#define CONTINUATION_FRAME_POINTER_OFFSET 24
#define CONTINUATION_LENGTH 28
#define CONTINUATION_BODY 32
.globl GLOBAL(vmInvoke)
.align 2
GLOBAL(vmInvoke):
   /*
     Copies argumentFootprint bytes of arguments onto the stack, calls
     the given function with the thread pointer in r8, then restores the
     stack and callee-saved registers and returns to the caller.

     arguments
     r0       : thread
     r1       : function
     r2       : arguments
     r3       : argumentFootprint
     [sp, #0] : frameSize (not used)
     [sp, #4] : returnType
   */

   // save all non-volatile registers (nine words, so sp drops by 36)
   stmfd sp!, {r4-r11, lr}

   // save return type: after the 36-byte push above, the slot that was
   // at [sp, #4] on entry is now at [sp, #40]
   ldr   r4, [sp, #40]
   str   r4, [sp, #-4]!

   // remember sp in the thread so it can be reloaded at
   // vmInvoke_returnAddress
   str   sp, [r0, #THREAD_SCRATCH]

   // align stack, if necessary: when bit 2 of sp and of r3 differ, drop
   // sp by one extra word so that (sp - r3) has bit 2 clear
   eor   r4, sp, r3
   tst   r4, #4
   subne sp, sp, #4

   // copy arguments into place (r4 = byte index, runs while r4 < r3)
   sub   sp, sp, r3
   mov   r4, #0
   b     LOCAL(vmInvoke_argumentTest)

LOCAL(vmInvoke_argumentLoop):
   ldr   r5, [r2, r4]
   str   r5, [sp, r4]
   add   r4, r4, #BYTES_PER_WORD

LOCAL(vmInvoke_argumentTest):
   cmp   r4, r3
   blt   LOCAL(vmInvoke_argumentLoop)

   // we use r8 to hold the thread pointer, by convention
   mov   r8, r0

   // load and call function address
   blx   r1

.globl GLOBAL(vmInvoke_returnAddress)
.align 2
GLOBAL(vmInvoke_returnAddress):
   // restore stack pointer
   ldr   sp, [r8, #THREAD_SCRATCH]

   // clear MyThread::stack to avoid confusing another thread calling
   // java.lang.Thread.getStackTrace on this one.  See
   // MyProcess::getStackTrace in compile.cpp for details on how we get
   // a reliable stack trace from a thread that might be interrupted at
   // any point in its execution.
   mov   r5, #0
   str   r5, [r8, #THREAD_STACK]

.globl GLOBAL(vmInvoke_safeStack)
.align 2
GLOBAL(vmInvoke_safeStack):

#ifdef AVIAN_CONTINUATIONS
   // call the next continuation, if any
   ldr   r5, [r8, #THREAD_CONTINUATION]
   cmp   r5, #0
   beq   LOCAL(vmInvoke_exit)

   // r6 = continuation length in bytes (length field << 2);
   // allocate r6 + 80 bytes below sp, storing the old sp at the new [sp]
   ldr   r6, [r5, #CONTINUATION_LENGTH]
   lsl   r6, r6, #2
   neg   r7, r6
   add   r7, r7, #-80
   mov   r4, sp
   str   r4, [sp, r7]!

   // copy the continuation body (starting at r5 + CONTINUATION_BODY)
   // to the new stack area; r11 = byte index
   add   r7, r5, #CONTINUATION_BODY

   mov   r11, #0
   b     LOCAL(vmInvoke_continuationTest)

LOCAL(vmInvoke_continuationLoop):
   ldr   r9, [r7, r11]
   str   r9, [sp, r11]
   add   r11, r11, #4

LOCAL(vmInvoke_continuationTest):
   // NOTE(review): "ble" also runs the loop when r11 == r6, i.e. one
   // word more than r6 bytes is copied, whereas the argument loop above
   // uses "blt" -- confirm whether the extra word is intended.
   cmp   r11, r6
   ble   LOCAL(vmInvoke_continuationLoop)

   // fetch the address of vmInvoke_returnAddress through the GOT
   // (pc reads as this instruction + 8, matching the "+8" in the
   // vmInvoke_getAddress_word literal) and store it at
   // [sp + value of the continuation return-address-offset field]
   ldr   r7, [r5, #CONTINUATION_RETURN_ADDRESS_OFFSET]
   ldr   r10, LOCAL(vmInvoke_returnAddress_word)
   ldr   r11, LOCAL(vmInvoke_getAddress_word)
LOCAL(vmInvoke_getAddress):
   add   r11, pc, r11
   ldr   r11, [r11, r10]
   str   r11, [sp, r7]

   // make the next continuation in the chain the current one
   ldr   r7, [r5, #CONTINUATION_NEXT]
   str   r7, [r8, #THREAD_CONTINUATION]

   // call the continuation unless we're handling an exception
   ldr   r7, [r8, #THREAD_EXCEPTION]
   cmp   r7, #0
   bne   LOCAL(vmInvoke_handleException)
   ldr   r7, [r5, #CONTINUATION_ADDRESS]
   bx    r7

LOCAL(vmInvoke_handleException):
   // we're handling an exception - call the exception handler instead

   // clear the pending exception in the thread (r7 still holds it)
   mov   r11, #0
   str   r11, [r8, #THREAD_EXCEPTION]

   // lower sp by the exception stack adjustment, carrying the word at
   // the old [sp] over to the new [sp]
   ldr   r11, [r8, #THREAD_EXCEPTION_STACK_ADJUSTMENT]
   ldr   r9, [sp]
   neg   r11, r11
   str   r9, [sp, r11]!

   // store the exception at [sp + exception offset] and jump to the
   // handler
   ldr   r11, [r8, #THREAD_EXCEPTION_OFFSET]
   str   r7, [sp, r11]

   ldr   r7, [r8, #THREAD_EXCEPTION_HANDLER]
   bx    r7

LOCAL(vmInvoke_exit):
#endif // AVIAN_CONTINUATIONS

   // clear the thread stack field (see the comment at
   // vmInvoke_returnAddress above)
   mov   ip, #0
   str   ip, [r8, #THREAD_STACK]

   // restore return type (popped into ip; not read again in this
   // routine)
   ldr   ip, [sp], #4

   // restore callee-saved registers
   ldmfd sp!, {r4-r11, lr}

LOCAL(vmInvoke_return):
   bx    lr
.globl GLOBAL(vmJumpAndInvoke)
.align 2
GLOBAL(vmJumpAndInvoke):
#ifdef AVIAN_CONTINUATIONS
   // Builds a new frame below the given stack address, copies the
   // arguments into it, points lr at vmInvoke_returnAddress and jumps
   // to the given address.
   //
   //      r0: thread
   //      r1: address
   //      r2: stack
   //      r3: argumentFootprint
   // [sp,#0]: arguments
   // [sp,#4]: frameSize

   ldr   r5, [sp, #0]
   ldr   r6, [sp, #4]

   // allocate new frame, adding room for callee-saved registers, plus
   // 4 bytes of padding since the calculation of frameSize assumes 4
   // bytes have already been allocated to save the return address,
   // which is not true in this case
   sub   r2, r2, r6
   sub   r2, r2, #84

   // thread pointer lives in r8, as in vmInvoke
   mov   r8, r0

   // copy arguments into place (r6 is reused as the byte index)
   mov   r6, #0
   b     LOCAL(vmJumpAndInvoke_argumentTest)

LOCAL(vmJumpAndInvoke_argumentLoop):
   ldr   r12, [r5, r6]
   str   r12, [r2, r6]
   add   r6, r6, #4

LOCAL(vmJumpAndInvoke_argumentTest):
   // NOTE(review): "ble" also runs the loop when r6 == r3, so one word
   // beyond argumentFootprint bytes is copied, whereas the argument
   // loop in vmInvoke uses "blt" -- confirm whether this is intended.
   cmp   r6, r3
   ble   LOCAL(vmJumpAndInvoke_argumentLoop)

   // the arguments have been copied, so we can set the real stack
   // pointer now
   mov   sp, r2

   // set return address to vmInvoke_returnAddress, loaded through the
   // GOT (pc reads as this instruction + 8, matching the "+8" in the
   // vmJumpAndInvoke_getAddress_word literal)
   ldr   r10, LOCAL(vmInvoke_returnAddress_word)
   ldr   r11, LOCAL(vmJumpAndInvoke_getAddress_word)
LOCAL(vmJumpAndInvoke_getAddress):
   add   r11, pc, r11
   ldr   lr, [r11, r10]

   bx    r1

   // literal words used by the GOT lookups here and in vmInvoke
LOCAL(vmInvoke_returnAddress_word):
   .word GLOBAL(vmInvoke_returnAddress)(GOT)
LOCAL(vmInvoke_getAddress_word):
   .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmInvoke_getAddress)+8)
LOCAL(vmJumpAndInvoke_getAddress_word):
   .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8)

#else // not AVIAN_CONTINUATIONS
   // vmJumpAndInvoke should only be called when continuations are
   // enabled
   bkpt
#endif // not AVIAN_CONTINUATIONS