corda/src/arm.h

145 lines
3.7 KiB
C
Raw Normal View History

2010-12-05 20:21:09 -07:00
/* Copyright (c) 2008-2010, Avian Contributors
2009-08-06 11:52:36 -06:00
Permission to use, copy, modify, and/or distribute this software
for any purpose with or without fee is hereby granted, provided
that the above copyright notice and this permission notice appear
in all copies.
There is NO WARRANTY for this software. See license.txt for
details. */
#ifndef ARM_H
#define ARM_H
#include "types.h"
#include "common.h"
support stack unwinding without using a frame pointer Previously, we unwound the stack by following the chain of frame pointers for normal returns, stack trace creation, and exception unwinding. On x86, this required reserving EBP/RBP for frame pointer duties, making it unavailable for general computation and requiring that it be explicitly saved and restored on entry and exit, respectively. On PowerPC, we use an ABI that makes the stack pointer double as a frame pointer, so it doesn't cost us anything. We've been using the same convention on ARM, but it doesn't match the native calling convention, which makes it unusable when we want to call native code from Java and pass arguments on the stack. So far, the ARM calling convention mismatch hasn't been an issue because we've never passed more arguments from Java to native code than would fit in registers. However, we must now pass an extra argument (the thread pointer) to e.g. divideLong so it can throw an exception on divide by zero, which means the last argument must be passed on the stack. This will clobber the linkage area we've been using to hold the frame pointer, so we need to stop using it. One solution would be to use the same convention on ARM as we do on x86, but this would introduce the same overhead of making a register unavailable for general use and extra code at method entry and exit. Instead, this commit removes the need for a frame pointer. Unwinding involves consulting a map of instruction offsets to frame sizes which is generated at compile time. This is necessary because stack trace creation can happen at any time due to Thread.getStackTrace being called by another thread, and the frame size varies during the execution of a method. So far, only x86(_64) is working, and continuations and tail call optimization are probably broken. More to come.
2011-01-16 19:05:05 -07:00
#define VA_LIST(x) (&(x))
2010-04-20 15:51:35 -06:00
#define IP_REGISTER(context) (context->uc_mcontext.arm_pc)
#define STACK_REGISTER(context) (context->uc_mcontext.arm_sp)
#define THREAD_REGISTER(context) (context->uc_mcontext.arm_ip)
2011-01-28 17:16:08 -07:00
#define LINK_REGISTER(context) (context->uc_mcontext.arm_lr)
2009-08-06 11:52:36 -06:00
extern "C" uint64_t
vmNativeCall(void* function, unsigned stackTotal, void* memoryTable,
unsigned memoryCount, void* gprTable);
namespace vm {
inline void
trap()
{
asm("bkpt");
2009-08-06 11:52:36 -06:00
}
inline void
memoryBarrier()
{
asm("nop");
}
inline void
storeStoreMemoryBarrier()
{
memoryBarrier();
}
inline void
storeLoadMemoryBarrier()
{
memoryBarrier();
}
inline void
loadMemoryBarrier()
{
memoryBarrier();
}
inline void
syncInstructionCache(const void* start, unsigned size)
2009-08-06 11:52:36 -06:00
{
__clear_cache
(const_cast<void*>(start),
const_cast<uint8_t*>(static_cast<const uint8_t*>(start) + size));
2009-08-06 11:52:36 -06:00
}
2010-04-20 15:51:35 -06:00
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
inline bool
atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
{
int r = __kernel_cmpxchg(static_cast<int>(old), static_cast<int>(new_), reinterpret_cast<int*>(p));
return (!r ? true : false);
}
inline bool
atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
{
return atomicCompareAndSwap32(reinterpret_cast<uint32_t*>(p), old, new_);
}
2009-08-06 11:52:36 -06:00
inline uint64_t
dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes,
2010-04-20 15:51:35 -06:00
unsigned argumentCount, unsigned argumentsSize UNUSED,
2009-08-06 11:52:36 -06:00
unsigned returnType UNUSED)
{
const unsigned GprCount = 4;
uintptr_t gprTable[GprCount];
unsigned gprIndex = 0;
2010-04-14 09:26:50 -06:00
uintptr_t stack[(argumentCount * 8) / BytesPerWord]; // is > argumentSize to account for padding
2009-08-06 11:52:36 -06:00
unsigned stackIndex = 0;
unsigned ai = 0;
for (unsigned ati = 0; ati < argumentCount; ++ ati) {
switch (argumentTypes[ati]) {
case DOUBLE_TYPE:
case INT64_TYPE: {
2010-04-14 09:26:50 -06:00
if (gprIndex + (8 / BytesPerWord) <= GprCount) { // pass argument on registers
if (gprIndex & 1) { // 8-byte alignment
memset(gprTable + gprIndex, 0, 4); // probably not necessary, but for good luck
++gprIndex;
}
2009-08-06 11:52:36 -06:00
memcpy(gprTable + gprIndex, arguments + ai, 8);
gprIndex += 8 / BytesPerWord;
2010-04-14 09:26:50 -06:00
} else { // pass argument on stack
gprIndex = GprCount;
2010-04-14 09:26:50 -06:00
if (stackIndex & 1) { // 8-byte alignment
memset(stack + stackIndex, 0, 4); // probably not necessary, but for good luck
++stackIndex;
}
2009-08-06 11:52:36 -06:00
memcpy(stack + stackIndex, arguments + ai, 8);
stackIndex += 8 / BytesPerWord;
}
ai += 8 / BytesPerWord;
} break;
default: {
if (gprIndex < GprCount) {
gprTable[gprIndex++] = arguments[ai];
} else {
stack[stackIndex++] = arguments[ai];
}
++ ai;
} break;
}
}
if (gprIndex < GprCount) { // pad since assembly loads all GPRs
memset(gprTable + gprIndex, 0, (GprCount-gprIndex)*4);
gprIndex = GprCount;
}
unsigned stackSize = stackIndex*BytesPerWord + ((stackIndex & 1) << 2);
return vmNativeCall
(function, stackSize, stack, stackIndex * BytesPerWord,
(gprIndex ? gprTable : 0));
}
} // namespace vm
#endif // ARM_H