corda/src/arm.h


/* Copyright (c) 2008-2011, Avian Contributors

   Permission to use, copy, modify, and/or distribute this software
   for any purpose with or without fee is hereby granted, provided
   that the above copyright notice and this permission notice appear
   in all copies.

   There is NO WARRANTY for this software.  See license.txt for
   details. */

#ifndef ARM_H
#define ARM_H

#include "types.h"
#include "common.h"
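
// take the address of a va_list explicitly, since va_list is not an
// array type on ARM: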
#define VA_LIST(x) (&(x))
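
// accessors for the register state saved in a Linux signal handler's
// ucontext; the VM keeps the current thread pointer in ip (r12):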
#define IP_REGISTER(context) (context->uc_mcontext.arm_pc)
#define STACK_REGISTER(context) (context->uc_mcontext.arm_sp)
#define THREAD_REGISTER(context) (context->uc_mcontext.arm_ip)
#define LINK_REGISTER(context) (context->uc_mcontext.arm_lr)
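
// implemented in the platform assembly: reserves stackTotal bytes of
// stack, copies memoryCount bytes of arguments from memoryTable onto
// the stack, loads r0-r3 from gprTable, and then calls function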
extern "C" uint64_t
vmNativeCall(void* function, unsigned stackTotal, void* memoryTable,
unsigned memoryCount, void* gprTable);
namespace vm {

inline void
trap()
{
  asm("bkpt");
}
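
// note: a nop is not a hardware memory barrier; this stub presumably
// assumes a uniprocessor target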
inline void
memoryBarrier()
{
  asm("nop");
}

inline void
storeStoreMemoryBarrier()
{
  memoryBarrier();
}

inline void
storeLoadMemoryBarrier()
{
  memoryBarrier();
}

inline void
loadMemoryBarrier()
{
  memoryBarrier();
}
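
// flush newly generated code to memory and invalidate the instruction
// cache over [start, start + size) via GCC's __clear_cache builtin: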
inline void
syncInstructionCache(const void* start, unsigned size)
{
  __clear_cache
    (const_cast<void*>(start),
     const_cast<uint8_t*>(static_cast<const uint8_t*>(start) + size));
}
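
// the Linux "kuser" helper at the fixed address 0xffff0fc0: an atomic
// compare-and-swap provided by the kernel, returning zero on success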
typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)

inline bool
atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
{
  int r = __kernel_cmpxchg
    (static_cast<int>(old), static_cast<int>(new_),
     reinterpret_cast<int*>(p));
  return r == 0;
}

inline bool
atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
{
  return atomicCompareAndSwap32(reinterpret_cast<uint32_t*>(p), old, new_);
}
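
// marshal arguments into registers and stack slots per the ARM EABI:
// the first four words go in r0-r3, 64-bit values start at an even
// register or stack index, and everything else spills to the stack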
inline uint64_t
dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes,
            unsigned argumentCount, unsigned argumentsSize UNUSED,
            unsigned returnType UNUSED)
{
  const unsigned GprCount = 4;
  uintptr_t gprTable[GprCount];
  unsigned gprIndex = 0;

  // at least argumentsSize, with room for alignment padding:
  uintptr_t stack[(argumentCount * 8) / BytesPerWord];
  unsigned stackIndex = 0;

  unsigned ai = 0;
  for (unsigned ati = 0; ati < argumentCount; ++ati) {
    switch (argumentTypes[ati]) {
    case DOUBLE_TYPE:
    case INT64_TYPE: {
      if (gprIndex + (8 / BytesPerWord) <= GprCount) { // pass argument in registers
        if (gprIndex & 1) { // 8-byte alignment
          memset(gprTable + gprIndex, 0, 4); // probably not necessary, but for good luck
          ++gprIndex;
        }
        memcpy(gprTable + gprIndex, arguments + ai, 8);
        gprIndex += 8 / BytesPerWord;
      } else { // pass argument on stack
        gprIndex = GprCount;
        if (stackIndex & 1) { // 8-byte alignment
          memset(stack + stackIndex, 0, 4); // probably not necessary, but for good luck
          ++stackIndex;
        }
        memcpy(stack + stackIndex, arguments + ai, 8);
        stackIndex += 8 / BytesPerWord;
      }
      ai += 8 / BytesPerWord;
    } break;

    default: {
      if (gprIndex < GprCount) {
        gprTable[gprIndex++] = arguments[ai];
      } else {
        stack[stackIndex++] = arguments[ai];
      }
      ++ai;
    } break;
    }
  }

  if (gprIndex < GprCount) { // pad since assembly loads all GPRs
    memset(gprTable + gprIndex, 0, (GprCount - gprIndex) * 4);
    gprIndex = GprCount;
  }

  // round up to a multiple of 8 bytes to keep the stack doubleword-aligned:
  unsigned stackSize = stackIndex * BytesPerWord + ((stackIndex & 1) << 2);

  return vmNativeCall
    (function, stackSize, stack, stackIndex * BytesPerWord,
     (gprIndex ? gprTable : 0));
}
} // namespace vm
#endif // ARM_H