add support for the ARM hardware floating point ABI

This ABI's calling convention passes floating-point arguments and
return values in VFP registers, so we need to tweak vmNativeCall to
match it.
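
For illustration only (not part of the commit), the difference is
visible in how an ordinary C function receives a double under each
ABI; a trampoline like vmNativeCall has to reproduce this placement
by hand:

    // Illustrative sketch, not from this commit.
    double scale(double x, int n) {
      // softfp:     x arrives in r0/r1, n in r2
      // hard-float: x arrives in d0,    n in r0
      return x * n;
    }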

Thanks to Damjan Jovanovic for pointing this out and providing an
initial patch.
commit a97c5728bb
parent 836fc21106
Joel Dice  2012-07-31 16:36:01 +00:00

2 changed files with 68 additions and 5 deletions


@@ -9,6 +9,8 @@
    There is NO WARRANTY for this software. See license.txt for
    details. */
 
+#include "types.h"
+
 .text
 
 #define LOCAL(x) .L##x
@@ -29,14 +31,18 @@ GLOBAL(vmNativeCall):
   r2          : memoryTable
   r3          : memoryCount
   [sp, #0] -> r6 : gprTable
+  [sp, #4] -> r7 : vfpTable
+  [sp, #8] -> r8 : returnType
 */
   mov   ip, sp             // save stack frame
-  stmfd sp!, {r4-r6, lr}   // save clobbered non-volatile regs
+  stmfd sp!, {r4-r8, lr}   // save clobbered non-volatile regs
 
   // mv args into non-volatile regs
   mov   r4, r0
   mov   r5, r1
   ldr   r6, [ip]
+  ldr   r7, [ip, #4]
+  ldr   r8, [ip, #8]
 
   // setup stack arguments if necessary
   sub   sp, sp, r5         // allocate stack
@@ -51,11 +57,28 @@ LOCAL(loop):
   // setup argument registers if necessary
   tst   r6, r6
   ldmneia r6, {r0-r3}
+#if defined(__VFP_FP__) && (! defined(__SOFTFP__))
+  // and VFP registers (d0-d7 alias the s0-s15 argument registers)
+  vldmia r7, {d0-d7}
+#endif
 
   blx   r4                 // call function
 
   add   sp, sp, r5         // deallocate stack
 
-  ldmfd sp!, {r4-r6, pc}   // restore non-volatile regs and return
+#if defined(__VFP_FP__) && (! defined(__SOFTFP__))
+  cmp   r8, #FLOAT_TYPE
+  bne   LOCAL(double)
+  fmrs  r0, s0
+  b     LOCAL(exit)
+
+LOCAL(double):
+  cmp   r8, #DOUBLE_TYPE
+  bne   LOCAL(exit)
+  fmrrd r0, r1, d0
+#endif
+
+LOCAL(exit):
+  ldmfd sp!, {r4-r8, pc}   // restore non-volatile regs and return
 
 .globl GLOBAL(vmJump)
 .align 2
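
In effect, the new vmNativeCall epilogue chooses how the 64-bit
return slot is filled from the VFP result registers. A C++
paraphrase of that dispatch follows (illustrative only: marshalResult
is a made-up name, and the FLOAT_TYPE/DOUBLE_TYPE values are
stand-ins for the constants defined in types.h):

    #include <cstdint>
    #include <cstring>

    const unsigned FLOAT_TYPE = 1;  // stand-in value; see types.h
    const unsigned DOUBLE_TYPE = 2; // stand-in value; see types.h

    // C++ paraphrase of the assembly epilogue above.
    uint64_t marshalResult(unsigned returnType, float s0, double d0,
                           uint64_t r0r1) {
      if (returnType == FLOAT_TYPE) {         // fmrs r0, s0
        uint32_t bits;
        std::memcpy(&bits, &s0, sizeof(bits));
        return bits;
      }
      if (returnType == DOUBLE_TYPE) {        // fmrrd r0, r1, d0
        uint64_t bits;
        std::memcpy(&bits, &d0, sizeof(bits));
        return bits;
      }
      return r0r1; // integer/pointer results are already in r0/r1
    }

The header changes below widen vmNativeCall's C++ declaration to
match and teach dynamicCall to route floating-point arguments into a
separate vfpTable.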


@@ -60,7 +60,8 @@
 
 extern "C" uint64_t
 vmNativeCall(void* function, unsigned stackTotal, void* memoryTable,
-             unsigned memoryCount, void* gprTable);
+             unsigned memoryCount, void* gprTable, void* vfpTable,
+             unsigned returnType);
 
 namespace vm {
 
@@ -131,7 +132,7 @@ atomicCompareAndSwap(uintptr_t* p, uintptr_t old, uintptr_t new_)
 inline uint64_t
 dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes,
             unsigned argumentCount, unsigned argumentsSize UNUSED,
-            unsigned returnType UNUSED)
+            unsigned returnType)
 {
 #ifdef __APPLE__
   const unsigned Alignment = 1;
@@ -143,6 +144,10 @@ dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes,
   uintptr_t gprTable[GprCount];
   unsigned gprIndex = 0;
 
+  const unsigned VfpCount = 16;
+  uintptr_t vfpTable[VfpCount];
+  unsigned vfpIndex = 0;
+
   uintptr_t stack[(argumentCount * 8) / BytesPerWord]; // is > argumentSize to account for padding
   unsigned stackIndex = 0;
 
@@ -150,6 +155,36 @@ dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes,
   for (unsigned ati = 0; ati < argumentCount; ++ ati) {
     switch (argumentTypes[ati]) {
     case DOUBLE_TYPE:
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
+    {
+      if (vfpIndex + Alignment <= VfpCount) {
+        if (vfpIndex % Alignment) {
+          ++ vfpIndex;
+        }
+        memcpy(vfpTable + vfpIndex, arguments + ai, 8);
+        vfpIndex += 8 / BytesPerWord;
+      } else {
+        vfpIndex = VfpCount;
+        if (stackIndex % Alignment) {
+          ++ stackIndex;
+        }
+        memcpy(stack + stackIndex, arguments + ai, 8);
+        stackIndex += 8 / BytesPerWord;
+      }
+      ai += 8 / BytesPerWord;
+    } break;
+
+    case FLOAT_TYPE:
+      if (vfpIndex < VfpCount) {
+        vfpTable[vfpIndex++] = arguments[ai];
+      } else {
+        stack[stackIndex++] = arguments[ai];
+      }
+      ++ ai;
+      break;
+#endif
+
     case INT64_TYPE: {
       if (gprIndex + Alignment <= GprCount) { // pass argument in register(s)
         if (Alignment == 1
@@ -193,11 +228,16 @@ dynamicCall(void* function, uintptr_t* arguments, uint8_t* argumentTypes,
     memset(gprTable + gprIndex, 0, (GprCount-gprIndex)*4);
     gprIndex = GprCount;
   }
+  if (vfpIndex < VfpCount) {
+    memset(vfpTable + vfpIndex, 0, (VfpCount-vfpIndex)*4);
+    vfpIndex = VfpCount;
+  }
 
   unsigned stackSize = stackIndex*BytesPerWord + ((stackIndex & 1) << 2);
   return vmNativeCall
     (function, stackSize, stack, stackIndex * BytesPerWord,
-     (gprIndex ? gprTable : 0));
+     (gprIndex ? gprTable : 0),
+     (vfpIndex ? vfpTable : 0), returnType);
 }
 
 } // namespace vm
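
Finally, a hedged caller-side sketch of the widened entry point (the
addHalf helper and the FLOAT_TYPE stand-in are illustrative; only the
vmNativeCall signature comes from the diff above):

    #include <cstdint>
    #include <cstring>

    extern "C" uint64_t
    vmNativeCall(void* function, unsigned stackTotal, void* memoryTable,
                 unsigned memoryCount, void* gprTable, void* vfpTable,
                 unsigned returnType);

    const unsigned FLOAT_TYPE = 1; // stand-in value; see types.h

    extern "C" float addHalf(float x) { return x + 0.5f; }

    // One float argument, float result: the argument goes to s0 via
    // vfpTable, and the result comes back in the low 32 bits.
    uint64_t callAddHalf() {
      uintptr_t gpr[4] = {0};  // images of r0-r3
      uintptr_t vfp[16] = {0}; // images of s0-s15
      float arg = 1.0f;
      std::memcpy(vfp, &arg, sizeof(arg)); // first float arg -> s0
      return vmNativeCall(reinterpret_cast<void*>(&addHalf), 0, 0, 0,
                          gpr, vfp, FLOAT_TYPE);
    }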