Merge pull request #383 from dicej/master

add ARM64 JIT support
This commit is contained in:
Joshua Warner 2014-12-30 15:56:26 -07:00
commit 7b1bdf127e
14 changed files with 2070 additions and 241 deletions

View File

@ -38,7 +38,7 @@ public class Classes {
public static native VMClass primitiveClass(char name); public static native VMClass primitiveClass(char name);
public static native void initialize(VMClass vmClass); public static native void initialize(VMClass vmClass);
public static native boolean isAssignableFrom(VMClass a, VMClass b); public static native boolean isAssignableFrom(VMClass a, VMClass b);
public static native VMClass getVMClass(Object o); public static native VMClass getVMClass(Object o);
@ -134,7 +134,7 @@ public class Classes {
array[i] = parseAnnotationValue(loader, pool, in); array[i] = parseAnnotationValue(loader, pool, in);
} }
return array; return array;
} }
default: throw new AssertionError(); default: throw new AssertionError();
} }
@ -207,7 +207,7 @@ public class Classes {
while (spec[end] != ';') ++ end; while (spec[end] != ';') ++ end;
++ end; ++ end;
break; break;
default: default:
++ end; ++ end;
} }
@ -295,9 +295,9 @@ public class Classes {
} }
Class c = loader.loadClass(name); Class c = loader.loadClass(name);
VMClass vmc = SystemClassLoader.vmClass(c); VMClass vmc = SystemClassLoader.vmClass(c);
Classes.link(vmc, loader); link(vmc, loader);
if (initialize) { if (initialize) {
Classes.initialize(vmc); initialize(vmc);
} }
return c; return c;
} }
@ -315,7 +315,7 @@ public class Classes {
} else { } else {
if (name.length() == 1) { if (name.length() == 1) {
return SystemClassLoader.getClass return SystemClassLoader.getClass
(Classes.primitiveClass(name.charAt(0))); (primitiveClass(name.charAt(0)));
} else { } else {
throw new ClassNotFoundException(name); throw new ClassNotFoundException(name);
} }
@ -378,7 +378,7 @@ public class Classes {
public static int findField(VMClass vmClass, String name) { public static int findField(VMClass vmClass, String name) {
if (vmClass.fieldTable != null) { if (vmClass.fieldTable != null) {
Classes.link(vmClass); link(vmClass);
for (int i = 0; i < vmClass.fieldTable.length; ++i) { for (int i = 0; i < vmClass.fieldTable.length; ++i) {
if (toString(vmClass.fieldTable[i].name).equals(name)) { if (toString(vmClass.fieldTable[i].name).equals(name)) {
@ -426,7 +426,7 @@ public class Classes {
{ {
VMMethod[] methodTable = vmClass.methodTable; VMMethod[] methodTable = vmClass.methodTable;
if (methodTable != null) { if (methodTable != null) {
Classes.link(vmClass); link(vmClass);
if (parameterTypes == null) { if (parameterTypes == null) {
parameterTypes = new Class[0]; parameterTypes = new Class[0];
@ -464,7 +464,7 @@ public class Classes {
Method[] array = new Method[countMethods(vmClass, publicOnly)]; Method[] array = new Method[countMethods(vmClass, publicOnly)];
VMMethod[] methodTable = vmClass.methodTable; VMMethod[] methodTable = vmClass.methodTable;
if (methodTable != null) { if (methodTable != null) {
Classes.link(vmClass); link(vmClass);
int ai = 0; int ai = 0;
for (int i = 0, j = declaredMethodCount(vmClass); i < j; ++i) { for (int i = 0, j = declaredMethodCount(vmClass); i < j; ++i) {
@ -498,7 +498,7 @@ public class Classes {
public static Field[] getFields(VMClass vmClass, boolean publicOnly) { public static Field[] getFields(VMClass vmClass, boolean publicOnly) {
Field[] array = new Field[countFields(vmClass, publicOnly)]; Field[] array = new Field[countFields(vmClass, publicOnly)];
if (vmClass.fieldTable != null) { if (vmClass.fieldTable != null) {
Classes.link(vmClass); link(vmClass);
int ai = 0; int ai = 0;
for (int i = 0; i < vmClass.fieldTable.length; ++i) { for (int i = 0; i < vmClass.fieldTable.length; ++i) {
@ -568,9 +568,9 @@ public class Classes {
return new ProtectionDomain(source, p); return new ProtectionDomain(source, p);
} }
public static native Method makeMethod(Class c, int slot); public static native Method makeMethod(Class c, int slot);
public static native Field makeField(Class c, int slot); public static native Field makeField(Class c, int slot);
private static native void acquireClassLock(); private static native void acquireClassLock();

View File

@ -28,8 +28,6 @@ namespace codegen {
class Assembler; class Assembler;
class RegisterFile;
class OperandMask { class OperandMask {
public: public:
uint8_t typeMask; uint8_t typeMask;

View File

@ -35,6 +35,7 @@ GLOBAL(vmNativeCall):
// allocate frame // allocate frame
stp x29, x30, [sp,#-64]! stp x29, x30, [sp,#-64]!
mov x29, sp
// save callee-saved register values so we can clobber them // save callee-saved register values so we can clobber them
stp x19, x20, [sp,#16] stp x19, x20, [sp,#16]
@ -118,6 +119,7 @@ GLOBAL(vmRun):
// allocate frame // allocate frame
stp x29, x30, [sp,#-96]! stp x29, x30, [sp,#-96]!
mov x29, sp
// save callee-saved register values // save callee-saved register values
stp x19, x20, [sp,#16] stp x19, x20, [sp,#16]

View File

@ -123,7 +123,7 @@ class Site {
virtual RegisterMask registerMask(Context*) virtual RegisterMask registerMask(Context*)
{ {
return 0; return RegisterMask(0);
} }
virtual bool isVolatile(Context*) virtual bool isVolatile(Context*)

View File

@ -4,5 +4,6 @@ add_library(avian_codegen_arm
context.cpp context.cpp
fixup.cpp fixup.cpp
multimethod.cpp multimethod.cpp
operations.cpp operations32.cpp
operations64.cpp
) )

View File

@ -39,7 +39,7 @@ namespace isa {
bool vfpSupported() bool vfpSupported()
{ {
// TODO: Use runtime detection // TODO: Use runtime detection
#if defined(__ARM_PCS_VFP) #if (defined __ARM_PCS_VFP) || (defined ARCH_arm64)
// armhf // armhf
return true; return true;
#else #else
@ -55,9 +55,9 @@ bool vfpSupported()
const RegisterFile MyRegisterFileWithoutFloats(GPR_MASK, 0); const RegisterFile MyRegisterFileWithoutFloats(GPR_MASK, 0);
const RegisterFile MyRegisterFileWithFloats(GPR_MASK, FPR_MASK); const RegisterFile MyRegisterFileWithFloats(GPR_MASK, FPR_MASK);
const unsigned FrameHeaderSize = 1; const unsigned FrameHeaderSize = TargetBytesPerWord / 4;
const unsigned StackAlignmentInBytes = 8; const unsigned StackAlignmentInBytes = TargetBytesPerWord * 2;
const unsigned StackAlignmentInWords = StackAlignmentInBytes const unsigned StackAlignmentInWords = StackAlignmentInBytes
/ TargetBytesPerWord; / TargetBytesPerWord;
@ -89,11 +89,11 @@ void nextFrame(ArchitectureContext* con,
void** stack) void** stack)
{ {
assertT(con, *ip >= start); assertT(con, *ip >= start);
assertT(con, *ip <= start + (size / TargetBytesPerWord)); assertT(con, *ip <= start + (size / 4));
uint32_t* instruction = static_cast<uint32_t*>(*ip); uint32_t* instruction = static_cast<uint32_t*>(*ip);
if ((*start >> 20) == 0xe59) { if ((*start >> 20) == (TargetBytesPerWord == 8 ? 0xf94 : 0xe59)) {
// skip stack overflow check // skip stack overflow check
start += 3; start += 3;
} }
@ -111,7 +111,8 @@ void nextFrame(ArchitectureContext* con,
return; return;
} }
if (*instruction == 0xe12fff1e) { // return if (*instruction == (TargetBytesPerWord == 8 ? 0xd61f03c0 : 0xe12fff1e)) {
// return
*ip = link; *ip = link;
return; return;
} }
@ -124,7 +125,20 @@ void nextFrame(ArchitectureContext* con,
// check for post-non-tail-call stack adjustment of the form "sub // check for post-non-tail-call stack adjustment of the form "sub
// sp, sp, #offset": // sp, sp, #offset":
if ((*instruction >> 12) == 0xe24dd) { if (TargetBytesPerWord == 8 and (*instruction & 0xff0003ff) == 0xd10003ff) {
unsigned value = (*instruction >> 10) & 0xfff;
unsigned shift = (*instruction >> 22) & 1;
switch (shift) {
case 0:
offset -= value / TargetBytesPerWord;
break;
case 1:
offset -= (value << 12) / TargetBytesPerWord;
break;
default:
abort(con);
}
} else if (TargetBytesPerWord == 4 and (*instruction >> 12) == 0xe24dd) {
unsigned value = *instruction & 0xff; unsigned value = *instruction & 0xff;
unsigned rotation = (*instruction >> 8) & 0xf; unsigned rotation = (*instruction >> 8) & 0xf;
switch (rotation) { switch (rotation) {
@ -218,6 +232,7 @@ class MyArchitecture : public Architecture {
{ {
switch (register_.index()) { switch (register_.index()) {
case LinkRegister.index(): case LinkRegister.index():
case FrameRegister.index():
case StackRegister.index(): case StackRegister.index():
case ThreadRegister.index(): case ThreadRegister.index():
case ProgramCounter.index(): case ProgramCounter.index():
@ -258,7 +273,7 @@ class MyArchitecture : public Architecture {
virtual unsigned argumentRegisterCount() virtual unsigned argumentRegisterCount()
{ {
return 4; return TargetBytesPerWord;
} }
virtual Register argumentRegister(unsigned index) virtual Register argumentRegister(unsigned index)
@ -306,8 +321,13 @@ class MyArchitecture : public Architecture {
case lir::AlignedLongCall: case lir::AlignedLongCall:
case lir::AlignedLongJump: { case lir::AlignedLongJump: {
uint32_t* p = static_cast<uint32_t*>(returnAddress) - 2; uint32_t* p = static_cast<uint32_t*>(returnAddress) - 2;
*reinterpret_cast<void**>(p + (((*p & PoolOffsetMask) + 8) / 4)) if (TargetBytesPerWord == 8) {
= newTarget; const int32_t mask = (PoolOffsetMask >> 2) << 5;
*reinterpret_cast<void**>(p + ((*p & mask) >> 5)) = newTarget;
} else {
*reinterpret_cast<void**>(p + (((*p & PoolOffsetMask) + 8) / 4))
= newTarget;
}
} break; } break;
default: default:
@ -434,11 +454,11 @@ class MyArchitecture : public Architecture {
break; break;
case lir::Float2Int: case lir::Float2Int:
// todo: Java requires different semantics than SSE for // todo: Java requires different semantics than VFP for
// converting floats to integers, so we need to either use // converting floats to integers, so we need to either use
// thunks or produce inline machine code which handles edge // thunks or produce inline machine code which handles edge
// cases properly. // cases properly.
if (false && vfpSupported() && bSize == 4) { if (false && vfpSupported() && bSize <= TargetBytesPerWord) {
aMask.typeMask = lir::Operand::RegisterPairMask; aMask.typeMask = lir::Operand::RegisterPairMask;
aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK); aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK);
} else { } else {
@ -447,7 +467,7 @@ class MyArchitecture : public Architecture {
break; break;
case lir::Int2Float: case lir::Int2Float:
if (vfpSupported() && aSize == 4) { if (vfpSupported() && aSize <= TargetBytesPerWord) {
aMask.typeMask = lir::Operand::RegisterPairMask; aMask.typeMask = lir::Operand::RegisterPairMask;
aMask.setLowHighRegisterMasks(GPR_MASK, GPR_MASK); aMask.setLowHighRegisterMasks(GPR_MASK, GPR_MASK);
} else { } else {
@ -544,7 +564,7 @@ class MyArchitecture : public Architecture {
case lir::ShiftLeft: case lir::ShiftLeft:
case lir::ShiftRight: case lir::ShiftRight:
case lir::UnsignedShiftRight: case lir::UnsignedShiftRight:
if (bSize == 8) if (bSize > TargetBytesPerWord)
aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask; aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask;
break; break;
@ -556,6 +576,11 @@ class MyArchitecture : public Architecture {
aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask; aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask;
break; break;
// todo: Although ARM has instructions for integer division and
// remainder, they don't trap on division by zero, which is why
// we use thunks. Alternatively, we could generate inline code
// with an explicit zero check, which would probably be a bit
// faster.
case lir::Divide: case lir::Divide:
case lir::Remainder: case lir::Remainder:
case lir::FloatRemainder: case lir::FloatRemainder:
@ -567,7 +592,7 @@ class MyArchitecture : public Architecture {
case lir::FloatMultiply: case lir::FloatMultiply:
case lir::FloatDivide: case lir::FloatDivide:
if (vfpSupported()) { if (vfpSupported()) {
bMask.typeMask = lir::Operand::RegisterPairMask; aMask.typeMask = lir::Operand::RegisterPairMask;
aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK); aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK);
bMask = aMask; bMask = aMask;
} else { } else {
@ -745,19 +770,45 @@ class MyAssembler : public Assembler {
// how to handle them: // how to handle them:
assertT(&con, footprint < 256); assertT(&con, footprint < 256);
lir::RegisterPair stack(StackRegister); // todo: the ARM ABI says the frame preamble should be of the form
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); //
lir::Constant footprintConstant(&footprintPromise); // stp x29, x30, [sp,#-footprint]!
subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); // mov x29, sp
//
// and the frame should be popped with e.g.
//
// ldp x29, x30, [sp],#footprint
// br x30
//
// However, that will invalidate a lot of assumptions elsewhere
// about the return address being stored at the opposite end of
// the frame, so lots of other code will need to change before we
// can do that. The code below can be enabled as a starting point
// when we're ready to tackle that.
if (false and TargetBytesPerWord == 8) {
// stp x29, x30, [sp,#-footprint]!
con.code.append4(0xa9800000 | ((-footprint & 0x7f) << 15)
| (StackRegister.index() << 5)
| (LinkRegister.index() << 10) | FrameRegister.index());
lir::RegisterPair returnAddress(LinkRegister); lir::RegisterPair stack(StackRegister);
lir::Memory returnAddressDst(StackRegister, lir::RegisterPair frame(FrameRegister);
(footprint - 1) * TargetBytesPerWord); moveRR(&con, TargetBytesPerWord, &stack, TargetBytesPerWord, &frame);
moveRM(&con, } else {
TargetBytesPerWord, lir::RegisterPair stack(StackRegister);
&returnAddress, ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
TargetBytesPerWord, lir::Constant footprintConstant(&footprintPromise);
&returnAddressDst); subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressDst(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveRM(&con,
TargetBytesPerWord,
&returnAddress,
TargetBytesPerWord,
&returnAddressDst);
}
} }
virtual void adjustFrame(unsigned difference) virtual void adjustFrame(unsigned difference)
@ -772,19 +823,26 @@ class MyAssembler : public Assembler {
{ {
footprint += FrameHeaderSize; footprint += FrameHeaderSize;
lir::RegisterPair returnAddress(LinkRegister); // see comment regarding the ARM64 ABI in allocateFrame
lir::Memory returnAddressSrc(StackRegister, if (false and TargetBytesPerWord == 8) {
(footprint - 1) * TargetBytesPerWord); // ldp x29, x30, [sp],#footprint
moveMR(&con, con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10)
TargetBytesPerWord, | 29);
&returnAddressSrc, } else {
TargetBytesPerWord, lir::RegisterPair returnAddress(LinkRegister);
&returnAddress); lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveMR(&con,
TargetBytesPerWord,
&returnAddressSrc,
TargetBytesPerWord,
&returnAddress);
lir::RegisterPair stack(StackRegister); lir::RegisterPair stack(StackRegister);
ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); ResolvedPromise footprintPromise(footprint * TargetBytesPerWord);
lir::Constant footprintConstant(&footprintPromise); lir::Constant footprintConstant(&footprintPromise);
addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack);
}
} }
virtual void popFrameForTailCall(unsigned footprint, virtual void popFrameForTailCall(unsigned footprint,
@ -851,10 +909,26 @@ class MyAssembler : public Assembler {
return_(&con); return_(&con);
} }
virtual void popFrameAndUpdateStackAndReturn(unsigned frameFootprint, virtual void popFrameAndUpdateStackAndReturn(unsigned footprint,
unsigned stackOffsetFromThread) unsigned stackOffsetFromThread)
{ {
popFrame(frameFootprint); footprint += FrameHeaderSize;
// see comment regarding the ARM64 ABI in allocateFrame
if (false and TargetBytesPerWord == 8) {
// ldp x29, x30, [sp],#footprint
con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10)
| 29);
} else {
lir::RegisterPair returnAddress(LinkRegister);
lir::Memory returnAddressSrc(StackRegister,
(footprint - 1) * TargetBytesPerWord);
moveMR(&con,
TargetBytesPerWord,
&returnAddressSrc,
TargetBytesPerWord,
&returnAddress);
}
lir::RegisterPair stack(StackRegister); lir::RegisterPair stack(StackRegister);
lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread); lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread);
@ -946,17 +1020,28 @@ class MyAssembler : public Assembler {
unsigned instruction = o->block->start + padding(o->block, o->offset) unsigned instruction = o->block->start + padding(o->block, o->offset)
+ o->offset; + o->offset;
int32_t v = (entry - 8) - instruction;
expect(&con, v == (v & PoolOffsetMask));
int32_t* p = reinterpret_cast<int32_t*>(dst + instruction); int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
*p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
if (TargetBytesPerWord == 8) {
int32_t v = entry - instruction;
expect(&con, v == (v & PoolOffsetMask));
const int32_t mask = (PoolOffsetMask >> 2) << 5;
*p = (((v >> 2) << 5) & mask) | ((~mask) & *p);
} else {
int32_t v = (entry - 8) - instruction;
expect(&con, v == (v & PoolOffsetMask));
*p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
}
poolSize += TargetBytesPerWord; poolSize += TargetBytesPerWord;
} }
bool jump = needJump(b); bool jump = needJump(b);
if (jump) { if (jump) {
expect(&con, TargetBytesPerWord == 4);
write4(dst + dstOffset, write4(dst + dstOffset,
isa::b((poolSize + TargetBytesPerWord - 8) >> 2)); isa::b((poolSize + TargetBytesPerWord - 8) >> 2));
} }

View File

@ -12,6 +12,12 @@
#include "fixup.h" #include "fixup.h"
#include "block.h" #include "block.h"
namespace {
const unsigned InstructionSize = 4;
} // namespace
namespace avian { namespace avian {
namespace codegen { namespace codegen {
namespace arm { namespace arm {
@ -38,8 +44,7 @@ int64_t OffsetPromise::value()
assertT(con, resolved()); assertT(con, resolved());
unsigned o = offset - block->offset; unsigned o = offset - block->offset;
return block->start return block->start + padding(block, forTrace ? o - InstructionSize : o) + o;
+ padding(block, forTrace ? o - vm::TargetBytesPerWord : o) + o;
} }
Promise* offsetPromise(Context* con, bool forTrace) Promise* offsetPromise(Context* con, bool forTrace)
@ -92,17 +97,30 @@ bool bounded(int right, int left, int32_t v)
void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
{ {
// ARM's PC is two words ahead, and branches drop the bottom 2 bits.
int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
int32_t mask;
expect(s, bounded(0, 8, v));
mask = 0xFFFFFF;
int32_t* p = reinterpret_cast<int32_t*>(instruction); int32_t* p = reinterpret_cast<int32_t*>(instruction);
int32_t v;
int32_t mask;
if (vm::TargetBytesPerWord == 8) {
if ((*p >> 24) == 0x54) {
// conditional branch
v = ((reinterpret_cast<uint8_t*>(value) - instruction) >> 2) << 5;
mask = 0xFFFFE0;
} else {
// unconditional branch
v = (reinterpret_cast<uint8_t*>(value) - instruction) >> 2;
mask = 0x3FFFFFF;
}
} else {
v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
mask = 0xFFFFFF;
}
expect(s, bounded(0, 8, v));
*p = (v & mask) | ((~mask) & *p); *p = (v & mask) | ((~mask) & *p);
return instruction + 4; return instruction + InstructionSize;
} }
ConstantPoolEntry::ConstantPoolEntry(Context* con, ConstantPoolEntry::ConstantPoolEntry(Context* con,
@ -214,6 +232,101 @@ void appendPoolEvent(Context* con,
b->poolEventTail = e; b->poolEventTail = e;
} }
bool needJump(MyBlock* b)
{
return b->next or b->size != (b->size & PoolOffsetMask);
}
unsigned padding(MyBlock* b, unsigned offset)
{
unsigned total = 0;
for (PoolEvent* e = b->poolEventHead; e; e = e->next) {
if (e->offset <= offset) {
if (needJump(b)) {
total += vm::TargetBytesPerWord;
}
for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) {
total += vm::TargetBytesPerWord;
}
} else {
break;
}
}
return total;
}
void resolve(MyBlock* b)
{
Context* con = b->context;
if (b->poolOffsetHead) {
if (con->poolOffsetTail) {
con->poolOffsetTail->next = b->poolOffsetHead;
} else {
con->poolOffsetHead = b->poolOffsetHead;
}
con->poolOffsetTail = b->poolOffsetTail;
}
if (con->poolOffsetHead) {
bool append;
if (b->next == 0 or b->next->poolEventHead) {
append = true;
} else {
int32_t v
= (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8)
- (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
append = (v != (v & PoolOffsetMask));
if (DebugPool) {
fprintf(stderr,
"current %p %d %d next %p %d %d\n",
b,
b->start,
b->size,
b->next,
b->start + b->size,
b->next->size);
fprintf(stderr,
"offset %p %d is of distance %d to next block; append? %d\n",
con->poolOffsetHead,
con->poolOffsetHead->offset,
v,
append);
}
}
if (append) {
#ifndef NDEBUG
int32_t v
= (b->start + b->size - 8)
- (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
expect(con, v == (v & PoolOffsetMask));
#endif // not NDEBUG
appendPoolEvent(
con, b, b->size, con->poolOffsetHead, con->poolOffsetTail);
if (DebugPool) {
for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) {
fprintf(stderr,
"include %p %d in pool event %p at offset %d in block %p\n",
o,
o->offset,
b->poolEventTail,
b->size,
b);
}
}
con->poolOffsetHead = 0;
con->poolOffsetTail = 0;
}
}
}
} // namespace arm } // namespace arm
} // namespace codegen } // namespace codegen
} // namespace avian } // namespace avian

View File

@ -27,7 +27,7 @@ namespace arm {
const bool DebugPool = false; const bool DebugPool = false;
const int32_t PoolOffsetMask = 0xFFF; const int32_t PoolOffsetMask = vm::TargetBytesPerWord == 8 ? 0x1FFFFF : 0xFFF;
class Task { class Task {
public: public:

View File

@ -15,6 +15,8 @@
#include "fixup.h" #include "fixup.h"
#include "multimethod.h" #include "multimethod.h"
#if TARGET_BYTES_PER_WORD == 4
namespace avian { namespace avian {
namespace codegen { namespace codegen {
namespace arm { namespace arm {
@ -179,101 +181,6 @@ void unsignedShiftRightC(Context* con,
} }
} }
bool needJump(MyBlock* b)
{
return b->next or b->size != (b->size & PoolOffsetMask);
}
unsigned padding(MyBlock* b, unsigned offset)
{
unsigned total = 0;
for (PoolEvent* e = b->poolEventHead; e; e = e->next) {
if (e->offset <= offset) {
if (needJump(b)) {
total += vm::TargetBytesPerWord;
}
for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) {
total += vm::TargetBytesPerWord;
}
} else {
break;
}
}
return total;
}
void resolve(MyBlock* b)
{
Context* con = b->context;
if (b->poolOffsetHead) {
if (con->poolOffsetTail) {
con->poolOffsetTail->next = b->poolOffsetHead;
} else {
con->poolOffsetHead = b->poolOffsetHead;
}
con->poolOffsetTail = b->poolOffsetTail;
}
if (con->poolOffsetHead) {
bool append;
if (b->next == 0 or b->next->poolEventHead) {
append = true;
} else {
int32_t v
= (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8)
- (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
append = (v != (v & PoolOffsetMask));
if (DebugPool) {
fprintf(stderr,
"current %p %d %d next %p %d %d\n",
b,
b->start,
b->size,
b->next,
b->start + b->size,
b->next->size);
fprintf(stderr,
"offset %p %d is of distance %d to next block; append? %d\n",
con->poolOffsetHead,
con->poolOffsetHead->offset,
v,
append);
}
}
if (append) {
#ifndef NDEBUG
int32_t v
= (b->start + b->size - 8)
- (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
expect(con, v == (v & PoolOffsetMask));
#endif // not NDEBUG
appendPoolEvent(
con, b, b->size, con->poolOffsetHead, con->poolOffsetTail);
if (DebugPool) {
for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) {
fprintf(stderr,
"include %p %d in pool event %p at offset %d in block %p\n",
o,
o->offset,
b->poolEventTail,
b->size,
b);
}
}
con->poolOffsetHead = 0;
con->poolOffsetTail = 0;
}
}
}
void jumpR(Context* con, unsigned size UNUSED, lir::RegisterPair* target) void jumpR(Context* con, unsigned size UNUSED, lir::RegisterPair* target)
{ {
assertT(con, size == vm::TargetBytesPerWord); assertT(con, size == vm::TargetBytesPerWord);
@ -410,7 +317,8 @@ void moveCR2(Context* con,
lir::RegisterPair dstHi(dst->high); lir::RegisterPair dstHi(dst->high);
moveCR(con, 4, &srcLo, 4, dst); moveCR(con, 4, &srcLo, 4, dst);
moveCR(con, 4, &srcHi, 4, &dstHi); moveCR(con, 4, &srcHi, 4, &dstHi);
} else if (src->value->resolved() and isOfWidth(getValue(src), 8)) { } else if (callOffset == 0 and src->value->resolved()
and isOfWidth(getValue(src), 8)) {
emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate
} else { } else {
appendConstantPoolEntry(con, src->value, callOffset); appendConstantPoolEntry(con, src->value, callOffset);
@ -510,9 +418,9 @@ void multiplyR(Context* con,
if (size == 8) { if (size == 8) {
bool useTemporaries = b->low == t->low; bool useTemporaries = b->low == t->low;
Register tmpLow = useTemporaries ? con->client->acquireTemporary(GPR_MASK) Register tmpLow = useTemporaries ? con->client->acquireTemporary(GPR_MASK)
: t->low; : t->low;
Register tmpHigh = useTemporaries ? con->client->acquireTemporary(GPR_MASK) Register tmpHigh = useTemporaries ? con->client->acquireTemporary(GPR_MASK)
: t->high; : t->high;
emit(con, umull(tmpLow, tmpHigh, a->low, b->low)); emit(con, umull(tmpLow, tmpHigh, a->low, b->low));
emit(con, mla(tmpHigh, a->low, b->high, tmpHigh)); emit(con, mla(tmpHigh, a->low, b->high, tmpHigh));
@ -665,11 +573,11 @@ void floatDivideR(Context* con,
} }
Register normalize(Context* con, Register normalize(Context* con,
int offset, int offset,
Register index, Register index,
unsigned scale, unsigned scale,
bool* preserveIndex, bool* preserveIndex,
bool* release) bool* release)
{ {
if (offset != 0 or scale != 1) { if (offset != 0 or scale != 1) {
lir::RegisterPair normalizedIndex( lir::RegisterPair normalizedIndex(
@ -947,26 +855,8 @@ void load(Context* con,
case 8: { case 8: {
if (dstSize == 8) { if (dstSize == 8) {
lir::RegisterPair dstHigh(dst->high); lir::RegisterPair dstHigh(dst->high);
load(con, load(con, 4, base, offset, NoRegister, 1, 4, &dstHigh, false, false);
4, load(con, 4, base, offset + 4, NoRegister, 1, 4, dst, false, false);
base,
offset,
NoRegister,
1,
4,
&dstHigh,
false,
false);
load(con,
4,
base,
offset + 4,
NoRegister,
1,
4,
dst,
false,
false);
} else { } else {
emit(con, ldri(dst->low, base, offset)); emit(con, ldri(dst->low, base, offset));
} }
@ -1496,15 +1386,26 @@ void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target)
callR(con, vm::TargetBytesPerWord, &tmp); callR(con, vm::TargetBytesPerWord, &tmp);
} }
void alignedLongCallC(Context* con, unsigned size, lir::Constant* target)
{
longCallC(con, size, target);
}
void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target) void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
{ {
assertT(con, size == vm::TargetBytesPerWord); assertT(con, size == vm::TargetBytesPerWord);
lir::RegisterPair tmp(Register(4)); // a non-arg reg that we don't mind clobbering lir::RegisterPair tmp(
Register(4)); // a non-arg reg that we don't mind clobbering
moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con)); moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con));
jumpR(con, vm::TargetBytesPerWord, &tmp); jumpR(con, vm::TargetBytesPerWord, &tmp);
} }
void alignedLongJumpC(Context* con, unsigned size, lir::Constant* target)
{
longJumpC(con, size, target);
}
void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target) void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
{ {
assertT(con, size == vm::TargetBytesPerWord); assertT(con, size == vm::TargetBytesPerWord);
@ -1554,3 +1455,5 @@ void storeLoadBarrier(Context* con)
} // namespace arm } // namespace arm
} // namespace codegen } // namespace codegen
} // namespace avian } // namespace avian
#endif // TARGET_BYTES_PER_WORD == 4

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,8 @@
#include <avian/codegen/lir.h> #include <avian/codegen/lir.h>
#include <avian/codegen/assembler.h> #include <avian/codegen/assembler.h>
#include "avian/environment.h"
namespace avian { namespace avian {
namespace codegen { namespace codegen {
namespace arm { namespace arm {
@ -21,16 +23,30 @@ namespace arm {
const uint64_t MASK_LO32 = 0xffffffff; const uint64_t MASK_LO32 = 0xffffffff;
const unsigned MASK_LO8 = 0xff; const unsigned MASK_LO8 = 0xff;
#if TARGET_BYTES_PER_WORD == 8
constexpr Register ThreadRegister(19);
constexpr Register StackRegister(31);
constexpr Register LinkRegister(30);
constexpr Register FrameRegister(29);
constexpr Register ProgramCounter(0xFE); // i.e. unaddressable
const int N_GPRS = 32;
const int N_FPRS = 32;
const RegisterMask GPR_MASK = 0xffffffff;
const RegisterMask FPR_MASK = 0xffffffff00000000;
#else
constexpr Register ThreadRegister(8);
constexpr Register StackRegister(13);
constexpr Register LinkRegister(14);
constexpr Register FrameRegister(0xFE); // i.e. there is none
constexpr Register ProgramCounter(15);
const int N_GPRS = 16; const int N_GPRS = 16;
const int N_FPRS = 16; const int N_FPRS = 16;
const RegisterMask GPR_MASK = 0xffff; const RegisterMask GPR_MASK = 0xffff;
const RegisterMask FPR_MASK = 0xffff0000; const RegisterMask FPR_MASK = 0xffff0000;
inline bool isFpr(lir::RegisterPair* reg)
{
return reg->low.index() >= N_GPRS;
}
inline int fpr64(Register reg) inline int fpr64(Register reg)
{ {
return reg.index() - N_GPRS; return reg.index() - N_GPRS;
@ -47,19 +63,13 @@ inline int fpr32(lir::RegisterPair* reg)
{ {
return fpr64(reg) << 1; return fpr64(reg) << 1;
} }
#ifdef ARCH_arm64
constexpr Register ThreadRegister(19);
constexpr Register StackRegister(31);
constexpr Register LinkRegister(30);
constexpr Register ProgramCounter(0xFE); // i.e. unaddressable
#else
constexpr Register ThreadRegister(8);
constexpr Register StackRegister(13);
constexpr Register LinkRegister(14);
constexpr Register ProgramCounter(15);
#endif #endif
inline bool isFpr(lir::RegisterPair* reg)
{
return reg->low.index() >= N_GPRS;
}
} // namespace arm } // namespace arm
} // namespace codegen } // namespace codegen
} // namespace avian } // namespace avian

View File

@ -16,11 +16,11 @@
#define BYTES_PER_WORD 4 #define BYTES_PER_WORD 4
#define LOCAL(x) .L##x #define LOCAL(x) .L##x
#ifdef __APPLE__ #ifdef __APPLE__
# define GLOBAL(x) _##x # define GLOBAL(x) _##x
#else #else
# define GLOBAL(x) x # define GLOBAL(x) x
#endif #endif
#define CONTINUATION_NEXT 4 #define CONTINUATION_NEXT 4
@ -29,7 +29,7 @@
#define CONTINUATION_FRAME_POINTER_OFFSET 24 #define CONTINUATION_FRAME_POINTER_OFFSET 24
#define CONTINUATION_LENGTH 28 #define CONTINUATION_LENGTH 28
#define CONTINUATION_BODY 32 #define CONTINUATION_BODY 32
.globl GLOBAL(vmInvoke) .globl GLOBAL(vmInvoke)
.align 2 .align 2
GLOBAL(vmInvoke): GLOBAL(vmInvoke):
@ -56,7 +56,7 @@ GLOBAL(vmInvoke):
eor r4, sp, r3 eor r4, sp, r3
tst r4, #4 tst r4, #4
subne sp, sp, #4 subne sp, sp, #4
// copy arguments into place // copy arguments into place
sub sp, r3 sub sp, r3
mov r4, #0 mov r4, #0
@ -87,7 +87,7 @@ LOCAL(vmInvoke_argumentTest):
GLOBAL(vmInvoke_returnAddress): GLOBAL(vmInvoke_returnAddress):
// restore stack pointer // restore stack pointer
ldr sp, [r8, #TARGET_THREAD_SCRATCH] ldr sp, [r8, #TARGET_THREAD_SCRATCH]
// clear MyThread::stack to avoid confusing another thread calling // clear MyThread::stack to avoid confusing another thread calling
// java.lang.Thread.getStackTrace on this one. See // java.lang.Thread.getStackTrace on this one. See
// MyProcess::getStackTrace in compile.cpp for details on how we get // MyProcess::getStackTrace in compile.cpp for details on how we get
@ -109,7 +109,7 @@ GLOBAL(vmInvoke_safeStack):
ldr r6,[r5,#CONTINUATION_LENGTH] ldr r6,[r5,#CONTINUATION_LENGTH]
lsl r6,r6,#2 lsl r6,r6,#2
neg r7,r6 neg r7,r6
add r7,r7,#-80 add r7,r7,#-80 // 80 bytes for callee-saved register values
mov r4,sp mov r4,sp
str r4,[sp,r7]! str r4,[sp,r7]!
@ -167,10 +167,10 @@ LOCAL(vmInvoke_handleException):
bx r7 bx r7
LOCAL(vmInvoke_exit): LOCAL(vmInvoke_exit):
#endif // AVIAN_CONTINUATIONS
mov ip, #0 mov ip, #0
str ip, [r8, #TARGET_THREAD_STACK] str ip, [r8, #TARGET_THREAD_STACK]
#endif // AVIAN_CONTINUATIONS
// restore return type // restore return type
ldr ip, [sp], #4 ldr ip, [sp], #4
@ -201,7 +201,7 @@ GLOBAL(vmJumpAndInvoke):
// which is not true in this case // which is not true in this case
sub r2,r2,r6 sub r2,r2,r6
sub r2,r2,#84 sub r2,r2,#84
mov r8,r0 mov r8,r0
// copy arguments into place // copy arguments into place
@ -220,7 +220,7 @@ LOCAL(vmJumpAndInvoke_argumentTest):
// the arguments have been copied, so we can set the real stack // the arguments have been copied, so we can set the real stack
// pointer now // pointer now
mov sp,r2 mov sp,r2
// set return address to vmInvoke_returnAddress // set return address to vmInvoke_returnAddress
#ifdef __APPLE__ #ifdef __APPLE__
movw r11, :lower16:(GLOBAL(vmInvoke_returnAddress)-(LOCAL(vmJumpAndInvoke_getAddress)+8)) movw r11, :lower16:(GLOBAL(vmInvoke_returnAddress)-(LOCAL(vmJumpAndInvoke_getAddress)+8))
@ -246,7 +246,7 @@ LOCAL(vmInvoke_getAddress_word):
LOCAL(vmJumpAndInvoke_getAddress_word): LOCAL(vmJumpAndInvoke_getAddress_word):
.word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8) .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8)
#endif // not __APPLE__ #endif // not __APPLE__
#else // not AVIAN_CONTINUATIONS #else // not AVIAN_CONTINUATIONS
// vmJumpAndInvoke should only be called when continuations are // vmJumpAndInvoke should only be called when continuations are
// enabled, so we force a crash if we reach here: // enabled, so we force a crash if we reach here:

View File

@ -13,23 +13,23 @@
.text .text
#define BYTES_PER_WORD 4 #define BYTES_PER_WORD 8
#define LOCAL(x) .L##x #define LOCAL(x) .L##x
#ifdef __APPLE__ #ifdef __APPLE__
# define GLOBAL(x) _##x # define GLOBAL(x) _##x
#else #else
# define GLOBAL(x) x # define GLOBAL(x) x
#endif #endif
#define CONTINUATION_NEXT 4 #define CONTINUATION_NEXT 8
#define CONTINUATION_ADDRESS 16 #define CONTINUATION_ADDRESS 32
#define CONTINUATION_RETURN_ADDRESS_OFFSET 20 #define CONTINUATION_RETURN_ADDRESS_OFFSET 40
#define CONTINUATION_FRAME_POINTER_OFFSET 24 #define CONTINUATION_FRAME_POINTER_OFFSET 48
#define CONTINUATION_LENGTH 28 #define CONTINUATION_LENGTH 56
#define CONTINUATION_BODY 32 #define CONTINUATION_BODY 64
.globl GLOBAL(vmInvoke) .globl GLOBAL(vmInvoke)
.align 2 .align 2
GLOBAL(vmInvoke): GLOBAL(vmInvoke):
@ -43,6 +43,7 @@ GLOBAL(vmInvoke):
// allocate frame // allocate frame
stp x29, x30, [sp,#-96]! stp x29, x30, [sp,#-96]!
mov x29, sp
// save callee-saved register values // save callee-saved register values
stp x19, x20, [sp,#16] stp x19, x20, [sp,#16]
@ -59,7 +60,7 @@ GLOBAL(vmInvoke):
// copy arguments into place // copy arguments into place
sub sp, sp, w3, uxtw sub sp, sp, w3, uxtw
mov x5, #0 mov x4, #0
b LOCAL(vmInvoke_argumentTest) b LOCAL(vmInvoke_argumentTest)
LOCAL(vmInvoke_argumentLoop): LOCAL(vmInvoke_argumentLoop):
@ -89,22 +90,74 @@ GLOBAL(vmInvoke_returnAddress):
// MyProcess::getStackTrace in compile.cpp for details on how we get // MyProcess::getStackTrace in compile.cpp for details on how we get
// a reliable stack trace from a thread that might be interrupted at // a reliable stack trace from a thread that might be interrupted at
// any point in its execution. // any point in its execution.
mov x5, #0 str xzr, [x19, #TARGET_THREAD_STACK]
str x5, [x19, #TARGET_THREAD_STACK]
.globl GLOBAL(vmInvoke_safeStack) .globl GLOBAL(vmInvoke_safeStack)
.align 2 .align 2
GLOBAL(vmInvoke_safeStack): GLOBAL(vmInvoke_safeStack):
#ifdef AVIAN_CONTINUATIONS #ifdef AVIAN_CONTINUATIONS
#error todo // call the next continuation, if any
// x5 = the value stored at x19 + TARGET_THREAD_CONTINUATION; when it is
// zero there is no continuation to call, so branch to vmInvoke_exit
ldr x5, [x19,#TARGET_THREAD_CONTINUATION]
cmp x5, xzr
b.eq LOCAL(vmInvoke_exit)
// x6 = the continuation's CONTINUATION_LENGTH field shifted left by 3
// (presumably a count of 8-byte words converted to bytes -- confirm)
ldr x6, [x5,#CONTINUATION_LENGTH]
lsl x6, x6, #3
// x7 = -(x6 + 128): the amount by which sp is moved below
neg x7, x6
add x7, x7, #-128 // 128 bytes for callee-saved register values
// move sp down by that amount and store the previous sp value at the
// new [sp]
mov x4, sp
add sp, sp, x7
str x4, [sp]
// copy from the continuation body (x5 + CONTINUATION_BODY) to the stack,
// eight bytes per iteration; x11 is the byte offset into both
add x7, x5, #CONTINUATION_BODY
mov x11, xzr
b LOCAL(vmInvoke_continuationTest)
LOCAL(vmInvoke_continuationLoop):
ldr x9, [x7,x11]
str x9, [sp,x11]
add x11, x11, #8
LOCAL(vmInvoke_continuationTest):
// NOTE(review): b.le keeps looping while x11 <= x6, so the slot at
// offset x6 is copied too -- confirm the inclusive bound is intended
cmp x11, x6
b.le LOCAL(vmInvoke_continuationLoop)
// store the address of vmInvoke_returnAddress into the stack slot whose
// offset is read from the CONTINUATION_RETURN_ADDRESS_OFFSET field
ldr x7, [x5,#CONTINUATION_RETURN_ADDRESS_OFFSET]
adr x11, GLOBAL(vmInvoke_returnAddress)
str x11, [sp,x7]
// replace the thread's continuation with this one's CONTINUATION_NEXT
// field
ldr x7, [x5,#CONTINUATION_NEXT]
str x7, [x19,#TARGET_THREAD_CONTINUATION]
// call the continuation unless we're handling an exception
ldr x7, [x19,#TARGET_THREAD_EXCEPTION]
cmp x7, xzr
b.ne LOCAL(vmInvoke_handleException)
// no exception pending: jump to the continuation's CONTINUATION_ADDRESS
ldr x7, [x5,#CONTINUATION_ADDRESS]
br x7
LOCAL(vmInvoke_handleException):
// we're handling an exception - call the exception handler instead
// (x7 still holds the value loaded from TARGET_THREAD_EXCEPTION above)
// clear the thread's exception field
str xzr, [x19,#TARGET_THREAD_EXCEPTION]
// move sp down by the thread's exception stack adjustment, carrying the
// word at the old [sp] over to the new [sp]
ldr x11, [x19,#TARGET_THREAD_EXCEPTIONSTACKADJUSTMENT]
ldr x9, [sp]
neg x11, x11
add sp, sp, x11
str x9, [sp]
// store x7 at the stack offset read from TARGET_THREAD_EXCEPTIONOFFSET,
// then jump to the address read from TARGET_THREAD_EXCEPTIONHANDLER
ldr x11, [x19,#TARGET_THREAD_EXCEPTIONOFFSET]
str x7, [sp,x11]
ldr x7, [x19,#TARGET_THREAD_EXCEPTIONHANDLER]
br x7
LOCAL(vmInvoke_exit):
// zero the thread's TARGET_THREAD_STACK field
str xzr, [x19, #TARGET_THREAD_STACK]
#endif // AVIAN_CONTINUATIONS #endif // AVIAN_CONTINUATIONS
mov x5, #0
str x5, [x19, #TARGET_THREAD_STACK]
// restore return type // restore return type
ldr w5, [sp], #4 ldr w5, [sp],#16
// restore callee-saved register values // restore callee-saved register values
ldp x19, x20, [sp,#16] ldp x19, x20, [sp,#16]
@ -121,7 +174,44 @@ LOCAL(vmInvoke_return):
.align 2 .align 2
GLOBAL(vmJumpAndInvoke): GLOBAL(vmJumpAndInvoke):
#ifdef AVIAN_CONTINUATIONS #ifdef AVIAN_CONTINUATIONS
#error todo // x0: thread
// x1: address
// x2: stack
// x3: argumentFootprint
// x4: arguments
// x5: frameSize
// allocate new frame, adding room for callee-saved registers, plus
// 8 bytes of padding since the calculation of frameSize assumes 8
// bytes have already been allocated to save the return address,
// which is not true in this case
// x2 = stack - frameSize - 136 (presumably 128 bytes of callee-saved
// register space plus the 8 bytes of padding -- confirm)
sub x2, x2, x5
sub x2, x2, #136
// keep the thread argument (x0) in x19
mov x19, x0
// copy arguments into place
// x6 is the byte offset used for both the source (x4) and the
// destination (x2)
mov x6, xzr
b LOCAL(vmJumpAndInvoke_argumentTest)
LOCAL(vmJumpAndInvoke_argumentLoop):
ldr x12, [x4,x6]
str x12, [x2,x6]
// NOTE(review): the offset advances by 4 although each ldr/str above
// moves an 8-byte x register -- confirm whether #8 was intended
add x6, x6, #4
LOCAL(vmJumpAndInvoke_argumentTest):
// loop while x6 <= x3 (argumentFootprint); the bound is inclusive
cmp x6, x3
ble LOCAL(vmJumpAndInvoke_argumentLoop)
// the arguments have been copied, so we can set the real stack
// pointer now
mov sp, x2
// set return address to vmInvoke_returnAddress
adr x30, GLOBAL(vmInvoke_returnAddress)
// jump to the target address passed in x1
br x1
#else // not AVIAN_CONTINUATIONS #else // not AVIAN_CONTINUATIONS
// vmJumpAndInvoke should only be called when continuations are // vmJumpAndInvoke should only be called when continuations are
// enabled, so we force a crash if we reach here: // enabled, so we force a crash if we reach here:

View File

@ -2189,6 +2189,8 @@ GcContinuation* makeCurrentContinuation(MyThread* t,
*targetIp = 0; *targetIp = 0;
while (*targetIp == 0) { while (*targetIp == 0) {
assertT(t, ip);
GcMethod* method = methodForIp(t, ip); GcMethod* method = methodForIp(t, ip);
if (method) { if (method) {
PROTECT(t, method); PROTECT(t, method);