various ARM64 JIT bugfixes

Three of the tests now pass.  Yay!
This commit is contained in:
Joel Dice 2014-12-23 16:59:04 -07:00
parent 9158ee39c0
commit cbea966d1d
6 changed files with 209 additions and 155 deletions

View File

@ -946,11 +946,20 @@ class MyAssembler : public Assembler {
unsigned instruction = o->block->start + padding(o->block, o->offset)
+ o->offset;
int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
int32_t v = entry - instruction;
expect(&con, v == (v & PoolOffsetMask));
const int32_t mask = (PoolOffsetMask >> 2) << 5;
*p = (((v >> 2) << 5) & mask) | ((~mask) & *p);
#else
int32_t v = (entry - 8) - instruction;
expect(&con, v == (v & PoolOffsetMask));
int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
*p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
#endif
poolSize += TargetBytesPerWord;
}

View File

@ -92,14 +92,27 @@ bool bounded(int right, int left, int32_t v)
void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
{
// ARM's PC is two words ahead, and branches drop the bottom 2 bits.
int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
int32_t mask;
expect(s, bounded(0, 8, v));
mask = 0xFFFFFF;
int32_t* p = reinterpret_cast<int32_t*>(instruction);
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
int32_t v;
int32_t mask;
if ((*p >> 24) == 0x54) {
// conditional branch
v = ((reinterpret_cast<uint8_t*>(value) - instruction) >> 2) << 5;
mask = 0xFFFFE0;
} else {
// unconditional branch
v = (reinterpret_cast<uint8_t*>(value) - instruction) >> 2;
mask = 0x3FFFFFF;
}
#else
int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
const int32_t mask = 0xFFFFFF;
#endif
expect(s, bounded(0, 8, v));
*p = (v & mask) | ((~mask) & *p);
return instruction + 4;
@ -214,6 +227,101 @@ void appendPoolEvent(Context* con,
b->poolEventTail = e;
}
// Returns true when block `b` has a successor block, or when its size has
// bits set outside PoolOffsetMask.  padding() adds one extra word per pool
// event when this is true (presumably the branch that skips the pool data --
// TODO confirm against the assembler's write loop).
bool needJump(MyBlock* b)
{
  if (b->next) {
    return true;
  }

  const bool sizeFitsInMask = (b->size & PoolOffsetMask) == b->size;
  return not sizeFitsInMask;
}
// Returns the number of bytes occupied by the pool events of block `b` whose
// offset is at or before `offset`.
//
// Each such event contributes one word per pool offset it carries, plus one
// additional word when needJump(b) is true.  The walk stops at the first
// event located after `offset`.
unsigned padding(MyBlock* b, unsigned offset)
{
  unsigned bytes = 0;

  PoolEvent* event = b->poolEventHead;
  while (event and event->offset <= offset) {
    if (needJump(b)) {
      bytes += vm::TargetBytesPerWord;
    }

    // one word for every entry attached to this event
    PoolOffset* entry = event->poolOffsetHead;
    while (entry) {
      bytes += vm::TargetBytesPerWord;
      entry = entry->next;
    }

    event = event->next;
  }

  return bytes;
}
// Moves the pool offsets recorded on block `b` onto the context-wide pending
// list, then decides whether that pending list must be emitted as a pool
// event at the end of `b`.
//
// A pool event is appended when `b` has no successor, when the successor
// already has a pool event of its own, or when the distance from the oldest
// pending offset to the end of the successor would no longer fit in
// PoolOffsetMask.  Once appended, the context's pending list is cleared.
void resolve(MyBlock* b)
{
  Context* con = b->context;

  // Chain this block's offsets onto the tail of the context's pending list.
  if (b->poolOffsetHead) {
    if (not con->poolOffsetTail) {
      con->poolOffsetHead = b->poolOffsetHead;
    } else {
      con->poolOffsetTail->next = b->poolOffsetHead;
    }
    con->poolOffsetTail = b->poolOffsetTail;
  }

  PoolOffset* pending = con->poolOffsetHead;
  if (not pending) {
    return;  // nothing waiting for a pool
  }

  bool append = true;
  if (b->next and not b->next->poolEventHead) {
    // Distance from the oldest pending offset to the end of the next block.
    // NOTE(review): the "- 8" looks like the ARM32 PC read-ahead -- confirm
    // it is still the intended bias when targeting ARM64.
    const int32_t distance
        = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8)
          - (pending->offset + pending->block->start);

    append = ((distance & PoolOffsetMask) != distance);

    if (DebugPool) {
      fprintf(stderr,
              "current %p %d %d next %p %d %d\n",
              b,
              b->start,
              b->size,
              b->next,
              b->start + b->size,
              b->next->size);
      fprintf(stderr,
              "offset %p %d is of distance %d to next block; append? %d\n",
              pending,
              pending->offset,
              distance,
              append);
    }
  }

  if (not append) {
    return;
  }

#ifndef NDEBUG
  // The pool placed at the end of this block must itself be within reach.
  const int32_t reach = (b->start + b->size - 8)
                        - (pending->offset + pending->block->start);

  expect(con, reach == (reach & PoolOffsetMask));
#endif  // not NDEBUG

  appendPoolEvent(con, b, b->size, pending, con->poolOffsetTail);

  if (DebugPool) {
    for (PoolOffset* entry = pending; entry; entry = entry->next) {
      fprintf(stderr,
              "include %p %d in pool event %p at offset %d in block %p\n",
              entry,
              entry->offset,
              b->poolEventTail,
              b->size,
              b);
    }
  }

  con->poolOffsetHead = 0;
  con->poolOffsetTail = 0;
}
} // namespace arm
} // namespace codegen
} // namespace avian

View File

@ -27,7 +27,11 @@ namespace arm {
// When true, the constant-pool bookkeeping prints diagnostics to stderr.
const bool DebugPool = false;
// Mask of the bits a pool-entry distance may occupy; callers check a distance
// `v` with `v == (v & PoolOffsetMask)`.  The ARM64 build allows 21 bits, every
// other build 12 bits.
#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
const int32_t PoolOffsetMask = 0x1FFFFF;
#else
const int32_t PoolOffsetMask = 0xFFF;
#endif
class Task {
public:

View File

@ -181,101 +181,6 @@ void unsignedShiftRightC(Context* con,
}
}
// Returns true when block `b` has a successor block, or when its size has
// bits set outside PoolOffsetMask.  padding() adds one extra word per pool
// event when this is true (presumably the branch that skips the pool data --
// TODO confirm against the assembler's write loop).
bool needJump(MyBlock* b)
{
  if (b->next) {
    return true;
  }

  const bool sizeFitsInMask = (b->size & PoolOffsetMask) == b->size;
  return not sizeFitsInMask;
}
// Returns the number of bytes occupied by the pool events of block `b` whose
// offset is at or before `offset`.
//
// Each such event contributes one word per pool offset it carries, plus one
// additional word when needJump(b) is true.  The walk stops at the first
// event located after `offset`.
unsigned padding(MyBlock* b, unsigned offset)
{
  unsigned bytes = 0;

  PoolEvent* event = b->poolEventHead;
  while (event and event->offset <= offset) {
    if (needJump(b)) {
      bytes += vm::TargetBytesPerWord;
    }

    // one word for every entry attached to this event
    PoolOffset* entry = event->poolOffsetHead;
    while (entry) {
      bytes += vm::TargetBytesPerWord;
      entry = entry->next;
    }

    event = event->next;
  }

  return bytes;
}
// Moves the pool offsets recorded on block `b` onto the context-wide pending
// list, then decides whether that pending list must be emitted as a pool
// event at the end of `b`.
//
// A pool event is appended when `b` has no successor, when the successor
// already has a pool event of its own, or when the distance from the oldest
// pending offset to the end of the successor would no longer fit in
// PoolOffsetMask.  Once appended, the context's pending list is cleared.
void resolve(MyBlock* b)
{
  Context* con = b->context;

  // Chain this block's offsets onto the tail of the context's pending list.
  if (b->poolOffsetHead) {
    if (not con->poolOffsetTail) {
      con->poolOffsetHead = b->poolOffsetHead;
    } else {
      con->poolOffsetTail->next = b->poolOffsetHead;
    }
    con->poolOffsetTail = b->poolOffsetTail;
  }

  PoolOffset* pending = con->poolOffsetHead;
  if (not pending) {
    return;  // nothing waiting for a pool
  }

  bool append = true;
  if (b->next and not b->next->poolEventHead) {
    // Distance from the oldest pending offset to the end of the next block.
    // NOTE(review): the "- 8" looks like the ARM32 PC read-ahead -- confirm
    // it is still the intended bias when targeting ARM64.
    const int32_t distance
        = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8)
          - (pending->offset + pending->block->start);

    append = ((distance & PoolOffsetMask) != distance);

    if (DebugPool) {
      fprintf(stderr,
              "current %p %d %d next %p %d %d\n",
              b,
              b->start,
              b->size,
              b->next,
              b->start + b->size,
              b->next->size);
      fprintf(stderr,
              "offset %p %d is of distance %d to next block; append? %d\n",
              pending,
              pending->offset,
              distance,
              append);
    }
  }

  if (not append) {
    return;
  }

#ifndef NDEBUG
  // The pool placed at the end of this block must itself be within reach.
  const int32_t reach = (b->start + b->size - 8)
                        - (pending->offset + pending->block->start);

  expect(con, reach == (reach & PoolOffsetMask));
#endif  // not NDEBUG

  appendPoolEvent(con, b, b->size, pending, con->poolOffsetTail);

  if (DebugPool) {
    for (PoolOffset* entry = pending; entry; entry = entry->next) {
      fprintf(stderr,
              "include %p %d in pool event %p at offset %d in block %p\n",
              entry,
              entry->offset,
              b->poolEventTail,
              b->size,
              b);
    }
  }

  con->poolOffsetHead = 0;
  con->poolOffsetTail = 0;
}
void jumpR(Context* con, unsigned size UNUSED, lir::RegisterPair* target)
{
assertT(con, size == vm::TargetBytesPerWord);

View File

@ -125,9 +125,16 @@ uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size)
return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index();
}
// Encodes an add-immediate instruction: Rd = Rn + value.
//
// `size` == 8 selects the 64-bit opcode (0x91000000), anything else the
// 32-bit one (0x11000000).  A non-zero `shift` sets bit 22 (0x400000).
// `value` is placed at bit 10 without masking, so it must already fit the
// immediate field.
uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size)
{
  const uint32_t opcode = (size == 8) ? 0x91000000 : 0x11000000;
  const uint32_t shiftFlag = shift ? 0x400000 : 0;

  return opcode | shiftFlag | (value << 10) | (Rn.index() << 5) | Rd.index();
}
// Encodes a register-to-register move: Rd = Rn.
//
// The usual encoding is ORR Rd, <reg 31>, Rn.  In that form register number
// 31 is read as the zero register, so a move *from* the stack pointer
// (index 31) cannot be expressed with it; that case is encoded as
// "add Rd, Rn, #0" instead.
uint32_t mov(Register Rd, Register Rn, unsigned size)
{
  return Rn.index() == 31 ? addi(Rd, Rn, 0, 0, size)
                          : orr(Rd, Register(31), Rn, size);
}
uint32_t movz(Register Rd, int value, unsigned shift, unsigned size)
@ -150,7 +157,8 @@ uint32_t movk(Register Rd, int value, unsigned shift, unsigned size)
// Encodes a PC-relative literal load into Rd.
//
// `offset` is a byte offset; the instruction stores it as a word offset
// (offset / 4) in the 19-bit field at bits [23:5].  `size` == 8 selects the
// 64-bit opcode (0x58000000), anything else the 32-bit one (0x18000000).
// The field is masked so that a negative offset cannot spill into the opcode
// bits.
uint32_t ldrPCRel(Register Rd, int offset, unsigned size)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;

  return (size == 8 ? 0x58000000 : 0x18000000) | imm19 | Rd.index();
}
uint32_t add(Register Rd, Register Rn, Register Rm, unsigned size)
@ -186,12 +194,6 @@ uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size)
return madd(Rd, Rn, Rm, Register(31), size);
}
// Encodes an add-immediate instruction: Rd = Rn + value.
//
// `size` == 8 selects the 64-bit opcode (0x91000000), anything else the
// 32-bit one (0x11000000).  A non-zero `shift` sets bit 22 (0x400000).
// `value` is placed at bit 10 without masking, so it must already fit the
// immediate field.
uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size)
{
  const uint32_t opcode = (size == 8) ? 0x91000000 : 0x11000000;
  const uint32_t shiftFlag = shift ? 0x400000 : 0;

  return opcode | shiftFlag | (value << 10) | (Rn.index() << 5) | Rd.index();
}
uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size)
{
return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 0x400000 : 0)
@ -307,8 +309,8 @@ uint32_t strhi(Register Rs, Register Rn, int offset)
// Encodes a store of Rs to [Rn + offset] using the unsigned-offset form.
//
// `size` == 8 selects the 64-bit opcode (0xf9000000), anything else the
// 32-bit one (0xb9000000).  `offset` is a byte offset; the instruction holds
// it scaled by the access size (>> 3 for 8 bytes, >> 2 for 4 bytes) at
// bit 10, so it must be a multiple of that size.
// NOTE(review): this form cannot represent negative offsets -- confirm the
// callers never pass one.
uint32_t stri(Register Rs, Register Rn, int offset, unsigned size)
{
  return (size == 8 ? 0xf9000000 : 0xb9000000)
         | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5)
         | Rs.index();
}
uint32_t ldrFd(Register Fd, Register Rn, Register Rm, unsigned size)
@ -381,8 +383,8 @@ uint32_t ldrswi(Register Rd, Register Rn, int offset)
// Encodes a load of Rd from [Rn + offset] using the unsigned-offset form.
//
// `size` == 8 selects the 64-bit opcode (0xf9400000), anything else the
// 32-bit one (0xb9400000).  `offset` is a byte offset; the instruction holds
// it scaled by the access size (>> 3 for 8 bytes, >> 2 for 4 bytes) at
// bit 10, so it must be a multiple of that size.
// NOTE(review): this form cannot represent negative offsets -- confirm the
// callers never pass one.
uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size)
{
  return (size == 8 ? 0xf9400000 : 0xb9400000)
         | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5)
         | Rd.index();
}
uint32_t fcmp(Register Fn, Register Fm, unsigned size)
@ -400,7 +402,7 @@ uint32_t neg(Register Rd, Register Rm, unsigned size)
// Encodes a register-register compare of Rn against Rm (a flag-setting
// subtract whose destination field is register 31).
//
// `size` == 8 selects the 64-bit opcode, anything else the 32-bit one.  When
// Rn is register 31 the plain form would not refer to the stack pointer, so
// the bits 0x2063ff are OR-ed in instead of the Rn field: they select the
// extended-register variant (bit 21 plus option bits 0x6000) with Rn = 31.
uint32_t cmp(Register Rn, Register Rm, unsigned size)
{
  return (size == 8 ? 0xeb00001f : 0x6b00001f) | (Rm.index() << 16)
         | (Rn.index() == 31 ? 0x2063ff : (Rn.index() << 5));
}
uint32_t cmpi(Register Rn, int value, unsigned shift, unsigned size)
@ -426,42 +428,42 @@ uint32_t blr(Register Rn)
// Encodes a conditional branch taken when the "equal" condition holds
// (condition code 0x0 in the low four bits of opcode 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t beq(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x54000000 | imm19;
}
// Encodes a conditional branch taken when the "not equal" condition holds
// (condition code 0x1 in the low four bits of opcode 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t bne(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x54000001 | imm19;
}
// Encodes a conditional branch taken when the signed "less than" condition
// holds (condition code 0xb in the low four bits of opcode 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t blt(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x5400000b | imm19;
}
// Encodes a conditional branch taken when the signed "greater than"
// condition holds (condition code 0xc in the low four bits of opcode
// 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t bgt(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x5400000c | imm19;
}
// Encodes a conditional branch taken when the signed "less than or equal"
// condition holds (condition code 0xd in the low four bits of opcode
// 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t ble(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x5400000d | imm19;
}
// Encodes a conditional branch taken when the signed "greater than or equal"
// condition holds (condition code 0xa in the low four bits of opcode
// 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t bge(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x5400000a | imm19;
}
// Encodes a conditional branch taken when the unsigned "higher" condition
// holds (condition code 0x8 in the low four bits of opcode 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t bhi(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x54000008 | imm19;
}
// Encodes a conditional branch taken when the "plus" (non-negative)
// condition holds (condition code 0x5 in the low four bits of opcode
// 0x54000000).
//
// `offset` is the byte distance to the target; its low two bits are dropped
// and the resulting word offset is stored in the 19-bit field at bits
// [23:5].  The field is masked so that a negative (backward) offset cannot
// overwrite the opcode byte.
uint32_t bpl(int offset)
{
  const uint32_t imm19 = (static_cast<uint32_t>(offset >> 2) << 5) & 0xFFFFE0;
  return 0x54000005 | imm19;
}
uint32_t brk(int flag)
@ -966,7 +968,7 @@ void store(Context* c,
if (release) {
c->client->releaseTemporary(normalized);
}
} else if (abs(offset) == (abs(offset) & 0xFF)) {
} else if (abs(offset) == (abs(offset) & 0xFFF)) {
if (isFpr(src)) {
switch (size) {
case 4:
@ -988,7 +990,12 @@ void store(Context* c,
break;
case 4:
assertT(c, offset == (offset & (~3)));
append(c, stri(src->low, base, offset, size));
break;
case 8:
assertT(c, offset == (offset & (~7)));
append(c, stri(src->low, base, offset, size));
break;
@ -1020,8 +1027,21 @@ void moveRM(Context* c,
{
assertT(c, srcSize == dstSize);
store(
c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
if (src->low.index() == 31) {
assertT(c, c->client == 0); // the compiler should never ask us to
// store the SP; we'll only get here
// when assembling a thunk
lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can
// clobber this
moveRR(c, srcSize, src, srcSize, &tmp);
store(
c, srcSize, &tmp, dst->base, dst->offset, dst->index, dst->scale, true);
} else {
store(
c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
}
}
void load(Context* c,
@ -1085,7 +1105,7 @@ void load(Context* c,
if (release) {
c->client->releaseTemporary(normalized);
}
} else if (abs(offset) == (abs(offset) & 0xFF)) {
} else if (abs(offset) == (abs(offset) & 0xFFF)) {
if (isFpr(dst)) {
switch (srcSize) {
case 4:
@ -1119,6 +1139,7 @@ void load(Context* c,
if (signExtend and srcSize == 4 and dstSize == 8) {
append(c, ldrswi(dst->low, base, offset));
} else {
assertT(c, offset == (offset & (srcSize == 8 ? (~7) : (~3))));
append(c, ldri(dst->low, base, offset, srcSize));
}
break;
@ -1238,7 +1259,8 @@ void moveAR(Context* c,
unsigned dstSize,
lir::RegisterPair* dst)
{
assertT(c, srcSize == TargetBytesPerWord and dstSize == TargetBytesPerWord);
assertT(c, srcSize == vm::TargetBytesPerWord
and dstSize == vm::TargetBytesPerWord);
lir::Constant constant(src->address);
moveCR(c, srcSize, &constant, dstSize, dst);
@ -1312,6 +1334,20 @@ void compareRM(Context* c,
c->client->releaseTemporary(tmp.low);
}
// Compares memory operand `a` against register operand `b`.
//
// The two sizes must match.  The memory operand is first loaded into a
// temporary general-purpose register obtained from the client, the
// register-register compare is emitted with that temporary as the first
// operand, and the temporary is then released.
void compareMR(Context* c,
               unsigned aSize,
               lir::Memory* a,
               unsigned bSize,
               lir::RegisterPair* b)
{
  assertT(c, aSize == bSize);

  lir::RegisterPair scratch(c->client->acquireTemporary(GPR_MASK));

  // load `a`, then compare the loaded value with `b`
  moveMR(c, aSize, a, aSize, &scratch);
  compareRR(c, aSize, &scratch, bSize, b);

  c->client->releaseTemporary(scratch.low);
}
int32_t branch(Context* c, lir::TernaryOperation op)
{
switch (op) {
@ -1397,8 +1433,17 @@ void branchRM(Context* c,
assertT(c, not isFloatBranch(op));
assertT(c, size <= vm::TargetBytesPerWord);
compareRM(c, size, a, size, b);
branch(c, op, target);
if (a->low.index() == 31) {
// stack overflow checks need to compare to the stack pointer, but
// we can only encode that in the opposite operand order we're
// given, so we need to reverse everything:
assertT(c, op == lir::JumpIfGreaterOrEqual);
compareMR(c, size, b, size, a);
branch(c, lir::JumpIfLess, target);
} else {
compareRM(c, size, a, size, b);
branch(c, op, target);
}
}
void branchCM(Context* c,
@ -1537,21 +1582,6 @@ void storeLoadBarrier(Context* c)
memoryBarrier(c);
}
// Stub: ignores its argument and always reports that no jump is needed.
bool needJump(MyBlock*)
{
return false;
}
// Stub: ignores both arguments and always reports zero bytes of padding.
unsigned padding(MyBlock*, unsigned)
{
return 0;
}
// Stub: ignores its argument and does nothing.
void resolve(MyBlock*)
{
// intentionally empty
}
} // namespace arm
} // namespace codegen
} // namespace avian

View File

@ -89,8 +89,7 @@ GLOBAL(vmInvoke_returnAddress):
// MyProcess::getStackTrace in compile.cpp for details on how we get
// a reliable stack trace from a thread that might be interrupted at
// any point in its execution.
mov x5, #0
str x5, [x19, #TARGET_THREAD_STACK]
str xzr, [x19, #TARGET_THREAD_STACK]
.globl GLOBAL(vmInvoke_safeStack)
.align 2
@ -100,11 +99,10 @@ GLOBAL(vmInvoke_safeStack):
#error todo
#endif // AVIAN_CONTINUATIONS
mov x5, #0
str x5, [x19, #TARGET_THREAD_STACK]
str xzr, [x19, #TARGET_THREAD_STACK]
// restore return type
ldr w5, [sp], #4
ldr w5, [sp,#16]!
// restore callee-saved register values
ldp x19, x20, [sp,#16]
@ -112,7 +110,7 @@ GLOBAL(vmInvoke_safeStack):
ldp x23, x24, [sp,#48]
ldp x25, x26, [sp,#64]
ldp x27, x28, [sp,#80]
ldp x29, x30, [sp],#96
ldp x29, x30, [sp,#96]!
LOCAL(vmInvoke_return):
br x30