diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp
index 23b07ef201..b4046223e1 100644
--- a/src/codegen/target/arm/assembler.cpp
+++ b/src/codegen/target/arm/assembler.cpp
@@ -946,11 +946,20 @@ class MyAssembler : public Assembler {
       unsigned instruction = o->block->start + padding(o->block, o->offset)
                              + o->offset;
 
+      int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
+
+#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
+      int32_t v = entry - instruction;
+      expect(&con, v == (v & PoolOffsetMask));
+
+      const int32_t mask = (PoolOffsetMask >> 2) << 5;
+      *p = (((v >> 2) << 5) & mask) | ((~mask) & *p);
+#else
       int32_t v = (entry - 8) - instruction;
       expect(&con, v == (v & PoolOffsetMask));
 
-      int32_t* p = reinterpret_cast<int32_t*>(dst + instruction);
       *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p);
+#endif
 
       poolSize += TargetBytesPerWord;
     }
diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp
index e1d41b6eb4..7f5c0ff277 100644
--- a/src/codegen/target/arm/fixup.cpp
+++ b/src/codegen/target/arm/fixup.cpp
@@ -92,14 +92,27 @@ bool bounded(int right, int left, int32_t v)
 
 void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value)
 {
-  // ARM's PC is two words ahead, and branches drop the bottom 2 bits.
-  int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
-
-  int32_t mask;
-  expect(s, bounded(0, 8, v));
-  mask = 0xFFFFFF;
-
   int32_t* p = reinterpret_cast<int32_t*>(instruction);
+
+#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
+  int32_t v;
+  int32_t mask;
+  if ((*p >> 24) == 0x54) {
+    // conditional branch
+    v = ((reinterpret_cast<uint8_t*>(value) - instruction) >> 2) << 5;
+    mask = 0xFFFFE0;
+  } else {
+    // unconditional branch
+    v = (reinterpret_cast<uint8_t*>(value) - instruction) >> 2;
+    mask = 0x3FFFFFF;
+  }
+#else
+  int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
+  const int32_t mask = 0xFFFFFF;
+#endif
+
+  expect(s, bounded(0, 8, v));
+
   *p = (v & mask) | ((~mask) & *p);
 
   return instruction + 4;
@@ -214,6 +227,101 @@ void appendPoolEvent(Context* con,
   b->poolEventTail = e;
 }
 
+bool needJump(MyBlock* b)
+{
+  return b->next or b->size != (b->size & PoolOffsetMask);
+}
+
+unsigned padding(MyBlock* b, unsigned offset)
+{
+  unsigned total = 0;
+  for (PoolEvent* e = b->poolEventHead; e; e = e->next) {
+    if (e->offset <= offset) {
+      if (needJump(b)) {
+        total += vm::TargetBytesPerWord;
+      }
+      for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) {
+        total += vm::TargetBytesPerWord;
+      }
+    } else {
+      break;
+    }
+  }
+  return total;
+}
+
+void resolve(MyBlock* b)
+{
+  Context* con = b->context;
+
+  if (b->poolOffsetHead) {
+    if (con->poolOffsetTail) {
+      con->poolOffsetTail->next = b->poolOffsetHead;
+    } else {
+      con->poolOffsetHead = b->poolOffsetHead;
+    }
+    con->poolOffsetTail = b->poolOffsetTail;
+  }
+
+  if (con->poolOffsetHead) {
+    bool append;
+    if (b->next == 0 or b->next->poolEventHead) {
+      append = true;
+    } else {
+      int32_t v
+          = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8)
+            - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
+
+      append = (v != (v & PoolOffsetMask));
+
+      if (DebugPool) {
+        fprintf(stderr,
+                "current %p %d %d next %p %d %d\n",
+                b,
+                b->start,
+                b->size,
+                b->next,
+                b->start + b->size,
+                b->next->size);
+        fprintf(stderr,
+                "offset %p %d is of distance %d to next block; append? %d\n",
%d\n", + con->poolOffsetHead, + con->poolOffsetHead->offset, + v, + append); + } + } + + if (append) { +#ifndef NDEBUG + int32_t v + = (b->start + b->size - 8) + - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); + + expect(con, v == (v & PoolOffsetMask)); +#endif // not NDEBUG + + appendPoolEvent( + con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); + + if (DebugPool) { + for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) { + fprintf(stderr, + "include %p %d in pool event %p at offset %d in block %p\n", + o, + o->offset, + b->poolEventTail, + b->size, + b); + } + } + + con->poolOffsetHead = 0; + con->poolOffsetTail = 0; + } + } +} + } // namespace arm } // namespace codegen } // namespace avian diff --git a/src/codegen/target/arm/fixup.h b/src/codegen/target/arm/fixup.h index 5460295d95..2e9c0aca01 100644 --- a/src/codegen/target/arm/fixup.h +++ b/src/codegen/target/arm/fixup.h @@ -27,7 +27,11 @@ namespace arm { const bool DebugPool = false; +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +const int32_t PoolOffsetMask = 0x1FFFFF; +#else const int32_t PoolOffsetMask = 0xFFF; +#endif class Task { public: diff --git a/src/codegen/target/arm/operations32.cpp b/src/codegen/target/arm/operations32.cpp index 5a9f5e8a0e..07dd7f0175 100644 --- a/src/codegen/target/arm/operations32.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -181,101 +181,6 @@ void unsignedShiftRightC(Context* con, } } -bool needJump(MyBlock* b) -{ - return b->next or b->size != (b->size & PoolOffsetMask); -} - -unsigned padding(MyBlock* b, unsigned offset) -{ - unsigned total = 0; - for (PoolEvent* e = b->poolEventHead; e; e = e->next) { - if (e->offset <= offset) { - if (needJump(b)) { - total += vm::TargetBytesPerWord; - } - for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { - total += vm::TargetBytesPerWord; - } - } else { - break; - } - } - return total; -} - -void resolve(MyBlock* b) -{ - Context* con = b->context; - - if (b->poolOffsetHead) { - if (con->poolOffsetTail) { - con->poolOffsetTail->next = b->poolOffsetHead; - } else { - con->poolOffsetHead = b->poolOffsetHead; - } - con->poolOffsetTail = b->poolOffsetTail; - } - - if (con->poolOffsetHead) { - bool append; - if (b->next == 0 or b->next->poolEventHead) { - append = true; - } else { - int32_t v - = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8) - - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); - - append = (v != (v & PoolOffsetMask)); - - if (DebugPool) { - fprintf(stderr, - "current %p %d %d next %p %d %d\n", - b, - b->start, - b->size, - b->next, - b->start + b->size, - b->next->size); - fprintf(stderr, - "offset %p %d is of distance %d to next block; append? 
%d\n", - con->poolOffsetHead, - con->poolOffsetHead->offset, - v, - append); - } - } - - if (append) { -#ifndef NDEBUG - int32_t v - = (b->start + b->size - 8) - - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); - - expect(con, v == (v & PoolOffsetMask)); -#endif // not NDEBUG - - appendPoolEvent( - con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); - - if (DebugPool) { - for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) { - fprintf(stderr, - "include %p %d in pool event %p at offset %d in block %p\n", - o, - o->offset, - b->poolEventTail, - b->size, - b); - } - } - - con->poolOffsetHead = 0; - con->poolOffsetTail = 0; - } - } -} - void jumpR(Context* con, unsigned size UNUSED, lir::RegisterPair* target) { assertT(con, size == vm::TargetBytesPerWord); diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index c3058102df..32a31cf2f5 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -125,9 +125,16 @@ uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } +uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) +{ + return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) + | (value << 10) | (Rn.index() << 5) | Rd.index(); +} + uint32_t mov(Register Rd, Register Rn, unsigned size) { - return orr(Rd, Register(31), Rn, size); + return Rn.index() == 31 ? addi(Rd, Rn, 0, 0, size) + : orr(Rd, Register(31), Rn, size); } uint32_t movz(Register Rd, int value, unsigned shift, unsigned size) @@ -150,7 +157,8 @@ uint32_t movk(Register Rd, int value, unsigned shift, unsigned size) uint32_t ldrPCRel(Register Rd, int offset, unsigned size) { - return (size == 8 ? 0x58000000 : 0x18000000) | (offset << 5) | Rd.index(); + return (size == 8 ? 0x58000000 : 0x18000000) | ((offset >> 2) << 5) + | Rd.index(); } uint32_t add(Register Rd, Register Rn, Register Rm, unsigned size) @@ -186,12 +194,6 @@ uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size) return madd(Rd, Rn, Rm, Register(31), size); } -uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) -{ - return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) - | (value << 10) | (Rn.index() << 5) | Rd.index(); -} - uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size) { return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 0x400000 : 0) @@ -307,8 +309,8 @@ uint32_t strhi(Register Rs, Register Rn, int offset) uint32_t stri(Register Rs, Register Rn, int offset, unsigned size) { - return (size == 8 ? 0xb9000000 : 0xf9000000) | (offset << 10) - | (Rn.index() << 5) | Rs.index(); + return (size == 8 ? 0xf9000000 : 0xb9000000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Rs.index(); } uint32_t ldrFd(Register Fd, Register Rn, Register Rm, unsigned size) @@ -381,8 +383,8 @@ uint32_t ldrswi(Register Rd, Register Rn, int offset) uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size) { - return (size == 8 ? 0xb9400000 : 0xf9400000) | (offset << 10) - | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0xf9400000 : 0xb9400000) + | ((offset >> (size == 8 ? 
 }
 
 uint32_t fcmp(Register Fn, Register Fm, unsigned size)
@@ -400,7 +402,7 @@ uint32_t neg(Register Rd, Register Rm, unsigned size)
 uint32_t cmp(Register Rn, Register Rm, unsigned size)
 {
   return (size == 8 ? 0xeb00001f : 0x6b00001f) | (Rm.index() << 16)
-         | (Rn.index() << 5);
+         | (Rn.index() == 31 ? 0x2063ff : (Rn.index() << 5));
 }
 
 uint32_t cmpi(Register Rn, int value, unsigned shift, unsigned size)
@@ -426,42 +428,42 @@ uint32_t blr(Register Rn)
 
 uint32_t beq(int offset)
 {
-  return 0x54000000 | (offset >> 2);
+  return 0x54000000 | ((offset >> 2) << 5);
 }
 
 uint32_t bne(int offset)
 {
-  return 0x54000001 | (offset >> 2);
+  return 0x54000001 | ((offset >> 2) << 5);
 }
 
 uint32_t blt(int offset)
 {
-  return 0x5400000b | (offset >> 2);
+  return 0x5400000b | ((offset >> 2) << 5);
 }
 
 uint32_t bgt(int offset)
 {
-  return 0x5400000c | (offset >> 2);
+  return 0x5400000c | ((offset >> 2) << 5);
 }
 
 uint32_t ble(int offset)
 {
-  return 0x5400000d | (offset >> 2);
+  return 0x5400000d | ((offset >> 2) << 5);
 }
 
 uint32_t bge(int offset)
 {
-  return 0x5400000a | (offset >> 2);
+  return 0x5400000a | ((offset >> 2) << 5);
 }
 
 uint32_t bhi(int offset)
 {
-  return 0x54000008 | (offset >> 2);
+  return 0x54000008 | ((offset >> 2) << 5);
 }
 
 uint32_t bpl(int offset)
 {
-  return 0x54000005 | (offset >> 2);
+  return 0x54000005 | ((offset >> 2) << 5);
 }
 
 uint32_t brk(int flag)
@@ -966,7 +968,7 @@ void store(Context* c,
     if (release) {
       c->client->releaseTemporary(normalized);
     }
-  } else if (abs(offset) == (abs(offset) & 0xFF)) {
+  } else if (abs(offset) == (abs(offset) & 0xFFF)) {
     if (isFpr(src)) {
       switch (size) {
       case 4:
@@ -988,7 +990,12 @@
         break;
 
       case 4:
+        assertT(c, offset == (offset & (~3)));
+        append(c, stri(src->low, base, offset, size));
+        break;
+
       case 8:
+        assertT(c, offset == (offset & (~7)));
         append(c, stri(src->low, base, offset, size));
         break;
 
@@ -1020,8 +1027,21 @@ void moveRM(Context* c,
 {
   assertT(c, srcSize == dstSize);
 
-  store(
-      c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
+  if (src->low.index() == 31) {
+    assertT(c, c->client == 0);  // the compiler should never ask us to
+                                 // store the SP; we'll only get here
+                                 // when assembling a thunk
+
+    lir::RegisterPair tmp(Register(9));  // we're in a thunk, so we can
+                                         // clobber this
+
+    moveRR(c, srcSize, src, srcSize, &tmp);
+    store(
+        c, srcSize, &tmp, dst->base, dst->offset, dst->index, dst->scale, true);
+  } else {
+    store(
+        c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
+  }
 }
 
 void load(Context* c,
@@ -1085,7 +1105,7 @@
     if (release) {
      c->client->releaseTemporary(normalized);
     }
-  } else if (abs(offset) == (abs(offset) & 0xFF)) {
+  } else if (abs(offset) == (abs(offset) & 0xFFF)) {
     if (isFpr(dst)) {
       switch (srcSize) {
       case 4:
@@ -1119,6 +1139,7 @@
         if (signExtend and srcSize == 4 and dstSize == 8) {
           append(c, ldrswi(dst->low, base, offset));
         } else {
+          assertT(c, offset == (offset & (srcSize == 8 ? (~7) : (~3))));
           append(c, ldri(dst->low, base, offset, srcSize));
         }
         break;
@@ -1238,7 +1259,8 @@ void moveAR(Context* c,
             unsigned dstSize,
             lir::RegisterPair* dst)
 {
-  assertT(c, srcSize == TargetBytesPerWord and dstSize == TargetBytesPerWord);
+  assertT(c, srcSize == vm::TargetBytesPerWord
+             and dstSize == vm::TargetBytesPerWord);
 
   lir::Constant constant(src->address);
   moveCR(c, srcSize, &constant, dstSize, dst);
@@ -1312,6 +1334,20 @@ void compareRM(Context* c,
   c->client->releaseTemporary(tmp.low);
 }
 
+void compareMR(Context* c,
+               unsigned aSize,
+               lir::Memory* a,
+               unsigned bSize,
+               lir::RegisterPair* b)
+{
+  assertT(c, aSize == bSize);
+
+  lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK));
+  moveMR(c, aSize, a, aSize, &tmp);
+  compareRR(c, aSize, &tmp, bSize, b);
+  c->client->releaseTemporary(tmp.low);
+}
+
 int32_t branch(Context* c, lir::TernaryOperation op)
 {
   switch (op) {
@@ -1397,8 +1433,17 @@ void branchRM(Context* c,
   assertT(c, not isFloatBranch(op));
   assertT(c, size <= vm::TargetBytesPerWord);
 
-  compareRM(c, size, a, size, b);
-  branch(c, op, target);
+  if (a->low.index() == 31) {
+    // stack overflow checks need to compare to the stack pointer, but
+    // we can only encode that in the opposite operand order we're
+    // given, so we need to reverse everything:
+    assertT(c, op == lir::JumpIfGreaterOrEqual);
+    compareMR(c, size, b, size, a);
+    branch(c, lir::JumpIfLess, target);
+  } else {
+    compareRM(c, size, a, size, b);
+    branch(c, op, target);
+  }
 }
 
 void branchCM(Context* c,
@@ -1537,21 +1582,6 @@ void storeLoadBarrier(Context* c)
   memoryBarrier(c);
 }
 
-bool needJump(MyBlock*)
-{
-  return false;
-}
-
-unsigned padding(MyBlock*, unsigned)
-{
-  return 0;
-}
-
-void resolve(MyBlock*)
-{
-  // ignore
-}
-
 } // namespace arm
 } // namespace codegen
 } // namespace avian
diff --git a/src/compile-arm64.S b/src/compile-arm64.S
index 65f76df6f3..744e6cd71e 100644
--- a/src/compile-arm64.S
+++ b/src/compile-arm64.S
@@ -16,11 +16,11 @@
 #define BYTES_PER_WORD 4
 
 #define LOCAL(x) .L##x
-   
+
 #ifdef __APPLE__
 # define GLOBAL(x) _##x
 #else
-# define GLOBAL(x) x  
+# define GLOBAL(x) x
 #endif
 
 #define CONTINUATION_NEXT 4
@@ -29,7 +29,7 @@
 #define CONTINUATION_FRAME_POINTER_OFFSET 24
 #define CONTINUATION_LENGTH 28
 #define CONTINUATION_BODY 32
-   
+
 .globl GLOBAL(vmInvoke)
 .align 2
 GLOBAL(vmInvoke):
@@ -89,8 +89,7 @@ GLOBAL(vmInvoke_returnAddress):
   // MyProcess::getStackTrace in compile.cpp for details on how we get
   // a reliable stack trace from a thread that might be interrupted at
   // any point in its execution.
-  mov x5, #0
-  str x5, [x19, #TARGET_THREAD_STACK]
+  str xzr, [x19, #TARGET_THREAD_STACK]
 
 .globl GLOBAL(vmInvoke_safeStack)
 .align 2
@@ -100,11 +99,10 @@ GLOBAL(vmInvoke_safeStack):
 #ifdef AVIAN_CONTINUATIONS
 #error todo
 #endif // AVIAN_CONTINUATIONS
-  mov x5, #0
-  str x5, [x19, #TARGET_THREAD_STACK]
+  str xzr, [x19, #TARGET_THREAD_STACK]
 
   // restore return type
-  ldr w5, [sp], #4
+  ldr w5, [sp,#16]!
 
   // restore callee-saved register values
   ldp x19, x20, [sp,#16]
@@ -112,7 +110,7 @@
   ldp x23, x24, [sp,#48]
   ldp x25, x26, [sp,#64]
   ldp x27, x28, [sp,#80]
-  ldp x29, x30, [sp],#96
+  ldp x29, x30, [sp,#96]!
 
 LOCAL(vmInvoke_return):
   br x30
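
Note on the A64 immediate fields the patch relies on (a standalone sketch, not part of the patch): a conditional branch (B.cond, top byte 0x54) carries a 19-bit word offset in bits [23:5], and the unsigned-immediate forms of LDR/STR carry a 12-bit offset in bits [21:10] that is scaled by the access size, which is why the new encodings shift byte offsets right by 2 or 3 before placing them. The helper names below are illustrative only.

#include <cassert>
#include <cstdint>

// B.EQ to a byte offset measured from this instruction: imm19 lives in bits [23:5].
uint32_t encodeBeq(int32_t offset)
{
  return 0x54000000 | (((offset >> 2) & 0x7ffff) << 5);  // cond field 0b0000 = EQ
}

// LDR Xt, [Xn, #offset]: the 12-bit immediate is the byte offset divided by 8.
uint32_t encodeLdrX(unsigned rt, unsigned rn, unsigned offset)
{
  assert(offset % 8 == 0 and offset / 8 < 4096);  // imm12 is scaled by the access size
  return 0xf9400000 | ((offset / 8) << 10) | (rn << 5) | rt;
}

int main()
{
  assert(encodeBeq(8) == 0x54000040);          // b.eq .+8
  assert(encodeLdrX(0, 1, 16) == 0xf9400820);  // ldr x0, [x1, #16]
  return 0;
}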
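
A second sketch (again not from the patch) of the branch re-targeting that the new ARM64 path in updateOffset performs: inspect the already-emitted word, use the top byte to tell B.cond from B, then splice the word offset into the matching immediate field while preserving the surrounding bits.

#include <cstdint>

void retargetBranch(uint32_t* instruction, uint8_t* target)
{
  // A64 branches are PC-relative to the branch instruction itself.
  int32_t delta
      = static_cast<int32_t>(target - reinterpret_cast<uint8_t*>(instruction));

  int32_t v;
  uint32_t mask;
  if ((*instruction >> 24) == 0x54) {
    // conditional branch: imm19 occupies bits [23:5]
    v = (delta >> 2) << 5;
    mask = 0xffffe0;
  } else {
    // unconditional branch: imm26 occupies bits [25:0]
    v = delta >> 2;
    mask = 0x3ffffff;
  }

  *instruction = (v & mask) | (*instruction & ~mask);
}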