From b3bd58aefff1ee073e16f36b033b3c3bf3ee0f15 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Fri, 5 Dec 2014 15:58:52 -0700 Subject: [PATCH 01/20] work in progress towards 64-bit ARM JIT support This won't build, it's just a snapshot of what I have so far. Conflicts: include/avian/codegen/architecture.h include/avian/codegen/registers.h src/codegen/compiler.cpp src/codegen/compiler/event.cpp src/codegen/compiler/site.cpp src/codegen/compiler/site.h src/codegen/registers.cpp src/codegen/target/arm/assembler.cpp src/codegen/target/arm/registers.h --- include/avian/codegen/architecture.h | 2 - src/codegen/compiler/site.h | 2 +- src/codegen/target/arm/assembler.cpp | 16 +- .../arm/{operations.cpp => operations32.cpp} | 4 + src/codegen/target/arm/operations64.cpp | 1218 +++++++++++++++++ src/codegen/target/arm/registers.h | 31 +- src/compile-arm.S | 120 +- 7 files changed, 1362 insertions(+), 31 deletions(-) rename src/codegen/target/arm/{operations.cpp => operations32.cpp} (99%) create mode 100644 src/codegen/target/arm/operations64.cpp diff --git a/include/avian/codegen/architecture.h b/include/avian/codegen/architecture.h index 47687aefaf..528be74858 100644 --- a/include/avian/codegen/architecture.h +++ b/include/avian/codegen/architecture.h @@ -28,8 +28,6 @@ namespace codegen { class Assembler; -class RegisterFile; - class OperandMask { public: uint8_t typeMask; diff --git a/src/codegen/compiler/site.h b/src/codegen/compiler/site.h index b2c10ddc39..5099704a34 100644 --- a/src/codegen/compiler/site.h +++ b/src/codegen/compiler/site.h @@ -123,7 +123,7 @@ class Site { virtual RegisterMask registerMask(Context*) { - return 0; + return RegisterMask(0); } virtual bool isVolatile(Context*) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index a6c7491279..23b07ef201 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -39,7 +39,7 @@ namespace isa { bool vfpSupported() { // TODO: Use at runtime detection -#if defined(__ARM_PCS_VFP) +#if (defined __ARM_PCS_VFP) || (defined ARCH_arm64) // armhf return true; #else @@ -55,9 +55,9 @@ bool vfpSupported() const RegisterFile MyRegisterFileWithoutFloats(GPR_MASK, 0); const RegisterFile MyRegisterFileWithFloats(GPR_MASK, FPR_MASK); -const unsigned FrameHeaderSize = 1; +const unsigned FrameHeaderSize = TargetBytesPerWord / 4; -const unsigned StackAlignmentInBytes = 8; +const unsigned StackAlignmentInBytes = TargetBytesPerWord * 2; const unsigned StackAlignmentInWords = StackAlignmentInBytes / TargetBytesPerWord; @@ -258,7 +258,7 @@ class MyArchitecture : public Architecture { virtual unsigned argumentRegisterCount() { - return 4; + return TargetBytesPerWord; } virtual Register argumentRegister(unsigned index) @@ -434,11 +434,11 @@ class MyArchitecture : public Architecture { break; case lir::Float2Int: - // todo: Java requires different semantics than SSE for + // todo: Java requires different semantics than VFP for // converting floats to integers, we we need to either use // thunks or produce inline machine code which handles edge // cases properly. 
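      // (Java semantics for reference: a NaN input must convert to 0, and
      // out-of-range values must saturate to the most negative / most
      // positive representable integer.)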
- if (false && vfpSupported() && bSize == 4) { + if (false && vfpSupported() && bSize <= TargetBytesPerWord) { aMask.typeMask = lir::Operand::RegisterPairMask; aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK); } else { @@ -447,7 +447,7 @@ class MyArchitecture : public Architecture { break; case lir::Int2Float: - if (vfpSupported() && aSize == 4) { + if (vfpSupported() && aSize <= TargetBytesPerWord) { aMask.typeMask = lir::Operand::RegisterPairMask; aMask.setLowHighRegisterMasks(GPR_MASK, GPR_MASK); } else { @@ -544,7 +544,7 @@ class MyArchitecture : public Architecture { case lir::ShiftLeft: case lir::ShiftRight: case lir::UnsignedShiftRight: - if (bSize == 8) + if (bSize > TargetBytesPerWord) aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask; break; diff --git a/src/codegen/target/arm/operations.cpp b/src/codegen/target/arm/operations32.cpp similarity index 99% rename from src/codegen/target/arm/operations.cpp rename to src/codegen/target/arm/operations32.cpp index 87d88613fd..5a9f5e8a0e 100644 --- a/src/codegen/target/arm/operations.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -15,6 +15,8 @@ #include "fixup.h" #include "multimethod.h" +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM + namespace avian { namespace codegen { namespace arm { @@ -1554,3 +1556,5 @@ void storeLoadBarrier(Context* con) } // namespace arm } // namespace codegen } // namespace avian + +#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp new file mode 100644 index 0000000000..0f713645ad --- /dev/null +++ b/src/codegen/target/arm/operations64.cpp @@ -0,0 +1,1218 @@ +/* Copyright (c) 2008-2014, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. */ + +#include "context.h" +#include "operations.h" +#include "block.h" +#include "fixup.h" +#include "multimethod.h" + +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 + +namespace { + +void append(Context* c, uint32_t instruction, unsigned size) +{ + c->code.append4(instruction | (size == 8 ? 0x80000000 : 0)); +} + +uint32_t lslv(int Rd, int Rn, int Rm, unsigned size) +{ + return (size == 8 ? 0x9ac12000 : 0x1ac02000) | (Rm << 16) | (Rn << 5) | Rd; +} + +uint32_t ubfm(int Rd, int Rn, int r, int s, unsigned size) +{ + return (size == 8 ? 0xd3608000 : 0x53000000) | (r << 16) | (s << 10) | (Rn << 5) | Rd; +} + +uint32_t sbfm(int Rd, int Rn, int r, int s, unsigned size) +{ + return (size == 8 ? 0x93408000 : 0x13000000) | (r << 16) | (s << 10) | (Rn << 5) | Rd; +} + +uint32_t lsli(int Rd, int Rn, int shift, unsigned size) +{ + if (size == 4) { + return ubfm(Rd, Rn, (32 - shift) & 0x1f, 31 - shift, size); + } else { + return ubfm(Rd, Rn, (64 - shift) & 0x3f, 63 - shift, size); + } +} + +uint32_t asrv(int Rd, int Rn, int Rm, unsigned size) +{ + return (size == 8 ? 0x9ac02800 : 0x1ac02800) | (Rm << 16) | (Rn << 5) | Rd; +} + +uint32_t lsrv(int Rd, int Rn, int Rm, unsigned size) +{ + return (size == 8 ? 0x9ac02400 : 0x1ac02400) | (Rm << 16) | (Rn << 5) | Rd; +} + +uint32_t lsri(int Rd, int Rn, int shift, unsigned size) +{ + return ubfm(Rd, Rn, shift, size == 8 ? 63 : 31, size); +} + +uint32_t asri(int Rd, int Rn, int shift, unsigned size) +{ + return sbfm(Rd, Rn, shift, size == 8 ? 
63 : 31, size); +} + +uint32_t sxtb(int Rd, int Rn) +{ + return sbfm(Rd, Rn, 0, 7, 8); +} + +uint32_t sxth(int Rd, int Rn) +{ + return sbfm(Rd, Rn, 0, 15, 8); +} + +uint32_t uxth(int Rd, int Rn) +{ + return ubfm(Rd, Rn, 0, 15, 4); +} + +uint32_t sxtw(int Rd, int Rn) +{ + return sbfm(Rd, Rn, 0, 31, 8); +} + +uint32_t br(int Rn) +{ + return 0xd61f0000 | (Rn << 5); +} + +uint32_t fmovFdFn(int Fd, int Fn, unsigned size) +{ + return (size == 8 ? 0x1e604000 : 0x1e204000) | (Fn << 5) | Fd; +} + +uint32_t fmovRdFn(int Rd, int Fn, unsigned size) +{ + return (size == 8 ? 0x9e660000 : 0x1e260000) | (Fn << 5) | Rd; +} + +uint32_t fmovFdRn(int Fd, int Rn, unsigned size) +{ + return (size == 8 ? 0x9e670000 : 0x1e270000) | (Rn << 5) | Fd; +} + +uint32_t orr(int Rd, int Rn, int Rm, unsigned size) +{ + return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm << 16) | (Rn << 5) | Rd; +} + +uint32_t mov(int Rd, int Rn, unsigned size) +{ + return orr(Rd, 31, Rn, size); +} + +uint32_t ldrPCRel(int Rd, int offset, unsigned size) +{ + return (size == 8 ? 0x58000000 : 0x18000000) | (offset << 5) | Rd; +} + +uint32_t add(int Rd, int Rn, int Rm, unsigned size) +{ + return (size == 8 ? 0x8b000000 : 0x0b000000) | (Rm << 16) | (Rn << 5) | Rd; +} + +uint32_t sub(int Rd, int Rn, int Rm, unsigned size) +{ + return (size == 8 ? 0xcb000000 : 0x4b000000) | (Rm << 16) | (Rn << 5) | Rd; +} + +uint32_t madd(int Rd, int Rn, int Rm, int Ra, unsigned size) +{ + return (size == 8 ? 0x9b000000 : 0x1b000000) + | (Rm << 16) | (Ra << 10) | (Rn << 5) | Rd; +} + +uint32_t mul(int Rd, int Rn, int Rm, unsigned size) +{ + return madd(Rd, Rn, Rm, 31, size); +} + +uint32_t addi(int Rd, int Rn, int value, int shift, unsigned size) +{ + return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) + | (value << 10) | (Rn << 5) | Rd; +} + +uint32_t subi(int Rd, int Rn, int value, int shift, unsigned size) +{ + return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 
0x400000 : 0) + | (value << 10) | (Rn << 5) | Rd; +} + +} // namespace + +namespace avian { +namespace codegen { +namespace arm { + +using namespace isa; +using namespace avian::util; + +void shiftLeftR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, lslv(dst->low, b->low, a->low, size)); +} + +void shiftLeftC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + uint64_t value = a->value->value(); + if (size == 4 and (value & 0x1F)) { + append(c, lsli(dst->low, b->low, value, 4)); + } else (size == 8 and (value & 0x3F)) { + append(c, lsli(dst->low, b->low, value, 8)); + } else { + moveRR(c, size, b, size, dst); + } +} + +void shiftRightR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, asrv(dst->low, b->low, a->low, size)); +} + +void shiftRightC(Context* c, + unsigned size UNUSED, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + uint64_t value = a->value->value(); + if (size == 4 and (value & 0x1F)) { + append(c, lsri(dst->low, b->low, value, 4), 4); + } else (size == 8 and (value & 0x3F)) { + append(c, lsri(dst->low, b->low, value, 8), 8); + } else { + moveRR(c, size, b, size, dst); + } +} + +void unsignedShiftRightR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, lsrv(dst->low, b->low, a->low, size)); +} + +void unsignedShiftRightC(Context* c, + unsigned size UNUSED, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + uint64_t value = a->value->value(); + if (size == 4 and (value & 0x1F)) { + append(c, asri(dst->low, b->low, value, 4), 4); + } else (size == 8 and (value & 0x3F)) { + append(c, asri(dst->low, b->low, value, 8), 8); + } else { + moveRR(c, size, b, size, dst); + } +} + +void jumpR(Context* c, unsigned size UNUSED, lir::RegisterPair* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + append(c, br(target->low)); +} + +void moveRR(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + bool srcIsFpr = isFpr(src); + bool dstIsFpr = isFpr(dst); + if (srcIsFpr or dstIsFpr) { + assertT(c, srcSize == dstSize); + + if (srcIsFpr and dstIsFpr) { + append(c, fmovFdFn(fpr(dst), fpr(src), srcSize)); + } else if (srcIsFpr) { + append(c, fmovRdFn(fpr(dst), fpr(src), srcSize)); + } else { + append(c, fmovFdRn(fpr(dst), fpr(src), srcSize)); + } + } else { + switch (srcSize) { + case 1: + append(c, sxtb(dst->low, src->low)); + break; + + case 2: + append(c, sxth(dst->low, src->low)); + break; + + case 4: + if (dstSize == 4) { + append(c, mov(dst->low, src->low, srcSize)); + } else { + append(c, sxtw(dst->low, src->low)); + } + break; + + case 8: + append(c, mov(dst->low, src->low, srcSize)); + break; + + default: + abort(c); + } + } +} + +void moveZRR(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned, + lir::RegisterPair* dst) +{ + switch (srcSize) { + case 2: + aapend(c, uxth(dst->low, src->low)); + break; + + default: + abort(c); + } +} + +void moveCR2(Context* c, + unsigned size, + lir::Constant* src, + lir::RegisterPair* dst, + Promise* callOffset) +{ + if (isFpr(dst)) { + // todo: could use a single fmov here and avoid the temporary for + // constants that fit + lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + moveCR(c, size, src, size, &tmp); + moveRR(c, size, &tmp, size, dst); + 
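+    // the constant now lives in the destination FPR via the scratch GPR,
+    // so the temporary can be released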
c->client->releaseTemporary(tmp.low); + } else if (src->value->resolved()) { + int64_t value = src->value->value(); + if (value > 0) { + append(c, mov(dst->low, value & 0xFFFF)); + if (value >> 16) { + append(c, movk(dst->low, (value >> 16) & 0xFFFF), 16); + if (value >> 32) { + append(c, movk(dst->low, (value >> 32) & 0xFFFF), 32); + if (value >> 48) { + append(c, movk(dst->low, (value >> 48) & 0xFFFF), 48); + } + } + } + } else if (value < 0) { + append(c, movn(dst->low, (~value) & 0xFFFF)); + if (~(value >> 16)) { + append(c, movk(dst->low, (value >> 16) & 0xFFFF), 16); + if (~(value >> 32)) { + append(c, movk(dst->low, (value >> 32) & 0xFFFF), 32); + if (~(value >> 48)) { + append(c, movk(dst->low, (value >> 48) & 0xFFFF), 48); + } + } + } + } + } else { + appendConstantPoolEntry(c, src->value, callOffset); + append(c, ldrPCRel(dst->low, 0)); + } +} + +void moveCR(Context* c, + unsigned size, + lir::Constant* src, + unsigned, + lir::RegisterPair* dst) +{ + moveCR2(c, size, src, dst, 0); +} + +void addR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, add(dst, a, b, size)); +} + +void subR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, sub(dst, a, b, size)); +} + +void addC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + int32_t v = a->value->value(); + if (v) { + if (v > 0 and v < 0x1000) { + append(c, addi(dst->low, b->low, v, 0, size)); + } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { + append(c, addi(dst->low, b->low, v >> 12, 12, size)); + } else { + // todo + abort(c); + } + } else { + moveRR(c, size, b, size, dst); + } +} + +void subC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + int32_t v = a->value->value(); + if (v) { + if (v > 0 and v < 0x1000) { + append(c, subi(dst->low, b->low, v, 0, size)); + } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { + append(c, subi(dst->low, b->low, v >> 12, 12, size)); + } else { + // todo + abort(c); + } + } else { + moveRR(c, size, b, size, dst); + } +} + +void multiplyR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, mul(dst->low, a->low, b->low)); +} + +void floatAbsoluteRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + append(c, fabs(fpr(b), fpr(a), size)); +} + +void floatNegateRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + append(c, fneg(fpr(b), fpr(a), size)); +} + +void float2FloatRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + if (size == 8) { + append(c, fcvtSdDn(fpr(b), fpr(a))); + } else { + append(c, fcvtDdSn(fpr(b), fpr(a))); + } +} + +void float2IntRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + if (size == 8) { + append(c, fcvtasWdDn(b->low, fpr(a))); + } else { + append(c, fcvtasWdSn(b->low, fpr(a))); + } +} + +void int2FloatRR(Context* c, + unsigned, + lir::RegisterPair* a, + unsigned size, + lir::RegisterPair* b) +{ + if (size == 8) { + append(c, scvtfDdWn(fpr(b), b->low)); + } else { + append(c, scvtfSdWn(fpr(b), b->low)); + } +} + +void floatSqrtRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + append(c, fsqrt(fpr(b), fpr(a), size)); +} + +void 
floatAddR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fadd(fpr, dst, fpr(b), fpr(a), size)); +} + +void floatSubtractR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fsub(fpr, dst, fpr(b), fpr(a), size)); +} + +void floatMultiplyR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fmul(fpr, dst, fpr(b), fpr(a), size)); +} + +void floatDivideR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fdiv(fpr, dst, fpr(b), fpr(a), size)); +} + +int normalize(Context* c, + int offset, + int index, + unsigned scale, + bool* preserveIndex, + bool* release) +{ + if (offset != 0 or scale != 1) { + lir::Register normalizedIndex( + *preserveIndex ? con->client->acquireTemporary(GPR_MASK) : index); + + if (*preserveIndex) { + *release = true; + *preserveIndex = false; + } else { + *release = false; + } + + int scaled; + + if (scale != 1) { + lir::Register unscaledIndex(index); + + ResolvedPromise scalePromise(log(scale)); + lir::Constant scaleConstant(&scalePromise); + + shiftLeftC(c, + vm::TargetBytesPerWord, + &scaleConstant, + &unscaledIndex, + &normalizedIndex); + + scaled = normalizedIndex.low; + } else { + scaled = index; + } + + if (offset != 0) { + lir::Register untranslatedIndex(scaled); + + ResolvedPromise offsetPromise(offset); + lir::Constant offsetConstant(&offsetPromise); + + lir::Register tmp(con->client->acquireTemporary(GPR_MASK)); + moveCR(c, + vm::TargetBytesPerWord, + &offsetConstant, + vm::TargetBytesPerWord, + &tmp); + addR(c, + vm::TargetBytesPerWord, + &tmp, + &untranslatedIndex, + &normalizedIndex); + con->client->releaseTemporary(tmp.low); + } + + return normalizedIndex.low; + } else { + *release = false; + return index; + } +} + +void store(Context* c, + unsigned size, + lir::RegisterPair* src, + int base, + int offset, + int index, + unsigned scale, + bool preserveIndex) +{ + if (index != lir::NoRegister) { + bool release; + int normalized + = normalize(c, offset, index, scale, &preserveIndex, &release); + + if (isFpr(src)) { + switch (size) { + case 4: + case 8: + append(c, strFs(fpr(src->low), base, normalized, size)); + break; + + default: + abort(c); + } + } else { + switch (size) { + case 1: + append(c, strb(src->low, base, normalized)); + break; + + case 2: + append(c, strh(src->low, base, normalized)); + break; + + case 4: + case 8: + append(c, str(src->low, base, normalized, size)); + break; + + default: + abort(c); + } + } + + if (release) { + c->client->releaseTemporary(normalized); + } + } else if (abs(offset) == (abs(offset) & 0xFF)) { + if (isFpr(src)) { + switch (size) { + case 4: + case 8: + append(c, striFs(fpr(src->low), base, offset, size)); + break; + + default: + abort(c); + } + } else { // FPR store + switch (size) { + case 1: + append(c, strbi(src->low, base, offset)); + break; + + case 2: + append(c, strhi(src->low, base, offset)); + break; + + case 4: + case 8: + append(c, stri(src->low, base, offset, size)); + break; + + default: + abort(c); + } + } + } else { + lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + ResolvedPromise offsetPromise(offset); + lir::Constant offsetConstant(&offsetPromise); + moveCR(c, + vm::TargetBytesPerWord, + &offsetConstant, + vm::TargetBytesPerWord, + &tmp); + + store(c, size, src, base, 0, tmp.low, 1, false); + + 
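+    // the offset was too large to encode as an immediate, so it was
+    // materialized in a temporary register and used as an index above;
+    // release that temporary now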
c->client->releaseTemporary(tmp.low); + } +} + +void moveRM(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned dstSize UNUSED, + lir::Memory* dst) +{ + assertT(c, srcSize == dstSize); + + store( + c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true); +} + +void load(Context* c, + unsigned srcSize, + int base, + int offset, + int index, + unsigned scale, + unsigned dstSize, + lir::RegisterPair* dst, + bool preserveIndex, + bool signExtend) +{ + if (index != lir::NoRegister) { + bool release; + int normalized + = normalize(c, offset, index, scale, &preserveIndex, &release); + + if (isFpr(dst)) { // FPR load + switch (srcSize) { + case 4: + case 8: + append(c, ldrFd(fpr(dst->low), base, normalized, srcSize)); + break; + + default: + abort(c); + } + } else { + switch (srcSize) { + case 1: + if (signExtend) { + append(c, ldrsb(dst->low, base, normalized)); + } else { + append(c, ldrb(dst->low, base, normalized)); + } + break; + + case 2: + if (signExtend) { + append(c, ldrsh(dst->low, base, normalized)); + } else { + append(c, ldrh(dst->low, base, normalized)); + } + break; + + case 4: + case 8: + if (signExtend and srcSize == 4 and dstSize == 8) { + append(c, ldrsw(dst->low, base, normalized)); + } else { + append(c, ldr(dst->low, base, normalized, srcSize)); + } + break; + + default: + abort(c); + } + } + + if (release) { + c->client->releaseTemporary(normalized); + } + } else if (abs(offset) == (abs(offset) & 0xFF)) { + if (isFpr(dst)) { + switch (srcSize) { + case 4: + case 8: + append(c, ldriFd(fpr(dst->low), base, offset)); + break; + + default: + abort(c); + } + } else { + switch (srcSize) { + case 1: + if (signExtend) { + append(c, ldrsbi(dst->low, base, offset)); + } else { + append(c, ldrbi(dst->low, base, offset)); + } + break; + + case 2: + if (signExtend) { + append(c, ldrshi(dst->low, base, offset)); + } else { + append(c, ldrhi(dst->low, base, offset)); + } + break; + + case 4: + case 8: + if (signExtend and srcSize == 4 and dstSize == 8) { + append(c, ldrswi(dst->low, base, offset)); + } else { + append(c, ldri(dst->low, base, offset, size)); + } + break; + + default: + abort(c); + } + } + } else { + lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + ResolvedPromise offsetPromise(offset); + lir::Constant offsetConstant(&offsetPromise); + moveCR(c, + vm::TargetBytesPerWord, + &offsetConstant, + vm::TargetBytesPerWord, + &tmp); + + load(c, srcSize, base, 0, tmp.low, 1, dstSize, dst, false, signExtend); + + c->client->releaseTemporary(tmp.low); + } +} + +void moveMR(Context* c, + unsigned srcSize, + lir::Memory* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + load(c, + srcSize, + src->base, + src->offset, + src->index, + src->scale, + dstSize, + dst, + true, + true); +} + +void moveZMR(Context* c, + unsigned srcSize, + lir::Memory* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + load(c, + srcSize, + src->base, + src->offset, + src->index, + src->scale, + dstSize, + dst, + true, + false); +} + +void andR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, and_(dst->low, a->low, b->low, size)); +} + +void andC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + int64_t v = a->value->value(); + + if (~v) { + bool useTemporary = b->low == dst->low; + lir::Register tmp(dst->low); + if (useTemporary) { + tmp.low = c->client->acquireTemporary(GPR_MASK); + } + + moveCR(c, size, a, size, &tmp); + andR(c, size, b, 
&tmp, dst); + + if (useTemporary) { + c->client->releaseTemporary(tmp.low); + } + } else { + moveRR(c, size, b, size, dst); + } +} + +void orR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, orr(dst->low, a->low, b->low, size)); +} + +void xorR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, eor(dst->low, a->low, b->low, size)); +} + +void moveAR(Context* c, + unsigned srcSize, + lir::Address* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + assertT(c, srcSize == TargetBytesPerWord and dstSize == TargetBytesPerWord); + + lir::Constant constant(src->address); + moveCR(c, srcSize, &constant, dstSize, dst); + + lir::Memory memory(dst->low, 0, -1, 0); + moveMR(c, dstSize, &memory, dstSize, dst); +} + +void compareRR(Context* c, + unsigned aSize, + lir::RegisterPair* a, + unsigned bSize UNUSED, + lir::RegisterPair* b) +{ + assertT(c, not (isFpr(a) xor isFpr(b))); + assertT(c, aSize == bSize); + + if (isFpr(a)) { + append(c, fcmp(fpr(b), fpr(a), aSize)); + } else { + append(c, cmp(b->low, a->low, aSize)); + } +} + +void compareCR(Context* c, + unsigned aSize, + lir::Constant* a, + unsigned bSize, + lir::RegisterPair* b) +{ + assertT(c, aSize == bSize); + + int32_t v = a->value->value(); + if (v) { + if (v > 0 and v < 0x1000) { + append(c, cmpi(b->low, v, 0, size)); + } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { + append(c, cmpi(b->low, v >> 12, 12, size)); + } else { + // todo + abort(c); + } + } +} + +void compareCM(Context* c, + unsigned aSize, + lir::Constant* a, + unsigned bSize, + lir::Memory* b) +{ + assertT(c, aSize == bSize); + + lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + moveMR(c, bSize, b, bSize, &tmp); + compareCR(c, aSize, a, bSize, &tmp); + c->client->releaseTemporary(tmp.low); +} + +void compareRM(Context* c, + unsigned aSize, + lir::RegisterPair* a, + unsigned bSize, + lir::Memory* b) +{ + assertT(c, aSize == bSize); + + lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + moveMR(c, bSize, b, bSize, &tmp); + compareRR(c, aSize, a, bSize, &tmp); + c->client->releaseTemporary(tmp.low); +} + +int32_t branch(Context* c, lir::TernaryOperation op) +{ + switch (op) { + case lir::JumpIfEqual: + case lir::JumpIfFloatEqual: + return beq(0); + + case lir::JumpIfNotEqual: + case lir::JumpIfFloatNotEqual: + return bne(0); + + case lir::JumpIfLess: + case lir::JumpIfFloatLess: + case lir::JumpIfFloatLessOrUnordered: + return blt(0); + + case lir::JumpIfGreater: + case lir::JumpIfFloatGreater: + return bgt(0); + + case lir::JumpIfLessOrEqual: + case lir::JumpIfFloatLessOrEqual: + case lir::JumpIfFloatLessOrEqualOrUnordered: + return ble(0); + + case lir::JumpIfGreaterOrEqual: + case lir::JumpIfFloatGreaterOrEqual: + return bge(0); + + case lir::JumpIfFloatGreaterOrUnordered: + return bhi(0); + + case lir::JumpIfFloatGreaterOrEqualOrUnordered: + return bpl(0); + + default: + abort(c); + } +} + +void conditional(Context* c, int32_t branch, lir::Constant* target) +{ + appendOffsetTask(c, target->value, offsetPromise(con)); + append(c, branch); +} + +void branch(Context* c, lir::TernaryOperation op, lir::Constant* target) +{ + conditional(c, branch(c, op), target); +} + +void branchRR(Context* c, + lir::TernaryOperation op, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::Constant* target) +{ + compareRR(c, size, a, size, b); + branch(c, op, target); +} + +void branchCR(Context* c, + 
lir::TernaryOperation op, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::Constant* target) +{ + assertT(c, not isFloatBranch(op)); + + compareCR(c, size, a, size, b); + branch(c, op, target); +} + +void branchRM(Context* c, + lir::TernaryOperation op, + unsigned size, + lir::RegisterPair* a, + lir::Memory* b, + lir::Constant* target) +{ + assertT(c, not isFloatBranch(op)); + assertT(c, size <= vm::TargetBytesPerWord); + + compareRM(c, size, a, size, b); + branch(c, op, target); +} + +void branchCM(Context* c, + lir::TernaryOperation op, + unsigned size, + lir::Constant* a, + lir::Memory* b, + lir::Constant* target) +{ + assertT(c, not isFloatBranch(op)); + assertT(c, size <= vm::TargetBytesPerWord); + + compareCM(c, size, a, size, b); + branch(c, op, target); +} + +ShiftMaskPromise* shiftMaskPromise(Context* c, + Promise* base, + unsigned shift, + int64_t mask) +{ + return new (con->zone) ShiftMaskPromise(base, shift, mask); +} + +void moveCM(Context* c, + unsigned srcSize, + lir::Constant* src, + unsigned dstSize, + lir::Memory* dst) +{ + switch (dstSize) { + case 8: { + lir::Constant srcHigh(shiftMaskPromise(c, src->value, 32, 0xFFFFFFFF)); + lir::Constant srcLow(shiftMaskPromise(c, src->value, 0, 0xFFFFFFFF)); + + lir::Memory dstLow(dst->base, dst->offset + 4, dst->index, dst->scale); + + moveCM(c, 4, &srcLow, 4, &dstLow); + moveCM(c, 4, &srcHigh, 4, dst); + } break; + + default: + lir::Register tmp(con->client->acquireTemporary(GPR_MASK)); + moveCR(c, srcSize, src, dstSize, &tmp); + moveRM(c, dstSize, &tmp, dstSize, dst); + con->client->releaseTemporary(tmp.low); + } +} + +void negateRR(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned dstSize UNUSED, + lir::RegisterPair* dst) +{ + assertT(c, srcSize == dstSize); + + append(c, neg(dst->low, src->low, srcSize)); +} + +void callR(Context* c, unsigned size UNUSED, lir::RegisterPair* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + append(c, blr(target->low)); +} + +void callC(Context* c, unsigned size UNUSED, lir::Constant* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + + appendOffsetTask(c, target->value, offsetPromise(c)); + append(c, bl(0)); +} + +void longCallC(Context* c, unsigned size UNUSED, lir::Constant* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + + lir::Register tmp(9); // a non-arg reg that we don't mind clobbering + moveCR2(c, vm::TargetBytesPerWord, target, &tmp, offsetPromise(c)); + callR(c, vm::TargetBytesPerWord, &tmp); +} + +void longJumpC(Context* c, unsigned size UNUSED, lir::Constant* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + + lir::Register tmp(9); // a non-arg reg that we don't mind clobbering + moveCR2(c, vm::TargetBytesPerWord, target, &tmp, offsetPromise(c)); + jumpR(c, vm::TargetBytesPerWord, &tmp); +} + +void jumpC(Context* c, unsigned size UNUSED, lir::Constant* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + + appendOffsetTask(c, target->value, offsetPromise(c)); + append(c, b(0)); +} + +void return_(Context* c) +{ + append(c, br(LinkRegister)); +} + +void trap(Context* c) +{ + append(c, brk(0)); +} + +// todo: determine the minimal operation types and domains needed to +// implement the following barriers (see +// http://community.arm.com/groups/processors/blog/2011/10/19/memory-access-ordering-part-3--memory-access-ordering-in-the-arm-architecture). +// For now, we just use DMB SY as a conservative but not necessarily +// performant choice. 
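+// A likely refinement (not yet verified on hardware): DMB ISHST should
+// suffice for storeStoreBarrier and DMB ISHLD for loadBarrier, limiting
+// the barrier to the inner shareable domain, while storeLoadBarrier
+// still needs the full two-way barrier.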
+ +void memoryBarrier(Context* c) +{ + append(c, dmb()); +} + +void loadBarrier(Context* c) +{ + memoryBarrier(c); +} + +void storeStoreBarrier(Context* c) +{ + memoryBarrier(c); +} + +void storeLoadBarrier(Context* c) +{ + memoryBarrier(c); +} + +} // namespace arm +} // namespace codegen +} // namespace avian + +#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index ad13db466a..476cff546f 100644 --- a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -21,10 +21,29 @@ namespace arm { const uint64_t MASK_LO32 = 0xffffffff; const unsigned MASK_LO8 = 0xff; +#ifdef ARCH_arm64 +constexpr Register ThreadRegister(19); +constexpr Register StackRegister(31); +constexpr Register LinkRegister(30); +constexpr Register LinkRegister(29); +constexpr Register ProgramCounter(0xFE); // i.e. unaddressable + +const int N_GPRS = 32; +const int N_FPRS = 32; +const uint64_t GPR_MASK = 0xffffffff; +const uint64_t FPR_MASK = 0xffffffff00000000; +#else +constexpr Register ThreadRegister(8); +constexpr Register StackRegister(13); +constexpr Register LinkRegister(14); +constexpr Register FrameRegister(0xFE); // i.e. there is none +constexpr Register ProgramCounter(15); + const int N_GPRS = 16; const int N_FPRS = 16; const RegisterMask GPR_MASK = 0xffff; const RegisterMask FPR_MASK = 0xffff0000; +#endif inline bool isFpr(lir::RegisterPair* reg) { @@ -48,18 +67,6 @@ inline int fpr32(lir::RegisterPair* reg) return fpr64(reg) << 1; } -#ifdef ARCH_arm64 -constexpr Register ThreadRegister(19); -constexpr Register StackRegister(31); -constexpr Register LinkRegister(30); -constexpr Register ProgramCounter(0xFE); // i.e. unaddressable -#else -constexpr Register ThreadRegister(8); -constexpr Register StackRegister(13); -constexpr Register LinkRegister(14); -constexpr Register ProgramCounter(15); -#endif - } // namespace arm } // namespace codegen } // namespace avian diff --git a/src/compile-arm.S b/src/compile-arm.S index 37b61da454..432b79e646 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -16,20 +16,122 @@ #define BYTES_PER_WORD 4 #define LOCAL(x) .L##x - + #ifdef __APPLE__ # define GLOBAL(x) _##x #else -# define GLOBAL(x) x +# define GLOBAL(x) x #endif +#ifdef __aarch64__ + +.globl GLOBAL(vmInvoke) +.align 2 +GLOBAL(vmInvoke): + // arguments: + // x0 : thread + // x1 : function + // x2 : arguments + // w3 : argumentFootprint + // w4 : frameSize (not used) + // w5 : returnType + + // allocate frame + stp x29, x30, [sp,#-96]! + + // save callee-saved register values + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + + // save return type + str w5, [sp,#-16]! + + mov x5, sp + str x5, [x0,#TARGET_THREAD_SCRATCH] + + // copy arguments into place + sub sp, sp, w3 + mov x5, #0 + b LOCAL(vmInvoke_argumentTest) + +LOCAL(vmInvoke_argumentLoop): + ldr x5, [x2, x4] + str x5, [sp, x4] + add x4, x4, #BYTES_PER_WORD + +LOCAL(vmInvoke_argumentTest): + cmp x4, x3 + blt LOCAL(vmInvoke_argumentLoop) + + // we use x19 to hold the thread pointer, by convention + mov x19, x0 + + // load and call function address + blr x1 + +.globl GLOBAL(vmInvoke_returnAddress) +.align 2 +GLOBAL(vmInvoke_returnAddress): + // restore stack pointer + ldr x5, [x19, #TARGET_THREAD_SCRATCH] + mov sp, x5 + + // clear MyThread::stack to avoid confusing another thread calling + // java.lang.Thread.getStackTrace on this one. 
See + // MyProcess::getStackTrace in compile.cpp for details on how we get + // a reliable stack trace from a thread that might be interrupted at + // any point in its execution. + mov x5, #0 + str x5, [x19, #TARGET_THREAD_STACK] + +.globl GLOBAL(vmInvoke_safeStack) +.align 2 +GLOBAL(vmInvoke_safeStack): + +#ifdef AVIAN_CONTINUATIONS +#error todo +#endif // AVIAN_CONTINUATIONS + + mov x5, #0 + str x5, [x19, #TARGET_THREAD_STACK] + + // restore return type + ldr w5, [sp], #4 + + // restore callee-saved register values + ldp x19, x20, [sp,#16] + ldp x21, x22, [sp,#32] + ldp x23, x24, [sp,#48] + ldp x25, x26, [sp,#64] + ldp x27, x28, [sp,#80] + ldp x29, x30, [sp],#96 + +LOCAL(vmInvoke_return): + br x30 + +.globl GLOBAL(vmJumpAndInvoke) +.align 2 +GLOBAL(vmJumpAndInvoke): +#ifdef AVIAN_CONTINUATIONS +#error todo +#else // not AVIAN_CONTINUATIONS + // vmJumpAndInvoke should only be called when continuations are + // enabled, so we force a crash if we reach here: + brk 0 +#endif // not AVIAN_CONTINUATIONS + +#elif defined __arm__ + #define CONTINUATION_NEXT 4 #define CONTINUATION_ADDRESS 16 #define CONTINUATION_RETURN_ADDRESS_OFFSET 20 #define CONTINUATION_FRAME_POINTER_OFFSET 24 #define CONTINUATION_LENGTH 28 #define CONTINUATION_BODY 32 - + .globl GLOBAL(vmInvoke) .align 2 GLOBAL(vmInvoke): @@ -56,7 +158,7 @@ GLOBAL(vmInvoke): eor r4, sp, r3 tst r4, #4 subne sp, sp, #4 - + // copy arguments into place sub sp, r3 mov r4, #0 @@ -87,7 +189,7 @@ LOCAL(vmInvoke_argumentTest): GLOBAL(vmInvoke_returnAddress): // restore stack pointer ldr sp, [r8, #TARGET_THREAD_SCRATCH] - + // clear MyThread::stack to avoid confusing another thread calling // java.lang.Thread.getStackTrace on this one. See // MyProcess::getStackTrace in compile.cpp for details on how we get @@ -201,7 +303,7 @@ GLOBAL(vmJumpAndInvoke): // which is not true in this case sub r2,r2,r6 sub r2,r2,#84 - + mov r8,r0 // copy arguments into place @@ -220,7 +322,7 @@ LOCAL(vmJumpAndInvoke_argumentTest): // the arguments have been copied, so we can set the real stack // pointer now mov sp,r2 - + // set return address to vmInvoke_returnAddress #ifdef __APPLE__ movw r11, :lower16:(GLOBAL(vmInvoke_returnAddress)-(LOCAL(vmJumpAndInvoke_getAddress)+8)) @@ -246,10 +348,12 @@ LOCAL(vmInvoke_getAddress_word): LOCAL(vmJumpAndInvoke_getAddress_word): .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8) #endif // not __APPLE__ - + #else // not AVIAN_CONTINUATIONS // vmJumpAndInvoke should only be called when continuations are // enabled, so we force a crash if we reach here: mov r1,#0 ldr r1,[r1] #endif // not AVIAN_CONTINUATIONS + +#endif // __arm__ From 123570515f512acb178a5e80a82eefae069f715e Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 8 Dec 2014 14:07:11 -0700 Subject: [PATCH 02/20] snapshot of ARM64 instruction encoding work Still not building, but more progress. --- src/codegen/target/arm/operations64.cpp | 99 ++++++++++++++++++------- src/codegen/target/arm/registers.h | 26 +++++-- 2 files changed, 92 insertions(+), 33 deletions(-) diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 0f713645ad..174642cb2f 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -18,9 +18,11 @@ namespace { -void append(Context* c, uint32_t instruction, unsigned size) +using namespace avian::codegen::arm; + +void append(Context* c, uint32_t instruction) { - c->code.append4(instruction | (size == 8 ? 
0x80000000 : 0)); + c->code.append4(instruction); } uint32_t lslv(int Rd, int Rn, int Rm, unsigned size) @@ -117,6 +119,24 @@ uint32_t mov(int Rd, int Rn, unsigned size) return orr(Rd, 31, Rn, size); } +uint32_t movz(int Rd, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0xd2800000 : 0x52800000) | ((shift >> 4) << 21) + | (value << 5) | Rd; +} + +uint32_t movn(int Rd, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0x92800000 : 0x12800000) | ((shift >> 4) << 21) + | (value << 5) | Rd; +} + +uint32_t movk(int Rd, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0xf2800000 : 0x72800000) | ((shift >> 4) << 21) + | (value << 5) | Rd; +} + uint32_t ldrPCRel(int Rd, int offset, unsigned size) { return (size == 8 ? 0x58000000 : 0x18000000) | (offset << 5) | Rd; @@ -155,13 +175,42 @@ uint32_t subi(int Rd, int Rn, int value, int shift, unsigned size) | (value << 10) | (Rn << 5) | Rd; } +uint32_t fabs(int Fd, int Fn, unsigned size) +{ + return (size == 8 ? 0x1e60c000 : 0x1e20c000) | (Fn << 5) | Fd; +} + +uint32_t fneg(int Fd, int Fn, unsigned size) +{ + return (size == 8 ? 0x1e614000 : 0x1e214000) | (Fn << 5) | Fd; +} + +uint32_t fcvtSdDn(int Fd, int Fn) +{ + return 0x1e624000 | (Fn << 5) | Fd; +} + +uint32_t fcvtDdSn(int Fd, int Fn) +{ + return 0x1e22c000 | (Fn << 5) | Fd; +} + +uint32_t fcvtasXdDn(int Rd, int Fn) +{ + return 0x9e640000 | (Fn << 5) | Rd; +} + +uint32_t fcvtasWdSn(int Rd, int Fn) +{ + return 0x1e240000 | (Fn << 5) | Rd; +} + } // namespace namespace avian { namespace codegen { namespace arm { -using namespace isa; using namespace avian::util; void shiftLeftR(Context* c, @@ -182,7 +231,7 @@ void shiftLeftC(Context* c, uint64_t value = a->value->value(); if (size == 4 and (value & 0x1F)) { append(c, lsli(dst->low, b->low, value, 4)); - } else (size == 8 and (value & 0x3F)) { + } else if (size == 8 and (value & 0x3F)) { append(c, lsli(dst->low, b->low, value, 8)); } else { moveRR(c, size, b, size, dst); @@ -206,9 +255,9 @@ void shiftRightC(Context* c, { uint64_t value = a->value->value(); if (size == 4 and (value & 0x1F)) { - append(c, lsri(dst->low, b->low, value, 4), 4); - } else (size == 8 and (value & 0x3F)) { - append(c, lsri(dst->low, b->low, value, 8), 8); + append(c, lsri(dst->low, b->low, value, 4)); + } else if (size == 8 and (value & 0x3F)) { + append(c, lsri(dst->low, b->low, value, 8)); } else { moveRR(c, size, b, size, dst); } @@ -231,9 +280,9 @@ void unsignedShiftRightC(Context* c, { uint64_t value = a->value->value(); if (size == 4 and (value & 0x1F)) { - append(c, asri(dst->low, b->low, value, 4), 4); - } else (size == 8 and (value & 0x3F)) { - append(c, asri(dst->low, b->low, value, 8), 8); + append(c, asri(dst->low, b->low, value, 4)); + } else if (size == 8 and (value & 0x3F)) { + append(c, asri(dst->low, b->low, value, 8)); } else { moveRR(c, size, b, size, dst); } @@ -299,7 +348,7 @@ void moveZRR(Context* c, { switch (srcSize) { case 2: - aapend(c, uxth(dst->low, src->low)); + append(c, uxth(dst->low, src->low)); break; default: @@ -323,31 +372,31 @@ void moveCR2(Context* c, } else if (src->value->resolved()) { int64_t value = src->value->value(); if (value > 0) { - append(c, mov(dst->low, value & 0xFFFF)); + append(c, movz(dst->low, value & 0xFFFF, 0, size)); if (value >> 16) { - append(c, movk(dst->low, (value >> 16) & 0xFFFF), 16); + append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); if (value >> 32) { - append(c, movk(dst->low, (value >> 32) & 0xFFFF), 32); + append(c, movk(dst->low, (value >> 
32) & 0xFFFF, 32, size)); if (value >> 48) { - append(c, movk(dst->low, (value >> 48) & 0xFFFF), 48); + append(c, movk(dst->low, (value >> 48) & 0xFFFF, 48, size)); } } } } else if (value < 0) { - append(c, movn(dst->low, (~value) & 0xFFFF)); + append(c, movn(dst->low, (~value) & 0xFFFF, 0, size)); if (~(value >> 16)) { - append(c, movk(dst->low, (value >> 16) & 0xFFFF), 16); + append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); if (~(value >> 32)) { - append(c, movk(dst->low, (value >> 32) & 0xFFFF), 32); + append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); if (~(value >> 48)) { - append(c, movk(dst->low, (value >> 48) & 0xFFFF), 48); + append(c, movk(dst->low, (value >> 48) & 0xFFFF, 48, size)); } } } } } else { appendConstantPoolEntry(c, src->value, callOffset); - append(c, ldrPCRel(dst->low, 0)); + append(c, ldrPCRel(dst->low, 0, size)); } } @@ -366,7 +415,7 @@ void addR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, add(dst, a, b, size)); + append(c, add(dst->low, a->low, b->low, size)); } void subR(Context* c, @@ -375,7 +424,7 @@ void subR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, sub(dst, a, b, size)); + append(c, sub(dst->low, a->low, b->low, size)); } void addC(Context* c, @@ -426,7 +475,7 @@ void multiplyR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, mul(dst->low, a->low, b->low)); + append(c, mul(dst->low, a->low, b->low, size)); } void floatAbsoluteRR(Context* c, @@ -435,7 +484,7 @@ void floatAbsoluteRR(Context* c, unsigned, lir::RegisterPair* b) { - append(c, fabs(fpr(b), fpr(a), size)); + append(c, fabs_(fpr(b), fpr(a), size)); } void floatNegateRR(Context* c, @@ -467,7 +516,7 @@ void float2IntRR(Context* c, lir::RegisterPair* b) { if (size == 8) { - append(c, fcvtasWdDn(b->low, fpr(a))); + append(c, fcvtasXdDn(b->low, fpr(a))); } else { append(c, fcvtasWdSn(b->low, fpr(a))); } diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index 476cff546f..da2d7151fd 100644 --- a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -30,8 +30,18 @@ constexpr Register ProgramCounter(0xFE); // i.e. 
unaddressable const int N_GPRS = 32; const int N_FPRS = 32; -const uint64_t GPR_MASK = 0xffffffff; -const uint64_t FPR_MASK = 0xffffffff00000000; +const RegisterMask GPR_MASK = 0xffffffff; +const RegisterMask FPR_MASK = 0xffffffff00000000; + +inline int fpr(int reg) +{ + return reg - N_GPRS; +} + +inline int fpr(lir::RegisterPair* reg) +{ + return fpr(reg->low); +} #else constexpr Register ThreadRegister(8); constexpr Register StackRegister(13); @@ -43,12 +53,6 @@ const int N_GPRS = 16; const int N_FPRS = 16; const RegisterMask GPR_MASK = 0xffff; const RegisterMask FPR_MASK = 0xffff0000; -#endif - -inline bool isFpr(lir::RegisterPair* reg) -{ - return reg->low.index() >= N_GPRS; -} inline int fpr64(Register reg) { @@ -66,6 +70,12 @@ inline int fpr32(lir::RegisterPair* reg) { return fpr64(reg) << 1; } +#endif + +inline bool isFpr(lir::RegisterPair* reg) +{ + return reg->low.index() >= N_GPRS; +} } // namespace arm } // namespace codegen From a6e88a8faa52f7f11a5bc65aba17ca886935590f Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Wed, 10 Dec 2014 13:52:30 -0700 Subject: [PATCH 03/20] fix some merge-introduced problems --- src/codegen/target/arm/operations64.cpp | 128 ++++++++++++------------ src/codegen/target/arm/registers.h | 9 -- 2 files changed, 66 insertions(+), 71 deletions(-) diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 174642cb2f..724f856d13 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -16,31 +16,41 @@ #if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 -namespace { +namespace avian { +namespace codegen { +namespace arm { -using namespace avian::codegen::arm; +inline int fpr(Register reg) +{ + return reg.index() - N_GPRS; +} + +inline int fpr(lir::RegisterPair* reg) +{ + return fpr(reg->low); +} void append(Context* c, uint32_t instruction) { c->code.append4(instruction); } -uint32_t lslv(int Rd, int Rn, int Rm, unsigned size) +uint32_t lslv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac12000 : 0x1ac02000) | (Rm << 16) | (Rn << 5) | Rd; + return (size == 8 ? 0x9ac12000 : 0x1ac02000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } -uint32_t ubfm(int Rd, int Rn, int r, int s, unsigned size) +uint32_t ubfm(Register Rd, Register Rn, int r, int s, unsigned size) { - return (size == 8 ? 0xd3608000 : 0x53000000) | (r << 16) | (s << 10) | (Rn << 5) | Rd; + return (size == 8 ? 0xd3608000 : 0x53000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); } -uint32_t sbfm(int Rd, int Rn, int r, int s, unsigned size) +uint32_t sbfm(Register Rd, Register Rn, int r, int s, unsigned size) { - return (size == 8 ? 0x93408000 : 0x13000000) | (r << 16) | (s << 10) | (Rn << 5) | Rd; + return (size == 8 ? 0x93408000 : 0x13000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); } -uint32_t lsli(int Rd, int Rn, int shift, unsigned size) +uint32_t lsli(Register Rd, Register Rn, int shift, unsigned size) { if (size == 4) { return ubfm(Rd, Rn, (32 - shift) & 0x1f, 31 - shift, size); @@ -49,49 +59,49 @@ uint32_t lsli(int Rd, int Rn, int shift, unsigned size) } } -uint32_t asrv(int Rd, int Rn, int Rm, unsigned size) +uint32_t asrv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac02800 : 0x1ac02800) | (Rm << 16) | (Rn << 5) | Rd; + return (size == 8 ? 
0x9ac02800 : 0x1ac02800) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } -uint32_t lsrv(int Rd, int Rn, int Rm, unsigned size) +uint32_t lsrv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac02400 : 0x1ac02400) | (Rm << 16) | (Rn << 5) | Rd; + return (size == 8 ? 0x9ac02400 : 0x1ac02400) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } -uint32_t lsri(int Rd, int Rn, int shift, unsigned size) +uint32_t lsri(Register Rd, Register Rn, int shift, unsigned size) { return ubfm(Rd, Rn, shift, size == 8 ? 63 : 31, size); } -uint32_t asri(int Rd, int Rn, int shift, unsigned size) +uint32_t asri(Register Rd, Register Rn, int shift, unsigned size) { return sbfm(Rd, Rn, shift, size == 8 ? 63 : 31, size); } -uint32_t sxtb(int Rd, int Rn) +uint32_t sxtb(Register Rd, Register Rn) { return sbfm(Rd, Rn, 0, 7, 8); } -uint32_t sxth(int Rd, int Rn) +uint32_t sxth(Register Rd, Register Rn) { return sbfm(Rd, Rn, 0, 15, 8); } -uint32_t uxth(int Rd, int Rn) +uint32_t uxth(Register Rd, Register Rn) { return ubfm(Rd, Rn, 0, 15, 4); } -uint32_t sxtw(int Rd, int Rn) +uint32_t sxtw(Register Rd, Register Rn) { return sbfm(Rd, Rn, 0, 31, 8); } -uint32_t br(int Rn) +uint32_t br(Register Rn) { - return 0xd61f0000 | (Rn << 5); + return 0xd61f0000 | (Rn.index() << 5); } uint32_t fmovFdFn(int Fd, int Fn, unsigned size) @@ -99,80 +109,80 @@ uint32_t fmovFdFn(int Fd, int Fn, unsigned size) return (size == 8 ? 0x1e604000 : 0x1e204000) | (Fn << 5) | Fd; } -uint32_t fmovRdFn(int Rd, int Fn, unsigned size) +uint32_t fmovRdFn(Register Rd, int Fn, unsigned size) { - return (size == 8 ? 0x9e660000 : 0x1e260000) | (Fn << 5) | Rd; + return (size == 8 ? 0x9e660000 : 0x1e260000) | (Fn << 5) | Rd.index(); } -uint32_t fmovFdRn(int Fd, int Rn, unsigned size) +uint32_t fmovFdRn(int Fd, Register Rn, unsigned size) { - return (size == 8 ? 0x9e670000 : 0x1e270000) | (Rn << 5) | Fd; + return (size == 8 ? 0x9e670000 : 0x1e270000) | (Rn.index() << 5) | Fd; } -uint32_t orr(int Rd, int Rn, int Rm, unsigned size) +uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm << 16) | (Rn << 5) | Rd; + return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } -uint32_t mov(int Rd, int Rn, unsigned size) +uint32_t mov(Register Rd, Register Rn, unsigned size) { - return orr(Rd, 31, Rn, size); + return orr(Rd, Register(31), Rn, size); } -uint32_t movz(int Rd, int value, unsigned shift, unsigned size) +uint32_t movz(Register Rd, int value, unsigned shift, unsigned size) { return (size == 8 ? 0xd2800000 : 0x52800000) | ((shift >> 4) << 21) - | (value << 5) | Rd; + | (value << 5) | Rd.index(); } -uint32_t movn(int Rd, int value, unsigned shift, unsigned size) +uint32_t movn(Register Rd, int value, unsigned shift, unsigned size) { return (size == 8 ? 0x92800000 : 0x12800000) | ((shift >> 4) << 21) - | (value << 5) | Rd; + | (value << 5) | Rd.index(); } -uint32_t movk(int Rd, int value, unsigned shift, unsigned size) +uint32_t movk(Register Rd, int value, unsigned shift, unsigned size) { return (size == 8 ? 0xf2800000 : 0x72800000) | ((shift >> 4) << 21) - | (value << 5) | Rd; + | (value << 5) | Rd.index(); } -uint32_t ldrPCRel(int Rd, int offset, unsigned size) +uint32_t ldrPCRel(Register Rd, int offset, unsigned size) { - return (size == 8 ? 0x58000000 : 0x18000000) | (offset << 5) | Rd; + return (size == 8 ? 
0x58000000 : 0x18000000) | (offset << 5) | Rd.index(); } -uint32_t add(int Rd, int Rn, int Rm, unsigned size) +uint32_t add(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x8b000000 : 0x0b000000) | (Rm << 16) | (Rn << 5) | Rd; + return (size == 8 ? 0x8b000000 : 0x0b000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } -uint32_t sub(int Rd, int Rn, int Rm, unsigned size) +uint32_t sub(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0xcb000000 : 0x4b000000) | (Rm << 16) | (Rn << 5) | Rd; + return (size == 8 ? 0xcb000000 : 0x4b000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } -uint32_t madd(int Rd, int Rn, int Rm, int Ra, unsigned size) +uint32_t madd(Register Rd, Register Rn, Register Rm, Register Ra, unsigned size) { return (size == 8 ? 0x9b000000 : 0x1b000000) - | (Rm << 16) | (Ra << 10) | (Rn << 5) | Rd; + | (Rm.index() << 16) | (Ra.index() << 10) | (Rn.index() << 5) | Rd.index(); } -uint32_t mul(int Rd, int Rn, int Rm, unsigned size) +uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size) { - return madd(Rd, Rn, Rm, 31, size); + return madd(Rd, Rn, Rm, Register(31), size); } -uint32_t addi(int Rd, int Rn, int value, int shift, unsigned size) +uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) { return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) - | (value << 10) | (Rn << 5) | Rd; + | (value << 10) | (Rn.index() << 5) | Rd.index(); } -uint32_t subi(int Rd, int Rn, int value, int shift, unsigned size) +uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size) { return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 0x400000 : 0) - | (value << 10) | (Rn << 5) | Rd; + | (value << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t fabs(int Fd, int Fn, unsigned size) @@ -195,22 +205,16 @@ uint32_t fcvtDdSn(int Fd, int Fn) return 0x1e22c000 | (Fn << 5) | Fd; } -uint32_t fcvtasXdDn(int Rd, int Fn) +uint32_t fcvtasXdDn(Register Rd, int Fn) { - return 0x9e640000 | (Fn << 5) | Rd; + return 0x9e640000 | (Fn << 5) | Rd.index(); } -uint32_t fcvtasWdSn(int Rd, int Fn) +uint32_t fcvtasWdSn(Register Rd, int Fn) { - return 0x1e240000 | (Fn << 5) | Rd; + return 0x1e240000 | (Fn << 5) | Rd.index(); } -} // namespace - -namespace avian { -namespace codegen { -namespace arm { - using namespace avian::util; void shiftLeftR(Context* c, @@ -308,9 +312,9 @@ void moveRR(Context* c, if (srcIsFpr and dstIsFpr) { append(c, fmovFdFn(fpr(dst), fpr(src), srcSize)); } else if (srcIsFpr) { - append(c, fmovRdFn(fpr(dst), fpr(src), srcSize)); + append(c, fmovRdFn(dst->low, fpr(src), srcSize)); } else { - append(c, fmovFdRn(fpr(dst), fpr(src), srcSize)); + append(c, fmovFdRn(fpr(dst), src->low, srcSize)); } } else { switch (srcSize) { @@ -365,7 +369,7 @@ void moveCR2(Context* c, if (isFpr(dst)) { // todo: could use a single fmov here and avoid the temporary for // constants that fit - lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); moveCR(c, size, src, size, &tmp); moveRR(c, size, &tmp, size, dst); c->client->releaseTemporary(tmp.low); diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index da2d7151fd..33784dbb1b 100644 --- a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -33,15 +33,6 @@ const int N_FPRS = 32; const RegisterMask GPR_MASK = 0xffffffff; const RegisterMask FPR_MASK = 0xffffffff00000000; -inline int fpr(int reg) -{ 
- return reg - N_GPRS; -} - -inline int fpr(lir::RegisterPair* reg) -{ - return fpr(reg->low); -} #else constexpr Register ThreadRegister(8); constexpr Register StackRegister(13); From b519e245e24474e8ffb9105352320c5ba2535047 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 22 Dec 2014 12:57:18 -0700 Subject: [PATCH 04/20] finish implementing operations64.cpp for ARM64 support This is totally untested so far. --- src/codegen/target/arm/operations64.cpp | 426 ++++++++++++++++++++---- src/codegen/target/arm/registers.h | 5 +- 2 files changed, 360 insertions(+), 71 deletions(-) diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 724f856d13..c3058102df 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -16,16 +16,17 @@ #if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 -namespace avian { -namespace codegen { -namespace arm { +namespace { -inline int fpr(Register reg) +using namespace avian::codegen; +using namespace avian::codegen::arm; + +Register fpr(Register reg) { - return reg.index() - N_GPRS; + return Register(reg.index() - N_GPRS); } -inline int fpr(lir::RegisterPair* reg) +Register fpr(lir::RegisterPair* reg) { return fpr(reg->low); } @@ -104,19 +105,19 @@ uint32_t br(Register Rn) return 0xd61f0000 | (Rn.index() << 5); } -uint32_t fmovFdFn(int Fd, int Fn, unsigned size) +uint32_t fmovFdFn(Register Fd, Register Fn, unsigned size) { - return (size == 8 ? 0x1e604000 : 0x1e204000) | (Fn << 5) | Fd; + return (size == 8 ? 0x1e604000 : 0x1e204000) | (Fn.index() << 5) | Fd.index(); } -uint32_t fmovRdFn(Register Rd, int Fn, unsigned size) +uint32_t fmovRdFn(Register Rd, Register Fn, unsigned size) { - return (size == 8 ? 0x9e660000 : 0x1e260000) | (Fn << 5) | Rd.index(); + return (size == 8 ? 0x9e660000 : 0x1e260000) | (Fn.index() << 5) | Rd.index(); } -uint32_t fmovFdRn(int Fd, Register Rn, unsigned size) +uint32_t fmovFdRn(Register Fd, Register Rn, unsigned size) { - return (size == 8 ? 0x9e670000 : 0x1e270000) | (Rn.index() << 5) | Fd; + return (size == 8 ? 0x9e670000 : 0x1e270000) | (Rn.index() << 5) | Fd.index(); } uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) @@ -162,6 +163,18 @@ uint32_t sub(Register Rd, Register Rn, Register Rm, unsigned size) return (size == 8 ? 0xcb000000 : 0x4b000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } +uint32_t and_(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0x8a000000 : 0x0a000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t eor(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xca000000 : 0x4a000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + uint32_t madd(Register Rd, Register Rn, Register Rm, Register Ra, unsigned size) { return (size == 8 ? 0x9b000000 : 0x1b000000) @@ -185,36 +198,288 @@ uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size) | (value << 10) | (Rn.index() << 5) | Rd.index(); } -uint32_t fabs(int Fd, int Fn, unsigned size) +uint32_t fabs_(Register Fd, Register Fn, unsigned size) { - return (size == 8 ? 0x1e60c000 : 0x1e20c000) | (Fn << 5) | Fd; + return (size == 8 ? 0x1e60c000 : 0x1e20c000) | (Fn.index() << 5) | Fd.index(); } -uint32_t fneg(int Fd, int Fn, unsigned size) +uint32_t fneg(Register Fd, Register Fn, unsigned size) { - return (size == 8 ? 0x1e614000 : 0x1e214000) | (Fn << 5) | Fd; + return (size == 8 ? 
0x1e614000 : 0x1e214000) | (Fn.index() << 5) | Fd.index(); } -uint32_t fcvtSdDn(int Fd, int Fn) +uint32_t fsqrt(Register Fd, Register Fn, unsigned size) { - return 0x1e624000 | (Fn << 5) | Fd; + return (size == 8 ? 0x1e61c000 : 0x1e21c000) | (Fn.index() << 5) | Fd.index(); } -uint32_t fcvtDdSn(int Fd, int Fn) +uint32_t fadd(Register Fd, Register Fn, Register Fm, unsigned size) { - return 0x1e22c000 | (Fn << 5) | Fd; + return (size == 8 ? 0x1e602800 : 0x1e202800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); } -uint32_t fcvtasXdDn(Register Rd, int Fn) +uint32_t fsub(Register Fd, Register Fn, Register Fm, unsigned size) { - return 0x9e640000 | (Fn << 5) | Rd.index(); + return (size == 8 ? 0x1e603800 : 0x1e203800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); } -uint32_t fcvtasWdSn(Register Rd, int Fn) +uint32_t fmul(Register Fd, Register Fn, Register Fm, unsigned size) { - return 0x1e240000 | (Fn << 5) | Rd.index(); + return (size == 8 ? 0x1e600800 : 0x1e200800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); } +uint32_t fdiv(Register Fd, Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e601800 : 0x1e201800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fcvtSdDn(Register Fd, Register Fn) +{ + return 0x1e624000 | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fcvtDdSn(Register Fd, Register Fn) +{ + return 0x1e22c000 | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fcvtasXdDn(Register Rd, Register Fn) +{ + return 0x9e640000 | (Fn.index() << 5) | Rd.index(); +} + +uint32_t fcvtasWdSn(Register Rd, Register Fn) +{ + return 0x1e240000 | (Fn.index() << 5) | Rd.index(); +} + +uint32_t scvtfDdWn(Register Fd, Register Rn) +{ + return 0x1e620000 | (Rn.index() << 5) | Fd.index(); +} + +uint32_t scvtfSdWn(Register Fd, Register Rn) +{ + return 0x1e220000 | (Rn.index() << 5) | Fd.index(); +} + +uint32_t strFs(Register Fs, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xfc206800 : 0xbc206800) | (Rm.index() << 16) + | (Rn.index() << 5) | Fs.index(); +} + +uint32_t strb(Register Rs, Register Rn, Register Rm) +{ + return 0x38206800 | (Rm.index() << 16) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t strh(Register Rs, Register Rn, Register Rm) +{ + return 0x78206800 | (Rm.index() << 16) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t striFs(Register Fs, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xfc000000 : 0xbc000000) | (offset << 16) + | (Rn.index() << 5) | Fs.index(); +} + +uint32_t str(Register Rs, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xf8206800 : 0xb8206800) | (Rm.index() << 16) + | (Rn.index() << 5) | Rs.index(); +} + +uint32_t strbi(Register Rs, Register Rn, int offset) +{ + return 0x39000000 | (offset << 10) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t strhi(Register Rs, Register Rn, int offset) +{ + return 0x79000000 | (offset << 10) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t stri(Register Rs, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xb9000000 : 0xf9000000) | (offset << 10) + | (Rn.index() << 5) | Rs.index(); +} + +uint32_t ldrFd(Register Fd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 
0xfc606800 : 0xbc606800) | (Rm.index() << 16) + | (Rn.index() << 5) | Fd.index(); +} + +uint32_t ldrb(Register Rd, Register Rn, Register Rm) +{ + return 0x38606800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsb(Register Rd, Register Rn, Register Rm) +{ + return 0x38e06800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrh(Register Rd, Register Rn, Register Rm) +{ + return 0x78606800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsh(Register Rd, Register Rn, Register Rm) +{ + return 0x78e06800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsw(Register Rd, Register Rn, Register Rm) +{ + return 0xb8a06800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldr(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xf8606800 : 0xb8606800) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldriFd(Register Fd, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xfc400000 : 0xbc400000) | (offset << 16) + | (Rn.index() << 5) | Fd.index(); +} + +uint32_t ldrbi(Register Rd, Register Rn, int offset) +{ + return 0x39400000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsbi(Register Rd, Register Rn, int offset) +{ + return 0x39c00000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrhi(Register Rd, Register Rn, int offset) +{ + return 0x79400000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrshi(Register Rd, Register Rn, int offset) +{ + return 0x79c00000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrswi(Register Rd, Register Rn, int offset) +{ + return 0xb9800000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xb9400000 : 0xf9400000) | (offset << 10) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t fcmp(Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e602000 : 0x1e202000) | (Fm.index() << 16) + | (Fn.index() << 5); +} + +uint32_t neg(Register Rd, Register Rm, unsigned size) +{ + return (size == 8 ? 0xcb0003e0 : 0x4b0003e0) | (Rm.index() << 16) + | Rd.index(); +} + +uint32_t cmp(Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xeb00001f : 0x6b00001f) | (Rm.index() << 16) + | (Rn.index() << 5); +} + +uint32_t cmpi(Register Rn, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0xf100001f : 0x7100001f) | (shift == 12 ? 
0x400000 : 0) + | (value << 10) | (Rn.index() << 5); +} + +uint32_t b(int offset) +{ + return 0x14000000 | (offset >> 2); +} + +uint32_t bl(int offset) +{ + return 0x94000000 | (offset >> 2); +} + +uint32_t blr(Register Rn) +{ + return 0xd63f0000 | (Rn.index() << 5); +} + +uint32_t beq(int offset) +{ + return 0x54000000 | (offset >> 2); +} + +uint32_t bne(int offset) +{ + return 0x54000001 | (offset >> 2); +} + +uint32_t blt(int offset) +{ + return 0x5400000b | (offset >> 2); +} + +uint32_t bgt(int offset) +{ + return 0x5400000c | (offset >> 2); +} + +uint32_t ble(int offset) +{ + return 0x5400000d | (offset >> 2); +} + +uint32_t bge(int offset) +{ + return 0x5400000a | (offset >> 2); +} + +uint32_t bhi(int offset) +{ + return 0x54000008 | (offset >> 2); +} + +uint32_t bpl(int offset) +{ + return 0x54000005 | (offset >> 2); +} + +uint32_t brk(int flag) +{ + return 0xd4200020 | (flag << 5); +} + +uint32_t dmb(int flag) +{ + return 0xd50330bf | (flag << 8); +} + +} // namespace + +namespace avian { +namespace codegen { +namespace arm { + using namespace avian::util; void shiftLeftR(Context* c, @@ -533,9 +798,9 @@ void int2FloatRR(Context* c, lir::RegisterPair* b) { if (size == 8) { - append(c, scvtfDdWn(fpr(b), b->low)); + append(c, scvtfDdWn(fpr(a), b->low)); } else { - append(c, scvtfSdWn(fpr(b), b->low)); + append(c, scvtfSdWn(fpr(a), b->low)); } } @@ -554,7 +819,7 @@ void floatAddR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, fadd(fpr, dst, fpr(b), fpr(a), size)); + append(c, fadd(fpr(dst), fpr(b), fpr(a), size)); } void floatSubtractR(Context* c, @@ -563,7 +828,7 @@ void floatSubtractR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, fsub(fpr, dst, fpr(b), fpr(a), size)); + append(c, fsub(fpr(dst), fpr(b), fpr(a), size)); } void floatMultiplyR(Context* c, @@ -572,7 +837,7 @@ void floatMultiplyR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, fmul(fpr, dst, fpr(b), fpr(a), size)); + append(c, fmul(fpr(dst), fpr(b), fpr(a), size)); } void floatDivideR(Context* c, @@ -581,19 +846,19 @@ void floatDivideR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, fdiv(fpr, dst, fpr(b), fpr(a), size)); + append(c, fdiv(fpr(dst), fpr(b), fpr(a), size)); } -int normalize(Context* c, - int offset, - int index, - unsigned scale, - bool* preserveIndex, - bool* release) +Register normalize(Context* c, + int offset, + Register index, + unsigned scale, + bool* preserveIndex, + bool* release) { if (offset != 0 or scale != 1) { - lir::Register normalizedIndex( - *preserveIndex ? con->client->acquireTemporary(GPR_MASK) : index); + lir::RegisterPair normalizedIndex( + *preserveIndex ? 
c->client->acquireTemporary(GPR_MASK) : index); if (*preserveIndex) { *release = true; @@ -602,10 +867,10 @@ int normalize(Context* c, *release = false; } - int scaled; + Register scaled; if (scale != 1) { - lir::Register unscaledIndex(index); + lir::RegisterPair unscaledIndex(index); ResolvedPromise scalePromise(log(scale)); lir::Constant scaleConstant(&scalePromise); @@ -622,12 +887,12 @@ int normalize(Context* c, } if (offset != 0) { - lir::Register untranslatedIndex(scaled); + lir::RegisterPair untranslatedIndex(scaled); ResolvedPromise offsetPromise(offset); lir::Constant offsetConstant(&offsetPromise); - lir::Register tmp(con->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); moveCR(c, vm::TargetBytesPerWord, &offsetConstant, @@ -638,7 +903,7 @@ int normalize(Context* c, &tmp, &untranslatedIndex, &normalizedIndex); - con->client->releaseTemporary(tmp.low); + c->client->releaseTemporary(tmp.low); } return normalizedIndex.low; @@ -651,15 +916,21 @@ int normalize(Context* c, void store(Context* c, unsigned size, lir::RegisterPair* src, - int base, + Register base, int offset, - int index, + Register index, unsigned scale, bool preserveIndex) { - if (index != lir::NoRegister) { + if (index != NoRegister) { bool release; - int normalized + + // todo: browsing the instruction set, it looks like we could do a + // scaled store or load in a single instruction if the offset is + // zero, and we could simplify things for the case of non-zero + // offsets also + + Register normalized = normalize(c, offset, index, scale, &preserveIndex, &release); if (isFpr(src)) { @@ -726,7 +997,7 @@ void store(Context* c, } } } else { - lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); ResolvedPromise offsetPromise(offset); lir::Constant offsetConstant(&offsetPromise); moveCR(c, @@ -755,18 +1026,18 @@ void moveRM(Context* c, void load(Context* c, unsigned srcSize, - int base, + Register base, int offset, - int index, + Register index, unsigned scale, unsigned dstSize, lir::RegisterPair* dst, bool preserveIndex, bool signExtend) { - if (index != lir::NoRegister) { + if (index != NoRegister) { bool release; - int normalized + Register normalized = normalize(c, offset, index, scale, &preserveIndex, &release); if (isFpr(dst)) { // FPR load @@ -819,7 +1090,7 @@ void load(Context* c, switch (srcSize) { case 4: case 8: - append(c, ldriFd(fpr(dst->low), base, offset)); + append(c, ldriFd(fpr(dst->low), base, offset, srcSize)); break; default: @@ -848,7 +1119,7 @@ void load(Context* c, if (signExtend and srcSize == 4 and dstSize == 8) { append(c, ldrswi(dst->low, base, offset)); } else { - append(c, ldri(dst->low, base, offset, size)); + append(c, ldri(dst->low, base, offset, srcSize)); } break; @@ -857,7 +1128,7 @@ void load(Context* c, } } } else { - lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); ResolvedPromise offsetPromise(offset); lir::Constant offsetConstant(&offsetPromise); moveCR(c, @@ -927,7 +1198,7 @@ void andC(Context* c, if (~v) { bool useTemporary = b->low == dst->low; - lir::Register tmp(dst->low); + lir::RegisterPair tmp(dst->low); if (useTemporary) { tmp.low = c->client->acquireTemporary(GPR_MASK); } @@ -972,7 +1243,7 @@ void moveAR(Context* c, lir::Constant constant(src->address); moveCR(c, srcSize, &constant, dstSize, dst); - lir::Memory memory(dst->low, 0, -1, 0); + lir::Memory memory(dst->low, 0, 
NoRegister, 0); moveMR(c, dstSize, &memory, dstSize, dst); } @@ -995,7 +1266,7 @@ void compareRR(Context* c, void compareCR(Context* c, unsigned aSize, lir::Constant* a, - unsigned bSize, + unsigned bSize UNUSED, lir::RegisterPair* b) { assertT(c, aSize == bSize); @@ -1003,9 +1274,9 @@ void compareCR(Context* c, int32_t v = a->value->value(); if (v) { if (v > 0 and v < 0x1000) { - append(c, cmpi(b->low, v, 0, size)); + append(c, cmpi(b->low, v, 0, aSize)); } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { - append(c, cmpi(b->low, v >> 12, 12, size)); + append(c, cmpi(b->low, v >> 12, 12, aSize)); } else { // todo abort(c); @@ -1021,7 +1292,7 @@ void compareCM(Context* c, { assertT(c, aSize == bSize); - lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); moveMR(c, bSize, b, bSize, &tmp); compareCR(c, aSize, a, bSize, &tmp); c->client->releaseTemporary(tmp.low); @@ -1035,7 +1306,7 @@ void compareRM(Context* c, { assertT(c, aSize == bSize); - lir::Register tmp(c->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); moveMR(c, bSize, b, bSize, &tmp); compareRR(c, aSize, a, bSize, &tmp); c->client->releaseTemporary(tmp.low); @@ -1083,7 +1354,7 @@ int32_t branch(Context* c, lir::TernaryOperation op) void conditional(Context* c, int32_t branch, lir::Constant* target) { - appendOffsetTask(c, target->value, offsetPromise(con)); + appendOffsetTask(c, target->value, offsetPromise(c)); append(c, branch); } @@ -1149,7 +1420,7 @@ ShiftMaskPromise* shiftMaskPromise(Context* c, unsigned shift, int64_t mask) { - return new (con->zone) ShiftMaskPromise(base, shift, mask); + return new (c->zone) ShiftMaskPromise(base, shift, mask); } void moveCM(Context* c, @@ -1170,10 +1441,10 @@ void moveCM(Context* c, } break; default: - lir::Register tmp(con->client->acquireTemporary(GPR_MASK)); + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); moveCR(c, srcSize, src, dstSize, &tmp); moveRM(c, dstSize, &tmp, dstSize, dst); - con->client->releaseTemporary(tmp.low); + c->client->releaseTemporary(tmp.low); } } @@ -1206,7 +1477,8 @@ void longCallC(Context* c, unsigned size UNUSED, lir::Constant* target) { assertT(c, size == vm::TargetBytesPerWord); - lir::Register tmp(9); // a non-arg reg that we don't mind clobbering + lir::RegisterPair tmp( + Register(9)); // a non-arg reg that we don't mind clobbering moveCR2(c, vm::TargetBytesPerWord, target, &tmp, offsetPromise(c)); callR(c, vm::TargetBytesPerWord, &tmp); } @@ -1215,7 +1487,8 @@ void longJumpC(Context* c, unsigned size UNUSED, lir::Constant* target) { assertT(c, size == vm::TargetBytesPerWord); - lir::Register tmp(9); // a non-arg reg that we don't mind clobbering + lir::RegisterPair tmp( + Register(9)); // a non-arg reg that we don't mind clobbering moveCR2(c, vm::TargetBytesPerWord, target, &tmp, offsetPromise(c)); jumpR(c, vm::TargetBytesPerWord, &tmp); } @@ -1246,7 +1519,7 @@ void trap(Context* c) void memoryBarrier(Context* c) { - append(c, dmb()); + append(c, dmb(0xF)); } void loadBarrier(Context* c) @@ -1264,6 +1537,21 @@ void storeLoadBarrier(Context* c) memoryBarrier(c); } +bool needJump(MyBlock*) +{ + return false; +} + +unsigned padding(MyBlock*, unsigned) +{ + return 0; +} + +void resolve(MyBlock*) +{ + // ignore +} + } // namespace arm } // namespace codegen } // namespace avian diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index 33784dbb1b..18622a81e5 100644 --- 
a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -14,6 +14,8 @@ #include #include +#include "avian/environment.h" + namespace avian { namespace codegen { namespace arm { @@ -21,11 +23,10 @@ namespace arm { const uint64_t MASK_LO32 = 0xffffffff; const unsigned MASK_LO8 = 0xff; -#ifdef ARCH_arm64 +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 constexpr Register ThreadRegister(19); constexpr Register StackRegister(31); constexpr Register LinkRegister(30); -constexpr Register LinkRegister(29); constexpr Register ProgramCounter(0xFE); // i.e. unaddressable const int N_GPRS = 32; From 9158ee39c00c35f42090f75cc496ef97391fd5ba Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 23 Dec 2014 16:57:40 -0700 Subject: [PATCH 05/20] remove 64-bit code from compile-arm.S since it's moved to compile-arm64.S --- src/compile-arm.S | 104 ---------------------------------------------- 1 file changed, 104 deletions(-) diff --git a/src/compile-arm.S b/src/compile-arm.S index 432b79e646..2f566f9558 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -23,108 +23,6 @@ # define GLOBAL(x) x #endif -#ifdef __aarch64__ - -.globl GLOBAL(vmInvoke) -.align 2 -GLOBAL(vmInvoke): - // arguments: - // x0 : thread - // x1 : function - // x2 : arguments - // w3 : argumentFootprint - // w4 : frameSize (not used) - // w5 : returnType - - // allocate frame - stp x29, x30, [sp,#-96]! - - // save callee-saved register values - stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - - // save return type - str w5, [sp,#-16]! - - mov x5, sp - str x5, [x0,#TARGET_THREAD_SCRATCH] - - // copy arguments into place - sub sp, sp, w3 - mov x5, #0 - b LOCAL(vmInvoke_argumentTest) - -LOCAL(vmInvoke_argumentLoop): - ldr x5, [x2, x4] - str x5, [sp, x4] - add x4, x4, #BYTES_PER_WORD - -LOCAL(vmInvoke_argumentTest): - cmp x4, x3 - blt LOCAL(vmInvoke_argumentLoop) - - // we use x19 to hold the thread pointer, by convention - mov x19, x0 - - // load and call function address - blr x1 - -.globl GLOBAL(vmInvoke_returnAddress) -.align 2 -GLOBAL(vmInvoke_returnAddress): - // restore stack pointer - ldr x5, [x19, #TARGET_THREAD_SCRATCH] - mov sp, x5 - - // clear MyThread::stack to avoid confusing another thread calling - // java.lang.Thread.getStackTrace on this one. See - // MyProcess::getStackTrace in compile.cpp for details on how we get - // a reliable stack trace from a thread that might be interrupted at - // any point in its execution. 
- mov x5, #0 - str x5, [x19, #TARGET_THREAD_STACK] - -.globl GLOBAL(vmInvoke_safeStack) -.align 2 -GLOBAL(vmInvoke_safeStack): - -#ifdef AVIAN_CONTINUATIONS -#error todo -#endif // AVIAN_CONTINUATIONS - - mov x5, #0 - str x5, [x19, #TARGET_THREAD_STACK] - - // restore return type - ldr w5, [sp], #4 - - // restore callee-saved register values - ldp x19, x20, [sp,#16] - ldp x21, x22, [sp,#32] - ldp x23, x24, [sp,#48] - ldp x25, x26, [sp,#64] - ldp x27, x28, [sp,#80] - ldp x29, x30, [sp],#96 - -LOCAL(vmInvoke_return): - br x30 - -.globl GLOBAL(vmJumpAndInvoke) -.align 2 -GLOBAL(vmJumpAndInvoke): -#ifdef AVIAN_CONTINUATIONS -#error todo -#else // not AVIAN_CONTINUATIONS - // vmJumpAndInvoke should only be called when continuations are - // enabled, so we force a crash if we reach here: - brk 0 -#endif // not AVIAN_CONTINUATIONS - -#elif defined __arm__ - #define CONTINUATION_NEXT 4 #define CONTINUATION_ADDRESS 16 #define CONTINUATION_RETURN_ADDRESS_OFFSET 20 @@ -355,5 +253,3 @@ LOCAL(vmJumpAndInvoke_getAddress_word): mov r1,#0 ldr r1,[r1] #endif // not AVIAN_CONTINUATIONS - -#endif // __arm__ From cbea966d1dceede318c814563ef38578c3f5a5fe Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 23 Dec 2014 16:59:04 -0700 Subject: [PATCH 06/20] various ARM64 JIT bugfixes Three of the tests now pass. Yay! --- src/codegen/target/arm/assembler.cpp | 11 ++- src/codegen/target/arm/fixup.cpp | 122 ++++++++++++++++++++++-- src/codegen/target/arm/fixup.h | 4 + src/codegen/target/arm/operations32.cpp | 95 ------------------ src/codegen/target/arm/operations64.cpp | 116 +++++++++++++--------- src/compile-arm64.S | 16 ++-- 6 files changed, 209 insertions(+), 155 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index 23b07ef201..b4046223e1 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -946,11 +946,20 @@ class MyAssembler : public Assembler { unsigned instruction = o->block->start + padding(o->block, o->offset) + o->offset; + int32_t* p = reinterpret_cast(dst + instruction); + +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 + int32_t v = entry - instruction; + expect(&con, v == (v & PoolOffsetMask)); + + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *p = (((v >> 2) << 5) & mask) | ((~mask) & *p); +#else int32_t v = (entry - 8) - instruction; expect(&con, v == (v & PoolOffsetMask)); - int32_t* p = reinterpret_cast(dst + instruction); *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); +#endif poolSize += TargetBytesPerWord; } diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp index e1d41b6eb4..7f5c0ff277 100644 --- a/src/codegen/target/arm/fixup.cpp +++ b/src/codegen/target/arm/fixup.cpp @@ -92,14 +92,27 @@ bool bounded(int right, int left, int32_t v) void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) { - // ARM's PC is two words ahead, and branches drop the bottom 2 bits. 
- int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; - - int32_t mask; - expect(s, bounded(0, 8, v)); - mask = 0xFFFFFF; - int32_t* p = reinterpret_cast(instruction); + +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 + int32_t v; + int32_t mask; + if ((*p >> 24) == 0x54) { + // conditional branch + v = ((reinterpret_cast(value) - instruction) >> 2) << 5; + mask = 0xFFFFE0; + } else { + // unconditional branch + v = (reinterpret_cast(value) - instruction) >> 2; + mask = 0x3FFFFFF; + } +#else + int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; + const int32_t mask = 0xFFFFFF; +#endif + + expect(s, bounded(0, 8, v)); + *p = (v & mask) | ((~mask) & *p); return instruction + 4; @@ -214,6 +227,101 @@ void appendPoolEvent(Context* con, b->poolEventTail = e; } +bool needJump(MyBlock* b) +{ + return b->next or b->size != (b->size & PoolOffsetMask); +} + +unsigned padding(MyBlock* b, unsigned offset) +{ + unsigned total = 0; + for (PoolEvent* e = b->poolEventHead; e; e = e->next) { + if (e->offset <= offset) { + if (needJump(b)) { + total += vm::TargetBytesPerWord; + } + for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { + total += vm::TargetBytesPerWord; + } + } else { + break; + } + } + return total; +} + +void resolve(MyBlock* b) +{ + Context* con = b->context; + + if (b->poolOffsetHead) { + if (con->poolOffsetTail) { + con->poolOffsetTail->next = b->poolOffsetHead; + } else { + con->poolOffsetHead = b->poolOffsetHead; + } + con->poolOffsetTail = b->poolOffsetTail; + } + + if (con->poolOffsetHead) { + bool append; + if (b->next == 0 or b->next->poolEventHead) { + append = true; + } else { + int32_t v + = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8) + - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); + + append = (v != (v & PoolOffsetMask)); + + if (DebugPool) { + fprintf(stderr, + "current %p %d %d next %p %d %d\n", + b, + b->start, + b->size, + b->next, + b->start + b->size, + b->next->size); + fprintf(stderr, + "offset %p %d is of distance %d to next block; append? 
%d\n", + con->poolOffsetHead, + con->poolOffsetHead->offset, + v, + append); + } + } + + if (append) { +#ifndef NDEBUG + int32_t v + = (b->start + b->size - 8) + - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); + + expect(con, v == (v & PoolOffsetMask)); +#endif // not NDEBUG + + appendPoolEvent( + con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); + + if (DebugPool) { + for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) { + fprintf(stderr, + "include %p %d in pool event %p at offset %d in block %p\n", + o, + o->offset, + b->poolEventTail, + b->size, + b); + } + } + + con->poolOffsetHead = 0; + con->poolOffsetTail = 0; + } + } +} + } // namespace arm } // namespace codegen } // namespace avian diff --git a/src/codegen/target/arm/fixup.h b/src/codegen/target/arm/fixup.h index 5460295d95..2e9c0aca01 100644 --- a/src/codegen/target/arm/fixup.h +++ b/src/codegen/target/arm/fixup.h @@ -27,7 +27,11 @@ namespace arm { const bool DebugPool = false; +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +const int32_t PoolOffsetMask = 0x1FFFFF; +#else const int32_t PoolOffsetMask = 0xFFF; +#endif class Task { public: diff --git a/src/codegen/target/arm/operations32.cpp b/src/codegen/target/arm/operations32.cpp index 5a9f5e8a0e..07dd7f0175 100644 --- a/src/codegen/target/arm/operations32.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -181,101 +181,6 @@ void unsignedShiftRightC(Context* con, } } -bool needJump(MyBlock* b) -{ - return b->next or b->size != (b->size & PoolOffsetMask); -} - -unsigned padding(MyBlock* b, unsigned offset) -{ - unsigned total = 0; - for (PoolEvent* e = b->poolEventHead; e; e = e->next) { - if (e->offset <= offset) { - if (needJump(b)) { - total += vm::TargetBytesPerWord; - } - for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { - total += vm::TargetBytesPerWord; - } - } else { - break; - } - } - return total; -} - -void resolve(MyBlock* b) -{ - Context* con = b->context; - - if (b->poolOffsetHead) { - if (con->poolOffsetTail) { - con->poolOffsetTail->next = b->poolOffsetHead; - } else { - con->poolOffsetHead = b->poolOffsetHead; - } - con->poolOffsetTail = b->poolOffsetTail; - } - - if (con->poolOffsetHead) { - bool append; - if (b->next == 0 or b->next->poolEventHead) { - append = true; - } else { - int32_t v - = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8) - - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); - - append = (v != (v & PoolOffsetMask)); - - if (DebugPool) { - fprintf(stderr, - "current %p %d %d next %p %d %d\n", - b, - b->start, - b->size, - b->next, - b->start + b->size, - b->next->size); - fprintf(stderr, - "offset %p %d is of distance %d to next block; append? 
%d\n", - con->poolOffsetHead, - con->poolOffsetHead->offset, - v, - append); - } - } - - if (append) { -#ifndef NDEBUG - int32_t v - = (b->start + b->size - 8) - - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); - - expect(con, v == (v & PoolOffsetMask)); -#endif // not NDEBUG - - appendPoolEvent( - con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); - - if (DebugPool) { - for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) { - fprintf(stderr, - "include %p %d in pool event %p at offset %d in block %p\n", - o, - o->offset, - b->poolEventTail, - b->size, - b); - } - } - - con->poolOffsetHead = 0; - con->poolOffsetTail = 0; - } - } -} - void jumpR(Context* con, unsigned size UNUSED, lir::RegisterPair* target) { assertT(con, size == vm::TargetBytesPerWord); diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index c3058102df..32a31cf2f5 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -125,9 +125,16 @@ uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } +uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) +{ + return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) + | (value << 10) | (Rn.index() << 5) | Rd.index(); +} + uint32_t mov(Register Rd, Register Rn, unsigned size) { - return orr(Rd, Register(31), Rn, size); + return Rn.index() == 31 ? addi(Rd, Rn, 0, 0, size) + : orr(Rd, Register(31), Rn, size); } uint32_t movz(Register Rd, int value, unsigned shift, unsigned size) @@ -150,7 +157,8 @@ uint32_t movk(Register Rd, int value, unsigned shift, unsigned size) uint32_t ldrPCRel(Register Rd, int offset, unsigned size) { - return (size == 8 ? 0x58000000 : 0x18000000) | (offset << 5) | Rd.index(); + return (size == 8 ? 0x58000000 : 0x18000000) | ((offset >> 2) << 5) + | Rd.index(); } uint32_t add(Register Rd, Register Rn, Register Rm, unsigned size) @@ -186,12 +194,6 @@ uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size) return madd(Rd, Rn, Rm, Register(31), size); } -uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) -{ - return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) - | (value << 10) | (Rn.index() << 5) | Rd.index(); -} - uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size) { return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 0x400000 : 0) @@ -307,8 +309,8 @@ uint32_t strhi(Register Rs, Register Rn, int offset) uint32_t stri(Register Rs, Register Rn, int offset, unsigned size) { - return (size == 8 ? 0xb9000000 : 0xf9000000) | (offset << 10) - | (Rn.index() << 5) | Rs.index(); + return (size == 8 ? 0xf9000000 : 0xb9000000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Rs.index(); } uint32_t ldrFd(Register Fd, Register Rn, Register Rm, unsigned size) @@ -381,8 +383,8 @@ uint32_t ldrswi(Register Rd, Register Rn, int offset) uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size) { - return (size == 8 ? 0xb9400000 : 0xf9400000) | (offset << 10) - | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0xf9400000 : 0xb9400000) + | ((offset >> (size == 8 ? 
3 : 2)) << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t fcmp(Register Fn, Register Fm, unsigned size) @@ -400,7 +402,7 @@ uint32_t neg(Register Rd, Register Rm, unsigned size) uint32_t cmp(Register Rn, Register Rm, unsigned size) { return (size == 8 ? 0xeb00001f : 0x6b00001f) | (Rm.index() << 16) - | (Rn.index() << 5); + | (Rn.index() == 31 ? 0x2063ff : (Rn.index() << 5)); } uint32_t cmpi(Register Rn, int value, unsigned shift, unsigned size) @@ -426,42 +428,42 @@ uint32_t blr(Register Rn) uint32_t beq(int offset) { - return 0x54000000 | (offset >> 2); + return 0x54000000 | ((offset >> 2) << 5); } uint32_t bne(int offset) { - return 0x54000001 | (offset >> 2); + return 0x54000001 | ((offset >> 2) << 5); } uint32_t blt(int offset) { - return 0x5400000b | (offset >> 2); + return 0x5400000b | ((offset >> 2) << 5); } uint32_t bgt(int offset) { - return 0x5400000c | (offset >> 2); + return 0x5400000c | ((offset >> 2) << 5); } uint32_t ble(int offset) { - return 0x5400000d | (offset >> 2); + return 0x5400000d | ((offset >> 2) << 5); } uint32_t bge(int offset) { - return 0x5400000a | (offset >> 2); + return 0x5400000a | ((offset >> 2) << 5); } uint32_t bhi(int offset) { - return 0x54000008 | (offset >> 2); + return 0x54000008 | ((offset >> 2) << 5); } uint32_t bpl(int offset) { - return 0x54000005 | (offset >> 2); + return 0x54000005 | ((offset >> 2) << 5); } uint32_t brk(int flag) @@ -966,7 +968,7 @@ void store(Context* c, if (release) { c->client->releaseTemporary(normalized); } - } else if (abs(offset) == (abs(offset) & 0xFF)) { + } else if (abs(offset) == (abs(offset) & 0xFFF)) { if (isFpr(src)) { switch (size) { case 4: @@ -988,7 +990,12 @@ void store(Context* c, break; case 4: + assertT(c, offset == (offset & (~3))); + append(c, stri(src->low, base, offset, size)); + break; + case 8: + assertT(c, offset == (offset & (~7))); append(c, stri(src->low, base, offset, size)); break; @@ -1020,8 +1027,21 @@ void moveRM(Context* c, { assertT(c, srcSize == dstSize); - store( - c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true); + if (src->low.index() == 31) { + assertT(c, c->client == 0); // the compiler should never ask us to + // store the SP; we'll only get here + // when assembling a thunk + + lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can + // clobber this + + moveRR(c, srcSize, src, srcSize, &tmp); + store( + c, srcSize, &tmp, dst->base, dst->offset, dst->index, dst->scale, true); + } else { + store( + c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true); + } } void load(Context* c, @@ -1085,7 +1105,7 @@ void load(Context* c, if (release) { c->client->releaseTemporary(normalized); } - } else if (abs(offset) == (abs(offset) & 0xFF)) { + } else if (abs(offset) == (abs(offset) & 0xFFF)) { if (isFpr(dst)) { switch (srcSize) { case 4: @@ -1119,6 +1139,7 @@ void load(Context* c, if (signExtend and srcSize == 4 and dstSize == 8) { append(c, ldrswi(dst->low, base, offset)); } else { + assertT(c, offset == (offset & (srcSize == 8 ? 
(~7) : (~3)))); append(c, ldri(dst->low, base, offset, srcSize)); } break; @@ -1238,7 +1259,8 @@ void moveAR(Context* c, unsigned dstSize, lir::RegisterPair* dst) { - assertT(c, srcSize == TargetBytesPerWord and dstSize == TargetBytesPerWord); + assertT(c, srcSize == vm::TargetBytesPerWord + and dstSize == vm::TargetBytesPerWord); lir::Constant constant(src->address); moveCR(c, srcSize, &constant, dstSize, dst); @@ -1312,6 +1334,20 @@ void compareRM(Context* c, c->client->releaseTemporary(tmp.low); } +void compareMR(Context* c, + unsigned aSize, + lir::Memory* a, + unsigned bSize, + lir::RegisterPair* b) +{ + assertT(c, aSize == bSize); + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveMR(c, aSize, a, aSize, &tmp); + compareRR(c, aSize, &tmp, bSize, b); + c->client->releaseTemporary(tmp.low); +} + int32_t branch(Context* c, lir::TernaryOperation op) { switch (op) { @@ -1397,8 +1433,17 @@ void branchRM(Context* c, assertT(c, not isFloatBranch(op)); assertT(c, size <= vm::TargetBytesPerWord); - compareRM(c, size, a, size, b); - branch(c, op, target); + if (a->low.index() == 31) { + // stack overflow checks need to compare to the stack pointer, but + // we can only encode that in the opposite operand order we're + // given, so we need to reverse everything: + assertT(c, op == lir::JumpIfGreaterOrEqual); + compareMR(c, size, b, size, a); + branch(c, lir::JumpIfLess, target); + } else { + compareRM(c, size, a, size, b); + branch(c, op, target); + } } void branchCM(Context* c, @@ -1537,21 +1582,6 @@ void storeLoadBarrier(Context* c) memoryBarrier(c); } -bool needJump(MyBlock*) -{ - return false; -} - -unsigned padding(MyBlock*, unsigned) -{ - return 0; -} - -void resolve(MyBlock*) -{ - // ignore -} - } // namespace arm } // namespace codegen } // namespace avian diff --git a/src/compile-arm64.S b/src/compile-arm64.S index 65f76df6f3..744e6cd71e 100644 --- a/src/compile-arm64.S +++ b/src/compile-arm64.S @@ -16,11 +16,11 @@ #define BYTES_PER_WORD 4 #define LOCAL(x) .L##x - + #ifdef __APPLE__ # define GLOBAL(x) _##x #else -# define GLOBAL(x) x +# define GLOBAL(x) x #endif #define CONTINUATION_NEXT 4 @@ -29,7 +29,7 @@ #define CONTINUATION_FRAME_POINTER_OFFSET 24 #define CONTINUATION_LENGTH 28 #define CONTINUATION_BODY 32 - + .globl GLOBAL(vmInvoke) .align 2 GLOBAL(vmInvoke): @@ -89,8 +89,7 @@ GLOBAL(vmInvoke_returnAddress): // MyProcess::getStackTrace in compile.cpp for details on how we get // a reliable stack trace from a thread that might be interrupted at // any point in its execution. - mov x5, #0 - str x5, [x19, #TARGET_THREAD_STACK] + str xzr, [x19, #TARGET_THREAD_STACK] .globl GLOBAL(vmInvoke_safeStack) .align 2 @@ -100,11 +99,10 @@ GLOBAL(vmInvoke_safeStack): #error todo #endif // AVIAN_CONTINUATIONS - mov x5, #0 - str x5, [x19, #TARGET_THREAD_STACK] + str xzr, [x19, #TARGET_THREAD_STACK] // restore return type - ldr w5, [sp], #4 + ldr w5, [sp,#16]! // restore callee-saved register values ldp x19, x20, [sp,#16] @@ -112,7 +110,7 @@ GLOBAL(vmInvoke_safeStack): ldp x23, x24, [sp,#48] ldp x25, x26, [sp,#64] ldp x27, x28, [sp,#80] - ldp x29, x30, [sp],#96 + ldp x29, x30, [sp,#96]! 
LOCAL(vmInvoke_return): br x30 From 78735b35a8c8be1eb6d283f33eabca578f97b9be Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 23 Dec 2014 21:09:43 -0700 Subject: [PATCH 07/20] more ARM64 bugfixes, more passing tests --- src/codegen/target/arm/assembler.cpp | 23 +++++++++--- src/codegen/target/arm/operations64.cpp | 47 +++++++++++-------------- src/compile-arm64.S | 4 +-- 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index b4046223e1..a50c36d173 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -89,11 +89,11 @@ void nextFrame(ArchitectureContext* con, void** stack) { assertT(con, *ip >= start); - assertT(con, *ip <= start + (size / TargetBytesPerWord)); + assertT(con, *ip <= start + (size / 4)); uint32_t* instruction = static_cast(*ip); - if ((*start >> 20) == 0xe59) { + if ((*start >> 20) == (TargetBytesPerWord == 8 ? 0xf94 : 0xe59)) { // skip stack overflow check start += 3; } @@ -111,7 +111,8 @@ void nextFrame(ArchitectureContext* con, return; } - if (*instruction == 0xe12fff1e) { // return + if (*instruction == (TargetBytesPerWord == 8 ? 0xd61f03c0 : 0xe12fff1e)) { + // return *ip = link; return; } @@ -124,7 +125,21 @@ void nextFrame(ArchitectureContext* con, // check for post-non-tail-call stack adjustment of the form "sub // sp, sp, #offset": - if ((*instruction >> 12) == 0xe24dd) { + if (TargetBytesPerWord == 8 and (*instruction & 0xff0003ff) == 0xd10003ff) + { + unsigned value = (*instruction >> 10) & 0xfff; + unsigned shift = (*instruction >> 22) & 1; + switch (shift) { + case 0: + offset -= value; + break; + case 1: + offset -= value << 12; + break; + default: + abort(con); + } + } else if (TargetBytesPerWord == 4 and (*instruction >> 12) == 0xe24dd) { unsigned value = *instruction & 0xff; unsigned rotation = (*instruction >> 8) & 0xf; switch (rotation) { diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 32a31cf2f5..a1c5e3c23b 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -378,7 +378,7 @@ uint32_t ldrshi(Register Rd, Register Rn, int offset) uint32_t ldrswi(Register Rd, Register Rn, int offset) { - return 0xb9800000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); + return 0xb9800000 | ((offset >> 2) << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size) @@ -642,7 +642,7 @@ void moveCR2(Context* c, c->client->releaseTemporary(tmp.low); } else if (src->value->resolved()) { int64_t value = src->value->value(); - if (value > 0) { + if (value >= 0) { append(c, movz(dst->low, value & 0xFFFF, 0, size)); if (value >> 16) { append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); @@ -695,7 +695,7 @@ void subR(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - append(c, sub(dst->low, a->low, b->low, size)); + append(c, sub(dst->low, b->low, a->low, size)); } void addC(Context* c, @@ -1137,6 +1137,7 @@ void load(Context* c, case 4: case 8: if (signExtend and srcSize == 4 and dstSize == 8) { + assertT(c, offset == (offset & (~3))); append(c, ldrswi(dst->low, base, offset)); } else { assertT(c, offset == (offset & (srcSize == 8 ? 
(~7) : (~3)))); @@ -1293,17 +1294,24 @@ void compareCR(Context* c, { assertT(c, aSize == bSize); - int32_t v = a->value->value(); - if (v) { - if (v > 0 and v < 0x1000) { + if (!isFpr(b) && a->value->resolved()) { + int32_t v = a->value->value(); + if (v == 0) { + append(c, cmp(b->low, Register(31), aSize)); + return; + } else if (v > 0 and v < 0x1000) { append(c, cmpi(b->low, v, 0, aSize)); + return; } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { append(c, cmpi(b->low, v >> 12, 12, aSize)); - } else { - // todo - abort(c); + return; } } + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveCR(c, aSize, a, bSize, &tmp); + compareRR(c, bSize, &tmp, bSize, b); + c->client->releaseTemporary(tmp.low); } void compareCM(Context* c, @@ -1474,23 +1482,10 @@ void moveCM(Context* c, unsigned dstSize, lir::Memory* dst) { - switch (dstSize) { - case 8: { - lir::Constant srcHigh(shiftMaskPromise(c, src->value, 32, 0xFFFFFFFF)); - lir::Constant srcLow(shiftMaskPromise(c, src->value, 0, 0xFFFFFFFF)); - - lir::Memory dstLow(dst->base, dst->offset + 4, dst->index, dst->scale); - - moveCM(c, 4, &srcLow, 4, &dstLow); - moveCM(c, 4, &srcHigh, 4, dst); - } break; - - default: - lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); - moveCR(c, srcSize, src, dstSize, &tmp); - moveRM(c, dstSize, &tmp, dstSize, dst); - c->client->releaseTemporary(tmp.low); - } + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveCR(c, srcSize, src, dstSize, &tmp); + moveRM(c, dstSize, &tmp, dstSize, dst); + c->client->releaseTemporary(tmp.low); } void negateRR(Context* c, diff --git a/src/compile-arm64.S b/src/compile-arm64.S index 744e6cd71e..e319aa744e 100644 --- a/src/compile-arm64.S +++ b/src/compile-arm64.S @@ -102,7 +102,7 @@ GLOBAL(vmInvoke_safeStack): str xzr, [x19, #TARGET_THREAD_STACK] // restore return type - ldr w5, [sp,#16]! + ldr w5, [sp],#16 // restore callee-saved register values ldp x19, x20, [sp,#16] @@ -110,7 +110,7 @@ GLOBAL(vmInvoke_safeStack): ldp x23, x24, [sp,#48] ldp x25, x26, [sp,#64] ldp x27, x28, [sp,#80] - ldp x29, x30, [sp,#96]! + ldp x29, x30, [sp],#96 LOCAL(vmInvoke_return): br x30 From 85fcbb82b31fd9c6d18f6dcc398421ad6bce8d64 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Wed, 24 Dec 2014 08:12:36 -0700 Subject: [PATCH 08/20] more ARM64 bugfixes --- src/codegen/target/arm/operations64.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index a1c5e3c23b..f6fcddee5b 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -122,7 +122,7 @@ uint32_t fmovFdRn(Register Fd, Register Rn, unsigned size) uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0xaa0003e0 : 0x2a0003e0) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 
0xaa000000 : 0x2a000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) @@ -368,12 +368,12 @@ uint32_t ldrsbi(Register Rd, Register Rn, int offset) uint32_t ldrhi(Register Rd, Register Rn, int offset) { - return 0x79400000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); + return 0x79400000 | ((offset >> 1) << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t ldrshi(Register Rd, Register Rn, int offset) { - return 0x79c00000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); + return 0x79c00000 | ((offset >> 1) << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t ldrswi(Register Rd, Register Rn, int offset) @@ -653,7 +653,7 @@ void moveCR2(Context* c, } } } - } else if (value < 0) { + } else { append(c, movn(dst->low, (~value) & 0xFFFF, 0, size)); if (~(value >> 16)) { append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); @@ -986,6 +986,7 @@ void store(Context* c, break; case 2: + assertT(c, offset == (offset & (~1))); append(c, strhi(src->low, base, offset)); break; @@ -1127,6 +1128,7 @@ void load(Context* c, break; case 2: + assertT(c, offset == (offset & (~1))); if (signExtend) { append(c, ldrshi(dst->low, base, offset)); } else { From 67f5461d82bf7f700b82fa1f3c34d5ae9d2a9af3 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Wed, 24 Dec 2014 10:05:20 -0700 Subject: [PATCH 09/20] more ARM64 bugfixes --- src/codegen/target/arm/operations64.cpp | 40 +++++++++++++++---------- src/compile-arm64.S | 4 +-- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index f6fcddee5b..b8f6c13221 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -38,17 +38,17 @@ void append(Context* c, uint32_t instruction) uint32_t lslv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac12000 : 0x1ac02000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0x9ac02000 : 0x1ac02000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); } uint32_t ubfm(Register Rd, Register Rn, int r, int s, unsigned size) { - return (size == 8 ? 0xd3608000 : 0x53000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0xd3400000 : 0x53000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t sbfm(Register Rd, Register Rn, int r, int s, unsigned size) { - return (size == 8 ? 0x93408000 : 0x13000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 
0x93400000 : 0x13000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t lsli(Register Rd, Register Rn, int shift, unsigned size) @@ -501,9 +501,9 @@ void shiftLeftC(Context* c, { uint64_t value = a->value->value(); if (size == 4 and (value & 0x1F)) { - append(c, lsli(dst->low, b->low, value, 4)); + append(c, lsli(dst->low, b->low, value & 0x1F, 4)); } else if (size == 8 and (value & 0x3F)) { - append(c, lsli(dst->low, b->low, value, 8)); + append(c, lsli(dst->low, b->low, value & 0x3F, 8)); } else { moveRR(c, size, b, size, dst); } @@ -526,9 +526,9 @@ void shiftRightC(Context* c, { uint64_t value = a->value->value(); if (size == 4 and (value & 0x1F)) { - append(c, lsri(dst->low, b->low, value, 4)); + append(c, asri(dst->low, b->low, value & 0x1F, 4)); } else if (size == 8 and (value & 0x3F)) { - append(c, lsri(dst->low, b->low, value, 8)); + append(c, asri(dst->low, b->low, value & 0x3F, 8)); } else { moveRR(c, size, b, size, dst); } @@ -551,9 +551,9 @@ void unsignedShiftRightC(Context* c, { uint64_t value = a->value->value(); if (size == 4 and (value & 0x1F)) { - append(c, asri(dst->low, b->low, value, 4)); + append(c, lsri(dst->low, b->low, value & 0x1F, 4)); } else if (size == 8 and (value & 0x3F)) { - append(c, asri(dst->low, b->low, value, 8)); + append(c, lsri(dst->low, b->low, value & 0x3F, 8)); } else { moveRR(c, size, b, size, dst); } @@ -645,9 +645,13 @@ void moveCR2(Context* c, if (value >= 0) { append(c, movz(dst->low, value & 0xFFFF, 0, size)); if (value >> 16) { - append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); + if ((value >> 16) & 0xFFFF) { + append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); + } if (value >> 32) { - append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); + if ((value >> 32) & 0xFFFF) { + append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); + } if (value >> 48) { append(c, movk(dst->low, (value >> 48) & 0xFFFF, 48, size)); } @@ -656,9 +660,13 @@ void moveCR2(Context* c, } else { append(c, movn(dst->low, (~value) & 0xFFFF, 0, size)); if (~(value >> 16)) { - append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); + if (((value >> 16) & 0xFFFF) != 0xFFFF) { + append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); + } if (~(value >> 32)) { - append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); + if (((value >> 32) & 0xFFFF) != 0xFFFF) { + append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); + } if (~(value >> 48)) { append(c, movk(dst->low, (value >> 48) & 0xFFFF, 48, size)); } @@ -704,7 +712,7 @@ void addC(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - int32_t v = a->value->value(); + int64_t v = a->value->value(); if (v) { if (v > 0 and v < 0x1000) { append(c, addi(dst->low, b->low, v, 0, size)); @@ -725,7 +733,7 @@ void subC(Context* c, lir::RegisterPair* b, lir::RegisterPair* dst) { - int32_t v = a->value->value(); + int64_t v = a->value->value(); if (v) { if (v > 0 and v < 0x1000) { append(c, subi(dst->low, b->low, v, 0, size)); @@ -1297,7 +1305,7 @@ void compareCR(Context* c, assertT(c, aSize == bSize); if (!isFpr(b) && a->value->resolved()) { - int32_t v = a->value->value(); + int64_t v = a->value->value(); if (v == 0) { append(c, cmp(b->low, Register(31), aSize)); return; diff --git a/src/compile-arm64.S b/src/compile-arm64.S index e319aa744e..62816ccf9f 100644 --- a/src/compile-arm64.S +++ b/src/compile-arm64.S @@ -13,7 +13,7 @@ .text -#define BYTES_PER_WORD 4 +#define BYTES_PER_WORD 8 #define LOCAL(x) .L##x @@ -59,7 +59,7 @@ GLOBAL(vmInvoke): // copy 
arguments into place sub sp, sp, w3, uxtw - mov x5, #0 + mov x4, #0 b LOCAL(vmInvoke_argumentTest) LOCAL(vmInvoke_argumentLoop): From 3e2545e5a7127c6d370c36563e3b4fb9e4bca82b Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 08:02:37 -0700 Subject: [PATCH 10/20] more ARM64 bugfixes --- src/codegen/target/arm/assembler.cpp | 2 +- src/codegen/target/arm/operations64.cpp | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index a50c36d173..1ff070bcea 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -582,7 +582,7 @@ class MyArchitecture : public Architecture { case lir::FloatMultiply: case lir::FloatDivide: if (vfpSupported()) { - bMask.typeMask = lir::Operand::RegisterPairMask; + aMask.typeMask = lir::Operand::RegisterPairMask; aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK); bMask = aMask; } else { diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index b8f6c13221..529c1d0bd0 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -259,9 +259,9 @@ uint32_t fcvtasWdSn(Register Rd, Register Fn) return 0x1e240000 | (Fn.index() << 5) | Rd.index(); } -uint32_t scvtfDdWn(Register Fd, Register Rn) +uint32_t scvtfDdXn(Register Fd, Register Rn) { - return 0x1e620000 | (Rn.index() << 5) | Fd.index(); + return 0x9e620000 | (Rn.index() << 5) | Fd.index(); } uint32_t scvtfSdWn(Register Fd, Register Rn) @@ -287,8 +287,8 @@ uint32_t strh(Register Rs, Register Rn, Register Rm) uint32_t striFs(Register Fs, Register Rn, int offset, unsigned size) { - return (size == 8 ? 0xfc000000 : 0xbc000000) | (offset << 16) - | (Rn.index() << 5) | Fs.index(); + return (size == 8 ? 0xfd000000 : 0xbd000000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Fs.index(); } uint32_t str(Register Rs, Register Rn, Register Rm, unsigned size) @@ -352,8 +352,8 @@ uint32_t ldr(Register Rd, Register Rn, Register Rm, unsigned size) uint32_t ldriFd(Register Fd, Register Rn, int offset, unsigned size) { - return (size == 8 ? 0xfc400000 : 0xbc400000) | (offset << 16) - | (Rn.index() << 5) | Fd.index(); + return (size == 8 ? 0xfd400000 : 0xbd400000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Fd.index(); } uint32_t ldrbi(Register Rd, Register Rn, int offset) @@ -808,9 +808,9 @@ void int2FloatRR(Context* c, lir::RegisterPair* b) { if (size == 8) { - append(c, scvtfDdWn(fpr(a), b->low)); + append(c, scvtfDdXn(fpr(b), a->low)); } else { - append(c, scvtfSdWn(fpr(a), b->low)); + append(c, scvtfSdWn(fpr(b), a->low)); } } @@ -981,6 +981,7 @@ void store(Context* c, switch (size) { case 4: case 8: + assertT(c, offset == (offset & (size == 8 ? (~7) : (~3)))); append(c, striFs(fpr(src->low), base, offset, size)); break; @@ -1119,6 +1120,7 @@ void load(Context* c, switch (srcSize) { case 4: case 8: + assertT(c, offset == (offset & (srcSize == 8 ? 
(~7) : (~3)))); append(c, ldriFd(fpr(dst->low), base, offset, srcSize)); break; From d37cb93d5021433eef04a2aedaada1a780593b61 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 10:00:51 -0700 Subject: [PATCH 11/20] remove redundant class qualifiers from Classes.java --- classpath/avian/Classes.java | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/classpath/avian/Classes.java b/classpath/avian/Classes.java index 34877d5ef2..3fc9f37144 100644 --- a/classpath/avian/Classes.java +++ b/classpath/avian/Classes.java @@ -38,7 +38,7 @@ public class Classes { public static native VMClass primitiveClass(char name); public static native void initialize(VMClass vmClass); - + public static native boolean isAssignableFrom(VMClass a, VMClass b); public static native VMClass getVMClass(Object o); @@ -134,7 +134,7 @@ public class Classes { array[i] = parseAnnotationValue(loader, pool, in); } return array; - } + } default: throw new AssertionError(); } @@ -207,7 +207,7 @@ public class Classes { while (spec[end] != ';') ++ end; ++ end; break; - + default: ++ end; } @@ -295,9 +295,9 @@ public class Classes { } Class c = loader.loadClass(name); VMClass vmc = SystemClassLoader.vmClass(c); - Classes.link(vmc, loader); + link(vmc, loader); if (initialize) { - Classes.initialize(vmc); + initialize(vmc); } return c; } @@ -315,7 +315,7 @@ public class Classes { } else { if (name.length() == 1) { return SystemClassLoader.getClass - (Classes.primitiveClass(name.charAt(0))); + (primitiveClass(name.charAt(0))); } else { throw new ClassNotFoundException(name); } @@ -378,7 +378,7 @@ public class Classes { public static int findField(VMClass vmClass, String name) { if (vmClass.fieldTable != null) { - Classes.link(vmClass); + link(vmClass); for (int i = 0; i < vmClass.fieldTable.length; ++i) { if (toString(vmClass.fieldTable[i].name).equals(name)) { @@ -426,7 +426,7 @@ public class Classes { { VMMethod[] methodTable = vmClass.methodTable; if (methodTable != null) { - Classes.link(vmClass); + link(vmClass); if (parameterTypes == null) { parameterTypes = new Class[0]; @@ -464,7 +464,7 @@ public class Classes { Method[] array = new Method[countMethods(vmClass, publicOnly)]; VMMethod[] methodTable = vmClass.methodTable; if (methodTable != null) { - Classes.link(vmClass); + link(vmClass); int ai = 0; for (int i = 0, j = declaredMethodCount(vmClass); i < j; ++i) { @@ -498,7 +498,7 @@ public class Classes { public static Field[] getFields(VMClass vmClass, boolean publicOnly) { Field[] array = new Field[countFields(vmClass, publicOnly)]; if (vmClass.fieldTable != null) { - Classes.link(vmClass); + link(vmClass); int ai = 0; for (int i = 0; i < vmClass.fieldTable.length; ++i) { @@ -568,9 +568,9 @@ public class Classes { return new ProtectionDomain(source, p); } - + public static native Method makeMethod(Class c, int slot); - + public static native Field makeField(Class c, int slot); private static native void acquireClassLock(); From 98a1fefefc348df4ab4c2581236d09ba2c9cbdd5 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 10:02:30 -0700 Subject: [PATCH 12/20] fix offset encoding for strhi instruction --- src/codegen/target/arm/operations64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 529c1d0bd0..401915f5ce 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -304,7 +304,7 @@ uint32_t strbi(Register Rs, 
Register Rn, int offset) uint32_t strhi(Register Rs, Register Rn, int offset) { - return 0x79000000 | (offset << 10) | (Rn.index() << 5) | Rs.index(); + return 0x79000000 | ((offset >> 1) << 10) | (Rn.index() << 5) | Rs.index(); } uint32_t stri(Register Rs, Register Rn, int offset, unsigned size) From e3f50e6d67e5e4010384ab6b63fad530e6c368a8 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 11:04:27 -0700 Subject: [PATCH 13/20] fix ARM64 OffsetPromise::value I must have done a search-and-replace from 4 to TargetBytesPerWord earlier, but in this case it should have been the instruction size (4), not the word size. --- src/codegen/target/arm/assembler.cpp | 2 ++ src/codegen/target/arm/fixup.cpp | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index 1ff070bcea..d34f9992f1 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -981,6 +981,8 @@ class MyAssembler : public Assembler { bool jump = needJump(b); if (jump) { + expect(&con, TargetBytesPerWord == 4); + write4(dst + dstOffset, isa::b((poolSize + TargetBytesPerWord - 8) >> 2)); } diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp index 7f5c0ff277..2e32813cbb 100644 --- a/src/codegen/target/arm/fixup.cpp +++ b/src/codegen/target/arm/fixup.cpp @@ -12,6 +12,12 @@ #include "fixup.h" #include "block.h" +namespace { + +const unsigned InstructionSize = 4; + +} // namespace + namespace avian { namespace codegen { namespace arm { @@ -39,7 +45,7 @@ int64_t OffsetPromise::value() unsigned o = offset - block->offset; return block->start - + padding(block, forTrace ? o - vm::TargetBytesPerWord : o) + o; + + padding(block, forTrace ? 
o - InstructionSize : o) + o; } Promise* offsetPromise(Context* con, bool forTrace) @@ -115,7 +121,7 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) *p = (v & mask) | ((~mask) & *p); - return instruction + 4; + return instruction + InstructionSize; } ConstantPoolEntry::ConstantPoolEntry(Context* con, From cdcf173601bf3b4206b2370873dad55151c7ea34 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 12:09:53 -0700 Subject: [PATCH 14/20] format recent changes using clang-format --- src/codegen/target/arm/assembler.cpp | 3 +- src/codegen/target/arm/fixup.cpp | 5 +- src/codegen/target/arm/operations32.cpp | 41 ++++---------- src/codegen/target/arm/operations64.cpp | 75 +++++++++++++++---------- src/codegen/target/arm/registers.h | 4 +- 5 files changed, 61 insertions(+), 67 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index d34f9992f1..c9ddec0cc1 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -125,8 +125,7 @@ void nextFrame(ArchitectureContext* con, // check for post-non-tail-call stack adjustment of the form "sub // sp, sp, #offset": - if (TargetBytesPerWord == 8 and (*instruction & 0xff0003ff) == 0xd10003ff) - { + if (TargetBytesPerWord == 8 and (*instruction & 0xff0003ff) == 0xd10003ff) { unsigned value = (*instruction >> 10) & 0xfff; unsigned shift = (*instruction >> 22) & 1; switch (shift) { diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp index 2e32813cbb..4413a399db 100644 --- a/src/codegen/target/arm/fixup.cpp +++ b/src/codegen/target/arm/fixup.cpp @@ -16,7 +16,7 @@ namespace { const unsigned InstructionSize = 4; -} // namespace +} // namespace namespace avian { namespace codegen { @@ -44,8 +44,7 @@ int64_t OffsetPromise::value() assertT(con, resolved()); unsigned o = offset - block->offset; - return block->start - + padding(block, forTrace ? o - InstructionSize : o) + o; + return block->start + padding(block, forTrace ? o - InstructionSize : o) + o; } Promise* offsetPromise(Context* con, bool forTrace) diff --git a/src/codegen/target/arm/operations32.cpp b/src/codegen/target/arm/operations32.cpp index 07dd7f0175..a16a0e0e24 100644 --- a/src/codegen/target/arm/operations32.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -417,9 +417,9 @@ void multiplyR(Context* con, if (size == 8) { bool useTemporaries = b->low == t->low; Register tmpLow = useTemporaries ? con->client->acquireTemporary(GPR_MASK) - : t->low; + : t->low; Register tmpHigh = useTemporaries ? 
con->client->acquireTemporary(GPR_MASK) - : t->high; + : t->high; emit(con, umull(tmpLow, tmpHigh, a->low, b->low)); emit(con, mla(tmpHigh, a->low, b->high, tmpHigh)); @@ -572,11 +572,11 @@ void floatDivideR(Context* con, } Register normalize(Context* con, - int offset, - Register index, - unsigned scale, - bool* preserveIndex, - bool* release) + int offset, + Register index, + unsigned scale, + bool* preserveIndex, + bool* release) { if (offset != 0 or scale != 1) { lir::RegisterPair normalizedIndex( @@ -854,26 +854,8 @@ void load(Context* con, case 8: { if (dstSize == 8) { lir::RegisterPair dstHigh(dst->high); - load(con, - 4, - base, - offset, - NoRegister, - 1, - 4, - &dstHigh, - false, - false); - load(con, - 4, - base, - offset + 4, - NoRegister, - 1, - 4, - dst, - false, - false); + load(con, 4, base, offset, NoRegister, 1, 4, &dstHigh, false, false); + load(con, 4, base, offset + 4, NoRegister, 1, 4, dst, false, false); } else { emit(con, ldri(dst->low, base, offset)); } @@ -1407,7 +1389,8 @@ void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target) { assertT(con, size == vm::TargetBytesPerWord); - lir::RegisterPair tmp(Register(4)); // a non-arg reg that we don't mind clobbering + lir::RegisterPair tmp( + Register(4)); // a non-arg reg that we don't mind clobbering moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con)); jumpR(con, vm::TargetBytesPerWord, &tmp); } @@ -1462,4 +1445,4 @@ void storeLoadBarrier(Context* con) } // namespace codegen } // namespace avian -#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM +#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 401915f5ce..ecd221a0bf 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -38,17 +38,20 @@ void append(Context* c, uint32_t instruction) uint32_t lslv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac02000 : 0x1ac02000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0x9ac02000 : 0x1ac02000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); } uint32_t ubfm(Register Rd, Register Rn, int r, int s, unsigned size) { - return (size == 8 ? 0xd3400000 : 0x53000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0xd3400000 : 0x53000000) | (r << 16) | (s << 10) + | (Rn.index() << 5) | Rd.index(); } uint32_t sbfm(Register Rd, Register Rn, int r, int s, unsigned size) { - return (size == 8 ? 0x93400000 : 0x13000000) | (r << 16) | (s << 10) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0x93400000 : 0x13000000) | (r << 16) | (s << 10) + | (Rn.index() << 5) | Rd.index(); } uint32_t lsli(Register Rd, Register Rn, int shift, unsigned size) @@ -62,12 +65,14 @@ uint32_t lsli(Register Rd, Register Rn, int shift, unsigned size) uint32_t asrv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac02800 : 0x1ac02800) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0x9ac02800 : 0x1ac02800) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); } uint32_t lsrv(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x9ac02400 : 0x1ac02400) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 
0x9ac02400 : 0x1ac02400) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); } uint32_t lsri(Register Rd, Register Rn, int shift, unsigned size) @@ -122,37 +127,38 @@ uint32_t fmovFdRn(Register Fd, Register Rn, unsigned size) uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0xaa000000 : 0x2a000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0xaa000000 : 0x2a000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); } uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) { return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) - | (value << 10) | (Rn.index() << 5) | Rd.index(); + | (value << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t mov(Register Rd, Register Rn, unsigned size) { return Rn.index() == 31 ? addi(Rd, Rn, 0, 0, size) - : orr(Rd, Register(31), Rn, size); + : orr(Rd, Register(31), Rn, size); } uint32_t movz(Register Rd, int value, unsigned shift, unsigned size) { return (size == 8 ? 0xd2800000 : 0x52800000) | ((shift >> 4) << 21) - | (value << 5) | Rd.index(); + | (value << 5) | Rd.index(); } uint32_t movn(Register Rd, int value, unsigned shift, unsigned size) { return (size == 8 ? 0x92800000 : 0x12800000) | ((shift >> 4) << 21) - | (value << 5) | Rd.index(); + | (value << 5) | Rd.index(); } uint32_t movk(Register Rd, int value, unsigned shift, unsigned size) { return (size == 8 ? 0xf2800000 : 0x72800000) | ((shift >> 4) << 21) - | (value << 5) | Rd.index(); + | (value << 5) | Rd.index(); } uint32_t ldrPCRel(Register Rd, int offset, unsigned size) @@ -163,12 +169,14 @@ uint32_t ldrPCRel(Register Rd, int offset, unsigned size) uint32_t add(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0x8b000000 : 0x0b000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0x8b000000 : 0x0b000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); } uint32_t sub(Register Rd, Register Rn, Register Rm, unsigned size) { - return (size == 8 ? 0xcb000000 : 0x4b000000) | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0xcb000000 : 0x4b000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); } uint32_t and_(Register Rd, Register Rn, Register Rm, unsigned size) @@ -185,8 +193,8 @@ uint32_t eor(Register Rd, Register Rn, Register Rm, unsigned size) uint32_t madd(Register Rd, Register Rn, Register Rm, Register Ra, unsigned size) { - return (size == 8 ? 0x9b000000 : 0x1b000000) - | (Rm.index() << 16) | (Ra.index() << 10) | (Rn.index() << 5) | Rd.index(); + return (size == 8 ? 0x9b000000 : 0x1b000000) | (Rm.index() << 16) + | (Ra.index() << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size) @@ -197,7 +205,7 @@ uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size) uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size) { return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 0x400000 : 0) - | (value << 10) | (Rn.index() << 5) | Rd.index(); + | (value << 10) | (Rn.index() << 5) | Rd.index(); } uint32_t fabs_(Register Fd, Register Fn, unsigned size) @@ -288,7 +296,8 @@ uint32_t strh(Register Rs, Register Rn, Register Rm) uint32_t striFs(Register Fs, Register Rn, int offset, unsigned size) { return (size == 8 ? 0xfd000000 : 0xbd000000) - | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Fs.index(); + | ((offset >> (size == 8 ? 
3 : 2)) << 10) | (Rn.index() << 5) + | Fs.index(); } uint32_t str(Register Rs, Register Rn, Register Rm, unsigned size) @@ -310,7 +319,8 @@ uint32_t strhi(Register Rs, Register Rn, int offset) uint32_t stri(Register Rs, Register Rn, int offset, unsigned size) { return (size == 8 ? 0xf9000000 : 0xb9000000) - | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Rs.index(); + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) + | Rs.index(); } uint32_t ldrFd(Register Fd, Register Rn, Register Rm, unsigned size) @@ -353,7 +363,8 @@ uint32_t ldr(Register Rd, Register Rn, Register Rm, unsigned size) uint32_t ldriFd(Register Fd, Register Rn, int offset, unsigned size) { return (size == 8 ? 0xfd400000 : 0xbd400000) - | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Fd.index(); + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) + | Fd.index(); } uint32_t ldrbi(Register Rd, Register Rn, int offset) @@ -384,7 +395,8 @@ uint32_t ldrswi(Register Rd, Register Rn, int offset) uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size) { return (size == 8 ? 0xf9400000 : 0xb9400000) - | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) | Rd.index(); + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) + | Rd.index(); } uint32_t fcmp(Register Fn, Register Fm, unsigned size) @@ -1038,19 +1050,19 @@ void moveRM(Context* c, assertT(c, srcSize == dstSize); if (src->low.index() == 31) { - assertT(c, c->client == 0); // the compiler should never ask us to - // store the SP; we'll only get here - // when assembling a thunk + assertT(c, c->client == 0); // the compiler should never ask us to + // store the SP; we'll only get here + // when assembling a thunk - lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can - // clobber this + lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can + // clobber this moveRR(c, srcSize, src, srcSize, &tmp); store( - c, srcSize, &tmp, dst->base, dst->offset, dst->index, dst->scale, true); + c, srcSize, &tmp, dst->base, dst->offset, dst->index, dst->scale, true); } else { store( - c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true); + c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true); } } @@ -1272,8 +1284,9 @@ void moveAR(Context* c, unsigned dstSize, lir::RegisterPair* dst) { - assertT(c, srcSize == vm::TargetBytesPerWord - and dstSize == vm::TargetBytesPerWord); + assertT( + c, + srcSize == vm::TargetBytesPerWord and dstSize == vm::TargetBytesPerWord); lir::Constant constant(src->address); moveCR(c, srcSize, &constant, dstSize, dst); @@ -1288,7 +1301,7 @@ void compareRR(Context* c, unsigned bSize UNUSED, lir::RegisterPair* b) { - assertT(c, not (isFpr(a) xor isFpr(b))); + assertT(c, not(isFpr(a) xor isFpr(b))); assertT(c, aSize == bSize); if (isFpr(a)) { @@ -1593,4 +1606,4 @@ void storeLoadBarrier(Context* c) } // namespace codegen } // namespace avian -#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index 18622a81e5..d439ddc8ba 100644 --- a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -27,7 +27,7 @@ const unsigned MASK_LO8 = 0xff; constexpr Register ThreadRegister(19); constexpr Register StackRegister(31); constexpr Register LinkRegister(30); -constexpr Register ProgramCounter(0xFE); // i.e. unaddressable +constexpr Register ProgramCounter(0xFE); // i.e. 
unaddressable const int N_GPRS = 32; const int N_FPRS = 32; @@ -38,7 +38,7 @@ const RegisterMask FPR_MASK = 0xffffffff00000000; constexpr Register ThreadRegister(8); constexpr Register StackRegister(13); constexpr Register LinkRegister(14); -constexpr Register FrameRegister(0xFE); // i.e. there is none +constexpr Register FrameRegister(0xFE); // i.e. there is none constexpr Register ProgramCounter(15); const int N_GPRS = 16; From ea0a108cd220379ca1505c4d1393475387b3602e Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 12:16:28 -0700 Subject: [PATCH 15/20] add todo comment regarding integer division --- src/codegen/target/arm/assembler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index c9ddec0cc1..5d36fdf2f6 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -570,6 +570,11 @@ class MyArchitecture : public Architecture { aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask; break; + // todo: Although ARM has instructions for integer division and + // remainder, they don't trap on division by zero, which is why + // we use thunks. Alternatively, we could generate inline code + // with an explicit zero check, which would probably be a bit + // faster. case lir::Divide: case lir::Remainder: case lir::FloatRemainder: From d8b32f2c67f56c6f49f4c77c8abecea3f2226210 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 12:25:56 -0700 Subject: [PATCH 16/20] update CMakeLists.txt to fix CMake build --- src/codegen/target/arm/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/codegen/target/arm/CMakeLists.txt b/src/codegen/target/arm/CMakeLists.txt index bc26352adc..23faf6694f 100644 --- a/src/codegen/target/arm/CMakeLists.txt +++ b/src/codegen/target/arm/CMakeLists.txt @@ -4,5 +4,6 @@ add_library(avian_codegen_arm context.cpp fixup.cpp multimethod.cpp - operations.cpp + operations32.cpp + operations64.cpp ) From 8c277e2af89a848d12a66f6a094751f3ad443e32 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 14:47:31 -0700 Subject: [PATCH 17/20] conditionally compile ARM operations based on TARGET_BYTES_PER_WORD This fixes the codegen-targets=all build regression. 
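As a sketch (not part of the patch), each translation unit now guards its
entire body on the target word size; TARGET_BYTES_PER_WORD comes from the
target configuration and so, presumably, stays meaningful even when every
codegen target is compiled into a single build, whereas AVIAN_TARGET_ARCH
names only one architecture:

    // operations32.cpp: contributes code only for 4-byte-word targets
    #if TARGET_BYTES_PER_WORD == 4
    /* ... 32-bit ARM implementations ... */
    #endif  // TARGET_BYTES_PER_WORD == 4

    // operations64.cpp: contributes code only for 8-byte-word targets
    #if TARGET_BYTES_PER_WORD == 8
    /* ... ARM64 implementations ... */
    #endif  // TARGET_BYTES_PER_WORD == 8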
--- src/codegen/target/arm/operations32.cpp | 4 ++-- src/codegen/target/arm/operations64.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/codegen/target/arm/operations32.cpp b/src/codegen/target/arm/operations32.cpp index a16a0e0e24..77b5f2c6f9 100644 --- a/src/codegen/target/arm/operations32.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -15,7 +15,7 @@ #include "fixup.h" #include "multimethod.h" -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM +#if TARGET_BYTES_PER_WORD == 4 namespace avian { namespace codegen { @@ -1445,4 +1445,4 @@ void storeLoadBarrier(Context* con) } // namespace codegen } // namespace avian -#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM +#endif // TARGET_BYTES_PER_WORD == 4 diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index ecd221a0bf..72291ea69c 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -14,7 +14,7 @@ #include "fixup.h" #include "multimethod.h" -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +#if TARGET_BYTES_PER_WORD == 8 namespace { @@ -1606,4 +1606,4 @@ void storeLoadBarrier(Context* c) } // namespace codegen } // namespace avian -#endif // AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +#endif // TARGET_BYTES_PER_WORD == 8 From 76bfcaa8c099ff2f6987304e96008062eced817a Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 29 Dec 2014 18:11:54 -0700 Subject: [PATCH 18/20] fix ARM64 bootimage=true build This fixes a problem with atomically updating JIT-compiled static calls to AOT-compiled code. It turns out there was also a problem with the 32-bit ARM code as well, but we never hit it because it is extremely unlikely that a code address can be loaded with a single immediate load instruction on 32-bit ARM since it can only handle numbers with 8 significant bits. I've fixed that as well. 
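For reference, a condensed sketch of the resulting decision in the 32-bit
moveCR2 (the condition and the pool call are taken from the hunk below; the
comments and the elided load are my reading of the change, not the author's):

    if (callOffset == 0 and src->value->resolved()
        and isOfWidth(getValue(src), 8)) {
      // a small constant that is not a call or jump target can be
      // materialized with a single immediate move
      emit(con, movi(dst->low, lo8(getValue(src))));
    } else {
      // call and jump targets (callOffset != 0) and wide constants go
      // through the constant pool, reached by a PC-relative load whose
      // offset is patched when the pool is emitted; that pool word is
      // also what gets rewritten when a static call is retargeted, which
      // is what makes the update atomic
      appendConstantPoolEntry(con, src->value, callOffset);
      // ... followed by the PC-relative load of the pool entry ...
    }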
--- src/codegen/target/arm/assembler.cpp | 5 +++++ src/codegen/target/arm/operations32.cpp | 13 ++++++++++++- src/codegen/target/arm/operations64.cpp | 2 +- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index 5d36fdf2f6..831be22fe4 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -320,8 +320,13 @@ class MyArchitecture : public Architecture { case lir::AlignedLongCall: case lir::AlignedLongJump: { uint32_t* p = static_cast(returnAddress) - 2; +#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *reinterpret_cast(p + ((*p & mask) >> 5)) = newTarget; +#else *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) = newTarget; +#endif } break; default: diff --git a/src/codegen/target/arm/operations32.cpp b/src/codegen/target/arm/operations32.cpp index 77b5f2c6f9..e9cd601fe3 100644 --- a/src/codegen/target/arm/operations32.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -317,7 +317,8 @@ void moveCR2(Context* con, lir::RegisterPair dstHi(dst->high); moveCR(con, 4, &srcLo, 4, dst); moveCR(con, 4, &srcHi, 4, &dstHi); - } else if (src->value->resolved() and isOfWidth(getValue(src), 8)) { + } else if (callOffset == 0 and src->value->resolved() + and isOfWidth(getValue(src), 8)) { emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate } else { appendConstantPoolEntry(con, src->value, callOffset); @@ -1385,6 +1386,11 @@ void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target) callR(con, vm::TargetBytesPerWord, &tmp); } +void alignedLongCallC(Context* con, unsigned size, lir::Constant* target) +{ + longCallC(con, size, target); +} + void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target) { assertT(con, size == vm::TargetBytesPerWord); @@ -1395,6 +1401,11 @@ void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target) jumpR(con, vm::TargetBytesPerWord, &tmp); } +void alignedLongJumpC(Context* con, unsigned size, lir::Constant* target) +{ + longJumpC(con, size, target); +} + void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target) { assertT(con, size == vm::TargetBytesPerWord); diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 72291ea69c..5f3fc74df4 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -652,7 +652,7 @@ void moveCR2(Context* c, moveCR(c, size, src, size, &tmp); moveRR(c, size, &tmp, size, dst); c->client->releaseTemporary(tmp.low); - } else if (src->value->resolved()) { + } else if (callOffset == 0 and src->value->resolved()) { int64_t value = src->value->value(); if (value >= 0) { append(c, movz(dst->low, value & 0xFFFF, 0, size)); From e3ea60fc317fb40647a5157594ea984f9b08f46a Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 30 Dec 2014 09:37:26 -0700 Subject: [PATCH 19/20] fix ARM64 tails=true build --- src/codegen/target/arm/assembler.cpp | 22 +++++++++++--- src/codegen/target/arm/operations64.cpp | 40 +++++++++++++++++-------- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index 831be22fe4..cb9f871f7e 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -130,10 +130,10 @@ void nextFrame(ArchitectureContext* con, unsigned shift = (*instruction >> 22) & 1; switch (shift) { case 0: - offset -= 
value; + offset -= value / TargetBytesPerWord; break; case 1: - offset -= value << 12; + offset -= (value << 12) / TargetBytesPerWord; break; default: abort(con); @@ -769,6 +769,11 @@ class MyAssembler : public Assembler { // how to handle them: assertT(&con, footprint < 256); + // todo: ARM64 frame allocation should be of the form: + // stp x29, x30, [sp,#size]! + // and deallocation should be of the form: + // ldp x29, x30, [sp],#size + lir::RegisterPair stack(StackRegister); ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); lir::Constant footprintConstant(&footprintPromise); @@ -875,10 +880,19 @@ class MyAssembler : public Assembler { return_(&con); } - virtual void popFrameAndUpdateStackAndReturn(unsigned frameFootprint, + virtual void popFrameAndUpdateStackAndReturn(unsigned footprint, unsigned stackOffsetFromThread) { - popFrame(frameFootprint); + footprint += FrameHeaderSize; + + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); lir::RegisterPair stack(StackRegister); lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread); diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp index 5f3fc74df4..e0c4a69ed6 100644 --- a/src/codegen/target/arm/operations64.cpp +++ b/src/codegen/target/arm/operations64.cpp @@ -139,8 +139,8 @@ uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) uint32_t mov(Register Rd, Register Rn, unsigned size) { - return Rn.index() == 31 ? addi(Rd, Rn, 0, 0, size) - : orr(Rd, Register(31), Rn, size); + return Rn.index() == 31 or Rd.index() == 31 ? addi(Rd, Rn, 0, 0, size) + : orr(Rd, Register(31), Rn, size); } uint32_t movz(Register Rd, int value, unsigned shift, unsigned size) @@ -653,6 +653,10 @@ void moveCR2(Context* c, moveRR(c, size, &tmp, size, dst); c->client->releaseTemporary(tmp.low); } else if (callOffset == 0 and src->value->resolved()) { + // todo: Is it better performance-wise to load using immediate + // moves or via a PC-relative constant pool? Does it depend on + // how many significant bits there are? + int64_t value = src->value->value(); if (value >= 0) { append(c, movz(dst->low, value & 0xFFFF, 0, size)); @@ -1195,16 +1199,28 @@ void moveMR(Context* c, unsigned dstSize, lir::RegisterPair* dst) { - load(c, - srcSize, - src->base, - src->offset, - src->index, - src->scale, - dstSize, - dst, - true, - true); + if (dst->low.index() == 31) { + assertT(c, c->client == 0); // the compiler should never ask us to + // load the SP; we'll only get here + // when assembling a thunk + + lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can + // clobber this + + load(c, srcSize, src->base, src->offset, src->index, src->scale, dstSize, &tmp, true, true); + moveRR(c, dstSize, &tmp, dstSize, dst); + } else { + load(c, + srcSize, + src->base, + src->offset, + src->index, + src->scale, + dstSize, + dst, + true, + true); + } } void moveZMR(Context* c, From c9026a6053d015f6b37af3dbfa6a66ec6f80e417 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 30 Dec 2014 15:30:04 -0700 Subject: [PATCH 20/20] add continuations support for ARM64 Also, replace some preprocessor conditionals with C++ conditionals and add some todo comments and sample code for future work towards better ABI compatibility in the JIT compiled code. 
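The preprocessor-to-C++ conversion mentioned above follows one pattern
throughout (sketch only; the real instances are in the fixup and assembler
hunks below). TargetBytesPerWord is a compile-time constant, so the untaken
branch is dead code the compiler can drop, but unlike an #if both branches
are now parsed and type-checked in every build:

    // before: only one branch is ever seen by the compiler
    //   #if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64
    //     ... ARM64-only encoding ...
    //   #else
    //     ... 32-bit ARM encoding ...
    //   #endif

    // after: both branches always compile, and the constant condition
    // folds away
    if (vm::TargetBytesPerWord == 8) {
      // ... ARM64-only encoding ...
    } else {
      // ... 32-bit ARM encoding ...
    }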
--- src/arm64.S | 2 + src/codegen/target/arm/assembler.cpp | 142 +++++++++++++++++---------- src/codegen/target/arm/fixup.cpp | 10 +- src/codegen/target/arm/fixup.h | 6 +- src/codegen/target/arm/registers.h | 3 +- src/compile-arm.S | 4 +- src/compile-arm64.S | 110 +++++++++++++++++++-- src/compile.cpp | 2 + 8 files changed, 204 insertions(+), 75 deletions(-) diff --git a/src/arm64.S b/src/arm64.S index 6953ea0cf6..b5ce9a5000 100644 --- a/src/arm64.S +++ b/src/arm64.S @@ -35,6 +35,7 @@ GLOBAL(vmNativeCall): // allocate frame stp x29, x30, [sp,#-64]! + mov x29, sp // save callee-saved register values so we can clobber them stp x19, x20, [sp,#16] @@ -118,6 +119,7 @@ GLOBAL(vmRun): // allocate frame stp x29, x30, [sp,#-96]! + mov x29, sp // save callee-saved register values stp x19, x20, [sp,#16] diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index cb9f871f7e..3130662073 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -232,6 +232,7 @@ class MyArchitecture : public Architecture { { switch (register_.index()) { case LinkRegister.index(): + case FrameRegister.index(): case StackRegister.index(): case ThreadRegister.index(): case ProgramCounter.index(): @@ -320,13 +321,13 @@ class MyArchitecture : public Architecture { case lir::AlignedLongCall: case lir::AlignedLongJump: { uint32_t* p = static_cast(returnAddress) - 2; -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 - const int32_t mask = (PoolOffsetMask >> 2) << 5; - *reinterpret_cast(p + ((*p & mask) >> 5)) = newTarget; -#else - *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) - = newTarget; -#endif + if (TargetBytesPerWord == 8) { + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *reinterpret_cast(p + ((*p & mask) >> 5)) = newTarget; + } else { + *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) + = newTarget; + } } break; default: @@ -769,24 +770,45 @@ class MyAssembler : public Assembler { // how to handle them: assertT(&con, footprint < 256); - // todo: ARM64 frame allocation should be of the form: - // stp x29, x30, [sp,#size]! - // and deallocation should be of the form: - // ldp x29, x30, [sp],#size + // todo: the ARM ABI says the frame preamble should be of the form + // + // stp x29, x30, [sp,#-footprint]! + // mov x29, sp + // + // and the frame should be popped with e.g. + // + // ldp x29, x30, [sp],#footprint + // br x30 + // + // However, that will invalidate a lot of assumptions elsewhere + // about the return address being stored at the opposite end of + // the frame, so lots of other code will need to change before we + // can do that. The code below can be enabled as a starting point + // when we're ready to tackle that. + if (false and TargetBytesPerWord == 8) { + // stp x29, x30, [sp,#-footprint]! 
+ con.code.append4(0xa9800000 | ((-footprint & 0x7f) << 15) + | (StackRegister.index() << 5) + | (LinkRegister.index() << 10) | FrameRegister.index()); - lir::RegisterPair stack(StackRegister); - ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); - lir::Constant footprintConstant(&footprintPromise); - subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + lir::RegisterPair stack(StackRegister); + lir::RegisterPair frame(FrameRegister); + moveRR(&con, TargetBytesPerWord, &stack, TargetBytesPerWord, &frame); + } else { + lir::RegisterPair stack(StackRegister); + ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); + lir::Constant footprintConstant(&footprintPromise); + subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressDst(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveRM(&con, - TargetBytesPerWord, - &returnAddress, - TargetBytesPerWord, - &returnAddressDst); + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressDst(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveRM(&con, + TargetBytesPerWord, + &returnAddress, + TargetBytesPerWord, + &returnAddressDst); + } } virtual void adjustFrame(unsigned difference) @@ -801,19 +823,26 @@ class MyAssembler : public Assembler { { footprint += FrameHeaderSize; - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressSrc(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveMR(&con, - TargetBytesPerWord, - &returnAddressSrc, - TargetBytesPerWord, - &returnAddress); + // see comment regarding the ARM64 ABI in allocateFrame + if (false and TargetBytesPerWord == 8) { + // ldp x29, x30, [sp],#footprint + con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10) + | 29); + } else { + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); - lir::RegisterPair stack(StackRegister); - ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); - lir::Constant footprintConstant(&footprintPromise); - addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + lir::RegisterPair stack(StackRegister); + ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); + lir::Constant footprintConstant(&footprintPromise); + addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + } } virtual void popFrameForTailCall(unsigned footprint, @@ -885,14 +914,21 @@ class MyAssembler : public Assembler { { footprint += FrameHeaderSize; - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressSrc(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveMR(&con, - TargetBytesPerWord, - &returnAddressSrc, - TargetBytesPerWord, - &returnAddress); + // see comment regarding the ARM64 ABI in allocateFrame + if (false and TargetBytesPerWord == 8) { + // ldp x29, x30, [sp],#footprint + con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10) + | 29); + } else { + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); + } lir::RegisterPair stack(StackRegister); lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread); @@ -986,18 +1022,18 @@ 
class MyAssembler : public Assembler { int32_t* p = reinterpret_cast(dst + instruction); -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 - int32_t v = entry - instruction; - expect(&con, v == (v & PoolOffsetMask)); + if (TargetBytesPerWord == 8) { + int32_t v = entry - instruction; + expect(&con, v == (v & PoolOffsetMask)); - const int32_t mask = (PoolOffsetMask >> 2) << 5; - *p = (((v >> 2) << 5) & mask) | ((~mask) & *p); -#else - int32_t v = (entry - 8) - instruction; - expect(&con, v == (v & PoolOffsetMask)); + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *p = (((v >> 2) << 5) & mask) | ((~mask) & *p); + } else { + int32_t v = (entry - 8) - instruction; + expect(&con, v == (v & PoolOffsetMask)); - *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); -#endif + *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); + } poolSize += TargetBytesPerWord; } diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp index 4413a399db..3117688b15 100644 --- a/src/codegen/target/arm/fixup.cpp +++ b/src/codegen/target/arm/fixup.cpp @@ -99,9 +99,9 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) { int32_t* p = reinterpret_cast(instruction); -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 int32_t v; int32_t mask; + if (vm::TargetBytesPerWord == 8) { if ((*p >> 24) == 0x54) { // conditional branch v = ((reinterpret_cast(value) - instruction) >> 2) << 5; @@ -111,10 +111,10 @@ void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) v = (reinterpret_cast(value) - instruction) >> 2; mask = 0x3FFFFFF; } -#else - int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; - const int32_t mask = 0xFFFFFF; -#endif + } else { + v = (reinterpret_cast(value) - (instruction + 8)) >> 2; + mask = 0xFFFFFF; + } expect(s, bounded(0, 8, v)); diff --git a/src/codegen/target/arm/fixup.h b/src/codegen/target/arm/fixup.h index 2e9c0aca01..cce2b59dce 100644 --- a/src/codegen/target/arm/fixup.h +++ b/src/codegen/target/arm/fixup.h @@ -27,11 +27,7 @@ namespace arm { const bool DebugPool = false; -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 -const int32_t PoolOffsetMask = 0x1FFFFF; -#else -const int32_t PoolOffsetMask = 0xFFF; -#endif +const int32_t PoolOffsetMask = vm::TargetBytesPerWord == 8 ? 0x1FFFFF : 0xFFF; class Task { public: diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h index d439ddc8ba..3bf4dc4041 100644 --- a/src/codegen/target/arm/registers.h +++ b/src/codegen/target/arm/registers.h @@ -23,10 +23,11 @@ namespace arm { const uint64_t MASK_LO32 = 0xffffffff; const unsigned MASK_LO8 = 0xff; -#if AVIAN_TARGET_ARCH == AVIAN_ARCH_ARM64 +#if TARGET_BYTES_PER_WORD == 8 constexpr Register ThreadRegister(19); constexpr Register StackRegister(31); constexpr Register LinkRegister(30); +constexpr Register FrameRegister(29); constexpr Register ProgramCounter(0xFE); // i.e. unaddressable const int N_GPRS = 32; diff --git a/src/compile-arm.S b/src/compile-arm.S index 2f566f9558..83703af607 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -109,7 +109,7 @@ GLOBAL(vmInvoke_safeStack): ldr r6,[r5,#CONTINUATION_LENGTH] lsl r6,r6,#2 neg r7,r6 - add r7,r7,#-80 + add r7,r7,#-80 // 80 bytes for callee-saved register values mov r4,sp str r4,[sp,r7]! 
@@ -167,10 +167,10 @@ LOCAL(vmInvoke_handleException): bx r7 LOCAL(vmInvoke_exit): -#endif // AVIAN_CONTINUATIONS mov ip, #0 str ip, [r8, #TARGET_THREAD_STACK] +#endif // AVIAN_CONTINUATIONS // restore return type ldr ip, [sp], #4 diff --git a/src/compile-arm64.S b/src/compile-arm64.S index 62816ccf9f..c1c9c942b2 100644 --- a/src/compile-arm64.S +++ b/src/compile-arm64.S @@ -23,12 +23,12 @@ # define GLOBAL(x) x #endif -#define CONTINUATION_NEXT 4 -#define CONTINUATION_ADDRESS 16 -#define CONTINUATION_RETURN_ADDRESS_OFFSET 20 -#define CONTINUATION_FRAME_POINTER_OFFSET 24 -#define CONTINUATION_LENGTH 28 -#define CONTINUATION_BODY 32 +#define CONTINUATION_NEXT 8 +#define CONTINUATION_ADDRESS 32 +#define CONTINUATION_RETURN_ADDRESS_OFFSET 40 +#define CONTINUATION_FRAME_POINTER_OFFSET 48 +#define CONTINUATION_LENGTH 56 +#define CONTINUATION_BODY 64 .globl GLOBAL(vmInvoke) .align 2 @@ -43,6 +43,7 @@ GLOBAL(vmInvoke): // allocate frame stp x29, x30, [sp,#-96]! + mov x29, sp // save callee-saved register values stp x19, x20, [sp,#16] @@ -96,11 +97,65 @@ GLOBAL(vmInvoke_returnAddress): GLOBAL(vmInvoke_safeStack): #ifdef AVIAN_CONTINUATIONS -#error todo -#endif // AVIAN_CONTINUATIONS + // call the next continuation, if any + ldr x5, [x19,#TARGET_THREAD_CONTINUATION] + cmp x5, xzr + b.eq LOCAL(vmInvoke_exit) + ldr x6, [x5,#CONTINUATION_LENGTH] + lsl x6, x6, #3 + neg x7, x6 + add x7, x7, #-128 // 128 bytes for callee-saved register values + mov x4, sp + add sp, sp, x7 + str x4, [sp] + + add x7, x5, #CONTINUATION_BODY + mov x11, xzr + b LOCAL(vmInvoke_continuationTest) + +LOCAL(vmInvoke_continuationLoop): + ldr x9, [x7,x11] + str x9, [sp,x11] + add x11, x11, #8 + +LOCAL(vmInvoke_continuationTest): + cmp x11, x6 + b.le LOCAL(vmInvoke_continuationLoop) + + ldr x7, [x5,#CONTINUATION_RETURN_ADDRESS_OFFSET] + adr x11, GLOBAL(vmInvoke_returnAddress) + str x11, [sp,x7] + + ldr x7, [x5,#CONTINUATION_NEXT] + str x7, [x19,#TARGET_THREAD_CONTINUATION] + + // call the continuation unless we're handling an exception + ldr x7, [x19,#TARGET_THREAD_EXCEPTION] + cmp x7, xzr + b.ne LOCAL(vmInvoke_handleException) + ldr x7, [x5,#CONTINUATION_ADDRESS] + br x7 + +LOCAL(vmInvoke_handleException): + // we're handling an exception - call the exception handler instead + str xzr, [x19,#TARGET_THREAD_EXCEPTION] + ldr x11, [x19,#TARGET_THREAD_EXCEPTIONSTACKADJUSTMENT] + ldr x9, [sp] + neg x11, x11 + add sp, sp, x11 + str x9, [sp] + ldr x11, [x19,#TARGET_THREAD_EXCEPTIONOFFSET] + str x7, [sp,x11] + + ldr x7, [x19,#TARGET_THREAD_EXCEPTIONHANDLER] + br x7 + +LOCAL(vmInvoke_exit): str xzr, [x19, #TARGET_THREAD_STACK] +#endif // AVIAN_CONTINUATIONS + // restore return type ldr w5, [sp],#16 @@ -119,7 +174,44 @@ LOCAL(vmInvoke_return): .align 2 GLOBAL(vmJumpAndInvoke): #ifdef AVIAN_CONTINUATIONS -#error todo + // x0: thread + // x1: address + // x2: stack + // x3: argumentFootprint + // x4: arguments + // x5: frameSize + + // allocate new frame, adding room for callee-saved registers, plus + // 8 bytes of padding since the calculation of frameSize assumes 8 + // bytes have already been allocated to save the return address, + // which is not true in this case + sub x2, x2, x5 + sub x2, x2, #136 + + mov x19, x0 + + // copy arguments into place + mov x6, xzr + b LOCAL(vmJumpAndInvoke_argumentTest) + +LOCAL(vmJumpAndInvoke_argumentLoop): + ldr x12, [x4,x6] + str x12, [x2,x6] + add x6, x6, #4 + +LOCAL(vmJumpAndInvoke_argumentTest): + cmp x6, x3 + ble LOCAL(vmJumpAndInvoke_argumentLoop) + + // the arguments have been copied, so we can set the 
real stack + // pointer now + mov sp, x2 + + // set return address to vmInvoke_returnAddress + adr x30, GLOBAL(vmInvoke_returnAddress) + + br x1 + #else // not AVIAN_CONTINUATIONS // vmJumpAndInvoke should only be called when continuations are // enabled, so we force a crash if we reach here: diff --git a/src/compile.cpp b/src/compile.cpp index 47b55574e7..51790bb0b3 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -2189,6 +2189,8 @@ GcContinuation* makeCurrentContinuation(MyThread* t, *targetIp = 0; while (*targetIp == 0) { + assertT(t, ip); + GcMethod* method = methodForIp(t, ip); if (method) { PROTECT(t, method);