diff --git a/src/arm.cpp b/src/arm.cpp
index 885a320594..739faa0bef 100644
--- a/src/arm.cpp
+++ b/src/arm.cpp
@@ -57,8 +57,8 @@ inline int SWAP(int cond, int B, int Rn, int Rd, int Rm)
 { return cond<<28 | 1<<24 | B<<22 | Rn<<16 | Rd<<12 | 9<<4 | Rm; }
 inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
 { return cond<<28 | 0xe<<24 | opcode_1<<20 | CRn<<16 | CRd<<12 | cp_num<<8 | opcode_2<<5 | CRm; }
-inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset)
-{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff); }
+inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset) // offset is in words, not bytes
+{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff)>>2; }
 inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
 { return cond<<28 | 0xe<<24 | opcode_1<<21 | L<<20 | CRn<<16 | Rd<<12 | cp_num<<8 | opcode_2<<5 | 1<<4 | CRm; }
 inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
@@ -270,14 +270,17 @@ const int N_GPRS = 16;
 const int N_FPRS = 16;
 const uint32_t GPR_MASK = 0xffff;
 const uint32_t FPR_MASK = 0xffff0000;
+// for source-to-destination masks
+const uint64_t GPR_MASK64 = GPR_MASK | (uint64_t)GPR_MASK << 32;
+// making the following const somehow breaks debug symbol output in GDB
+/* const */ uint64_t FPR_MASK64 = FPR_MASK | (uint64_t)FPR_MASK << 32;
 
 inline bool isFpr(Assembler::Register* reg) {
   return reg->low >= N_GPRS;
 }
 
-inline int toFpr(Assembler::Register* reg) {
-  return reg->low - N_GPRS;
-}
+inline int fpr(Assembler::Register* reg) { return reg->low - N_GPRS; }
+inline int fpr(int reg) { return reg - N_GPRS; }
 
 const unsigned FrameHeaderSize = 1;
 
@@ -584,7 +587,7 @@ using namespace isa;
 inline void emit(Context* con, int code) { con->code.append4(code); }
 
 inline int newTemp(Context* con) {
-  return con->client->acquireTemporary();
+  return con->client->acquireTemporary(GPR_MASK);
 }
 
 inline int newTemp(Context* con, unsigned mask) {
@@ -920,13 +923,12 @@ moveRR(Context* con, unsigned srcSize, Assembler::Register* src,
   bool srcIsFpr = isFpr(src);
   bool dstIsFpr = isFpr(dst);
   if (srcIsFpr || dstIsFpr) { // floating-point register(s) involved
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- %d\n", dst->low, src->low);
     // FPR to FPR
-    if (srcIsFpr && dstIsFpr) emit(con, fcpys(toFpr(dst), toFpr(src)));
+    if (srcIsFpr && dstIsFpr) emit(con, fcpys(fpr(dst), fpr(src)));
     // FPR to GPR
-    else if (srcIsFpr) emit(con, fmrs(dst->low, toFpr(src)));
+    else if (srcIsFpr) emit(con, fmrs(dst->low, fpr(src)));
     // GPR to FPR
-    else emit(con, fmsr(toFpr(dst), src->low));
+    else emit(con, fmsr(fpr(dst), src->low));
     return;
   }
 
@@ -990,7 +992,6 @@ moveCR2(Context* con, unsigned size, Assembler::Constant* src,
 {
   if (isFpr(dst)) { // floating-point
     Assembler::Register tmp = makeTemp(con);
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- 0x%llx\n", tmp.low, getValue(src));
     moveCR2(con, size, src, &tmp, 0);
     moveRR(con, size, &tmp, size, dst);
     freeTemp(con, tmp);
@@ -1096,88 +1097,86 @@ void multiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::R
 
 void floatAbsoluteRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
   if (size == 8) {
-    emit(con, fabsd(b->low, a->low));
+    emit(con, fabsd(fpr(b), fpr(a)));
   } else {
-    emit(con, fabss(b->low, a->low));
+    emit(con, fabss(fpr(b), fpr(a)));
   }
 }
 
 void floatNegateRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
   if (size == 8) {
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> invalid 64-bit Scheiße\n");
-    emit(con, fnegd(b->low, a->low));
+    emit(con, fnegd(fpr(b), fpr(a)));
   } else {
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- -%d\n", b->low, a->low);
-    emit(con, fnegs(b->low, a->low));
+    emit(con, fnegs(fpr(b), fpr(a)));
   }
 }
 
 void float2FloatRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
   if (size == 8) {
-    emit(con, fcvtsd(b->low, a->low));
+    emit(con, fcvtsd(fpr(b), fpr(a)));
   } else {
-    emit(con, fcvtds(b->low, a->low));
+    emit(con, fcvtds(fpr(b), fpr(a)));
   }
 }
 
 void float2IntRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
   int tmp = newTemp(con, FPR_MASK);
+  int ftmp = fpr(tmp);
   if (size == 8) { // double to int
-    emit(con, ftosid(tmp, a->low));
+    emit(con, ftosizd(ftmp, fpr(a)));
   } else { // float to int
-    emit(con, ftosis(tmp, a->low));
+    emit(con, ftosizs(ftmp, fpr(a)));
   } // else thunked
-  emit(con, fmrs(b->low, tmp));
+  emit(con, fmrs(b->low, ftmp));
   freeTemp(con, tmp);
 }
 
 void int2FloatRR(Context* con, unsigned UNUSED, Assembler::Register* a, unsigned size, Assembler::Register* b) {
-  emit(con, fmsr(b->low, a->low));
+  emit(con, fmsr(fpr(b), a->low));
   if (size == 8) { // int to double
-    emit(con, fsitod(b->low, b->low));
+    emit(con, fsitod(fpr(b), fpr(b)));
   } else { // int to float
-    emit(con, fsitos(b->low, b->low));
+    emit(con, fsitos(fpr(b), fpr(b)));
   } // else thunked
 }
 
 void floatSqrtRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
-  if (size == 8) {
-    emit(con, fsqrtd(b->low, a->low));
+  if (size == 8) {
+    emit(con, fsqrtd(fpr(b), fpr(a)));
   } else {
-    emit(con, fsqrts(b->low, a->low));
+    emit(con, fsqrts(fpr(b), fpr(a)));
   }
 }
 
 void floatAddR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
-  if (size == 8) {
-    emit(con, faddd(t->low, a->low, b->low));
+  if (size == 8) {
+    emit(con, faddd(fpr(t), fpr(a), fpr(b)));
   } else {
-    fprintf(stderr, "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ %d <- %d + %d\n", toFpr(t), toFpr(a), toFpr(b));
-    emit(con, fadds(toFpr(t), toFpr(a), toFpr(b)));
+    emit(con, fadds(fpr(t), fpr(a), fpr(b)));
   }
 }
 
 void floatSubtractR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
-  if (size == 8) {
-    emit(con, fsubd(t->low, a->low, b->low));
+  if (size == 8) {
+    emit(con, fsubd(fpr(t), fpr(b), fpr(a)));
   } else {
-    emit(con, fsubs(t->low, a->low, b->low));
+    emit(con, fsubs(fpr(t), fpr(b), fpr(a)));
  }
 }
 
 void floatMultiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
   if (size == 8) {
-    emit(con, fmuld(t->low, a->low, b->low));
+    emit(con, fmuld(fpr(t), fpr(a), fpr(b)));
   } else {
-    emit(con, fmuls(t->low, a->low, b->low));
+    emit(con, fmuls(fpr(t), fpr(a), fpr(b)));
   }
 }
 
 void floatDivideR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
   if (size == 8) {
-    emit(con, fdivd(t->low, a->low, b->low));
+    emit(con, fdivd(fpr(t), fpr(b), fpr(a)));
   } else {
-    emit(con, fdivs(t->low, a->low, b->low));
+    emit(con, fdivs(fpr(t), fpr(b), fpr(a)));
   }
 }
 
@@ -1242,12 +1241,11 @@ store(Context* con, unsigned size, Assembler::Register* src,
 
   if (isFpr(src)) { // floating-point store
     if (size == 4) {
-      /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> fpr store base-indexed\n");
       Assembler::Register base_(base),
                           normalized_(normalized),
                           absAddr = makeTemp(con);
       addR(con, size, &base_, &normalized_, &absAddr);
-      emit(con, fsts(toFpr(src), absAddr.low));
+      emit(con, fsts(fpr(src), absAddr.low));
       freeTemp(con, absAddr);
     } else abort(con);
 
@@ -1281,8 +1279,7 @@ store(Context* con, unsigned size, Assembler::Register* src,
       or (size != 2 and abs(offset) == (abs(offset) & 0xFFF)))
   {
     if (isFpr(src)) {
-      /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> [%d + 0x%x] <- %d\n", base, offset, src->low);
-      if (size == 4) emit(con, fsts(toFpr(src), base, offset));
+      if (size == 4) emit(con, fsts(fpr(src), base, offset));
       else abort(con);
     } else {
       switch (size) {
@@ -1358,12 +1355,11 @@ load(Context* con, unsigned srcSize, int base, int offset, int index,
 
   if (isFpr(dst)) { // floating-point store
     if (srcSize == 4) {
-      /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> fpr load base-indexed\n");
       Assembler::Register base_(base),
                           normalized_(normalized),
                           absAddr = makeTemp(con);
      addR(con, srcSize, &base_, &normalized_, &absAddr);
-      emit(con, flds(toFpr(dst), absAddr.low));
+      emit(con, flds(fpr(dst), absAddr.low));
       freeTemp(con, absAddr);
     } else abort(con);
 
@@ -1414,8 +1410,7 @@ load(Context* con, unsigned srcSize, int base, int offset, int index,
           and abs(offset) == (abs(offset) & 0xFFF)))
   {
     if (isFpr(dst)) {
-      /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- [%d + 0x%x]\n", dst->low, base, offset);
-      if (srcSize == 4) emit(con, flds(toFpr(dst), base, offset));
+      if (srcSize == 4) emit(con, flds(fpr(dst), base, offset));
       else abort(con);
     } else {
       switch (srcSize) {
@@ -1581,14 +1576,15 @@ compareRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a,
           unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, aSize == 4 and bSize == 4);
-  assert(c, b->low != a->low);
   assert(c, !(isFpr(a) ^ isFpr(b)));
 
   if (isFpr(a)) {
-    emit(c, fcmps(toFpr(b), toFpr(a)));
+    emit(c, fcmps(fpr(b), fpr(a)));
     emit(c, fmstat());
+  } else {
+    assert(c, b->low != a->low);
+    emit(c, cmp(b->low, a->low));
   }
-  else emit(c, cmp(b->low, a->low));
 }
 
 void
@@ -1699,6 +1695,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
 
   switch (op) {
   case JumpIfEqual:
+  case JumpIfFloatEqual:
     next = c->code.length();
     emit(c, bne(0));
 
@@ -1707,6 +1704,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
 
   case JumpIfNotEqual:
+  case JumpIfFloatNotEqual:
     conditional(c, bne(0), target);
     compareSigned(c, 4, al, 4, bl);
     break;
@@ -1714,6 +1712,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
 
   case JumpIfLess:
+  case JumpIfFloatLess:
     conditional(c, blt(0), target);
 
     next = c->code.length();
@@ -1724,6 +1723,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
 
   case JumpIfGreater:
+  case JumpIfFloatGreater:
     conditional(c, bgt(0), target);
 
     next = c->code.length();
@@ -1734,6 +1734,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
 
   case JumpIfLessOrEqual:
+  case JumpIfFloatLessOrEqual:
     conditional(c, blt(0), target);
 
     next = c->code.length();
@@ -1744,6 +1745,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
 
   case JumpIfGreaterOrEqual:
+  case JumpIfFloatGreaterOrEqual:
     conditional(c, bgt(0), target);
 
     next = c->code.length();
@@ -2326,13 +2328,16 @@ class MyArchitecture: public Assembler::Architecture {
       break;
 
    case Absolute:
+      *thunk = true;
+      break;
+
    case FloatAbsolute:
    case FloatSquareRoot:
    case FloatNegate:
    case Float2Float:
      if (vfpSupported()) {
        *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = FPR_MASK;
+        *aRegisterMask = FPR_MASK64;
      } else {
        *thunk = true;
      }
@@ -2341,7 +2346,7 @@ class MyArchitecture: public Assembler::Architecture {
    case Float2Int:
      if (vfpSupported() && bSize == 4 && aSize == 4) {
        *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = FPR_MASK;
+        *aRegisterMask = FPR_MASK64;
      } else {
        *thunk = true;
      }
@@ -2350,7 +2355,7 @@ class MyArchitecture: public Assembler::Architecture {
    case Int2Float:
      if (vfpSupported() && aSize == 4 && bSize == 4) {
        *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = FPR_MASK;
+        *aRegisterMask = GPR_MASK64;
      } else {
        *thunk = true;
      }
@@ -2363,8 +2368,8 @@ class MyArchitecture: public Assembler::Architecture {
 
  virtual void planDestination
  (BinaryOperation op,
-   unsigned, uint8_t, uint64_t,
-   unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask)
+   unsigned, uint8_t aTypeMask, uint64_t,
+   unsigned , uint8_t* bTypeMask, uint64_t* bRegisterMask)
  {
    *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
    *bRegisterMask = ~static_cast<uint64_t>(0);
@@ -2374,6 +2379,26 @@ class MyArchitecture: public Assembler::Architecture {
      *bTypeMask = (1 << RegisterOperand);
      break;
 
+    case FloatAbsolute:
+    case FloatSquareRoot:
+    case FloatNegate:
+    case Float2Float:
+    case Int2Float:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = FPR_MASK64;
+      break;
+
+    case Float2Int:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = GPR_MASK64;
+      break;
+
+    case Move:
+      if (!(aTypeMask & 1 << RegisterOperand)) {
+        *bTypeMask = 1 << RegisterOperand;
+      }
+      break;
+
    default:
      break;
    }
@@ -2382,7 +2407,7 @@ class MyArchitecture: public Assembler::Architecture {
 
  virtual void planMove
  (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
   uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-   uint8_t dstTypeMask, uint64_t)
+   uint8_t dstTypeMask, uint64_t dstRegisterMask)
  {
    *srcTypeMask = ~0;
    *srcRegisterMask = ~static_cast<uint64_t>(0);
@@ -2394,6 +2419,11 @@ class MyArchitecture: public Assembler::Architecture {
      // can't move directly from memory or constant to memory
      *srcTypeMask = 1 << RegisterOperand;
      *tmpTypeMask = 1 << RegisterOperand;
+      *tmpRegisterMask = GPR_MASK64;
+    } else if (dstTypeMask & 1 << RegisterOperand &&
+               dstRegisterMask & FPR_MASK) {
+      *srcTypeMask = *tmpTypeMask = 1 << RegisterOperand |
+                                    1 << MemoryOperand;
      *tmpRegisterMask = ~static_cast<uint64_t>(0);
    }
  }
@@ -2429,6 +2459,7 @@ class MyArchitecture: public Assembler::Architecture {
 
    case Divide:
    case Remainder:
+    case FloatRemainder:
      *thunk = true;
      break;
 
@@ -2436,7 +2467,14 @@ class MyArchitecture: public Assembler::Architecture {
    case FloatAdd:
    case FloatSubtract:
    case FloatMultiply:
    case FloatDivide:
-    case FloatRemainder:
+      if (vfpSupported()) {
+        *aTypeMask = *bTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = *bRegisterMask = FPR_MASK64;
+      } else {
+        *thunk = true;
+      }
+      break;
+
    case JumpIfFloatEqual:
    case JumpIfFloatNotEqual:
    case JumpIfFloatLess:
@@ -2449,7 +2487,7 @@ class MyArchitecture: public Assembler::Architecture {
    case JumpIfFloatGreater:
    case JumpIfFloatLessOrEqual:
    case JumpIfFloatGreaterOrEqualOrUnordered:
      if (vfpSupported()) {
        *aTypeMask = *bTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = *bRegisterMask = FPR_MASK;
+        *aRegisterMask = *bRegisterMask = FPR_MASK64;
      } else {
        *thunk = true;
      }
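Editorial note, not part of the patch: the sketch below is a minimal, self-contained C++ illustration of the register-mask convention the diff relies on. GPR_MASK64 and FPR_MASK64 (which the patch's comment calls source-to-destination masks) replicate the 16-bit GPR and FPR sets into both halves of a 64-bit mask, and fpr() maps a combined register index (16-31) to a VFP register number (0-15). The constants and fpr() mirror definitions visible in the hunks above; main(), the assertions, and the printed message are illustrative additions only.

// Standalone illustration -- mirrors definitions from the patch, not part of src/arm.cpp.
#include <cassert>
#include <cstdint>
#include <cstdio>

const int N_GPRS = 16;                 // general-purpose registers r0-r15
const uint32_t GPR_MASK = 0xffff;      // one bit per GPR (bits 0-15)
const uint32_t FPR_MASK = 0xffff0000;  // VFP registers use combined indices 16-31

// Replicate each 32-bit register set into both halves of a 64-bit mask,
// as the patch does for GPR_MASK64 / FPR_MASK64.
const uint64_t GPR_MASK64 = GPR_MASK | (uint64_t)GPR_MASK << 32;
const uint64_t FPR_MASK64 = FPR_MASK | (uint64_t)FPR_MASK << 32;

// fpr() converts a combined register index (16-31) into a VFP register number (0-15),
// matching `inline int fpr(int reg) { return reg - N_GPRS; }` in the patch.
inline int fpr(int reg) { return reg - N_GPRS; }

static_assert(GPR_MASK64 == 0x0000ffff0000ffffULL, "GPR set replicated into both halves");
static_assert(FPR_MASK64 == 0xffff0000ffff0000ULL, "FPR set replicated into both halves");

int main() {
  assert(fpr(16) == 0 && fpr(31) == 15);  // first and last FPR indices map to 0 and 15
  std::printf("GPR_MASK64=%#llx FPR_MASK64=%#llx\n",
              (unsigned long long)GPR_MASK64, (unsigned long long)FPR_MASK64);
  return 0;
}

In the patch itself these 64-bit masks are the values handed back through *aRegisterMask/*bRegisterMask in planSource/planDestination so that only VFP registers are proposed for floating-point operands, and fpr() is what the emit helpers use to turn Assembler::Register indices into VFP operand fields.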