single-precision hardfloat emission working

anything involving doubles (including conversions and thunking from
singles) is still broken
This commit is contained in:
JET 2012-06-04 12:39:53 -06:00
parent 714aebfec1
commit 11d750af70

View File

@ -57,8 +57,8 @@ inline int SWAP(int cond, int B, int Rn, int Rd, int Rm)
{ return cond<<28 | 1<<24 | B<<22 | Rn<<16 | Rd<<12 | 9<<4 | Rm; }
// Assembles a coprocessor data-operation instruction word by OR-ing each
// argument into its bit position: cond at bit 28, the fixed 0xe pattern at
// bit 24, opcode_1 at bit 20, CRn at bit 16, CRd at bit 12, cp_num at bit 8,
// opcode_2 at bit 5 and CRm at bit 0. Arguments are not range-checked.
inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
{
  int word = cond << 28;
  word |= 0xe << 24;
  word |= opcode_1 << 20;
  word |= CRn << 16;
  word |= CRd << 12;
  word |= cp_num << 8;
  word |= opcode_2 << 5;
  word |= CRm;
  return word;
}
// Assembles a coprocessor load/store (transfer) instruction word.
//
// cond, P, U, N, W, L, Rn, CRd and cp_num are OR-ed into bits 28, 24, 23, 22,
// 21, 20, 16, 12 and 8 respectively, together with the fixed 0x6 pattern at
// bit 25. None of them are range-checked.
//
// offset is given in BYTES; the instruction's low 8 bits hold it as a WORD
// count, so it is divided by four before being masked. Shifting first (rather
// than masking first) keeps byte offsets in the range 256..1020 intact instead
// of silently dropping their high bits.
// NOTE(review): offset is expected to be a non-negative multiple of 4, with
// direction carried by U -- confirm against callers.
inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset)
{
  const int wordOffset = (offset >> 2) & 0xff;
  return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | wordOffset;
}
// Assembles a coprocessor register-transfer instruction word by OR-ing each
// argument into its bit position: cond at bit 28, the fixed 0xe pattern at
// bit 24, opcode_1 at bit 21, L at bit 20, CRn at bit 16, Rd at bit 12,
// cp_num at bit 8, opcode_2 at bit 5, a constant 1 at bit 4 and CRm at bit 0.
// Arguments are not range-checked.
inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
{
  int word = cond << 28;
  word |= 0xe << 24;
  word |= opcode_1 << 21;
  word |= L << 20;
  word |= CRn << 16;
  word |= Rd << 12;
  word |= cp_num << 8;
  word |= opcode_2 << 5;
  word |= 1 << 4;
  word |= CRm;
  return word;
}
inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
@ -270,14 +270,17 @@ const int N_GPRS = 16;
// Count of floating-point registers.
const int N_FPRS = 16;
// 32-bit register-set masks: the low 16 bits for general-purpose registers and
// the high 16 bits for floating-point registers (presumably one bit per
// register number).
const uint32_t GPR_MASK = 0x0000ffff;
const uint32_t FPR_MASK = 0xffff0000;
// for source-to-destination masks: the 32-bit mask repeated in both halves
const uint64_t GPR_MASK64 = (static_cast<uint64_t>(GPR_MASK) << 32) | GPR_MASK;
// making the following const somehow breaks debug symbol output in GDB
/* const */ uint64_t FPR_MASK64 = (static_cast<uint64_t>(FPR_MASK) << 32) | FPR_MASK;
// True when the register's `low` number lies at or above N_GPRS, i.e. outside
// the general-purpose range.
inline bool isFpr(Assembler::Register* reg) {
  const bool inGeneralRange = reg->low < N_GPRS;
  return !inGeneralRange;
}
// Rebases a register's `low` number by subtracting N_GPRS, yielding its index
// relative to the first register above the general-purpose range.
inline int toFpr(Assembler::Register* reg) {
  const int rebased = reg->low - N_GPRS;
  return rebased;
}
// Rebases a register number by subtracting N_GPRS. Two overloads: one reads
// the number from a Register's `low` field, the other takes it directly.
inline int fpr(Assembler::Register* reg)
{
  const int rebased = reg->low - N_GPRS;
  return rebased;
}
inline int fpr(int reg)
{
  const int rebased = reg - N_GPRS;
  return rebased;
}
// Size of the frame header.
// NOTE(review): the unit (presumably machine words) is not visible here -- confirm.
const unsigned FrameHeaderSize = 1;
@ -584,7 +587,7 @@ using namespace isa;
// Appends one instruction word to the context's code buffer via append4().
inline void emit(Context* con, int code)
{
  con->code.append4(code);
}
// Acquires a temporary register from the client, restricted to the registers
// selected by GPR_MASK, and returns the value the client hands back.
// A stale, unmasked acquireTemporary() call used to precede this return and
// made the masked request unreachable.
inline int newTemp(Context* con) {
  return con->client->acquireTemporary(GPR_MASK);
}
inline int newTemp(Context* con, unsigned mask) {
@ -920,13 +923,12 @@ moveRR(Context* con, unsigned srcSize, Assembler::Register* src,
bool srcIsFpr = isFpr(src);
bool dstIsFpr = isFpr(dst);
if (srcIsFpr || dstIsFpr) { // floating-point register(s) involved
/**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- %d\n", dst->low, src->low);
// FPR to FPR
if (srcIsFpr && dstIsFpr) emit(con, fcpys(toFpr(dst), toFpr(src)));
if (srcIsFpr && dstIsFpr) emit(con, fcpys(fpr(dst), fpr(src)));
// FPR to GPR
else if (srcIsFpr) emit(con, fmrs(dst->low, toFpr(src)));
else if (srcIsFpr) emit(con, fmrs(dst->low, fpr(src)));
// GPR to FPR
else emit(con, fmsr(toFpr(dst), src->low));
else emit(con, fmsr(fpr(dst), src->low));
return;
}
@ -990,7 +992,6 @@ moveCR2(Context* con, unsigned size, Assembler::Constant* src,
{
if (isFpr(dst)) { // floating-point
Assembler::Register tmp = makeTemp(con);
/**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- 0x%llx\n", tmp.low, getValue(src));
moveCR2(con, size, src, &tmp, 0);
moveRR(con, size, &tmp, size, dst);
freeTemp(con, tmp);
@ -1096,88 +1097,86 @@ void multiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::R
// Emits one floating-point absolute-value instruction.
//   size : 8 selects the double-precision encoder (fabsd), any other value the
//          single-precision one (fabss).
//   a, b : register operands; each is rebased with fpr() before encoding and
//          b is passed first (presumably destination, then source).
// Exactly one instruction is emitted; the raw `low` numbers are never passed to
// the encoder directly.
void floatAbsoluteRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
    emit(con, fabsd(fpr(b), fpr(a)));
  } else {
    emit(con, fabss(fpr(b), fpr(a)));
  }
}
// Emits one floating-point negate instruction.
//   size : 8 selects the double-precision encoder (fnegd), any other value the
//          single-precision one (fnegs).
//   a, b : register operands; each is rebased with fpr() before encoding and
//          b is passed first (presumably destination, then source).
// Leftover debug tracing to stderr and the duplicate emits that used raw
// register numbers have been dropped.
void floatNegateRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
    emit(con, fnegd(fpr(b), fpr(a)));
  } else {
    emit(con, fnegs(fpr(b), fpr(a)));
  }
}
// Emits one floating-point precision-conversion instruction.
//   size : 8 selects fcvtsd, any other value selects fcvtds.
//          NOTE(review): presumably size is the source width, so 8 means
//          double-to-single -- confirm against the encoder definitions.
//   a, b : register operands; each is rebased with fpr() before encoding and
//          b is passed first.
void float2FloatRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
    emit(con, fcvtsd(fpr(b), fpr(a)));
  } else {
    emit(con, fcvtds(fpr(b), fpr(a)));
  }
}
// Converts a floating-point register value to an integer in register b.
//
// The conversion result is first produced in a temporary acquired with
// FPR_MASK, then moved into b with fmrs, and the temporary is released.
//   size : 8 selects ftosizd (double source), any other value ftosizs.
//   a    : source register, rebased with fpr().
//   b    : target register; its `low` number is passed to fmrs unrebased.
// The temporary's number is rebased once (ftmp) and that index is used for
// both emitted instructions.
void float2IntRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  int tmp = newTemp(con, FPR_MASK);
  int ftmp = fpr(tmp);
  if (size == 8) { // double to int
    emit(con, ftosizd(ftmp, fpr(a)));
  } else { // float to int
    emit(con, ftosizs(ftmp, fpr(a)));
  } // else thunked
  emit(con, fmrs(b->low, ftmp));
  freeTemp(con, tmp);
}
// Converts an integer in register a to a floating-point value in register b.
//
// The integer is first moved into b with fmsr, then converted in place.
//   a    : source register; its `low` number is passed to fmsr unrebased.
//   size : 8 selects fsitod (int to double), any other value fsitos.
//   b    : target register, rebased with fpr() for every encoder call.
void int2FloatRR(Context* con, unsigned UNUSED, Assembler::Register* a, unsigned size, Assembler::Register* b) {
  const int target = fpr(b);
  emit(con, fmsr(target, a->low));
  if (size == 8) { // int to double
    emit(con, fsitod(target, target));
  } else { // int to float
    emit(con, fsitos(target, target));
  } // else thunked
}
// Emits one floating-point square-root instruction.
//   size : 8 selects the double-precision encoder (fsqrtd), any other value
//          the single-precision one (fsqrts).
//   a, b : register operands; each is rebased with fpr() before encoding and
//          b is passed first (presumably destination, then source).
void floatSqrtRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
    emit(con, fsqrtd(fpr(b), fpr(a)));
  } else {
    emit(con, fsqrts(fpr(b), fpr(a)));
  }
}
// Emits one floating-point add instruction.
//   size    : 8 selects the double-precision encoder (faddd), any other value
//             the single-precision one (fadds).
//   a, b, t : register operands, each rebased with fpr(); passed to the
//             encoder in the order t, a, b.
// Leftover debug tracing to stderr and the duplicate raw-register emit have
// been dropped.
void floatAddR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
  if (size == 8) {
    emit(con, faddd(fpr(t), fpr(a), fpr(b)));
  } else {
    emit(con, fadds(fpr(t), fpr(a), fpr(b)));
  }
}
// Emits one floating-point subtract instruction.
//   size    : 8 selects the double-precision encoder (fsubd), any other value
//             the single-precision one (fsubs).
//   a, b, t : register operands, each rebased with fpr(); passed to the
//             encoder in the order t, b, a. Note that b precedes a here,
//             unlike the add/multiply emitters (presumably so the result is
//             b - a -- confirm against the encoder's operand order).
void floatSubtractR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
  if (size == 8) {
    emit(con, fsubd(fpr(t), fpr(b), fpr(a)));
  } else {
    emit(con, fsubs(fpr(t), fpr(b), fpr(a)));
  }
}
// Emits one floating-point multiply instruction.
//   size    : 8 selects the double-precision encoder (fmuld), any other value
//             the single-precision one (fmuls).
//   a, b, t : register operands, each rebased with fpr(); passed to the
//             encoder in the order t, a, b.
void floatMultiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
  if (size == 8) {
    emit(con, fmuld(fpr(t), fpr(a), fpr(b)));
  } else {
    emit(con, fmuls(fpr(t), fpr(a), fpr(b)));
  }
}
// Emits one floating-point divide instruction.
//   size    : 8 selects the double-precision encoder (fdivd), any other value
//             the single-precision one (fdivs).
//   a, b, t : register operands, each rebased with fpr(); passed to the
//             encoder in the order t, b, a. Note that b precedes a here
//             (presumably so the result is b / a -- confirm against the
//             encoder's operand order).
void floatDivideR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
  if (size == 8) {
    emit(con, fdivd(fpr(t), fpr(b), fpr(a)));
  } else {
    emit(con, fdivs(fpr(t), fpr(b), fpr(a)));
  }
}
@ -1242,12 +1241,11 @@ store(Context* con, unsigned size, Assembler::Register* src,
if (isFpr(src)) { // floating-point store
if (size == 4) {
/**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> fpr store base-indexed\n");
Assembler::Register base_(base),
normalized_(normalized),
absAddr = makeTemp(con);
addR(con, size, &base_, &normalized_, &absAddr);
emit(con, fsts(toFpr(src), absAddr.low));
emit(con, fsts(fpr(src), absAddr.low));
freeTemp(con, absAddr);
}
else abort(con);
@ -1281,8 +1279,7 @@ store(Context* con, unsigned size, Assembler::Register* src,
or (size != 2 and abs(offset) == (abs(offset) & 0xFFF)))
{
if (isFpr(src)) {
/**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> [%d + 0x%x] <- %d\n", base, offset, src->low);
if (size == 4) emit(con, fsts(toFpr(src), base, offset));
if (size == 4) emit(con, fsts(fpr(src), base, offset));
else abort(con);
} else {
switch (size) {
@ -1358,12 +1355,11 @@ load(Context* con, unsigned srcSize, int base, int offset, int index,
if (isFpr(dst)) { // floating-point store
if (srcSize == 4) {
/**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> fpr load base-indexed\n");
Assembler::Register base_(base),
normalized_(normalized),
absAddr = makeTemp(con);
addR(con, srcSize, &base_, &normalized_, &absAddr);
emit(con, flds(toFpr(dst), absAddr.low));
emit(con, flds(fpr(dst), absAddr.low));
freeTemp(con, absAddr);
}
else abort(con);
@ -1414,8 +1410,7 @@ load(Context* con, unsigned srcSize, int base, int offset, int index,
and abs(offset) == (abs(offset) & 0xFFF)))
{
if (isFpr(dst)) {
/**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- [%d + 0x%x]\n", dst->low, base, offset);
if (srcSize == 4) emit(con, flds(toFpr(dst), base, offset));
if (srcSize == 4) emit(con, flds(fpr(dst), base, offset));
else abort(con);
} else {
switch (srcSize) {
@ -1581,14 +1576,15 @@ compareRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
assert(c, aSize == 4 and bSize == 4);
assert(c, b->low != a->low);
assert(c, !(isFpr(a) ^ isFpr(b)));
if (isFpr(a)) {
emit(c, fcmps(toFpr(b), toFpr(a)));
emit(c, fcmps(fpr(b), fpr(a)));
emit(c, fmstat());
} else {
assert(c, b->low != a->low);
emit(c, cmp(b->low, a->low));
}
else emit(c, cmp(b->low, a->low));
}
void
@ -1699,6 +1695,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
switch (op) {
case JumpIfEqual:
case JumpIfFloatEqual:
next = c->code.length();
emit(c, bne(0));
@ -1707,6 +1704,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
break;
case JumpIfNotEqual:
case JumpIfFloatNotEqual:
conditional(c, bne(0), target);
compareSigned(c, 4, al, 4, bl);
@ -1714,6 +1712,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
break;
case JumpIfLess:
case JumpIfFloatLess:
conditional(c, blt(0), target);
next = c->code.length();
@ -1724,6 +1723,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
break;
case JumpIfGreater:
case JumpIfFloatGreater:
conditional(c, bgt(0), target);
next = c->code.length();
@ -1734,6 +1734,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
break;
case JumpIfLessOrEqual:
case JumpIfFloatLessOrEqual:
conditional(c, blt(0), target);
next = c->code.length();
@ -1744,6 +1745,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
break;
case JumpIfGreaterOrEqual:
case JumpIfFloatGreaterOrEqual:
conditional(c, bgt(0), target);
next = c->code.length();
@ -2326,13 +2328,16 @@ class MyArchitecture: public Assembler::Architecture {
break;
case Absolute:
*thunk = true;
break;
case FloatAbsolute:
case FloatSquareRoot:
case FloatNegate:
case Float2Float:
if (vfpSupported()) {
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = FPR_MASK;
*aRegisterMask = FPR_MASK64;
} else {
*thunk = true;
}
@ -2341,7 +2346,7 @@ class MyArchitecture: public Assembler::Architecture {
case Float2Int:
if (vfpSupported() && bSize == 4 && aSize == 4) {
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = FPR_MASK;
*aRegisterMask = FPR_MASK64;
} else {
*thunk = true;
}
@ -2350,7 +2355,7 @@ class MyArchitecture: public Assembler::Architecture {
case Int2Float:
if (vfpSupported() && aSize == 4 && bSize == 4) {
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = FPR_MASK;
*aRegisterMask = GPR_MASK64;
} else {
*thunk = true;
}
@ -2363,8 +2368,8 @@ class MyArchitecture: public Assembler::Architecture {
virtual void planDestination
(BinaryOperation op,
unsigned, uint8_t, uint64_t,
unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask)
unsigned, uint8_t aTypeMask, uint64_t,
unsigned , uint8_t* bTypeMask, uint64_t* bRegisterMask)
{
*bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*bRegisterMask = ~static_cast<uint64_t>(0);
@ -2374,6 +2379,26 @@ class MyArchitecture: public Assembler::Architecture {
*bTypeMask = (1 << RegisterOperand);
break;
case FloatAbsolute:
case FloatSquareRoot:
case FloatNegate:
case Float2Float:
case Int2Float:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = FPR_MASK64;
break;
case Float2Int:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = GPR_MASK64;
break;
case Move:
if (!(aTypeMask & 1 << RegisterOperand)) {
*bTypeMask = 1 << RegisterOperand;
}
break;
default:
break;
}
@ -2382,7 +2407,7 @@ class MyArchitecture: public Assembler::Architecture {
virtual void planMove
(unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
uint8_t dstTypeMask, uint64_t)
uint8_t dstTypeMask, uint64_t dstRegisterMask)
{
*srcTypeMask = ~0;
*srcRegisterMask = ~static_cast<uint64_t>(0);
@ -2394,6 +2419,11 @@ class MyArchitecture: public Assembler::Architecture {
// can't move directly from memory or constant to memory
*srcTypeMask = 1 << RegisterOperand;
*tmpTypeMask = 1 << RegisterOperand;
*tmpRegisterMask = GPR_MASK64;
} else if (dstTypeMask & 1 << RegisterOperand &&
dstRegisterMask & FPR_MASK) {
*srcTypeMask = *tmpTypeMask = 1 << RegisterOperand |
1 << MemoryOperand;
*tmpRegisterMask = ~static_cast<uint64_t>(0);
}
}
@ -2429,6 +2459,7 @@ class MyArchitecture: public Assembler::Architecture {
case Divide:
case Remainder:
case FloatRemainder:
*thunk = true;
break;
@ -2436,7 +2467,14 @@ class MyArchitecture: public Assembler::Architecture {
case FloatSubtract:
case FloatMultiply:
case FloatDivide:
case FloatRemainder:
if (vfpSupported()) {
*aTypeMask = *bTypeMask = (1 << RegisterOperand);
*aRegisterMask = *bRegisterMask = FPR_MASK64;
} else {
*thunk = true;
}
break;
case JumpIfFloatEqual:
case JumpIfFloatNotEqual:
case JumpIfFloatLess:
@ -2449,7 +2487,7 @@ class MyArchitecture: public Assembler::Architecture {
case JumpIfFloatGreaterOrEqualOrUnordered:
if (vfpSupported()) {
*aTypeMask = *bTypeMask = (1 << RegisterOperand);
*aRegisterMask = *bRegisterMask = FPR_MASK;
*aRegisterMask = *bRegisterMask = FPR_MASK64;
} else {
*thunk = true;
}