Single-precision hard-float emission is now working.

Anything involving doubles (including conversions and thunking from singles) is still broken.
2025-04-18 08:10:47 +00:00 · 2012-06-04 12:39:53 -06:00 · 2012-06-04 12:39:53 -06:00 · 11d750af70
commit 11d750af70
parent 714aebfec1
1 changed files with 96 additions and 58 deletions
--- a/src/arm.cpp
+++ b/src/arm.cpp
@ -57,8 +57,8 @@ inline int SWAP(int cond, int B, int Rn, int Rd, int Rm)
 { return cond<<28 | 1<<24 | B<<22 | Rn<<16 | Rd<<12 | 9<<4 | Rm; }
 inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
 { return cond<<28 | 0xe<<24 | opcode_1<<20 | CRn<<16 | CRd<<12 | cp_num<<8 | opcode_2<<5 | CRm; }
-inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset)
-{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff); }
+inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset) // NOTE(review): the encoded low field is (offset&0xff)>>2, i.e. the input is divided by 4 before encoding, so callers presumably pass a byte offset that is stored as a word count - confirm; the 0xff mask is applied BEFORE the shift, so only inputs 0..255 contribute (6 usable bits of the field) - confirm this limit is intended
+{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff)>>2; }
 inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
 { return cond<<28 | 0xe<<24 | opcode_1<<21 | L<<20 | CRn<<16 | Rd<<12 | cp_num<<8 | opcode_2<<5 | 1<<4 | CRm; }
 inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
@ -270,14 +270,17 @@ const int N_GPRS = 16;
 const int N_FPRS = 16;
 const uint32_t GPR_MASK = 0xffff;
 const uint32_t FPR_MASK = 0xffff0000;
+// for source-to-destination masks
+const uint64_t GPR_MASK64 = GPR_MASK | (uint64_t)GPR_MASK << 32;
+// making the following const somehow breaks debug symbol output in GDB
+/* const */ uint64_t FPR_MASK64 = FPR_MASK | (uint64_t)FPR_MASK << 32;

 inline bool isFpr(Assembler::Register* reg) {
  return reg->low >= N_GPRS;
 }

-inline int toFpr(Assembler::Register* reg) {
-  return reg->low - N_GPRS;
-}
+inline int fpr(Assembler::Register* reg) { return reg->low - N_GPRS; }
+inline int fpr(int reg) { return reg - N_GPRS; }

 const unsigned FrameHeaderSize = 1;

@ -584,7 +587,7 @@ using namespace isa;
 inline void emit(Context* con, int code) { con->code.append4(code); }

 inline int newTemp(Context* con) {
-  return con->client->acquireTemporary();
+  return con->client->acquireTemporary(GPR_MASK);
 }

 inline int newTemp(Context* con, unsigned mask) {
@ -920,13 +923,12 @@ moveRR(Context* con, unsigned srcSize, Assembler::Register* src,
  bool srcIsFpr = isFpr(src);
  bool dstIsFpr = isFpr(dst);
  if (srcIsFpr || dstIsFpr) { // floating-point register(s) involved
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- %d\n", dst->low, src->low);
    // FPR to FPR
-    if (srcIsFpr && dstIsFpr) emit(con, fcpys(toFpr(dst), toFpr(src)));
+    if (srcIsFpr && dstIsFpr) emit(con, fcpys(fpr(dst), fpr(src)));
    // FPR to GPR
-    else if (srcIsFpr)        emit(con, fmrs(dst->low, toFpr(src)));
+    else if (srcIsFpr)        emit(con, fmrs(dst->low, fpr(src)));
    // GPR to FPR
-    else                      emit(con, fmsr(toFpr(dst), src->low));
+    else                      emit(con, fmsr(fpr(dst), src->low));
    return;
  }

@ -990,7 +992,6 @@ moveCR2(Context* con, unsigned size, Assembler::Constant* src,
 {
  if (isFpr(dst)) { // floating-point
    Assembler::Register tmp = makeTemp(con);
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- 0x%llx\n", tmp.low, getValue(src));
    moveCR2(con, size, src, &tmp, 0);
    moveRR(con, size, &tmp, size, dst);
    freeTemp(con, tmp);
@ -1096,88 +1097,86 @@ void multiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::R

 void floatAbsoluteRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
-    emit(con, fabsd(b->low, a->low));
+    emit(con, fabsd(fpr(b), fpr(a)));
  } else {
-    emit(con, fabss(b->low, a->low));
+    emit(con, fabss(fpr(b), fpr(a)));
  }
 }

 void floatNegateRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> invalid 64-bit Scheiße\n");
-    emit(con, fnegd(b->low, a->low));
+    emit(con, fnegd(fpr(b), fpr(a)));
  } else {
-    /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- -%d\n", b->low, a->low);
-    emit(con, fnegs(b->low, a->low));
+    emit(con, fnegs(fpr(b), fpr(a)));
  }
 }

 void float2FloatRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  if (size == 8) {
-    emit(con, fcvtsd(b->low, a->low));
+    emit(con, fcvtsd(fpr(b), fpr(a)));
  } else {
-    emit(con, fcvtds(b->low, a->low));
+    emit(con, fcvtds(fpr(b), fpr(a)));
  }
 }

 void float2IntRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
  int tmp = newTemp(con, FPR_MASK);
+  int ftmp = fpr(tmp);
  if (size == 8) { // double to int
-    emit(con, ftosid(tmp, a->low));
+    emit(con, ftosizd(ftmp, fpr(a)));
  } else {         // float to int
-    emit(con, ftosis(tmp, a->low));
+    emit(con, ftosizs(ftmp, fpr(a)));
  }                // else thunked
-  emit(con, fmrs(b->low, tmp));
+  emit(con, fmrs(b->low, ftmp));
  freeTemp(con, tmp);
 }

 void int2FloatRR(Context* con, unsigned UNUSED, Assembler::Register* a, unsigned size, Assembler::Register* b) {
-  emit(con, fmsr(b->low, a->low));
+  emit(con, fmsr(fpr(b), a->low));
  if (size == 8) { // int to double
-    emit(con, fsitod(b->low, b->low));
+    emit(con, fsitod(fpr(b), fpr(b)));
  } else {         // int to float
-    emit(con, fsitos(b->low, b->low));
+    emit(con, fsitos(fpr(b), fpr(b)));
  }                // else thunked
 }

 void floatSqrtRR(Context* con, unsigned size, Assembler::Register* a, unsigned UNUSED, Assembler::Register* b) {
-  if (size == 8) { 
-    emit(con, fsqrtd(b->low, a->low));
+  if (size == 8) {
+    emit(con, fsqrtd(fpr(b), fpr(a)));
  } else {
-    emit(con, fsqrts(b->low, a->low));
+    emit(con, fsqrts(fpr(b), fpr(a)));
  }
 }

 void floatAddR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
-  if (size == 8) { 
-    emit(con, faddd(t->low, a->low, b->low));
+  if (size == 8) {
+    emit(con, faddd(fpr(t), fpr(a), fpr(b)));
  } else {
-    fprintf(stderr, "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ %d <- %d + %d\n", toFpr(t), toFpr(a), toFpr(b));
-    emit(con, fadds(toFpr(t), toFpr(a), toFpr(b)));
+    emit(con, fadds(fpr(t), fpr(a), fpr(b)));
  }
 }

 void floatSubtractR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
-  if (size == 8) { 
-    emit(con, fsubd(t->low, a->low, b->low));
+  if (size == 8) {
+    emit(con, fsubd(fpr(t), fpr(b), fpr(a)));
  } else {
-    emit(con, fsubs(t->low, a->low, b->low));
+    emit(con, fsubs(fpr(t), fpr(b), fpr(a)));
  }
 }

 void floatMultiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
  if (size == 8) {
-    emit(con, fmuld(t->low, a->low, b->low));
+    emit(con, fmuld(fpr(t), fpr(a), fpr(b)));
  } else {
-    emit(con, fmuls(t->low, a->low, b->low));
+    emit(con, fmuls(fpr(t), fpr(a), fpr(b)));
  }
 }

 void floatDivideR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) {
  if (size == 8) { 
-    emit(con, fdivd(t->low, a->low, b->low));
+    emit(con, fdivd(fpr(t), fpr(b), fpr(a)));
  } else {
-    emit(con, fdivs(t->low, a->low, b->low));
+    emit(con, fdivs(fpr(t), fpr(b), fpr(a)));
  }
 }

@ -1242,12 +1241,11 @@ store(Context* con, unsigned size, Assembler::Register* src,

    if (isFpr(src)) { // floating-point store
      if (size == 4) {
-        /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> fpr store base-indexed\n");
        Assembler::Register base_(base),
                            normalized_(normalized),
                            absAddr = makeTemp(con);
        addR(con, size, &base_, &normalized_, &absAddr);
-        emit(con, fsts(toFpr(src), absAddr.low));
+        emit(con, fsts(fpr(src), absAddr.low));
        freeTemp(con, absAddr);
      }
      else abort(con);
@ -1281,8 +1279,7 @@ store(Context* con, unsigned size, Assembler::Register* src,
             or (size != 2 and abs(offset) == (abs(offset) & 0xFFF)))
  {
    if (isFpr(src)) {
-      /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> [%d + 0x%x] <- %d\n", base, offset, src->low);
-      if (size == 4) emit(con, fsts(toFpr(src), base, offset));
+      if (size == 4) emit(con, fsts(fpr(src), base, offset));
      else           abort(con);
    } else {
      switch (size) {
@ -1358,12 +1355,11 @@ load(Context* con, unsigned srcSize, int base, int offset, int index,

    if (isFpr(dst)) { // floating-point store
      if (srcSize == 4) {
-        /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> fpr load base-indexed\n");
        Assembler::Register base_(base),
                            normalized_(normalized),
                            absAddr = makeTemp(con);
        addR(con, srcSize, &base_, &normalized_, &absAddr);
-        emit(con, flds(toFpr(dst), absAddr.low));
+        emit(con, flds(fpr(dst), absAddr.low));
        freeTemp(con, absAddr);
      }
      else abort(con);
@ -1414,8 +1410,7 @@ load(Context* con, unsigned srcSize, int base, int offset, int index,
                 and abs(offset) == (abs(offset) & 0xFFF)))
  {
    if (isFpr(dst)) {
-      /**/fprintf(stderr, ">>>>>>>>>>>>>>>>>>>>>>>> %d <- [%d + 0x%x]\n", dst->low, base, offset);
-      if (srcSize == 4) emit(con, flds(toFpr(dst), base, offset));
+      if (srcSize == 4) emit(con, flds(fpr(dst), base, offset));
      else           abort(con);
    } else {
      switch (srcSize) {
@ -1581,14 +1576,15 @@ compareRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a,
          unsigned bSize UNUSED, Assembler::Register* b)
 {
  assert(c, aSize == 4 and bSize == 4);
-  assert(c, b->low != a->low);
  assert(c, !(isFpr(a) ^ isFpr(b)));

  if (isFpr(a)) {
-    emit(c, fcmps(toFpr(b), toFpr(a)));
+    emit(c, fcmps(fpr(b), fpr(a)));
    emit(c, fmstat());
+  } else {
+    assert(c, b->low != a->low);
+    emit(c, cmp(b->low, a->low));
  }
-  else emit(c, cmp(b->low, a->low));
 }

 void
@ -1699,6 +1695,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
  
  switch (op) {
  case JumpIfEqual:
+  case JumpIfFloatEqual:
    next = c->code.length();
    emit(c, bne(0));

@ -1707,6 +1704,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
    break;

  case JumpIfNotEqual:
+  case JumpIfFloatNotEqual:
    conditional(c, bne(0), target);

    compareSigned(c, 4, al, 4, bl);
@ -1714,6 +1712,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
    break;

  case JumpIfLess:
+  case JumpIfFloatLess:
    conditional(c, blt(0), target);

    next = c->code.length();
@ -1724,6 +1723,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
    break;

  case JumpIfGreater:
+  case JumpIfFloatGreater:
    conditional(c, bgt(0), target);

    next = c->code.length();
@ -1734,6 +1734,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
    break;

  case JumpIfLessOrEqual:
+  case JumpIfFloatLessOrEqual:
    conditional(c, blt(0), target);

    next = c->code.length();
@ -1744,6 +1745,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
    break;

  case JumpIfGreaterOrEqual:
+  case JumpIfFloatGreaterOrEqual:
    conditional(c, bgt(0), target);

    next = c->code.length();
@ -2326,13 +2328,16 @@ class MyArchitecture: public Assembler::Architecture {
      break;

    case Absolute:
+      *thunk = true;
+      break;
+
    case FloatAbsolute:
    case FloatSquareRoot:
    case FloatNegate:
    case Float2Float:
      if (vfpSupported()) {
        *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = FPR_MASK;
+        *aRegisterMask = FPR_MASK64;
      } else {
        *thunk = true;
      }
@ -2341,7 +2346,7 @@ class MyArchitecture: public Assembler::Architecture {
    case Float2Int:
      if (vfpSupported() && bSize == 4 && aSize == 4) {
        *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = FPR_MASK;
+        *aRegisterMask = FPR_MASK64;
      } else {
        *thunk = true;
      }
@ -2350,7 +2355,7 @@ class MyArchitecture: public Assembler::Architecture {
    case Int2Float:
      if (vfpSupported() && aSize == 4 && bSize == 4) {
        *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = FPR_MASK;
+        *aRegisterMask = GPR_MASK64;
      } else {
        *thunk = true;
      }
@ -2363,8 +2368,8 @@ class MyArchitecture: public Assembler::Architecture {
  
  virtual void planDestination
  (BinaryOperation op,
-   unsigned, uint8_t, uint64_t,
-   unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask)
+   unsigned, uint8_t aTypeMask, uint64_t,
+   unsigned , uint8_t* bTypeMask, uint64_t* bRegisterMask)
  {
    *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
    *bRegisterMask = ~static_cast<uint64_t>(0);
@ -2374,6 +2379,26 @@ class MyArchitecture: public Assembler::Architecture {
      *bTypeMask = (1 << RegisterOperand);
      break;

+    case FloatAbsolute:
+    case FloatSquareRoot:
+    case FloatNegate:
+    case Float2Float:
+    case Int2Float:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = FPR_MASK64;
+      break;
+
+    case Float2Int:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = GPR_MASK64;
+      break;
+
+    case Move:
+      if (!(aTypeMask & 1 << RegisterOperand)) {
+        *bTypeMask = 1 << RegisterOperand;
+      }
+      break;
+
    default:
      break;
    }
@ -2382,7 +2407,7 @@ class MyArchitecture: public Assembler::Architecture {
  virtual void planMove
  (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
   uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-   uint8_t dstTypeMask, uint64_t)
+   uint8_t dstTypeMask, uint64_t dstRegisterMask)
  {
    *srcTypeMask = ~0;
    *srcRegisterMask = ~static_cast<uint64_t>(0);
@ -2394,6 +2419,11 @@ class MyArchitecture: public Assembler::Architecture {
      // can't move directly from memory or constant to memory
      *srcTypeMask = 1 << RegisterOperand;
      *tmpTypeMask = 1 << RegisterOperand;
+      *tmpRegisterMask = GPR_MASK64;
+    } else if (dstTypeMask & 1 << RegisterOperand &&
+               dstRegisterMask & FPR_MASK) {
+      *srcTypeMask = *tmpTypeMask = 1 << RegisterOperand |
+                                    1 << MemoryOperand;
      *tmpRegisterMask = ~static_cast<uint64_t>(0);
    }
  }
@ -2429,6 +2459,7 @@ class MyArchitecture: public Assembler::Architecture {

    case Divide:
    case Remainder:
+    case FloatRemainder:
      *thunk = true;
      break;

@ -2436,7 +2467,14 @@ class MyArchitecture: public Assembler::Architecture {
    case FloatSubtract:
    case FloatMultiply:
    case FloatDivide:
-    case FloatRemainder:
+      if (vfpSupported()) {
+        *aTypeMask = *bTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = *bRegisterMask = FPR_MASK64;
+      } else {
+        *thunk = true;
+      }    
+      break;
+
    case JumpIfFloatEqual:
    case JumpIfFloatNotEqual:
    case JumpIfFloatLess:
@ -2449,7 +2487,7 @@ class MyArchitecture: public Assembler::Architecture {
    case JumpIfFloatGreaterOrEqualOrUnordered:
      if (vfpSupported()) {
        *aTypeMask = *bTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = *bRegisterMask = FPR_MASK;
+        *aRegisterMask = *bRegisterMask = FPR_MASK64;
      } else {
        *thunk = true;
      }