added floating point support, split plan function

This commit is contained in:
Josh warner 2009-08-06 10:26:22 -06:00
parent 61bc729974
commit a2e639a2d2

View File

@ -13,10 +13,15 @@
#include "assembler.h" #include "assembler.h"
#include "vector.h" #include "vector.h"
#include "machine.h"
#define CAST1(x) reinterpret_cast<UnaryOperationType>(x) #define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
#define CAST2(x) reinterpret_cast<BinaryOperationType>(x) #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
const bool DebugSSE = false;
const bool EnableSSE = true;
const bool EnableSSE2 = true;
using namespace vm; using namespace vm;
namespace { namespace {
@ -40,6 +45,28 @@ enum {
r15 = 15, r15 = 15,
}; };
enum {
xmm0 = r15 + 1,
xmm1,
xmm2,
xmm3,
xmm4,
xmm5,
xmm6,
xmm7,
xmm8,
xmm9,
xmm10,
xmm11,
xmm12,
xmm13,
xmm14,
xmm15,
};
const unsigned GeneralRegisterMask = BytesPerWord == 4 ? 0x000000ff : 0x0000ffff;
const unsigned FloatRegisterMask = BytesPerWord == 4 ? 0x00ff0000 : 0xffff0000;
const unsigned FrameHeaderSize = 2; const unsigned FrameHeaderSize = 2;
inline bool inline bool
@ -399,6 +426,35 @@ padding(AlignmentPadding* p, unsigned start, unsigned offset,
return padding; return padding;
} }
extern "C"
bool detectFeature(unsigned ecx, unsigned edx);
inline bool
supportsSSE()
{
static int supported = -1;
if(supported == -1) {
supported = EnableSSE && detectFeature(0, 0x2000000);
if(DebugSSE) {
fprintf(stderr, "sse %sdetected.\n", supported ? "" : "not ");
}
}
return supported;
}
inline bool
supportsSSE2()
{
static int supported = -1;
if(supported == -1) {
supported = EnableSSE2 && detectFeature(0, 0x4000000);
if(DebugSSE) {
fprintf(stderr, "sse2 %sdetected.\n", supported ? "" : "not ");
}
}
return supported;
}
#define REX_W 0x48 #define REX_W 0x48
#define REX_R 0x44 #define REX_R 0x44
#define REX_X 0x42 #define REX_X 0x42
@ -503,6 +559,12 @@ inline void opcode(Context* c, uint8_t op1, uint8_t op2) {
c->code.append(op2); c->code.append(op2);
} }
inline void opcode(Context* c, uint8_t op1, uint8_t op2, uint8_t op3) {
c->code.append(op1);
c->code.append(op2);
c->code.append(op3);
}
void void
return_(Context* c) return_(Context* c)
{ {
@ -667,6 +729,14 @@ jumpIfLessOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
conditional(c, 0x8e, a); conditional(c, 0x8e, a);
} }
void
jumpIfUnorderedC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
{
assert(c, size == BytesPerWord);
conditional(c, 0x8a, a);
}
void void
longJumpC(Context* c, unsigned size, Assembler::Constant* a) longJumpC(Context* c, unsigned size, Assembler::Constant* a)
{ {
@ -806,11 +876,59 @@ moveCR2(Context* c, UNUSED unsigned aSize, Assembler::Constant* a,
} }
} }
inline bool floatReg(Assembler::Register* a) {
return a->low >= xmm0;
}
void
sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
if(floatReg(a) && floatReg(b)) {
if(aSize == 4) {
opcode(c, 0xf3);
maybeRex(c, 4, a, b);
opcode(c, 0x0f, 0x10);
modrm(c, 0xc0, b, a);
} else {
opcode(c, 0xf2);
maybeRex(c, 4, a, b);
opcode(c, 0x0f, 0x10);
modrm(c, 0xc0, b, a);
}
} else if(floatReg(a)) {
opcode(c, 0x66);
maybeRex(c, aSize, a, b);
opcode(c, 0x0f, 0x7e);
modrm(c, 0xc0, b, a);
} else {
opcode(c, 0x66);
maybeRex(c, aSize, a, b);
opcode(c, 0x0f, 0x6e);
modrm(c, 0xc0, a, b);
}
}
void
sseMoveCR(Context* c, unsigned aSize, Assembler::Constant* a,
unsigned bSize, Assembler::Register* b)
{
assert(c, aSize <= BytesPerWord);
Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR2(c, aSize, a, aSize, &tmp, 0);
sseMoveRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low);
}
void void
moveCR(Context* c, unsigned aSize, Assembler::Constant* a, moveCR(Context* c, unsigned aSize, Assembler::Constant* a,
unsigned bSize, Assembler::Register* b) unsigned bSize, Assembler::Register* b)
{ {
moveCR2(c, aSize, a, bSize, b, 0); if(floatReg(b)) {
sseMoveCR(c, aSize, a, bSize, b);
} else {
moveCR2(c, aSize, a, bSize, b, 0);
}
} }
void void
@ -829,7 +947,11 @@ void
moveRR(Context* c, unsigned aSize, Assembler::Register* a, moveRR(Context* c, unsigned aSize, Assembler::Register* a,
UNUSED unsigned bSize, Assembler::Register* b) UNUSED unsigned bSize, Assembler::Register* b)
{ {
if(floatReg(a) or floatReg(b)) {
sseMoveRR(c, aSize, a, bSize, b);
return;
}
if (BytesPerWord == 4 and aSize == 8 and bSize == 8) { if (BytesPerWord == 4 and aSize == 8 and bSize == 8) {
Assembler::Register ah(a->high); Assembler::Register ah(a->high);
Assembler::Register bh(b->high); Assembler::Register bh(b->high);
@ -902,10 +1024,25 @@ moveRR(Context* c, unsigned aSize, Assembler::Register* a,
} }
} }
void
sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
opcode(c, 0x66);
maybeRex(c, aSize, b, a);
opcode(c, 0x0f, 0x6e);
modrmSibImm(c, b, a);
}
void void
moveMR(Context* c, unsigned aSize, Assembler::Memory* a, moveMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize, Assembler::Register* b) unsigned bSize, Assembler::Register* b)
{ {
if(floatReg(b)) {
sseMoveMR(c, aSize, a, bSize, b);
return;
}
switch (aSize) { switch (aSize) {
case 1: case 1:
maybeRex(c, bSize, b, a); maybeRex(c, bSize, b, a);
@ -956,12 +1093,27 @@ moveMR(Context* c, unsigned aSize, Assembler::Memory* a,
} }
} }
void
sseMoveRM(Context* c, unsigned aSize, Assembler::Register* a,
UNUSED unsigned bSize, Assembler::Memory* b)
{
opcode(c, 0x66);
maybeRex(c, aSize, a, b);
opcode(c, 0x0f, 0x7e);
modrmSibImm(c, a, b);
}
void void
moveRM(Context* c, unsigned aSize, Assembler::Register* a, moveRM(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Memory* b) unsigned bSize UNUSED, Assembler::Memory* b)
{ {
assert(c, aSize == bSize); assert(c, aSize == bSize);
if(floatReg(a)) {
sseMoveRM(c, aSize, a, bSize, b);
return;
}
switch (aSize) { switch (aSize) {
case 1: case 1:
maybeRex(c, bSize, a, b); maybeRex(c, bSize, a, b);
@ -1066,7 +1218,7 @@ moveCM(Context* c, unsigned aSize UNUSED, Assembler::Constant* a,
modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset); modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
c->code.append4(a->value->value()); c->code.append4(a->value->value());
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, 8, a, 8, &tmp); moveCR(c, 8, a, 8, &tmp);
moveRM(c, 8, &tmp, 8, b); moveRM(c, 8, &tmp, 8, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1188,7 +1340,7 @@ addCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
addRR(c, aSize, &tmp, bSize, b); addRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1246,7 +1398,7 @@ subtractCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
subtractRR(c, aSize, &tmp, bSize, b); subtractRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1335,7 +1487,7 @@ andCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
andRR(c, aSize, &tmp, bSize, b); andRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1392,7 +1544,7 @@ orCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
orRR(c, aSize, &tmp, bSize, b); orRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1448,7 +1600,7 @@ xorCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
xorRR(c, aSize, &tmp, bSize, b); xorRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1523,7 +1675,7 @@ compareCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
compareRR(c, aSize, &tmp, bSize, b); compareRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1537,7 +1689,7 @@ multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a,
assert(c, aSize == bSize); assert(c, aSize == bSize);
if (BytesPerWord == 4 and aSize == 8) { if (BytesPerWord == 4 and aSize == 8) {
const uint32_t mask = ~((1 << rax) | (1 << rdx)); const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
Assembler::Register tmp(c->client->acquireTemporary(mask), Assembler::Register tmp(c->client->acquireTemporary(mask),
c->client->acquireTemporary(mask)); c->client->acquireTemporary(mask));
@ -1560,7 +1712,7 @@ multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a,
c->code.append4(v); c->code.append4(v);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, aSize, &tmp); moveCR(c, aSize, a, aSize, &tmp);
multiplyRR(c, aSize, &tmp, bSize, b); multiplyRR(c, aSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1605,7 +1757,7 @@ compareCM(Context* c, unsigned aSize, Assembler::Constant* a,
abort(c); abort(c);
} }
} else { } else {
Assembler::Register tmp(c->client->acquireTemporary()); Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
moveCR(c, aSize, a, bSize, &tmp); moveCR(c, aSize, a, bSize, &tmp);
compareRM(c, bSize, &tmp, bSize, b); compareRM(c, bSize, &tmp, bSize, b);
c->client->releaseTemporary(tmp.low); c->client->releaseTemporary(tmp.low);
@ -1928,6 +2080,219 @@ unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, Assembler::Constant* a,
doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b); doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b);
} }
inline void floatRegOp(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b, uint8_t op, uint8_t mod = 0xc0)
{
if(aSize == 4) {
opcode(c, 0xf3);
} else {
opcode(c, 0xf2);
}
maybeRex(c, bSize, a, b);
opcode(c, 0x0f, op);
modrm(c, mod, a, b);
}
inline void floatMemOp(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b, uint8_t op)
{
if(aSize == 4) {
opcode(c, 0xf3);
} else {
opcode(c, 0xf2);
}
maybeRex(c, bSize, b, a);
opcode(c, 0x0f, op);
modrmSibImm(c, b, a);
}
void
floatSqrtRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatRegOp(c, aSize, a, 4, b, 0x51);
}
void
floatSqrtMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatMemOp(c, aSize, a, 4, b, 0x51);
}
void
floatAddRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatRegOp(c, aSize, a, 4, b, 0x58);
}
void
floatAddMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatMemOp(c, aSize, a, 4, b, 0x58);
}
void
floatSubtractRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatRegOp(c, aSize, a, 4, b, 0x5c);
}
void
floatSubtractMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatMemOp(c, aSize, a, 4, b, 0x5c);
}
void
floatMultiplyRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatRegOp(c, aSize, a, 4, b, 0x59);
}
void
floatMultiplyMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatMemOp(c, aSize, a, 4, b, 0x59);
}
void
floatDivideRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatRegOp(c, aSize, a, 4, b, 0x5e);
}
void
floatDivideMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
floatMemOp(c, aSize, a, 4, b, 0x5e);
}
void
float2FloatRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
assert(c, supportsSSE2());
floatRegOp(c, aSize, a, 4, b, 0x5a);
}
void
float2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
assert(c, supportsSSE2());
floatMemOp(c, aSize, a, 4, b, 0x5a);
}
void
float2IntRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize, Assembler::Register* b)
{
assert(c, !floatReg(b));
floatRegOp(c, aSize, a, bSize, b, 0x2d);
}
void
float2IntMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize, Assembler::Register* b)
{
floatMemOp(c, aSize, a, bSize, b, 0x2d);
}
void
int2FloatRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize, Assembler::Register* b)
{
floatRegOp(c, bSize, a, aSize, b, 0x2a);
}
void
int2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a,
unsigned bSize, Assembler::Register* b)
{
floatMemOp(c, bSize, a, aSize, b, 0x2a);
}
void
floatCompareRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
if (aSize == 8) {
opcode(c, 0x66);
}
maybeRex(c, 4, a, b);
opcode(c, 0x0f, 0x2e);
modrm(c, 0xc0, a, b);
}
void
floatNegateRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
assert(c, floatReg(a) and floatReg(b));
assert(c, aSize == 4); //unlike most of the other floating point code, this does NOT support doubles.
ResolvedPromise pcon(0x80000000);
Assembler::Constant con(&pcon);
if(a->low == b->low) {
Assembler::Register tmp(c->client->acquireTemporary(FloatRegisterMask));
moveCR(c, 4, &con, 4, &tmp);
maybeRex(c, 4, a, &tmp);
opcode(c, 0x0f, 0x57);
modrm(c, 0xc0, &tmp, a);
c->client->releaseTemporary(tmp.low);
} else {
moveCR(c, 4, &con, 4, b);
if(aSize == 8) opcode(c, 0x66);
maybeRex(c, 4, a, b);
opcode(c, 0x0f, 0x57);
modrm(c, 0xc0, a, b);
}
}
void
floatAbsRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b)
{
assert(c, floatReg(a) and floatReg(b));
assert(c, aSize == 4); //unlike most of the other floating point code, this does NOT support doubles.
ResolvedPromise pcon(0x7fffffff);
Assembler::Constant con(&pcon);
if(a->low == b->low) {
Assembler::Register tmp(c->client->acquireTemporary(FloatRegisterMask));
moveCR(c, 4, &con, 4, &tmp);
maybeRex(c, 4, a, &tmp);
opcode(c, 0x0f, 0x54);
modrm(c, 0xc0, &tmp, a);
c->client->releaseTemporary(tmp.low);
} else {
moveCR(c, 4, &con, 4, b);
maybeRex(c, 4, a, b);
opcode(c, 0x0f, 0x54);
modrm(c, 0xc0, a, b);
}
}
void
absRR(Context* c, unsigned aSize, Assembler::Register* a,
unsigned bSize UNUSED, Assembler::Register* b UNUSED)
{
assert(c, aSize == bSize and a->low == rax and b->low == rax);
Assembler::Register d(c->client->acquireTemporary(static_cast<uint64_t>(1) << rdx));
maybeRex(c, aSize, a, b);
opcode(c, 0x99);
xorRR(c, aSize, &d, aSize, a);
subtractRR(c, aSize, &d, aSize, a);
c->client->releaseTemporary(rdx);
}
void void
populateTables(ArchitectureContext* c) populateTables(ArchitectureContext* c)
{ {
@ -1963,11 +2328,14 @@ populateTables(ArchitectureContext* c)
uo[index(JumpIfGreaterOrEqual, C)] = CAST1(jumpIfGreaterOrEqualC); uo[index(JumpIfGreaterOrEqual, C)] = CAST1(jumpIfGreaterOrEqualC);
uo[index(JumpIfLess, C)] = CAST1(jumpIfLessC); uo[index(JumpIfLess, C)] = CAST1(jumpIfLessC);
uo[index(JumpIfLessOrEqual, C)] = CAST1(jumpIfLessOrEqualC); uo[index(JumpIfLessOrEqual, C)] = CAST1(jumpIfLessOrEqualC);
uo[index(JumpIfUnordered, C)] = CAST1(jumpIfUnorderedC);
uo[index(LongJump, C)] = CAST1(longJumpC); uo[index(LongJump, C)] = CAST1(longJumpC);
bo[index(Negate, R, R)] = CAST2(negateRR); bo[index(Negate, R, R)] = CAST2(negateRR);
bo[index(FloatNegate, R, R)] = CAST2(floatNegateRR);
bo[index(Move, R, R)] = CAST2(moveRR); bo[index(Move, R, R)] = CAST2(moveRR);
bo[index(Move, C, R)] = CAST2(moveCR); bo[index(Move, C, R)] = CAST2(moveCR);
bo[index(Move, M, R)] = CAST2(moveMR); bo[index(Move, M, R)] = CAST2(moveMR);
@ -1975,6 +2343,9 @@ populateTables(ArchitectureContext* c)
bo[index(Move, C, M)] = CAST2(moveCM); bo[index(Move, C, M)] = CAST2(moveCM);
bo[index(Move, A, R)] = CAST2(moveAR); bo[index(Move, A, R)] = CAST2(moveAR);
bo[index(FloatSqrt, R, R)] = CAST2(floatSqrtRR);
bo[index(FloatSqrt, M, R)] = CAST2(floatSqrtMR);
bo[index(MoveZ, R, R)] = CAST2(moveZRR); bo[index(MoveZ, R, R)] = CAST2(moveZRR);
bo[index(MoveZ, M, R)] = CAST2(moveZMR); bo[index(MoveZ, M, R)] = CAST2(moveZMR);
@ -1983,12 +2354,20 @@ populateTables(ArchitectureContext* c)
bo[index(Compare, C, M)] = CAST2(compareCM); bo[index(Compare, C, M)] = CAST2(compareCM);
bo[index(Compare, R, M)] = CAST2(compareRM); bo[index(Compare, R, M)] = CAST2(compareRM);
bo[index(FloatCompare, R, R)] = CAST2(floatCompareRR);
bo[index(Add, R, R)] = CAST2(addRR); bo[index(Add, R, R)] = CAST2(addRR);
bo[index(Add, C, R)] = CAST2(addCR); bo[index(Add, C, R)] = CAST2(addCR);
bo[index(Subtract, C, R)] = CAST2(subtractCR); bo[index(Subtract, C, R)] = CAST2(subtractCR);
bo[index(Subtract, R, R)] = CAST2(subtractRR); bo[index(Subtract, R, R)] = CAST2(subtractRR);
bo[index(FloatAdd, R, R)] = CAST2(floatAddRR);
bo[index(FloatAdd, M, R)] = CAST2(floatAddMR);
bo[index(FloatSubtract, R, R)] = CAST2(floatSubtractRR);
bo[index(FloatSubtract, M, R)] = CAST2(floatSubtractMR);
bo[index(And, R, R)] = CAST2(andRR); bo[index(And, R, R)] = CAST2(andRR);
bo[index(And, C, R)] = CAST2(andCR); bo[index(And, C, R)] = CAST2(andCR);
@ -2003,6 +2382,12 @@ populateTables(ArchitectureContext* c)
bo[index(Divide, R, R)] = CAST2(divideRR); bo[index(Divide, R, R)] = CAST2(divideRR);
bo[index(FloatMultiply, R, R)] = CAST2(floatMultiplyRR);
bo[index(FloatMultiply, M, R)] = CAST2(floatMultiplyMR);
bo[index(FloatDivide, R, R)] = CAST2(floatDivideRR);
bo[index(FloatDivide, M, R)] = CAST2(floatDivideMR);
bo[index(Remainder, R, R)] = CAST2(remainderRR); bo[index(Remainder, R, R)] = CAST2(remainderRR);
bo[index(LongCompare, C, R)] = CAST2(longCompareCR); bo[index(LongCompare, C, R)] = CAST2(longCompareCR);
@ -2016,8 +2401,19 @@ populateTables(ArchitectureContext* c)
bo[index(UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR); bo[index(UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR);
bo[index(UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR); bo[index(UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR);
}
bo[index(Float2Float, R, R)] = CAST2(float2FloatRR);
bo[index(Float2Float, M, R)] = CAST2(float2FloatMR);
bo[index(Float2Int, R, R)] = CAST2(float2IntRR);
bo[index(Float2Int, M, R)] = CAST2(float2IntMR);
bo[index(Int2Float, R, R)] = CAST2(int2FloatRR);
bo[index(Int2Float, M, R)] = CAST2(int2FloatMR);
bo[index(Abs, R, R)] = CAST2(absRR);
bo[index(FloatAbs, R, R)] = CAST2(floatAbsRR);
}
class MyArchitecture: public Assembler::Architecture { class MyArchitecture: public Assembler::Architecture {
public: public:
MyArchitecture(System* system): c(system), referenceCount(0) { MyArchitecture(System* system): c(system), referenceCount(0) {
@ -2025,7 +2421,31 @@ class MyArchitecture: public Assembler::Architecture {
} }
virtual unsigned registerCount() { virtual unsigned registerCount() {
return (BytesPerWord == 4 ? 8 : 16); if (supportsSSE()) {
return BytesPerWord == 4 ? 24 : 32;
} else {
return BytesPerWord == 4 ? 8 : 16;
}
}
virtual unsigned generalRegisterCount() {
return BytesPerWord == 4 ? 8 : 16;
}
virtual unsigned floatRegisterCount() {
if (supportsSSE()) {
return BytesPerWord == 4 ? 8 : 16;
} else {
return 0;
}
}
virtual uint64_t generalRegisters() {
return GeneralRegisterMask;
}
virtual uint64_t floatRegisters() {
return supportsSSE() ? FloatRegisterMask : 0;
} }
virtual int stack() { virtual int stack() {
@ -2044,10 +2464,6 @@ class MyArchitecture: public Assembler::Architecture {
return (BytesPerWord == 4 ? rdx : NoRegister); return (BytesPerWord == 4 ? rdx : NoRegister);
} }
virtual bool condensedAddressing() {
return true;
}
virtual bool bigEndian() { virtual bool bigEndian() {
return false; return false;
} }
@ -2058,7 +2474,7 @@ class MyArchitecture: public Assembler::Architecture {
case rsp: case rsp:
case rbx: case rbx:
return true; return true;
default: default:
return false; return false;
} }
@ -2171,6 +2587,10 @@ class MyArchitecture: public Assembler::Architecture {
return 0; return 0;
} }
virtual bool supportsFloatCompare(unsigned size) {
return supportsSSE() and size <= BytesPerWord;
}
virtual void nextFrame(void** stack, void** base) { virtual void nextFrame(void** stack, void** base) {
assert(&c, *static_cast<void**>(*base) != *base); assert(&c, *static_cast<void**>(*base) != *base);
@ -2189,61 +2609,206 @@ class MyArchitecture: public Assembler::Architecture {
*thunk = false; *thunk = false;
} }
virtual void plan bool checkMethodClass(Thread* t, object method, const char* value)
{
return strcmp
(reinterpret_cast<const char*>
(&byteArrayBody(t, className(t, methodClass(t, method)), 0)),
value) == 0;
}
bool checkMethodName(Thread* t, object method, const char* value)
{
return strcmp
(reinterpret_cast<const char*>
(&byteArrayBody(t, methodName(t, method), 0)),
value) == 0;
}
bool checkMethodSpec(Thread* t, object method, const char* value)
{
return strcmp
(reinterpret_cast<const char*>
(&byteArrayBody(t, methodSpec(t, method), 0)),
value) == 0;
}
virtual BinaryOperation hasBinaryIntrinsic(Thread* t, object method)
{
if(checkMethodClass(t, method, "java/lang/Math")) {
if(supportsSSE() and checkMethodName(t, method, "sqrt") and checkMethodSpec(t, method, "(D)D") and BytesPerWord == 8) {
return FloatSqrt;
} else if(checkMethodName(t, method, "abs")) {
if(checkMethodSpec(t, method, "(I)I") or (checkMethodSpec(t, method, "(J)J") and BytesPerWord == 8)) {
return Abs;
} else if(supportsSSE() and supportsSSE2() and checkMethodSpec(t, method, "(F)F")) {
return FloatAbs;
}
}
}
return NoBinaryOperation;
}
virtual TernaryOperation hasTernaryIntrinsic(Thread* t UNUSED, object method UNUSED) {
return NoTernaryOperation;
}
virtual void planSource
(BinaryOperation op, (BinaryOperation op,
unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, unsigned bSize, bool* thunk)
bool* thunk)
{ {
*aTypeMask = ~0; *aTypeMask = ~0;
*aRegisterMask = ~static_cast<uint64_t>(0); *aRegisterMask = GeneralRegisterMask | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
*bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*bRegisterMask = ~static_cast<uint64_t>(0);
*thunk = false; *thunk = false;
switch (op) { switch (op) {
case Compare: case Compare:
*aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
*bTypeMask = (1 << RegisterOperand); *aRegisterMask = GeneralRegisterMask;
break;
case FloatCompare:
assert(&c, supportsSSE() && aSize <= BytesPerWord);
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = FloatRegisterMask;
break; break;
case Negate: case Negate:
*aTypeMask = (1 << RegisterOperand); *aTypeMask = (1 << RegisterOperand);
*bTypeMask = (1 << RegisterOperand);
*aRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32)) *aRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
| (static_cast<uint64_t>(1) << rax); | (static_cast<uint64_t>(1) << rax);
*bRegisterMask = *aRegisterMask;
break; break;
case Abs:
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = (static_cast<uint64_t>(1) << rax);
break;
case FloatAbs:
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = FloatRegisterMask;
break;
case FloatNegate:
if(!supportsSSE() or aSize == 8 or bSize == 8) { //floatNegateRR does not support doubles
*thunk = true;
} else {
*aTypeMask = (1 << RegisterOperand);
*aRegisterMask = FloatRegisterMask;
}
break;
case FloatSqrt:
*aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*aRegisterMask = FloatRegisterMask;
break;
case Float2Float:
if(!supportsSSE() or !supportsSSE2() or BytesPerWord == 4) {
*thunk = true;
} else {
*aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*aRegisterMask = FloatRegisterMask;
}
break;
case Float2Int:
if(!supportsSSE() or aSize > BytesPerWord or bSize > BytesPerWord) {
*thunk = true;
} else {
*aTypeMask = (1 << RegisterOperand);// | (1 << MemoryOperand);
*aRegisterMask = FloatRegisterMask;
}
break;
case Int2Float:
if(!supportsSSE() or aSize > BytesPerWord or bSize > BytesPerWord) {
*thunk = true;
} else {
*aTypeMask = (1 << RegisterOperand);// | (1 << MemoryOperand);
*aRegisterMask = GeneralRegisterMask | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
}
break;
case Move: case Move:
*aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*aRegisterMask = GeneralRegisterMask | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
if (BytesPerWord == 4) { if (BytesPerWord == 4) {
if (aSize == 4 and bSize == 8) { if (aSize == 4 and bSize == 8) {
const uint32_t mask = ~((1 << rax) | (1 << rdx)); *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask; const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
*bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32)) *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
| (static_cast<uint64_t>(1) << rax);
} else if (aSize == 1 or bSize == 1) { } else if (aSize == 1 or bSize == 1) {
*aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
const uint32_t mask const uint32_t mask
= (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx); = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx);
*aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask; *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
*bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
} }
} }
break; break;
default: default:
break; break;
} }
} }
virtual void plan virtual void planDestination
(BinaryOperation op,
unsigned aSize, const uint8_t* aTypeMask UNUSED, const uint64_t* aRegisterMask,
unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask)
{
*bTypeMask = ~0;
*bRegisterMask = GeneralRegisterMask | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
switch (op) {
case Compare:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = GeneralRegisterMask;
break;
case FloatCompare:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = FloatRegisterMask;
break;
case Abs:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = (static_cast<uint64_t>(1) << rax);
break;
case FloatAbs:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = *aRegisterMask;
break;
case Negate:
case FloatNegate:
case FloatSqrt:
case Float2Float:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = *aRegisterMask;
break;
case Int2Float:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = FloatRegisterMask;
break;
case Float2Int:
*bTypeMask = (1 << RegisterOperand);
*bRegisterMask = GeneralRegisterMask | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
break;
case Move:
*bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*bRegisterMask = GeneralRegisterMask | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
if (BytesPerWord == 4) {
if (aSize == 4 and bSize == 8) {
*bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
| (static_cast<uint64_t>(1) << rax);
} else if (aSize == 1 or bSize == 1) {
const uint32_t mask
= (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx);
*bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
}
}
break;
default:
break;
}
}
virtual void planSource
(TernaryOperation op, (TernaryOperation op,
unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned aSize, uint8_t *aTypeMask, uint64_t *aRegisterMask,
unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask,
unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask, unsigned, bool* thunk)
bool* thunk)
{ {
*aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
*aRegisterMask = ~static_cast<uint64_t>(0); *aRegisterMask = ~static_cast<uint64_t>(0);
@ -2254,21 +2819,37 @@ class MyArchitecture: public Assembler::Architecture {
*thunk = false; *thunk = false;
switch (op) { switch (op) {
case FloatAdd:
case FloatSubtract:
case FloatMultiply:
case FloatDivide:
if(!supportsSSE() or aSize > BytesPerWord) {
*thunk = true;
} else {
*aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
*bTypeMask = (1 << RegisterOperand);
*aRegisterMask = FloatRegisterMask;
*bRegisterMask = FloatRegisterMask;
}
break;
case Multiply: case Multiply:
if (BytesPerWord == 4 and aSize == 8) { if (BytesPerWord == 4 and aSize == 8) {
const uint32_t mask = ~((1 << rax) | (1 << rdx)); const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
*aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask; *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
*bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32)) | mask; *bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32)) | mask;
} else {
*aRegisterMask = GeneralRegisterMask;
*bRegisterMask = GeneralRegisterMask;
} }
break; break;
case Divide: case Divide:
if (BytesPerWord == 4 and aSize == 8) { if (BytesPerWord == 4 and aSize == 8) {
*bTypeMask = ~0; *thunk = true;
*thunk = true;
} else { } else {
*aTypeMask = (1 << RegisterOperand); *aTypeMask = (1 << RegisterOperand);
*aRegisterMask = ~((1 << rax) | (1 << rdx)); *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
*bRegisterMask = 1 << rax; *bRegisterMask = 1 << rax;
} }
break; break;
@ -2279,25 +2860,32 @@ class MyArchitecture: public Assembler::Architecture {
*thunk = true; *thunk = true;
} else { } else {
*aTypeMask = (1 << RegisterOperand); *aTypeMask = (1 << RegisterOperand);
*aRegisterMask = ~((1 << rax) | (1 << rdx)); *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
*bRegisterMask = 1 << rax; *bRegisterMask = 1 << rax;
} }
break; break;
case ShiftLeft: case ShiftLeft:
case ShiftRight: case ShiftRight:
case UnsignedShiftRight: { case UnsignedShiftRight: {
*aRegisterMask = (~static_cast<uint64_t>(0) << 32) *aRegisterMask = (static_cast<uint64_t>(GeneralRegisterMask) << 32)
| (static_cast<uint64_t>(1) << rcx); | (static_cast<uint64_t>(1) << rcx);
const uint32_t mask = ~(1 << rcx); const uint32_t mask = GeneralRegisterMask & ~(1 << rcx);
*bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask; *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
} break; } break;
default: default:
break; break;
} }
}
*cTypeMask = *bTypeMask; virtual void planDestination
(TernaryOperation op UNUSED,
unsigned aSize UNUSED, const uint8_t* aTypeMask UNUSED, const uint64_t* aRegisterMask UNUSED,
unsigned bSize UNUSED, const uint8_t* bTypeMask UNUSED, const uint64_t* bRegisterMask,
unsigned cSize UNUSED, uint8_t* cTypeMask, uint64_t* cRegisterMask)
{
*cTypeMask = (1 << RegisterOperand);
*cRegisterMask = *bRegisterMask; *cRegisterMask = *bRegisterMask;
} }