From c7a1a7af77a94d7ddeeb9e75612793d737fa2014 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 08:44:15 -0600 Subject: [PATCH 01/16] added floating point support, split plan function. --- src/assembler.h | 70 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/src/assembler.h b/src/assembler.h index c915cfe894..78a807ebd6 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -36,7 +36,8 @@ enum UnaryOperation { JumpIfLessOrEqual, JumpIfGreaterOrEqual, JumpIfEqual, - JumpIfNotEqual + JumpIfNotEqual, + JumpIfUnordered }; const unsigned UnaryOperationCount = JumpIfNotEqual + 1; @@ -45,10 +46,24 @@ enum BinaryOperation { Move, MoveZ, Compare, - Negate + Negate, + + //extensions: + FloatNegate, + FloatCompare, + Float2Float, + Float2Int, + Int2Float, + + //intrinsic functions: + FloatSqrt, + FloatAbs, + Abs, + + NoBinaryOperation = -1 }; -const unsigned BinaryOperationCount = Negate + 1; +const unsigned BinaryOperationCount = Abs + 1; enum TernaryOperation { LongCompare, @@ -62,10 +77,23 @@ enum TernaryOperation { UnsignedShiftRight, And, Or, - Xor + Xor, + + //extensions: + FloatAdd, + FloatSubtract, + FloatMultiply, + FloatDivide, + FloatRemainder, + + //intrinsic functions: + FloatMax, + FloatMin, + + NoTernaryOperation = -1 }; -const unsigned TernaryOperationCount = Xor + 1; +const unsigned TernaryOperationCount = FloatMin + 1; enum OperandType { ConstantOperand, @@ -258,15 +286,19 @@ class Assembler { class Architecture { public: virtual unsigned registerCount() = 0; + virtual unsigned generalRegisterCount() = 0; + virtual unsigned floatRegisterCount() = 0; + virtual uint64_t generalRegisters() = 0; + virtual uint64_t floatRegisters() = 0; virtual int stack() = 0; virtual int thread() = 0; virtual int returnLow() = 0; virtual int returnHigh() = 0; - virtual bool condensedAddressing() = 0; - virtual bool bigEndian() = 0; + + virtual bool supportsFloatCompare(unsigned size) = 0; virtual bool 
reserved(int register_) = 0; @@ -287,24 +319,36 @@ class Assembler { virtual unsigned frameReturnAddressSize() = 0; virtual unsigned frameFooterSize() = 0; virtual void nextFrame(void** stack, void** base) = 0; + + virtual BinaryOperation hasBinaryIntrinsic(Thread* t, object method) = 0; + virtual TernaryOperation hasTernaryIntrinsic(Thread* t, object method) = 0; virtual void plan (UnaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, bool* thunk) = 0; - virtual void plan + virtual void planSource (BinaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, - unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, - bool* thunk) = 0; + unsigned bSize, bool* thunk) = 0; + + virtual void planDestination + (BinaryOperation op, + unsigned aSize, const uint8_t* aTypeMask, const uint64_t* aRegisterMask, + unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask) = 0; - virtual void plan + virtual void planSource (TernaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, - unsigned cSize, uint8_t* cTypeMask, uint64_t* cRegisterMask, - bool* thunk) = 0; + unsigned cSize, bool* thunk) = 0; + + virtual void planDestination + (TernaryOperation op, + unsigned aSize, const uint8_t* aTypeMask, const uint64_t* aRegisterMask, + unsigned bSize, const uint8_t* bTypeMask, const uint64_t* bRegisterMask, + unsigned cSize, uint8_t* cTypeMask, uint64_t* cRegisterMask) = 0; virtual void acquire() = 0; virtual void release() = 0; From 5cc605b56df43a959669274638e3fd8613d77840 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 08:48:15 -0600 Subject: [PATCH 02/16] added floating point support. 
--- src/compiler.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index db5f5963be..b2a702596d 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -21,8 +21,9 @@ class Compiler { public: class Client { public: - virtual intptr_t getThunk(UnaryOperation op, unsigned size) = 0; - virtual intptr_t getThunk(TernaryOperation op, unsigned size) = 0; + virtual intptr_t getThunk(UnaryOperation op, unsigned size, unsigned resultSize) = 0; + virtual intptr_t getThunk(BinaryOperation op, unsigned size, unsigned resultSize) = 0; + virtual intptr_t getThunk(TernaryOperation op, unsigned size, unsigned resultSize) = 0; }; static const unsigned Aligned = 1 << 0; @@ -106,18 +107,25 @@ class Compiler { unsigned dstSize) = 0; virtual Operand* lcmp(Operand* a, Operand* b) = 0; virtual void cmp(unsigned size, Operand* a, Operand* b) = 0; + virtual void fcmp(unsigned size, Operand* a, Operand* b) = 0; virtual void jl(Operand* address) = 0; virtual void jg(Operand* address) = 0; virtual void jle(Operand* address) = 0; virtual void jge(Operand* address) = 0; virtual void je(Operand* address) = 0; virtual void jne(Operand* address) = 0; + virtual void juo(Operand* address) = 0; virtual void jmp(Operand* address) = 0; virtual Operand* add(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* sub(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* mul(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* div(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* rem(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fadd(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fsub(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fmul(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fdiv(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* frem(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* shl(unsigned size, Operand* a, 
Operand* b) = 0; virtual Operand* shr(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* ushr(unsigned size, Operand* a, Operand* b) = 0; @@ -125,6 +133,12 @@ class Compiler { virtual Operand* or_(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* xor_(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* neg(unsigned size, Operand* a) = 0; + virtual Operand* fneg(unsigned size, Operand* a) = 0; + virtual Operand* operation(BinaryOperation op, unsigned aSize, unsigned resSize, Operand* a) = 0; + virtual Operand* operation(TernaryOperation op, unsigned aSize, unsigned bSize, unsigned resSize, Operand* a, Operand* b) = 0; + virtual Operand* f2f(unsigned aSize, unsigned resSize, Operand* a) = 0; + virtual Operand* f2i(unsigned aSize, unsigned resSize, Operand* a) = 0; + virtual Operand* i2f(unsigned aSize, unsigned resSize, Operand* a) = 0; virtual void loadBarrier() = 0; virtual void storeStoreBarrier() = 0; From c042354ea0028cf0991bc2512e5f9a4d40b7d6c9 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 08:49:26 -0600 Subject: [PATCH 03/16] added detectFeature function, used to detect sse in x86.cpp --- src/x86.S | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/src/x86.S b/src/x86.S index 09abdb5b94..01d53894fa 100644 --- a/src/x86.S +++ b/src/x86.S @@ -18,6 +18,42 @@ #ifdef __x86_64__ #ifdef __WINDOWS__ +# if defined __APPLE__ || defined __MINGW32__ || defined __CYGWIN32__ +.globl _detectFeature +_detectFeature: +# else +.globl detectFeature +detectFeature: +# endif + pushq %rbp + movq %rsp, %rbp + pushq %rdx + pushq %rcx + pushq %rbx + pushq %rsi + pushq %rdi + movl %ecx, %edi + movl %edx, %esi + movl $1, %eax + cpuid + andl %esi, %edx + andl %edi, %ecx + orl %edx, %ecx + test %ecx, %ecx + je LOCAL(NOSSE) + movl $1, %eax + jmp LOCAL(SSEEND) +LOCAL(NOSSE): + movl $0, %eax +LOCAL(SSEEND): + popq %rdi + popq %rsi + popq %rbx + popq %rcx + popq %rdx + movq 
%rbp,%rsp + popq %rbp + ret # if defined __APPLE__ || defined __MINGW32__ || defined __CYGWIN32__ .globl _vmNativeCall @@ -141,6 +177,36 @@ _vmJump: jmp *%rcx #elif defined __LINUX__ +# if defined __APPLE__ || defined __MINGW32__ || defined __CYGWIN32__ +.globl _detectFeature +_detectFeature: +# else +.globl detectFeature +detectFeature: +# endif + pushq %rbp + movq %rsp, %rbp + pushq %rdx + pushq %rcx + pushq %rbx + movl $1, %eax + cpuid + andl %esi, %edx + andl %edi, %ecx + orl %edx, %ecx + test %ecx, %ecx + je LOCAL(NOSSE) + movl $1, %eax + jmp LOCAL(SSEEND) +LOCAL(NOSSE): + movl $0, %eax +LOCAL(SSEEND): + popq %rbx + popq %rcx + popq %rdx + movq %rbp,%rsp + popq %rbp + ret # if defined __APPLE__ || defined __MINGW32__ || defined __CYGWIN32__ .globl _vmNativeCall @@ -252,8 +318,44 @@ vmJump: jmp *%rdi #endif //def __WINDOWS__ - #elif defined __i386__ +# if defined __APPLE__ || defined __MINGW32__ || defined __CYGWIN32__ +.globl _detectFeature +_detectFeature: +# else +.globl detectFeature +detectFeature: +# endif + pushl %ebp + movl %esp, %ebp + pushl %edx + pushl %ecx + pushl %ebx + pushl %esi + pushl %edi + movl 12(%ebp), %esi + movl 8(%ebp), %edi + movl $1, %eax + cpuid + andl %esi, %edx + andl %edi, %ecx + orl %edx, %ecx + test %ecx, %ecx + je LOCAL(NOSSE) + movl $1, %eax + jmp LOCAL(SSEEND) +LOCAL(NOSSE): + movl $0, %eax +LOCAL(SSEEND): + popl %edi + popl %esi + popl %ebx + popl %ecx + popl %edx + movl %ebp,%esp + popl %ebp + ret + # if defined __APPLE__ || defined __MINGW32__ || defined __CYGWIN32__ .globl _vmNativeCall _vmNativeCall: From c3a389429e5ad6ba2c519978c653bf2c21fdd98b Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 08:54:23 -0600 Subject: [PATCH 04/16] split source function, update interface for floating point / instrinsic support --- src/powerpc.cpp | 71 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/src/powerpc.cpp b/src/powerpc.cpp index 01f5a3e862..da368961b7 
100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -1679,10 +1679,6 @@ class MyArchitecture: public Assembler::Architecture { return (BytesPerWord == 4 ? 3 : NoRegister); } - virtual bool condensedAddressing() { - return false; - } - virtual bool bigEndian() { return true; } @@ -1770,6 +1766,18 @@ class MyArchitecture: public Assembler::Architecture { *stack = *static_cast(*stack); } + virtual BinaryOperation hasBinaryIntrinsic(Thread* t, object method) { + return NoBinaryOperation; + } + + virtual TernaryOperation hasTernaryIntrinsic(Thread* t UNUSED, object method UNUSED) { + return NoTernaryOperation; + } + + virtual bool supportsFloatCompare(unsigned size) { + return false; + } + virtual void plan (UnaryOperation, unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask, @@ -1780,42 +1788,62 @@ class MyArchitecture: public Assembler::Architecture { *thunk = false; } - virtual void plan + virtual void planSource (BinaryOperation op, unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask, - unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, - bool* thunk) + unsigned, bool* thunk) { *aTypeMask = ~0; *aRegisterMask = ~static_cast(0); - *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); - *bRegisterMask = ~static_cast(0); - *thunk = false; switch (op) { case Compare: *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); - *bTypeMask = (1 << RegisterOperand); break; case Negate: *aTypeMask = (1 << RegisterOperand); + break; + case FloatCompare: + case FloatNegate: + case Float2Float: + case Float2Int: + case Int2Float: + *thunk = true; + break; + default: + break; + } + } + + virtual void planDestination + (BinaryOperation op, + unsigned, const uint8_t* aTypeMask, const uint64_t* aRegisterMask, + unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask) + { + *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *bRegisterMask = ~static_cast(0); + + switch (op) { + case Compare: *bTypeMask = (1 << RegisterOperand); break; + case Negate: + 
*bTypeMask = (1 << RegisterOperand); + break; default: break; } } - virtual void plan + virtual void planSource (TernaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, - unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask, - bool* thunk) + unsigned, bool* thunk) { *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); *aRegisterMask = ~static_cast(0); @@ -1851,10 +1879,25 @@ class MyArchitecture: public Assembler::Architecture { } break; + case FloatAdd: + case FloatSubtract: + case FloatMultiply: + case FloatDivide: + case FloatRemainder: + *bTypeMask = ~0; + *thunk = true; + break; default: break; } + } + virtual void planDestination + (TernaryOperation op, + unsigned, const uint8_t*, const uint64_t*, + unsigned, const uint8_t* bTypeMask, const uint64_t* bRegisterMask, + unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask) + { *cTypeMask = *bTypeMask; *cRegisterMask = *bRegisterMask; } From 7483fa154d4181d9f68850e8011e292258a47852 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 10:01:57 -0600 Subject: [PATCH 05/16] added floating point support, instrinsics support --- src/compile.cpp | 346 +++++++++++++++++++++++++++++------------------- 1 file changed, 212 insertions(+), 134 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index 6113452a7e..3b8637c872 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -27,7 +27,7 @@ vmCall(); namespace { -const bool DebugCompile = false; +const bool DebugCompile = true; const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; @@ -562,20 +562,86 @@ class Context { virtual intptr_t getThunk(UnaryOperation, unsigned) { abort(t); } + + virtual intptr_t getThunk(BinaryOperation op, unsigned size, unsigned resultSize) { + switch(op) { + case FloatNegate: + if (size == 4) { + return ::getThunk(t, negateFloatThunk); + } else { + return ::getThunk(t, negateDoubleThunk); 
+ } + case Float2Float: + if (size == 4 && resultSize == 8) { + return ::getThunk(t, floatToDoubleThunk); + } else if(size == 8 && resultSize == 4) { + return ::getThunk(t, doubleToFloatThunk); + } + case Float2Int: + if (size == 4 && resultSize == 4) { + return ::getThunk(t, floatToIntThunk); + } else if(size == 4 && resultSize == 8) { + return ::getThunk(t, floatToLongThunk); + } else if(size == 8 && resultSize == 4) { + return ::getThunk(t, doubleToIntThunk); + } else if(size == 8 && resultSize == 8) { + return ::getThunk(t, doubleToLongThunk); + } + case Int2Float: + if (size == 4 && resultSize == 4) { + return ::getThunk(t, intToFloatThunk); + } else if(size == 4 && resultSize == 8) { + return ::getThunk(t, intToDoubleThunk); + } else if(size == 8 && resultSize == 4) { + return ::getThunk(t, longToFloatThunk); + } else if(size == 8 && resultSize == 8) { + return ::getThunk(t, longToDoubleThunk); + } + + default: break; + } + + abort(t); + } - virtual intptr_t getThunk(TernaryOperation op, unsigned size) { + virtual intptr_t getThunk(TernaryOperation op, unsigned size UNUSED, unsigned resultSize) { switch (op) { case Divide: - if (size == 8) { + if (resultSize == 8) { return ::getThunk(t, divideLongThunk); } break; case Remainder: - if (size == 8) { + if (resultSize == 8) { return ::getThunk(t, moduloLongThunk); } break; + + case FloatAdd: + if(resultSize == 4) { + return ::getThunk(t, addFloatThunk); + } else { + return ::getThunk(t, addDoubleThunk); + } + case FloatSubtract: + if(resultSize == 4) { + return ::getThunk(t, subtractFloatThunk); + } else { + return ::getThunk(t, subtractDoubleThunk); + } + case FloatMultiply: + if(resultSize == 4) { + return ::getThunk(t, multiplyFloatThunk); + } else { + return ::getThunk(t, multiplyDoubleThunk); + } + case FloatDivide: + if(resultSize == 4) { + return ::getThunk(t, divideFloatThunk); + } else { + return ::getThunk(t, divideDoubleThunk); + } default: break; } @@ -1008,7 +1074,7 @@ class Frame { poppedLong(); 
return popLongQuiet(); } - + Compiler::Operand* popObject() { poppedObject(); return popQuiet(1); @@ -2071,6 +2137,22 @@ saveStateAndCompile(MyThread* t, Frame* initialFrame, unsigned ip) initialFrame->c->restoreState(state); } +bool +isCJump(unsigned instruction) +{ + switch(instruction) { + case ifeq: + case ifne: + case ifgt: + case ifge: + case iflt: + case ifle: + return true; + default: + return false; + } +} + void compile(MyThread* t, Frame* initialFrame, unsigned ip, int exceptionHandlerStart) @@ -2084,6 +2166,8 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, object code = methodCode(t, context->method); PROTECT(t, code); + + int lastFcmpl = 1, lastFcmpg = 1; while (ip < codeLength(t, code)) { if (context->visitTable[ip] ++) { @@ -2108,6 +2192,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, 0, 1, c->thread()); } + + ++ lastFcmpl; + ++ lastFcmpg; // fprintf(stderr, "ip: %d map: %ld\n", ip, *(frame->map)); @@ -2341,63 +2428,56 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case d2f: { - frame->pushInt - (c->call - (c->constant(getThunk(t, doubleToFloatThunk)), - 0, 0, 4, 2, - static_cast(0), frame->popLong())); + frame->pushInt(c->f2f(8, 4, frame->popLong())); } break; case d2i: { - frame->pushInt - (c->call - (c->constant(getThunk(t, doubleToIntThunk)), - 0, 0, 4, 2, - static_cast(0), frame->popLong())); + frame->pushInt(c->f2i(8, 4, frame->popLong())); } break; case d2l: { - frame->pushLong - (c->call - (c->constant(getThunk(t, doubleToLongThunk)), - 0, 0, 8, 2, - static_cast(0), frame->popLong())); + frame->pushLong(c->f2i(8, 8, frame->popLong())); } break; case dadd: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, addDoubleThunk)), - 0, 0, 8, 4, - static_cast(0), a, - static_cast(0), b)); + frame->pushLong(c->fadd(8, a, b)); } break; case dcmpg: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = 
frame->popLong(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareDoublesGThunk)), - 0, 0, 4, 4, - static_cast(0), a, - static_cast(0), b)); + if(t->arch->supportsFloatCompare(8) && isCJump(codeBody(t, code, ip))) { + c->fcmp(8, a, b); + lastFcmpg = 0; + } else { + frame->pushInt + (c->call + (c->constant(getThunk(t, compareDoublesGThunk)), + 0, 0, 4, 4, + static_cast(0), a, + static_cast(0), b)); + } } break; case dcmpl: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareDoublesLThunk)), - 0, 0, 4, 4, - static_cast(0), a, - static_cast(0), b)); + if(t->arch->supportsFloatCompare(8) && isCJump(codeBody(t, code, ip))) { + c->fcmp(8, a, b); + lastFcmpl = 0; + } else { + frame->pushInt + (c->call + (c->constant(getThunk(t, compareDoublesLThunk)), + 0, 0, 4, 4, + static_cast(0), a, + static_cast(0), b)); + } } break; case dconst_0: @@ -2412,56 +2492,32 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, divideDoubleThunk)), - 0, 0, 8, 4, - static_cast(0), a, - static_cast(0), b)); + frame->pushLong(c->fdiv(8, a, b)); } break; case dmul: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, multiplyDoubleThunk)), - 0, 0, 8, 4, - static_cast(0), a, - static_cast(0), b)); + frame->pushLong(c->fmul(8, a, b)); } break; case dneg: { - frame->pushLong - (c->call - (c->constant(getThunk(t, negateDoubleThunk)), - 0, 0, 8, 2, - static_cast(0), frame->popLong())); + frame->pushLong(c->fneg(8, frame->popLong())); } break; case vm::drem: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, moduloDoubleThunk)), - 0, 0, 8, 4, - static_cast(0), a, - static_cast(0), 
b)); + frame->pushLong(c->frem(8, a, b)); } break; case dsub: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, subtractDoubleThunk)), - 0, 0, 8, 4, - static_cast(0), a, - static_cast(0), b)); + frame->pushLong(c->fsub(8, a, b)); } break; case dup: @@ -2489,54 +2545,52 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, break; case f2d: { - frame->pushLong - (c->call - (c->constant(getThunk(t, floatToDoubleThunk)), - 0, 0, 8, 1, frame->popInt())); + frame->pushLong(c->f2f(4, 8, frame->popInt())); } break; case f2i: { - frame->pushInt - (c->call - (c->constant(getThunk(t, floatToIntThunk)), - 0, 0, 4, 1, frame->popInt())); + frame->pushInt(c->f2i(4, 4, frame->popInt())); } break; case f2l: { - frame->pushLong - (c->call - (c->constant(getThunk(t, floatToLongThunk)), - 0, 0, 8, 1, frame->popInt())); + frame->pushLong(c->f2i(4, 8, frame->popInt())); } break; case fadd: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, addFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fadd(4, a, b)); } break; case fcmpg: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareFloatsGThunk)), - 0, 0, 4, 2, a, b)); + if(t->arch->supportsFloatCompare(4) && isCJump(codeBody(t, code, ip))) { + c->fcmp(4, a, b); + lastFcmpg = 0; + } else { + frame->pushInt + (c->call + (c->constant(getThunk(t, compareFloatsGThunk)), + 0, 0, 4, 2, a, b)); + } } break; case fcmpl: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareFloatsLThunk)), - 0, 0, 4, 2, a, b)); + if(t->arch->supportsFloatCompare(4) && isCJump(codeBody(t, code, ip))) { + c->fcmp(4, a, b); + lastFcmpl = 0; + } else { + frame->pushInt + (c->call + (c->constant(getThunk(t, 
compareFloatsLThunk)), + 0, 0, 4, 2, a, b)); + } } break; case fconst_0: @@ -2555,47 +2609,32 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, divideFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fdiv(4, a, b)); } break; case fmul: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, multiplyFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fmul(4, a, b)); } break; case fneg: { - frame->pushInt - (c->call - (c->constant(getThunk(t, negateFloatThunk)), - 0, 0, 4, 1, frame->popInt())); + frame->pushInt(c->fneg(4, frame->popInt())); } break; case vm::frem: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, moduloFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->frem(4, a, b)); } break; case fsub: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, subtractFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fsub(4, a, b)); } break; case getfield: @@ -2731,17 +2770,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case i2d: { - frame->pushLong - (c->call - (c->constant(getThunk(t, intToDoubleThunk)), - 0, 0, 8, 1, frame->popInt())); + frame->pushLong(c->i2f(4, 8, frame->popInt())); } break; case i2f: { - frame->pushInt - (c->call - (c->constant(getThunk(t, intToFloatThunk)), - 0, 0, 4, 1, frame->popInt())); + frame->pushInt(c->i2f(4, 4, frame->popInt())); } break; case i2l: @@ -2869,27 +2902,48 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, uint32_t newIp = (ip - 3) + offset; assert(t, newIp < codeLength(t, code)); - Compiler::Operand* a = frame->popInt(); Compiler::Operand* target = frame->machineIp(newIp); + 
Compiler::Operand* cont = frame->machineIp(ip); - c->cmp(4, c->constant(0), a); + if(lastFcmpl != 1 && lastFcmpg != 1) { + Compiler::Operand* a = frame->popInt(); + c->cmp(4, c->constant(0), a); + } switch (instruction) { case ifeq: + if(lastFcmpl == 1 || lastFcmpg == 1) { + c->juo(cont); + } c->je(target); break; case ifne: + if(lastFcmpl == 1 || lastFcmpg == 1) { + c->juo(cont); + } c->jne(target); break; case ifgt: + if(lastFcmpl == 1) { + c->juo(cont); + } c->jg(target); break; case ifge: + if(lastFcmpl == 1) { + c->juo(cont); + } c->jge(target); break; case iflt: + if(lastFcmpg == 1) { + c->juo(cont); + } c->jl(target); break; case ifle: + if(lastFcmpg == 1) { + c->juo(cont); + } c->jle(target); break; } @@ -3033,8 +3087,40 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, if (UNLIKELY(t->exception)) return; assert(t, methodFlags(t, target) & ACC_STATIC); - - compileDirectInvoke(t, frame, target); + int params = methodParameterCount(t, target); + if(params == 1) {//TODO: Get number of method params + BinaryOperation op = t->arch->hasBinaryIntrinsic(t, target); + if(op != NoBinaryOperation) { + printf("Could use binary intrinsic %i.\n", op); + int opSize = methodParameterFootprint(t, target) * BytesPerWord; + int resSize = resultSize(t, methodReturnCode(t, target)); + Compiler::Operand* param; + if(opSize == 4) { + param = frame->popInt(); + } else { + param = frame->popLong(); + } + if(resSize == 4) { + frame->pushInt(c->operation(op, opSize, resSize, param)); + } else { + frame->pushLong(c->operation(op, opSize, resSize, param)); + } + } else { + compileDirectInvoke(t, frame, target); + } + } else if(params == 2) { //TODO: Get number of method params + TernaryOperation op = t->arch->hasTernaryIntrinsic(t, target); + if(op != NoTernaryOperation) { + printf("Could use ternary intrinsic %i.\n", op); + //int aSize, bSize; + //int resSize = resultSize(t, methodReturnCode(t, target)); + compileDirectInvoke(t, frame, target); //TODO: use intrinsic + } else { 
+ compileDirectInvoke(t, frame, target); + } + } else { + compileDirectInvoke(t, frame, target); + } } break; case invokevirtual: { @@ -3187,19 +3273,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case l2d: { - frame->pushLong - (c->call - (c->constant(getThunk(t, longToDoubleThunk)), - 0, 0, 8, 2, - static_cast(0), frame->popLong())); + frame->pushLong(c->i2f(8, 8, frame->popLong())); } break; case l2f: { - frame->pushInt - (c->call - (c->constant(getThunk(t, longToFloatThunk)), - 0, 0, 4, 2, - static_cast(0), frame->popLong())); + frame->pushInt(c->i2f(8, 4, frame->popLong())); } break; case l2i: @@ -4095,7 +4173,6 @@ clearBit(MyThread* t, object map, unsigned count, unsigned size, unsigned i, intArrayBody(t, map, count + (index / 32)) &= ~(static_cast(1) << (index % 32)); } - uint8_t* finish(MyThread* t, Allocator* allocator, Context* context) { @@ -4260,6 +4337,7 @@ finish(MyThread* t, Allocator* allocator, Context* context) "printStackTrace") == 0) { trap(); + printf("Address: %p\n", ::vmAddressFromLine(t, (object)(context->method), 1176)); } syncInstructionCache(start, codeSize); From 53c0656ee7225c4c80374524af2ba8a0100e1e39 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 10:14:31 -0600 Subject: [PATCH 06/16] added floating point support, split plan method --- src/compiler.cpp | 333 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 256 insertions(+), 77 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index f48fa446a6..6ef7751355 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -286,11 +286,16 @@ intersect(const SiteMask& a, const SiteMask& b) intersectFrameIndexes(a.frameIndex, b.frameIndex)); } +enum ValueType { + ValueGeneral, + ValueFloat +}; + class Value: public Compiler::Operand { public: Value(Site* site, Site* target): reads(0), lastRead(0), sites(site), source(0), target(target), buddy(this), - high(0), home(NoFrameIndex) + high(0), home(NoFrameIndex), type(ValueGeneral) { } 
virtual void addPredecessor(Context*, Event*) { } @@ -303,6 +308,7 @@ class Value: public Compiler::Operand { Value* buddy; Value* high; int8_t home; + ValueType type; }; class Context { @@ -338,12 +344,19 @@ class Context { machineCodeSize(0), alignedFrameSize(0), availableRegisterCount(arch->registerCount()), + floatRegisterCount(arch->floatRegisterCount()), + generalRegisterCount(arch->generalRegisterCount()), constantCompare(CompareNone) { for (unsigned i = 0; i < arch->registerCount(); ++i) { new (registerResources + i) RegisterResource(arch->reserved(i)); if (registerResources[i].reserved) { -- availableRegisterCount; + if (arch->generalRegisters() & (1 << i)) { + -- generalRegisterCount; + } else if (arch->floatRegisters() & (1 << i)) { + -- floatRegisterCount; + } } } } @@ -375,6 +388,8 @@ class Context { unsigned machineCodeSize; unsigned alignedFrameSize; unsigned availableRegisterCount; + unsigned floatRegisterCount; + unsigned generalRegisterCount; ConstantCompare constantCompare; }; @@ -949,20 +964,41 @@ buddies(Value* a, Value* b) } void -decrementAvailableRegisterCount(Context* c) +decrementAvailableRegisterCount(Context* c, Value* v) { assert(c, c->availableRegisterCount); -- c->availableRegisterCount; + if (v) { + if (v->type == ValueGeneral) { + -- c->generalRegisterCount; + } else if (v->type == ValueFloat) { + -- c->floatRegisterCount; + } + } else { + -- c->generalRegisterCount; + } + + if (DebugResources) { - fprintf(stderr, "%d registers available\n", c->availableRegisterCount); + fprintf(stderr, "%d registers available - %d float, %d general\n", c->availableRegisterCount, c->floatRegisterCount, c->generalRegisterCount); } } void -incrementAvailableRegisterCount(Context* c) +incrementAvailableRegisterCount(Context* c, Value* v) { ++ c->availableRegisterCount; + + if (v) { + if (v->type == ValueGeneral) { + ++ c->generalRegisterCount; + } else if (v->type == ValueFloat) { + ++ c->floatRegisterCount; + } + } else { + ++ c->generalRegisterCount; 
+ } if (DebugResources) { fprintf(stderr, "%d registers available\n", c->availableRegisterCount); @@ -981,7 +1017,7 @@ increment(Context* c, RegisterResource* r) ++ r->referenceCount; if (r->referenceCount == 1) { - decrementAvailableRegisterCount(c); + decrementAvailableRegisterCount(c, r->value); } } } @@ -1000,7 +1036,7 @@ decrement(Context* c, Resource* r) -- r->referenceCount; if (r->referenceCount == 0) { - incrementAvailableRegisterCount(c); + incrementAvailableRegisterCount(c, r->value); } } } @@ -1023,7 +1059,7 @@ RegisterResource::freeze(Context* c, Value* v) freezeResource(c, this, v); if (freezeCount == 1) { - decrementAvailableRegisterCount(c); + decrementAvailableRegisterCount(c, v); } } } @@ -1056,7 +1092,7 @@ RegisterResource::thaw(Context* c, Value* v) thawResource(c, this, v); if (freezeCount == 0) { - incrementAvailableRegisterCount(c); + incrementAvailableRegisterCount(c, v); } } } @@ -1112,6 +1148,13 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) { int target = NoRegister; unsigned bestCost = Target::Impossible; + if (v) { + if (v->type == ValueFloat) { + mask &= (c->arch->floatRegisters() | c->arch->generalRegisters()); + } else if(v->type == ValueGeneral) { + mask &= c->arch->generalRegisters(); + } + } for (int i = c->arch->registerCount() - 1; i >= 0; --i) { if ((1 << i) & mask) { RegisterResource* r = c->registerResources + i; @@ -1190,15 +1233,30 @@ pickTarget(Context* c, Read* read, bool intersectRead, SiteMask mask; read->intersect(&mask); - unsigned registerPenalty = (c->availableRegisterCount > registerReserveCount + unsigned registerPenalty; + if(read->value) { + if(read->value->type == ValueGeneral) { + registerPenalty = (c->generalRegisterCount > registerReserveCount ? 0 : Target::Penalty); + } else if(read->value->type == ValueFloat) { + registerPenalty = (c->floatRegisterCount > registerReserveCount + ? 
0 : Target::Penalty); + } else { + registerPenalty = (c->availableRegisterCount > registerReserveCount + ? 0 : Target::Penalty); + } + } else { + registerPenalty = (c->availableRegisterCount > registerReserveCount + ? 0 : Target::Penalty); + } + Target best; if ((mask.typeMask & (1 << RegisterOperand))) { Target mine = pickRegisterTarget(c, read->value, mask.registerMask); mine.cost += registerPenalty; - + if(mine.cost == Target::Impossible) asm("int3"); if (mine.cost == 0) { return mine; } else if (mine.cost < best.cost) { @@ -1916,17 +1974,17 @@ read(Context* c, const SiteMask& mask) } Read* -anyRegisterRead(Context* c) +generalRegisterRead(Context* c) { - return read(c, SiteMask(1 << RegisterOperand, ~0, NoFrameIndex)); + return read(c, SiteMask(1 << RegisterOperand, c->arch->generalRegisters(), NoFrameIndex)); } Read* -registerOrConstantRead(Context* c) +generalRegisterOrConstantRead(Context* c) { return read (c, SiteMask - ((1 << RegisterOperand) | (1 << ConstantOperand), ~0, NoFrameIndex)); + ((1 << RegisterOperand) | (1 << ConstantOperand), c->arch->generalRegisters(), NoFrameIndex)); } Read* @@ -2524,12 +2582,9 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, bool thunk; uint8_t srcTypeMask; uint64_t srcRegisterMask; - uint8_t dstTypeMask; - uint64_t dstRegisterMask; - c->arch->plan(type, dstSize, &srcTypeMask, &srcRegisterMask, - dstSize, &dstTypeMask, &dstRegisterMask, - &thunk); + c->arch->planSource(type, dstSize, &srcTypeMask, &srcRegisterMask, + dstSize, &thunk); assert(c, dstMask.typeMask & srcTypeMask & (1 << RegisterOperand)); @@ -2758,12 +2813,14 @@ appendMove(Context* c, BinaryOperation type, unsigned srcSize, uint8_t dstTypeMask; uint64_t dstRegisterMask; - c->arch->plan(type, srcSelectSize, &srcTypeMask, &srcRegisterMask, - dstSize, &dstTypeMask, &dstRegisterMask, - &thunk); + c->arch->planSource(type, srcSelectSize, &srcTypeMask, &srcRegisterMask, + dstSize, &thunk); assert(c, not thunk); + c->arch->planDestination(type, 
srcSelectSize, &srcTypeMask, &srcRegisterMask, + dstSize, &dstTypeMask, &dstRegisterMask); + append(c, new (c->zone->allocate(sizeof(MoveEvent))) MoveEvent (c, type, srcSize, srcSelectSize, src, dstSize, dst, @@ -2787,10 +2844,11 @@ findConstantSite(Context* c, Value* v) class CompareEvent: public Event { public: - CompareEvent(Context* c, unsigned size, Value* first, Value* second, + CompareEvent(Context* c, BinaryOperation type, unsigned size, Value* first, Value* second, const SiteMask& firstMask, const SiteMask& secondMask): - Event(c), size(size), first(first), second(second) + Event(c), type(type), size(size), first(first), second(second) { + assert(c, type != FloatCompare || (first->type == ValueFloat && first->type == ValueFloat)); addRead(c, this, first, read(c, firstMask)); addRead(c, this, second, read(c, secondMask)); } @@ -2817,20 +2875,21 @@ class CompareEvent: public Event { } else { c->constantCompare = CompareNone; - apply(c, Compare, size, first->source, 0, size, second->source, 0); + apply(c, type, size, first->source, 0, size, second->source, 0); } popRead(c, this, first); popRead(c, this, second); } - + + BinaryOperation type; unsigned size; Value* first; Value* second; }; void -appendCompare(Context* c, unsigned size, Value* first, Value* second) +appendCompare(Context* c, BinaryOperation op, unsigned size, Value* first, Value* second) { bool thunk; uint8_t firstTypeMask; @@ -2838,15 +2897,17 @@ appendCompare(Context* c, unsigned size, Value* first, Value* second) uint8_t secondTypeMask; uint64_t secondRegisterMask; - c->arch->plan(Compare, size, &firstTypeMask, &firstRegisterMask, - size, &secondTypeMask, &secondRegisterMask, - &thunk); + c->arch->planSource(op, size, &firstTypeMask, &firstRegisterMask, + size, &thunk); assert(c, not thunk); // todo + c->arch->planDestination(op, size, &firstTypeMask, &firstRegisterMask, + size, &secondTypeMask, &secondRegisterMask); + append(c, new (c->zone->allocate(sizeof(CompareEvent))) CompareEvent - (c, 
size, first, second, + (c, op, size, first, second, SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex))); } @@ -2867,7 +2928,7 @@ getTarget(Context* c, Value* value, Value* result, const SiteMask& resultMask) Site* s; Value* v; Read* r = liveNext(c, value); - if (c->arch->condensedAddressing() or r == 0) { + if (r == 0 and value->source->match(c, static_cast(resultMask))) { s = value->source; v = value; if (r and not hasMoreThanOneSite(v)) { @@ -2911,6 +2972,13 @@ thawSource(Context* c, unsigned size, Value* v) } } +uint64_t +registerMask(Value* v) { + Site* s = source(v); + if(!s) return 0; + else return static_cast(1) << ((RegisterSite*)s)->number; +} + class CombineEvent: public Event { public: CombineEvent(Context* c, TernaryOperation type, @@ -2920,13 +2988,10 @@ class CombineEvent: public Event { const SiteMask& firstLowMask, const SiteMask& firstHighMask, const SiteMask& secondLowMask, - const SiteMask& secondHighMask, - const SiteMask& resultLowMask, - const SiteMask& resultHighMask): + const SiteMask& secondHighMask): Event(c), type(type), firstSize(firstSize), first(first), secondSize(secondSize), second(second), resultSize(resultSize), - result(result), resultLowMask(resultLowMask), - resultHighMask(resultHighMask) + result(result) { addRead(c, this, first, read(c, firstLowMask)); if (firstSize > BytesPerWord) { @@ -2949,6 +3014,17 @@ class CombineEvent: public Event { virtual void compile(Context* c) { freezeSource(c, firstSize, first); + + uint8_t aTypeMask = first->source->type(c); + uint8_t bTypeMask = second->source->type(c); + uint8_t cTypeMask; + uint64_t aRegisterMask = (registerMask(first->high) << 32) | registerMask(first); + uint64_t bRegisterMask = (registerMask(second->high) << 32) | registerMask(second); + uint64_t cRegisterMask; + + c->arch->planDestination(type, firstSize, &aTypeMask, &aRegisterMask, secondSize, &bTypeMask, &bRegisterMask, resultSize, &cTypeMask, 
&cRegisterMask); + SiteMask resultLowMask(cTypeMask, cRegisterMask, AnyFrameIndex); + SiteMask resultHighMask(cTypeMask, cRegisterMask >> 32, AnyFrameIndex); Site* low = getTarget(c, second, result, resultLowMask); Site* high @@ -2987,8 +3063,6 @@ class CombineEvent: public Event { Value* second; unsigned resultSize; Value* result; - SiteMask resultLowMask; - SiteMask resultHighMask; }; void @@ -3284,13 +3358,10 @@ appendCombine(Context* c, TernaryOperation type, uint64_t firstRegisterMask; uint8_t secondTypeMask; uint64_t secondRegisterMask; - uint8_t resultTypeMask; - uint64_t resultRegisterMask; - c->arch->plan(type, firstSize, &firstTypeMask, &firstRegisterMask, + c->arch->planSource(type, firstSize, &firstTypeMask, &firstRegisterMask, secondSize, &secondTypeMask, &secondRegisterMask, - resultSize, &resultTypeMask, &resultRegisterMask, - &thunk); + resultSize, &thunk); if (thunk) { Stack* oldStack = c->stack; @@ -3302,7 +3373,7 @@ appendCombine(Context* c, TernaryOperation type, c->stack = oldStack; appendCall - (c, value(c, constantSite(c, c->client->getThunk(type, resultSize))), + (c, value(c, constantSite(c, c->client->getThunk(type, firstSize, resultSize))), 0, 0, result, resultSize, argumentStack, ceiling(secondSize, BytesPerWord) + ceiling(firstSize, BytesPerWord), 0); @@ -3317,22 +3388,17 @@ appendCombine(Context* c, TernaryOperation type, SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex), SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex), - SiteMask(secondTypeMask, secondRegisterMask >> 32, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask >> 32, AnyFrameIndex))); + SiteMask(secondTypeMask, secondRegisterMask >> 32, AnyFrameIndex))); } } class TranslateEvent: public Event { public: - TranslateEvent(Context* c, BinaryOperation type, unsigned size, Value* value, + TranslateEvent(Context* c, 
BinaryOperation type, unsigned size, unsigned resSize, Value* value, Value* result, const SiteMask& valueLowMask, - const SiteMask& valueHighMask, - const SiteMask& resultLowMask, - const SiteMask& resultHighMask): - Event(c), type(type), size(size), value(value), result(result), - resultLowMask(resultLowMask), resultHighMask(resultHighMask) + const SiteMask& valueHighMask): + Event(c), type(type), size(size), resSize(resSize), value(value), result(result) { addRead(c, this, value, read(c, valueLowMask)); if (size > BytesPerWord) { @@ -3346,6 +3412,15 @@ class TranslateEvent: public Event { } virtual void compile(Context* c) { + uint8_t aTypeMask = value->source->type(c); + uint8_t bTypeMask; + uint64_t aRegisterMask = (registerMask(value->high) << 32) | registerMask(value); + uint64_t bRegisterMask; + + c->arch->planDestination(type, size, &aTypeMask, &aRegisterMask, resSize, &bTypeMask, &bRegisterMask); + SiteMask resultLowMask(bTypeMask, bRegisterMask, AnyFrameIndex); + SiteMask resultHighMask(bTypeMask, bRegisterMask >> 32, AnyFrameIndex); + Site* low = getTarget(c, value, result, resultLowMask); Site* high = (size > BytesPerWord @@ -3375,6 +3450,7 @@ class TranslateEvent: public Event { BinaryOperation type; unsigned size; + unsigned resSize; Value* value; Value* result; Read* resultRead; @@ -3383,28 +3459,35 @@ class TranslateEvent: public Event { }; void -appendTranslate(Context* c, BinaryOperation type, unsigned size, Value* value, - Value* result) +appendTranslate(Context* c, BinaryOperation type, unsigned firstSize, Value* first, + unsigned resultSize, Value* result) { bool thunk; uint8_t firstTypeMask; uint64_t firstRegisterMask; - uint8_t resultTypeMask; - uint64_t resultRegisterMask; - c->arch->plan(type, size, &firstTypeMask, &firstRegisterMask, - size, &resultTypeMask, &resultRegisterMask, - &thunk); + c->arch->planSource(type, firstSize, &firstTypeMask, &firstRegisterMask, + resultSize, &thunk); - assert(c, not thunk); // todo + if (thunk) { + 
Stack* oldStack = c->stack; - append(c, new (c->zone->allocate(sizeof(TranslateEvent))) - TranslateEvent - (c, type, size, value, result, - SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), - SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask >> 32, AnyFrameIndex))); + ::push(c, ceiling(firstSize, BytesPerWord), first); + + Stack* argumentStack = c->stack; + c->stack = oldStack; + + appendCall + (c, value(c, constantSite(c, c->client->getThunk(type, firstSize, resultSize))), + 0, 0, result, resultSize, argumentStack, + ceiling(firstSize, BytesPerWord), 0); + } else { + append(c, new (c->zone->allocate(sizeof(TranslateEvent))) + TranslateEvent + (c, type, firstSize, resultSize, first, result, + SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), + SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex))); + } } class BarrierEvent: public Event { @@ -3437,9 +3520,9 @@ class MemoryEvent: public Event { Event(c), base(base), displacement(displacement), index(index), scale(scale), result(result) { - addRead(c, this, base, anyRegisterRead(c)); + addRead(c, this, base, generalRegisterRead(c)); if (index) { - addRead(c, this, index, registerOrConstantRead(c)); + addRead(c, this, index, generalRegisterOrConstantRead(c)); } } @@ -3617,8 +3700,8 @@ class BoundsCheckEvent: public Event { Event(c), object(object), lengthOffset(lengthOffset), index(index), handler(handler) { - addRead(c, this, object, anyRegisterRead(c)); - addRead(c, this, index, registerOrConstantRead(c)); + addRead(c, this, object, generalRegisterRead(c)); + addRead(c, this, index, generalRegisterOrConstantRead(c)); } virtual const char* name() { @@ -4322,10 +4405,8 @@ populateSources(Context* c, Event* e) { SiteRecord frozenRecords[e->readCount]; SiteRecordList frozen(frozenRecords, e->readCount); - for (Read* r = e->reads; r; r = r->eventNext) { r->value->source 
= readSource(c, r); - if (r->value->source) { if (DebugReads) { char buffer[256]; r->value->source->toString(c, buffer, 256); @@ -5188,10 +5269,20 @@ class MyCompiler: public Compiler { } virtual void cmp(unsigned size, Operand* a, Operand* b) { - appendCompare(&c, size, static_cast(a), + appendCompare(&c, Compare, size, static_cast(a), static_cast(b)); } + virtual void fcmp(unsigned size, Operand* a, Operand* b) { + static_cast(a)->type = ValueFloat; + static_cast(b)->type = ValueFloat; + appendCompare(&c, FloatCompare, size, static_cast(a), + static_cast(b)); + //static_cast(a)->type = ValueGeneral; + //static_cast(b)->type = ValueGeneral; + } + + virtual void jl(Operand* address) { appendBranch(&c, JumpIfLess, static_cast(address)); } @@ -5215,6 +5306,10 @@ class MyCompiler: public Compiler { virtual void jne(Operand* address) { appendBranch(&c, JumpIfNotEqual, static_cast(address)); } + + virtual void juo(Operand* address) { + appendBranch(&c, JumpIfUnordered, static_cast(address)); + } virtual void jmp(Operand* address) { appendBranch(&c, Jump, static_cast(address)); @@ -5255,6 +5350,46 @@ class MyCompiler: public Compiler { return result; } + virtual Operand* fadd(unsigned size, Operand* a, Operand* b) { + Value* result = value(&c); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatAdd, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* fsub(unsigned size, Operand* a, Operand* b) { + Value* result = value(&c); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatSubtract, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* fmul(unsigned size, Operand* a, Operand* b) { + Value* result = value(&c); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatMultiply, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* 
fdiv(unsigned size, Operand* a, Operand* b) { + Value* result = value(&c); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatDivide, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* frem(unsigned size, Operand* a, Operand* b) { + Value* result = value(&c); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatRemainder, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + virtual Operand* shl(unsigned size, Operand* a, Operand* b) { Value* result = value(&c); appendCombine(&c, ShiftLeft, BytesPerWord, static_cast(a), @@ -5299,7 +5434,51 @@ class MyCompiler: public Compiler { virtual Operand* neg(unsigned size, Operand* a) { Value* result = value(&c); - appendTranslate(&c, Negate, size, static_cast(a), result); + appendTranslate(&c, Negate, size, static_cast(a), size, result); + return result; + } + + virtual Operand* fneg(unsigned size, Operand* a) { + Value* result = value(&c); + static_cast(a)->type = ValueFloat; + appendTranslate(&c, FloatNegate, size, static_cast(a), size, result); + return result; + } + + virtual Operand* operation(BinaryOperation op, unsigned aSize, unsigned resSize, Operand* a) { + Value* result = value(&c); + static_cast(a)->type = ValueFloat; + appendTranslate(&c, op, aSize, static_cast(a), resSize, result); + return result; + } + + virtual Operand* operation(TernaryOperation op, unsigned aSize, unsigned bSize, unsigned resSize, Operand* a, Operand* b) { + Value* result = value(&c); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, op, aSize, static_cast(a), + bSize, static_cast(b), resSize, result); + return result; + } + + virtual Operand* f2f(unsigned aSize, unsigned resSize, Operand* a) { + Value* result = value(&c); + static_cast(a)->type = ValueFloat; + appendTranslate(&c, Float2Float, aSize, static_cast(a), resSize, result); + return result; + } + + 
virtual Operand* f2i(unsigned aSize, unsigned resSize, Operand* a) { + Value* result = value(&c); + static_cast(a)->type = ValueFloat; + appendTranslate(&c, Float2Int, aSize, static_cast(a), resSize, result); + return result; + } + + virtual Operand* i2f(unsigned aSize, unsigned resSize, Operand* a) { + Value* result = value(&c); + //result->type = ValueFloat; + appendTranslate(&c, Int2Float, aSize, static_cast(a), resSize, result); + //result->type = ValueGeneral; return result; } From 61bc7299743047ffc6c8f7995fe343ced3556189 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 10:17:48 -0600 Subject: [PATCH 07/16] added floating point support, split plan method --- src/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler.h b/src/compiler.h index b2a702596d..8d78720e25 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -21,7 +21,7 @@ class Compiler { public: class Client { public: - virtual intptr_t getThunk(UnaryOperation op, unsigned size, unsigned resultSize) = 0; + virtual intptr_t getThunk(UnaryOperation op, unsigned size) = 0; virtual intptr_t getThunk(BinaryOperation op, unsigned size, unsigned resultSize) = 0; virtual intptr_t getThunk(TernaryOperation op, unsigned size, unsigned resultSize) = 0; }; From a2e639a2d21498c2e11f5cdc5a380f1c274e6897 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 10:26:22 -0600 Subject: [PATCH 08/16] added floating point support, split plan function --- src/x86.cpp | 690 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 639 insertions(+), 51 deletions(-) diff --git a/src/x86.cpp b/src/x86.cpp index 7fc7a343dc..c57e3b42fe 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -13,10 +13,15 @@ #include "assembler.h" #include "vector.h" +#include "machine.h" #define CAST1(x) reinterpret_cast(x) #define CAST2(x) reinterpret_cast(x) +const bool DebugSSE = false; +const bool EnableSSE = true; +const bool EnableSSE2 = true; + using namespace vm; namespace { 
@@ -40,6 +45,28 @@ enum { r15 = 15, }; +enum { + xmm0 = r15 + 1, + xmm1, + xmm2, + xmm3, + xmm4, + xmm5, + xmm6, + xmm7, + xmm8, + xmm9, + xmm10, + xmm11, + xmm12, + xmm13, + xmm14, + xmm15, +}; + +const unsigned GeneralRegisterMask = BytesPerWord == 4 ? 0x000000ff : 0x0000ffff; +const unsigned FloatRegisterMask = BytesPerWord == 4 ? 0x00ff0000 : 0xffff0000; + const unsigned FrameHeaderSize = 2; inline bool @@ -399,6 +426,35 @@ padding(AlignmentPadding* p, unsigned start, unsigned offset, return padding; } +extern "C" +bool detectFeature(unsigned ecx, unsigned edx); + +inline bool +supportsSSE() +{ + static int supported = -1; + if(supported == -1) { + supported = EnableSSE && detectFeature(0, 0x2000000); + if(DebugSSE) { + fprintf(stderr, "sse %sdetected.\n", supported ? "" : "not "); + } + } + return supported; +} + +inline bool +supportsSSE2() +{ + static int supported = -1; + if(supported == -1) { + supported = EnableSSE2 && detectFeature(0, 0x4000000); + if(DebugSSE) { + fprintf(stderr, "sse2 %sdetected.\n", supported ? 
"" : "not "); + } + } + return supported; +} + #define REX_W 0x48 #define REX_R 0x44 #define REX_X 0x42 @@ -503,6 +559,12 @@ inline void opcode(Context* c, uint8_t op1, uint8_t op2) { c->code.append(op2); } +inline void opcode(Context* c, uint8_t op1, uint8_t op2, uint8_t op3) { + c->code.append(op1); + c->code.append(op2); + c->code.append(op3); +} + void return_(Context* c) { @@ -667,6 +729,14 @@ jumpIfLessOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a) conditional(c, 0x8e, a); } +void +jumpIfUnorderedC(Context* c, unsigned size UNUSED, Assembler::Constant* a) +{ + assert(c, size == BytesPerWord); + + conditional(c, 0x8a, a); +} + void longJumpC(Context* c, unsigned size, Assembler::Constant* a) { @@ -806,11 +876,59 @@ moveCR2(Context* c, UNUSED unsigned aSize, Assembler::Constant* a, } } +inline bool floatReg(Assembler::Register* a) { + return a->low >= xmm0; +} + +void +sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + if(floatReg(a) && floatReg(b)) { + if(aSize == 4) { + opcode(c, 0xf3); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x10); + modrm(c, 0xc0, b, a); + } else { + opcode(c, 0xf2); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x10); + modrm(c, 0xc0, b, a); + } + } else if(floatReg(a)) { + opcode(c, 0x66); + maybeRex(c, aSize, a, b); + opcode(c, 0x0f, 0x7e); + modrm(c, 0xc0, b, a); + } else { + opcode(c, 0x66); + maybeRex(c, aSize, a, b); + opcode(c, 0x0f, 0x6e); + modrm(c, 0xc0, a, b); + } +} + +void +sseMoveCR(Context* c, unsigned aSize, Assembler::Constant* a, + unsigned bSize, Assembler::Register* b) +{ + assert(c, aSize <= BytesPerWord); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); + moveCR2(c, aSize, a, aSize, &tmp, 0); + sseMoveRR(c, aSize, &tmp, bSize, b); + c->client->releaseTemporary(tmp.low); +} + void moveCR(Context* c, unsigned aSize, Assembler::Constant* a, unsigned bSize, Assembler::Register* b) { - moveCR2(c, aSize, a, bSize, 
b, 0); + if(floatReg(b)) { + sseMoveCR(c, aSize, a, bSize, b); + } else { + moveCR2(c, aSize, a, bSize, b, 0); + } } void @@ -829,7 +947,11 @@ void moveRR(Context* c, unsigned aSize, Assembler::Register* a, UNUSED unsigned bSize, Assembler::Register* b) { - + if(floatReg(a) or floatReg(b)) { + sseMoveRR(c, aSize, a, bSize, b); + return; + } + if (BytesPerWord == 4 and aSize == 8 and bSize == 8) { Assembler::Register ah(a->high); Assembler::Register bh(b->high); @@ -902,10 +1024,25 @@ moveRR(Context* c, unsigned aSize, Assembler::Register* a, } } +void +sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + opcode(c, 0x66); + maybeRex(c, aSize, b, a); + opcode(c, 0x0f, 0x6e); + modrmSibImm(c, b, a); +} + void moveMR(Context* c, unsigned aSize, Assembler::Memory* a, unsigned bSize, Assembler::Register* b) { + if(floatReg(b)) { + sseMoveMR(c, aSize, a, bSize, b); + return; + } + switch (aSize) { case 1: maybeRex(c, bSize, b, a); @@ -956,12 +1093,27 @@ moveMR(Context* c, unsigned aSize, Assembler::Memory* a, } } +void +sseMoveRM(Context* c, unsigned aSize, Assembler::Register* a, + UNUSED unsigned bSize, Assembler::Memory* b) +{ + opcode(c, 0x66); + maybeRex(c, aSize, a, b); + opcode(c, 0x0f, 0x7e); + modrmSibImm(c, a, b); +} + void moveRM(Context* c, unsigned aSize, Assembler::Register* a, unsigned bSize UNUSED, Assembler::Memory* b) { assert(c, aSize == bSize); + if(floatReg(a)) { + sseMoveRM(c, aSize, a, bSize, b); + return; + } + switch (aSize) { case 1: maybeRex(c, bSize, a, b); @@ -1066,7 +1218,7 @@ moveCM(Context* c, unsigned aSize UNUSED, Assembler::Constant* a, modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset); c->code.append4(a->value->value()); } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, 8, a, 8, &tmp); moveRM(c, 8, &tmp, 8, b); c->client->releaseTemporary(tmp.low); @@ -1188,7 +1340,7 
@@ addCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); addRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1246,7 +1398,7 @@ subtractCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); subtractRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1335,7 +1487,7 @@ andCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); andRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1392,7 +1544,7 @@ orCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); orRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1448,7 +1600,7 @@ xorCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); xorRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1523,7 +1675,7 @@ compareCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); 
moveCR(c, aSize, a, aSize, &tmp); compareRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1537,7 +1689,7 @@ multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a, assert(c, aSize == bSize); if (BytesPerWord == 4 and aSize == 8) { - const uint32_t mask = ~((1 << rax) | (1 << rdx)); + const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx)); Assembler::Register tmp(c->client->acquireTemporary(mask), c->client->acquireTemporary(mask)); @@ -1560,7 +1712,7 @@ multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); multiplyRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1605,7 +1757,7 @@ compareCM(Context* c, unsigned aSize, Assembler::Constant* a, abort(c); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, bSize, &tmp); compareRM(c, bSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1928,6 +2080,219 @@ unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, Assembler::Constant* a, doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b); } +inline void floatRegOp(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b, uint8_t op, uint8_t mod = 0xc0) +{ + if(aSize == 4) { + opcode(c, 0xf3); + } else { + opcode(c, 0xf2); + } + maybeRex(c, bSize, a, b); + opcode(c, 0x0f, op); + modrm(c, mod, a, b); +} + +inline void floatMemOp(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b, uint8_t op) +{ + if(aSize == 4) { + opcode(c, 0xf3); + } else { + opcode(c, 0xf2); + } + maybeRex(c, bSize, b, a); + opcode(c, 0x0f, op); + modrmSibImm(c, b, a); +} + +void +floatSqrtRR(Context* c, unsigned 
aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x51); +} + +void +floatSqrtMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x51); +} + +void +floatAddRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x58); +} + +void +floatAddMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x58); +} + +void +floatSubtractRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x5c); +} + +void +floatSubtractMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x5c); +} + +void +floatMultiplyRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x59); +} + +void +floatMultiplyMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x59); +} + +void +floatDivideRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x5e); +} + +void +floatDivideMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x5e); +} + +void +float2FloatRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, supportsSSE2()); + floatRegOp(c, aSize, a, 4, b, 0x5a); +} + +void +float2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, supportsSSE2()); + 
floatMemOp(c, aSize, a, 4, b, 0x5a); +} + +void +float2IntRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize, Assembler::Register* b) +{ + assert(c, !floatReg(b)); + floatRegOp(c, aSize, a, bSize, b, 0x2d); +} + +void +float2IntMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, bSize, b, 0x2d); +} + +void +int2FloatRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize, Assembler::Register* b) +{ + floatRegOp(c, bSize, a, aSize, b, 0x2a); +} + +void +int2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize, Assembler::Register* b) +{ + floatMemOp(c, bSize, a, aSize, b, 0x2a); +} + +void +floatCompareRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + if (aSize == 8) { + opcode(c, 0x66); + } + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x2e); + modrm(c, 0xc0, a, b); +} + +void +floatNegateRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, floatReg(a) and floatReg(b)); + assert(c, aSize == 4); //unlike most of the other floating point code, this does NOT support doubles. + ResolvedPromise pcon(0x80000000); + Assembler::Constant con(&pcon); + if(a->low == b->low) { + Assembler::Register tmp(c->client->acquireTemporary(FloatRegisterMask)); + moveCR(c, 4, &con, 4, &tmp); + maybeRex(c, 4, a, &tmp); + opcode(c, 0x0f, 0x57); + modrm(c, 0xc0, &tmp, a); + c->client->releaseTemporary(tmp.low); + } else { + moveCR(c, 4, &con, 4, b); + if(aSize == 8) opcode(c, 0x66); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x57); + modrm(c, 0xc0, a, b); + } +} + +void +floatAbsRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, floatReg(a) and floatReg(b)); + assert(c, aSize == 4); //unlike most of the other floating point code, this does NOT support doubles. 
+ ResolvedPromise pcon(0x7fffffff); + Assembler::Constant con(&pcon); + if(a->low == b->low) { + Assembler::Register tmp(c->client->acquireTemporary(FloatRegisterMask)); + moveCR(c, 4, &con, 4, &tmp); + maybeRex(c, 4, a, &tmp); + opcode(c, 0x0f, 0x54); + modrm(c, 0xc0, &tmp, a); + c->client->releaseTemporary(tmp.low); + } else { + moveCR(c, 4, &con, 4, b); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x54); + modrm(c, 0xc0, a, b); + } +} + +void +absRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b UNUSED) +{ + assert(c, aSize == bSize and a->low == rax and b->low == rax); + Assembler::Register d(c->client->acquireTemporary(static_cast(1) << rdx)); + maybeRex(c, aSize, a, b); + opcode(c, 0x99); + xorRR(c, aSize, &d, aSize, a); + subtractRR(c, aSize, &d, aSize, a); + c->client->releaseTemporary(rdx); +} + void populateTables(ArchitectureContext* c) { @@ -1963,11 +2328,14 @@ populateTables(ArchitectureContext* c) uo[index(JumpIfGreaterOrEqual, C)] = CAST1(jumpIfGreaterOrEqualC); uo[index(JumpIfLess, C)] = CAST1(jumpIfLessC); uo[index(JumpIfLessOrEqual, C)] = CAST1(jumpIfLessOrEqualC); + uo[index(JumpIfUnordered, C)] = CAST1(jumpIfUnorderedC); uo[index(LongJump, C)] = CAST1(longJumpC); bo[index(Negate, R, R)] = CAST2(negateRR); + bo[index(FloatNegate, R, R)] = CAST2(floatNegateRR); + bo[index(Move, R, R)] = CAST2(moveRR); bo[index(Move, C, R)] = CAST2(moveCR); bo[index(Move, M, R)] = CAST2(moveMR); @@ -1975,6 +2343,9 @@ populateTables(ArchitectureContext* c) bo[index(Move, C, M)] = CAST2(moveCM); bo[index(Move, A, R)] = CAST2(moveAR); + bo[index(FloatSqrt, R, R)] = CAST2(floatSqrtRR); + bo[index(FloatSqrt, M, R)] = CAST2(floatSqrtMR); + bo[index(MoveZ, R, R)] = CAST2(moveZRR); bo[index(MoveZ, M, R)] = CAST2(moveZMR); @@ -1983,12 +2354,20 @@ populateTables(ArchitectureContext* c) bo[index(Compare, C, M)] = CAST2(compareCM); bo[index(Compare, R, M)] = CAST2(compareRM); + bo[index(FloatCompare, R, R)] = 
CAST2(floatCompareRR); + bo[index(Add, R, R)] = CAST2(addRR); bo[index(Add, C, R)] = CAST2(addCR); bo[index(Subtract, C, R)] = CAST2(subtractCR); bo[index(Subtract, R, R)] = CAST2(subtractRR); + bo[index(FloatAdd, R, R)] = CAST2(floatAddRR); + bo[index(FloatAdd, M, R)] = CAST2(floatAddMR); + + bo[index(FloatSubtract, R, R)] = CAST2(floatSubtractRR); + bo[index(FloatSubtract, M, R)] = CAST2(floatSubtractMR); + bo[index(And, R, R)] = CAST2(andRR); bo[index(And, C, R)] = CAST2(andCR); @@ -2003,6 +2382,12 @@ populateTables(ArchitectureContext* c) bo[index(Divide, R, R)] = CAST2(divideRR); + bo[index(FloatMultiply, R, R)] = CAST2(floatMultiplyRR); + bo[index(FloatMultiply, M, R)] = CAST2(floatMultiplyMR); + + bo[index(FloatDivide, R, R)] = CAST2(floatDivideRR); + bo[index(FloatDivide, M, R)] = CAST2(floatDivideMR); + bo[index(Remainder, R, R)] = CAST2(remainderRR); bo[index(LongCompare, C, R)] = CAST2(longCompareCR); @@ -2016,8 +2401,19 @@ populateTables(ArchitectureContext* c) bo[index(UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR); bo[index(UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR); -} + bo[index(Float2Float, R, R)] = CAST2(float2FloatRR); + bo[index(Float2Float, M, R)] = CAST2(float2FloatMR); + + bo[index(Float2Int, R, R)] = CAST2(float2IntRR); + bo[index(Float2Int, M, R)] = CAST2(float2IntMR); + + bo[index(Int2Float, R, R)] = CAST2(int2FloatRR); + bo[index(Int2Float, M, R)] = CAST2(int2FloatMR); + + bo[index(Abs, R, R)] = CAST2(absRR); + bo[index(FloatAbs, R, R)] = CAST2(floatAbsRR); +} class MyArchitecture: public Assembler::Architecture { public: MyArchitecture(System* system): c(system), referenceCount(0) { @@ -2025,7 +2421,31 @@ class MyArchitecture: public Assembler::Architecture { } virtual unsigned registerCount() { - return (BytesPerWord == 4 ? 8 : 16); + if (supportsSSE()) { + return BytesPerWord == 4 ? 24 : 32; + } else { + return BytesPerWord == 4 ? 
8 : 16; + } + } + + virtual unsigned generalRegisterCount() { + return BytesPerWord == 4 ? 8 : 16; + } + + virtual unsigned floatRegisterCount() { + if (supportsSSE()) { + return BytesPerWord == 4 ? 8 : 16; + } else { + return 0; + } + } + + virtual uint64_t generalRegisters() { + return GeneralRegisterMask; + } + + virtual uint64_t floatRegisters() { + return supportsSSE() ? FloatRegisterMask : 0; } virtual int stack() { @@ -2044,10 +2464,6 @@ class MyArchitecture: public Assembler::Architecture { return (BytesPerWord == 4 ? rdx : NoRegister); } - virtual bool condensedAddressing() { - return true; - } - virtual bool bigEndian() { return false; } @@ -2058,7 +2474,7 @@ class MyArchitecture: public Assembler::Architecture { case rsp: case rbx: return true; - + default: return false; } @@ -2171,6 +2587,10 @@ class MyArchitecture: public Assembler::Architecture { return 0; } + virtual bool supportsFloatCompare(unsigned size) { + return supportsSSE() and size <= BytesPerWord; + } + virtual void nextFrame(void** stack, void** base) { assert(&c, *static_cast(*base) != *base); @@ -2189,61 +2609,206 @@ class MyArchitecture: public Assembler::Architecture { *thunk = false; } - virtual void plan + bool checkMethodClass(Thread* t, object method, const char* value) + { + return strcmp + (reinterpret_cast + (&byteArrayBody(t, className(t, methodClass(t, method)), 0)), + value) == 0; + } + + bool checkMethodName(Thread* t, object method, const char* value) + { + return strcmp + (reinterpret_cast + (&byteArrayBody(t, methodName(t, method), 0)), + value) == 0; + } + + bool checkMethodSpec(Thread* t, object method, const char* value) + { + return strcmp + (reinterpret_cast + (&byteArrayBody(t, methodSpec(t, method), 0)), + value) == 0; + } + + virtual BinaryOperation hasBinaryIntrinsic(Thread* t, object method) + { + if(checkMethodClass(t, method, "java/lang/Math")) { + if(supportsSSE() and checkMethodName(t, method, "sqrt") and checkMethodSpec(t, method, "(D)D") and BytesPerWord 
== 8) { + return FloatSqrt; + } else if(checkMethodName(t, method, "abs")) { + if(checkMethodSpec(t, method, "(I)I") or (checkMethodSpec(t, method, "(J)J") and BytesPerWord == 8)) { + return Abs; + } else if(supportsSSE() and supportsSSE2() and checkMethodSpec(t, method, "(F)F")) { + return FloatAbs; + } + } + } + return NoBinaryOperation; + } + + virtual TernaryOperation hasTernaryIntrinsic(Thread* t UNUSED, object method UNUSED) { + return NoTernaryOperation; + } + + virtual void planSource (BinaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, - unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, - bool* thunk) + unsigned bSize, bool* thunk) { *aTypeMask = ~0; - *aRegisterMask = ~static_cast(0); - - *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); - *bRegisterMask = ~static_cast(0); + *aRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); *thunk = false; switch (op) { case Compare: *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); - *bTypeMask = (1 << RegisterOperand); + *aRegisterMask = GeneralRegisterMask; + break; + case FloatCompare: + assert(&c, supportsSSE() && aSize <= BytesPerWord); + *aTypeMask = (1 << RegisterOperand); + *aRegisterMask = FloatRegisterMask; break; - case Negate: *aTypeMask = (1 << RegisterOperand); - *bTypeMask = (1 << RegisterOperand); *aRegisterMask = (static_cast(1) << (rdx + 32)) | (static_cast(1) << rax); - *bRegisterMask = *aRegisterMask; break; - + case Abs: + *aTypeMask = (1 << RegisterOperand); + *aRegisterMask = (static_cast(1) << rax); + break; + case FloatAbs: + *aTypeMask = (1 << RegisterOperand); + *aRegisterMask = FloatRegisterMask; + break; + case FloatNegate: + if(!supportsSSE() or aSize == 8 or bSize == 8) { //floatNegateRR does not support doubles + *thunk = true; + } else { + *aTypeMask = (1 << RegisterOperand); + *aRegisterMask = FloatRegisterMask; + } + break; + case FloatSqrt: + *aTypeMask = (1 << RegisterOperand) | (1 << 
MemoryOperand); + *aRegisterMask = FloatRegisterMask; + break; + case Float2Float: + if(!supportsSSE() or !supportsSSE2() or BytesPerWord == 4) { + *thunk = true; + } else { + *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *aRegisterMask = FloatRegisterMask; + } + break; + case Float2Int: + if(!supportsSSE() or aSize > BytesPerWord or bSize > BytesPerWord) { + *thunk = true; + } else { + *aTypeMask = (1 << RegisterOperand);// | (1 << MemoryOperand); + *aRegisterMask = FloatRegisterMask; + } + break; + case Int2Float: + if(!supportsSSE() or aSize > BytesPerWord or bSize > BytesPerWord) { + *thunk = true; + } else { + *aTypeMask = (1 << RegisterOperand);// | (1 << MemoryOperand); + *aRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); + } + break; case Move: + *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *aRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); if (BytesPerWord == 4) { if (aSize == 4 and bSize == 8) { - const uint32_t mask = ~((1 << rax) | (1 << rdx)); - *aRegisterMask = (static_cast(mask) << 32) | mask; - *bRegisterMask = (static_cast(1) << (rdx + 32)) - | (static_cast(1) << rax); + *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx)); + *aRegisterMask = (static_cast(mask) << 32) | mask; } else if (aSize == 1 or bSize == 1) { + *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); const uint32_t mask = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx); - *aRegisterMask = (static_cast(mask) << 32) | mask; - *bRegisterMask = (static_cast(mask) << 32) | mask; + *aRegisterMask = (static_cast(mask) << 32) | mask; } } break; - default: break; } } - virtual void plan + virtual void planDestination + (BinaryOperation op, + unsigned aSize, const uint8_t* aTypeMask UNUSED, const uint64_t* aRegisterMask, + unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask) + { + *bTypeMask = ~0; + 
*bRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); + switch (op) { + case Compare: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = GeneralRegisterMask; + break; + case FloatCompare: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = FloatRegisterMask; + break; + + case Abs: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = (static_cast(1) << rax); + break; + + case FloatAbs: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = *aRegisterMask; + break; + + case Negate: + case FloatNegate: + case FloatSqrt: + case Float2Float: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = *aRegisterMask; + break; + case Int2Float: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = FloatRegisterMask; + break; + case Float2Int: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); + break; + case Move: + *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *bRegisterMask = GeneralRegisterMask | (static_cast(GeneralRegisterMask) << 32); + if (BytesPerWord == 4) { + if (aSize == 4 and bSize == 8) { + *bRegisterMask = (static_cast(1) << (rdx + 32)) + | (static_cast(1) << rax); + } else if (aSize == 1 or bSize == 1) { + const uint32_t mask + = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx); + *bRegisterMask = (static_cast(mask) << 32) | mask; + } + } + break; + default: + break; + } + } + + virtual void planSource (TernaryOperation op, - unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, + unsigned aSize, uint8_t *aTypeMask, uint64_t *aRegisterMask, unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, - unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask, - bool* thunk) + unsigned, bool* thunk) { *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); *aRegisterMask = ~static_cast(0); @@ -2254,21 +2819,37 @@ class MyArchitecture: public Assembler::Architecture { *thunk = false; switch (op) { + case FloatAdd: + 
case FloatSubtract: + case FloatMultiply: + case FloatDivide: + if(!supportsSSE() or aSize > BytesPerWord) { + *thunk = true; + } else { + *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *bTypeMask = (1 << RegisterOperand); + *aRegisterMask = FloatRegisterMask; + *bRegisterMask = FloatRegisterMask; + } + break; + case Multiply: if (BytesPerWord == 4 and aSize == 8) { - const uint32_t mask = ~((1 << rax) | (1 << rdx)); + const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx)); *aRegisterMask = (static_cast(mask) << 32) | mask; *bRegisterMask = (static_cast(1) << (rdx + 32)) | mask; + } else { + *aRegisterMask = GeneralRegisterMask; + *bRegisterMask = GeneralRegisterMask; } break; case Divide: if (BytesPerWord == 4 and aSize == 8) { - *bTypeMask = ~0; - *thunk = true; + *thunk = true; } else { *aTypeMask = (1 << RegisterOperand); - *aRegisterMask = ~((1 << rax) | (1 << rdx)); + *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx)); *bRegisterMask = 1 << rax; } break; @@ -2279,25 +2860,32 @@ class MyArchitecture: public Assembler::Architecture { *thunk = true; } else { *aTypeMask = (1 << RegisterOperand); - *aRegisterMask = ~((1 << rax) | (1 << rdx)); - *bRegisterMask = 1 << rax; + *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx)); + *bRegisterMask = 1 << rax; } break; case ShiftLeft: case ShiftRight: case UnsignedShiftRight: { - *aRegisterMask = (~static_cast(0) << 32) + *aRegisterMask = (static_cast(GeneralRegisterMask) << 32) | (static_cast(1) << rcx); - const uint32_t mask = ~(1 << rcx); + const uint32_t mask = GeneralRegisterMask & ~(1 << rcx); *bRegisterMask = (static_cast(mask) << 32) | mask; } break; default: break; } + } - *cTypeMask = *bTypeMask; + virtual void planDestination + (TernaryOperation op UNUSED, + unsigned aSize UNUSED, const uint8_t* aTypeMask UNUSED, const uint64_t* aRegisterMask UNUSED, + unsigned bSize UNUSED, const uint8_t* bTypeMask UNUSED, const uint64_t* bRegisterMask, + unsigned 
cSize UNUSED, uint8_t* cTypeMask, uint64_t* cRegisterMask) + { + *cTypeMask = (1 << RegisterOperand); *cRegisterMask = *bRegisterMask; } From f8bbc609e8d4465f5cf9b159a6a18814b9960d5b Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 10:32:00 -0600 Subject: [PATCH 09/16] corrected debug messages --- src/compile.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index 3b8637c872..07d0813751 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -27,11 +27,12 @@ vmCall(); namespace { -const bool DebugCompile = true; +const bool DebugCompile = false; const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; const bool DebugFrameMaps = false; +const bool DebugIntrinsics = false; const bool CheckArrayBounds = true; @@ -3091,7 +3092,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, if(params == 1) {//TODO: Get number of method params BinaryOperation op = t->arch->hasBinaryIntrinsic(t, target); if(op != NoBinaryOperation) { - printf("Could use binary intrinsic %i.\n", op); + if(DebugIntrinsics) { + fprintf(stderr, "Using binary intrinsic %i.\n", op); + } int opSize = methodParameterFootprint(t, target) * BytesPerWord; int resSize = resultSize(t, methodReturnCode(t, target)); Compiler::Operand* param; @@ -3111,7 +3114,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } else if(params == 2) { //TODO: Get number of method params TernaryOperation op = t->arch->hasTernaryIntrinsic(t, target); if(op != NoTernaryOperation) { - printf("Could use ternary intrinsic %i.\n", op); + if(DebugIntrinsics) { + fprintf(stderr, "Could use ternary intrinsic %i.\n", op); + } //int aSize, bSize; //int resSize = resultSize(t, methodReturnCode(t, target)); compileDirectInvoke(t, frame, target); //TODO: use intrinsic From 04583ea534a30048978891121cb2df8a442c507d Mon Sep 17 00:00:00 2001 From: Josh warner Date: Thu, 6 Aug 2009 10:34:28 -0600 Subject: 
[PATCH 10/16] floating point test code --- test/AllFloats.java | 77 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 test/AllFloats.java diff --git a/test/AllFloats.java b/test/AllFloats.java new file mode 100644 index 0000000000..8f41c4d20e --- /dev/null +++ b/test/AllFloats.java @@ -0,0 +1,77 @@ +public class AllFloats { + private static float multiplyByFive(float a) {return 5f * a;} + private static double multiplyByFive(double a) {return 5d * a;} + private static float multiply(float a, float b) {return a * b;} + private static double multiply(double a, double b) {return a * b;} + private static double multiply(float a, double b) {return a * b;} + private static float divide(float a, float b) {return a / b;} + private static double divide(double a, double b) {return a / b;} + private static double divide(float a, double b) {return a / b;} + private static float add(float a, float b) {return a + b;} + private static double add(double a, double b) {return a + b;} + private static double add(float a, double b) {return a + b;} + private static float subtract(float a, float b) {return a - b;} + private static double subtract(double a, double b) {return a - b;} + private static double subtract(float a, double b) {return a - b;} + private static float complex(float a, float b) {return (a - b) / (a * b) + (float)Math.sqrt(a);} + private static double complex(double a, double b) {return (a - b) / (a * b) + Math.sqrt(a);} + private static double complex(float a, double b) {return (a - b) / (a * b) + Math.sqrt(a);} + private static int f2i(float a) {return (int)a;} + private static long f2l(float a) {return (long)a;} + private static float i2f(int a) {return (float)a;} + private static double i2d(int a) {return (double)a;} + private static int d2i(double a) {return (int)a;} + private static long d2l(double a) {return (long)a;} + private static float l2f(long a) {return (float)a;} + private static double l2d(long a) {return 
(double)a;} + private static float negate(float a) {return -a;} + private static double negate(double a) {return -a;} + private static int abs(int a) {return Math.abs(a);} + private static float abs(float a) {return Math.abs(a);} + + private static void expect(boolean v) { + if(!v)throw new RuntimeException(); + } + + private static int last(){return 0;} + + public static void main(String[] args) { + expect(multiplyByFive(36f) == 5f * 36f); + expect(multiplyByFive(36d) == 5d * 36d); + expect(multiply(5f, 4f) == 5f*4f); + expect(multiply(5d, 4d) == 5d*4d); + expect(multiply(5f, 4d) == 5f*4d); + expect(divide(5f, 2f) == 5f/2f); + expect(divide(5d, 2d) == 5d/2d); + expect(divide(5f, 2d) == 5f/2d); + expect(add(5f, 4f) == 5f+4f); + expect(add(5d, 4d) == 5f+4d); + expect(add(5f, 4f) == 5f+4d); + expect(subtract(5f, 4f) == 5f-4f); + expect(subtract(5d, 4d) == 5f-4d); + expect(subtract(5f, 4f) == 5f-4d); + expect(complex(4f, 3f) == (4f-3f)/(4f*3f) + 2f); + expect(complex(4d, 3d) == (4d-3d)/(4d*3d) + 2d); + expect(complex(4f, 3d) == (4f-3d)/(4f*3d) + 2f); + + expect(f2i(4f) == 4); + expect(f2l(4f) == 4); + expect(i2f(4) == 4f); + expect(i2d(4) == 4d); + + expect(d2i(4d) == 4); + expect(d2l(4d) == 4); + expect(l2f(4) == 4f); + expect(l2d(4) == 4d); + + expect(negate(4f) == -4f); + expect(negate(4d) == -4d); + + expect(abs(-4) == 4); + expect(abs(12) == 12); + expect(abs(-4f) == 4f); + expect(abs(12f) == 12f); + + int unused = last(); + } +} From 9910e310cb4ce1cf95f9223d42e45266c0833dd7 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Mon, 10 Aug 2009 13:42:37 -0600 Subject: [PATCH 11/16] fixed register reserve logic in pickTarget --- src/compiler.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index 52d3dbfa99..b1a4251bbb 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -1317,11 +1317,10 @@ pickTarget(Context* c, Read* read, bool intersectRead, registerPenalty = (c->floatRegisterCount > 
registerReserveCount ? 0 : Target::LowRegisterPenalty); } else { - registerPenalty = (c->availableRegisterCount > registerReserveCount - ? 0 : Target::LowRegisterPenalty); + abort(c); } } else { - registerPenalty = (c->availableRegisterCount > registerReserveCount + registerPenalty = (c->generalRegisterCount > registerReserveCount || c->floatRegisterCount > registerReserveCount ? 0 : Target::LowRegisterPenalty); } From 32167168f8c3d6a68d43c76b2e6961c068562e6b Mon Sep 17 00:00:00 2001 From: Josh warner Date: Tue, 11 Aug 2009 13:25:22 -0600 Subject: [PATCH 12/16] fixed incorrect opSize bug for 64-bit platforms --- src/compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compile.cpp b/src/compile.cpp index 2ca1a7a8fd..9d55bdf83f 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -3630,7 +3630,7 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, if (DebugIntrinsics) { fprintf(stderr, "Using binary intrinsic %i.\n", op); } - int opSize = methodParameterFootprint(t, target) * BytesPerWord; + int opSize = methodParameterFootprint(t, target) * 4; int resSize = resultSize(t, methodReturnCode(t, target)); Compiler::Operand* param; if (opSize == 4) { From cd59222f53397d6f40fb5977cbb40847d74735e9 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Tue, 11 Aug 2009 13:27:25 -0600 Subject: [PATCH 13/16] fixed propagation of result sizes --- src/compiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index b1a4251bbb..0f1f37cdaa 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -3652,7 +3652,7 @@ class TranslateEvent: public Event { apply(c, type, size, value->source, source(value->high), - size, low, high); + resSize, low, high); for (Read* r = reads; r; r = r->eventNext) { popRead(c, this, r->value); From 711680a183d92dd68d6b588756418fc4dbfd9078 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Tue, 11 Aug 2009 13:29:00 -0600 Subject: [PATCH 14/16] fixed powerpc compile errors
--- src/powerpc.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/powerpc.cpp b/src/powerpc.cpp index 99e0c0be34..07a31636b9 100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -1676,6 +1676,14 @@ class MyArchitecture: public Assembler::Architecture { return 32; } + virtual unsigned generalRegisterCount() { + return 32; + } + + virtual unsigned floatRegisterCount() { + return 0; + } + virtual int stack() { return StackRegister; } @@ -1827,23 +1835,23 @@ class MyArchitecture: public Assembler::Architecture { *stack = *static_cast(*stack); } - virtual BinaryOperation hasBinaryIntrinsic(Thread* t, object method) { + virtual BinaryOperation hasBinaryIntrinsic(Thread*, object) { return NoBinaryOperation; } - virtual TernaryOperation hasTernaryIntrinsic(Thread* t UNUSED, object method UNUSED) { + virtual TernaryOperation hasTernaryIntrinsic(Thread*, object) { return NoTernaryOperation; } - virtual bool supportsFloatCompare(unsigned size) { + virtual bool supportsFloatCompare(unsigned) { return false; } - virtual bool alwaysCondensed(BinaryOperation op) { + virtual bool alwaysCondensed(BinaryOperation) { return false; } - virtual bool alwaysCondensed(TernaryOperation op) { + virtual bool alwaysCondensed(TernaryOperation) { return false; } @@ -1889,7 +1897,7 @@ class MyArchitecture: public Assembler::Architecture { virtual void planDestination (BinaryOperation op, - unsigned, const uint8_t* aTypeMask, const uint64_t* aRegisterMask, + unsigned, const uint8_t*, const uint64_t*, unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask) { *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); @@ -1962,9 +1970,9 @@ class MyArchitecture: public Assembler::Architecture { } virtual void planDestination - (TernaryOperation op, + (TernaryOperation, + unsigned, const uint8_t*, const uint64_t*, unsigned, const uint8_t*, const uint64_t*, - unsigned, const uint8_t* bTypeMask, const uint64_t* bRegisterMask, unsigned, uint8_t* 
cTypeMask, uint64_t* cRegisterMask) { *cTypeMask = (1 << RegisterOperand); From f29199a2851c59da8ed9af5360463250646c1c0e Mon Sep 17 00:00:00 2001 From: Josh warner Date: Tue, 11 Aug 2009 13:30:31 -0600 Subject: [PATCH 15/16] fixed several operand type errors that appeared on 64-bit platforms --- src/x86.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/x86.cpp b/src/x86.cpp index 4e3a681c76..d0db69ab3b 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -961,7 +961,7 @@ sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a, modrm(c, 0xc0, b, a); } else { opcode(c, 0x66); - maybeRex(c, aSize, a, b); + maybeRex(c, aSize, b, a); opcode(c, 0x0f, 0x6e); modrm(c, 0xc0, a, b); } @@ -2699,12 +2699,12 @@ class MyArchitecture: public Assembler::Architecture { case Float2Float: case Float2Int: case Int2Float: - return false; - case Negate: - case Abs: case FloatAbs: case FloatNegate: case FloatSqrt: + return false; + case Negate: + case Abs: default: return true; } @@ -2902,11 +2902,14 @@ class MyArchitecture: public Assembler::Architecture { break; case Negate: + *bTypeMask = (1 << RegisterOperand); + *bRegisterMask = *aRegisterMask; + break; case FloatNegate: case FloatSqrt: case Float2Float: *bTypeMask = (1 << RegisterOperand); - *bRegisterMask = *aRegisterMask; + *bRegisterMask = FloatRegisterMask; break; case Int2Float: *bTypeMask = (1 << RegisterOperand); From 78ea4d20e3cc44e812ab524d73d44d40fef64033 Mon Sep 17 00:00:00 2001 From: Josh warner Date: Tue, 11 Aug 2009 13:46:51 -0600 Subject: [PATCH 16/16] added loneMatch to improve register allocation --- src/compiler.cpp | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/compiler.cpp b/src/compiler.cpp index 0f1f37cdaa..e715f54cce 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -108,6 +108,8 @@ class Site { virtual unsigned copyCost(Context*, Site*) = 0; virtual bool match(Context*, const SiteMask&) = 0; + + virtual 
bool loneMatch(Context*, const SiteMask&) = 0; virtual void acquire(Context*, Value*) { } @@ -1429,6 +1431,10 @@ class ConstantSite: public Site { return mask.typeMask & (1 << ConstantOperand); } + virtual bool loneMatch(Context*, const SiteMask&) { + return true; + } + virtual OperandType type(Context*) { return ConstantOperand; } @@ -1501,6 +1507,10 @@ class AddressSite: public Site { return mask.typeMask & (1 << AddressOperand); } + virtual bool loneMatch(Context*, const SiteMask&) { + return false; + } + virtual OperandType type(Context*) { return AddressOperand; } @@ -1575,6 +1585,16 @@ class RegisterSite: public Site { } } + virtual bool loneMatch(Context* c UNUSED, const SiteMask& mask) { + assert(c, number != NoRegister); + + if ((mask.typeMask & (1 << RegisterOperand))) { + return ((static_cast(1) << number) == mask.registerMask); + } else { + return false; + } + } + virtual void acquire(Context* c, Value* v) { Target target; if (number != NoRegister) { @@ -1729,6 +1749,23 @@ class MemorySite: public Site { } } + virtual bool loneMatch(Context* c, const SiteMask& mask) { + assert(c, acquired); + + if (mask.typeMask & (1 << MemoryOperand)) { + if (base == c->arch->stack()) { + assert(c, index == NoRegister); + + if (mask.frameIndex == AnyFrameIndex) { + return false; + } else { + return true; + } + } + } + return false; + } + virtual void acquire(Context* c, Value* v) { increment(c, c->registerResources + base); if (index != NoRegister) { @@ -3145,7 +3182,8 @@ getTarget(Context* c, Value* value, Value* result, const SiteMask& resultMask) Site* s; Value* v; Read* r = liveNext(c, value); - if (value->source->match(c, static_cast(resultMask))) { + if (value->source->match(c, static_cast(resultMask)) and (r == 0 or + value->source->loneMatch(c, static_cast(resultMask)))) { s = value->source; v = value; if (r and not hasMoreThanOneSite(v)) {