diff --git a/src/assembler.h b/src/assembler.h index 26b1f9b509..4c01c82404 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -43,7 +43,14 @@ enum UnaryOperation { JumpIfLessOrEqual, JumpIfGreaterOrEqual, JumpIfEqual, - JumpIfNotEqual + JumpIfNotEqual, + JumpIfFloatUnordered, + JumpIfFloatLess, + JumpIfFloatGreater, + JumpIfFloatLessOrEqual, + JumpIfFloatGreaterOrEqual, + JumpIfFloatEqual, + JumpIfFloatNotEqual, }; const unsigned UnaryOperationCount = JumpIfNotEqual + 1; @@ -52,10 +59,24 @@ enum BinaryOperation { Move, MoveZ, Compare, - Negate + Negate, + + //extensions: + FloatNegate, + FloatCompare, + Float2Float, + Float2Int, + Int2Float, + + //intrinsic functions: + FloatSqrt, + FloatAbs, + Abs, + + NoBinaryOperation = -1 }; -const unsigned BinaryOperationCount = Negate + 1; +const unsigned BinaryOperationCount = Abs + 1; enum TernaryOperation { LongCompare, @@ -69,10 +90,23 @@ enum TernaryOperation { UnsignedShiftRight, And, Or, - Xor + Xor, + + //extensions: + FloatAdd, + FloatSubtract, + FloatMultiply, + FloatDivide, + FloatRemainder, + + //intrinsic functions: + FloatMax, + FloatMin, + + NoTernaryOperation = -1 }; -const unsigned TernaryOperationCount = Xor + 1; +const unsigned TernaryOperationCount = FloatMin + 1; enum OperandType { ConstantOperand, @@ -259,6 +293,11 @@ class Assembler { class Architecture { public: virtual unsigned registerCount() = 0; + virtual unsigned generalRegisterCount() = 0; + virtual unsigned floatRegisterCount() = 0; + virtual uint64_t generalRegisters() = 0; + virtual uint64_t floatRegisters() = 0; + virtual uint64_t allRegisters() = 0; virtual int stack() = 0; virtual int thread() = 0; @@ -267,9 +306,12 @@ class Assembler { virtual int virtualCallTarget() = 0; virtual int virtualCallIndex() = 0; - virtual bool condensedAddressing() = 0; - virtual bool bigEndian() = 0; + + virtual bool supportsFloatCompare(unsigned size) = 0; + + virtual bool alwaysCondensed(BinaryOperation op) = 0; + virtual bool alwaysCondensed(TernaryOperation op) = 0; virtual bool reserved(int register_) = 0; @@ -297,24 +339,41 @@ class Assembler { virtual int returnAddressOffset() = 0; virtual int framePointerOffset() = 0; virtual void nextFrame(void** stack, void** base) = 0; + + virtual BinaryOperation binaryIntrinsic(const char* className, + const char* methodName, + const char* parameterSpec) = 0; + + virtual TernaryOperation ternaryIntrinsic(const char* className, + const char* methodName, + const char* parameterSpec) = 0; virtual void plan (UnaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, bool* thunk) = 0; - virtual void plan + virtual void planSource (BinaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, - unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, - bool* thunk) = 0; + unsigned bSize, bool* thunk) = 0; + + virtual void planDestination + (BinaryOperation op, + unsigned aSize, const uint8_t* aTypeMask, const uint64_t* aRegisterMask, + unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask) = 0; - virtual void plan + virtual void planSource (TernaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, - unsigned cSize, uint8_t* cTypeMask, uint64_t* cRegisterMask, - bool* thunk) = 0; + unsigned cSize, bool* thunk) = 0; + + virtual void planDestination + (TernaryOperation op, + unsigned aSize, const uint8_t* aTypeMask, const uint64_t* aRegisterMask, + unsigned bSize, const uint8_t* bTypeMask, const uint64_t* 
bRegisterMask, + unsigned cSize, uint8_t* cTypeMask, uint64_t* cRegisterMask) = 0; virtual void acquire() = 0; virtual void release() = 0; diff --git a/src/compile.cpp b/src/compile.cpp index 165ff3b7a1..c97646b036 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -39,6 +39,7 @@ const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; const bool DebugFrameMaps = false; +const bool DebugIntrinsics = false; const bool CheckArrayBounds = true; @@ -774,20 +775,90 @@ class Context { virtual intptr_t getThunk(UnaryOperation, unsigned) { abort(t); } + + virtual intptr_t getThunk(BinaryOperation op, unsigned size, + unsigned resultSize) + { + switch(op) { + case FloatNegate: + if (size == 4) { + return local::getThunk(t, negateFloatThunk); + } else { + return local::getThunk(t, negateDoubleThunk); + } + case Float2Float: + if (size == 4 and resultSize == 8) { + return local::getThunk(t, floatToDoubleThunk); + } else if(size == 8 and resultSize == 4) { + return local::getThunk(t, doubleToFloatThunk); + } + case Float2Int: + if (size == 4 and resultSize == 4) { + return local::getThunk(t, floatToIntThunk); + } else if(size == 4 and resultSize == 8) { + return local::getThunk(t, floatToLongThunk); + } else if(size == 8 and resultSize == 4) { + return local::getThunk(t, doubleToIntThunk); + } else if(size == 8 and resultSize == 8) { + return local::getThunk(t, doubleToLongThunk); + } + case Int2Float: + if (size == 4 and resultSize == 4) { + return local::getThunk(t, intToFloatThunk); + } else if(size == 4 and resultSize == 8) { + return local::getThunk(t, intToDoubleThunk); + } else if(size == 8 and resultSize == 4) { + return local::getThunk(t, longToFloatThunk); + } else if(size == 8 and resultSize == 8) { + return local::getThunk(t, longToDoubleThunk); + } + + default: break; + } + + abort(t); + } - virtual intptr_t getThunk(TernaryOperation op, unsigned size) { + virtual intptr_t getThunk(TernaryOperation op, unsigned size UNUSED, + unsigned resultSize) + { switch (op) { case Divide: - if (size == 8) { + if (resultSize == 8) { return local::getThunk(t, divideLongThunk); } break; case Remainder: - if (size == 8) { + if (resultSize == 8) { return local::getThunk(t, moduloLongThunk); } break; + + case FloatAdd: + if(resultSize == 4) { + return local::getThunk(t, addFloatThunk); + } else { + return local::getThunk(t, addDoubleThunk); + } + case FloatSubtract: + if(resultSize == 4) { + return local::getThunk(t, subtractFloatThunk); + } else { + return local::getThunk(t, subtractDoubleThunk); + } + case FloatMultiply: + if(resultSize == 4) { + return local::getThunk(t, multiplyFloatThunk); + } else { + return local::getThunk(t, multiplyDoubleThunk); + } + case FloatDivide: + if(resultSize == 4) { + return local::getThunk(t, divideFloatThunk); + } else { + return local::getThunk(t, divideDoubleThunk); + } default: break; } @@ -951,7 +1022,7 @@ class Frame { object pointer = makePointer(t, p); bc->constants = makeTriple(t, o, pointer, bc->constants); - return c->promiseConstant(p); + return c->promiseConstant(p, Compiler::ObjectType); } else { context->objectPool = new (context->zone.allocate(sizeof(PoolElement))) @@ -1169,11 +1240,11 @@ class Frame { } Compiler::Operand* addressOperand(Promise* p) { - return c->promiseConstant(addressPromise(p)); + return c->promiseConstant(addressPromise(p), Compiler::AddressType); } Compiler::Operand* machineIp(unsigned logicalIp) { - return c->promiseConstant(c->machineIp(logicalIp)); + return 
c->promiseConstant(c->machineIp(logicalIp), Compiler::AddressType); } void visitLogicalIp(unsigned ip) { @@ -1254,7 +1325,7 @@ class Frame { poppedLong(); return popLongQuiet(); } - + Compiler::Operand* popObject() { poppedObject(); return popQuiet(1); @@ -2339,6 +2410,33 @@ pushReturnValue(MyThread* t, Frame* frame, unsigned code, } } +Compiler::OperandType +operandTypeForFieldCode(Thread* t, unsigned code) +{ + switch (code) { + case ByteField: + case BooleanField: + case CharField: + case ShortField: + case IntField: + case LongField: + return Compiler::IntegerType; + + case ObjectField: + return Compiler::ObjectType; + + case FloatField: + case DoubleField: + return Compiler::FloatType; + + case VoidField: + return Compiler::VoidType; + + default: + abort(t); + } +} + Compiler::Operand* compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall, bool useThunk, unsigned rSize, Promise* addressPromise) @@ -2352,39 +2450,42 @@ compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall, TraceElement* trace = frame->trace(target, TraceElement::TailCall); Compiler::Operand* returnAddress = c->promiseConstant (new (frame->context->zone.allocate(sizeof(TraceElementPromise))) - TraceElementPromise(t->m->system, trace));; + TraceElementPromise(t->m->system, trace), Compiler::AddressType); Compiler::Operand* result = c->stackCall (returnAddress, flags | Compiler::Aligned, trace, rSize, + operandTypeForFieldCode(t, methodReturnCode(t, target)), methodParameterFootprint(t, target)); - c->store(BytesPerWord, returnAddress, BytesPerWord, - c->memory(c->register_(t->arch->thread()), - difference(&(t->tailAddress), t))); + c->store + (BytesPerWord, returnAddress, BytesPerWord, c->memory + (c->register_(t->arch->thread()), Compiler::AddressType, + difference(&(t->tailAddress), t))); if (methodFlags(t, target) & ACC_NATIVE) { - c->exit(c->constant(nativeThunk(t))); + c->exit(c->constant(nativeThunk(t), Compiler::AddressType)); } else { - c->exit(c->constant(defaultThunk(t))); + c->exit(c->constant(defaultThunk(t), Compiler::AddressType)); } return result; } else { return c->stackCall - (c->constant(defaultThunk(t)), + (c->constant(defaultThunk(t), Compiler::AddressType), flags | Compiler::Aligned, frame->trace(target, 0), rSize, + operandTypeForFieldCode(t, methodReturnCode(t, target)), methodParameterFootprint(t, target)); } } else { Compiler::Operand* address = (addressPromise - ? c->promiseConstant(addressPromise) - : c->constant(methodAddress(t, target))); + ? c->promiseConstant(addressPromise, Compiler::AddressType) + : c->constant(methodAddress(t, target), Compiler::AddressType)); return c->stackCall (address, @@ -2392,6 +2493,7 @@ compileDirectInvoke(MyThread* t, Frame* frame, object target, bool tailCall, tailCall ? 0 : frame->trace ((methodFlags(t, target) & ACC_NATIVE) ? 
target : 0, 0), rSize, + operandTypeForFieldCode(t, methodReturnCode(t, target)), methodParameterFootprint(t, target)); } } @@ -2458,10 +2560,11 @@ handleMonitorEvent(MyThread* t, Frame* frame, intptr_t function) lock = loadLocal(frame->context, 1, savedTargetIndex(t, method)); } - c->call(c->constant(function), + c->call(c->constant(function, Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 2, c->register_(t->arch->thread()), lock); } } @@ -2573,6 +2676,22 @@ saveStateAndCompile(MyThread* t, Frame* initialFrame, unsigned ip) initialFrame->c->restoreState(state); } +bool +isCJump(unsigned instruction) +{ + switch(instruction) { + case ifeq: + case ifne: + case ifgt: + case ifge: + case iflt: + case ifle: + return true; + default: + return false; + } +} + void compile(MyThread* t, Frame* initialFrame, unsigned ip, int exceptionHandlerStart) @@ -2586,6 +2705,8 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, object code = methodCode(t, context->method); PROTECT(t, code); + + int lastFcmpl = 1, lastFcmpg = 1; while (ip < codeLength(t, code)) { if (context->visitTable[ip] ++) { @@ -2604,12 +2725,16 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, frame->pushObject(); c->call - (c->constant(getThunk(t, gcIfNecessaryThunk)), + (c->constant(getThunk(t, gcIfNecessaryThunk), Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 1, c->register_(t->arch->thread())); } + + ++ lastFcmpl; + ++ lastFcmpg; // fprintf(stderr, "ip: %d map: %ld\n", ip, *(frame->map)); @@ -2639,36 +2764,61 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case aaload: frame->pushObject (c->load - (BytesPerWord, BytesPerWord, - c->memory(array, ArrayBody, index, BytesPerWord), BytesPerWord)); + (BytesPerWord, BytesPerWord, c->memory + (array, Compiler::ObjectType, ArrayBody, index, BytesPerWord), + BytesPerWord)); break; case faload: + frame->pushInt + (c->load + (4, 4, c->memory + (array, Compiler::FloatType, ArrayBody, index, 4), BytesPerWord)); + break; case iaload: frame->pushInt - (c->load(4, 4, c->memory(array, ArrayBody, index, 4), BytesPerWord)); + (c->load + (4, 4, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 4), + BytesPerWord)); break; case baload: frame->pushInt - (c->load(1, 1, c->memory(array, ArrayBody, index, 1), BytesPerWord)); + (c->load + (1, 1, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 1), + BytesPerWord)); break; case caload: frame->pushInt - (c->loadz(2, 2, c->memory(array, ArrayBody, index, 2), - BytesPerWord)); + (c->loadz + (2, 2, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 2), + BytesPerWord)); break; case daload: + frame->pushLong + (c->load + (8, 8, c->memory + (array, Compiler::FloatType, ArrayBody, index, 8), 8)); + break; + case laload: frame->pushLong - (c->load(8, 8, c->memory(array, ArrayBody, index, 8), 8)); + (c->load + (8, 8, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 8), 8)); break; case saload: frame->pushInt - (c->load(2, 2, c->memory(array, ArrayBody, index, 2), BytesPerWord)); + (c->load + (2, 2, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 2), + BytesPerWord)); break; } } break; @@ -2704,42 +2854,60 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, switch (instruction) { case aastore: { c->call - (c->constant(getThunk(t, setMaybeNullThunk)), + (c->constant(getThunk(t, setMaybeNullThunk), Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 4, c->register_(t->arch->thread()), array, - c->add(4, 
c->constant(ArrayBody), - c->shl(4, c->constant(log(BytesPerWord)), index)), + c->add + (4, c->constant(ArrayBody, Compiler::IntegerType), + c->shl + (4, c->constant(log(BytesPerWord), Compiler::IntegerType), index)), value); } break; case fastore: + c->store + (BytesPerWord, value, 4, c->memory + (array, Compiler::FloatType, ArrayBody, index, 4)); + break; + case iastore: c->store - (BytesPerWord, value, 4, c->memory(array, ArrayBody, index, 4)); + (BytesPerWord, value, 4, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 4)); break; case bastore: c->store - (BytesPerWord, value, 1, c->memory(array, ArrayBody, index, 1)); + (BytesPerWord, value, 1, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 1)); break; case castore: case sastore: c->store - (BytesPerWord, value, 2, c->memory(array, ArrayBody, index, 2)); + (BytesPerWord, value, 2, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 2)); break; case dastore: + c->store + (8, value, 8, c->memory + (array, Compiler::FloatType, ArrayBody, index, 8)); + break; + case lastore: - c->store(8, value, 8, c->memory(array, ArrayBody, index, 8)); + c->store + (8, value, 8, c->memory + (array, Compiler::IntegerType, ArrayBody, index, 8)); break; } } break; case aconst_null: - frame->pushObject(c->constant(0)); + frame->pushObject(c->constant(0, Compiler::ObjectType)); break; case aload: @@ -2772,10 +2940,12 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, frame->pushObject (c->call - (c->constant(getThunk(t, makeBlankObjectArrayThunk)), + (c->constant + (getThunk(t, makeBlankObjectArrayThunk), Compiler::AddressType), 0, frame->trace(0, 0), BytesPerWord, + Compiler::ObjectType, 4, c->register_(t->arch->thread()), frame->append(classLoader(t, methodClass(t, context->method))), frame->append(class_), length)); @@ -2790,7 +2960,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, frame->pushInt (c->load (BytesPerWord, BytesPerWord, - c->memory(frame->popObject(), ArrayLength, 0, 1), BytesPerWord)); + c->memory + (frame->popObject(), Compiler::IntegerType, ArrayLength, 0, 1), + BytesPerWord)); } break; case astore: @@ -2816,16 +2988,19 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case athrow: { Compiler::Operand* target = frame->popObject(); c->call - (c->constant(getThunk(t, throw_Thunk)), + (c->constant(getThunk(t, throw_Thunk), Compiler::AddressType), Compiler::NoReturn, frame->trace(0, 0), 0, + Compiler::VoidType, 2, c->register_(t->arch->thread()), target); } return; case bipush: frame->pushInt - (c->constant(static_cast<int8_t>(codeBody(t, code, ip++)))); + (c->constant + (static_cast<int8_t>(codeBody(t, code, ip++)), + Compiler::IntegerType)); break; case checkcast: { @@ -2837,135 +3012,107 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* instance = c->peek(1, 0); c->call - (c->constant(getThunk(t, checkCastThunk)), + (c->constant(getThunk(t, checkCastThunk), Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 3, c->register_(t->arch->thread()), frame->append(class_), instance); } break; case d2f: { - frame->pushInt - (c->call - (c->constant(getThunk(t, doubleToFloatThunk)), - 0, 0, 4, 2, - static_cast<uintptr_t>(0), frame->popLong())); + frame->pushInt(c->f2f(8, 4, frame->popLong())); } break; case d2i: { - frame->pushInt - (c->call - (c->constant(getThunk(t, doubleToIntThunk)), - 0, 0, 4, 2, - static_cast<uintptr_t>(0), frame->popLong())); + frame->pushInt(c->f2i(8, 4, frame->popLong())); } break; case d2l: { - frame->pushLong - (c->call - (c->constant(getThunk(t, 
doubleToLongThunk)), - 0, 0, 8, 2, - static_cast<uintptr_t>(0), frame->popLong())); + frame->pushLong(c->f2i(8, 8, frame->popLong())); } break; case dadd: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, addDoubleThunk)), - 0, 0, 8, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + frame->pushLong(c->fadd(8, a, b)); } break; case dcmpg: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareDoublesGThunk)), - 0, 0, 4, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + if(t->arch->supportsFloatCompare(8) and isCJump(codeBody(t, code, ip))) { + c->fcmp(8, a, b); + lastFcmpg = 0; + } else { + frame->pushInt + (c->call + (c->constant + (getThunk(t, compareDoublesGThunk), Compiler::AddressType), + 0, 0, 4, Compiler::IntegerType, 4, + static_cast<uintptr_t>(0), a, + static_cast<uintptr_t>(0), b)); + } } break; case dcmpl: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareDoublesLThunk)), - 0, 0, 4, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + if(t->arch->supportsFloatCompare(8) and isCJump(codeBody(t, code, ip))) { + c->fcmp(8, a, b); + lastFcmpl = 0; + } else { + frame->pushInt + (c->call + (c->constant + (getThunk(t, compareDoublesLThunk), Compiler::AddressType), + 0, 0, 4, Compiler::IntegerType, 4, + static_cast<uintptr_t>(0), a, + static_cast<uintptr_t>(0), b)); + } } break; case dconst_0: - frame->pushLong(c->constant(doubleToBits(0.0))); + frame->pushLong(c->constant(doubleToBits(0.0), Compiler::FloatType)); break; case dconst_1: - frame->pushLong(c->constant(doubleToBits(1.0))); + frame->pushLong(c->constant(doubleToBits(1.0), Compiler::FloatType)); break; case ddiv: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, divideDoubleThunk)), - 0, 0, 8, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + frame->pushLong(c->fdiv(8, a, b)); } break; case dmul: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, multiplyDoubleThunk)), - 0, 0, 8, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + frame->pushLong(c->fmul(8, a, b)); } break; case dneg: { - frame->pushLong - (c->call - (c->constant(getThunk(t, negateDoubleThunk)), - 0, 0, 8, 2, - static_cast<uintptr_t>(0), frame->popLong())); + frame->pushLong(c->fneg(8, frame->popLong())); } break; case vm::drem: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, moduloDoubleThunk)), - 0, 0, 8, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + frame->pushLong(c->frem(8, a, b)); } break; case dsub: { Compiler::Operand* a = frame->popLong(); Compiler::Operand* b = frame->popLong(); - frame->pushLong - (c->call - (c->constant(getThunk(t, subtractDoubleThunk)), - 0, 0, 8, 4, - static_cast<uintptr_t>(0), a, - static_cast<uintptr_t>(0), b)); + frame->pushLong(c->fsub(8, a, b)); } break; case dup: @@ -2993,113 +3140,98 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, break; case f2d: { - frame->pushLong - (c->call - (c->constant(getThunk(t, floatToDoubleThunk)), - 0, 0, 8, 1, frame->popInt())); + frame->pushLong(c->f2f(4, 8, frame->popInt())); } break; case f2i: { - frame->pushInt - (c->call - (c->constant(getThunk(t, floatToIntThunk)), - 0, 0, 4, 1, frame->popInt())); + frame->pushInt(c->f2i(4, 4, 
frame->popInt())); } break; case f2l: { - frame->pushLong - (c->call - (c->constant(getThunk(t, floatToLongThunk)), - 0, 0, 8, 1, frame->popInt())); + frame->pushLong(c->f2i(4, 8, frame->popInt())); } break; case fadd: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, addFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fadd(4, a, b)); } break; case fcmpg: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareFloatsGThunk)), - 0, 0, 4, 2, a, b)); + if(t->arch->supportsFloatCompare(4) and isCJump(codeBody(t, code, ip))) { + c->fcmp(4, a, b); + lastFcmpg = 0; + } else { + frame->pushInt + (c->call + (c->constant + (getThunk(t, compareFloatsGThunk), Compiler::AddressType), + 0, 0, 4, Compiler::IntegerType, 2, a, b)); + } } break; case fcmpl: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, compareFloatsLThunk)), - 0, 0, 4, 2, a, b)); + if(t->arch->supportsFloatCompare(4) and isCJump(codeBody(t, code, ip))) { + c->fcmp(4, a, b); + lastFcmpl = 0; + } else { + frame->pushInt + (c->call + (c->constant + (getThunk(t, compareFloatsLThunk), Compiler::AddressType), + 0, 0, 4, Compiler::IntegerType, 2, a, b)); + } } break; case fconst_0: - frame->pushInt(c->constant(floatToBits(0.0))); + frame->pushInt(c->constant(floatToBits(0.0), Compiler::FloatType)); break; case fconst_1: - frame->pushInt(c->constant(floatToBits(1.0))); + frame->pushInt(c->constant(floatToBits(1.0), Compiler::FloatType)); break; case fconst_2: - frame->pushInt(c->constant(floatToBits(2.0))); + frame->pushInt(c->constant(floatToBits(2.0), Compiler::FloatType)); break; case fdiv: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, divideFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fdiv(4, a, b)); } break; case fmul: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, multiplyFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fmul(4, a, b)); } break; case fneg: { - frame->pushInt - (c->call - (c->constant(getThunk(t, negateFloatThunk)), - 0, 0, 4, 1, frame->popInt())); + frame->pushInt(c->fneg(4, frame->popInt())); } break; case vm::frem: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, moduloFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->frem(4, a, b)); } break; case fsub: { Compiler::Operand* a = frame->popInt(); Compiler::Operand* b = frame->popInt(); - frame->pushInt - (c->call - (c->constant(getThunk(t, subtractFloatThunk)), - 0, 0, 4, 2, a, b)); + frame->pushInt(c->fsub(4, a, b)); } break; case getfield: @@ -3118,10 +3250,12 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, and classNeedsInit(t, fieldClass(t, field))) { c->call - (c->constant(getThunk(t, tryInitClassThunk)), + (c->constant + (getThunk(t, tryInitClassThunk), Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 2, c->register_(t->arch->thread()), frame->append(fieldClass(t, field))); } @@ -3147,8 +3281,10 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, fieldOperand = frame->append(field); c->call - (c->constant(getThunk(t, acquireMonitorForObjectThunk)), - 0, frame->trace(0, 0), 0, 
2, c->register_(t->arch->thread()), + (c->constant + (getThunk(t, acquireMonitorForObjectThunk), Compiler::AddressType), + 0, frame->trace(0, 0), 0, Compiler::VoidType, 2, + c->register_(t->arch->thread()), fieldOperand); } @@ -3156,40 +3292,65 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case ByteField: case BooleanField: frame->pushInt - (c->load(1, 1, c->memory(table, fieldOffset(t, field), 0, 1), - BytesPerWord)); + (c->load + (1, 1, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1), + BytesPerWord)); break; case CharField: frame->pushInt - (c->loadz(2, 2, c->memory(table, fieldOffset(t, field), 0, 1), - BytesPerWord)); + (c->loadz + (2, 2, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1), + BytesPerWord)); break; case ShortField: frame->pushInt - (c->load(2, 2, c->memory(table, fieldOffset(t, field), 0, 1), - BytesPerWord)); + (c->load + (2, 2, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1), + BytesPerWord)); break; case FloatField: + frame->pushInt + (c->load + (4, 4, c->memory + (table, Compiler::FloatType, fieldOffset(t, field), 0, 1), + BytesPerWord)); + break; + case IntField: frame->pushInt - (c->load(4, 4, c->memory(table, fieldOffset(t, field), 0, 1), - BytesPerWord)); + (c->load + (4, 4, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1), + BytesPerWord)); break; case DoubleField: + frame->pushLong + (c->load + (8, 8, c->memory + (table, Compiler::FloatType, fieldOffset(t, field), 0, 1), 8)); + break; + case LongField: frame->pushLong - (c->load(8, 8, c->memory(table, fieldOffset(t, field), 0, 1), 8)); + (c->load + (8, 8, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1), 8)); break; case ObjectField: frame->pushObject (c->load (BytesPerWord, BytesPerWord, - c->memory(table, fieldOffset(t, field), 0, 1), BytesPerWord)); + c->memory + (table, Compiler::ObjectType, fieldOffset(t, field), 0, 1), + BytesPerWord)); break; default: @@ -3202,8 +3363,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, or fieldCode(t, field) == LongField)) { c->call - (c->constant(getThunk(t, releaseMonitorForObjectThunk)), - 0, frame->trace(0, 0), 0, 2, c->register_(t->arch->thread()), + (c->constant + (getThunk(t, releaseMonitorForObjectThunk), + Compiler::AddressType), + 0, frame->trace(0, 0), 0, Compiler::VoidType, 2, + c->register_(t->arch->thread()), fieldOperand); } else { c->loadBarrier(); @@ -3238,17 +3402,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case i2d: { - frame->pushLong - (c->call - (c->constant(getThunk(t, intToDoubleThunk)), - 0, 0, 8, 1, frame->popInt())); + frame->pushLong(c->i2f(4, 8, frame->popInt())); } break; case i2f: { - frame->pushInt - (c->call - (c->constant(getThunk(t, intToFloatThunk)), - 0, 0, 4, 1, frame->popInt())); + frame->pushInt(c->i2f(4, 4, frame->popInt())); } break; case i2l: @@ -3272,31 +3430,31 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case iconst_m1: - frame->pushInt(c->constant(-1)); + frame->pushInt(c->constant(-1, Compiler::IntegerType)); break; case iconst_0: - frame->pushInt(c->constant(0)); + frame->pushInt(c->constant(0, Compiler::IntegerType)); break; case iconst_1: - frame->pushInt(c->constant(1)); + frame->pushInt(c->constant(1, Compiler::IntegerType)); break; case iconst_2: - frame->pushInt(c->constant(2)); + frame->pushInt(c->constant(2, Compiler::IntegerType)); break; case iconst_3: - frame->pushInt(c->constant(3)); + frame->pushInt(c->constant(3, 
Compiler::IntegerType)); break; case iconst_4: - frame->pushInt(c->constant(4)); + frame->pushInt(c->constant(4, Compiler::IntegerType)); break; case iconst_5: - frame->pushInt(c->constant(5)); + frame->pushInt(c->constant(5, Compiler::IntegerType)); break; case idiv: { @@ -3376,28 +3534,69 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, uint32_t newIp = (ip - 3) + offset; assert(t, newIp < codeLength(t, code)); - Compiler::Operand* a = frame->popInt(); Compiler::Operand* target = frame->machineIp(newIp); + Compiler::Operand* cont = frame->machineIp(ip); - c->cmp(4, c->constant(0), a); + if (lastFcmpl != 1 and lastFcmpg != 1) { + Compiler::Operand* a = frame->popInt(); + c->cmp(4, c->constant(0, Compiler::IntegerType), a); + } switch (instruction) { case ifeq: - c->je(target); + if (lastFcmpl == 1 or lastFcmpg == 1) { + c->fjuo(cont); + c->fje(target); + } else { + c->je(target); + } break; case ifne: - c->jne(target); + if (lastFcmpl == 1 or lastFcmpg == 1) { + c->fjuo(cont); + c->fjne(target); + } else { + c->jne(target); + } break; case ifgt: - c->jg(target); + if (lastFcmpl == 1) { + c->fjuo(cont); + c->fjg(target); + } else if (lastFcmpg == 1) { + c->fjg(target); + } else { + c->jg(target); + } break; case ifge: - c->jge(target); + if (lastFcmpl == 1) { + c->fjuo(cont); + c->fjge(target); + } else if (lastFcmpg == 1) { + c->fjge(target); + } else { + c->jge(target); + } break; case iflt: - c->jl(target); + if (lastFcmpg == 1) { + c->fjuo(cont); + c->fjl(target); + } else if (lastFcmpl == 1) { + c->fjl(target); + } else { + c->jl(target); + } break; case ifle: - c->jle(target); + if(lastFcmpg == 1) { + c->fjuo(cont); + c->fjle(target); + } else if (lastFcmpl == 1) { + c->fjle(target); + } else { + c->jle(target); + } break; } @@ -3414,7 +3613,7 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* a = frame->popObject(); Compiler::Operand* target = frame->machineIp(newIp); - c->cmp(BytesPerWord, c->constant(0), a); + c->cmp(BytesPerWord, c->constant(0, Compiler::ObjectType), a); if (instruction == ifnull) { c->je(target); } else { @@ -3431,7 +3630,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, storeLocal (context, 1, - c->add(4, c->constant(count), loadLocal(context, 1, index)), + c->add + (4, c->constant(count, Compiler::IntegerType), + loadLocal(context, 1, index)), index); } break; @@ -3478,8 +3679,8 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, frame->pushInt (c->call - (c->constant(getThunk(t, instanceOf64Thunk)), - 0, 0, 4, + (c->constant(getThunk(t, instanceOf64Thunk), Compiler::AddressType), + 0, 0, 4, Compiler::IntegerType, 3, c->register_(t->arch->thread()), frame->append(class_), frame->popObject())); } break; @@ -3502,15 +3703,18 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* result = c->stackCall (c->call (c->constant - (getThunk(t, findInterfaceMethodFromInstanceThunk)), + (getThunk(t, findInterfaceMethodFromInstanceThunk), + Compiler::AddressType), 0, frame->trace(0, 0), BytesPerWord, + Compiler::AddressType, 3, c->register_(t->arch->thread()), frame->append(target), c->peek(1, instance)), 0, frame->trace(0, 0), rSize, + operandTypeForFieldCode(t, methodReturnCode(t, target)), parameterFootprint); frame->pop(parameterFootprint); @@ -3522,7 +3726,6 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case invokespecial: { uint16_t index = codeReadInt16(t, code, ip); - object target = resolveMethod(t, context->method, index - 1); if (UNLIKELY(t->exception)) return; @@ -3546,9 
+3749,61 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, assert(t, methodFlags(t, target) & ACC_STATIC); - bool tailCall = isTailCall(t, code, ip, context->method, target); + bool usedIntrinsic = false; + int params = methodParameterCount(t, target); + if (params == 1) { + BinaryOperation op = t->arch->binaryIntrinsic + (reinterpret_cast<const char*> + (&byteArrayBody(t, className(t, methodClass(t, target)), 0)), + reinterpret_cast<const char*> + (&byteArrayBody(t, methodName(t, target), 0)), + reinterpret_cast<const char*> + (&byteArrayBody(t, methodSpec(t, target), 0))); - compileDirectInvoke(t, frame, target, tailCall); + if (op != NoBinaryOperation) { + if (DebugIntrinsics) { + fprintf(stderr, "Using binary intrinsic %i.\n", op); + } + int opSize = methodParameterFootprint(t, target) * 4; + int resSize = resultSize(t, methodReturnCode(t, target)); + Compiler::Operand* param; + if (opSize == 4) { + param = frame->popInt(); + } else { + param = frame->popLong(); + } + Compiler::Operand* operand = c->operation + (op, opSize, resSize, operandTypeForFieldCode + (t, methodReturnCode(t, target)), param); + if (resSize == 4) { + frame->pushInt(operand); + } else { + frame->pushLong(operand); + } + usedIntrinsic = true; + } + } else if (params == 2) { + TernaryOperation op = t->arch->ternaryIntrinsic + (reinterpret_cast<const char*> + (&byteArrayBody(t, className(t, methodClass(t, target)), 0)), + reinterpret_cast<const char*> + (&byteArrayBody(t, methodName(t, target), 0)), + reinterpret_cast<const char*> + (&byteArrayBody(t, methodSpec(t, target), 0))); + + if (op != NoTernaryOperation) { + if (DebugIntrinsics) { + fprintf(stderr, "Could use ternary intrinsic %i.\n", op); + } + //int aSize, bSize; + //int resSize = resultSize(t, methodReturnCode(t, target)); + //TODO: use intrinsic + } + } + if (not usedIntrinsic) { + bool tailCall = isTailCall(t, code, ip, context->method, target); + compileDirectInvoke(t, frame, target, tailCall); + } } break; case invokevirtual: { @@ -3572,11 +3827,13 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* result = c->stackCall (c->memory (c->and_ - (BytesPerWord, c->constant(PointerMask), - c->memory(instance, 0, 0, 1)), offset, 0, 1), + (BytesPerWord, c->constant(PointerMask, Compiler::IntegerType), + c->memory(instance, Compiler::ObjectType, 0, 0, 1)), + Compiler::ObjectType, offset, 0, 1), tailCall ? 
Compiler::TailJump : 0, frame->trace(0, 0), rSize, + operandTypeForFieldCode(t, methodReturnCode(t, target)), parameterFootprint); frame->pop(parameterFootprint); @@ -3687,19 +3944,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case l2d: { - frame->pushLong - (c->call - (c->constant(getThunk(t, longToDoubleThunk)), - 0, 0, 8, 2, - static_cast(0), frame->popLong())); + frame->pushLong(c->i2f(8, 8, frame->popLong())); } break; case l2f: { - frame->pushInt - (c->call - (c->constant(getThunk(t, longToFloatThunk)), - 0, 0, 4, 2, - static_cast(0), frame->popLong())); + frame->pushInt(c->i2f(8, 4, frame->popLong())); } break; case l2i: @@ -3726,11 +3975,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, } break; case lconst_0: - frame->pushLong(c->constant(0)); + frame->pushLong(c->constant(0, Compiler::IntegerType)); break; case lconst_1: - frame->pushLong(c->constant(1)); + frame->pushLong(c->constant(1, Compiler::IntegerType)); break; case ldc: @@ -3758,7 +4007,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, frame->pushObject(frame->append(v)); } } else { - frame->pushInt(c->constant(singletonValue(t, pool, index - 1))); + frame->pushInt + (c->constant + (singletonValue(t, pool, index - 1), + singletonIsFloat(t, pool, index - 1) + ? Compiler::FloatType : Compiler::IntegerType)); } } break; @@ -3769,7 +4022,10 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, uint64_t v; memcpy(&v, &singletonValue(t, pool, index - 1), 8); - frame->pushLong(c->constant(v)); + frame->pushLong + (c->constant + (v, singletonIsFloat(t, pool, index - 1) + ? Compiler::FloatType : Compiler::IntegerType)); } break; case ldiv_: { @@ -3849,9 +4105,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, c->jmp (c->call - (c->constant(getThunk(t, lookUpAddressThunk)), - 0, 0, BytesPerWord, - 4, key, start, c->constant(pairCount), default_)); + (c->constant + (getThunk(t, lookUpAddressThunk), Compiler::AddressType), + 0, 0, BytesPerWord, Compiler::AddressType, + 4, key, start, c->constant(pairCount, Compiler::IntegerType), + default_)); Compiler::State* state = c->saveState(); @@ -3945,15 +4203,19 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case monitorenter: { Compiler::Operand* target = frame->popObject(); c->call - (c->constant(getThunk(t, acquireMonitorForObjectThunk)), - 0, frame->trace(0, 0), 0, 2, c->register_(t->arch->thread()), target); + (c->constant + (getThunk(t, acquireMonitorForObjectThunk), Compiler::AddressType), + 0, frame->trace(0, 0), 0, Compiler::VoidType, 2, + c->register_(t->arch->thread()), target); } break; case monitorexit: { Compiler::Operand* target = frame->popObject(); c->call - (c->constant(getThunk(t, releaseMonitorForObjectThunk)), - 0, frame->trace(0, 0), 0, 2, c->register_(t->arch->thread()), target); + (c->constant + (getThunk(t, releaseMonitorForObjectThunk), Compiler::AddressType), + 0, frame->trace(0, 0), 0, Compiler::VoidType, 2, + c->register_(t->arch->thread()), target); } break; case multianewarray: { @@ -3970,12 +4232,15 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, + t->arch->frameReturnAddressSize(); Compiler::Operand* result = c->call - (c->constant(getThunk(t, makeMultidimensionalArrayThunk)), + (c->constant + (getThunk(t, makeMultidimensionalArrayThunk), Compiler::AddressType), 0, frame->trace(0, 0), BytesPerWord, + Compiler::ObjectType, 4, c->register_(t->arch->thread()), frame->append(class_), - c->constant(dimensions), c->constant(offset)); + c->constant(dimensions, Compiler::IntegerType), + 
c->constant(offset, Compiler::IntegerType)); frame->pop(dimensions); frame->pushObject(result); @@ -3990,18 +4255,21 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, if (classVmFlags(t, class_) & (WeakReferenceFlag | HasFinalizerFlag)) { frame->pushObject (c->call - (c->constant(getThunk(t, makeNewGeneral64Thunk)), + (c->constant + (getThunk(t, makeNewGeneral64Thunk), Compiler::AddressType), 0, frame->trace(0, 0), BytesPerWord, + Compiler::ObjectType, 2, c->register_(t->arch->thread()), frame->append(class_))); } else { frame->pushObject (c->call - (c->constant(getThunk(t, makeNew64Thunk)), + (c->constant(getThunk(t, makeNew64Thunk), Compiler::AddressType), 0, frame->trace(0, 0), BytesPerWord, + Compiler::ObjectType, 2, c->register_(t->arch->thread()), frame->append(class_))); } } break; @@ -4013,11 +4281,13 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, frame->pushObject (c->call - (c->constant(getThunk(t, makeBlankArrayThunk)), + (c->constant(getThunk(t, makeBlankArrayThunk), Compiler::AddressType), 0, frame->trace(0, 0), BytesPerWord, - 3, c->register_(t->arch->thread()), c->constant(type), length)); + Compiler::ObjectType, + 3, c->register_(t->arch->thread()), + c->constant(type, Compiler::IntegerType), length)); } break; case nop: break; @@ -4046,10 +4316,12 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, and classNeedsInit(t, fieldClass(t, field))) { c->call - (c->constant(getThunk(t, tryInitClassThunk)), + (c->constant + (getThunk(t, tryInitClassThunk), Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 2, c->register_(t->arch->thread()), frame->append(fieldClass(t, field))); } @@ -4104,9 +4376,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, fieldOperand = frame->append(field); c->call - (c->constant(getThunk(t, acquireMonitorForObjectThunk)), - 0, frame->trace(0, 0), 0, 2, c->register_(t->arch->thread()), - fieldOperand); + (c->constant + (getThunk(t, acquireMonitorForObjectThunk), + Compiler::AddressType), + 0, frame->trace(0, 0), 0, Compiler::VoidType, 2, + c->register_(t->arch->thread()), fieldOperand); } else { c->storeStoreBarrier(); } @@ -4115,42 +4389,59 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, switch (fieldCode(t, field)) { case ByteField: case BooleanField: - c->store(BytesPerWord, value, 1, - c->memory(table, fieldOffset(t, field), 0, 1)); + c->store + (BytesPerWord, value, 1, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1)); break; case CharField: case ShortField: - c->store(BytesPerWord, value, 2, - c->memory(table, fieldOffset(t, field), 0, 1)); + c->store + (BytesPerWord, value, 2, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1)); break; case FloatField: + c->store + (BytesPerWord, value, 4, c->memory + (table, Compiler::FloatType, fieldOffset(t, field), 0, 1)); + break; + case IntField: - c->store(BytesPerWord, value, 4, - c->memory(table, fieldOffset(t, field), 0, 1)); + c->store + (BytesPerWord, value, 4, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1)); break; case DoubleField: + c->store + (8, value, 8, c->memory + (table, Compiler::FloatType, fieldOffset(t, field), 0, 1)); + break; + case LongField: - c->store(8, value, 8, c->memory(table, fieldOffset(t, field), 0, 1)); + c->store + (8, value, 8, c->memory + (table, Compiler::IntegerType, fieldOffset(t, field), 0, 1)); break; case ObjectField: if (instruction == putfield) { c->call - (c->constant(getThunk(t, setMaybeNullThunk)), + (c->constant + (getThunk(t, 
setMaybeNullThunk), Compiler::AddressType), 0, frame->trace(0, 0), 0, + Compiler::VoidType, 4, c->register_(t->arch->thread()), table, - c->constant(fieldOffset(t, field)), value); + c->constant(fieldOffset(t, field), Compiler::IntegerType), value); } else { c->call - (c->constant(getThunk(t, setThunk)), - 0, 0, 0, + (c->constant(getThunk(t, setThunk), Compiler::AddressType), + 0, 0, 0, Compiler::VoidType, 4, c->register_(t->arch->thread()), table, - c->constant(fieldOffset(t, field)), value); + c->constant(fieldOffset(t, field), Compiler::IntegerType), value); } break; @@ -4163,9 +4454,11 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, or fieldCode(t, field) == LongField)) { c->call - (c->constant(getThunk(t, releaseMonitorForObjectThunk)), - 0, frame->trace(0, 0), 0, 2, c->register_(t->arch->thread()), - fieldOperand); + (c->constant + (getThunk(t, releaseMonitorForObjectThunk), + Compiler::AddressType), + 0, frame->trace(0, 0), 0, Compiler::VoidType, 2, + c->register_(t->arch->thread()), fieldOperand); } else { c->storeLoadBarrier(); } @@ -4190,7 +4483,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case sipush: frame->pushInt - (c->constant(static_cast(codeReadInt16(t, code, ip)))); + (c->constant + (static_cast(codeReadInt16(t, code, ip)), + Compiler::IntegerType)); break; case swap: @@ -4227,14 +4522,14 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, Compiler::Operand* key = frame->popInt(); - c->cmp(4, c->constant(bottom), key); + c->cmp(4, c->constant(bottom, Compiler::IntegerType), key); c->jl(frame->machineIp(defaultIp)); c->save(1, key); saveStateAndCompile(t, frame, defaultIp); - c->cmp(4, c->constant(top), key); + c->cmp(4, c->constant(top, Compiler::IntegerType), key); c->jg(frame->machineIp(defaultIp)); c->save(1, key); @@ -4242,11 +4537,14 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, saveStateAndCompile(t, frame, defaultIp); Compiler::Operand* normalizedKey - = (bottom ? c->sub(4, c->constant(bottom), key) : key); + = (bottom + ? 
c->sub(4, c->constant(bottom, Compiler::IntegerType), key) : key); - c->jmp(c->load(BytesPerWord, BytesPerWord, - c->memory(start, 0, normalizedKey, BytesPerWord), - BytesPerWord)); + c->jmp + (c->load + (BytesPerWord, BytesPerWord, c->memory + (start, Compiler::AddressType, 0, normalizedKey, BytesPerWord), + BytesPerWord)); Compiler::State* state = c->saveState(); @@ -4276,7 +4574,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, storeLocal (context, 1, - c->add(4, c->constant(count), loadLocal(context, 1, index)), + c->add + (4, c->constant(count, Compiler::IntegerType), + loadLocal(context, 1, index)), index); } break; @@ -4927,7 +5227,6 @@ makeSimpleFrameMapTable(MyThread* t, Context* context, uint8_t* start, return table; } - uint8_t* finish(MyThread* t, Allocator* allocator, Context* context) { @@ -5062,9 +5361,11 @@ finish(MyThread* t, Allocator* allocator, Context* context) ::strcmp (reinterpret_cast (&byteArrayBody(t, methodName(t, context->method), 0)), - "printStackTrace") == 0) + "write") == 0) { trap(); + fprintf(stderr, "Address: %p\n", + ::vmAddressFromLine(t, (object)(context->method), 1176)); } syncInstructionCache(start, codeSize); @@ -5094,7 +5395,7 @@ compile(MyThread* t, Allocator* allocator, Context* context) unsigned index = methodParameterFootprint(t, context->method); if ((methodFlags(t, context->method) & ACC_STATIC) == 0) { frame.set(--index, Frame::Object); - c->initLocal(1, index); + c->initLocal(1, index, Compiler::ObjectType); } for (MethodSpecIterator it @@ -5106,19 +5407,28 @@ compile(MyThread* t, Allocator* allocator, Context* context) case 'L': case '[': frame.set(--index, Frame::Object); - c->initLocal(1, index); + c->initLocal(1, index, Compiler::ObjectType); break; case 'J': + frame.set(--index, Frame::Long); + frame.set(--index, Frame::Long); + c->initLocal(2, index, Compiler::IntegerType); + break; case 'D': frame.set(--index, Frame::Long); frame.set(--index, Frame::Long); - c->initLocal(2, index); + c->initLocal(2, index, Compiler::FloatType); break; - + + case 'F': + frame.set(--index, Frame::Integer); + c->initLocal(1, index, Compiler::FloatType); + break; + default: frame.set(--index, Frame::Integer); - c->initLocal(1, index); + c->initLocal(1, index, Compiler::IntegerType); break; } } diff --git a/src/compiler.cpp b/src/compiler.cpp index 30441bfa07..622122dd73 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -110,6 +110,8 @@ class Site { virtual unsigned copyCost(Context*, Site*) = 0; virtual bool match(Context*, const SiteMask&) = 0; + + virtual bool loneMatch(Context*, const SiteMask&) = 0; virtual void acquire(Context*, Value*) { } @@ -284,11 +286,16 @@ intersect(const SiteMask& a, const SiteMask& b) intersectFrameIndexes(a.frameIndex, b.frameIndex)); } +enum ValueType { + ValueGeneral, + ValueFloat +}; + class Value: public Compiler::Operand { public: - Value(Site* site, Site* target): + Value(Site* site, Site* target, ValueType type): reads(0), lastRead(0), sites(site), source(0), target(target), buddy(this), - high(0), home(NoFrameIndex) + high(0), home(NoFrameIndex), type(type) { } Read* reads; @@ -299,6 +306,7 @@ class Value: public Compiler::Operand { Value* buddy; Value* high; int8_t home; + ValueType type; }; class Context { @@ -334,12 +342,19 @@ class Context { machineCodeSize(0), alignedFrameSize(0), availableRegisterCount(arch->registerCount()), + floatRegisterCount(arch->floatRegisterCount()), + generalRegisterCount(arch->generalRegisterCount()), constantCompare(CompareNone) { for (unsigned i = 0; i < 
arch->registerCount(); ++i) { new (registerResources + i) RegisterResource(arch->reserved(i)); if (registerResources[i].reserved) { -- availableRegisterCount; + if (arch->generalRegisters() & (1 << i)) { + -- generalRegisterCount; + } else if (arch->floatRegisters() & (1 << i)) { + -- floatRegisterCount; + } } } } @@ -371,6 +386,8 @@ class Context { unsigned machineCodeSize; unsigned alignedFrameSize; unsigned availableRegisterCount; + unsigned floatRegisterCount; + unsigned generalRegisterCount; ConstantCompare constantCompare; }; @@ -969,20 +986,43 @@ buddies(Value* a, Value* b) } void -decrementAvailableRegisterCount(Context* c) +decrementAvailableRegisterCount(Context* c, Value* v) { assert(c, c->availableRegisterCount); -- c->availableRegisterCount; + if (v) { + if (v->type == ValueGeneral) { + -- c->generalRegisterCount; + } else if (v->type == ValueFloat) { + -- c->floatRegisterCount; + } + } else { + -- c->generalRegisterCount; + } + + if (DebugResources) { - fprintf(stderr, "%d registers available\n", c->availableRegisterCount); + fprintf(stderr, "%d registers available - %d float, %d general\n", + c->availableRegisterCount, c->floatRegisterCount, + c->generalRegisterCount); } } void -incrementAvailableRegisterCount(Context* c) +incrementAvailableRegisterCount(Context* c, Value* v) { ++ c->availableRegisterCount; + + if (v) { + if (v->type == ValueGeneral) { + ++ c->generalRegisterCount; + } else if (v->type == ValueFloat) { + ++ c->floatRegisterCount; + } + } else { + ++ c->generalRegisterCount; + } if (DebugResources) { fprintf(stderr, "%d registers available\n", c->availableRegisterCount); @@ -1001,7 +1041,7 @@ increment(Context* c, RegisterResource* r) ++ r->referenceCount; if (r->referenceCount == 1) { - decrementAvailableRegisterCount(c); + decrementAvailableRegisterCount(c, r->value); } } } @@ -1020,7 +1060,7 @@ decrement(Context* c, Resource* r) -- r->referenceCount; if (r->referenceCount == 0) { - incrementAvailableRegisterCount(c); + incrementAvailableRegisterCount(c, r->value); } } } @@ -1043,7 +1083,7 @@ RegisterResource::freeze(Context* c, Value* v) freezeResource(c, this, v); if (freezeCount == 1) { - decrementAvailableRegisterCount(c); + decrementAvailableRegisterCount(c, v); } } } @@ -1076,7 +1116,7 @@ RegisterResource::thaw(Context* c, Value* v) thawResource(c, this, v); if (freezeCount == 0) { - incrementAvailableRegisterCount(c); + incrementAvailableRegisterCount(c, v); } } } @@ -1107,6 +1147,22 @@ class Target { uint8_t cost; }; +ValueType +valueType(Context* c, Compiler::OperandType type) +{ + switch (type) { + case Compiler::ObjectType: + case Compiler::AddressType: + case Compiler::IntegerType: + case Compiler::VoidType: + return ValueGeneral; + case Compiler::FloatType: + return ValueFloat; + default: + abort(c); + } +} + Target pickTarget(Context* c, Read* r, bool intersectRead, unsigned registerReserveCount); @@ -1136,6 +1192,13 @@ pickRegisterTarget(Context* c, Value* v, uint32_t mask, unsigned* cost) { int target = NoRegister; unsigned bestCost = Target::Impossible; + if (v) { + if (v->type == ValueFloat) { + mask &= (c->arch->floatRegisters() | c->arch->generalRegisters()); + } else if(v->type == ValueGeneral) { + mask &= c->arch->generalRegisters(); + } + } for (int i = c->arch->registerCount() - 1; i >= 0; --i) { if ((1 << i) & mask) { RegisterResource* r = c->registerResources + i; @@ -1216,7 +1279,6 @@ pickTarget(Context* c, Value* value, const SiteMask& mask, Target mine = pickRegisterTarget(c, value, mask.registerMask); mine.cost += 
registerPenalty; - if (mine.cost == Target::MinimumRegisterCost) { return mine; } else if (mine.cost < best.cost) { @@ -1224,7 +1286,7 @@ pickTarget(Context* c, Value* value, const SiteMask& mask, } } - if ((mask.typeMask & (1 << MemoryOperand)) && mask.frameIndex >= 0) { + if ((mask.typeMask & (1 << MemoryOperand)) and mask.frameIndex >= 0) { Target mine(mask.frameIndex, MemoryOperand, frameCost(c, value, mask.frameIndex)); if (mine.cost == Target::MinimumFrameCost) { @@ -1241,8 +1303,25 @@ Target pickTarget(Context* c, Read* read, bool intersectRead, unsigned registerReserveCount) { - unsigned registerPenalty = (c->availableRegisterCount > registerReserveCount + /*unsigned registerPenalty = (c->availableRegisterCount > registerReserveCount + ? 0 : Target::LowRegisterPenalty);*/ + unsigned registerPenalty; + if(read->value) { + if(read->value->type == ValueGeneral) { + registerPenalty = (c->generalRegisterCount > registerReserveCount ? 0 : Target::LowRegisterPenalty); + } else if(read->value->type == ValueFloat) { + registerPenalty = (c->floatRegisterCount > registerReserveCount + ? 0 : Target::LowRegisterPenalty); + } else { + abort(c); + } + } else { + registerPenalty + = (c->generalRegisterCount > registerReserveCount + or c->floatRegisterCount > registerReserveCount + ? 0 : Target::LowRegisterPenalty); + } SiteMask mask; read->intersect(&mask); @@ -1301,6 +1380,7 @@ pickTarget(Context* c, Read* read, bool intersectRead, assert(c, best.cost <= 3); } + //if(best.cost == Target::Impossible)asm("int3"); return best; } @@ -1348,6 +1428,10 @@ class ConstantSite: public Site { return mask.typeMask & (1 << ConstantOperand); } + virtual bool loneMatch(Context*, const SiteMask&) { + return true; + } + virtual OperandType type(Context*) { return ConstantOperand; } @@ -1420,6 +1504,10 @@ class AddressSite: public Site { return mask.typeMask & (1 << AddressOperand); } + virtual bool loneMatch(Context*, const SiteMask&) { + return false; + } + virtual OperandType type(Context*) { return AddressOperand; } @@ -1494,6 +1582,16 @@ class RegisterSite: public Site { } } + virtual bool loneMatch(Context* c UNUSED, const SiteMask& mask) { + assert(c, number != NoRegister); + + if ((mask.typeMask & (1 << RegisterOperand))) { + return ((static_cast(1) << number) == mask.registerMask); + } else { + return false; + } + } + virtual void acquire(Context* c, Value* v) { Target target; if (number != NoRegister) { @@ -1637,8 +1735,8 @@ class MemorySite: public Site { if (base == c->arch->stack()) { assert(c, index == NoRegister); return mask.frameIndex == AnyFrameIndex - || (mask.frameIndex != NoFrameIndex - && static_cast(frameIndexToOffset(c, mask.frameIndex)) + or (mask.frameIndex != NoFrameIndex + and static_cast(frameIndexToOffset(c, mask.frameIndex)) == offset); } else { return true; @@ -1648,6 +1746,23 @@ class MemorySite: public Site { } } + virtual bool loneMatch(Context* c, const SiteMask& mask) { + assert(c, acquired); + + if (mask.typeMask & (1 << MemoryOperand)) { + if (base == c->arch->stack()) { + assert(c, index == NoRegister); + + if (mask.frameIndex == AnyFrameIndex) { + return false; + } else { + return true; + } + } + } + return false; + } + virtual void acquire(Context* c, Value* v) { increment(c, c->registerResources + base); if (index != NoRegister) { @@ -1977,17 +2092,20 @@ read(Context* c, const SiteMask& mask, Value* successor = 0) } Read* -anyRegisterRead(Context* c) -{ - return read(c, SiteMask(1 << RegisterOperand, ~0, NoFrameIndex)); -} - -Read* -registerOrConstantRead(Context* c) 
+generalRegisterRead(Context* c) { return read (c, SiteMask - ((1 << RegisterOperand) | (1 << ConstantOperand), ~0, NoFrameIndex)); + (1 << RegisterOperand, c->arch->generalRegisters(), NoFrameIndex)); +} + +Read* +generalRegisterOrConstantRead(Context* c) +{ + return read + (c, SiteMask + ((1 << RegisterOperand) | (1 << ConstantOperand), + c->arch->generalRegisters(), NoFrameIndex)); } Read* @@ -2225,7 +2343,7 @@ addRead(Context* c, Event* e, Value* v, Read* r) fprintf(stderr, "add read %p to %p last %p event %p (%s)\n", r, v, v->lastRead, e, (e ? e->name() : 0)); } - +//if(!e)asm("int3"); r->value = v; if (e) { r->event = e; @@ -2335,7 +2453,7 @@ class CallEvent: public Event { resultSize(resultSize), stackArgumentFootprint(stackArgumentFootprint) { - uint32_t registerMask = ~0; + uint32_t registerMask = c->arch->allRegisters(); if (argumentCount) { assert(c, (flags & Compiler::TailJump) == 0); @@ -2427,10 +2545,10 @@ class CallEvent: public Event { if (static_cast(frameIndex) == returnAddressIndex) { returnAddressSurrogate = stack->value; - addRead(c, this, stack->value, anyRegisterRead(c)); + addRead(c, this, stack->value, generalRegisterRead(c)); } else if (static_cast(frameIndex) == framePointerIndex) { framePointerSurrogate = stack->value; - addRead(c, this, stack->value, anyRegisterRead(c)); + addRead(c, this, stack->value, generalRegisterRead(c)); } else { addRead(c, this, stack->value, read (c, SiteMask(1 << MemoryOperand, 0, frameIndex))); @@ -2647,6 +2765,7 @@ addBuddy(Value* original, Value* buddy) Value* p = original; while (p->buddy != original) p = p->buddy; p->buddy = buddy; + //buddy->type = original->type; if (DebugBuddies) { fprintf(stderr, "add buddy %p to", buddy); @@ -2722,12 +2841,9 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, bool thunk; uint8_t srcTypeMask; uint64_t srcRegisterMask; - uint8_t dstTypeMask; - uint64_t dstRegisterMask; - c->arch->plan(type, dstSize, &srcTypeMask, &srcRegisterMask, - dstSize, &dstTypeMask, &dstRegisterMask, - &thunk); + c->arch->planSource(type, dstSize, &srcTypeMask, &srcRegisterMask, + dstSize, &thunk); assert(c, dstMask.typeMask & srcTypeMask & (1 << RegisterOperand)); @@ -2801,9 +2917,9 @@ maybeMove(Context* c, BinaryOperation type, unsigned srcSize, } Value* -value(Context* c, Site* site = 0, Site* target = 0) +value(Context* c, ValueType type, Site* site = 0, Site* target = 0) { - return new (c->zone->allocate(sizeof(Value))) Value(site, target); + return new (c->zone->allocate(sizeof(Value))) Value(site, target, type); } void @@ -2811,7 +2927,7 @@ split(Context* c, Value* v) { assert(c, v->high == 0); - v->high = value(c); + v->high = value(c, v->type); for (SiteIterator it(v); it.hasMore();) { Site* s = it.next(); removeSite(c, v, s); @@ -2834,7 +2950,7 @@ grow(Context* c, Value* v) { assert(c, v->high == 0); - v->high = value(c); + v->high = value(c, v->type); } class MoveEvent: public Event { @@ -2963,12 +3079,14 @@ appendMove(Context* c, BinaryOperation type, unsigned srcSize, uint8_t dstTypeMask; uint64_t dstRegisterMask; - c->arch->plan(type, srcSelectSize, &srcTypeMask, &srcRegisterMask, - dstSize, &dstTypeMask, &dstRegisterMask, - &thunk); + c->arch->planSource(type, srcSelectSize, &srcTypeMask, &srcRegisterMask, + dstSize, &thunk); assert(c, not thunk); + c->arch->planDestination(type, srcSelectSize, &srcTypeMask, &srcRegisterMask, + dstSize, &dstTypeMask, &dstRegisterMask); + append(c, new (c->zone->allocate(sizeof(MoveEvent))) MoveEvent (c, type, srcSize, srcSelectSize, src, dstSize, dst, @@ 
-2992,10 +3110,13 @@ findConstantSite(Context* c, Value* v) class CompareEvent: public Event { public: - CompareEvent(Context* c, unsigned size, Value* first, Value* second, - const SiteMask& firstMask, const SiteMask& secondMask): - Event(c), size(size), first(first), second(second) + CompareEvent(Context* c, BinaryOperation type, unsigned size, Value* first, + Value* second, const SiteMask& firstMask, + const SiteMask& secondMask): + Event(c), type(type), size(size), first(first), second(second) { + assert(c, type != FloatCompare or + (first->type == ValueFloat and first->type == ValueFloat)); addRead(c, this, first, read(c, firstMask)); addRead(c, this, second, read(c, secondMask)); } @@ -3022,20 +3143,22 @@ class CompareEvent: public Event { } else { c->constantCompare = CompareNone; - apply(c, Compare, size, first->source, 0, size, second->source, 0); + apply(c, type, size, first->source, 0, size, second->source, 0); } popRead(c, this, first); popRead(c, this, second); } - + + BinaryOperation type; unsigned size; Value* first; Value* second; }; void -appendCompare(Context* c, unsigned size, Value* first, Value* second) +appendCompare(Context* c, BinaryOperation op, unsigned size, Value* first, + Value* second) { bool thunk; uint8_t firstTypeMask; @@ -3043,15 +3166,17 @@ appendCompare(Context* c, unsigned size, Value* first, Value* second) uint8_t secondTypeMask; uint64_t secondRegisterMask; - c->arch->plan(Compare, size, &firstTypeMask, &firstRegisterMask, - size, &secondTypeMask, &secondRegisterMask, - &thunk); + c->arch->planSource(op, size, &firstTypeMask, &firstRegisterMask, + size, &thunk); assert(c, not thunk); // todo + c->arch->planDestination(op, size, &firstTypeMask, &firstRegisterMask, + size, &secondTypeMask, &secondRegisterMask); + append(c, new (c->zone->allocate(sizeof(CompareEvent))) CompareEvent - (c, size, first, second, + (c, op, size, first, second, SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex))); } @@ -3072,7 +3197,11 @@ getTarget(Context* c, Value* value, Value* result, const SiteMask& resultMask) Site* s; Value* v; Read* r = liveNext(c, value); - if (c->arch->condensedAddressing() or r == 0) { + if (value->source->match + (c, static_cast(resultMask)) + and (r == 0 or value->source->loneMatch + (c, static_cast(resultMask)))) + { s = value->source; v = value; if (r and not hasMoreThanOneSite(v)) { @@ -3116,6 +3245,13 @@ thawSource(Context* c, unsigned size, Value* v) } } +uint64_t +registerMask(Value* v) { + Site* s = source(v); + if(!s) return 0; + else return static_cast(1) << ((RegisterSite*)s)->number; +} + class CombineEvent: public Event { public: CombineEvent(Context* c, TernaryOperation type, @@ -3125,13 +3261,10 @@ class CombineEvent: public Event { const SiteMask& firstLowMask, const SiteMask& firstHighMask, const SiteMask& secondLowMask, - const SiteMask& secondHighMask, - const SiteMask& resultLowMask, - const SiteMask& resultHighMask): + const SiteMask& secondHighMask): Event(c), type(type), firstSize(firstSize), first(first), secondSize(secondSize), second(second), resultSize(resultSize), - result(result), resultLowMask(resultLowMask), - resultHighMask(resultHighMask) + result(result) { addRead(c, this, first, read(c, firstLowMask)); if (firstSize > BytesPerWord) { @@ -3142,7 +3275,7 @@ class CombineEvent: public Event { grow(c, result); } - bool condensed = c->arch->condensedAddressing(); + bool condensed = c->arch->alwaysCondensed(type); addRead(c, this, second, read(c, 
secondLowMask, condensed ? result : 0)); if (secondSize > BytesPerWord) { @@ -3157,6 +3290,21 @@ class CombineEvent: public Event { virtual void compile(Context* c) { freezeSource(c, firstSize, first); + + uint8_t aTypeMask = first->source->type(c); + uint8_t bTypeMask = second->source->type(c); + uint8_t cTypeMask; + uint64_t aRegisterMask + = (registerMask(first->high) << 32) | registerMask(first); + uint64_t bRegisterMask + = (registerMask(second->high) << 32) | registerMask(second); + uint64_t cRegisterMask; + + c->arch->planDestination + (type, firstSize, &aTypeMask, &aRegisterMask, secondSize, &bTypeMask, + &bRegisterMask, resultSize, &cTypeMask, &cRegisterMask); + SiteMask resultLowMask(cTypeMask, cRegisterMask, AnyFrameIndex); + SiteMask resultHighMask(cTypeMask, cRegisterMask >> 32, AnyFrameIndex); Site* low = getTarget(c, second, result, resultLowMask); Site* high @@ -3195,8 +3343,6 @@ class CombineEvent: public Event { Value* second; unsigned resultSize; Value* result; - SiteMask resultLowMask; - SiteMask resultHighMask; }; void @@ -3496,13 +3642,10 @@ appendCombine(Context* c, TernaryOperation type, uint64_t firstRegisterMask; uint8_t secondTypeMask; uint64_t secondRegisterMask; - uint8_t resultTypeMask; - uint64_t resultRegisterMask; - c->arch->plan(type, firstSize, &firstTypeMask, &firstRegisterMask, + c->arch->planSource(type, firstSize, &firstTypeMask, &firstRegisterMask, secondSize, &secondTypeMask, &secondRegisterMask, - resultSize, &resultTypeMask, &resultRegisterMask, - &thunk); + resultSize, &thunk); if (thunk) { Stack* oldStack = c->stack; @@ -3514,7 +3657,9 @@ appendCombine(Context* c, TernaryOperation type, c->stack = oldStack; appendCall - (c, value(c, constantSite(c, c->client->getThunk(type, resultSize))), + (c, value + (c, ValueGeneral, constantSite + (c, c->client->getThunk(type, firstSize, resultSize))), 0, 0, result, resultSize, argumentStack, ceiling(secondSize, BytesPerWord) + ceiling(firstSize, BytesPerWord), 0); @@ -3529,24 +3674,20 @@ appendCombine(Context* c, TernaryOperation type, SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex), SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex), - SiteMask(secondTypeMask, secondRegisterMask >> 32, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask >> 32, AnyFrameIndex))); + SiteMask(secondTypeMask, secondRegisterMask >> 32, AnyFrameIndex))); } } class TranslateEvent: public Event { public: - TranslateEvent(Context* c, BinaryOperation type, unsigned size, Value* value, - Value* result, + TranslateEvent(Context* c, BinaryOperation type, unsigned size, + unsigned resSize, Value* value, Value* result, const SiteMask& valueLowMask, - const SiteMask& valueHighMask, - const SiteMask& resultLowMask, - const SiteMask& resultHighMask): - Event(c), type(type), size(size), value(value), result(result), - resultLowMask(resultLowMask), resultHighMask(resultHighMask) + const SiteMask& valueHighMask): + Event(c), type(type), size(size), resSize(resSize), value(value), + result(result) { - bool condensed = c->arch->condensedAddressing(); + bool condensed = c->arch->alwaysCondensed(type); addRead(c, this, value, read(c, valueLowMask, condensed ? 
result : 0)); if (size > BytesPerWord) { @@ -3561,6 +3702,18 @@ class TranslateEvent: public Event { } virtual void compile(Context* c) { + uint8_t aTypeMask = value->source->type(c); + uint8_t bTypeMask; + uint64_t aRegisterMask + = (registerMask(value->high) << 32) | registerMask(value); + uint64_t bRegisterMask; + + c->arch->planDestination + (type, size, &aTypeMask, &aRegisterMask, resSize, &bTypeMask, + &bRegisterMask); + SiteMask resultLowMask(bTypeMask, bRegisterMask, AnyFrameIndex); + SiteMask resultHighMask(bTypeMask, bRegisterMask >> 32, AnyFrameIndex); + Site* low = getTarget(c, value, result, resultLowMask); Site* high = (size > BytesPerWord @@ -3569,7 +3722,7 @@ class TranslateEvent: public Event { apply(c, type, size, value->source, source(value->high), - size, low, high); + resSize, low, high); for (Read* r = reads; r; r = r->eventNext) { popRead(c, this, r->value); @@ -3590,6 +3743,7 @@ class TranslateEvent: public Event { BinaryOperation type; unsigned size; + unsigned resSize; Value* value; Value* result; Read* resultRead; @@ -3598,28 +3752,37 @@ class TranslateEvent: public Event { }; void -appendTranslate(Context* c, BinaryOperation type, unsigned size, Value* value, - Value* result) +appendTranslate(Context* c, BinaryOperation type, unsigned firstSize, + Value* first, unsigned resultSize, Value* result) { bool thunk; uint8_t firstTypeMask; uint64_t firstRegisterMask; - uint8_t resultTypeMask; - uint64_t resultRegisterMask; - c->arch->plan(type, size, &firstTypeMask, &firstRegisterMask, - size, &resultTypeMask, &resultRegisterMask, - &thunk); + c->arch->planSource(type, firstSize, &firstTypeMask, &firstRegisterMask, + resultSize, &thunk); - assert(c, not thunk); // todo + if (thunk) { + Stack* oldStack = c->stack; - append(c, new (c->zone->allocate(sizeof(TranslateEvent))) - TranslateEvent - (c, type, size, value, result, - SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), - SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask, AnyFrameIndex), - SiteMask(resultTypeMask, resultRegisterMask >> 32, AnyFrameIndex))); + local::push(c, ceiling(firstSize, BytesPerWord), first, false); + + Stack* argumentStack = c->stack; + c->stack = oldStack; + + appendCall + (c, value + (c, ValueGeneral, constantSite + (c, c->client->getThunk(type, firstSize, resultSize))), + 0, 0, result, resultSize, argumentStack, + ceiling(firstSize, BytesPerWord), 0); + } else { + append(c, new (c->zone->allocate(sizeof(TranslateEvent))) + TranslateEvent + (c, type, firstSize, resultSize, first, result, + SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex), + SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex))); + } } class BarrierEvent: public Event { @@ -3652,9 +3815,9 @@ class MemoryEvent: public Event { Event(c), base(base), displacement(displacement), index(index), scale(scale), result(result) { - addRead(c, this, base, anyRegisterRead(c)); + addRead(c, this, base, generalRegisterRead(c)); if (index) { - addRead(c, this, index, registerOrConstantRead(c)); + addRead(c, this, index, generalRegisterOrConstantRead(c)); } } @@ -3836,8 +3999,8 @@ class BoundsCheckEvent: public Event { Event(c), object(object), lengthOffset(lengthOffset), index(index), handler(handler) { - addRead(c, this, object, anyRegisterRead(c)); - addRead(c, this, index, registerOrConstantRead(c)); + addRead(c, this, object, generalRegisterRead(c)); + addRead(c, this, index, generalRegisterOrConstantRead(c)); } virtual const char* name() { @@ 
-3975,7 +4138,8 @@ class BuddyEvent: public Event { BuddyEvent(Context* c, Value* original, Value* buddy): Event(c), original(original), buddy(buddy) { - addRead(c, this, original, read(c, SiteMask(~0, ~0, AnyFrameIndex))); + addRead(c, this, original, read + (c, SiteMask(~0, c->arch->allRegisters(), AnyFrameIndex))); } virtual const char* name() { @@ -4153,7 +4317,7 @@ acceptForResolve(Context* c, Site* s, Read* read, const SiteMask& mask) { if (acceptMatch(c, s, read, mask) and (not s->frozen(c))) { if (s->type(c) == RegisterOperand) { - return c->availableRegisterCount > ResolveRegisterReserveCount; + return c->generalRegisterCount > ResolveRegisterReserveCount; } else { assert(c, s->match(c, SiteMask(1 << MemoryOperand, 0, AnyFrameIndex))); @@ -4172,7 +4336,7 @@ pickSourceSite(Context* c, Read* read, Site* target = 0, bool (*accept)(Context*, Site*, Read*, const SiteMask&) = acceptMatch) { - SiteMask mask(typeMask, ~0, AnyFrameIndex); + SiteMask mask(typeMask, c->arch->allRegisters(), AnyFrameIndex); if (intersectRead) { read->intersect(&mask); @@ -4595,7 +4759,6 @@ populateSources(Context* c, Event* e) for (Read* r = e->reads; r; r = r->eventNext) { r->value->source = readSource(c, r); - if (r->value->source) { if (DebugReads) { char buffer[256]; r->value->source->toString(c, buffer, 256); @@ -4925,7 +5088,7 @@ Value* maybeBuddy(Context* c, Value* v) { if (v->home >= 0) { - Value* n = value(c); + Value* n = value(c, v->type); appendBuddy(c, v, n); return n; } else { @@ -5116,7 +5279,7 @@ class MyCompiler: public Compiler { for (unsigned li = 0; li < c.localFootprint; ++li) { Local* local = c.locals + li; if (local->value == 0) { - initLocal(1, li); + initLocal(1, li, IntegerType); } } } @@ -5149,24 +5312,26 @@ class MyCompiler: public Compiler { return p; } - virtual Operand* constant(int64_t value) { - return promiseConstant(resolved(&c, value)); + virtual Operand* constant(int64_t value, OperandType type) { + return promiseConstant(resolved(&c, value), type); } - virtual Operand* promiseConstant(Promise* value) { - return local::value(&c, local::constantSite(&c, value)); + virtual Operand* promiseConstant(Promise* value, OperandType type) { + return local::value + (&c, valueType(&c, type), local::constantSite(&c, value)); } virtual Operand* address(Promise* address) { - return value(&c, local::addressSite(&c, address)); + return value(&c, ValueGeneral, local::addressSite(&c, address)); } virtual Operand* memory(Operand* base, + OperandType type, int displacement = 0, Operand* index = 0, unsigned scale = 1) { - Value* result = value(&c); + Value* result = value(&c, valueType(&c, type)); appendMemory(&c, static_cast(base), displacement, static_cast(index), scale, result); @@ -5175,8 +5340,13 @@ class MyCompiler: public Compiler { } virtual Operand* register_(int number) { + assert(&c, (1 << number) & c.arch->allRegisters()); + Site* s = registerSite(&c, number); - return value(&c, s, s); + ValueType type = ((1 << number) & c.arch->floatRegisters()) + ? 
ValueFloat: ValueGeneral; + + return value(&c, type, s, s); } Promise* machineIp() { @@ -5186,8 +5356,9 @@ class MyCompiler: public Compiler { virtual void push(unsigned footprint UNUSED) { assert(&c, footprint == 1); - Value* v = value(&c); + Value* v = value(&c, ValueFloat); Stack* s = local::stack(&c, v, c.stack); + v->home = frameIndex(&c, s->index + c.localFootprint); c.stack = s; } @@ -5211,7 +5382,7 @@ class MyCompiler: public Compiler { } virtual void pushed() { - Value* v = value(&c); + Value* v = value(&c, ValueFloat); appendFrameSite (&c, v, frameIndex (&c, (c.stack ? c.stack->index : 0) + c.localFootprint)); @@ -5275,6 +5446,7 @@ class MyCompiler: public Compiler { unsigned flags, TraceHandler* traceHandler, unsigned resultSize, + OperandType resultType, unsigned argumentCount, ...) { @@ -5312,7 +5484,7 @@ class MyCompiler: public Compiler { (&c, RUNTIME_ARRAY_BODY(arguments)[i], argumentStack); } - Value* result = value(&c); + Value* result = value(&c, valueType(&c, resultType)); appendCall(&c, static_cast(address), flags, traceHandler, result, resultSize, argumentStack, index, 0); @@ -5323,9 +5495,10 @@ class MyCompiler: public Compiler { unsigned flags, TraceHandler* traceHandler, unsigned resultSize, + OperandType resultType, unsigned argumentFootprint) { - Value* result = value(&c); + Value* result = value(&c, valueType(&c, resultType)); appendCall(&c, static_cast(address), flags, traceHandler, result, resultSize, c.stack, 0, argumentFootprint); return result; @@ -5335,10 +5508,11 @@ class MyCompiler: public Compiler { appendReturn(&c, size, static_cast(value)); } - virtual void initLocal(unsigned footprint, unsigned index) { + virtual void initLocal(unsigned footprint, unsigned index, OperandType type) + { assert(&c, index + footprint <= c.localFootprint); - Value* v = value(&c); + Value* v = value(&c, valueType(&c, type)); if (footprint > 1) { assert(&c, footprint == 2); @@ -5354,7 +5528,7 @@ class MyCompiler: public Compiler { } if (BytesPerWord == 4) { - initLocal(1, highIndex); + initLocal(1, highIndex, type); v->high = c.locals[highIndex].value; } @@ -5385,7 +5559,8 @@ class MyCompiler: public Compiler { for (int i = 0; i < static_cast(c.localFootprint); ++i) { Local* local = e->localsBefore + i; if (local->value) { - initLocal(1, i); + initLocal + (1, i, local->value->type == ValueGeneral ? 
IntegerType : FloatType); } } @@ -5435,7 +5610,7 @@ class MyCompiler: public Compiler { { assert(&c, dstSize >= BytesPerWord); - Value* dst = value(&c); + Value* dst = value(&c, static_cast(src)->type); appendMove(&c, Move, srcSize, srcSelectSize, static_cast(src), dstSize, dst); return dst; @@ -5446,24 +5621,36 @@ class MyCompiler: public Compiler { { assert(&c, dstSize >= BytesPerWord); - Value* dst = value(&c); + Value* dst = value(&c, static_cast(src)->type); appendMove(&c, MoveZ, srcSize, srcSelectSize, static_cast(src), dstSize, dst); return dst; } virtual Operand* lcmp(Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, LongCompare, 8, static_cast(a), 8, static_cast(b), 8, result); return result; } virtual void cmp(unsigned size, Operand* a, Operand* b) { - appendCompare(&c, size, static_cast(a), + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + appendCompare(&c, Compare, size, static_cast(a), static_cast(b)); } + virtual void fcmp(unsigned size, Operand* a, Operand* b) { + assert(&c, static_cast(a)->type == ValueFloat + and static_cast(b)->type == ValueFloat); + appendCompare(&c, FloatCompare, size, static_cast(a), + static_cast(b)); + } + + virtual void jl(Operand* address) { appendBranch(&c, JumpIfLess, static_cast(address)); } @@ -5488,6 +5675,34 @@ class MyCompiler: public Compiler { appendBranch(&c, JumpIfNotEqual, static_cast(address)); } + virtual void fjl(Operand* address) { + appendBranch(&c, JumpIfFloatLess, static_cast(address)); + } + + virtual void fjg(Operand* address) { + appendBranch(&c, JumpIfFloatGreater, static_cast(address)); + } + + virtual void fjle(Operand* address) { + appendBranch(&c, JumpIfFloatLessOrEqual, static_cast(address)); + } + + virtual void fjge(Operand* address) { + appendBranch(&c, JumpIfFloatGreaterOrEqual, static_cast(address)); + } + + virtual void fje(Operand* address) { + appendBranch(&c, JumpIfFloatEqual, static_cast(address)); + } + + virtual void fjne(Operand* address) { + appendBranch(&c, JumpIfFloatNotEqual, static_cast(address)); + } + + virtual void fjuo(Operand* address) { + appendBranch(&c, JumpIfFloatUnordered, static_cast(address)); + } + virtual void jmp(Operand* address) { appendBranch(&c, Jump, static_cast(address)); } @@ -5497,85 +5712,202 @@ class MyCompiler: public Compiler { } virtual Operand* add(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Add, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* sub(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Subtract, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* mul(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Multiply, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* div(unsigned size, Operand* a, Operand* b) { - 
Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Divide, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* rem(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral + and static_cast(b)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Remainder, size, static_cast(a), size, static_cast(b), size, result); return result; } + virtual Operand* fadd(unsigned size, Operand* a, Operand* b) { + assert(&c, static_cast(a)->type == ValueFloat + and static_cast(b)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatAdd, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* fsub(unsigned size, Operand* a, Operand* b) { + assert(&c, static_cast(a)->type == ValueFloat + and static_cast(b)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatSubtract, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* fmul(unsigned size, Operand* a, Operand* b) { + assert(&c, static_cast(a)->type == ValueFloat + and static_cast(b)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + static_cast(a)->type = static_cast(b)->type = ValueFloat; + appendCombine(&c, FloatMultiply, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* fdiv(unsigned size, Operand* a, Operand* b) { + assert(&c, static_cast(a)->type == ValueFloat + and static_cast(b)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + appendCombine(&c, FloatDivide, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + + virtual Operand* frem(unsigned size, Operand* a, Operand* b) { + assert(&c, static_cast(a)->type == ValueFloat + and static_cast(b)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + appendCombine(&c, FloatRemainder, size, static_cast(a), + size, static_cast(b), size, result); + return result; + } + virtual Operand* shl(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, ShiftLeft, BytesPerWord, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* shr(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, ShiftRight, BytesPerWord, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* ushr(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); - appendCombine(&c, UnsignedShiftRight, BytesPerWord, static_cast(a), - size, static_cast(b), size, result); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); + appendCombine + (&c, UnsignedShiftRight, BytesPerWord, static_cast(a), size, + static_cast(b), size, result); return result; } virtual Operand* and_(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, 
ValueGeneral); appendCombine(&c, And, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* or_(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Or, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* xor_(unsigned size, Operand* a, Operand* b) { - Value* result = value(&c); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); appendCombine(&c, Xor, size, static_cast(a), size, static_cast(b), size, result); return result; } virtual Operand* neg(unsigned size, Operand* a) { - Value* result = value(&c); - appendTranslate(&c, Negate, size, static_cast(a), result); + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueGeneral); + appendTranslate(&c, Negate, size, static_cast(a), size, result); + return result; + } + + virtual Operand* fneg(unsigned size, Operand* a) { + assert(&c, static_cast(a)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + appendTranslate + (&c, FloatNegate, size, static_cast(a), size, result); + return result; + } + + virtual Operand* operation(BinaryOperation op, unsigned aSize, + unsigned resSize, OperandType resType, Operand* a) + { + Value* result = value(&c, valueType(&c, resType)); + appendTranslate(&c, op, aSize, static_cast(a), resSize, result); + return result; + } + + virtual Operand* operation(TernaryOperation op, unsigned aSize, + unsigned bSize, unsigned resSize, + OperandType resType, Operand* a, Operand* b) + { + Value* result = value(&c, valueType(&c, resType)); + appendCombine + (&c, op, aSize, static_cast(a), bSize, static_cast(b), + resSize, result); + return result; + } + + virtual Operand* f2f(unsigned aSize, unsigned resSize, Operand* a) { + assert(&c, static_cast(a)->type == ValueFloat); + Value* result = value(&c, ValueFloat); + appendTranslate + (&c, Float2Float, aSize, static_cast(a), resSize, result); + return result; + } + + virtual Operand* f2i(unsigned aSize, unsigned resSize, Operand* a) { + assert(&c, static_cast(a)->type == ValueFloat); + Value* result = value(&c, ValueGeneral); + appendTranslate + (&c, Float2Int, aSize, static_cast(a), resSize, result); + return result; + } + + virtual Operand* i2f(unsigned aSize, unsigned resSize, Operand* a) { + assert(&c, static_cast(a)->type == ValueGeneral); + Value* result = value(&c, ValueFloat); + appendTranslate + (&c, Int2Float, aSize, static_cast(a), resSize, result); return result; } diff --git a/src/compiler.h b/src/compiler.h index 8c40def7cc..0d4f9fa8fc 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -27,13 +27,24 @@ class Compiler { class Client { public: virtual intptr_t getThunk(UnaryOperation op, unsigned size) = 0; - virtual intptr_t getThunk(TernaryOperation op, unsigned size) = 0; + virtual intptr_t getThunk(BinaryOperation op, unsigned size, + unsigned resultSize) = 0; + virtual intptr_t getThunk(TernaryOperation op, unsigned size, + unsigned resultSize) = 0; }; static const unsigned Aligned = 1 << 0; static const unsigned NoReturn = 1 << 1; static const unsigned TailJump = 1 << 2; + enum OperandType { + ObjectType, + AddressType, + IntegerType, + FloatType, + VoidType + }; + class Operand { }; class State { }; class Subroutine { }; @@ -56,10 +67,11 @@ class Compiler { virtual Promise* poolAppend(intptr_t value) = 0; virtual Promise* poolAppendPromise(Promise* value) = 0; - virtual 
Operand* constant(int64_t value) = 0; - virtual Operand* promiseConstant(Promise* value) = 0; + virtual Operand* constant(int64_t value, OperandType type) = 0; + virtual Operand* promiseConstant(Promise* value, OperandType type) = 0; virtual Operand* address(Promise* address) = 0; virtual Operand* memory(Operand* base, + OperandType type, int displacement = 0, Operand* index = 0, unsigned scale = 1) = 0; @@ -79,6 +91,7 @@ class Compiler { unsigned flags, TraceHandler* traceHandler, unsigned resultSize, + OperandType resultType, unsigned argumentCount, ...) = 0; @@ -86,11 +99,12 @@ class Compiler { unsigned flags, TraceHandler* traceHandler, unsigned resultSize, + OperandType resultType, unsigned argumentFootprint) = 0; virtual void return_(unsigned size, Operand* value) = 0; - virtual void initLocal(unsigned size, unsigned index) = 0; + virtual void initLocal(unsigned size, unsigned index, OperandType type) = 0; virtual void initLocalsFromLogicalIp(unsigned logicalIp) = 0; virtual void storeLocal(unsigned footprint, Operand* src, unsigned index) = 0; @@ -108,12 +122,20 @@ class Compiler { unsigned dstSize) = 0; virtual Operand* lcmp(Operand* a, Operand* b) = 0; virtual void cmp(unsigned size, Operand* a, Operand* b) = 0; + virtual void fcmp(unsigned size, Operand* a, Operand* b) = 0; virtual void jl(Operand* address) = 0; virtual void jg(Operand* address) = 0; virtual void jle(Operand* address) = 0; virtual void jge(Operand* address) = 0; virtual void je(Operand* address) = 0; virtual void jne(Operand* address) = 0; + virtual void fjl(Operand* address) = 0; + virtual void fjg(Operand* address) = 0; + virtual void fjle(Operand* address) = 0; + virtual void fjge(Operand* address) = 0; + virtual void fje(Operand* address) = 0; + virtual void fjne(Operand* address) = 0; + virtual void fjuo(Operand* address) = 0; virtual void jmp(Operand* address) = 0; virtual void exit(Operand* address) = 0; virtual Operand* add(unsigned size, Operand* a, Operand* b) = 0; @@ -121,6 +143,11 @@ class Compiler { virtual Operand* mul(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* div(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* rem(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fadd(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fsub(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fmul(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* fdiv(unsigned size, Operand* a, Operand* b) = 0; + virtual Operand* frem(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* shl(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* shr(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* ushr(unsigned size, Operand* a, Operand* b) = 0; @@ -128,6 +155,16 @@ class Compiler { virtual Operand* or_(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* xor_(unsigned size, Operand* a, Operand* b) = 0; virtual Operand* neg(unsigned size, Operand* a) = 0; + virtual Operand* fneg(unsigned size, Operand* a) = 0; + virtual Operand* operation(BinaryOperation op, unsigned aSize, + unsigned resSize, OperandType resType, + Operand* a) = 0; + virtual Operand* operation(TernaryOperation op, unsigned aSize, + unsigned bSize, unsigned resSize, + OperandType resType, Operand* a, Operand* b) = 0; + virtual Operand* f2f(unsigned aSize, unsigned resSize, Operand* a) = 0; + virtual Operand* f2i(unsigned aSize, unsigned resSize, Operand* a) = 0; + virtual Operand* i2f(unsigned aSize, unsigned resSize, Operand* a) = 0; virtual void 
loadBarrier() = 0; virtual void storeStoreBarrier() = 0; diff --git a/src/machine.cpp b/src/machine.cpp index 3929d853b1..271589d60b 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -826,8 +826,13 @@ object parsePool(Thread* t, Stream& s) { unsigned count = s.read2() - 1; - - object pool = makeSingletonOfSize(t, count); + unsigned old; + unsigned floatMaskSize = 0; + do { + old = floatMaskSize; + floatMaskSize = singletonMaskSize(count + floatMaskSize); + } while (floatMaskSize != old); + object pool = makeSingletonOfSize(t, count + floatMaskSize); PROTECT(t, pool); if (count) { @@ -839,12 +844,18 @@ parsePool(Thread* t, Stream& s) switch (s.read1()) { case CONSTANT_Class: case CONSTANT_String: + assert(t, !singletonIsFloat(t, pool, i)); singletonMarkObject(t, pool, i); s.skip(2); break; case CONSTANT_Integer: + assert(t, !singletonIsFloat(t, pool, i)); + s.skip(4); + break; case CONSTANT_Float: + singletonMarkBit(t, pool, count, i); + assert(t, singletonIsFloat(t, pool, i)); s.skip(4); break; @@ -852,17 +863,27 @@ parsePool(Thread* t, Stream& s) case CONSTANT_Fieldref: case CONSTANT_Methodref: case CONSTANT_InterfaceMethodref: + assert(t, !singletonIsFloat(t, pool, i)); singletonMarkObject(t, pool, i); s.skip(4); break; case CONSTANT_Long: + assert(t, !singletonIsFloat(t, pool, i)); + s.skip(8); + ++ i; + break; case CONSTANT_Double: + singletonMarkBit(t, pool, count, i); + singletonMarkBit(t, pool, count, i + 1); + assert(t, singletonIsFloat(t, pool, i)); + assert(t, singletonIsFloat(t, pool, i + 1)); s.skip(8); ++ i; break; case CONSTANT_Utf8: + assert(t, !singletonIsFloat(t, pool, i)); singletonMarkObject(t, pool, i); s.skip(s.read2()); break; diff --git a/src/machine.h b/src/machine.h index 125198745b..3b485cf20a 100644 --- a/src/machine.h +++ b/src/machine.h @@ -2530,6 +2530,26 @@ makeSingletonOfSize(Thread* t, unsigned count) return o; } +inline void +singletonMarkBit(Thread* t, object singleton, unsigned start, unsigned index) +{ + uintptr_t& val = singletonValue(t, singleton, start + (index / BitsPerWord)); + val |= static_cast(1) << (index % BitsPerWord); +} + +inline bool +singletonGetBit(Thread* t, object singleton, unsigned start, unsigned index) +{ + uintptr_t& val = singletonValue(t, singleton, start + (index / BitsPerWord)); + return (val & static_cast(1) << (index % BitsPerWord)) != 0; +} + +inline bool +singletonIsFloat(Thread* t, object singleton, unsigned index) +{ + return singletonGetBit(t, singleton, singletonLength(t, singleton) - 2 * singletonMaskSize(t, singleton), index); +} + inline object resolveClassInObject(Thread* t, object loader, object container, unsigned classOffset) diff --git a/src/powerpc.cpp b/src/powerpc.cpp index 073dc8bbf7..f11efa9d12 100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -1680,6 +1680,14 @@ class MyArchitecture: public Assembler::Architecture { return 32; } + virtual unsigned generalRegisterCount() { + return 32; + } + + virtual unsigned floatRegisterCount() { + return 0; + } + virtual int stack() { return StackRegister; } @@ -1704,10 +1712,6 @@ class MyArchitecture: public Assembler::Architecture { return 3; } - virtual bool condensedAddressing() { - return false; - } - virtual bool bigEndian() { return true; } @@ -1741,6 +1745,18 @@ class MyArchitecture: public Assembler::Architecture { return index + 3; } + + virtual uint64_t generalRegisters() { + return (static_cast(1) << 32) - 1; + } + + virtual uint64_t floatRegisters() { + return 0; + } + + virtual uint64_t allRegisters() { + return generalRegisters() | 
floatRegisters();
+  }
 
   virtual unsigned stackAlignmentInWords() {
     return StackAlignmentInWords;
@@ -1823,6 +1839,26 @@ class MyArchitecture: public Assembler::Architecture {
     *stack = *static_cast(*stack);
   }
 
+  virtual BinaryOperation binaryIntrinsic(const char*, const char*,
+                                          const char*) {
+    return NoBinaryOperation;
+  }
+
+  virtual TernaryOperation ternaryIntrinsic(const char*, const char*,
+                                            const char*) {
+    return NoTernaryOperation;
+  }
+
+  virtual bool supportsFloatCompare(unsigned) {
+    return false;
+  }
+
+  virtual bool alwaysCondensed(BinaryOperation) {
+    return false;
+  }
+
+  virtual bool alwaysCondensed(TernaryOperation) {
+    return false;
+  }
+
   virtual void plan
   (UnaryOperation,
    unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
@@ -1833,42 +1869,62 @@ class MyArchitecture: public Assembler::Architecture {
     *thunk = false;
   }
 
-  virtual void plan
+  virtual void planSource
   (BinaryOperation op,
    unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
-   unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask,
-   bool* thunk)
+   unsigned, bool* thunk)
   {
     *aTypeMask = ~0;
     *aRegisterMask = ~static_cast(0);
 
-    *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
-    *bRegisterMask = ~static_cast(0);
-
     *thunk = false;
 
     switch (op) {
     case Compare:
       *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
-      *bTypeMask = (1 << RegisterOperand);
       break;
 
     case Negate:
       *aTypeMask = (1 << RegisterOperand);
+      break;
+    case FloatCompare:
+    case FloatNegate:
+    case Float2Float:
+    case Float2Int:
+    case Int2Float:
+      *thunk = true;
+      break;
+    default:
+      break;
+    }
+  }
+
+  virtual void planDestination
+  (BinaryOperation op,
+   unsigned, const uint8_t*, const uint64_t*,
+   unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask)
+  {
+    *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+    *bRegisterMask = ~static_cast(0);
+
+    switch (op) {
+    case Compare:
       *bTypeMask = (1 << RegisterOperand);
       break;
+    case Negate:
+      *bTypeMask = (1 << RegisterOperand);
+      break;
 
     default:
       break;
     }
   }
 
-  virtual void plan
+  virtual void planSource
   (TernaryOperation op,
    unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
    unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask,
-   unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask,
-   bool* thunk)
+   unsigned, bool* thunk)
   {
     *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
     *aRegisterMask = ~static_cast(0);
@@ -1904,12 +1960,27 @@ class MyArchitecture: public Assembler::Architecture {
       }
       break;
 
+    case FloatAdd:
+    case FloatSubtract:
+    case FloatMultiply:
+    case FloatDivide:
+    case FloatRemainder:
+      *bTypeMask = ~0;
+      *thunk = true;
+      break;
     default:
       break;
     }
+  }
 
-    *cTypeMask = *bTypeMask;
-    *cRegisterMask = *bRegisterMask;
+  virtual void planDestination
+  (TernaryOperation,
+   unsigned, const uint8_t*, const uint64_t*,
+   unsigned, const uint8_t*, const uint64_t*,
+   unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask)
+  {
+    *cTypeMask = (1 << RegisterOperand);
+    *cRegisterMask = ~static_cast(0);
   }
 
   virtual void acquire() {
diff --git a/src/x86.S b/src/x86.S
index 1d27e700e0..5b11db56d6 100644
--- a/src/x86.S
+++ b/src/x86.S
@@ -24,6 +24,37 @@
 #ifdef __x86_64__
 #ifdef __MINGW32__
 
+.globl GLOBAL(detectFeature)
+GLOBAL(detectFeature):
+   pushq %rbp
+   movq %rsp, %rbp
+   pushq %rdx
+   pushq %rcx
+   pushq %rbx
+   pushq %rsi
+   pushq %rdi
+   movl %ecx, %edi
+   movl %edx, %esi
+   movl $1, %eax
+   cpuid
+   andl %esi, %edx
+   andl %edi, %ecx
+   orl %edx, %ecx
+   test %ecx, %ecx
+   je LOCAL(NOSSE)
+   movl $1, %eax
+   jmp LOCAL(SSEEND)
+LOCAL(NOSSE):
+   movl $0, %eax
+LOCAL(SSEEND):
+   popq %rdi
+   popq %rsi
+   popq %rbx
+   popq 
%rcx + popq %rdx + movq %rbp,%rsp + popq %rbp + ret .globl GLOBAL(vmNativeCall) GLOBAL(vmNativeCall): @@ -144,6 +175,31 @@ GLOBAL(vmJump): jmp *%rcx #else // not __MINGW32__ +.globl GLOBAL(detectFeature) +GLOBAL(detectFeature): + pushq %rbp + movq %rsp, %rbp + pushq %rdx + pushq %rcx + pushq %rbx + movl $1, %eax + cpuid + andl %esi, %edx + andl %edi, %ecx + orl %edx, %ecx + test %ecx, %ecx + je LOCAL(NOSSE) + movl $1, %eax + jmp LOCAL(SSEEND) +LOCAL(NOSSE): + movl $0, %eax +LOCAL(SSEEND): + popq %rbx + popq %rcx + popq %rdx + movq %rbp,%rsp + popq %rbp + ret .globl GLOBAL(vmNativeCall) GLOBAL(vmNativeCall): @@ -250,11 +306,43 @@ GLOBAL(vmJump): movq %r8,%rax movq %r9,%rdx jmp *%rdi - + #endif // not __MINGW32__ #elif defined __i386__ +.globl GLOBAL(detectFeature) +GLOBAL(detectFeature): + pushl %ebp + movl %esp, %ebp + pushl %edx + pushl %ecx + pushl %ebx + pushl %esi + pushl %edi + movl 12(%ebp), %esi + movl 8(%ebp), %edi + movl $1, %eax + cpuid + andl %esi, %edx + andl %edi, %ecx + orl %edx, %ecx + test %ecx, %ecx + je LOCAL(NOSSE) + movl $1, %eax + jmp LOCAL(SSEEND) +LOCAL(NOSSE): + movl $0, %eax +LOCAL(SSEEND): + popl %edi + popl %esi + popl %ebx + popl %ecx + popl %edx + movl %ebp,%esp + popl %ebp + ret + .globl GLOBAL(vmNativeCall) GLOBAL(vmNativeCall): pushl %ebp diff --git a/src/x86.cpp b/src/x86.cpp index 9bb5f9172d..bba864e906 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -10,10 +10,14 @@ #include "assembler.h" #include "vector.h" - + #define CAST1(x) reinterpret_cast(x) #define CAST2(x) reinterpret_cast(x) +const bool DebugSSE = false; +const bool EnableSSE = true; +const bool EnableSSE2 = true; + using namespace vm; namespace { @@ -39,6 +43,31 @@ enum { r15 = 15, }; +enum { + xmm0 = r15 + 1, + xmm1, + xmm2, + xmm3, + xmm4, + xmm5, + xmm6, + xmm7, + xmm8, + xmm9, + xmm10, + xmm11, + xmm12, + xmm13, + xmm14, + xmm15, +}; + +const unsigned GeneralRegisterMask += BytesPerWord == 4 ? 0x000000ff : 0x0000ffff; + +const unsigned FloatRegisterMask += BytesPerWord == 4 ? 0x00ff0000 : 0xffff0000; + const unsigned FrameHeaderSize = 2; const unsigned StackAlignmentInBytes = 16; @@ -401,13 +430,44 @@ padding(AlignmentPadding* p, unsigned start, unsigned offset, return padding; } +extern "C" +bool detectFeature(unsigned ecx, unsigned edx); + +inline bool +supportsSSE() +{ + static int supported = -1; + if(supported == -1) { + supported = EnableSSE && detectFeature(0, 0x2000000); + if(DebugSSE) { + fprintf(stderr, "sse %sdetected.\n", supported ? "" : "not "); + } + } + return supported; +} + +inline bool +supportsSSE2() +{ + static int supported = -1; + if(supported == -1) { + supported = EnableSSE2 && detectFeature(0, 0x4000000); + if(DebugSSE) { + fprintf(stderr, "sse2 %sdetected.\n", supported ? 
"" : "not "); + } + } + return supported; +} + #define REX_W 0x48 #define REX_R 0x44 #define REX_X 0x42 #define REX_B 0x41 #define REX_NONE 0x40 -void maybeRex(Context* c, unsigned size, int a, int index, int base, bool always) { +void maybeRex(Context* c, unsigned size, int a, int index, int base, + bool always) +{ if(BytesPerWord == 8) { uint8_t byte; if(size == 8) { @@ -466,7 +526,9 @@ inline void sib(Context* c, unsigned scale, int index, int base) { c->code.append((log(scale) << 6) | (regCode(index) << 3) | regCode(base)); } -inline void modrmSib(Context* c, int width, int a, int scale, int index, int base) { +inline void modrmSib(Context* c, int width, int a, int scale, int index, + int base) +{ if(index == NoRegister) { modrm(c, width, base, a); if(regCode(base) == rsp) { @@ -478,7 +540,9 @@ inline void modrmSib(Context* c, int width, int a, int scale, int index, int bas } } -inline void modrmSibImm(Context* c, int a, int scale, int index, int base, int offset) { +inline void modrmSibImm(Context* c, int a, int scale, int index, int base, + int offset) +{ if(offset == 0 && regCode(base) != rbp) { modrmSib(c, 0x00, a, scale, index, base); } else if(isInt8(offset)) { @@ -505,6 +569,12 @@ inline void opcode(Context* c, uint8_t op1, uint8_t op2) { c->code.append(op2); } +inline void opcode(Context* c, uint8_t op1, uint8_t op2, uint8_t op3) { + c->code.append(op1); + c->code.append(op2); + c->code.append(op3); +} + void return_(Context* c) { @@ -669,6 +739,46 @@ jumpIfLessOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a) conditional(c, 0x8e, a); } +void +jumpIfFloatUnorderedC(Context* c, unsigned size UNUSED, Assembler::Constant* a) +{ + assert(c, size == BytesPerWord); + + conditional(c, 0x8a, a); +} + +void +jumpIfFloatGreaterC(Context* c, unsigned size UNUSED, Assembler::Constant* a) +{ + assert(c, size == BytesPerWord); + + conditional(c, 0x87, a); +} + +void +jumpIfFloatGreaterOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a) +{ + assert(c, size == BytesPerWord); + + conditional(c, 0x83, a); +} + +void +jumpIfFloatLessC(Context* c, unsigned size UNUSED, Assembler::Constant* a) +{ + assert(c, size == BytesPerWord); + + conditional(c, 0x82, a); +} + +void +jumpIfFloatLessOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a) +{ + assert(c, size == BytesPerWord); + + conditional(c, 0x86, a); +} + void longJumpC(Context* c, unsigned size, Assembler::Constant* a) { @@ -831,11 +941,59 @@ moveCR2(Context* c, UNUSED unsigned aSize, Assembler::Constant* a, } } +inline bool floatReg(Assembler::Register* a) { + return a->low >= xmm0; +} + +void +sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + if(floatReg(a) && floatReg(b)) { + if(aSize == 4) { + opcode(c, 0xf3); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x10); + modrm(c, 0xc0, b, a); + } else { + opcode(c, 0xf2); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x10); + modrm(c, 0xc0, b, a); + } + } else if(floatReg(a)) { + opcode(c, 0x66); + maybeRex(c, aSize, a, b); + opcode(c, 0x0f, 0x7e); + modrm(c, 0xc0, b, a); + } else { + opcode(c, 0x66); + maybeRex(c, aSize, b, a); + opcode(c, 0x0f, 0x6e); + modrm(c, 0xc0, a, b); + } +} + +void +sseMoveCR(Context* c, unsigned aSize, Assembler::Constant* a, + unsigned bSize, Assembler::Register* b) +{ + assert(c, aSize <= BytesPerWord); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); + moveCR2(c, aSize, a, aSize, &tmp, 0); + sseMoveRR(c, aSize, &tmp, bSize, b); + 
c->client->releaseTemporary(tmp.low); +} + void moveCR(Context* c, unsigned aSize, Assembler::Constant* a, unsigned bSize, Assembler::Register* b) { - moveCR2(c, aSize, a, bSize, b, 0); + if(floatReg(b)) { + sseMoveCR(c, aSize, a, bSize, b); + } else { + moveCR2(c, aSize, a, bSize, b, 0); + } } void @@ -854,7 +1012,11 @@ void moveRR(Context* c, unsigned aSize, Assembler::Register* a, UNUSED unsigned bSize, Assembler::Register* b) { - + if(floatReg(a) or floatReg(b)) { + sseMoveRR(c, aSize, a, bSize, b); + return; + } + if (BytesPerWord == 4 and aSize == 8 and bSize == 8) { Assembler::Register ah(a->high); Assembler::Register bh(b->high); @@ -927,10 +1089,25 @@ moveRR(Context* c, unsigned aSize, Assembler::Register* a, } } +void +sseMoveMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + opcode(c, 0x66); + maybeRex(c, aSize, b, a); + opcode(c, 0x0f, 0x6e); + modrmSibImm(c, b, a); +} + void moveMR(Context* c, unsigned aSize, Assembler::Memory* a, unsigned bSize, Assembler::Register* b) { + if(floatReg(b)) { + sseMoveMR(c, aSize, a, bSize, b); + return; + } + switch (aSize) { case 1: maybeRex(c, bSize, b, a); @@ -981,12 +1158,27 @@ moveMR(Context* c, unsigned aSize, Assembler::Memory* a, } } +void +sseMoveRM(Context* c, unsigned aSize, Assembler::Register* a, + UNUSED unsigned bSize, Assembler::Memory* b) +{ + opcode(c, 0x66); + maybeRex(c, aSize, a, b); + opcode(c, 0x0f, 0x7e); + modrmSibImm(c, a, b); +} + void moveRM(Context* c, unsigned aSize, Assembler::Register* a, unsigned bSize UNUSED, Assembler::Memory* b) { assert(c, aSize == bSize); + if(floatReg(a)) { + sseMoveRM(c, aSize, a, bSize, b); + return; + } + switch (aSize) { case 1: maybeRex(c, bSize, a, b); @@ -1084,20 +1276,21 @@ moveCM(Context* c, unsigned aSize UNUSED, Assembler::Constant* a, break; case 8: { - if (BytesPerWord == 8) { + if (BytesPerWord == 8) { if(a->value->resolved() and isInt32(a->value->value())) { maybeRex(c, bSize, b); opcode(c, 0xc7); modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset); c->code.append4(a->value->value()); } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp + (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, 8, a, 8, &tmp); moveRM(c, 8, &tmp, 8, b); c->client->releaseTemporary(tmp.low); } - } else { - Assembler::Constant ah(shiftMaskPromise(c, a->value, 32, 0xFFFFFFFF)); + } else { + Assembler::Constant ah(shiftMaskPromise(c, a->value, 32, 0xFFFFFFFF)); Assembler::Constant al(shiftMaskPromise(c, a->value, 0, 0xFFFFFFFF)); Assembler::Memory bh(b->base, b->offset + 4, b->index, b->scale); @@ -1213,7 +1406,8 @@ addCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp + (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); addRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1271,7 +1465,8 @@ subtractCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp + (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); subtractRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1360,7 +1555,8 @@ andCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp 
+ (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); andRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1417,7 +1613,8 @@ orCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp + (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); orRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1473,7 +1670,8 @@ xorCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp + (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); xorRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1548,7 +1746,7 @@ compareCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); compareRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1562,7 +1760,7 @@ multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a, assert(c, aSize == bSize); if (BytesPerWord == 4 and aSize == 8) { - const uint32_t mask = ~((1 << rax) | (1 << rdx)); + const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx)); Assembler::Register tmp(c->client->acquireTemporary(mask), c->client->acquireTemporary(mask)); @@ -1585,7 +1783,8 @@ multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a, c->code.append4(v); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp + (c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, aSize, &tmp); multiplyRR(c, aSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1630,7 +1829,7 @@ compareCM(Context* c, unsigned aSize, Assembler::Constant* a, abort(c); } } else { - Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask)); moveCR(c, aSize, a, bSize, &tmp); compareRM(c, bSize, &tmp, bSize, b); c->client->releaseTemporary(tmp.low); @@ -1953,6 +2152,223 @@ unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, Assembler::Constant* a, doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b); } +inline void floatRegOp(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b, uint8_t op, uint8_t mod = 0xc0) +{ + if(aSize == 4) { + opcode(c, 0xf3); + } else { + opcode(c, 0xf2); + } + maybeRex(c, bSize, a, b); + opcode(c, 0x0f, op); + modrm(c, mod, a, b); +} + +inline void floatMemOp(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b, uint8_t op) +{ + if(aSize == 4) { + opcode(c, 0xf3); + } else { + opcode(c, 0xf2); + } + maybeRex(c, bSize, b, a); + opcode(c, 0x0f, op); + modrmSibImm(c, b, a); +} + +void +floatSqrtRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x51); +} + +void +floatSqrtMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x51); +} + +void +floatAddRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, 
a, 4, b, 0x58); +} + +void +floatAddMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x58); +} + +void +floatSubtractRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x5c); +} + +void +floatSubtractMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x5c); +} + +void +floatMultiplyRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x59); +} + +void +floatMultiplyMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x59); +} + +void +floatDivideRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatRegOp(c, aSize, a, 4, b, 0x5e); +} + +void +floatDivideMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, 4, b, 0x5e); +} + +void +float2FloatRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, supportsSSE2()); + floatRegOp(c, aSize, a, 4, b, 0x5a); +} + +void +float2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, supportsSSE2()); + floatMemOp(c, aSize, a, 4, b, 0x5a); +} + +void +float2IntRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize, Assembler::Register* b) +{ + assert(c, !floatReg(b)); + floatRegOp(c, aSize, a, bSize, b, 0x2d); +} + +void +float2IntMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize, Assembler::Register* b) +{ + floatMemOp(c, aSize, a, bSize, b, 0x2d); +} + +void +int2FloatRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize, Assembler::Register* b) +{ + floatRegOp(c, bSize, a, aSize, b, 0x2a); +} + +void +int2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a, + unsigned bSize, Assembler::Register* b) +{ + floatMemOp(c, bSize, a, aSize, b, 0x2a); +} + +void +floatCompareRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + if (aSize == 8) { + opcode(c, 0x66); + } + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x2e); + modrm(c, 0xc0, a, b); +} + +void +floatNegateRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, floatReg(a) and floatReg(b)); + // unlike most of the other floating point code, this does NOT + // support doubles: + assert(c, aSize == 4); + ResolvedPromise pcon(0x80000000); + Assembler::Constant con(&pcon); + if(a->low == b->low) { + Assembler::Register tmp(c->client->acquireTemporary(FloatRegisterMask)); + moveCR(c, 4, &con, 4, &tmp); + maybeRex(c, 4, a, &tmp); + opcode(c, 0x0f, 0x57); + modrm(c, 0xc0, &tmp, a); + c->client->releaseTemporary(tmp.low); + } else { + moveCR(c, 4, &con, 4, b); + if(aSize == 8) opcode(c, 0x66); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x57); + modrm(c, 0xc0, a, b); + } +} + +void +floatAbsRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b) +{ + assert(c, floatReg(a) and floatReg(b)); + // unlike most of the other floating point code, this does NOT + // support doubles: + assert(c, aSize == 
4); + ResolvedPromise pcon(0x7fffffff); + Assembler::Constant con(&pcon); + if(a->low == b->low) { + Assembler::Register tmp(c->client->acquireTemporary(FloatRegisterMask)); + moveCR(c, 4, &con, 4, &tmp); + maybeRex(c, 4, a, &tmp); + opcode(c, 0x0f, 0x54); + modrm(c, 0xc0, &tmp, a); + c->client->releaseTemporary(tmp.low); + } else { + moveCR(c, 4, &con, 4, b); + maybeRex(c, 4, a, b); + opcode(c, 0x0f, 0x54); + modrm(c, 0xc0, a, b); + } +} + +void +absRR(Context* c, unsigned aSize, Assembler::Register* a, + unsigned bSize UNUSED, Assembler::Register* b UNUSED) +{ + assert(c, aSize == bSize and a->low == rax and b->low == rax); + Assembler::Register d(c->client->acquireTemporary(static_cast(1) << rdx)); + maybeRex(c, aSize, a, b); + opcode(c, 0x99); + xorRR(c, aSize, &d, aSize, a); + subtractRR(c, aSize, &d, aSize, a); + c->client->releaseTemporary(rdx); +} + void populateTables(ArchitectureContext* c) { @@ -1991,10 +2407,20 @@ populateTables(ArchitectureContext* c) uo[index(JumpIfLess, C)] = CAST1(jumpIfLessC); uo[index(JumpIfLessOrEqual, C)] = CAST1(jumpIfLessOrEqualC); + uo[index(JumpIfFloatUnordered, C)] = CAST1(jumpIfFloatUnorderedC); + uo[index(JumpIfFloatEqual, C)] = CAST1(jumpIfEqualC); + uo[index(JumpIfFloatNotEqual, C)] = CAST1(jumpIfNotEqualC); + uo[index(JumpIfFloatGreater, C)] = CAST1(jumpIfFloatGreaterC); + uo[index(JumpIfFloatGreaterOrEqual, C)] = CAST1(jumpIfFloatGreaterOrEqualC); + uo[index(JumpIfFloatLess, C)] = CAST1(jumpIfFloatLessC); + uo[index(JumpIfFloatLessOrEqual, C)] = CAST1(jumpIfFloatLessOrEqualC); + uo[index(LongJump, C)] = CAST1(longJumpC); bo[index(Negate, R, R)] = CAST2(negateRR); + bo[index(FloatNegate, R, R)] = CAST2(floatNegateRR); + bo[index(Move, R, R)] = CAST2(moveRR); bo[index(Move, C, R)] = CAST2(moveCR); bo[index(Move, M, R)] = CAST2(moveMR); @@ -2002,6 +2428,9 @@ populateTables(ArchitectureContext* c) bo[index(Move, C, M)] = CAST2(moveCM); bo[index(Move, A, R)] = CAST2(moveAR); + bo[index(FloatSqrt, R, R)] = CAST2(floatSqrtRR); + bo[index(FloatSqrt, M, R)] = CAST2(floatSqrtMR); + bo[index(MoveZ, R, R)] = CAST2(moveZRR); bo[index(MoveZ, M, R)] = CAST2(moveZMR); @@ -2010,12 +2439,20 @@ populateTables(ArchitectureContext* c) bo[index(Compare, C, M)] = CAST2(compareCM); bo[index(Compare, R, M)] = CAST2(compareRM); + bo[index(FloatCompare, R, R)] = CAST2(floatCompareRR); + bo[index(Add, R, R)] = CAST2(addRR); bo[index(Add, C, R)] = CAST2(addCR); bo[index(Subtract, C, R)] = CAST2(subtractCR); bo[index(Subtract, R, R)] = CAST2(subtractRR); + bo[index(FloatAdd, R, R)] = CAST2(floatAddRR); + bo[index(FloatAdd, M, R)] = CAST2(floatAddMR); + + bo[index(FloatSubtract, R, R)] = CAST2(floatSubtractRR); + bo[index(FloatSubtract, M, R)] = CAST2(floatSubtractMR); + bo[index(And, R, R)] = CAST2(andRR); bo[index(And, C, R)] = CAST2(andCR); @@ -2030,6 +2467,12 @@ populateTables(ArchitectureContext* c) bo[index(Divide, R, R)] = CAST2(divideRR); + bo[index(FloatMultiply, R, R)] = CAST2(floatMultiplyRR); + bo[index(FloatMultiply, M, R)] = CAST2(floatMultiplyMR); + + bo[index(FloatDivide, R, R)] = CAST2(floatDivideRR); + bo[index(FloatDivide, M, R)] = CAST2(floatDivideMR); + bo[index(Remainder, R, R)] = CAST2(remainderRR); bo[index(LongCompare, C, R)] = CAST2(longCompareCR); @@ -2043,8 +2486,19 @@ populateTables(ArchitectureContext* c) bo[index(UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR); bo[index(UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR); -} + bo[index(Float2Float, R, R)] = CAST2(float2FloatRR); + bo[index(Float2Float, M, R)] = 
CAST2(float2FloatMR);
+
+  bo[index(Float2Int, R, R)] = CAST2(float2IntRR);
+  bo[index(Float2Int, M, R)] = CAST2(float2IntMR);
+
+  bo[index(Int2Float, R, R)] = CAST2(int2FloatRR);
+  bo[index(Int2Float, M, R)] = CAST2(int2FloatMR);
+
+  bo[index(Abs, R, R)] = CAST2(absRR);
+  bo[index(FloatAbs, R, R)] = CAST2(floatAbsRR);
+}
 
 class MyArchitecture: public Assembler::Architecture {
  public:
  MyArchitecture(System* system): c(system), referenceCount(0) {
@@ -2052,8 +2506,37 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual unsigned registerCount() {
-    return (BytesPerWord == 4 ? 8 : 16);
+    if (supportsSSE()) {
+      return BytesPerWord == 4 ? 24 : 32;
+    } else {
+      return BytesPerWord == 4 ? 8 : 16;
+    }
   }
+
+  virtual unsigned generalRegisterCount() {
+    return BytesPerWord == 4 ? 8 : 16;
+  }
+
+  virtual unsigned floatRegisterCount() {
+    if (supportsSSE()) {
+      return BytesPerWord == 4 ? 8 : 16;
+    } else {
+      return 0;
+    }
+  }
+
+  virtual uint64_t generalRegisters() {
+    return GeneralRegisterMask;
+  }
+
+  virtual uint64_t floatRegisters() {
+    return supportsSSE() ? FloatRegisterMask : 0;
+  }
+
+  virtual uint64_t allRegisters() {
+    return generalRegisters() | floatRegisters();
+  }
+
   virtual int stack() {
     return rsp;
@@ -2079,10 +2562,6 @@ class MyArchitecture: public Assembler::Architecture {
     return rdx;
   }
 
-  virtual bool condensedAddressing() {
-    return true;
-  }
-
   virtual bool bigEndian() {
     return false;
   }
@@ -2093,7 +2572,7 @@ class MyArchitecture: public Assembler::Architecture {
     case rsp:
     case rbx:
       return true;
-      
+
     default:
       return false;
     }
@@ -2226,6 +2705,33 @@ class MyArchitecture: public Assembler::Architecture {
     return 0;
   }
 
+  virtual bool supportsFloatCompare(unsigned) {
+    return supportsSSE();
+  }
+
+  virtual bool alwaysCondensed(BinaryOperation op)
+  {
+    switch(op) {
+    case FloatCompare:
+    case Compare:
+    case Float2Float:
+    case Float2Int:
+    case Int2Float:
+    case FloatAbs:
+    case FloatNegate:
+    case FloatSqrt:
+      return false;
+    case Negate:
+    case Abs:
+    default:
+      return true;
+    }
+  }
+
+  virtual bool alwaysCondensed(TernaryOperation) {
+    return true;
+  }
+
   virtual int returnAddressOffset() {
     return 0;
   }
@@ -2252,46 +2758,137 @@ class MyArchitecture: public Assembler::Architecture {
     *thunk = false;
   }
 
-  virtual void plan
+  virtual BinaryOperation binaryIntrinsic(const char* className,
+                                          const char* methodName,
+                                          const char* parameterSpec)
+  {
+    if (strcmp(className, "java/lang/Math") == 0) {
+      if (supportsSSE()
+          and strcmp(methodName, "sqrt") == 0
+          and strcmp(parameterSpec, "(D)D") == 0)
+      {
+        return FloatSqrt;
+      } else if (strcmp(methodName, "abs") == 0) {
+        if (strcmp(parameterSpec, "(I)I") == 0
+            or strcmp(parameterSpec, "(J)J") == 0)
+        {
+          return Abs;
+        } else if (supportsSSE()
+                   and supportsSSE2()
+                   and strcmp(parameterSpec, "(F)F") == 0)
+        {
+          return FloatAbs;
+        }
+      }
+    }
+    return NoBinaryOperation;
+  }
+
+  virtual TernaryOperation ternaryIntrinsic(const char* className UNUSED,
+                                            const char* methodName UNUSED,
+                                            const char* parameterSpec UNUSED)
+  {
+    return NoTernaryOperation;
+  }
+
+  virtual void planSource
   (BinaryOperation op,
    unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
-   unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask,
-   bool* thunk)
+   unsigned bSize, bool* thunk)
   {
     *aTypeMask = ~0;
-    *aRegisterMask = ~static_cast(0);
-
-    *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
-    *bRegisterMask = ~static_cast(0);
+    *aRegisterMask = GeneralRegisterMask |
+      (static_cast(GeneralRegisterMask) << 32);
 
     *thunk = false;
 
     switch (op) {
     case Compare:
       *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
-      *bTypeMask = (1 << RegisterOperand);
+      *aRegisterMask = GeneralRegisterMask;
+      break;
+
+    case FloatCompare:
+      assert(&c, supportsSSE());
+      *aTypeMask = (1 << RegisterOperand);
+      *aRegisterMask = FloatRegisterMask;
       break;
 
     case Negate:
       *aTypeMask = (1 << RegisterOperand);
-      *bTypeMask = (1 << RegisterOperand);
       *aRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
         | (static_cast<uint64_t>(1) << rax);
-      *bRegisterMask = *aRegisterMask;
+      break;
+
+    case Abs:
+      *aTypeMask = (1 << RegisterOperand);
+      *aRegisterMask = (static_cast<uint64_t>(1) << rax);
+      break;
+
+    case FloatAbs:
+      *aTypeMask = (1 << RegisterOperand);
+      *aRegisterMask = FloatRegisterMask;
+      break;
+
+    case FloatNegate:
+      // floatNegateRR does not support doubles
+      if (supportsSSE() and aSize == 4 and bSize == 4) {
+        *aTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = FloatRegisterMask;
+      } else {
+        *thunk = true;
+      }
+      break;
+
+    case FloatSqrt:
+      *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+      *aRegisterMask = FloatRegisterMask;
+      break;
+
+    case Float2Float:
+      if (supportsSSE() and supportsSSE2()) {
+        *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+        *aRegisterMask = FloatRegisterMask;
+      } else {
+        *thunk = true;
+      }
+      break;
+
+    case Float2Int:
+      if (supportsSSE()) {
+        *aTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = FloatRegisterMask;
+      } else {
+        *thunk = true;
+      }
+      break;
+
+    case Int2Float:
+      if (supportsSSE()) {
+        *aTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = GeneralRegisterMask
+          | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
+      } else {
+        *thunk = true;
+      }
       break;
 
     case Move:
+      *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+      *aRegisterMask = GeneralRegisterMask
+        | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
+
       if (BytesPerWord == 4) {
         if (aSize == 4 and bSize == 8) {
-          const uint32_t mask = ~((1 << rax) | (1 << rdx));
-          *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
-          *bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
-            | (static_cast<uint64_t>(1) << rax);
+          *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+          const uint32_t mask
+            = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
+          *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
         } else if (aSize == 1 or bSize == 1) {
+          *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
          const uint32_t mask
            = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx);
-          *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
-          *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
+          *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
         }
       } break;
 
@@ -2301,12 +2898,84 @@ class MyArchitecture: public Assembler::Architecture {
     }
   }
 
-  virtual void plan
+  virtual void planDestination
+  (BinaryOperation op,
+   unsigned aSize, const uint8_t* aTypeMask UNUSED,
+   const uint64_t* aRegisterMask, unsigned bSize, uint8_t* bTypeMask,
+   uint64_t* bRegisterMask)
+  {
+    *bTypeMask = ~0;
+    *bRegisterMask = GeneralRegisterMask
+      | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
+
+    switch (op) {
+    case Compare:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = GeneralRegisterMask;
+      break;
+
+    case FloatCompare:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = FloatRegisterMask;
+      break;
+
+    case Abs:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = (static_cast<uint64_t>(1) << rax);
+      break;
+
+    case FloatAbs:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = *aRegisterMask;
+      break;
+
+    case Negate:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = *aRegisterMask;
+      break;
+
+    case FloatNegate:
+    case FloatSqrt:
+    case Float2Float:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = FloatRegisterMask;
+      break;
+
+    case Int2Float:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = FloatRegisterMask;
+      break;
+
+    case Float2Int:
+      *bTypeMask = (1 << RegisterOperand);
+      *bRegisterMask = GeneralRegisterMask
+        | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
+      break;
+
+    case Move:
+      *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+      *bRegisterMask = GeneralRegisterMask
+        | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
+
+      if (BytesPerWord == 4) {
+        if (aSize == 4 and bSize == 8) {
+          *bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
+            | (static_cast<uint64_t>(1) << rax);
+        } else if (aSize == 1 or bSize == 1) {
+          const uint32_t mask
+            = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx);
+          *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
+        }
+      }
+      break;
+
+    default:
+      break;
+    }
+  }
+
+  virtual void planSource
   (TernaryOperation op,
-   unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned aSize, uint8_t *aTypeMask, uint64_t *aRegisterMask,
    unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask,
-   unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask,
-   bool* thunk)
+   unsigned, bool* thunk)
   {
     *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
     *aRegisterMask = ~static_cast<uint64_t>(0);
@@ -2317,21 +2986,37 @@ class MyArchitecture: public Assembler::Architecture {
     *bTypeMask = (1 << RegisterOperand);
     *bRegisterMask = ~static_cast<uint64_t>(0);
 
     *thunk = false;
 
     switch (op) {
+    case FloatAdd:
+    case FloatSubtract:
+    case FloatMultiply:
+    case FloatDivide:
+      if (supportsSSE()) {
+        *aTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand);
+        *bTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = FloatRegisterMask;
+        *bRegisterMask = FloatRegisterMask;
+      } else {
+        *thunk = true;
+      }
+      break;
+
     case Multiply:
       if (BytesPerWord == 4 and aSize == 8) {
-        const uint32_t mask = ~((1 << rax) | (1 << rdx));
+        const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
         *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
         *bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32)) | mask;
+      } else {
+        *aRegisterMask = GeneralRegisterMask;
+        *bRegisterMask = GeneralRegisterMask;
       }
       break;
 
     case Divide:
       if (BytesPerWord == 4 and aSize == 8) {
-        *bTypeMask = ~0;
-        *thunk = true;
+        *thunk = true;
       } else {
         *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = ~((1 << rax) | (1 << rdx));
+        *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
         *bRegisterMask = 1 << rax;
       }
       break;
@@ -2342,25 +3027,33 @@ class MyArchitecture: public Assembler::Architecture {
         *thunk = true;
       } else {
         *aTypeMask = (1 << RegisterOperand);
-        *aRegisterMask = ~((1 << rax) | (1 << rdx));
-        *bRegisterMask = 1 << rax;
+        *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
+        *bRegisterMask = 1 << rax;
       }
      break;
 
     case ShiftLeft:
     case ShiftRight:
     case UnsignedShiftRight: {
-      *aRegisterMask = (~static_cast<uint64_t>(0) << 32)
+      *aRegisterMask = (static_cast<uint64_t>(GeneralRegisterMask) << 32)
         | (static_cast<uint64_t>(1) << rcx);
-      const uint32_t mask = ~(1 << rcx);
+      const uint32_t mask = GeneralRegisterMask & ~(1 << rcx);
       *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
     } break;
 
     default:
       break;
     }
+  }
 
-    *cTypeMask = *bTypeMask;
+  virtual void planDestination
+  (TernaryOperation op UNUSED,
+   unsigned aSize UNUSED, const uint8_t* aTypeMask UNUSED,
+   const uint64_t* aRegisterMask UNUSED, unsigned bSize UNUSED,
+   const uint8_t* bTypeMask UNUSED, const uint64_t* bRegisterMask,
+   unsigned cSize UNUSED, uint8_t* cTypeMask, uint64_t* cRegisterMask)
+  {
+    *cTypeMask = (1 << RegisterOperand);
     *cRegisterMask = *bRegisterMask;
   }
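
Reviewer note (not part of the patch): the two scalar helpers below are an illustrative sketch of what the new emitters compute. absRR emits cdq/cqo (opcode 0x99) followed by xor and sub, the branchless two's-complement abs; floatAbsRR materializes the constant 0x7fffffff and emits andps (opcode 0x0f 0x54) to clear the IEEE-754 sign bit. The function names here are invented for the example, and the integer version assumes a target with arithmetic right shift (as on x86).

#include <cstdint>
#include <cstring>

// Scalar model of absRR: sign-extend (as cdq does into rdx), xor, subtract.
int32_t branchlessAbs(int32_t a) {
  int32_t d = a >> 31;     // 0 or -1, like the cdq result in rdx
  return (a ^ d) - d;      // |a| with no branch
}

// Scalar model of floatAbsRR: clear the sign bit with the 0x7fffffff mask.
float maskedFabs(float a) {
  uint32_t bits;
  std::memcpy(&bits, &a, sizeof bits);
  bits &= UINT32_C(0x7fffffff);  // same constant the emitter loads
  std::memcpy(&a, &bits, sizeof a);
  return a;
}
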
diff --git a/test/AllFloats.java b/test/AllFloats.java
new file mode 100644
index 0000000000..8f41c4d20e
--- /dev/null
+++ b/test/AllFloats.java
@@ -0,0 +1,77 @@
+public class AllFloats {
+  private static float multiplyByFive(float a) {return 5f * a;}
+  private static double multiplyByFive(double a) {return 5d * a;}
+  private static float multiply(float a, float b) {return a * b;}
+  private static double multiply(double a, double b) {return a * b;}
+  private static double multiply(float a, double b) {return a * b;}
+  private static float divide(float a, float b) {return a / b;}
+  private static double divide(double a, double b) {return a / b;}
+  private static double divide(float a, double b) {return a / b;}
+  private static float add(float a, float b) {return a + b;}
+  private static double add(double a, double b) {return a + b;}
+  private static double add(float a, double b) {return a + b;}
+  private static float subtract(float a, float b) {return a - b;}
+  private static double subtract(double a, double b) {return a - b;}
+  private static double subtract(float a, double b) {return a - b;}
+  private static float complex(float a, float b) {return (a - b) / (a * b) + (float)Math.sqrt(a);}
+  private static double complex(double a, double b) {return (a - b) / (a * b) + Math.sqrt(a);}
+  private static double complex(float a, double b) {return (a - b) / (a * b) + Math.sqrt(a);}
+  private static int f2i(float a) {return (int)a;}
+  private static long f2l(float a) {return (long)a;}
+  private static float i2f(int a) {return (float)a;}
+  private static double i2d(int a) {return (double)a;}
+  private static int d2i(double a) {return (int)a;}
+  private static long d2l(double a) {return (long)a;}
+  private static float l2f(long a) {return (float)a;}
+  private static double l2d(long a) {return (double)a;}
+  private static float negate(float a) {return -a;}
+  private static double negate(double a) {return -a;}
+  private static int abs(int a) {return Math.abs(a);}
+  private static float abs(float a) {return Math.abs(a);}
+
+  private static void expect(boolean v) {
+    if(!v)throw new RuntimeException();
+  }
+
+  private static int last(){return 0;}
+
+  public static void main(String[] args) {
+    expect(multiplyByFive(36f) == 5f * 36f);
+    expect(multiplyByFive(36d) == 5d * 36d);
+    expect(multiply(5f, 4f) == 5f*4f);
+    expect(multiply(5d, 4d) == 5d*4d);
+    expect(multiply(5f, 4d) == 5f*4d);
+    expect(divide(5f, 2f) == 5f/2f);
+    expect(divide(5d, 2d) == 5d/2d);
+    expect(divide(5f, 2d) == 5f/2d);
+    expect(add(5f, 4f) == 5f+4f);
+    expect(add(5d, 4d) == 5f+4d);
+    expect(add(5f, 4f) == 5f+4d);
+    expect(subtract(5f, 4f) == 5f-4f);
+    expect(subtract(5d, 4d) == 5f-4d);
+    expect(subtract(5f, 4f) == 5f-4d);
+    expect(complex(4f, 3f) == (4f-3f)/(4f*3f) + 2f);
+    expect(complex(4d, 3d) == (4d-3d)/(4d*3d) + 2d);
+    expect(complex(4f, 3d) == (4f-3d)/(4f*3d) + 2f);
+
+    expect(f2i(4f) == 4);
+    expect(f2l(4f) == 4);
+    expect(i2f(4) == 4f);
+    expect(i2d(4) == 4d);
+
+    expect(d2i(4d) == 4);
+    expect(d2l(4d) == 4);
+    expect(l2f(4) == 4f);
+    expect(l2d(4) == 4d);
+
+    expect(negate(4f) == -4f);
+    expect(negate(4d) == -4d);
+
+    expect(abs(-4) == 4);
+    expect(abs(12) == 12);
+    expect(abs(-4f) == 4f);
+    expect(abs(12f) == 12f);
+
+    int unused = last();
+  }
+}
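
Reviewer note (not part of the patch): a minimal, hypothetical sketch of how a frontend might consult the new intrinsic hook before compiling a call to java/lang/Math.sqrt. The wrapper function and its name are assumptions for illustration only; the binaryIntrinsic signature and the NoBinaryOperation sentinel come from the patch itself, and it assumes the declarations from src/assembler.h are in scope.

// Hypothetical helper: report whether Math.sqrt(double) can be lowered to
// the FloatSqrt operation (backed here by floatSqrtRR/floatSqrtMR) instead
// of an ordinary invokestatic.
bool canInlineMathSqrt(Assembler::Architecture* arch) {
  return arch->binaryIntrinsic("java/lang/Math", "sqrt", "(D)D")
    != NoBinaryOperation;
}
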