diff --git a/src/assembler.h b/src/assembler.h
index bb619040ca..486f487cc9 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -49,10 +49,8 @@ enum BinaryOperation {
   MoveLow,
   MoveHigh,
   MoveZ,
-  Compare,
   Negate,
   FloatNegate,
-  FloatCompare,
   Float2Float,
   Float2Int,
   Int2Float,
diff --git a/src/compile.cpp b/src/compile.cpp
index 0c22da31f5..a07b51b3cd 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -2689,8 +2689,8 @@ saveStateAndCompile(MyThread* t, Frame* initialFrame, unsigned ip)
 }
 
 bool
-integerBranch(Frame* frame, object code, unsigned& ip, unsigned size,
-              Compiler::Operand* a, Compiler::Operand* b)
+integerBranch(MyThread* t, Frame* frame, object code, unsigned& ip,
+              unsigned size, Compiler::Operand* a, Compiler::Operand* b)
 {
   if (ip + 3 > codeLength(t, code)) {
     return false;
@@ -2707,37 +2707,41 @@ integerBranch(Frame* frame, object code, unsigned& ip, unsigned size,
   switch (instruction) {
   case ifeq:
     c->jumpIfEqual(size, a, b, target);
-    return true;
+    break;
 
   case ifne:
     c->jumpIfNotEqual(size, a, b, target);
-    return true;
+    break;
 
   case ifgt:
     c->jumpIfGreater(size, a, b, target);
-    return true;
+    break;
 
   case ifge:
     c->jumpIfGreaterOrEqual(size, a, b, target);
-    return true;
+    break;
 
   case iflt:
-    c->jumpIfLessOrUnordered(size, a, b, target);
-    return true;
+    c->jumpIfLess(size, a, b, target);
+    break;
 
   case ifle:
-    c->jumpIfLessOrEqualOrUnordered(size, a, b, target);
-    return true;
+    c->jumpIfLessOrEqual(size, a, b, target);
+    break;
 
   default:
     ip -= 3;
     return false;
   }
+
+  saveStateAndCompile(t, frame, newIp);
+  return t->exception == 0;
 }
 
 bool
-floatBranch(Frame* frame, object code, unsigned& ip, unsigned size,
-            bool lessIfUnordered, Compiler::Operand* a, Compiler::Operand* b)
+floatBranch(MyThread* t, Frame* frame, object code, unsigned& ip,
+            unsigned size, bool lessIfUnordered, Compiler::Operand* a,
+            Compiler::Operand* b)
 {
   if (ip + 3 > codeLength(t, code)) {
     return false;
@@ -2754,11 +2758,11 @@ floatBranch(Frame* frame, object code, unsigned& ip, unsigned size,
   switch (instruction) {
   case ifeq:
     c->jumpIfFloatEqual(size, a, b, target);
-    return true;
+    break;
 
   case ifne:
     c->jumpIfFloatNotEqual(size, a, b, target);
-    return true;
+    break;
 
   case ifgt:
     if (lessIfUnordered) {
@@ -2766,7 +2770,7 @@ floatBranch(Frame* frame, object code, unsigned& ip, unsigned size,
     } else {
       c->jumpIfFloatGreaterOrUnordered(size, a, b, target);
     }
-    return true;
+    break;
 
   case ifge:
     if (lessIfUnordered) {
@@ -2774,7 +2778,7 @@ floatBranch(Frame* frame, object code, unsigned& ip, unsigned size,
     } else {
       c->jumpIfFloatGreaterOrEqualOrUnordered(size, a, b, target);
     }
-    return true;
+    break;
 
   case iflt:
     if (lessIfUnordered) {
@@ -2782,7 +2786,7 @@ floatBranch(Frame* frame, object code, unsigned& ip, unsigned size,
     } else {
       c->jumpIfFloatLess(size, a, b, target);
     }
-    return true;
+    break;
 
   case ifle:
     if (lessIfUnordered) {
@@ -2790,12 +2794,15 @@ floatBranch(Frame* frame, object code, unsigned& ip, unsigned size,
     } else {
       c->jumpIfFloatLessOrEqual(size, a, b, target);
     }
-    return true;
+    break;
 
   default:
     ip -= 3;
     return false;
   }
+
+  saveStateAndCompile(t, frame, newIp);
+  return t->exception == 0;
 }
 
 void
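
A note on the compile.cpp change above: integerBranch and floatBranch now compile the branch target themselves (via saveStateAndCompile) and report failure through t->exception, and they peek at the bytecode that follows a comparison so that the compare and the conditional branch fuse into a single jump operation. Below is a minimal standalone sketch of that lookahead, assuming the JVM's 3-byte ifXX form with a big-endian 16-bit offset relative to the branch opcode; names are illustrative, not Avian's.

    // Sketch of the compare-and-branch fusion idea: after popping the
    // operands of lcmp/fcmpg/etc., peek at the next bytecode; if it is
    // an ifXX branch, emit one fused jump instead of materializing
    // -1/0/1 and testing it.  Illustrative only.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum Opcode : uint8_t { ifeq = 0x99, iflt = 0x9b, ifge = 0x9c,
                            ifgt = 0x9d, ifle = 0x9e, ifne = 0x9a };

    // Returns true and sets newIp if code[ip] starts a 3-byte ifXX branch.
    bool fuseBranch(const std::vector<uint8_t>& code, unsigned& ip,
                    unsigned& newIp)
    {
      if (ip + 3 > code.size()) return false;

      unsigned branchIp = ip;
      uint8_t instruction = code[ip++];
      int16_t offset = static_cast<int16_t>((code[ip] << 8) | code[ip + 1]);
      ip += 2;

      switch (instruction) {
      case ifeq: case ifne: case iflt: case ifge: case ifgt: case ifle:
        newIp = branchIp + offset;  // target is relative to the branch
        return true;
      default:
        ip = branchIp;              // not a branch: restore ip and bail out
        return false;
      }
    }

    int main() {
      std::vector<uint8_t> code = { ifgt, 0x00, 0x05 };
      unsigned ip = 0, newIp = 0;
      if (fuseBranch(code, ip, newIp)) {
        std::printf("fused branch to %u\n", newIp);  // prints 5
      }
    }
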
@@ -3144,7 +3151,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* a = frame->popLong();
       Compiler::Operand* b = frame->popLong();
 
-      if (not floatBranch(frame, ip, 8, false, a, b)) {
+      if (not floatBranch(t, frame, code, ip, 8, false, a, b)) {
+        if (UNLIKELY(t->exception)) return;
+
         frame->pushInt
           (c->call
            (c->constant
@@ -3159,7 +3168,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* a = frame->popLong();
       Compiler::Operand* b = frame->popLong();
 
-      if (not floatBranch(frame, ip, 8, true, a, b)) {
+      if (not floatBranch(t, frame, code, ip, 8, true, a, b)) {
+        if (UNLIKELY(t->exception)) return;
+
         frame->pushInt
           (c->call
            (c->constant
@@ -3257,7 +3268,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* a = frame->popInt();
      Compiler::Operand* b = frame->popInt();
 
-      if (not floatBranch(frame, ip, 4, false, a, b)) {
+      if (not floatBranch(t, frame, code, ip, 4, false, a, b)) {
+        if (UNLIKELY(t->exception)) return;
+
         frame->pushInt
           (c->call
            (c->constant
@@ -3270,7 +3283,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* a = frame->popInt();
       Compiler::Operand* b = frame->popInt();
 
-      if (not floatBranch(frame, ip, 4, true, a, b)) {
+      if (not floatBranch(t, frame, code, ip, 4, true, a, b)) {
+        if (UNLIKELY(t->exception)) return;
+
         frame->pushInt
           (c->call
            (c->constant
@@ -3563,9 +3578,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* target = frame->machineIp(newIp);
 
       if (instruction == if_acmpeq) {
-        c->jumpIfEqual(BytesPerWord, a, btarget);
+        c->jumpIfEqual(BytesPerWord, a, b, target);
       } else {
-        c->jumpIfNotEqual(BytesPerWord, a, btarget);
+        c->jumpIfNotEqual(BytesPerWord, a, b, target);
       }
 
       saveStateAndCompile(t, frame, newIp);
@@ -3624,7 +3639,6 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       assert(t, newIp < codeLength(t, code));
 
       Compiler::Operand* target = frame->machineIp(newIp);
-      Compiler::Operand* cont = frame->machineIp(ip);
 
       Compiler::Operand* a = c->constant(0, Compiler::IntegerType);
       Compiler::Operand* b = frame->popInt();
@@ -3667,9 +3681,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* target = frame->machineIp(newIp);
 
       if (instruction == ifnull) {
-        c->jumpIfEqual(BytesPerWord, a, btarget);
+        c->jumpIfEqual(BytesPerWord, a, b, target);
       } else {
-        c->jumpIfNotEqual(BytesPerWord, a, btarget);
+        c->jumpIfNotEqual(BytesPerWord, a, b, target);
       }
 
       saveStateAndCompile(t, frame, newIp);
@@ -4023,7 +4037,9 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip,
       Compiler::Operand* a = frame->popLong();
       Compiler::Operand* b = frame->popLong();
 
-      if (not integerBranch(frame, ip, 8, a, b)) {
+      if (not integerBranch(t, frame, code, ip, 8, a, b)) {
+        if (UNLIKELY(t->exception)) return;
+
         frame->pushInt
           (c->call
            (c->constant
diff --git a/src/compiler.cpp b/src/compiler.cpp
index 1ddcbd4f9d..424e81cf4e 100644
--- a/src/compiler.cpp
+++ b/src/compiler.cpp
@@ -31,7 +31,10 @@ const int AnyFrameIndex = -2;
 const int NoFrameIndex = -1;
 
 const unsigned StealRegisterReserveCount = 2;
-const unsigned ResolveRegisterReserveCount = 2;
+
+// this should be equal to the largest number of registers used by a
+// compare instruction:
+const unsigned ResolveRegisterReserveCount = (BytesPerWord == 8 ? 2 : 4);
 
 class Context;
 class Value;
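
The new comment on ResolveRegisterReserveCount is about register pressure: the worst-case compare is now a fused 64-bit compare-and-branch, which on a 32-bit target reads two register pairs at once. A back-of-the-envelope sketch of that arithmetic, under the assumption that a 64-bit operand occupies one register per machine word:

    // Why the reserve doubles on 32-bit targets: a 64-bit compare reads
    // both operands in full, and each 64-bit operand needs a pair of
    // 32-bit registers, so the worst case touches four registers.
    constexpr unsigned resolveReserve(unsigned bytesPerWord) {
      return 2 * (8 / bytesPerWord);  // two operands, 8 bytes each
    }

    static_assert(resolveReserve(8) == 2, "one register per operand");
    static_assert(resolveReserve(4) == 4, "a register pair per operand");

    int main() { return 0; }
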
@@ -3263,7 +3266,6 @@ grow(Context* c, Value* v)
   assert(c, v->next == v);
 
   Value* next = value(c, v->type);
-  fprintf(stderr, "grow %p to %p\n", v, next);
   v->next = next;
   next->next = v;
   next->index = 1;
@@ -3583,8 +3585,8 @@ class CombineEvent: public Event {
     // fprintf(stderr, "combine %p and %p into %p\n", first, second, result);
 
     apply(c, type,
-          firstSize, first->source, first->next->source),
-          secondSize, second->source, second->next->source),
+          firstSize, first->source, first->next->source,
+          secondSize, second->source, second->next->source,
           resultSize, low, high);
 
     thawSource(c, firstSize, first);
@@ -4004,7 +4006,7 @@ class TranslateEvent: public Event {
        ? getTarget(c, value->next, result->next, resultHighMask)
        : low);
 
-    apply(c, type, valueSize, value->source, value->next->source),
+    apply(c, type, valueSize, value->source, value->next->source,
           resultSize, low, high);
 
     for (Read* r = reads; r; r = r->eventNext) {
@@ -4189,7 +4191,8 @@ unordered(double a, double b)
 }
 
 bool
-shouldJump(TernaryOperation type, unsigned size, int64_t a, int64_t b)
+shouldJump(Context* c, TernaryOperation type, unsigned size, int64_t b,
+           int64_t a)
 {
   switch (type) {
   case JumpIfEqual:
@@ -4245,23 +4248,30 @@ shouldJump(TernaryOperation type, unsigned size, int64_t a, int64_t b)
       or unordered(asFloat(size, a), asFloat(size, b));
 
   default:
-    jump = false;
+    abort(c);
  }
 }
 
 class BranchEvent: public Event {
  public:
   BranchEvent(Context* c, TernaryOperation type, unsigned size,
-              Value* first, Value* second, Value* address, bool exit,
+              Value* first, Value* second, Value* address,
               const SiteMask& firstLowMask, const SiteMask& firstHighMask,
               const SiteMask& secondLowMask, const SiteMask& secondHighMask):
     Event(c), type(type), size(size), first(first), second(second),
-    address(address), exit(exit)
+    address(address)
   {
-    addReads(c, this, first, firstSize, firstLowMask, firstHighMask);
-    addReads(c, this, second, secondSize, secondLowMask, secondHighMask);
+    addReads(c, this, first, size, firstLowMask, firstHighMask);
+    addReads(c, this, second, size, secondLowMask, secondHighMask);
+
+    uint8_t typeMask;
+    uint64_t registerMask;
+    c->arch->planDestination(type, size, 0, 0, size, 0, 0, BytesPerWord,
+                             &typeMask, &registerMask);
+
+    addRead(c, this, address, SiteMask(typeMask, registerMask, AnyFrameIndex));
   }
 
   virtual const char* name() {
@@ -4274,15 +4284,14 @@ class BranchEvent: public Event {
 
     if (not unreachable(this)) {
       if (firstConstant and secondConstant) {
-        if (shouldJump(type, firstConstant->value->value(),
-                       secondConstant->value->value())
-            and reachable)
+        if (shouldJump(c, type, size, firstConstant->value->value(),
+                       secondConstant->value->value()))
        {
           apply(c, Jump, BytesPerWord, address->source, address->source);
         }
       } else {
         apply(c, type, size, first->source, first->next->source,
-              size, second->source, second->next->sourcem
+              size, second->source, second->next->source,
               BytesPerWord, address->source, address->source);
       }
     }
@@ -4294,21 +4303,16 @@ class BranchEvent: public Event {
 
   virtual bool isBranch() { return true; }
 
-  virtual bool allExits() {
-    return type == Jump and (exit or unreachable(this));
-  }
-
-  UnaryOperation type;
+  TernaryOperation type;
   unsigned size;
   Value* first;
   Value* second;
   Value* address;
-  bool exit;
 };
 
 void
 appendBranch(Context* c, TernaryOperation type, unsigned size, Value* first,
-             Value* second, Value* address, bool exit = false)
+             Value* second, Value* address)
 {
   bool thunk;
   uint8_t firstTypeMask;
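
shouldJump above folds a branch whose operands are both compile-time constants. The unordered cases matter once the operand bits are reinterpreted as floats: any NaN operand makes the comparison unordered. A compact, self-contained sketch of the same predicate logic follows; note the real signature takes b before a, matching the operand order of the emitted compare, while this sketch uses the plainer (a, b) order.

    // Sketch of constant-folding a fused branch: both operands are
    // known, so decide at compile time whether the jump is taken.
    // "Unordered" means at least one operand is NaN when viewed as a
    // float of the given size.
    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    enum TernaryOperation { JumpIfEqual, JumpIfLess,
                            JumpIfFloatLessOrUnordered };

    double asFloat(unsigned size, int64_t v) {
      if (size == 4) {
        float f; uint32_t bits = static_cast<uint32_t>(v);
        std::memcpy(&f, &bits, 4);
        return f;
      } else {
        double d;
        std::memcpy(&d, &v, 8);
        return d;
      }
    }

    bool unordered(double a, double b) {
      return std::isnan(a) or std::isnan(b);
    }

    bool shouldJump(TernaryOperation type, unsigned size, int64_t a,
                    int64_t b)
    {
      switch (type) {
      case JumpIfEqual: return a == b;
      case JumpIfLess:  return a < b;
      case JumpIfFloatLessOrUnordered:
        return asFloat(size, a) < asFloat(size, b)
          or unordered(asFloat(size, a), asFloat(size, b));
      }
      return false;
    }

    int main() {
      assert(shouldJump(JumpIfLess, 8, -1, 0));
      int64_t nanBits; double nan = std::nan("");
      std::memcpy(&nanBits, &nan, 8);
      assert(shouldJump(JumpIfFloatLessOrUnordered, 8, nanBits, 0));
    }
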
@@ -4318,7 +4322,7 @@ appendBranch(Context* c, TernaryOperation type, unsigned size, Value* first,
   c->arch->planSource(type, size, &firstTypeMask, &firstRegisterMask,
                       size, &secondTypeMask, &secondRegisterMask,
-                      resultSize, &thunk);
+                      BytesPerWord, &thunk);
 
   if (thunk) {
     Stack* oldStack = c->stack;
@@ -4329,20 +4333,21 @@ appendBranch(Context* c, TernaryOperation type, unsigned size, Value* first,
     Stack* argumentStack = c->stack;
     c->stack = oldStack;
 
-    Value* result = value(&c, ValueGeneral);
+    Value* result = value(c, ValueGeneral);
     appendCall
       (c, value
        (c, ValueGeneral, constantSite(c, c->client->getThunk(type, size, 4))),
-       0, 0, result, resultSize, argumentStack,
+       0, 0, result, 4, argumentStack,
        ceiling(size, BytesPerWord) * 2, 0);
 
-    appendBranch(c, JumpIfEqual, 4, value(c, ValueGeneral, constantSite(c, 0)),
+    appendBranch(c, JumpIfEqual, 4, value
+                 (c, ValueGeneral, constantSite(c, static_cast<int64_t>(0))),
                  result, address);
   } else {
     append
       (c, new (c->zone->allocate(sizeof(BranchEvent)))
       BranchEvent
-      (c, type, size, first, second, address, exit,
+      (c, type, size, first, second, address,
        SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex),
        SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex),
        SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex),
@@ -4350,12 +4355,51 @@ appendBranch(Context* c, TernaryOperation type, unsigned size, Value* first,
  }
 }
 
+class JumpEvent: public Event {
+ public:
+  JumpEvent(Context* c, UnaryOperation type, Value* address, bool exit):
+    Event(c), type(type), address(address), exit(exit)
+  {
+    bool thunk;
+    uint8_t typeMask;
+    uint64_t registerMask;
+    c->arch->plan(type, BytesPerWord, &typeMask, &registerMask, &thunk);
+
+    assert(c, not thunk);
+
+    addRead(c, this, address, SiteMask(typeMask, registerMask, AnyFrameIndex));
+  }
+
+  virtual const char* name() {
+    return "JumpEvent";
+  }
+
+  virtual void compile(Context* c) {
+    if (not unreachable(this)) {
+      apply(c, type, BytesPerWord, address->source, address->source);
+    }
+
+    for (Read* r = reads; r; r = r->eventNext) {
+      popRead(c, this, r->value);
+    }
+  }
+
+  virtual bool isBranch() { return true; }
+
+  virtual bool allExits() {
+    return exit or unreachable(this);
+  }
+
+  UnaryOperation type;
+  Value* address;
+  bool exit;
+};
+
 void
-appendBranch(Context* c, UnaryOperation type, Value* address,
-             bool exit = false)
+appendJump(Context* c, UnaryOperation type, Value* address, bool exit = false)
 {
-  append(c, new (c->zone->allocate(sizeof(BranchEvent)))
-         BranchEvent(c, type, address, exit));
+  append(c, new (c->zone->allocate(sizeof(JumpEvent)))
+         JumpEvent(c, type, address, exit));
 }
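
When planSource sets the thunk flag, the fused branch has no native encoding (for example, a float compare without SSE support), so appendBranch lowers it in two steps: call a per-operation helper, then branch on the helper's result through the recursive appendBranch(JumpIfEqual, ..., 0, result, address). A schematic of that lowering, assuming the helper returns zero exactly when the condition holds, which is the convention the jump-if-equal-to-zero recursion implies; the helper name is hypothetical:

    // Two-step lowering used when a fused branch has no native
    // encoding: call a helper that performs the comparison, then
    // branch on the helper's result.  lessThunk stands in for the
    // runtime thunk returned by getThunk(type, size, 4).
    #include <cstdint>
    #include <cstdio>

    extern "C" int64_t lessThunk(int64_t a, int64_t b) {
      return a < b ? 0 : 1;  // assumed convention: 0 means "condition holds"
    }

    void branchIfLess(int64_t a, int64_t b, void (*target)()) {
      // mirrors: result = call(thunk);
      //          appendBranch(JumpIfEqual, 4, 0, result, target)
      if (lessThunk(a, b) == 0) {
        target();
      }
    }

    void taken() { std::puts("branch taken"); }

    int main() { branchIfLess(1, 2, taken); }
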
 
 class BoundsCheckEvent: public Event {
@@ -4386,11 +4430,10 @@ class BoundsCheckEvent: public Event {
     } else {
       outOfBoundsPromise = codePromise(c, static_cast<Promise*>(0));
 
-      Site* zero = constantSite(c, resolved(c, 0));
-      Assembler::Constant outOfBoundsConstant(outOfBoundsPromise);
-      a->apply
-        (JumpIfLess, 4, zero, zero, 4, index->source, index->source,
-         BytesPerWord, ConstantOperand, &outOfBoundsConstant);
+      ConstantSite zero(resolved(c, 0));
+      ConstantSite oob(outOfBoundsPromise);
+      apply(c, JumpIfLess, 4, &zero, &zero, 4, index->source, index->source,
+            BytesPerWord, &oob, &oob);
     }
 
     assert(c, object->source->type(c) == RegisterOperand);
@@ -4398,9 +4441,9 @@ class BoundsCheckEvent: public Event {
                             lengthOffset, NoRegister, 1);
     length.acquired = true;
 
-    Assembler::Constant nextConstant(nextPromise);
-    a->apply(JumpIfGreater, 4, index->source, index->source, 4, &length,
-             &length, BytesPerWord, ConstantOperand, &nextConstant);
+    ConstantSite next(nextPromise);
+    apply(c, JumpIfGreater, 4, index->source, index->source, 4, &length,
+          &length, BytesPerWord, &next, &next);
 
     if (constant == 0) {
       outOfBoundsPromise->offset = a->offset();
@@ -6107,11 +6150,11 @@ class MyCompiler: public Compiler {
   }
 
   virtual void jmp(Operand* address) {
-    appendBranch(&c, Jump, 0, 0, 0, static_cast<Value*>(address));
+    appendJump(&c, Jump, static_cast<Value*>(address));
   }
 
   virtual void exit(Operand* address) {
-    appendBranch(&c, Jump, 0, 0, 0, static_cast<Value*>(address), true);
+    appendJump(&c, Jump, static_cast<Value*>(address), true);
  }
 
   virtual Operand* add(unsigned size, Operand* a, Operand* b) {
diff --git a/src/x86.cpp b/src/x86.cpp
index b7d39bf993..b4300452a2 100644
--- a/src/x86.cpp
+++ b/src/x86.cpp
@@ -13,6 +13,7 @@
 
 #define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
 #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
 
 const bool DebugSSE = false;
 const bool EnableSSE = true;
@@ -73,13 +74,17 @@ const unsigned FrameHeaderSize = 2;
 const unsigned StackAlignmentInBytes = 16;
 const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord;
 
-inline bool
+const unsigned NonBranchTernaryOperationCount = FloatMin + 1;
+const unsigned BranchOperationCount
+= JumpIfFloatGreaterOrEqualOrUnordered - FloatMin;
+
+bool
 isInt8(intptr_t v)
 {
   return v == static_cast<int8_t>(v);
 }
 
-inline bool
+bool
 isInt32(intptr_t v)
 {
   return v == static_cast<int32_t>(v);
 }
@@ -139,6 +144,10 @@ typedef void (*UnaryOperationType)(Context*, unsigned, Assembler::Operand*);
 typedef void (*BinaryOperationType)
 (Context*, unsigned, Assembler::Operand*, unsigned, Assembler::Operand*);
 
+typedef void (*BranchOperationType)
+(Context*, TernaryOperation, unsigned, Assembler::Operand*,
+ Assembler::Operand*, Assembler::Operand*);
+
 class ArchitectureContext {
  public:
   ArchitectureContext(System* s): s(s) { }
@@ -148,38 +157,42 @@ class ArchitectureContext {
   UnaryOperationType unaryOperations[UnaryOperationCount * OperandTypeCount];
 
   BinaryOperationType binaryOperations
-  [(BinaryOperationCount + TernaryOperationCount)
+  [(BinaryOperationCount + NonBranchTernaryOperationCount)
    * OperandTypeCount
    * OperandTypeCount];
+
+  BranchOperationType branchOperations
+  [(BranchOperationCount) * OperandTypeCount * OperandTypeCount];
 };
 
-inline void NO_RETURN
+void NO_RETURN
 abort(Context* c)
 {
   abort(c->s);
 }
 
-inline void NO_RETURN
+void NO_RETURN
 abort(ArchitectureContext* c)
 {
   abort(c->s);
 }
 
 #ifndef NDEBUG
-inline void
+void
 assert(Context* c, bool v)
 {
   assert(c->s, v);
 }
 
-inline void
+void
 assert(ArchitectureContext* c, bool v)
 {
   assert(c->s, v);
 }
 #endif // not NDEBUG
 
-inline void
+void
 expect(Context* c, bool v)
 {
   expect(c->s, v);
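
The ArchitectureContext tables flatten (operation, operandType1, operandType2) into one array, and splitting branches out into branchOperations shrinks the shared table from TernaryOperationCount to NonBranchTernaryOperationCount rows per operand-type pair. A sketch of the indexing invariant, with illustrative sizes rather than Avian's real constants:

    // Flattened 3-D dispatch-table indexing as used by index() and
    // branchIndex(): the operation is the fastest-varying dimension,
    // then operand1, then operand2.
    #include <cassert>

    const unsigned OperandTypeCount = 4;       // e.g. C, A, R, M
    const unsigned BinaryOperationCount = 20;  // illustrative sizes
    const unsigned NonBranchTernaryOperationCount = 12;

    unsigned index(unsigned op, unsigned operand1, unsigned operand2) {
      const unsigned row
        = BinaryOperationCount + NonBranchTernaryOperationCount;
      return op + (row * operand1) + (row * OperandTypeCount * operand2);
    }

    unsigned branchIndex(unsigned operand1, unsigned operand2) {
      return operand1 + (OperandTypeCount * operand2);
    }

    int main() {
      // every (op, t1, t2) triple maps to a distinct slot
      assert(index(0, 0, 0) == 0);
      assert(index(0, 1, 0) == 32);      // one row of ops per operand1 step
      assert(index(0, 0, 1) == 32 * 4);  // one plane per operand2 step
      assert(branchIndex(3, 3) == 15);   // last slot of a 4x4 branch table
    }
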
"" : "not "); - } - } - return supported; + static int supported = -1; + if(supported == -1) { + supported = EnableSSE2 and detectFeature(0, 0x4000000); + if(DebugSSE) { + fprintf(stderr, "sse2 %sdetected.\n", supported ? "" : "not "); + } + } + return supported; } #define REX_W 0x48 @@ -475,59 +488,78 @@ void maybeRex(Context* c, unsigned size, int a, int index, int base, } else { byte = REX_NONE; } - if(a != NoRegister && (a & 8)) byte |= REX_R; - if(index != NoRegister && (index & 8)) byte |= REX_X; - if(base != NoRegister && (base & 8)) byte |= REX_B; + if(a != NoRegister and (a & 8)) byte |= REX_R; + if(index != NoRegister and (index & 8)) byte |= REX_X; + if(base != NoRegister and (base & 8)) byte |= REX_B; if(always or byte != REX_NONE) c->code.append(byte); } } -inline void maybeRex(Context* c, unsigned size, Assembler::Register* a, - Assembler::Register* b) { +void +maybeRex(Context* c, unsigned size, Assembler::Register* a, + Assembler::Register* b) +{ maybeRex(c, size, a->low, NoRegister, b->low, false); } -inline void alwaysRex(Context* c, unsigned size, Assembler::Register* a, - Assembler::Register* b) { +void +alwaysRex(Context* c, unsigned size, Assembler::Register* a, + Assembler::Register* b) +{ maybeRex(c, size, a->low, NoRegister, b->low, true); } -inline void maybeRex(Context* c, unsigned size, Assembler::Register* a) { +void +maybeRex(Context* c, unsigned size, Assembler::Register* a) +{ maybeRex(c, size, NoRegister, NoRegister, a->low, false); } -inline void maybeRex(Context* c, unsigned size, Assembler::Register* a, - Assembler::Memory* b) { +void +maybeRex(Context* c, unsigned size, Assembler::Register* a, + Assembler::Memory* b) +{ maybeRex(c, size, a->low, b->index, b->base, false); } -inline void maybeRex(Context* c, unsigned size, Assembler::Memory* a) { +void +maybeRex(Context* c, unsigned size, Assembler::Memory* a) +{ maybeRex(c, size, NoRegister, a->index, a->base, false); } -inline int regCode(int a) { +int +regCode(int a) +{ return a & 7; } -inline int regCode(Assembler::Register* a) { +int +regCode(Assembler::Register* a) +{ return regCode(a->low); } -inline void modrm(Context* c, uint8_t mod, int a, int b) { +void +modrm(Context* c, uint8_t mod, int a, int b) +{ c->code.append(mod | (regCode(b) << 3) | regCode(a)); } -inline void modrm(Context* c, uint8_t mod, Assembler::Register* a, - Assembler::Register* b) { +void +modrm(Context* c, uint8_t mod, Assembler::Register* a, Assembler::Register* b) +{ modrm(c, mod, a->low, b->low); } -inline void sib(Context* c, unsigned scale, int index, int base) { +void +sib(Context* c, unsigned scale, int index, int base) +{ c->code.append((log(scale) << 6) | (regCode(index) << 3) | regCode(base)); } -inline void modrmSib(Context* c, int width, int a, int scale, int index, - int base) +void +modrmSib(Context* c, int width, int a, int scale, int index, int base) { if(index == NoRegister) { modrm(c, width, base, a); @@ -540,10 +572,10 @@ inline void modrmSib(Context* c, int width, int a, int scale, int index, } } -inline void modrmSibImm(Context* c, int a, int scale, int index, int base, - int offset) +void +modrmSibImm(Context* c, int a, int scale, int index, int base, int offset) { - if(offset == 0 && regCode(base) != rbp) { + if(offset == 0 and regCode(base) != rbp) { modrmSib(c, 0x00, a, scale, index, base); } else if(isInt8(offset)) { modrmSib(c, 0x40, a, scale, index, base); @@ -555,21 +587,28 @@ inline void modrmSibImm(Context* c, int a, int scale, int index, int base, } -inline void modrmSibImm(Context* c, 
@@ -555,21 +587,28 @@ inline void modrmSibImm(Context* c, int a, int scale, int index, int base,
 }
 
-inline void modrmSibImm(Context* c, Assembler::Register* a,
-                        Assembler::Memory* b) {
+void
+modrmSibImm(Context* c, Assembler::Register* a, Assembler::Memory* b)
+{
   modrmSibImm(c, a->low, b->scale, b->index, b->base, b->offset);
 }
 
-inline void opcode(Context* c, uint8_t op) {
+void
+opcode(Context* c, uint8_t op)
+{
   c->code.append(op);
 }
 
-inline void opcode(Context* c, uint8_t op1, uint8_t op2) {
+void
+opcode(Context* c, uint8_t op1, uint8_t op2)
+{
   c->code.append(op1);
   c->code.append(op2);
 }
 
-inline void opcode(Context* c, uint8_t op1, uint8_t op2, uint8_t op3) {
+void
+opcode(Context* c, uint8_t op1, uint8_t op2, uint8_t op3)
+{
   c->code.append(op1);
   c->code.append(op2);
   c->code.append(op3);
@@ -603,34 +642,54 @@ conditional(Context* c, unsigned condition, Assembler::Constant* a)
   c->code.append4(0);
 }
 
-inline unsigned
-index(UnaryOperation operation, OperandType operand)
+unsigned
+index(ArchitectureContext*, UnaryOperation operation, OperandType operand)
 {
   return operation + (UnaryOperationCount * operand);
 }
 
-inline unsigned
-index(BinaryOperation operation,
+unsigned
+index(ArchitectureContext*, BinaryOperation operation,
       OperandType operand1,
       OperandType operand2)
 {
   return operation
-    + ((BinaryOperationCount + TernaryOperationCount) * operand1)
-    + ((BinaryOperationCount + TernaryOperationCount)
+    + ((BinaryOperationCount + NonBranchTernaryOperationCount) * operand1)
+    + ((BinaryOperationCount + NonBranchTernaryOperationCount)
       * OperandTypeCount * operand2);
 }
 
-inline unsigned
-index(TernaryOperation operation,
-      OperandType operand1,
-      OperandType operand2)
+bool
+isBranch(TernaryOperation op)
 {
+  return op > FloatMin;
+}
+
+bool
+isFloatBranch(TernaryOperation op)
+{
+  return op > JumpIfNotEqual;
+}
+
+unsigned
+index(ArchitectureContext* c UNUSED, TernaryOperation operation,
+      OperandType operand1, OperandType operand2)
+{
+  assert(c, not isBranch(operation));
+
   return BinaryOperationCount + operation
-    + ((BinaryOperationCount + TernaryOperationCount) * operand1)
-    + ((BinaryOperationCount + TernaryOperationCount)
+    + ((BinaryOperationCount + NonBranchTernaryOperationCount) * operand1)
+    + ((BinaryOperationCount + NonBranchTernaryOperationCount)
       * OperandTypeCount * operand2);
 }
 
+unsigned
+branchIndex(ArchitectureContext* c UNUSED, OperandType operand1,
+            OperandType operand2)
+{
+  return operand1 + (OperandTypeCount * operand2);
+}
+
 void
 moveCR(Context* c, unsigned aSize, Assembler::Constant* a,
        unsigned bSize, Assembler::Register* b);
@@ -691,94 +750,6 @@ jumpM(Context* c, unsigned size UNUSED, Assembler::Memory* a)
   modrmSibImm(c, rsp, a->scale, a->index, a->base, a->offset);
 }
 
-void
-jumpIfEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x84, a);
-}
-
-void
-jumpIfNotEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x85, a);
-}
-
-void
-jumpIfGreaterC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x8f, a);
-}
-
-void
-jumpIfGreaterOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x8d, a);
-}
-
-void
-jumpIfLessC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x8c, a);
-}
-
-void
-jumpIfLessOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x8e, a);
-}
-
-void
-jumpIfFloatUnorderedC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x8a, a);
-}
-
-void
-jumpIfFloatGreaterC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x87, a);
-}
-
-void
-jumpIfFloatGreaterOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x83, a);
-}
-
-void
-jumpIfFloatLessC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x82, a);
-}
-
-void
-jumpIfFloatLessOrEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* a)
-{
-  assert(c, size == BytesPerWord);
-
-  conditional(c, 0x86, a);
-}
-
 void
 longJumpC(Context* c, unsigned size, Assembler::Constant* a)
 {
@@ -798,7 +769,7 @@ callR(Context* c, unsigned size UNUSED, Assembler::Register* a)
 {
   assert(c, size == BytesPerWord);
 
-  //maybeRex.W has no meaning here so we disable it
+  // maybeRex.W has no meaning here so we disable it
   maybeRex(c, 4, a);
   opcode(c, 0xff, 0xd0 + regCode(a));
 }
@@ -941,42 +912,44 @@ moveCR2(Context* c, UNUSED unsigned aSize, Assembler::Constant* a,
   }
 }
 
-inline bool floatReg(Assembler::Register* a) {
-	return a->low >= xmm0;
+bool
+floatReg(Assembler::Register* a)
+{
+  return a->low >= xmm0;
 }
 
 void
 sseMoveRR(Context* c, unsigned aSize, Assembler::Register* a,
-	  unsigned bSize UNUSED, Assembler::Register* b)
+          unsigned bSize UNUSED, Assembler::Register* b)
 {
-  if(floatReg(a) && floatReg(b)) {
-    if(aSize == 4) {
-      opcode(c, 0xf3);
-      maybeRex(c, 4, a, b);
-      opcode(c, 0x0f, 0x10);
-      modrm(c, 0xc0, b, a);
-    } else {
-      opcode(c, 0xf2);
-      maybeRex(c, 4, a, b);
-      opcode(c, 0x0f, 0x10);
-      modrm(c, 0xc0, b, a);
-    }
-  } else if(floatReg(a)) {
-    opcode(c, 0x66);
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x0f, 0x7e);
-    modrm(c, 0xc0, b, a);
+  if (floatReg(a) and floatReg(b)) {
+    if (aSize == 4) {
+      opcode(c, 0xf3);
+      maybeRex(c, 4, a, b);
+      opcode(c, 0x0f, 0x10);
+      modrm(c, 0xc0, b, a);
+    } else {
+      opcode(c, 0xf2);
+      maybeRex(c, 4, a, b);
+      opcode(c, 0x0f, 0x10);
+      modrm(c, 0xc0, b, a);
+    }
+  } else if (floatReg(a)) {
+    opcode(c, 0x66);
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x0f, 0x7e);
+    modrm(c, 0xc0, b, a);
   } else {
-    opcode(c, 0x66);
-    maybeRex(c, aSize, b, a);
-    opcode(c, 0x0f, 0x6e);
-    modrm(c, 0xc0, a, b);
+    opcode(c, 0x66);
+    maybeRex(c, aSize, b, a);
+    opcode(c, 0x0f, 0x6e);
+    modrm(c, 0xc0, a, b);
   }
 }
 
 void
 sseMoveCR(Context* c, unsigned aSize, Assembler::Constant* a,
-	  unsigned bSize, Assembler::Register* b)
+          unsigned bSize, Assembler::Register* b)
 {
   assert(c, aSize <= BytesPerWord);
   Assembler::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
@@ -1013,8 +986,8 @@ moveRR(Context* c, unsigned aSize, Assembler::Register* a,
        UNUSED unsigned bSize, Assembler::Register* b)
 {
   if(floatReg(a) or floatReg(b)) {
-	  sseMoveRR(c, aSize, a, bSize, b);
-	  return;
+    sseMoveRR(c, aSize, a, bSize, b);
+    return;
   }
 
   if (BytesPerWord == 4 and aSize == 8 and bSize == 8) {
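
The branch() and branchFloat() helpers added in the next hunk map each fused operation onto a near conditional jump, encoded as 0x0F, (0x80 | cc), rel32 and emitted through conditional(). Integer compares use the signed condition codes, while float compares, which set flags via ucomiss/ucomisd, use the unsigned codes plus jp for NaN. The mapping as a small table sketch:

    // Condition-code map behind branch()/branchFloat(): a near Jcc is
    // 0x0F, (0x80 | cc), rel32.  Integer compares use signed codes;
    // float compares use the unsigned codes, since ucomis* sets CF/ZF.
    #include <cassert>
    #include <cstdint>

    enum Cond { Equal, NotEqual, Less, Greater, LessOrEqual,
                GreaterOrEqual };

    uint8_t integerJcc(Cond c) {
      switch (c) {
      case Equal:          return 0x84;  // je
      case NotEqual:       return 0x85;  // jne
      case Less:           return 0x8c;  // jl
      case Greater:        return 0x8f;  // jg
      case LessOrEqual:    return 0x8e;  // jle
      case GreaterOrEqual: return 0x8d;  // jge
      }
      return 0;
    }

    uint8_t floatJcc(Cond c) {
      switch (c) {
      case Equal:          return 0x84;  // je
      case NotEqual:       return 0x85;  // jne
      case Less:           return 0x82;  // jb
      case Greater:        return 0x87;  // ja
      case LessOrEqual:    return 0x86;  // jbe
      case GreaterOrEqual: return 0x83;  // jae
      }
      return 0;
    }

    int main() {
      assert(integerJcc(Less) == 0x8c and floatJcc(Less) == 0x82);
      // The "...OrUnordered" variants emit a second jump, jp (0x8a),
      // because an unordered compare sets the parity flag.
    }
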
@@ -1732,16 +1705,102 @@ multiplyRR(Context* c, unsigned aSize, Assembler::Register* a,
   }
 }
 
+void
+branch(Context* c, TernaryOperation op, Assembler::Constant* target)
+{
+  switch (op) {
+  case JumpIfEqual:
+    conditional(c, 0x84, target);
+    break;
+
+  case JumpIfNotEqual:
+    conditional(c, 0x85, target);
+    break;
+
+  case JumpIfLess:
+    conditional(c, 0x8c, target);
+    break;
+
+  case JumpIfGreater:
+    conditional(c, 0x8f, target);
+    break;
+
+  case JumpIfLessOrEqual:
+    conditional(c, 0x8e, target);
+    break;
+
+  case JumpIfGreaterOrEqual:
+    conditional(c, 0x8d, target);
+    break;
+
+  default:
+    abort(c);
+  }
+}
+
+void
+branchFloat(Context* c, TernaryOperation op, Assembler::Constant* target)
+{
+  switch (op) {
+  case JumpIfFloatEqual:
+    conditional(c, 0x84, target);
+    break;
+
+  case JumpIfFloatNotEqual:
+    conditional(c, 0x85, target);
+    break;
+
+  case JumpIfFloatLess:
+    conditional(c, 0x82, target);
+    break;
+
+  case JumpIfFloatGreater:
+    conditional(c, 0x87, target);
+    break;
+
+  case JumpIfFloatLessOrEqual:
+    conditional(c, 0x86, target);
+    break;
+
+  case JumpIfFloatGreaterOrEqual:
+    conditional(c, 0x83, target);
+    break;
+
+  case JumpIfFloatLessOrUnordered:
+    conditional(c, 0x82, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  case JumpIfFloatGreaterOrUnordered:
+    conditional(c, 0x87, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  case JumpIfFloatLessOrEqualOrUnordered:
+    conditional(c, 0x86, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  case JumpIfFloatGreaterOrEqualOrUnordered:
+    conditional(c, 0x83, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  default:
+    abort(c);
+  }
+}
+
 void
 compareRR(Context* c, unsigned aSize, Assembler::Register* a,
           unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, aSize == bSize);
-  
+  assert(c, aSize <= BytesPerWord);
 
   maybeRex(c, aSize, a, b);
   opcode(c, 0x39);
-  modrm(c, 0xc0, b, a);	
+  modrm(c, 0xc0, b, a);
 }
 
 void
@@ -1769,46 +1828,6 @@ compareCR(Context* c, unsigned aSize, Assembler::Constant* a,
   }
 }
 
-void
-multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a,
-           unsigned bSize, Assembler::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  if (BytesPerWord == 4 and aSize == 8) {
-    const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
-    Assembler::Register tmp(c->client->acquireTemporary(mask),
-                            c->client->acquireTemporary(mask));
-
-    moveCR(c, aSize, a, aSize, &tmp);
-    multiplyRR(c, aSize, &tmp, bSize, b);
-    c->client->releaseTemporary(tmp.low);
-    c->client->releaseTemporary(tmp.high);
-  } else {
-    int64_t v = a->value->value();
-    if (v != 1) {
-      if (isInt32(v)) {
-        maybeRex(c, bSize, b, b);
-        if (isInt8(v)) {
-          opcode(c, 0x6b);
-          modrm(c, 0xc0, b, b);
-          c->code.append(v);
-        } else {
-          opcode(c, 0x69);
-          modrm(c, 0xc0, b, b);
-          c->code.append4(v);
-        }
-      } else {
-        Assembler::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, aSize, a, aSize, &tmp);
-        multiplyRR(c, aSize, &tmp, bSize, b);
-        c->client->releaseTemporary(tmp.low);
-      }
-    }
-  }
-}
-
 void
 compareRM(Context* c, unsigned aSize, Assembler::Register* a,
           unsigned bSize UNUSED, Assembler::Memory* b)
@@ -1853,107 +1872,205 @@ compareCM(Context* c, unsigned aSize, Assembler::Constant* a,
 }
 
 void
-longCompare(Context* c, Assembler::Operand* al, UNUSED Assembler::Operand* ah,
-            Assembler::Register* bl, UNUSED Assembler::Operand* bh,
-            BinaryOperationType compare)
+compareFloatRR(Context* c, unsigned aSize, Assembler::Register* a,
+               unsigned bSize UNUSED, Assembler::Register* b)
 {
-  ResolvedPromise negativePromise(-1);
-  Assembler::Constant negative(&negativePromise);
+  assert(c, aSize == bSize);
 
-  ResolvedPromise zeroPromise(0);
-  Assembler::Constant zero(&zeroPromise);
+  if (aSize == 8) {
+    opcode(c, 0x66);
+  }
+  maybeRex(c, 4, a, b);
+  opcode(c, 0x0f, 0x2e);
+  modrm(c, 0xc0, a, b);
+}
 
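
compareFloatRR above emits ucomiss (or, with the 0x66 prefix, ucomisd), which reports "unordered" through the parity flag when either operand is NaN; the ...OrUnordered branch variants are therefore emitted as two jumps, for example jb followed by jp. The predicates they implement, written as plain C++, also show why an iflt following fcmpl stays correct when NaN is involved:

    // What the ucomis*-based branches compute: any comparison
    // involving a NaN is "unordered", and the "...OrUnordered" jumps
    // treat unordered as taken.
    #include <cassert>
    #include <cmath>

    bool unordered(double a, double b) {
      return std::isnan(a) or std::isnan(b);
    }

    bool lessOrUnordered(double a, double b) {     // jb or jp
      return a < b or unordered(a, b);
    }

    bool greaterOrUnordered(double a, double b) {  // ja or jp
      return a > b or unordered(a, b);
    }

    int main() {
      double nan = std::nan("");
      assert(lessOrUnordered(nan, 1.0));
      assert(greaterOrUnordered(nan, 1.0));
      assert(not lessOrUnordered(2.0, 1.0));
    }
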
-  ResolvedPromise positivePromise(1);
-  Assembler::Constant positive(&positivePromise);
+void
+branchLong(Context* c, TernaryOperation op, Assembler::Operand* al,
+           Assembler::Operand* ah, Assembler::Operand* bl,
+           Assembler::Operand* bh, Assembler::Constant* target,
+           BinaryOperationType compare)
+{
+  compare(c, 4, ah, 4, bh);
+
+  unsigned next = 0;
 
-  if (BytesPerWord == 8) {
-    compare(c, 8, al, 8, bl);
-
-    opcode(c, 0x0f, 0x8c); // jl
-    unsigned less = c->code.length();
-    c->code.append4(0);
-
-    opcode(c, 0x0f, 0x8f); // jg
-    unsigned greater = c->code.length();
-    c->code.append4(0);
-
-    moveCR(c, 4, &zero, 4, bl);
-
-    opcode(c, 0xe9); // jmp
-    unsigned nextFirst = c->code.length();
-    c->code.append4(0);
-
-    int32_t lessOffset = c->code.length() - less - 4;
-    c->code.set(less, &lessOffset, 4);
-
-    moveCR(c, 4, &negative, 4, bl);
-
-    opcode(c, 0xe9); // jmp
-    unsigned nextSecond = c->code.length();
-    c->code.append4(0);
-
-    int32_t greaterOffset = c->code.length() - greater - 4;
-    c->code.set(greater, &greaterOffset, 4);
-
-    moveCR(c, 4, &positive, 4, bl);
-
-    int32_t nextFirstOffset = c->code.length() - nextFirst - 4;
-    c->code.set(nextFirst, &nextFirstOffset, 4);
-
-    int32_t nextSecondOffset = c->code.length() - nextSecond - 4;
-    c->code.set(nextSecond, &nextSecondOffset, 4);
-  } else {
-    compare(c, 4, ah, 4, bh);
-
-    opcode(c, 0x0f, 0x8c); //jl
-    unsigned less = c->code.length();
-    c->code.append4(0);
-
-    opcode(c, 0x0f, 0x8f); //jg
-    unsigned greater = c->code.length();
-    c->code.append4(0);
+  switch (op) {
+  case JumpIfEqual:
+    opcode(c, 0x75); // jne
+    next = c->code.length();
+    c->code.append(0);
 
     compare(c, 4, al, 4, bl);
+    conditional(c, 0x84, target); // je
+    break;
 
-    opcode(c, 0x0f, 0x82); //ja
-    unsigned above = c->code.length();
-    c->code.append4(0);
+  case JumpIfNotEqual:
+    conditional(c, 0x85, target); // jne
 
-    opcode(c, 0x0f, 0x87); //jb
-    unsigned below = c->code.length();
-    c->code.append4(0);
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x85, target); // jne
+    break;
 
-    moveCR(c, 4, &zero, 4, bl);
-
-    c->code.append(0xe9); // jmp
-    unsigned nextFirst = c->code.length();
-    c->code.append4(0);
+  case JumpIfLess:
+    conditional(c, 0x8c, target); // jl
 
-    int32_t lessOffset = c->code.length() - less - 4;
-    c->code.set(less, &lessOffset, 4);
+    opcode(c, 0x7f); // jg
+    next = c->code.length();
+    c->code.append(0);
 
-    int32_t aboveOffset = c->code.length() - above - 4;
-    c->code.set(above, &aboveOffset, 4);
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x82, target); // jb
+    break;
 
-    moveCR(c, 4, &negative, 4, bl);
+  case JumpIfGreater:
+    conditional(c, 0x8f, target); // jg
 
-    opcode(c, 0xe9); // jmp
-    unsigned nextSecond = c->code.length();
-    c->code.append4(0);
+    opcode(c, 0x7c); // jl
+    next = c->code.length();
+    c->code.append(0);
 
-    int32_t greaterOffset = c->code.length() - greater - 4;
-    c->code.set(greater, &greaterOffset, 4);
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x87, target); // ja
+    break;
 
-    int32_t belowOffset = c->code.length() - below - 4;
-    c->code.set(below, &belowOffset, 4);
+  case JumpIfLessOrEqual:
+    conditional(c, 0x8c, target); // jl
 
-    moveCR(c, 4, &positive, 4, bl);
+    opcode(c, 0x7f); // jg
+    next = c->code.length();
+    c->code.append(0);
 
-    int32_t nextFirstOffset = c->code.length() - nextFirst - 4;
-    c->code.set(nextFirst, &nextFirstOffset, 4);
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x86, target); // jbe
+    break;
 
-    int32_t nextSecondOffset = c->code.length() - nextSecond - 4;
-    c->code.set(nextSecond, &nextSecondOffset, 4);
+  case JumpIfGreaterOrEqual:
+    conditional(c, 0x8f, target); // jg
+
+    opcode(c, 0x7c); // jl
+    next = c->code.length();
+    c->code.append(0);
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x83, target); // jae
+    break;
+
+  default:
+    abort(c);
+  }
+
+  if (next) {
+    int8_t nextOffset = c->code.length() - next - 1;
+    c->code.set(next, &nextOffset, 1);
+  }
+}
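
branchLong above is the 32-bit fallback for a 64-bit compare-and-branch: compare the high words with signed conditions, and only when they are equal compare the low words with unsigned conditions. The same decision procedure in portable C++, assuming arithmetic right shift on signed values:

    // The branchLong strategy in plain C++: signed compare on the
    // high words decides unless they are equal, in which case an
    // unsigned compare on the low words decides.
    #include <cassert>
    #include <cstdint>

    bool less64(int64_t a, int64_t b) {
      int32_t  ah = static_cast<int32_t>(a >> 32);
      int32_t  bh = static_cast<int32_t>(b >> 32);
      uint32_t al = static_cast<uint32_t>(a);
      uint32_t bl = static_cast<uint32_t>(b);

      if (ah < bh) return true;   // the jl on the high-word compare
      if (ah > bh) return false;  // the jg skips to "not taken"
      return al < bl;             // the jb on the low-word compare
    }

    int main() {
      assert(less64(-1, 0));                        // high words differ
      assert(less64(0x100000000LL, 0x100000001LL)); // low words decide
      assert(not less64(0x1FFFFFFFFLL, 0x100000000LL));
    }
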
 
+void
+branchRR(Context* c, TernaryOperation op, unsigned size,
+         Assembler::Register* a, Assembler::Register* b,
+         Assembler::Constant* target)
+{
+  if (isFloatBranch(op)) {
+    compareFloatRR(c, size, a, size, b);
+    branchFloat(c, op, target);
+  } else if (size > BytesPerWord) {
+    Assembler::Register ah(a->high);
+    Assembler::Register bh(b->high);
+
+    branchLong(c, op, a, &ah, b, &bh, target, CAST2(compareRR));
+  } else {
+    compareRR(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+void
+branchCR(Context* c, TernaryOperation op, unsigned size,
+         Assembler::Constant* a, Assembler::Register* b,
+         Assembler::Constant* target)
+{
+  assert(c, not isFloatBranch(op));
+
+  if (size > BytesPerWord) {
+    int64_t v = a->value->value();
+
+    ResolvedPromise low(v & ~static_cast<uint32_t>(0));
+    Assembler::Constant al(&low);
+
+    ResolvedPromise high((v >> 32) & ~static_cast<uint32_t>(0));
+    Assembler::Constant ah(&high);
+
+    Assembler::Register bh(b->high);
+
+    branchLong(c, op, &al, &ah, b, &bh, target, CAST2(compareCR));
+  } else {
+    compareCR(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+void
+branchRM(Context* c, TernaryOperation op, unsigned size,
+         Assembler::Register* a, Assembler::Memory* b,
+         Assembler::Constant* target)
+{
+  assert(c, not isFloatBranch(op));
+  assert(c, size <= BytesPerWord);
+
+  compareRM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+void
+branchCM(Context* c, TernaryOperation op, unsigned size,
+         Assembler::Constant* a, Assembler::Memory* b,
+         Assembler::Constant* target)
+{
+  assert(c, not isFloatBranch(op));
+  assert(c, size <= BytesPerWord);
+
+  compareCM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+void
+multiplyCR(Context* c, unsigned aSize, Assembler::Constant* a,
+           unsigned bSize, Assembler::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  if (BytesPerWord == 4 and aSize == 8) {
+    const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
+    Assembler::Register tmp(c->client->acquireTemporary(mask),
+                            c->client->acquireTemporary(mask));
+
+    moveCR(c, aSize, a, aSize, &tmp);
+    multiplyRR(c, aSize, &tmp, bSize, b);
+    c->client->releaseTemporary(tmp.low);
+    c->client->releaseTemporary(tmp.high);
+  } else {
+    int64_t v = a->value->value();
+    if (v != 1) {
+      if (isInt32(v)) {
+        maybeRex(c, bSize, b, b);
+        if (isInt8(v)) {
+          opcode(c, 0x6b);
+          modrm(c, 0xc0, b, b);
+          c->code.append(v);
+        } else {
+          opcode(c, 0x69);
+          modrm(c, 0xc0, b, b);
+          c->code.append4(v);
+        }
+      } else {
+        Assembler::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, aSize, a, aSize, &tmp);
+        multiplyRR(c, aSize, &tmp, bSize, b);
+        c->client->releaseTemporary(tmp.low);
+      }
+    }
+  }
+}
@@ -1994,39 +2111,6 @@ remainderRR(Context* c, unsigned aSize, Assembler::Register* a,
   moveRR(c, BytesPerWord, &dx, BytesPerWord, b);
 }
 
-void
-longCompareCR(Context* c, unsigned aSize UNUSED, Assembler::Constant* a,
-              unsigned bSize UNUSED, Assembler::Register* b)
-{
-  assert(c, aSize == 8);
-  assert(c, bSize == 8);
-
-  int64_t v = a->value->value();
-
-  ResolvedPromise low(v & ~static_cast<uint32_t>(0));
-  Assembler::Constant al(&low);
-
-  ResolvedPromise high((v >> 32) & ~static_cast<uint32_t>(0));
-  Assembler::Constant ah(&high);
-
-  Assembler::Register bh(b->high);
-
-  longCompare(c, &al, &ah, b, &bh, CAST2(compareCR));
-}
-
-void
-longCompareRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a,
-              unsigned bSize UNUSED, Assembler::Register* b)
-{
-  assert(c, aSize == 8);
-  assert(c, bSize == 8);
-
-  Assembler::Register ah(a->high);
-  Assembler::Register bh(b->high);
-
-  longCompare(c, a, &ah, b, &bh, CAST2(compareRR));
-}
-
 void
 doShift(Context* c, UNUSED void (*shift)
         (Context*, unsigned, Assembler::Register*, unsigned,
@@ -2073,8 +2157,8 @@ shiftLeftRR(Context* c, UNUSED unsigned aSize, Assembler::Register* a,
     Assembler::Constant constant(&promise);
     compareCR(c, aSize, &constant, aSize, a);
 
-    opcode(c, 0x0f, 0x8c); //jl
-    c->code.append4(2 + 2);
+    opcode(c, 0x7c); //jl
+    c->code.append(2 + 2);
 
     Assembler::Register bh(b->high);
     moveRR(c, 4, b, 4, &bh); // 2 bytes
@@ -2109,8 +2193,8 @@ shiftRightRR(Context* c, UNUSED unsigned aSize, Assembler::Register* a,
     Assembler::Constant constant(&promise);
     compareCR(c, aSize, &constant, aSize, a);
 
-    opcode(c, 0x0f, 0x8c); //jl
-    c->code.append4(2 + 3);
+    opcode(c, 0x7c); //jl
+    c->code.append(2 + 3);
 
     Assembler::Register bh(b->high);
     moveRR(c, 4, &bh, 4, b); // 2 bytes
@@ -2149,8 +2233,8 @@ unsignedShiftRightRR(Context* c, UNUSED unsigned aSize, Assembler::Register* a,
     Assembler::Constant constant(&promise);
     compareCR(c, aSize, &constant, aSize, a);
 
-    opcode(c, 0x0f, 0x8c); //jl
-    c->code.append4(2 + 2);
+    opcode(c, 0x7c); //jl
+    c->code.append(2 + 2);
 
     Assembler::Register bh(b->high);
     moveRR(c, 4, &bh, 4, b); // 2 bytes
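
The shift fixes above replace a six-byte near jl (0F 8C rel32) with a two-byte short jl (7C rel8). The displacement counts from the end of the jump instruction, so append(2 + 2) skips exactly the two 2-byte instructions that follow. A sketch of the encoding bookkeeping:

    // Short-form conditional jump: 7C cb is "jl rel8", where the
    // 8-bit displacement is measured from the byte after the jump.
    #include <cassert>

    unsigned shortJl(unsigned char* buf, unsigned skippedBytes) {
      buf[0] = 0x7c;  // jl rel8
      buf[1] = static_cast<unsigned char>(skippedBytes);
      return 2;       // encoded size of the jump itself
    }

    int main() {
      unsigned char buf[8];
      // skip two 2-byte instructions, as in the 64-bit shift paths
      unsigned n = shortJl(buf, 2 + 2);
      assert(n == 2 and buf[1] == 4);
      // the old code encoded the same skip as 0F 8C 04 00 00 00,
      // six bytes, which was correct but wasteful.
    }
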
@@ -2168,11 +2252,11 @@ unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, Assembler::Constant* a,
   doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b);
 }
 
-inline void floatRegOp(Context* c, unsigned aSize, Assembler::Register* a,
-                       unsigned bSize, Assembler::Register* b, uint8_t op,
-                       uint8_t mod = 0xc0)
+void
+floatRegOp(Context* c, unsigned aSize, Assembler::Register* a, unsigned bSize,
+           Assembler::Register* b, uint8_t op, uint8_t mod = 0xc0)
 {
-  if(aSize == 4) {
+  if (aSize == 4) {
     opcode(c, 0xf3);
   } else {
     opcode(c, 0xf2);
@@ -2182,10 +2266,11 @@ inline void floatRegOp(Context* c, unsigned aSize, Assembler::Register* a,
   modrm(c, mod, a, b);
 }
 
-inline void floatMemOp(Context* c, unsigned aSize, Assembler::Memory* a,
-                       unsigned bSize, Assembler::Register* b, uint8_t op)
+void
+floatMemOp(Context* c, unsigned aSize, Assembler::Memory* a, unsigned bSize,
+           Assembler::Register* b, uint8_t op)
 {
-  if(aSize == 4) {
+  if (aSize == 4) {
     opcode(c, 0xf3);
   } else {
     opcode(c, 0xf2);
@@ -2197,77 +2282,77 @@ inline void floatMemOp(Context* c, unsigned aSize, Assembler::Memory* a,
 
 void
 floatSqrtRR(Context* c, unsigned aSize, Assembler::Register* a,
-	    unsigned bSize UNUSED, Assembler::Register* b)
+            unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatRegOp(c, aSize, a, 4, b, 0x51);
 }
 
 void
 floatSqrtMR(Context* c, unsigned aSize, Assembler::Memory* a,
-	    unsigned bSize UNUSED, Assembler::Register* b)
+            unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatMemOp(c, aSize, a, 4, b, 0x51);
 }
 
 void
 floatAddRR(Context* c, unsigned aSize, Assembler::Register* a,
-	   unsigned bSize UNUSED, Assembler::Register* b)
+           unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatRegOp(c, aSize, a, 4, b, 0x58);
 }
 
 void
 floatAddMR(Context* c, unsigned aSize, Assembler::Memory* a,
-	   unsigned bSize UNUSED, Assembler::Register* b)
+           unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatMemOp(c, aSize, a, 4, b, 0x58);
 }
 
 void
 floatSubtractRR(Context* c, unsigned aSize, Assembler::Register* a,
-		unsigned bSize UNUSED, Assembler::Register* b)
+                unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatRegOp(c, aSize, a, 4, b, 0x5c);
 }
 
 void
 floatSubtractMR(Context* c, unsigned aSize, Assembler::Memory* a,
-		unsigned bSize UNUSED, Assembler::Register* b)
+                unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatMemOp(c, aSize, a, 4, b, 0x5c);
 }
 
 void
 floatMultiplyRR(Context* c, unsigned aSize, Assembler::Register* a,
-		unsigned bSize UNUSED, Assembler::Register* b)
+                unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatRegOp(c, aSize, a, 4, b, 0x59);
 }
 
 void
 floatMultiplyMR(Context* c, unsigned aSize, Assembler::Memory* a,
-		unsigned bSize UNUSED, Assembler::Register* b)
+                unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatMemOp(c, aSize, a, 4, b, 0x59);
 }
 
 void
 floatDivideRR(Context* c, unsigned aSize, Assembler::Register* a,
-	      unsigned bSize UNUSED, Assembler::Register* b)
+              unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatRegOp(c, aSize, a, 4, b, 0x5e);
 }
 
 void
 floatDivideMR(Context* c, unsigned aSize, Assembler::Memory* a,
-	      unsigned bSize UNUSED, Assembler::Register* b)
+              unsigned bSize UNUSED, Assembler::Register* b)
 {
   floatMemOp(c, aSize, a, 4, b, 0x5e);
 }
 
 void
 float2FloatRR(Context* c, unsigned aSize, Assembler::Register* a,
-	      unsigned bSize UNUSED, Assembler::Register* b)
+              unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, supportsSSE2());
   floatRegOp(c, aSize, a, 4, b, 0x5a);
@@ -2275,7 +2360,7 @@ float2FloatRR(Context* c, unsigned aSize, Assembler::Register* a,
 
 void
 float2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a,
-	      unsigned bSize UNUSED, Assembler::Register* b)
+              unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, supportsSSE2());
   floatMemOp(c, aSize, a, 4, b, 0x5a);
@@ -2283,7 +2368,7 @@ float2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a,
 
 void
 float2IntRR(Context* c, unsigned aSize, Assembler::Register* a,
-	    unsigned bSize, Assembler::Register* b)
+            unsigned bSize, Assembler::Register* b)
 {
   assert(c, !floatReg(b));
   floatRegOp(c, aSize, a, bSize, b, 0x2d);
@@ -2291,40 +2376,28 @@ float2IntRR(Context* c, unsigned aSize, Assembler::Register* a,
 
 void
 float2IntMR(Context* c, unsigned aSize, Assembler::Memory* a,
-	    unsigned bSize, Assembler::Register* b)
+            unsigned bSize, Assembler::Register* b)
 {
   floatMemOp(c, aSize, a, bSize, b, 0x2d);
 }
 
 void
 int2FloatRR(Context* c, unsigned aSize, Assembler::Register* a,
-	    unsigned bSize, Assembler::Register* b)
+            unsigned bSize, Assembler::Register* b)
 {
   floatRegOp(c, bSize, a, aSize, b, 0x2a);
 }
 
 void
 int2FloatMR(Context* c, unsigned aSize, Assembler::Memory* a,
-	    unsigned bSize, Assembler::Register* b)
+            unsigned bSize, Assembler::Register* b)
 {
   floatMemOp(c, bSize, a, aSize, b, 0x2a);
 }
 
-void
-floatCompareRR(Context* c, unsigned aSize, Assembler::Register* a,
-               unsigned bSize UNUSED, Assembler::Register* b)
-{
-  if (aSize == 8) {
-    opcode(c, 0x66);
-  }
-  maybeRex(c, 4, a, b);
-  opcode(c, 0x0f, 0x2e);
-  modrm(c, 0xc0, a, b);
-}
-
 void
 floatNegateRR(Context* c, unsigned aSize, Assembler::Register* a,
-	      unsigned bSize UNUSED, Assembler::Register* b)
+              unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, floatReg(a) and floatReg(b));
   // unlike most of the other floating point code, this does NOT
@@ -2350,7 +2423,7 @@ floatNegateRR(Context* c, unsigned aSize, Assembler::Register* a,
 
 void
 floatAbsRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a,
-	   unsigned bSize UNUSED, Assembler::Register* b)
+           unsigned bSize UNUSED, Assembler::Register* b)
 {
   assert(c, floatReg(a) and floatReg(b));
   // unlike most of the other floating point code, this does NOT
@@ -2378,7 +2451,8 @@ absRR(Context* c, unsigned aSize, Assembler::Register* a,
       unsigned bSize UNUSED, Assembler::Register* b UNUSED)
 {
   assert(c, aSize == bSize and a->low == rax and b->low == rax);
-  Assembler::Register d(c->client->acquireTemporary(static_cast<uint64_t>(1) << rdx));
+  Assembler::Register d
+    (c->client->acquireTemporary(static_cast<uint64_t>(1) << rdx));
   maybeRex(c, aSize, a, b);
   opcode(c, 0x99);
   xorRR(c, aSize, &d, aSize, a);
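
absRR pins its operand to rax and clobbers rdx because it uses the cdq/cqo idiom (opcode 0x99): sign-extend into rdx, xor, and then, presumably in the lines elided from this hunk, subtract. The same branch-free trick in portable C++, assuming arithmetic right shift on signed values:

    // Branch-free absolute value, the idiom behind cdq/xor/sub:
    // mask is 0 for non-negative x and all-ones for negative x.
    #include <cassert>
    #include <cstdint>

    int64_t branchFreeAbs(int64_t x) {
      int64_t mask = x >> 63;    // cqo: sign-extend into the mask
      return (x ^ mask) - mask;  // xor + sub, no conditional jump
    }

    int main() {
      assert(branchFreeAbs(-5) == 5);
      assert(branchFreeAbs(7) == 7);
      assert(branchFreeAbs(0) == 0);
    }
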
@@ -2397,124 +2471,105 @@ populateTables(ArchitectureContext* c)
   OperationType* zo = c->operations;
   UnaryOperationType* uo = c->unaryOperations;
   BinaryOperationType* bo = c->binaryOperations;
+  BranchOperationType* bro = c->branchOperations;
 
   zo[Return] = return_;
   zo[LoadBarrier] = ignore;
   zo[StoreStoreBarrier] = ignore;
   zo[StoreLoadBarrier] = ignore;
 
-  uo[index(Call, C)] = CAST1(callC);
-  uo[index(Call, R)] = CAST1(callR);
-  uo[index(Call, M)] = CAST1(callM);
+  uo[index(c, Call, C)] = CAST1(callC);
+  uo[index(c, Call, R)] = CAST1(callR);
+  uo[index(c, Call, M)] = CAST1(callM);
 
-  uo[index(AlignedCall, C)] = CAST1(alignedCallC);
+  uo[index(c, AlignedCall, C)] = CAST1(alignedCallC);
 
-  uo[index(LongCall, C)] = CAST1(longCallC);
+  uo[index(c, LongCall, C)] = CAST1(longCallC);
 
-  uo[index(Jump, R)] = CAST1(jumpR);
-  uo[index(Jump, C)] = CAST1(jumpC);
-  uo[index(Jump, M)] = CAST1(jumpM);
+  uo[index(c, Jump, R)] = CAST1(jumpR);
+  uo[index(c, Jump, C)] = CAST1(jumpC);
+  uo[index(c, Jump, M)] = CAST1(jumpM);
 
-  uo[index(AlignedJump, C)] = CAST1(alignedJumpC);
+  uo[index(c, AlignedJump, C)] = CAST1(alignedJumpC);
 
-  uo[index(JumpIfEqual, C)] = CAST1(jumpIfEqualC);
-  uo[index(JumpIfNotEqual, C)] = CAST1(jumpIfNotEqualC);
-  uo[index(JumpIfGreater, C)] = CAST1(jumpIfGreaterC);
-  uo[index(JumpIfGreaterOrEqual, C)] = CAST1(jumpIfGreaterOrEqualC);
-  uo[index(JumpIfLess, C)] = CAST1(jumpIfLessC);
-  uo[index(JumpIfLessOrEqual, C)] = CAST1(jumpIfLessOrEqualC);
+  uo[index(c, LongJump, C)] = CAST1(longJumpC);
 
-  uo[index(JumpIfFloatUnordered, C)] = CAST1(jumpIfFloatUnorderedC);
-  uo[index(JumpIfFloatEqual, C)] = CAST1(jumpIfEqualC);
-  uo[index(JumpIfFloatNotEqual, C)] = CAST1(jumpIfNotEqualC);
-  uo[index(JumpIfFloatGreater, C)] = CAST1(jumpIfFloatGreaterC);
-  uo[index(JumpIfFloatGreaterOrEqual, C)] = CAST1(jumpIfFloatGreaterOrEqualC);
-  uo[index(JumpIfFloatLess, C)] = CAST1(jumpIfFloatLessC);
-  uo[index(JumpIfFloatLessOrEqual, C)] = CAST1(jumpIfFloatLessOrEqualC);
+  bo[index(c, Negate, R, R)] = CAST2(negateRR);
 
-  uo[index(LongJump, C)] = CAST1(longJumpC);
+  bo[index(c, FloatNegate, R, R)] = CAST2(floatNegateRR);
 
-  bo[index(Negate, R, R)] = CAST2(negateRR);
+  bo[index(c, Move, R, R)] = CAST2(moveRR);
+  bo[index(c, Move, C, R)] = CAST2(moveCR);
+  bo[index(c, Move, M, R)] = CAST2(moveMR);
+  bo[index(c, Move, R, M)] = CAST2(moveRM);
+  bo[index(c, Move, C, M)] = CAST2(moveCM);
+  bo[index(c, Move, A, R)] = CAST2(moveAR);
 
-  bo[index(FloatNegate, R, R)] = CAST2(floatNegateRR);
+  bo[index(c, FloatSqrt, R, R)] = CAST2(floatSqrtRR);
+  bo[index(c, FloatSqrt, M, R)] = CAST2(floatSqrtMR);
 
-  bo[index(Move, R, R)] = CAST2(moveRR);
-  bo[index(Move, C, R)] = CAST2(moveCR);
-  bo[index(Move, M, R)] = CAST2(moveMR);
-  bo[index(Move, R, M)] = CAST2(moveRM);
-  bo[index(Move, C, M)] = CAST2(moveCM);
-  bo[index(Move, A, R)] = CAST2(moveAR);
+  bo[index(c, MoveZ, R, R)] = CAST2(moveZRR);
+  bo[index(c, MoveZ, M, R)] = CAST2(moveZMR);
 
-  bo[index(FloatSqrt, R, R)] = CAST2(floatSqrtRR);
-  bo[index(FloatSqrt, M, R)] = CAST2(floatSqrtMR);
+  bo[index(c, Add, R, R)] = CAST2(addRR);
+  bo[index(c, Add, C, R)] = CAST2(addCR);
 
-  bo[index(MoveZ, R, R)] = CAST2(moveZRR);
-  bo[index(MoveZ, M, R)] = CAST2(moveZMR);
+  bo[index(c, Subtract, C, R)] = CAST2(subtractCR);
+  bo[index(c, Subtract, R, R)] = CAST2(subtractRR);
 
-  bo[index(Compare, R, R)] = CAST2(compareRR);
-  bo[index(Compare, C, R)] = CAST2(compareCR);
-  bo[index(Compare, C, M)] = CAST2(compareCM);
-  bo[index(Compare, R, M)] = CAST2(compareRM);
+  bo[index(c, FloatAdd, R, R)] = CAST2(floatAddRR);
+  bo[index(c, FloatAdd, M, R)] = CAST2(floatAddMR);
 
-  bo[index(FloatCompare, R, R)] = CAST2(floatCompareRR);
+  bo[index(c, FloatSubtract, R, R)] = CAST2(floatSubtractRR);
+  bo[index(c, FloatSubtract, M, R)] = CAST2(floatSubtractMR);
 
-  bo[index(Add, R, R)] = CAST2(addRR);
-  bo[index(Add, C, R)] = CAST2(addCR);
+  bo[index(c, And, R, R)] = CAST2(andRR);
+  bo[index(c, And, C, R)] = CAST2(andCR);
 
-  bo[index(Subtract, C, R)] = CAST2(subtractCR);
-  bo[index(Subtract, R, R)] = CAST2(subtractRR);
+  bo[index(c, Or, R, R)] = CAST2(orRR);
+  bo[index(c, Or, C, R)] = CAST2(orCR);
 
-  bo[index(FloatAdd, R, R)] = CAST2(floatAddRR);
-  bo[index(FloatAdd, M, R)] = CAST2(floatAddMR);
+  bo[index(c, Xor, R, R)] = CAST2(xorRR);
+  bo[index(c, Xor, C, R)] = CAST2(xorCR);
 
-  bo[index(FloatSubtract, R, R)] = CAST2(floatSubtractRR);
-  bo[index(FloatSubtract, M, R)] = CAST2(floatSubtractMR);
+  bo[index(c, Multiply, R, R)] = CAST2(multiplyRR);
+  bo[index(c, Multiply, C, R)] = CAST2(multiplyCR);
 
-  bo[index(And, R, R)] = CAST2(andRR);
-  bo[index(And, C, R)] = CAST2(andCR);
+  bo[index(c, Divide, R, R)] = CAST2(divideRR);
 
-  bo[index(Or, R, R)] = CAST2(orRR);
-  bo[index(Or, C, R)] = CAST2(orCR);
+  bo[index(c, FloatMultiply, R, R)] = CAST2(floatMultiplyRR);
+  bo[index(c, FloatMultiply, M, R)] = CAST2(floatMultiplyMR);
 
-  bo[index(Xor, R, R)] = CAST2(xorRR);
-  bo[index(Xor, C, R)] = CAST2(xorCR);
+  bo[index(c, FloatDivide, R, R)] = CAST2(floatDivideRR);
+  bo[index(c, FloatDivide, M, R)] = CAST2(floatDivideMR);
 
-  bo[index(Multiply, R, R)] = CAST2(multiplyRR);
-  bo[index(Multiply, C, R)] = CAST2(multiplyCR);
+  bo[index(c, Remainder, R, R)] = CAST2(remainderRR);
 
-  bo[index(Divide, R, R)] = CAST2(divideRR);
+  bo[index(c, ShiftLeft, R, R)] = CAST2(shiftLeftRR);
+  bo[index(c, ShiftLeft, C, R)] = CAST2(shiftLeftCR);
 
-  bo[index(FloatMultiply, R, R)] = CAST2(floatMultiplyRR);
-  bo[index(FloatMultiply, M, R)] = CAST2(floatMultiplyMR);
+  bo[index(c, ShiftRight, R, R)] = CAST2(shiftRightRR);
+  bo[index(c, ShiftRight, C, R)] = CAST2(shiftRightCR);
 
-  bo[index(FloatDivide, R, R)] = CAST2(floatDivideRR);
-  bo[index(FloatDivide, M, R)] = CAST2(floatDivideMR);
+  bo[index(c, UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR);
+  bo[index(c, UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR);
 
-  bo[index(Remainder, R, R)] = CAST2(remainderRR);
+  bo[index(c, Float2Float, R, R)] = CAST2(float2FloatRR);
+  bo[index(c, Float2Float, M, R)] = CAST2(float2FloatMR);
 
-  bo[index(LongCompare, C, R)] = CAST2(longCompareCR);
-  bo[index(LongCompare, R, R)] = CAST2(longCompareRR);
+  bo[index(c, Float2Int, R, R)] = CAST2(float2IntRR);
+  bo[index(c, Float2Int, M, R)] = CAST2(float2IntMR);
 
-  bo[index(ShiftLeft, R, R)] = CAST2(shiftLeftRR);
-  bo[index(ShiftLeft, C, R)] = CAST2(shiftLeftCR);
+  bo[index(c, Int2Float, R, R)] = CAST2(int2FloatRR);
+  bo[index(c, Int2Float, M, R)] = CAST2(int2FloatMR);
 
-  bo[index(ShiftRight, R, R)] = CAST2(shiftRightRR);
-  bo[index(ShiftRight, C, R)] = CAST2(shiftRightCR);
+  bo[index(c, Abs, R, R)] = CAST2(absRR);
+  bo[index(c, FloatAbs, R, R)] = CAST2(floatAbsRR);
 
-  bo[index(UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR);
-  bo[index(UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR);
-
-  bo[index(Float2Float, R, R)] = CAST2(float2FloatRR);
-  bo[index(Float2Float, M, R)] = CAST2(float2FloatMR);
-
-  bo[index(Float2Int, R, R)] = CAST2(float2IntRR);
-  bo[index(Float2Int, M, R)] = CAST2(float2IntMR);
-
-  bo[index(Int2Float, R, R)] = CAST2(int2FloatRR);
-  bo[index(Int2Float, M, R)] = CAST2(int2FloatMR);
-
-  bo[index(Abs, R, R)] = CAST2(absRR);
-  bo[index(FloatAbs, R, R)] = CAST2(floatAbsRR);
+  bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR);
+  bro[branchIndex(c, C, R)] = CAST_BRANCH(branchCR);
+  bro[branchIndex(c, C, M)] = CAST_BRANCH(branchCM);
+  bro[branchIndex(c, R, M)] = CAST_BRANCH(branchRM);
 }
 
 class MyArchitecture: public Assembler::Architecture {
  public:
@@ -2713,11 +2768,8 @@ class MyArchitecture: public Assembler::Architecture {
     return 0;
   }
 
-  virtual bool alwaysCondensed(BinaryOperation op)
-  {
+  virtual bool alwaysCondensed(BinaryOperation op) {
     switch(op) {
-    case FloatCompare:
-    case Compare:
     case Float2Float:
     case Float2Int:
    case Int2Float:
@@ -2725,10 +2777,13 @@ class MyArchitecture: public Assembler::Architecture {
     case FloatNegate:
     case FloatSqrt:
       return false;
+
     case Negate:
     case Abs:
-    default:
       return true;
+
+    default:
+      abort(&c);
     }
   }
@@ -2807,18 +2862,6 @@ class MyArchitecture: public Assembler::Architecture {
     *thunk = false;
 
     switch (op) {
-    case Compare:
-      *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand);
-      *aRegisterMask = GeneralRegisterMask;
-      break;
-
-    case FloatCompare:
-      assert(&c, supportsSSE());
-      *aTypeMask = (1 << RegisterOperand);
-      *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
-        | FloatRegisterMask;
-      break;
-
     case Negate:
       *aTypeMask = (1 << RegisterOperand);
       *aRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
@@ -2917,17 +2960,6 @@ class MyArchitecture: public Assembler::Architecture {
       | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
 
     switch (op) {
-    case Compare:
-      *bTypeMask = (1 << RegisterOperand);
-      *bRegisterMask = GeneralRegisterMask;
-      break;
-
-    case FloatCompare:
-      *bTypeMask = (1 << RegisterOperand);
-      *bRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
-        | FloatRegisterMask;
-      break;
-
     case Abs:
       *bTypeMask = (1 << RegisterOperand);
       *bRegisterMask = (static_cast<uint64_t>(1) << rax);
@@ -3083,7 +3115,6 @@ class MyArchitecture: public Assembler::Architecture {
 
     case Remainder:
       if (BytesPerWord == 4 and aSize == 8) {
-        *bTypeMask = ~0;
         *thunk = true;
       } else {
         *aTypeMask = (1 << RegisterOperand);
@@ -3101,18 +3132,44 @@ class MyArchitecture: public Assembler::Architecture {
         *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
       }
       break;
+
+    case JumpIfFloatEqual:
+    case JumpIfFloatNotEqual:
+    case JumpIfFloatLess:
+    case JumpIfFloatGreater:
+    case JumpIfFloatLessOrEqual:
+    case JumpIfFloatGreaterOrEqual:
+    case JumpIfFloatLessOrUnordered:
+    case JumpIfFloatGreaterOrUnordered:
+    case JumpIfFloatLessOrEqualOrUnordered:
+    case JumpIfFloatGreaterOrEqualOrUnordered:
+      if (supportsSSE()) {
+        *aTypeMask = (1 << RegisterOperand);
+        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+          | FloatRegisterMask;
+        *bTypeMask = *aTypeMask;
+        *bRegisterMask = *aRegisterMask;
+      } else {
+        *thunk = true;
+      }
+      break;
 
     default:
       break;
     }
   }
 
   virtual void planDestination
-  (TernaryOperation, unsigned, uint8_t, uint64_t, unsigned, uint8_t,
+  (TernaryOperation op, unsigned, uint8_t, uint64_t, unsigned, uint8_t,
    uint64_t bRegisterMask, unsigned, uint8_t* cTypeMask,
    uint64_t* cRegisterMask)
   {
-    *cTypeMask = (1 << RegisterOperand);
-    *cRegisterMask = bRegisterMask;
+    if (isBranch(op)) {
+      *cTypeMask = (1 << ConstantOperand);
+      *cRegisterMask = 0;
+    } else {
+      *cTypeMask = (1 << RegisterOperand);
+      *cRegisterMask = bRegisterMask;
+    }
   }
 
   virtual void acquire() {
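
planDestination now distinguishes branches: their third operand is the jump target, which this backend requires to be a constant (a rel32 displacement), while a non-branch ternary operation leaves its result in the second operand's register(s). A condensed model of that decision, with illustrative types:

    // Condensed model of planDestination: branch targets must be
    // constants (Jcc takes an immediate displacement); everything
    // else produces its result in the second operand's registers.
    #include <cassert>

    enum OperandType { ConstantOperand, AddressOperand, RegisterOperand,
                       MemoryOperand };

    struct Plan { unsigned typeMask; unsigned long long registerMask; };

    Plan planDestination(bool isBranch, unsigned long long bRegisterMask) {
      Plan p;
      if (isBranch) {
        p.typeMask = 1 << ConstantOperand;  // rel32 jump target
        p.registerMask = 0;
      } else {
        p.typeMask = 1 << RegisterOperand;  // result reuses b's registers
        p.registerMask = bRegisterMask;
      }
      return p;
    }

    int main() {
      assert(planDestination(true, ~0ull).typeMask
             == (1u << ConstantOperand));
      assert(planDestination(false, 0xff).registerMask == 0xff);
    }
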
@@ -3325,28 +3382,37 @@ class MyAssembler: public Assembler {
   virtual void apply(UnaryOperation op, unsigned aSize, OperandType aType,
                      Operand* aOperand)
   {
-    arch_->c.unaryOperations[index(op, aType)](&c, aSize, aOperand);
+    arch_->c.unaryOperations[index(&(arch_->c), op, aType)]
+      (&c, aSize, aOperand);
   }
 
   virtual void apply(BinaryOperation op,
                      unsigned aSize, OperandType aType, Operand* aOperand,
                      unsigned bSize, OperandType bType, Operand* bOperand)
   {
-    arch_->c.binaryOperations[index(op, aType, bType)]
+    arch_->c.binaryOperations[index(&(arch_->c), op, aType, bType)]
       (&c, aSize, aOperand, bSize, bOperand);
   }
 
   virtual void apply(TernaryOperation op,
                      unsigned aSize, OperandType aType, Operand* aOperand,
                      unsigned bSize, OperandType bType, Operand* bOperand,
-                     unsigned cSize UNUSED, OperandType cType UNUSED,
-                     Operand*)
+                     unsigned cSize, OperandType cType, Operand* cOperand)
   {
-    assert(&c, bSize == cSize);
-    assert(&c, bType == cType);
+    if (isBranch(op)) {
+      assert(&c, aSize == bSize);
+      assert(&c, cSize == BytesPerWord);
+      assert(&c, cType == ConstantOperand);
 
-    arch_->c.binaryOperations[index(op, aType, bType)]
-      (&c, aSize, aOperand, bSize, bOperand);
+      arch_->c.branchOperations[branchIndex(&(arch_->c), aType, bType)]
+        (&c, op, aSize, aOperand, bOperand, cOperand);
+    } else {
+      assert(&c, bSize == cSize);
+      assert(&c, bType == cType);
+
+      arch_->c.binaryOperations[index(&(arch_->c), op, aType, bType)]
+        (&c, aSize, aOperand, bSize, bOperand);
+    }
   }
 
   virtual void writeTo(uint8_t* dst) {