From 4462b87f1045454e60eacd3987c29bdb4b732a23 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Fri, 15 Feb 2013 20:04:30 -0700
Subject: [PATCH 01/22] group typeMask and registerMask into OperandMask, for
 Architecture::plan

---
 src/codegen/arm/assembler.cpp       | 120 ++++----
 src/codegen/assembler.h             |  40 ++-
 src/codegen/compiler.cpp            | 156 +++++------
 src/codegen/compiler/event.cpp      | 419 ++++++++++++++--------------
 src/codegen/compiler/site.h         |   8 +
 src/codegen/powerpc/assembler.cpp   |  78 +++---
 src/codegen/x86/assembler.cpp       | 222 ++++++++-------
 unittest/codegen/assembler-test.cpp |   9 +-
 8 files changed, 529 insertions(+), 523 deletions(-)

diff --git a/src/codegen/arm/assembler.cpp b/src/codegen/arm/assembler.cpp
index c15007835f..85a7a0e933 100644
--- a/src/codegen/arm/assembler.cpp
+++ b/src/codegen/arm/assembler.cpp
@@ -2254,27 +2254,27 @@ class MyArchitecture: public Assembler::Architecture {
   
   virtual void plan
   (lir::UnaryOperation,
-   unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned, OperandMask& aMask,
    bool* thunk)
   {
-    *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
-    *aRegisterMask = ~static_cast<uint64_t>(0);
+    aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
+    aMask.registerMask = ~static_cast<uint64_t>(0);
     *thunk = false;
   }
 
   virtual void planSource
   (lir::BinaryOperation op,
-   unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned aSize, OperandMask& aMask,
    unsigned bSize, bool* thunk)
   {
     *thunk = false;
-    *aTypeMask = ~0;
-    *aRegisterMask = GPR_MASK64;
+    aMask.typeMask = ~0;
+    aMask.registerMask = GPR_MASK64;
 
     switch (op) {
     case lir::Negate:
-      *aTypeMask = (1 << lir::RegisterOperand);
-      *aRegisterMask = GPR_MASK64;
+      aMask.typeMask = (1 << lir::RegisterOperand);
+      aMask.registerMask = GPR_MASK64;
       break;
 
     case lir::Absolute:
@@ -2286,8 +2286,8 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::FloatNegate:
     case lir::Float2Float:
       if (vfpSupported()) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = FPR_MASK64;
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = FPR_MASK64;
       } else {
         *thunk = true;
       }
@@ -2299,8 +2299,8 @@ class MyArchitecture: public Assembler::Architecture {
       // thunks or produce inline machine code which handles edge
       // cases properly.
       if (false && vfpSupported() && bSize == 4) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = FPR_MASK64;
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = FPR_MASK64;
       } else {
         *thunk = true;
       }
@@ -2308,8 +2308,8 @@ class MyArchitecture: public Assembler::Architecture {
 
     case lir::Int2Float:
       if (vfpSupported() && aSize == 4) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = GPR_MASK64;
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = GPR_MASK64;
       } else {
         *thunk = true;
       }
@@ -2322,16 +2322,16 @@ class MyArchitecture: public Assembler::Architecture {
   
   virtual void planDestination
   (lir::BinaryOperation op,
-   unsigned, uint8_t aTypeMask, uint64_t,
-   unsigned , uint8_t* bTypeMask, uint64_t* bRegisterMask)
+   unsigned, const OperandMask& aMask,
+   unsigned, OperandMask& bMask)
   {
-    *bTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-    *bRegisterMask = GPR_MASK64;
+    bMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+    bMask.registerMask = GPR_MASK64;
 
     switch (op) {
     case lir::Negate:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = GPR_MASK64;
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = GPR_MASK64;
       break;
 
     case lir::FloatAbsolute:
@@ -2339,18 +2339,18 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::FloatNegate:
     case lir::Float2Float:
     case lir::Int2Float:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = FPR_MASK64;
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = FPR_MASK64;
       break;
 
     case lir::Float2Int:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = GPR_MASK64;
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = GPR_MASK64;
       break;
 
     case lir::Move:
-      if (!(aTypeMask & 1 << lir::RegisterOperand)) {
-        *bTypeMask = 1 << lir::RegisterOperand;
+      if (!(aMask.typeMask & 1 << lir::RegisterOperand)) {
+        bMask.typeMask = 1 << lir::RegisterOperand;
       }
       break;
 
@@ -2360,41 +2360,41 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual void planMove
-  (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
-   uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-   uint8_t dstTypeMask, uint64_t dstRegisterMask)
+  (unsigned, OperandMask& srcMask,
+   OperandMask& tmpMask,
+   const OperandMask& dstMask)
   {
-    *srcTypeMask = ~0;
-    *srcRegisterMask = ~static_cast<uint64_t>(0);
+    srcMask.typeMask = ~0;
+    srcMask.registerMask = ~static_cast<uint64_t>(0);
 
-    *tmpTypeMask = 0;
-    *tmpRegisterMask = 0;
+    tmpMask.typeMask = 0;
+    tmpMask.registerMask = 0;
 
-    if (dstTypeMask & (1 << lir::MemoryOperand)) {
+    if (dstMask.typeMask & (1 << lir::MemoryOperand)) {
       // can't move directly from memory or constant to memory
-      *srcTypeMask = 1 << lir::RegisterOperand;
-      *tmpTypeMask = 1 << lir::RegisterOperand;
-      *tmpRegisterMask = GPR_MASK64;
+      srcMask.typeMask = 1 << lir::RegisterOperand;
+      tmpMask.typeMask = 1 << lir::RegisterOperand;
+      tmpMask.registerMask = GPR_MASK64;
     } else if (vfpSupported() &&
-               dstTypeMask & 1 << lir::RegisterOperand &&
-               dstRegisterMask & FPR_MASK) {
-      *srcTypeMask = *tmpTypeMask = 1 << lir::RegisterOperand |
+               dstMask.typeMask & 1 << lir::RegisterOperand &&
+               dstMask.registerMask & FPR_MASK) {
+      srcMask.typeMask = tmpMask.typeMask = 1 << lir::RegisterOperand |
                                     1 << lir::MemoryOperand;
-      *tmpRegisterMask = ~static_cast<uint64_t>(0);
+      tmpMask.registerMask = ~static_cast<uint64_t>(0);
     }
   }
 
   virtual void planSource
   (lir::TernaryOperation op,
-   unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
-   unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask,
+   unsigned, OperandMask& aMask,
+   unsigned bSize, OperandMask& bMask,
    unsigned, bool* thunk)
   {
-    *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
-    *aRegisterMask = GPR_MASK64;
+    aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
+    aMask.registerMask = GPR_MASK64;
 
-    *bTypeMask = (1 << lir::RegisterOperand);
-    *bRegisterMask = GPR_MASK64;
+    bMask.typeMask = (1 << lir::RegisterOperand);
+    bMask.registerMask = GPR_MASK64;
 
     *thunk = false;
 
@@ -2402,7 +2402,7 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::ShiftLeft:
     case lir::ShiftRight:
     case lir::UnsignedShiftRight:
-      if (bSize == 8) *aTypeMask = *bTypeMask = (1 << lir::RegisterOperand);
+      if (bSize == 8) aMask.typeMask = bMask.typeMask = (1 << lir::RegisterOperand);
       break;
 
     case lir::Add:
@@ -2410,7 +2410,7 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::Or:
     case lir::Xor:
     case lir::Multiply:
-      *aTypeMask = *bTypeMask = (1 << lir::RegisterOperand);
+      aMask.typeMask = bMask.typeMask = (1 << lir::RegisterOperand);
       break;
 
     case lir::Divide:
@@ -2424,8 +2424,8 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::FloatMultiply:
     case lir::FloatDivide:
       if (vfpSupported()) {
-        *aTypeMask = *bTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = *bRegisterMask = FPR_MASK64;
+        aMask.typeMask = bMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = bMask.registerMask = FPR_MASK64;
       } else {
         *thunk = true;
       }    
@@ -2442,8 +2442,8 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::JumpIfFloatLessOrEqualOrUnordered:
     case lir::JumpIfFloatGreaterOrEqualOrUnordered:
       if (vfpSupported()) {
-        *aTypeMask = *bTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = *bRegisterMask = FPR_MASK64;
+        aMask.typeMask = bMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = bMask.registerMask = FPR_MASK64;
       } else {
         *thunk = true;
       }
@@ -2456,16 +2456,16 @@ class MyArchitecture: public Assembler::Architecture {
 
   virtual void planDestination
   (lir::TernaryOperation op,
-   unsigned, uint8_t, uint64_t,
-   unsigned, uint8_t, const uint64_t bRegisterMask,
-   unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask)
+   unsigned, const OperandMask& aMask UNUSED,
+   unsigned, const OperandMask& bMask,
+   unsigned, OperandMask& cMask)
   {
     if (isBranch(op)) {
-      *cTypeMask = (1 << lir::ConstantOperand);
-      *cRegisterMask = 0;
+      cMask.typeMask = (1 << lir::ConstantOperand);
+      cMask.registerMask = 0;
     } else {
-      *cTypeMask = (1 << lir::RegisterOperand);
-      *cRegisterMask = bRegisterMask;
+      cMask.typeMask = (1 << lir::RegisterOperand);
+      cMask.registerMask = bMask.registerMask;
     }
   }
 
diff --git a/src/codegen/assembler.h b/src/codegen/assembler.h
index 21301fd71f..8322ef8128 100644
--- a/src/codegen/assembler.h
+++ b/src/codegen/assembler.h
@@ -35,6 +35,22 @@ public:
   { }
 };
 
+class OperandMask {
+public:
+  uint8_t typeMask;
+  uint64_t registerMask;
+
+  OperandMask(uint8_t typeMask, uint64_t registerMask):
+    typeMask(typeMask),
+    registerMask(registerMask)
+  { }
+
+  OperandMask():
+    typeMask(~0),
+    registerMask(~static_cast<uint64_t>(0))
+  { }
+};
+
 #ifdef AVIAN_TAILS
 const bool TailCalls = true;
 #else
@@ -120,35 +136,35 @@ class Assembler {
 
     virtual void plan
     (lir::UnaryOperation op,
-     unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+     unsigned aSize, OperandMask& aMask,
      bool* thunk) = 0;
 
     virtual void planSource
     (lir::BinaryOperation op,
-     unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+     unsigned aSize, OperandMask& aMask,
      unsigned bSize, bool* thunk) = 0;
      
     virtual void planDestination
     (lir::BinaryOperation op,
-     unsigned aSize, uint8_t aTypeMask, uint64_t aRegisterMask,
-     unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask) = 0;
+     unsigned aSize, const OperandMask& aMask,
+     unsigned bSize, OperandMask& bMask) = 0;
 
     virtual void planMove
-    (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
-     uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-     uint8_t dstTypeMask, uint64_t dstRegisterMask) = 0; 
+    (unsigned size, OperandMask& src,
+     OperandMask& tmp,
+     const OperandMask& dst) = 0; 
 
     virtual void planSource
     (lir::TernaryOperation op,
-     unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
-     unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask,
+     unsigned aSize, OperandMask& aMask,
+     unsigned bSize, OperandMask& bMask,
      unsigned cSize, bool* thunk) = 0; 
 
     virtual void planDestination
     (lir::TernaryOperation op,
-     unsigned aSize, uint8_t aTypeMask, uint64_t aRegisterMask,
-     unsigned bSize, uint8_t bTypeMask, uint64_t bRegisterMask,
-     unsigned cSize, uint8_t* cTypeMask, uint64_t* cRegisterMask) = 0;
+     unsigned aSize, const OperandMask& aMask,
+     unsigned bSize, const OperandMask& bMask,
+     unsigned cSize, OperandMask& cMask) = 0;
 
     virtual Assembler* makeAssembler(vm::Allocator*, vm::Zone*) = 0;
 
diff --git a/src/codegen/compiler.cpp b/src/codegen/compiler.cpp
index 23c62a8b5e..de46f1c639 100644
--- a/src/codegen/compiler.cpp
+++ b/src/codegen/compiler.cpp
@@ -334,16 +334,13 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord,
 
     virtual unsigned cost(Context* c, SiteMask dstMask)
     {
-      uint8_t srcTypeMask;
-      uint64_t srcRegisterMask;
-      uint8_t tmpTypeMask;
-      uint64_t tmpRegisterMask;
+      OperandMask src;
+      OperandMask tmp;
       c->arch->planMove
-        (size, &srcTypeMask, &srcRegisterMask,
-         &tmpTypeMask, &tmpRegisterMask,
-         dstMask.typeMask, dstMask.registerMask);
+        (size, src, tmp,
+         OperandMask(dstMask.typeMask, dstMask.registerMask));
 
-      SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex);
+      SiteMask srcMask(src.typeMask, src.registerMask, AnyFrameIndex);
       for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) {
         Site* s = it.next();
         if (s->match(c, srcMask) or s->match(c, dstMask)) {
@@ -359,26 +356,23 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord,
     bool includeNextWord;
   } costCalculator(value, size, includeNextWord);
 
-  Site* dst = pickTargetSite
+  Site* dstSite = pickTargetSite
     (c, read, intersectRead, registerReserveCount, &costCalculator);
 
-  uint8_t srcTypeMask;
-  uint64_t srcRegisterMask;
-  uint8_t tmpTypeMask;
-  uint64_t tmpRegisterMask;
+  OperandMask src;
+  OperandMask tmp;
   c->arch->planMove
-    (size, &srcTypeMask, &srcRegisterMask,
-     &tmpTypeMask, &tmpRegisterMask,
-     1 << dst->type(c), dst->registerMask(c));
+    (size, src, tmp,
+     OperandMask(1 << dstSite->type(c), dstSite->registerMask(c)));
 
-  SiteMask srcMask(srcTypeMask, srcRegisterMask, AnyFrameIndex);
+  SiteMask srcMask(src.typeMask, src.registerMask, AnyFrameIndex);
   unsigned cost = 0xFFFFFFFF;
-  Site* src = 0;
+  Site* srcSite = 0;
   for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) {
     Site* s = it.next();
-    unsigned v = s->copyCost(c, dst);
+    unsigned v = s->copyCost(c, dstSite);
     if (v == 0) {
-      src = s;
+      srcSite = s;
       cost = 0;
       break;
     }
@@ -386,50 +380,50 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord,
       v += CopyPenalty;
     }
     if (v < cost) {
-      src = s;
+      srcSite = s;
       cost = v;
     }
   }
  
   if (cost) {
     if (DebugMoves) {
-      char srcb[256]; src->toString(c, srcb, 256);
-      char dstb[256]; dst->toString(c, dstb, 256);
+      char srcb[256]; srcSite->toString(c, srcb, 256);
+      char dstb[256]; dstSite->toString(c, dstb, 256);
       fprintf(stderr, "maybe move %s to %s for %p to %p\n",
               srcb, dstb, value, value);
     }
 
-    src->freeze(c, value);
+    srcSite->freeze(c, value);
 
-    value->addSite(c, dst);
+    value->addSite(c, dstSite);
     
-    src->thaw(c, value);    
+    srcSite->thaw(c, value);    
 
-    if (not src->match(c, srcMask)) {
-      src->freeze(c, value);
-      dst->freeze(c, value);
+    if (not srcSite->match(c, srcMask)) {
+      srcSite->freeze(c, value);
+      dstSite->freeze(c, value);
 
-      SiteMask tmpMask(tmpTypeMask, tmpRegisterMask, AnyFrameIndex);
+      SiteMask tmpMask(tmp.typeMask, tmp.registerMask, AnyFrameIndex);
       SingleRead tmpRead(tmpMask, 0);
       tmpRead.value = value;
       tmpRead.successor_ = value;
 
-      Site* tmp = pickTargetSite(c, &tmpRead, true);
+      Site* tmpSite = pickTargetSite(c, &tmpRead, true);
 
-      value->addSite(c, tmp);
+      value->addSite(c, tmpSite);
 
-      move(c, value, src, tmp);
+      move(c, value, srcSite, tmpSite);
       
-      dst->thaw(c, value);
-      src->thaw(c, value);
+      dstSite->thaw(c, value);
+      srcSite->thaw(c, value);
 
-      src = tmp;
+      srcSite = tmpSite;
     }
 
-    move(c, value, src, dst);
+    move(c, value, srcSite, dstSite);
   }
 
-  return dst;
+  return dstSite;
 }
 
 Site*
@@ -757,145 +751,143 @@ saveLocals(Context* c, Event* e)
 
 void
 maybeMove(Context* c, lir::BinaryOperation type, unsigned srcSize,
-          unsigned srcSelectSize, Value* src, unsigned dstSize, Value* dst,
+          unsigned srcSelectSize, Value* srcValue, unsigned dstSize, Value* dstValue,
           const SiteMask& dstMask)
 {
-  Read* read = live(c, dst);
+  Read* read = live(c, dstValue);
   bool isStore = read == 0;
 
   Site* target;
-  if (dst->target) {
-    target = dst->target;
+  if (dstValue->target) {
+    target = dstValue->target;
   } else if (isStore) {
     return;
   } else {
     target = pickTargetSite(c, read);
   }
 
-  unsigned cost = src->source->copyCost(c, target);
+  unsigned cost = srcValue->source->copyCost(c, target);
 
   if (srcSelectSize < dstSize) cost = 1;
 
   if (cost) {
     // todo: let c->arch->planMove decide this:
     bool useTemporary = ((target->type(c) == lir::MemoryOperand
-                          and src->source->type(c) == lir::MemoryOperand)
+                          and srcValue->source->type(c) == lir::MemoryOperand)
                          or (srcSelectSize < dstSize
                              and target->type(c) != lir::RegisterOperand));
 
-    src->source->freeze(c, src);
+    srcValue->source->freeze(c, srcValue);
 
-    dst->addSite(c, target);
+    dstValue->addSite(c, target);
 
-    src->source->thaw(c, src);
+    srcValue->source->thaw(c, srcValue);
 
     bool addOffset = srcSize != srcSelectSize
       and c->arch->bigEndian()
-      and src->source->type(c) == lir::MemoryOperand;
+      and srcValue->source->type(c) == lir::MemoryOperand;
 
     if (addOffset) {
-      static_cast<MemorySite*>(src->source)->offset
+      static_cast<MemorySite*>(srcValue->source)->offset
         += (srcSize - srcSelectSize);
     }
 
-    target->freeze(c, dst);
+    target->freeze(c, dstValue);
 
     if (target->match(c, dstMask) and not useTemporary) {
       if (DebugMoves) {
-        char srcb[256]; src->source->toString(c, srcb, 256);
+        char srcb[256]; srcValue->source->toString(c, srcb, 256);
         char dstb[256]; target->toString(c, dstb, 256);
         fprintf(stderr, "move %s to %s for %p to %p\n",
-                srcb, dstb, src, dst);
+                srcb, dstb, srcValue, dstValue);
       }
 
-      src->source->freeze(c, src);
+      srcValue->source->freeze(c, srcValue);
 
-      apply(c, type, min(srcSelectSize, dstSize), src->source, src->source,
+      apply(c, type, min(srcSelectSize, dstSize), srcValue->source, srcValue->source,
             dstSize, target, target);
 
-      src->source->thaw(c, src);
+      srcValue->source->thaw(c, srcValue);
     } else {
       // pick a temporary register which is valid as both a
       // destination and a source for the moves we need to perform:
       
-      dst->removeSite(c, target);
+      dstValue->removeSite(c, target);
 
       bool thunk;
-      uint8_t srcTypeMask;
-      uint64_t srcRegisterMask;
+      OperandMask src;
 
-      c->arch->planSource(type, dstSize, &srcTypeMask, &srcRegisterMask,
-                          dstSize, &thunk);
+      c->arch->planSource(type, dstSize, src, dstSize, &thunk);
 
-      if (src->type == lir::ValueGeneral) {
-        srcRegisterMask &= c->regFile->generalRegisters.mask;
+      if (srcValue->type == lir::ValueGeneral) {
+        src.registerMask &= c->regFile->generalRegisters.mask;
       }
 
       assert(c, thunk == 0);
-      assert(c, dstMask.typeMask & srcTypeMask & (1 << lir::RegisterOperand));
+      assert(c, dstMask.typeMask & src.typeMask & (1 << lir::RegisterOperand));
 
       Site* tmpTarget = freeRegisterSite
-        (c, dstMask.registerMask & srcRegisterMask);
+        (c, dstMask.registerMask & src.registerMask);
 
-      src->source->freeze(c, src);
+      srcValue->source->freeze(c, srcValue);
 
-      dst->addSite(c, tmpTarget);
+      dstValue->addSite(c, tmpTarget);
 
-      tmpTarget->freeze(c, dst);
+      tmpTarget->freeze(c, dstValue);
 
       if (DebugMoves) {
-        char srcb[256]; src->source->toString(c, srcb, 256);
+        char srcb[256]; srcValue->source->toString(c, srcb, 256);
         char dstb[256]; tmpTarget->toString(c, dstb, 256);
         fprintf(stderr, "move %s to %s for %p to %p\n",
-                srcb, dstb, src, dst);
+                srcb, dstb, srcValue, dstValue);
       }
 
-      apply(c, type, srcSelectSize, src->source, src->source,
+      apply(c, type, srcSelectSize, srcValue->source, srcValue->source,
             dstSize, tmpTarget, tmpTarget);
 
-      tmpTarget->thaw(c, dst);
+      tmpTarget->thaw(c, dstValue);
 
-      src->source->thaw(c, src);
+      srcValue->source->thaw(c, srcValue);
 
       if (useTemporary or isStore) {
         if (DebugMoves) {
           char srcb[256]; tmpTarget->toString(c, srcb, 256);
           char dstb[256]; target->toString(c, dstb, 256);
           fprintf(stderr, "move %s to %s for %p to %p\n",
-                  srcb, dstb, src, dst);
+                  srcb, dstb, srcValue, dstValue);
         }
 
-        dst->addSite(c, target);
+        dstValue->addSite(c, target);
 
-        tmpTarget->freeze(c, dst);
+        tmpTarget->freeze(c, dstValue);
 
         apply(c, lir::Move, dstSize, tmpTarget, tmpTarget, dstSize, target, target);
 
-        tmpTarget->thaw(c, dst);
+        tmpTarget->thaw(c, dstValue);
 
         if (isStore) {
-          dst->removeSite(c, tmpTarget);
+          dstValue->removeSite(c, tmpTarget);
         }
       }
     }
 
-    target->thaw(c, dst);
+    target->thaw(c, dstValue);
 
     if (addOffset) {
-      static_cast<MemorySite*>(src->source)->offset
+      static_cast<MemorySite*>(srcValue->source)->offset
         -= (srcSize - srcSelectSize);
     }
   } else {
-    target = src->source;
+    target = srcValue->source;
 
     if (DebugMoves) {
       char dstb[256]; target->toString(c, dstb, 256);
-      fprintf(stderr, "null move in %s for %p to %p\n", dstb, src, dst);
+      fprintf(stderr, "null move in %s for %p to %p\n", dstb, srcValue, dstValue);
     }
   }
 
   if (isStore) {
-    dst->removeSite(c, target);
+    dstValue->removeSite(c, target);
   }
 }
 
diff --git a/src/codegen/compiler/event.cpp b/src/codegen/compiler/event.cpp
index ba8c0c222a..a785001de9 100644
--- a/src/codegen/compiler/event.cpp
+++ b/src/codegen/compiler/event.cpp
@@ -264,16 +264,15 @@ class CallEvent: public Event {
     }
 
     { bool thunk;
-      uint8_t typeMask;
-      uint64_t planRegisterMask;
+      OperandMask op;
       c->arch->plan
         ((flags & Compiler::Aligned) ? lir::AlignedCall : lir::Call, vm::TargetBytesPerWord,
-         &typeMask, &planRegisterMask, &thunk);
+         op, &thunk);
 
       assert(c, not thunk);
 
       this->addRead(c, address, SiteMask
-               (typeMask, registerMask & planRegisterMask, AnyFrameIndex));
+               (op.typeMask, registerMask & op.registerMask, AnyFrameIndex));
     }
 
     Stack* stack = stackBefore;
@@ -543,26 +542,26 @@ void appendReturn(Context* c, unsigned size, Value* value) {
 class MoveEvent: public Event {
  public:
   MoveEvent(Context* c, lir::BinaryOperation type, unsigned srcSize,
-            unsigned srcSelectSize, Value* src, unsigned dstSize, Value* dst,
+            unsigned srcSelectSize, Value* srcValue, unsigned dstSize, Value* dstValue,
             const SiteMask& srcLowMask, const SiteMask& srcHighMask):
     Event(c), type(type), srcSize(srcSize), srcSelectSize(srcSelectSize),
-    src(src), dstSize(dstSize), dst(dst)
+    srcValue(srcValue), dstSize(dstSize), dstValue(dstValue)
   {
     assert(c, srcSelectSize <= srcSize);
 
     bool noop = srcSelectSize >= dstSize;
     
     if (dstSize > vm::TargetBytesPerWord) {
-      dst->grow(c);
+      dstValue->grow(c);
     }
 
     if (srcSelectSize > vm::TargetBytesPerWord) {
-      src->maybeSplit(c);
+      srcValue->maybeSplit(c);
     }
 
-    this->addReads(c, src, srcSelectSize, srcLowMask, noop ? dst : 0,
+    this->addReads(c, srcValue, srcSelectSize, srcLowMask, noop ? dstValue : 0,
              srcHighMask,
-             noop and dstSize > vm::TargetBytesPerWord ? dst->nextWord : 0);
+             noop and dstSize > vm::TargetBytesPerWord ? dstValue->nextWord : 0);
   }
 
   virtual const char* name() {
@@ -570,118 +569,116 @@ class MoveEvent: public Event {
   }
 
   virtual void compile(Context* c) {
-    uint8_t dstTypeMask;
-    uint64_t dstRegisterMask;
+    OperandMask dst;
 
     c->arch->planDestination
       (type,
        srcSelectSize,
-       1 << src->source->type(c), 
-       (static_cast<uint64_t>(src->nextWord->source->registerMask(c)) << 32)
-       | static_cast<uint64_t>(src->source->registerMask(c)),
-       dstSize,
-       &dstTypeMask,
-       &dstRegisterMask);
+       OperandMask(
+         1 << srcValue->source->type(c), 
+         (static_cast<uint64_t>(srcValue->nextWord->source->registerMask(c)) << 32)
+            | static_cast<uint64_t>(srcValue->source->registerMask(c))),
+       dstSize, dst);
 
-    SiteMask dstLowMask(dstTypeMask, dstRegisterMask, AnyFrameIndex);
-    SiteMask dstHighMask(dstTypeMask, dstRegisterMask >> 32, AnyFrameIndex);
+    SiteMask dstLowMask(dst.typeMask, dst.registerMask, AnyFrameIndex);
+    SiteMask dstHighMask(dst.typeMask, dst.registerMask >> 32, AnyFrameIndex);
 
     if (srcSelectSize >= vm::TargetBytesPerWord
         and dstSize >= vm::TargetBytesPerWord
         and srcSelectSize >= dstSize)
     {
-      if (dst->target) {
+      if (dstValue->target) {
         if (dstSize > vm::TargetBytesPerWord) {
-          if (src->source->registerSize(c) > vm::TargetBytesPerWord) {
-            apply(c, lir::Move, srcSelectSize, src->source, src->source,
-                  dstSize, dst->target, dst->target);
+          if (srcValue->source->registerSize(c) > vm::TargetBytesPerWord) {
+            apply(c, lir::Move, srcSelectSize, srcValue->source, srcValue->source,
+                  dstSize, dstValue->target, dstValue->target);
             
-            if (live(c, dst) == 0) {
-              dst->removeSite(c, dst->target);
+            if (live(c, dstValue) == 0) {
+              dstValue->removeSite(c, dstValue->target);
               if (dstSize > vm::TargetBytesPerWord) {
-                dst->nextWord->removeSite(c, dst->nextWord->target);
+                dstValue->nextWord->removeSite(c, dstValue->nextWord->target);
               }
             }
           } else {
-            src->nextWord->source->freeze(c, src->nextWord);
+            srcValue->nextWord->source->freeze(c, srcValue->nextWord);
 
-            maybeMove(c, lir::Move, vm::TargetBytesPerWord, vm::TargetBytesPerWord, src,
-                      vm::TargetBytesPerWord, dst, dstLowMask);
+            maybeMove(c, lir::Move, vm::TargetBytesPerWord, vm::TargetBytesPerWord, srcValue,
+                      vm::TargetBytesPerWord, dstValue, dstLowMask);
 
-            src->nextWord->source->thaw(c, src->nextWord);
+            srcValue->nextWord->source->thaw(c, srcValue->nextWord);
 
             maybeMove
-              (c, lir::Move, vm::TargetBytesPerWord, vm::TargetBytesPerWord, src->nextWord,
-               vm::TargetBytesPerWord, dst->nextWord, dstHighMask);
+              (c, lir::Move, vm::TargetBytesPerWord, vm::TargetBytesPerWord, srcValue->nextWord,
+               vm::TargetBytesPerWord, dstValue->nextWord, dstHighMask);
           }
         } else {
-          maybeMove(c, lir::Move, vm::TargetBytesPerWord, vm::TargetBytesPerWord, src,
-                    vm::TargetBytesPerWord, dst, dstLowMask);
+          maybeMove(c, lir::Move, vm::TargetBytesPerWord, vm::TargetBytesPerWord, srcValue,
+                    vm::TargetBytesPerWord, dstValue, dstLowMask);
         }
       } else {
-        Site* low = pickSiteOrMove(c, src, dst, 0, 0);
+        Site* low = pickSiteOrMove(c, srcValue, dstValue, 0, 0);
         if (dstSize > vm::TargetBytesPerWord) {
-          pickSiteOrMove(c, src->nextWord, dst->nextWord, low, 1);
+          pickSiteOrMove(c, srcValue->nextWord, dstValue->nextWord, low, 1);
         }
       }
     } else if (srcSelectSize <= vm::TargetBytesPerWord
                and dstSize <= vm::TargetBytesPerWord)
     {
-      maybeMove(c, type, srcSize, srcSelectSize, src, dstSize, dst,
+      maybeMove(c, type, srcSize, srcSelectSize, srcValue, dstSize, dstValue,
                 dstLowMask);
     } else {
       assert(c, srcSize == vm::TargetBytesPerWord);
       assert(c, srcSelectSize == vm::TargetBytesPerWord);
 
-      if (dst->nextWord->target or live(c, dst->nextWord)) {
+      if (dstValue->nextWord->target or live(c, dstValue->nextWord)) {
         assert(c, dstLowMask.typeMask & (1 << lir::RegisterOperand));
 
         Site* low = freeRegisterSite(c, dstLowMask.registerMask);
 
-        src->source->freeze(c, src);
+        srcValue->source->freeze(c, srcValue);
 
-        dst->addSite(c, low);
+        dstValue->addSite(c, low);
 
-        low->freeze(c, dst);
+        low->freeze(c, dstValue);
           
         if (DebugMoves) {
-          char srcb[256]; src->source->toString(c, srcb, 256);
+          char srcb[256]; srcValue->source->toString(c, srcb, 256);
           char dstb[256]; low->toString(c, dstb, 256);
           fprintf(stderr, "move %s to %s for %p\n",
-                  srcb, dstb, src);
+                  srcb, dstb, srcValue);
         }
 
-        apply(c, lir::Move, vm::TargetBytesPerWord, src->source, src->source,
+        apply(c, lir::Move, vm::TargetBytesPerWord, srcValue->source, srcValue->source,
               vm::TargetBytesPerWord, low, low);
 
-        low->thaw(c, dst);
+        low->thaw(c, dstValue);
 
-        src->source->thaw(c, src);
+        srcValue->source->thaw(c, srcValue);
 
         assert(c, dstHighMask.typeMask & (1 << lir::RegisterOperand));
 
         Site* high = freeRegisterSite(c, dstHighMask.registerMask);
 
-        low->freeze(c, dst);
+        low->freeze(c, dstValue);
 
-        dst->nextWord->addSite(c, high);
+        dstValue->nextWord->addSite(c, high);
 
-        high->freeze(c, dst->nextWord);
+        high->freeze(c, dstValue->nextWord);
         
         if (DebugMoves) {
           char srcb[256]; low->toString(c, srcb, 256);
           char dstb[256]; high->toString(c, dstb, 256);
           fprintf(stderr, "extend %s to %s for %p %p\n",
-                  srcb, dstb, dst, dst->nextWord);
+                  srcb, dstb, dstValue, dstValue->nextWord);
         }
 
         apply(c, lir::Move, vm::TargetBytesPerWord, low, low, dstSize, low, high);
 
-        high->thaw(c, dst->nextWord);
+        high->thaw(c, dstValue->nextWord);
 
-        low->thaw(c, dst);
+        low->thaw(c, dstValue);
       } else {
-        pickSiteOrMove(c, src, dst, 0, 0);
+        pickSiteOrMove(c, srcValue, dstValue, 0, 0);
       }
     }
 
@@ -693,29 +690,28 @@ class MoveEvent: public Event {
   lir::BinaryOperation type;
   unsigned srcSize;
   unsigned srcSelectSize;
-  Value* src;
+  Value* srcValue;
   unsigned dstSize;
-  Value* dst;
+  Value* dstValue;
 };
 
 void
 appendMove(Context* c, lir::BinaryOperation type, unsigned srcSize,
-           unsigned srcSelectSize, Value* src, unsigned dstSize, Value* dst)
+           unsigned srcSelectSize, Value* srcValue, unsigned dstSize, Value* dstValue)
 {
   bool thunk;
-  uint8_t srcTypeMask;
-  uint64_t srcRegisterMask;
+  OperandMask src;
 
   c->arch->planSource
-    (type, srcSelectSize, &srcTypeMask, &srcRegisterMask, dstSize, &thunk);
+    (type, srcSelectSize, src, dstSize, &thunk);
 
   assert(c, not thunk);
 
   append(c, new(c->zone)
          MoveEvent
-         (c, type, srcSize, srcSelectSize, src, dstSize, dst,
-          SiteMask(srcTypeMask, srcRegisterMask, AnyFrameIndex),
-          SiteMask(srcTypeMask, srcRegisterMask >> 32, AnyFrameIndex)));
+         (c, type, srcSize, srcSelectSize, srcValue, dstSize, dstValue,
+          SiteMask(src.typeMask, src.registerMask, AnyFrameIndex),
+          SiteMask(src.typeMask, src.registerMask >> 32, AnyFrameIndex)));
 }
 
 
@@ -791,28 +787,28 @@ Site* getTarget(Context* c, Value* value, Value* result, const SiteMask& resultM
 class CombineEvent: public Event {
  public:
   CombineEvent(Context* c, lir::TernaryOperation type,
-               unsigned firstSize, Value* first,
-               unsigned secondSize, Value* second,
-               unsigned resultSize, Value* result,
+               unsigned firstSize, Value* firstValue,
+               unsigned secondSize, Value* secondValue,
+               unsigned resultSize, Value* resultValue,
                const SiteMask& firstLowMask,
                const SiteMask& firstHighMask,
                const SiteMask& secondLowMask,
                const SiteMask& secondHighMask):
-    Event(c), type(type), firstSize(firstSize), first(first),
-    secondSize(secondSize), second(second), resultSize(resultSize),
-    result(result)
+    Event(c), type(type), firstSize(firstSize), firstValue(firstValue),
+    secondSize(secondSize), secondValue(secondValue), resultSize(resultSize),
+    resultValue(resultValue)
   {
-    this->addReads(c, first, firstSize, firstLowMask, firstHighMask);
+    this->addReads(c, firstValue, firstSize, firstLowMask, firstHighMask);
 
     if (resultSize > vm::TargetBytesPerWord) {
-      result->grow(c);
+      resultValue->grow(c);
     }
 
     bool condensed = c->arch->alwaysCondensed(type);
 
-    this->addReads(c, second, secondSize,
-             secondLowMask, condensed ? result : 0,
-             secondHighMask, condensed ? result->nextWord : 0);
+    this->addReads(c, secondValue, secondSize,
+             secondLowMask, condensed ? resultValue : 0,
+             secondHighMask, condensed ? resultValue->nextWord : 0);
   }
 
   virtual const char* name() {
@@ -820,99 +816,99 @@ class CombineEvent: public Event {
   }
 
   virtual void compile(Context* c) {
-    assert(c, first->source->type(c) == first->nextWord->source->type(c));
+    assert(c, firstValue->source->type(c) == firstValue->nextWord->source->type(c));
 
-    // if (second->source->type(c) != second->nextWord->source->type(c)) {
+    // if (secondValue->source->type(c) != secondValue->nextWord->source->type(c)) {
     //   fprintf(stderr, "%p %p %d : %p %p %d\n",
-    //           second, second->source, second->source->type(c),
-    //           second->nextWord, second->nextWord->source,
-    //           second->nextWord->source->type(c));
+    //           secondValue, secondValue->source, secondValue->source->type(c),
+    //           secondValue->nextWord, secondValue->nextWord->source,
+    //           secondValue->nextWord->source->type(c));
     // }
 
-    assert(c, second->source->type(c) == second->nextWord->source->type(c));
+    assert(c, secondValue->source->type(c) == secondValue->nextWord->source->type(c));
     
-    freezeSource(c, firstSize, first);
+    freezeSource(c, firstSize, firstValue);
     
-    uint8_t cTypeMask;
-    uint64_t cRegisterMask;
+    OperandMask cMask;
 
     c->arch->planDestination
       (type,
        firstSize,
-       1 << first->source->type(c),
-       (static_cast<uint64_t>(first->nextWord->source->registerMask(c)) << 32)
-       | static_cast<uint64_t>(first->source->registerMask(c)),
+       OperandMask(
+          1 << firstValue->source->type(c),
+          (static_cast<uint64_t>(firstValue->nextWord->source->registerMask(c)) << 32)
+            | static_cast<uint64_t>(firstValue->source->registerMask(c))),
        secondSize,
-       1 << second->source->type(c),
-       (static_cast<uint64_t>(second->nextWord->source->registerMask(c)) << 32)
-       | static_cast<uint64_t>(second->source->registerMask(c)),
+       OperandMask(
+          1 << secondValue->source->type(c),
+          (static_cast<uint64_t>(secondValue->nextWord->source->registerMask(c)) << 32)
+            | static_cast<uint64_t>(secondValue->source->registerMask(c))),
        resultSize,
-       &cTypeMask,
-       &cRegisterMask);
+       cMask);
 
-    SiteMask resultLowMask(cTypeMask, cRegisterMask, AnyFrameIndex);
-    SiteMask resultHighMask(cTypeMask, cRegisterMask >> 32, AnyFrameIndex);
+    SiteMask resultLowMask(cMask.typeMask, cMask.registerMask, AnyFrameIndex);
+    SiteMask resultHighMask(cMask.typeMask, cMask.registerMask >> 32, AnyFrameIndex);
 
-    Site* low = getTarget(c, second, result, resultLowMask);
+    Site* low = getTarget(c, secondValue, resultValue, resultLowMask);
     unsigned lowSize = low->registerSize(c);
     Site* high
       = (resultSize > lowSize
-         ? getTarget(c, second->nextWord, result->nextWord, resultHighMask)
+         ? getTarget(c, secondValue->nextWord, resultValue->nextWord, resultHighMask)
          : low);
 
 //     fprintf(stderr, "combine %p:%p and %p:%p into %p:%p\n",
-//             first, first->nextWord,
-//             second, second->nextWord,
-//             result, result->nextWord);
+//             firstValue, firstValue->nextWord,
+//             secondValue, secondValue->nextWord,
+//             resultValue, resultValue->nextWord);
 
     apply(c, type,
-          firstSize, first->source, first->nextWord->source,
-          secondSize, second->source, second->nextWord->source,
+          firstSize, firstValue->source, firstValue->nextWord->source,
+          secondSize, secondValue->source, secondValue->nextWord->source,
           resultSize, low, high);
 
-    thawSource(c, firstSize, first);
+    thawSource(c, firstSize, firstValue);
 
     for (Read* r = reads; r; r = r->eventNext) {
       popRead(c, this, r->value);
     }
 
-    low->thaw(c, second);
+    low->thaw(c, secondValue);
     if (resultSize > lowSize) {
-      high->thaw(c, second->nextWord);
+      high->thaw(c, secondValue->nextWord);
     }
 
-    if (live(c, result)) {
-      result->addSite(c, low);
-      if (resultSize > lowSize and live(c, result->nextWord)) {
-        result->nextWord->addSite(c, high);
+    if (live(c, resultValue)) {
+      resultValue->addSite(c, low);
+      if (resultSize > lowSize and live(c, resultValue->nextWord)) {
+        resultValue->nextWord->addSite(c, high);
       }
     }
   }
 
   lir::TernaryOperation type;
   unsigned firstSize;
-  Value* first;
+  Value* firstValue;
   unsigned secondSize;
-  Value* second;
+  Value* secondValue;
   unsigned resultSize;
-  Value* result;
+  Value* resultValue;
 };
 
 void
 appendCombine(Context* c, lir::TernaryOperation type,
-              unsigned firstSize, Value* first,
-              unsigned secondSize, Value* second,
-              unsigned resultSize, Value* result)
+              unsigned firstSize, Value* firstValue,
+              unsigned secondSize, Value* secondValue,
+              unsigned resultSize, Value* resultValue)
 {
   bool thunk;
-  uint8_t firstTypeMask;
-  uint64_t firstRegisterMask;
-  uint8_t secondTypeMask;
-  uint64_t secondRegisterMask;
+  OperandMask firstMask;
+  OperandMask secondMask;
 
-  c->arch->planSource(type, firstSize, &firstTypeMask, &firstRegisterMask,
-                      secondSize, &secondTypeMask, &secondRegisterMask,
-                      resultSize, &thunk);
+  c->arch->planSource(type,
+                      firstSize, firstMask,
+                      secondSize, secondMask,
+                      resultSize,
+                      &thunk);
 
   if (thunk) {
     Stack* oldStack = c->stack;
@@ -924,8 +920,8 @@ appendCombine(Context* c, lir::TernaryOperation type,
     unsigned stackSize = vm::ceilingDivide(secondSize, vm::TargetBytesPerWord)
       + vm::ceilingDivide(firstSize, vm::TargetBytesPerWord);
 
-    compiler::push(c, vm::ceilingDivide(secondSize, vm::TargetBytesPerWord), second);
-    compiler::push(c, vm::ceilingDivide(firstSize, vm::TargetBytesPerWord), first);
+    compiler::push(c, vm::ceilingDivide(secondSize, vm::TargetBytesPerWord), secondValue);
+    compiler::push(c, vm::ceilingDivide(firstSize, vm::TargetBytesPerWord), firstValue);
 
     if (threadParameter) {
       ++ stackSize;
@@ -937,40 +933,40 @@ appendCombine(Context* c, lir::TernaryOperation type,
     c->stack = oldStack;
 
     appendCall
-      (c, value(c, lir::ValueGeneral, constantSite(c, handler)), 0, 0, result,
+      (c, value(c, lir::ValueGeneral, constantSite(c, handler)), 0, 0, resultValue,
        resultSize, argumentStack, stackSize, 0);
   } else {
     append
       (c, new(c->zone)
        CombineEvent
        (c, type,
-        firstSize, first,
-        secondSize, second,
-        resultSize, result,
-        SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex),
-        SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex),
-        SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex),
-        SiteMask(secondTypeMask, secondRegisterMask >> 32, AnyFrameIndex)));
+        firstSize, firstValue,
+        secondSize, secondValue,
+        resultSize, resultValue,
+        SiteMask(firstMask.typeMask, firstMask.registerMask, AnyFrameIndex),
+        SiteMask(firstMask.typeMask, firstMask.registerMask >> 32, AnyFrameIndex),
+        SiteMask(secondMask.typeMask, secondMask.registerMask, AnyFrameIndex),
+        SiteMask(secondMask.typeMask, secondMask.registerMask >> 32, AnyFrameIndex)));
   }
 }
 
 class TranslateEvent: public Event {
  public:
   TranslateEvent(Context* c, lir::BinaryOperation type, unsigned valueSize,
-                 Value* value, unsigned resultSize, Value* result,
+                 Value* value, unsigned resultSize, Value* resultValue,
                  const SiteMask& valueLowMask,
                  const SiteMask& valueHighMask):
     Event(c), type(type), valueSize(valueSize), resultSize(resultSize),
-    value(value), result(result)
+    value(value), resultValue(resultValue)
   {
     bool condensed = c->arch->alwaysCondensed(type);
 
     if (resultSize > vm::TargetBytesPerWord) {
-      result->grow(c);
+      resultValue->grow(c);
     }
 
-    this->addReads(c, value, valueSize, valueLowMask, condensed ? result : 0,
-             valueHighMask, condensed ? result->nextWord : 0);
+    this->addReads(c, value, valueSize, valueLowMask, condensed ? resultValue : 0,
+             valueHighMask, condensed ? resultValue->nextWord : 0);
   }
 
   virtual const char* name() {
@@ -980,27 +976,26 @@ class TranslateEvent: public Event {
   virtual void compile(Context* c) {
     assert(c, value->source->type(c) == value->nextWord->source->type(c));
 
-    uint8_t bTypeMask;
-    uint64_t bRegisterMask;
+    OperandMask bMask;
     
     c->arch->planDestination
       (type,
        valueSize,
-       1 << value->source->type(c),
-       (static_cast<uint64_t>(value->nextWord->source->registerMask(c)) << 32)
-       | static_cast<uint64_t>(value->source->registerMask(c)),
+       OperandMask(
+          1 << value->source->type(c),
+          (static_cast<uint64_t>(value->nextWord->source->registerMask(c)) << 32)
+            | static_cast<uint64_t>(value->source->registerMask(c))),
        resultSize,
-       &bTypeMask,
-       &bRegisterMask);
+       bMask);
 
-    SiteMask resultLowMask(bTypeMask, bRegisterMask, AnyFrameIndex);
-    SiteMask resultHighMask(bTypeMask, bRegisterMask >> 32, AnyFrameIndex);
+    SiteMask resultLowMask(bMask.typeMask, bMask.registerMask, AnyFrameIndex);
+    SiteMask resultHighMask(bMask.typeMask, bMask.registerMask >> 32, AnyFrameIndex);
     
-    Site* low = getTarget(c, value, result, resultLowMask);
+    Site* low = getTarget(c, value, resultValue, resultLowMask);
     unsigned lowSize = low->registerSize(c);
     Site* high
       = (resultSize > lowSize
-         ? getTarget(c, value->nextWord, result->nextWord, resultHighMask)
+         ? getTarget(c, value->nextWord, resultValue->nextWord, resultHighMask)
          : low);
 
     apply(c, type, valueSize, value->source, value->nextWord->source,
@@ -1015,10 +1010,10 @@ class TranslateEvent: public Event {
       high->thaw(c, value->nextWord);
     }
 
-    if (live(c, result)) {
-      result->addSite(c, low);
-      if (resultSize > lowSize and live(c, result->nextWord)) {
-        result->nextWord->addSite(c, high);
+    if (live(c, resultValue)) {
+      resultValue->addSite(c, low);
+      if (resultSize > lowSize and live(c, resultValue->nextWord)) {
+        resultValue->nextWord->addSite(c, high);
       }
     }
   }
@@ -1027,7 +1022,7 @@ class TranslateEvent: public Event {
   unsigned valueSize;
   unsigned resultSize;
   Value* value;
-  Value* result;
+  Value* resultValue;
   Read* resultRead;
   SiteMask resultLowMask;
   SiteMask resultHighMask;
@@ -1035,19 +1030,18 @@ class TranslateEvent: public Event {
 
 void
 appendTranslate(Context* c, lir::BinaryOperation type, unsigned firstSize,
-                Value* first, unsigned resultSize, Value* result)
+                Value* firstValue, unsigned resultSize, Value* resultValue)
 {
   bool thunk;
-  uint8_t firstTypeMask;
-  uint64_t firstRegisterMask;
+  OperandMask first;
 
-  c->arch->planSource(type, firstSize, &firstTypeMask, &firstRegisterMask,
+  c->arch->planSource(type, firstSize, first,
                 resultSize, &thunk);
 
   if (thunk) {
     Stack* oldStack = c->stack;
 
-    compiler::push(c, vm::ceilingDivide(firstSize, vm::TargetBytesPerWord), first);
+    compiler::push(c, vm::ceilingDivide(firstSize, vm::TargetBytesPerWord), firstValue);
 
     Stack* argumentStack = c->stack;
     c->stack = oldStack;
@@ -1056,14 +1050,14 @@ appendTranslate(Context* c, lir::BinaryOperation type, unsigned firstSize,
       (c, value
        (c, lir::ValueGeneral, constantSite
         (c, c->client->getThunk(type, firstSize, resultSize))),
-       0, 0, result, resultSize, argumentStack,
+       0, 0, resultValue, resultSize, argumentStack,
        vm::ceilingDivide(firstSize, vm::TargetBytesPerWord), 0);
   } else {
     append(c, new(c->zone)
            TranslateEvent
-           (c, type, firstSize, first, resultSize, result,
-            SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex),
-            SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex)));
+           (c, type, firstSize, firstValue, resultSize, resultValue,
+            SiteMask(first.typeMask, first.registerMask, AnyFrameIndex),
+            SiteMask(first.typeMask, first.registerMask >> 32, AnyFrameIndex)));
   }
 }
 
@@ -1306,23 +1300,24 @@ thunkBranch(Context* c, lir::TernaryOperation type)
 class BranchEvent: public Event {
  public:
   BranchEvent(Context* c, lir::TernaryOperation type, unsigned size,
-              Value* first, Value* second, Value* address,
+              Value* firstValue, Value* secondValue, Value* addressValue,
               const SiteMask& firstLowMask,
               const SiteMask& firstHighMask,
               const SiteMask& secondLowMask,
               const SiteMask& secondHighMask):
-    Event(c), type(type), size(size), first(first), second(second),
-    address(address)
+    Event(c), type(type), size(size), firstValue(firstValue), secondValue(secondValue),
+    addressValue(addressValue)
   {
-    this->addReads(c, first, size, firstLowMask, firstHighMask);
-    this->addReads(c, second, size, secondLowMask, secondHighMask);
+    this->addReads(c, firstValue, size, firstLowMask, firstHighMask);
+    this->addReads(c, secondValue, size, secondLowMask, secondHighMask);
 
-    uint8_t typeMask;
-    uint64_t registerMask;
-    c->arch->planDestination(type, size, 0, 0, size, 0, 0, vm::TargetBytesPerWord,
-                             &typeMask, &registerMask);
+    OperandMask dstMask;
+    c->arch->planDestination(type,
+      size, OperandMask(0, 0),
+      size, OperandMask(0, 0),
+      vm::TargetBytesPerWord, dstMask);
 
-    this->addRead(c, address, SiteMask(typeMask, registerMask, AnyFrameIndex));
+    this->addRead(c, addressValue, SiteMask(dstMask.typeMask, dstMask.registerMask, AnyFrameIndex));
   }
 
   virtual const char* name() {
@@ -1330,8 +1325,8 @@ class BranchEvent: public Event {
   }
 
   virtual void compile(Context* c) {
-    ConstantSite* firstConstant = findConstantSite(c, first);
-    ConstantSite* secondConstant = findConstantSite(c, second);
+    ConstantSite* firstConstant = findConstantSite(c, firstValue);
+    ConstantSite* secondConstant = findConstantSite(c, secondValue);
 
     if (not this->isUnreachable()) {
       if (firstConstant
@@ -1339,31 +1334,31 @@ class BranchEvent: public Event {
           and firstConstant->value->resolved()
           and secondConstant->value->resolved())
       {
-        int64_t firstValue = firstConstant->value->value();
-        int64_t secondValue = secondConstant->value->value();
+        int64_t firstConstVal = firstConstant->value->value();
+        int64_t secondConstVal = secondConstant->value->value();
 
         if (size > vm::TargetBytesPerWord) {
-          firstValue |= findConstantSite
-            (c, first->nextWord)->value->value() << 32;
-          secondValue |= findConstantSite
-            (c, second->nextWord)->value->value() << 32;
+          firstConstVal |= findConstantSite
+            (c, firstValue->nextWord)->value->value() << 32;
+          secondConstVal |= findConstantSite
+            (c, secondValue->nextWord)->value->value() << 32;
         }
 
-        if (shouldJump(c, type, size, firstValue, secondValue)) {
-          apply(c, lir::Jump, vm::TargetBytesPerWord, address->source, address->source);
+        if (shouldJump(c, type, size, firstConstVal, secondConstVal)) {
+          apply(c, lir::Jump, vm::TargetBytesPerWord, addressValue->source, addressValue->source);
         }      
       } else {
-        freezeSource(c, size, first);
-        freezeSource(c, size, second);
-        freezeSource(c, vm::TargetBytesPerWord, address);
+        freezeSource(c, size, firstValue);
+        freezeSource(c, size, secondValue);
+        freezeSource(c, vm::TargetBytesPerWord, addressValue);
 
-        apply(c, type, size, first->source, first->nextWord->source,
-              size, second->source, second->nextWord->source,
-              vm::TargetBytesPerWord, address->source, address->source);
+        apply(c, type, size, firstValue->source, firstValue->nextWord->source,
+              size, secondValue->source, secondValue->nextWord->source,
+              vm::TargetBytesPerWord, addressValue->source, addressValue->source);
 
-        thawSource(c, vm::TargetBytesPerWord, address);
-        thawSource(c, size, second);
-        thawSource(c, size, first);
+        thawSource(c, vm::TargetBytesPerWord, addressValue);
+        thawSource(c, size, secondValue);
+        thawSource(c, size, firstValue);
       }
     }
 
@@ -1376,24 +1371,23 @@ class BranchEvent: public Event {
 
   lir::TernaryOperation type;
   unsigned size;
-  Value* first;
-  Value* second;
-  Value* address;
+  Value* firstValue;
+  Value* secondValue;
+  Value* addressValue;
 };
 
 void
-appendBranch(Context* c, lir::TernaryOperation type, unsigned size, Value* first,
-             Value* second, Value* address)
+appendBranch(Context* c, lir::TernaryOperation type, unsigned size, Value* firstValue,
+             Value* secondValue, Value* addressValue)
 {
   bool thunk;
-  uint8_t firstTypeMask;
-  uint64_t firstRegisterMask;
-  uint8_t secondTypeMask;
-  uint64_t secondRegisterMask;
+  OperandMask firstMask;
+  OperandMask secondMask;
 
-  c->arch->planSource(type, size, &firstTypeMask, &firstRegisterMask,
-                      size, &secondTypeMask, &secondRegisterMask,
-                      vm::TargetBytesPerWord, &thunk);
+  c->arch->planSource(type,
+    size, firstMask,
+    size, secondMask,
+    vm::TargetBytesPerWord, &thunk);
 
   if (thunk) {
     Stack* oldStack = c->stack;
@@ -1404,8 +1398,8 @@ appendBranch(Context* c, lir::TernaryOperation type, unsigned size, Value* first
 
     assert(c, not threadParameter);
 
-    compiler::push(c, vm::ceilingDivide(size, vm::TargetBytesPerWord), second);
-    compiler::push(c, vm::ceilingDivide(size, vm::TargetBytesPerWord), first);
+    compiler::push(c, vm::ceilingDivide(size, vm::TargetBytesPerWord), secondValue);
+    compiler::push(c, vm::ceilingDivide(size, vm::TargetBytesPerWord), firstValue);
 
     Stack* argumentStack = c->stack;
     c->stack = oldStack;
@@ -1418,16 +1412,16 @@ appendBranch(Context* c, lir::TernaryOperation type, unsigned size, Value* first
 
     appendBranch(c, thunkBranch(c, type), 4, value
                  (c, lir::ValueGeneral, constantSite(c, static_cast<int64_t>(0))),
-                 result, address);
+                 result, addressValue);
   } else {
     append
       (c, new(c->zone)
        BranchEvent
-       (c, type, size, first, second, address,
-        SiteMask(firstTypeMask, firstRegisterMask, AnyFrameIndex),
-        SiteMask(firstTypeMask, firstRegisterMask >> 32, AnyFrameIndex),
-        SiteMask(secondTypeMask, secondRegisterMask, AnyFrameIndex),
-        SiteMask(secondTypeMask, secondRegisterMask >> 32, AnyFrameIndex)));
+       (c, type, size, firstValue, secondValue, addressValue,
+        SiteMask(firstMask.typeMask, firstMask.registerMask, AnyFrameIndex),
+        SiteMask(firstMask.typeMask, firstMask.registerMask >> 32, AnyFrameIndex),
+        SiteMask(secondMask.typeMask, secondMask.registerMask, AnyFrameIndex),
+        SiteMask(secondMask.typeMask, secondMask.registerMask >> 32, AnyFrameIndex)));
   }
 }
 
@@ -1475,13 +1469,12 @@ class JumpEvent: public Event {
     cleanLocals(cleanLocals)
   {
     bool thunk;
-    uint8_t typeMask;
-    uint64_t registerMask;
-    c->arch->plan(type, vm::TargetBytesPerWord, &typeMask, &registerMask, &thunk);
+    OperandMask mask;
+    c->arch->plan(type, vm::TargetBytesPerWord, mask, &thunk);
 
     assert(c, not thunk);
 
-    this->addRead(c, address, SiteMask(typeMask, registerMask, AnyFrameIndex));
+    this->addRead(c, address, SiteMask(mask.typeMask, mask.registerMask, AnyFrameIndex));
   }
 
   virtual const char* name() {
diff --git a/src/codegen/compiler/site.h b/src/codegen/compiler/site.h
index 1360e8959f..bf2cef34b6 100644
--- a/src/codegen/compiler/site.h
+++ b/src/codegen/compiler/site.h
@@ -40,6 +40,14 @@ class SiteMask {
     return SiteMask(1 << lir::RegisterOperand, 1 << number, NoFrameIndex);
   }
 
+  static SiteMask lowPart(const OperandMask& mask) {
+    return SiteMask(mask.typeMask, mask.registerMask, AnyFrameIndex);
+  }
+
+  static SiteMask highPart(const OperandMask& mask) {
+    return SiteMask(mask.typeMask, mask.registerMask >> 32, AnyFrameIndex);
+  }
+
   uint8_t typeMask;
   uint32_t registerMask;
   int frameIndex;
diff --git a/src/codegen/powerpc/assembler.cpp b/src/codegen/powerpc/assembler.cpp
index 426891d475..97e5cec19d 100644
--- a/src/codegen/powerpc/assembler.cpp
+++ b/src/codegen/powerpc/assembler.cpp
@@ -2258,27 +2258,27 @@ class MyArchitecture: public Assembler::Architecture {
   
   virtual void plan
   (lir::UnaryOperation,
-   unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned, OperandMask& aMask,
    bool* thunk)
   {
-    *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
-    *aRegisterMask = ~static_cast<uint64_t>(0);
+    aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
+    aMask.registerMask = ~static_cast<uint64_t>(0);
     *thunk = false;
   }
 
   virtual void planSource
   (lir::BinaryOperation op,
-   unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned, OperandMask& aMask,
    unsigned, bool* thunk)
   {
-    *aTypeMask = ~0;
-    *aRegisterMask = ~static_cast<uint64_t>(0);
+    aMask.typeMask = ~0;
+    aMask.registerMask = ~static_cast<uint64_t>(0);
 
     *thunk = false;
 
     switch (op) {
     case lir::Negate:
-      *aTypeMask = (1 << lir::RegisterOperand);
+      aMask.typeMask = (1 << lir::RegisterOperand);
       break;
 
     case lir::Absolute:
@@ -2298,15 +2298,15 @@ class MyArchitecture: public Assembler::Architecture {
   
   virtual void planDestination
   (lir::BinaryOperation op,
-   unsigned, uint8_t, uint64_t,
-   unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask)
+   unsigned, const OperandMask& aMask UNUSED,
+   unsigned, OperandMask& bMask)
   {
-    *bTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-    *bRegisterMask = ~static_cast<uint64_t>(0);
+    bMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+    bMask.registerMask = ~static_cast<uint64_t>(0);
 
     switch (op) {
     case lir::Negate:
-      *bTypeMask = (1 << lir::RegisterOperand);
+      bMask.typeMask = (1 << lir::RegisterOperand);
       break;
 
     default:
@@ -2315,35 +2315,35 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual void planMove
-  (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
-   uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-   uint8_t dstTypeMask, uint64_t)
+  (unsigned, OperandMask& srcMask,
+   OperandMask& tmpMask,
+   const OperandMask& dstMask)
   {
-    *srcTypeMask = ~0;
-    *srcRegisterMask = ~static_cast<uint64_t>(0);
+    srcMask.typeMask = ~0;
+    srcMask.registerMask = ~static_cast<uint64_t>(0);
 
-    *tmpTypeMask = 0;
-    *tmpRegisterMask = 0;
+    tmpMask.typeMask = 0;
+    tmpMask.registerMask = 0;
 
-    if (dstTypeMask & (1 << lir::MemoryOperand)) {
+    if (dstMask.typeMask & (1 << lir::MemoryOperand)) {
       // can't move directly from memory or constant to memory
-      *srcTypeMask = 1 << lir::RegisterOperand;
-      *tmpTypeMask = 1 << lir::RegisterOperand;
-      *tmpRegisterMask = ~static_cast<uint64_t>(0);
+      srcMask.typeMask = 1 << lir::RegisterOperand;
+      tmpMask.typeMask = 1 << lir::RegisterOperand;
+      tmpMask.registerMask = ~static_cast<uint64_t>(0);
     }
   }
 
   virtual void planSource
   (lir::TernaryOperation op,
-   unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
-   unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask,
+   unsigned aSize, OperandMask& aMask,
+   unsigned, OperandMask& bMask,
    unsigned, bool* thunk)
   {
-    *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
-    *aRegisterMask = ~static_cast<uint64_t>(0);
+    aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
+    aMask.registerMask = ~static_cast<uint64_t>(0);
 
-    *bTypeMask = (1 << lir::RegisterOperand);
-    *bRegisterMask = ~static_cast<uint64_t>(0);
+    bMask.typeMask = (1 << lir::RegisterOperand);
+    bMask.registerMask = ~static_cast<uint64_t>(0);
 
     *thunk = false;
 
@@ -2351,12 +2351,12 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::Add:
     case lir::Subtract:
       if (aSize == 8) {
-        *aTypeMask = *bTypeMask = (1 << lir::RegisterOperand);
+        aMask.typeMask = bMask.typeMask = (1 << lir::RegisterOperand);
       }
       break;
 
     case lir::Multiply:
-      *aTypeMask = *bTypeMask = (1 << lir::RegisterOperand);
+      aMask.typeMask = bMask.typeMask = (1 << lir::RegisterOperand);
       break;
 
     case lir::Divide:
@@ -2370,7 +2370,7 @@ class MyArchitecture: public Assembler::Architecture {
       if (true) {//if (TargetBytesPerWord == 4 and aSize == 8) {
         *thunk = true;        
       } else {
-        *aTypeMask = (1 << lir::RegisterOperand);
+        aMask.typeMask = (1 << lir::RegisterOperand);
       }
       break;
 
@@ -2399,16 +2399,16 @@ class MyArchitecture: public Assembler::Architecture {
 
   virtual void planDestination
   (lir::TernaryOperation op,
-   unsigned, uint8_t, uint64_t,
-   unsigned, uint8_t, const uint64_t,
-   unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask)
+   unsigned, const OperandMask& aMask UNUSED,
+   unsigned, const OperandMask& bMask UNUSED,
+   unsigned, OperandMask& cMask)
   {
     if (isBranch(op)) {
-      *cTypeMask = (1 << lir::ConstantOperand);
-      *cRegisterMask = 0;
+      cMask.typeMask = (1 << lir::ConstantOperand);
+      cMask.registerMask = 0;
     } else {
-      *cTypeMask = (1 << lir::RegisterOperand);
-      *cRegisterMask = ~static_cast<uint64_t>(0);
+      cMask.typeMask = (1 << lir::RegisterOperand);
+      cMask.registerMask = ~static_cast<uint64_t>(0);
     }
   }
 
diff --git a/src/codegen/x86/assembler.cpp b/src/codegen/x86/assembler.cpp
index 5e3fa6aa3c..33b7ce168d 100644
--- a/src/codegen/x86/assembler.cpp
+++ b/src/codegen/x86/assembler.cpp
@@ -2992,37 +2992,34 @@ class MyArchitecture: public Assembler::Architecture {
 
   virtual void plan
   (lir::UnaryOperation,
-   unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned, OperandMask& aMask,
    bool* thunk)
   {
-    *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand)
-      | (1 << lir::ConstantOperand);
-    *aRegisterMask = ~static_cast<uint64_t>(0);
+    aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand) | (1 << lir::ConstantOperand);
     *thunk = false;
   }
 
   virtual void planSource
   (lir::BinaryOperation op,
-   unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask,
+   unsigned aSize, OperandMask& aMask,
    unsigned bSize, bool* thunk)
   {
-    *aTypeMask = ~0;
-    *aRegisterMask = GeneralRegisterMask |
+    aMask.registerMask = GeneralRegisterMask |
       (static_cast<uint64_t>(GeneralRegisterMask) << 32);
 
     *thunk = false;
 
     switch (op) {
     case lir::Negate:
-      *aTypeMask = (1 << lir::RegisterOperand);
-      *aRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
+      aMask.typeMask = (1 << lir::RegisterOperand);
+      aMask.registerMask = (static_cast<uint64_t>(1) << (rdx + 32))
         | (static_cast<uint64_t>(1) << rax);
       break;
 
     case lir::Absolute:
       if (aSize <= TargetBytesPerWord) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = (static_cast<uint64_t>(1) << rax);
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = (static_cast<uint64_t>(1) << rax);
       } else {
         *thunk = true;
       }
@@ -3030,8 +3027,8 @@ class MyArchitecture: public Assembler::Architecture {
 
     case lir::FloatAbsolute:
       if (useSSE(&c)) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
           | FloatRegisterMask;
       } else {
         *thunk = true;
@@ -3041,8 +3038,8 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::FloatNegate:
       // floatNegateRR does not support doubles
       if (useSSE(&c) and aSize == 4 and bSize == 4) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = FloatRegisterMask;
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = FloatRegisterMask;
       } else {
         *thunk = true;
       }
@@ -3050,8 +3047,8 @@ class MyArchitecture: public Assembler::Architecture {
 
     case lir::FloatSquareRoot:
       if (useSSE(&c)) {
-        *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+        aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        aMask.registerMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
           | FloatRegisterMask;
       } else {
         *thunk = true;
@@ -3060,8 +3057,8 @@ class MyArchitecture: public Assembler::Architecture {
 
     case lir::Float2Float:
       if (useSSE(&c)) {
-        *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+        aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        aMask.registerMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
           | FloatRegisterMask;
       } else {
         *thunk = true;
@@ -3074,8 +3071,8 @@ class MyArchitecture: public Assembler::Architecture {
       // thunks or produce inline machine code which handles edge
       // cases properly.
       if (false and useSSE(&c) and bSize <= TargetBytesPerWord) {
-        *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+        aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        aMask.registerMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
           | FloatRegisterMask;
       } else {
         *thunk = true;
@@ -3084,8 +3081,8 @@ class MyArchitecture: public Assembler::Architecture {
 
     case lir::Int2Float:
       if (useSSE(&c) and aSize <= TargetBytesPerWord) {
-        *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-        *aRegisterMask = GeneralRegisterMask
+        aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        aMask.registerMask = GeneralRegisterMask
           | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
       } else {
         *thunk = true;
@@ -3093,20 +3090,20 @@ class MyArchitecture: public Assembler::Architecture {
       break;
 
     case lir::Move:
-      *aTypeMask = ~0;
-      *aRegisterMask = ~static_cast<uint64_t>(0);
+      aMask.typeMask = ~0;
+      aMask.registerMask = ~static_cast<uint64_t>(0);
 
       if (TargetBytesPerWord == 4) {
         if (aSize == 4 and bSize == 8) {
-          *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+          aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
           const uint32_t mask
             = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
-          *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;    
+          aMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;    
         } else if (aSize == 1 or bSize == 1) {
-          *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+          aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
           const uint32_t mask
             = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx);
-          *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;     
+          aMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;     
         }
       }
       break;
@@ -3117,69 +3114,69 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual void planDestination
-  (lir::BinaryOperation op, unsigned aSize, uint8_t aTypeMask,
-   uint64_t aRegisterMask, unsigned bSize, uint8_t* bTypeMask,
-   uint64_t* bRegisterMask)
+  (lir::BinaryOperation op,
+   unsigned aSize, const OperandMask& aMask,
+   unsigned bSize, OperandMask& bMask)
   {
-    *bTypeMask = ~0;
-    *bRegisterMask = GeneralRegisterMask
+    bMask.typeMask = ~0;
+    bMask.registerMask = GeneralRegisterMask
       | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
 
     switch (op) {
     case lir::Absolute:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = (static_cast<uint64_t>(1) << rax);
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = (static_cast<uint64_t>(1) << rax);
       break;
 
     case lir::FloatAbsolute:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = aRegisterMask;
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = aMask.registerMask;
       break;
 
     case lir::Negate:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = aRegisterMask;
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = aMask.registerMask;
       break;
 
     case lir::FloatNegate:
     case lir::FloatSquareRoot:
     case lir::Float2Float:
     case lir::Int2Float:
-      *bTypeMask = (1 << lir::RegisterOperand);
-      *bRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+      bMask.typeMask = (1 << lir::RegisterOperand);
+      bMask.registerMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
         | FloatRegisterMask;
       break;
 
     case lir::Float2Int:
-      *bTypeMask = (1 << lir::RegisterOperand);
+      bMask.typeMask = (1 << lir::RegisterOperand);
       break;
 
     case lir::Move:
-      if (aTypeMask & ((1 << lir::MemoryOperand) | 1 << lir::AddressOperand)) {
-        *bTypeMask = (1 << lir::RegisterOperand);
-        *bRegisterMask = GeneralRegisterMask
+      if (aMask.typeMask & ((1 << lir::MemoryOperand) | 1 << lir::AddressOperand)) {
+        bMask.typeMask = (1 << lir::RegisterOperand);
+        bMask.registerMask = GeneralRegisterMask
           | (static_cast<uint64_t>(GeneralRegisterMask) << 32)
           | FloatRegisterMask;
-      } else if (aTypeMask & (1 << lir::RegisterOperand)) {
-        *bTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-        if (aRegisterMask & FloatRegisterMask) {
-          *bRegisterMask = FloatRegisterMask;          
+      } else if (aMask.typeMask & (1 << lir::RegisterOperand)) {
+        bMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        if (aMask.registerMask & FloatRegisterMask) {
+          bMask.registerMask = FloatRegisterMask;          
         } else {
-          *bRegisterMask = GeneralRegisterMask
+          bMask.registerMask = GeneralRegisterMask
             | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
         }
       } else {
-        *bTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        bMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
       }
 
       if (TargetBytesPerWord == 4) {
         if (aSize == 4 and bSize == 8) {
-          *bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32))
+          bMask.registerMask = (static_cast<uint64_t>(1) << (rdx + 32))
             | (static_cast<uint64_t>(1) << rax);
         } else if (aSize == 1 or bSize == 1) {
           const uint32_t mask
             = (1 << rax) | (1 << rcx) | (1 << rdx) | (1 << rbx);
-          *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
+          bMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;
         }
       }
       break;
@@ -3190,44 +3187,44 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual void planMove
-  (unsigned size, uint8_t* srcTypeMask, uint64_t* srcRegisterMask,
-   uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask,
-   uint8_t dstTypeMask, uint64_t dstRegisterMask)
+  (unsigned size, OperandMask& srcMask,
+   OperandMask& tmpMask,
+   const OperandMask& dstMask)
   {
-    *srcTypeMask = ~0;
-    *srcRegisterMask = ~static_cast<uint64_t>(0);
+    srcMask.typeMask = ~0;
+    srcMask.registerMask = ~static_cast<uint64_t>(0);
 
-    *tmpTypeMask = 0;
-    *tmpRegisterMask = 0;
+    tmpMask.typeMask = 0;
+    tmpMask.registerMask = 0;
 
-    if (dstTypeMask & (1 << lir::MemoryOperand)) {
+    if (dstMask.typeMask & (1 << lir::MemoryOperand)) {
       // can't move directly from memory to memory
-      *srcTypeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
-      *tmpTypeMask = 1 << lir::RegisterOperand;
-      *tmpRegisterMask = GeneralRegisterMask
+      srcMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
+      tmpMask.typeMask = 1 << lir::RegisterOperand;
+      tmpMask.registerMask = GeneralRegisterMask
         | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
-    } else if (dstTypeMask & (1 << lir::RegisterOperand)) {
+    } else if (dstMask.typeMask & (1 << lir::RegisterOperand)) {
       if (size > TargetBytesPerWord) {
         // can't move directly from FPR to GPR or vice-versa for
         // values larger than the GPR size
-        if (dstRegisterMask & FloatRegisterMask) {
-          *srcRegisterMask = FloatRegisterMask
+        if (dstMask.registerMask & FloatRegisterMask) {
+          srcMask.registerMask = FloatRegisterMask
             | (static_cast<uint64_t>(FloatRegisterMask) << 32);
-          *tmpTypeMask = 1 << lir::MemoryOperand;          
-        } else if (dstRegisterMask & GeneralRegisterMask) {
-          *srcRegisterMask = GeneralRegisterMask
+          tmpMask.typeMask = 1 << lir::MemoryOperand;          
+        } else if (dstMask.registerMask & GeneralRegisterMask) {
+          srcMask.registerMask = GeneralRegisterMask
             | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
-          *tmpTypeMask = 1 << lir::MemoryOperand;
+          tmpMask.typeMask = 1 << lir::MemoryOperand;
         }
       }
-      if (dstRegisterMask & FloatRegisterMask) {
+      if (dstMask.registerMask & FloatRegisterMask) {
         // can't move directly from constant to FPR
-        *srcTypeMask &= ~(1 << lir::ConstantOperand);
+        srcMask.typeMask &= ~(1 << lir::ConstantOperand);
         if (size > TargetBytesPerWord) {
-          *tmpTypeMask = 1 << lir::MemoryOperand;
+          tmpMask.typeMask = 1 << lir::MemoryOperand;
         } else {
-          *tmpTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-          *tmpRegisterMask = GeneralRegisterMask
+          tmpMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+          tmpMask.registerMask = GeneralRegisterMask
             | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
         }
       }
@@ -3236,16 +3233,16 @@ class MyArchitecture: public Assembler::Architecture {
 
   virtual void planSource
   (lir::TernaryOperation op,
-   unsigned aSize, uint8_t *aTypeMask, uint64_t *aRegisterMask,
-   unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask,
+   unsigned aSize, OperandMask& aMask,
+   unsigned bSize, OperandMask& bMask,
    unsigned, bool* thunk)
   {
-    *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
-    *aRegisterMask = GeneralRegisterMask
+    aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::ConstantOperand);
+    aMask.registerMask = GeneralRegisterMask
       | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
 
-    *bTypeMask = (1 << lir::RegisterOperand);
-    *bRegisterMask = GeneralRegisterMask
+    bMask.typeMask = (1 << lir::RegisterOperand);
+    bMask.registerMask = GeneralRegisterMask
       | (static_cast<uint64_t>(GeneralRegisterMask) << 32);
 
     *thunk = false;
@@ -3256,14 +3253,14 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::FloatMultiply:
     case lir::FloatDivide:
       if (useSSE(&c)) {
-        *aTypeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
-        *bTypeMask = (1 << lir::RegisterOperand);
+        aMask.typeMask = (1 << lir::RegisterOperand) | (1 << lir::MemoryOperand);
+        bMask.typeMask = (1 << lir::RegisterOperand);
 
         const uint64_t mask
           = (static_cast<uint64_t>(FloatRegisterMask) << 32)
           | FloatRegisterMask;
-        *aRegisterMask = mask;
-        *bRegisterMask = mask;
+        aMask.registerMask = mask;
+        bMask.registerMask = mask;
       } else {
         *thunk = true;
       }
@@ -3276,11 +3273,11 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::Multiply:
       if (TargetBytesPerWord == 4 and aSize == 8) { 
         const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
-        *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
-        *bRegisterMask = (static_cast<uint64_t>(1) << (rdx + 32)) | mask;
+        aMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;
+        bMask.registerMask = (static_cast<uint64_t>(1) << (rdx + 32)) | mask;
       } else {
-        *aRegisterMask = GeneralRegisterMask;
-        *bRegisterMask = GeneralRegisterMask;
+        aMask.registerMask = GeneralRegisterMask;
+        bMask.registerMask = GeneralRegisterMask;
       }
       break;
 
@@ -3288,9 +3285,9 @@ class MyArchitecture: public Assembler::Architecture {
       if (TargetBytesPerWord == 4 and aSize == 8) {
         *thunk = true;        			
       } else {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
-        *bRegisterMask = 1 << rax;      
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
+        bMask.registerMask = 1 << rax;      
       }
       break;
 
@@ -3298,9 +3295,9 @@ class MyArchitecture: public Assembler::Architecture {
       if (TargetBytesPerWord == 4 and aSize == 8) {
         *thunk = true;
       } else {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
-        *bRegisterMask = 1 << rax;
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
+        bMask.registerMask = 1 << rax;
       }
       break;
 
@@ -3309,13 +3306,13 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::UnsignedShiftRight: {
       if (TargetBytesPerWord == 4 and bSize == 8) {
         const uint32_t mask = GeneralRegisterMask & ~(1 << rcx);
-        *aRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
-        *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
+        aMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;
+        bMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;
       } else {
-        *aRegisterMask = (static_cast<uint64_t>(GeneralRegisterMask) << 32)
+        aMask.registerMask = (static_cast<uint64_t>(GeneralRegisterMask) << 32)
           | (static_cast<uint64_t>(1) << rcx);
         const uint32_t mask = GeneralRegisterMask & ~(1 << rcx);
-        *bRegisterMask = (static_cast<uint64_t>(mask) << 32) | mask;
+        bMask.registerMask = (static_cast<uint64_t>(mask) << 32) | mask;
       }
     } break;
 
@@ -3330,11 +3327,11 @@ class MyArchitecture: public Assembler::Architecture {
     case lir::JumpIfFloatLessOrEqualOrUnordered:
     case lir::JumpIfFloatGreaterOrEqualOrUnordered:
       if (useSSE(&c)) {
-        *aTypeMask = (1 << lir::RegisterOperand);
-        *aRegisterMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
+        aMask.typeMask = (1 << lir::RegisterOperand);
+        aMask.registerMask = (static_cast<uint64_t>(FloatRegisterMask) << 32)
           | FloatRegisterMask;
-        *bTypeMask = *aTypeMask;
-        *bRegisterMask = *aRegisterMask;
+        bMask.typeMask = aMask.typeMask;
+        bMask.registerMask = aMask.registerMask;
       } else {
         *thunk = true;
       }
@@ -3346,16 +3343,17 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual void planDestination
-  (lir::TernaryOperation op, unsigned, uint8_t, uint64_t, unsigned, uint8_t,
-   uint64_t bRegisterMask, unsigned, uint8_t* cTypeMask,
-   uint64_t* cRegisterMask)
+  (lir::TernaryOperation op,
+    unsigned, const OperandMask&,
+    unsigned, const OperandMask& bMask,
+    unsigned, OperandMask& cMask)
   {
     if (isBranch(op)) {
-      *cTypeMask = (1 << lir::ConstantOperand);
-      *cRegisterMask = 0;
+      cMask.typeMask = (1 << lir::ConstantOperand);
+      cMask.registerMask = 0;
     } else {
-      *cTypeMask = (1 << lir::RegisterOperand);
-      *cRegisterMask = bRegisterMask;
+      cMask.typeMask = (1 << lir::RegisterOperand);
+      cMask.registerMask = bMask.registerMask;
     }
   }
 
diff --git a/unittest/codegen/assembler-test.cpp b/unittest/codegen/assembler-test.cpp
index 3fe8a07427..78317c19c8 100644
--- a/unittest/codegen/assembler-test.cpp
+++ b/unittest/codegen/assembler-test.cpp
@@ -84,12 +84,11 @@ public:
 
     for(int op = (int)lir::Call; op < (int)lir::AlignedJump; op++) {
       bool thunk;
-      uint8_t typeMask;
-      uint64_t registerMask;
-      env.arch->plan((lir::UnaryOperation)op, vm::TargetBytesPerWord, &typeMask, &registerMask, &thunk);
+      OperandMask mask;
+      env.arch->plan((lir::UnaryOperation)op, vm::TargetBytesPerWord, mask, &thunk);
       assertFalse(thunk);
-      assertNotEqual(static_cast<uint8_t>(0), typeMask);
-      assertNotEqual(static_cast<uint64_t>(0), registerMask);
+      assertNotEqual(static_cast<uint8_t>(0), mask.typeMask);
+      assertNotEqual(static_cast<uint64_t>(0), mask.registerMask);
     }
 
   }

From 49bfda3932bc4e4f7af128ea3a1c8042bf42914a Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Fri, 15 Feb 2013 22:04:32 -0700
Subject: [PATCH 02/22] construct SiteMasks based on OperandMasks

---
 src/codegen/compiler.cpp       |  6 ++---
 src/codegen/compiler/event.cpp | 40 +++++++++++++++++-----------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/codegen/compiler.cpp b/src/codegen/compiler.cpp
index de46f1c639..bd67d8bdc1 100644
--- a/src/codegen/compiler.cpp
+++ b/src/codegen/compiler.cpp
@@ -340,7 +340,7 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord,
         (size, src, tmp,
          OperandMask(dstMask.typeMask, dstMask.registerMask));
 
-      SiteMask srcMask(src.typeMask, src.registerMask, AnyFrameIndex);
+      SiteMask srcMask = SiteMask::lowPart(src);
       for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) {
         Site* s = it.next();
         if (s->match(c, srcMask) or s->match(c, dstMask)) {
@@ -365,7 +365,7 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord,
     (size, src, tmp,
      OperandMask(1 << dstSite->type(c), dstSite->registerMask(c)));
 
-  SiteMask srcMask(src.typeMask, src.registerMask, AnyFrameIndex);
+  SiteMask srcMask = SiteMask::lowPart(src);
   unsigned cost = 0xFFFFFFFF;
   Site* srcSite = 0;
   for (SiteIterator it(c, value, true, includeNextWord); it.hasMore();) {
@@ -403,7 +403,7 @@ maybeMove(Context* c, Read* read, bool intersectRead, bool includeNextWord,
       srcSite->freeze(c, value);
       dstSite->freeze(c, value);
 
-      SiteMask tmpMask(tmp.typeMask, tmp.registerMask, AnyFrameIndex);
+      SiteMask tmpMask = SiteMask::lowPart(tmp);
       SingleRead tmpRead(tmpMask, 0);
       tmpRead.value = value;
       tmpRead.successor_ = value;
diff --git a/src/codegen/compiler/event.cpp b/src/codegen/compiler/event.cpp
index a785001de9..3cb37b821c 100644
--- a/src/codegen/compiler/event.cpp
+++ b/src/codegen/compiler/event.cpp
@@ -580,8 +580,8 @@ class MoveEvent: public Event {
             | static_cast<uint64_t>(srcValue->source->registerMask(c))),
        dstSize, dst);
 
-    SiteMask dstLowMask(dst.typeMask, dst.registerMask, AnyFrameIndex);
-    SiteMask dstHighMask(dst.typeMask, dst.registerMask >> 32, AnyFrameIndex);
+    SiteMask dstLowMask = SiteMask::lowPart(dst);
+    SiteMask dstHighMask = SiteMask::highPart(dst);
 
     if (srcSelectSize >= vm::TargetBytesPerWord
         and dstSize >= vm::TargetBytesPerWord
@@ -710,8 +710,8 @@ appendMove(Context* c, lir::BinaryOperation type, unsigned srcSize,
   append(c, new(c->zone)
          MoveEvent
          (c, type, srcSize, srcSelectSize, srcValue, dstSize, dstValue,
-          SiteMask(src.typeMask, src.registerMask, AnyFrameIndex),
-          SiteMask(src.typeMask, src.registerMask >> 32, AnyFrameIndex)));
+          SiteMask::lowPart(src),
+          SiteMask::highPart(src)));
 }
 
 
@@ -846,8 +846,8 @@ class CombineEvent: public Event {
        resultSize,
        cMask);
 
-    SiteMask resultLowMask(cMask.typeMask, cMask.registerMask, AnyFrameIndex);
-    SiteMask resultHighMask(cMask.typeMask, cMask.registerMask >> 32, AnyFrameIndex);
+    SiteMask resultLowMask = SiteMask::lowPart(cMask);
+    SiteMask resultHighMask = SiteMask::highPart(cMask);
 
     Site* low = getTarget(c, secondValue, resultValue, resultLowMask);
     unsigned lowSize = low->registerSize(c);
@@ -943,10 +943,10 @@ appendCombine(Context* c, lir::TernaryOperation type,
         firstSize, firstValue,
         secondSize, secondValue,
         resultSize, resultValue,
-        SiteMask(firstMask.typeMask, firstMask.registerMask, AnyFrameIndex),
-        SiteMask(firstMask.typeMask, firstMask.registerMask >> 32, AnyFrameIndex),
-        SiteMask(secondMask.typeMask, secondMask.registerMask, AnyFrameIndex),
-        SiteMask(secondMask.typeMask, secondMask.registerMask >> 32, AnyFrameIndex)));
+        SiteMask::lowPart(firstMask),
+        SiteMask::highPart(firstMask),
+        SiteMask::lowPart(secondMask),
+        SiteMask::highPart(secondMask)));
   }
 }
 
@@ -988,8 +988,8 @@ class TranslateEvent: public Event {
        resultSize,
        bMask);
 
-    SiteMask resultLowMask(bMask.typeMask, bMask.registerMask, AnyFrameIndex);
-    SiteMask resultHighMask(bMask.typeMask, bMask.registerMask >> 32, AnyFrameIndex);
+    SiteMask resultLowMask = SiteMask::lowPart(bMask);
+    SiteMask resultHighMask = SiteMask::highPart(bMask);
     
     Site* low = getTarget(c, value, resultValue, resultLowMask);
     unsigned lowSize = low->registerSize(c);
@@ -1056,8 +1056,8 @@ appendTranslate(Context* c, lir::BinaryOperation type, unsigned firstSize,
     append(c, new(c->zone)
            TranslateEvent
            (c, type, firstSize, firstValue, resultSize, resultValue,
-            SiteMask(first.typeMask, first.registerMask, AnyFrameIndex),
-            SiteMask(first.typeMask, first.registerMask >> 32, AnyFrameIndex)));
+            SiteMask::lowPart(first),
+            SiteMask::highPart(first)));
   }
 }
 
@@ -1317,7 +1317,7 @@ class BranchEvent: public Event {
       size, OperandMask(0, 0),
       vm::TargetBytesPerWord, dstMask);
 
-    this->addRead(c, addressValue, SiteMask(dstMask.typeMask, dstMask.registerMask, AnyFrameIndex));
+    this->addRead(c, addressValue, SiteMask::lowPart(dstMask));
   }
 
   virtual const char* name() {
@@ -1418,10 +1418,10 @@ appendBranch(Context* c, lir::TernaryOperation type, unsigned size, Value* first
       (c, new(c->zone)
        BranchEvent
        (c, type, size, firstValue, secondValue, addressValue,
-        SiteMask(firstMask.typeMask, firstMask.registerMask, AnyFrameIndex),
-        SiteMask(firstMask.typeMask, firstMask.registerMask >> 32, AnyFrameIndex),
-        SiteMask(secondMask.typeMask, secondMask.registerMask, AnyFrameIndex),
-        SiteMask(secondMask.typeMask, secondMask.registerMask >> 32, AnyFrameIndex)));
+        SiteMask::lowPart(firstMask),
+        SiteMask::highPart(firstMask),
+        SiteMask::lowPart(secondMask),
+        SiteMask::highPart(secondMask)));
   }
 }
 
@@ -1474,7 +1474,7 @@ class JumpEvent: public Event {
 
     assert(c, not thunk);
 
-    this->addRead(c, address, SiteMask(mask.typeMask, mask.registerMask, AnyFrameIndex));
+    this->addRead(c, address, SiteMask::lowPart(mask));
   }
 
   virtual const char* name() {

From 61f03643e0db111863469efc5c329c99055545ce Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sat, 16 Feb 2013 19:50:34 -0700
Subject: [PATCH 03/22] begin splitting up x86 assembler

---
 makefile                      |  37 ++--
 src/codegen/x86/assembler.cpp | 368 +++++-----------------------------
 src/codegen/x86/block.cpp     |  39 ++++
 src/codegen/x86/block.h       |  40 ++++
 src/codegen/x86/context.cpp   |  34 ++++
 src/codegen/x86/context.h     |  91 +++++++++
 src/codegen/x86/fixup.cpp     | 106 ++++++++++
 src/codegen/x86/fixup.h       |  87 ++++++++
 src/codegen/x86/padding.cpp   |  68 +++++++
 src/codegen/x86/padding.h     |  38 ++++
 src/common.h                  |  12 ++
 11 files changed, 583 insertions(+), 337 deletions(-)
 create mode 100644 src/codegen/x86/block.cpp
 create mode 100644 src/codegen/x86/block.h
 create mode 100644 src/codegen/x86/context.cpp
 create mode 100644 src/codegen/x86/context.h
 create mode 100644 src/codegen/x86/fixup.cpp
 create mode 100644 src/codegen/x86/fixup.h
 create mode 100644 src/codegen/x86/padding.cpp
 create mode 100644 src/codegen/x86/padding.h

diff --git a/makefile b/makefile
index 1e29f0ca91..d0d451c9ae 100755
--- a/makefile
+++ b/makefile
@@ -939,7 +939,7 @@ generated-code = \
 	$(build)/type-name-initializations.cpp \
 	$(build)/type-maps.cpp
 
-vm-depends := $(generated-code) $(wildcard $(src)/*.h) $(wildcard $(src)/codegen/*.h) $(wildcard $(src)/codegen/compiler/*.h)
+vm-depends := $(generated-code) $(wildcard $(src)/*.h) $(wildcard $(src)/codegen/*.h)
 
 vm-sources = \
 	$(src)/$(system).cpp \
@@ -968,26 +968,31 @@ embed-objects = $(call cpp-objects,$(embed-sources),$(src),$(build-embed))
 
 compiler-sources = \
 	$(src)/codegen/compiler.cpp \
-	$(src)/codegen/compiler/context.cpp \
-	$(src)/codegen/compiler/resource.cpp \
-	$(src)/codegen/compiler/site.cpp \
-	$(src)/codegen/compiler/regalloc.cpp \
-	$(src)/codegen/compiler/value.cpp \
-	$(src)/codegen/compiler/read.cpp \
-	$(src)/codegen/compiler/event.cpp \
-	$(src)/codegen/compiler/promise.cpp \
-	$(src)/codegen/compiler/frame.cpp \
-	$(src)/codegen/compiler/ir.cpp \
+	$(wildcard $(src)/codegen/compiler/*.cpp) \
 	$(src)/codegen/registers.cpp \
 	$(src)/codegen/targets.cpp
+compiler-objects = $(call cpp-objects,$(compiler-sources),$(src),$(build))
+$(compiler-objects): $(wildcard $(src)/codegen/compiler/*.h) $(vm-depends)
+
+x86-assembler-sources = $(wildcard $(src)/codegen/x86/*.cpp)
+x86-assembler-objects = $(call cpp-objects,$(x86-assembler-sources),$(src),$(build))
+$(x86-assembler-objects): $(wildcard $(src)/codegen/x86/*.h) $(vm-depends)
+
+arm-assembler-sources = $(wildcard $(src)/codegen/arm/*.cpp)
+arm-assembler-objects = $(call cpp-objects,$(arm-assembler-sources),$(src),$(build))
+$(arm-assembler-objects): $(wildcard $(src)/codegen/arm/*.h) $(vm-depends)
+
+powerpc-assembler-sources = $(wildcard $(src)/codegen/powerpc/*.cpp)
+powerpc-assembler-objects = $(call cpp-objects,$(powerpc-assembler-sources),$(src),$(build))
+$(powerpc-assembler-objects): $(wildcard $(src)/codegen/powerpc/*.h) $(vm-depends)
 
 all-assembler-sources = \
-	$(src)/codegen/x86/assembler.cpp \
-	$(src)/codegen/arm/assembler.cpp \
-	$(src)/codegen/powerpc/assembler.cpp
+	$(x86-assembler-sources) \
+	$(arm-assembler-sources) \
+	$(powerpc-assembler-sources)
 
-native-assembler-sources = \
-	$(src)/codegen/$(target-asm)/assembler.cpp
+native-assembler-sources = $($(target-asm)-assembler-sources)
+native-assembler-objects = $($(target-asm)-assembler-objects)
 
 all-codegen-target-sources = \
 	$(compiler-sources) \
diff --git a/src/codegen/x86/assembler.cpp b/src/codegen/x86/assembler.cpp
index 33b7ce168d..c455f3e963 100644
--- a/src/codegen/x86/assembler.cpp
+++ b/src/codegen/x86/assembler.cpp
@@ -15,6 +15,11 @@
 #include "codegen/assembler.h"
 #include "codegen/registers.h"
 
+#include "codegen/x86/context.h"
+#include "codegen/x86/block.h"
+#include "codegen/x86/fixup.h"
+#include "codegen/x86/padding.h"
+
 #include "util/runtime-array.h"
 #include "util/abort.h"
 
@@ -23,11 +28,10 @@
 #define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
 
 using namespace vm;
-using namespace avian::codegen;
 
-namespace {
-
-namespace local {
+namespace avian {
+namespace codegen {
+namespace x86 {
 
 enum {
   rax = 0,
@@ -82,18 +86,6 @@ const int LongJumpRegister = r10;
 const unsigned StackAlignmentInBytes = 16;
 const unsigned StackAlignmentInWords = StackAlignmentInBytes / TargetBytesPerWord;
 
-bool
-isInt8(target_intptr_t v)
-{
-  return v == static_cast<int8_t>(v);
-}
-
-bool
-isInt32(target_intptr_t v)
-{
-  return v == static_cast<int32_t>(v);
-}
-
 class Task;
 class AlignmentPadding;
 
@@ -101,213 +93,6 @@ unsigned
 padding(AlignmentPadding* p, unsigned index, unsigned offset,
         AlignmentPadding* limit);
 
-class Context;
-class MyBlock;
-
-ResolvedPromise*
-resolved(Context* c, int64_t value);
-
-class MyBlock: public Assembler::Block {
- public:
-  MyBlock(unsigned offset):
-    next(0), firstPadding(0), lastPadding(0), offset(offset), start(~0),
-    size(0)
-  { }
-
-  virtual unsigned resolve(unsigned start, Assembler::Block* next) {
-    this->start = start;
-    this->next = static_cast<MyBlock*>(next);
-
-    return start + size + padding(firstPadding, start, offset, lastPadding);
-  }
-
-  MyBlock* next;
-  AlignmentPadding* firstPadding;
-  AlignmentPadding* lastPadding;
-  unsigned offset;
-  unsigned start;
-  unsigned size;
-};
-
-typedef void (*OperationType)(Context*);
-
-typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
-
-typedef void (*BinaryOperationType)
-(Context*, unsigned, lir::Operand*, unsigned, lir::Operand*);
-
-typedef void (*BranchOperationType)
-(Context*, lir::TernaryOperation, unsigned, lir::Operand*,
- lir::Operand*, lir::Operand*);
-
-class ArchitectureContext {
- public:
-  ArchitectureContext(System* s, bool useNativeFeatures):
-    s(s), useNativeFeatures(useNativeFeatures)
-  { }
-
-  System* s;
-  bool useNativeFeatures;
-  OperationType operations[lir::OperationCount];
-  UnaryOperationType unaryOperations[lir::UnaryOperationCount
-                                     * lir::OperandTypeCount];
-  BinaryOperationType binaryOperations
-  [(lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount)
-   * lir::OperandTypeCount
-   * lir::OperandTypeCount];
-  BranchOperationType branchOperations
-  [lir::BranchOperationCount
-   * lir::OperandTypeCount
-   * lir::OperandTypeCount];
-};
-
-class Context {
- public:
-  Context(System* s, Allocator* a, Zone* zone, ArchitectureContext* ac):
-    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
-    firstBlock(new(zone) MyBlock(0)),
-    lastBlock(firstBlock), ac(ac)
-  { }
-
-  System* s;
-  Zone* zone;
-  Assembler::Client* client;
-  Vector code;
-  Task* tasks;
-  uint8_t* result;
-  MyBlock* firstBlock;
-  MyBlock* lastBlock;
-  ArchitectureContext* ac;
-};
-
-Aborter* getAborter(Context* c) {
-  return c->s;
-}
-
-Aborter* getAborter(ArchitectureContext* c) {
-  return c->s;
-}
-
-ResolvedPromise*
-resolved(Context* c, int64_t value)
-{
-  return new(c->zone) ResolvedPromise(value);
-}
-
-class Offset: public Promise {
- public:
-  Offset(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit):
-    c(c), block(block), offset(offset), limit(limit), value_(-1)
-  { }
-
-  virtual bool resolved() {
-    return block->start != static_cast<unsigned>(~0);
-  }
-  
-  virtual int64_t value() {
-    assert(c, resolved());
-
-    if (value_ == -1) {
-      value_ = block->start + (offset - block->offset)
-        + padding(block->firstPadding, block->start, block->offset, limit);
-    }
-
-    return value_;
-  }
-
-  Context* c;
-  MyBlock* block;
-  unsigned offset;
-  AlignmentPadding* limit;
-  int value_;
-};
-
-Promise*
-offset(Context* c)
-{
-  return new(c->zone) Offset(c, c->lastBlock, c->code.length(), c->lastBlock->lastPadding);
-}
-
-class Task {
- public:
-  Task(Task* next): next(next) { }
-
-  virtual void run(Context* c) = 0;
-
-  Task* next;
-};
-
-void*
-resolveOffset(System* s, uint8_t* instruction, unsigned instructionSize,
-              int64_t value)
-{
-  intptr_t v = reinterpret_cast<uint8_t*>(value)
-    - instruction - instructionSize;
-    
-  expect(s, isInt32(v));
-    
-  int32_t v4 = v;
-  memcpy(instruction + instructionSize - 4, &v4, 4);
-  return instruction + instructionSize;
-}
-
-class OffsetListener: public Promise::Listener {
- public:
-  OffsetListener(System* s, uint8_t* instruction,
-                 unsigned instructionSize):
-    s(s),
-    instruction(instruction),
-    instructionSize(instructionSize)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    void* p = resolveOffset(s, instruction, instructionSize, value);
-    if (location) *location = p;
-    return false;
-  }
-
-  System* s;
-  uint8_t* instruction;
-  unsigned instructionSize;
-};
-
-class OffsetTask: public Task {
- public:
-  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset,
-             unsigned instructionSize):
-    Task(next),
-    promise(promise),
-    instructionOffset(instructionOffset),
-    instructionSize(instructionSize)
-  { }
-
-  virtual void run(Context* c) {
-    if (promise->resolved()) {
-      resolveOffset
-        (c->s, c->result + instructionOffset->value(), instructionSize,
-         promise->value());
-    } else {
-      new (promise->listen(sizeof(OffsetListener)))
-        OffsetListener(c->s, c->result + instructionOffset->value(),
-                       instructionSize);
-    }
-  }
-
-  Promise* promise;
-  Promise* instructionOffset;
-  unsigned instructionSize;
-};
-
-void
-appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset,
-                 unsigned instructionSize)
-{
-  OffsetTask* task =
-    new(c->zone) OffsetTask(c->tasks, promise, instructionOffset, instructionSize);
-
-  c->tasks = task;
-}
-
 void
 copy(System* s, void* dst, int64_t src, unsigned size)
 {
@@ -378,60 +163,6 @@ appendImmediateTask(Context* c, Promise* promise, Promise* offset,
     (c->tasks, promise, offset, size, promiseOffset);
 }
 
-class AlignmentPadding {
- public:
-  AlignmentPadding(Context* c, unsigned instructionOffset, unsigned alignment):
-    offset(c->code.length()),
-    instructionOffset(instructionOffset),
-    alignment(alignment),
-    next(0),
-    padding(-1)
-  {
-    if (c->lastBlock->firstPadding) {
-      c->lastBlock->lastPadding->next = this;
-    } else {
-      c->lastBlock->firstPadding = this;
-    }
-    c->lastBlock->lastPadding = this;
-  }
-
-  unsigned offset;
-  unsigned instructionOffset;
-  unsigned alignment;
-  AlignmentPadding* next;
-  int padding;
-};
-
-unsigned
-padding(AlignmentPadding* p, unsigned start, unsigned offset,
-        AlignmentPadding* limit)
-{
-  unsigned padding = 0;
-  if (limit) {
-    if (limit->padding == -1) {
-      for (; p; p = p->next) {
-        if (p->padding == -1) {
-          unsigned index = p->offset - offset;
-          while ((start + index + padding + p->instructionOffset)
-                 % p->alignment)
-          {
-            ++ padding;
-          }
-      
-          p->padding = padding;
-
-          if (p == limit) break;
-        } else {
-          padding = p->padding;
-        }
-      }
-    } else {
-      padding = limit->padding;
-    }
-  }
-  return padding;
-}
-
 extern "C" bool
 detectFeature(unsigned ecx, unsigned edx);
 
@@ -558,7 +289,7 @@ modrmSibImm(Context* c, int a, int scale, int index, int base, int offset)
 {
   if (offset == 0 and regCode(base) != rbp) {
     modrmSib(c, 0x00, a, scale, index, base);
-  } else if (isInt8(offset)) {
+  } else if (vm::fitsInInt8(offset)) {
     modrmSib(c, 0x40, a, scale, index, base);
     c->code.append(offset);
   } else {
@@ -627,7 +358,7 @@ storeLoadBarrier(Context* c)
 void
 unconditional(Context* c, unsigned jump, lir::Constant* a)
 {
-  appendOffsetTask(c, a->value, offset(c), 5);
+  appendOffsetTask(c, a->value, offsetPromise(c), 5);
 
   opcode(c, jump);
   c->code.append4(0);
@@ -636,7 +367,7 @@ unconditional(Context* c, unsigned jump, lir::Constant* a)
 void
 conditional(Context* c, unsigned condition, lir::Constant* a)
 {
-  appendOffsetTask(c, a->value, offset(c), 6);
+  appendOffsetTask(c, a->value, offsetPromise(c), 6);
   
   opcode(c, 0x0f, condition);
   c->code.append4(0);
@@ -904,7 +635,7 @@ moveCR2(Context* c, UNUSED unsigned aSize, lir::Constant* a,
       c->code.appendTargetAddress(a->value->value());
     } else {
       appendImmediateTask
-        (c, a->value, offset(c), TargetBytesPerWord, promiseOffset);
+        (c, a->value, offsetPromise(c), TargetBytesPerWord, promiseOffset);
       c->code.appendTargetAddress(static_cast<target_uintptr_t>(0));
     }
   }
@@ -1260,14 +991,14 @@ moveCM(Context* c, unsigned aSize UNUSED, lir::Constant* a,
     if (a->value->resolved()) {
       c->code.append4(a->value->value());
     } else {
-      appendImmediateTask(c, a->value, offset(c), 4);
+      appendImmediateTask(c, a->value, offsetPromise(c), 4);
       c->code.append4(0);
     }
     break;
 
   case 8: {
     if (TargetBytesPerWord == 8) {
-      if (a->value->resolved() and isInt32(a->value->value())) {
+      if (a->value->resolved() and vm::fitsInInt32(a->value->value())) {
         maybeRex(c, bSize, b);
         opcode(c, 0xc7);
         modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
@@ -1358,7 +1089,7 @@ addCarryCR(Context* c, unsigned size, lir::Constant* a,
   
   int64_t v = a->value->value();
   maybeRex(c, size, b);
-  if (isInt8(v)) {
+  if (vm::fitsInInt8(v)) {
     opcode(c, 0x83, 0xd0 + regCode(b));
     c->code.append(v);
   } else {
@@ -1387,9 +1118,9 @@ addCR(Context* c, unsigned aSize, lir::Constant* a,
       addCR(c, 4, &al, 4, b);
       addCarryCR(c, 4, &ah, &bh);
     } else {
-      if (isInt32(v)) {
+      if (vm::fitsInInt32(v)) {
         maybeRex(c, aSize, b);
-        if (isInt8(v)) {
+        if (vm::fitsInInt8(v)) {
           opcode(c, 0x83, 0xc0 + regCode(b));
           c->code.append(v);
         } else {
@@ -1414,7 +1145,7 @@ subtractBorrowCR(Context* c, unsigned size UNUSED, lir::Constant* a,
   assert(c, TargetBytesPerWord == 8 or size == 4);
   
   int64_t v = a->value->value();
-  if (isInt8(v)) {
+  if (vm::fitsInInt8(v)) {
     opcode(c, 0x83, 0xd8 + regCode(b));
     c->code.append(v);
   } else {
@@ -1447,9 +1178,9 @@ subtractCR(Context* c, unsigned aSize, lir::Constant* a,
       subtractCR(c, 4, &al, 4, b);
       subtractBorrowCR(c, 4, &ah, &bh);
     } else {
-      if (isInt32(v)) {
+      if (vm::fitsInInt32(v)) {
         maybeRex(c, aSize, b);
-        if (isInt8(v)) {
+        if (vm::fitsInInt8(v)) {
           opcode(c, 0x83, 0xe8 + regCode(b));
           c->code.append(v);
         } else {
@@ -1537,9 +1268,9 @@ andCR(Context* c, unsigned aSize, lir::Constant* a,
     andCR(c, 4, &al, 4, b);
     andCR(c, 4, &ah, 4, &bh);
   } else {
-    if (isInt32(v)) {
+    if (vm::fitsInInt32(v)) {
       maybeRex(c, aSize, b);
-      if (isInt8(v)) {
+      if (vm::fitsInInt8(v)) {
         opcode(c, 0x83, 0xe0 + regCode(b));
         c->code.append(v);
       } else {
@@ -1595,9 +1326,9 @@ orCR(Context* c, unsigned aSize, lir::Constant* a,
       orCR(c, 4, &al, 4, b);
       orCR(c, 4, &ah, 4, &bh);
     } else {
-      if (isInt32(v)) {
+      if (vm::fitsInInt32(v)) {
         maybeRex(c, aSize, b);
-        if (isInt8(v)) {
+        if (vm::fitsInInt8(v)) {
           opcode(c, 0x83, 0xc8 + regCode(b));
           c->code.append(v);
         } else {
@@ -1652,9 +1383,9 @@ xorCR(Context* c, unsigned aSize, lir::Constant* a,
       xorCR(c, 4, &al, 4, b);
       xorCR(c, 4, &ah, 4, &bh);
     } else {
-      if (isInt32(v)) {
+      if (vm::fitsInInt32(v)) {
         maybeRex(c, aSize, b);
-        if (isInt8(v)) {
+        if (vm::fitsInInt8(v)) {
           opcode(c, 0x83, 0xf0 + regCode(b));
           c->code.append(v);
         } else {
@@ -1828,10 +1559,10 @@ compareCR(Context* c, unsigned aSize, lir::Constant* a,
   assert(c, aSize == bSize);
   assert(c, TargetBytesPerWord == 8 or aSize == 4);
   
-  if (a->value->resolved() and isInt32(a->value->value())) {
+  if (a->value->resolved() and vm::fitsInInt32(a->value->value())) {
     int64_t v = a->value->value();
     maybeRex(c, aSize, b);
-    if (isInt8(v)) {
+    if (vm::fitsInInt8(v)) {
       opcode(c, 0x83, 0xf8 + regCode(b));
       c->code.append(v);
     } else {
@@ -1871,12 +1602,12 @@ compareCM(Context* c, unsigned aSize, lir::Constant* a,
   if (a->value->resolved()) { 
     int64_t v = a->value->value();   
     maybeRex(c, aSize, b);
-    opcode(c, isInt8(v) ? 0x83 : 0x81);
+    opcode(c, vm::fitsInInt8(v) ? 0x83 : 0x81);
     modrmSibImm(c, rdi, b->scale, b->index, b->base, b->offset);
     
-    if (isInt8(v)) {
+    if (vm::fitsInInt8(v)) {
       c->code.append(v);
-    } else if (isInt32(v)) {
+    } else if (vm::fitsInInt32(v)) {
       c->code.append4(v);
     } else {
       abort(c);
@@ -2070,9 +1801,9 @@ multiplyCR(Context* c, unsigned aSize, lir::Constant* a,
   } else {
     int64_t v = a->value->value();
     if (v != 1) {
-      if (isInt32(v)) {
+      if (vm::fitsInInt32(v)) {
         maybeRex(c, bSize, b, b);
-        if (isInt8(v)) {
+        if (vm::fitsInInt8(v)) {
           opcode(c, 0x6b);
           modrm(c, 0xc0, b, b);
           c->code.append(v);
@@ -2150,7 +1881,7 @@ doShift(Context* c, UNUSED void (*shift)
     maybeRex(c, bSize, b);
     if (v == 1) {
       opcode(c, 0xd1, type + regCode(b));
-    } else if (isInt8(v)) {
+    } else if (vm::fitsInInt8(v)) {
       opcode(c, 0xc1, type + regCode(b));
       c->code.append(v);
     } else {
@@ -2796,7 +2527,7 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual unsigned argumentFootprint(unsigned footprint) {
-    return local::argumentFootprint(footprint);
+    return x86::argumentFootprint(footprint);
   }
 
   virtual bool argumentAlignment() {
@@ -2904,7 +2635,7 @@ class MyArchitecture: public Assembler::Architecture {
       intptr_t v = static_cast<uint8_t*>(newTarget)
         - static_cast<uint8_t*>(returnAddress);
 
-      assert(&c, isInt32(v));
+      assert(&c, vm::fitsInInt32(v));
 
       int32_t v32 = v;
 
@@ -2939,7 +2670,7 @@ class MyArchitecture: public Assembler::Architecture {
                          unsigned targetParameterFootprint, void** ip,
                          void** stack)
   {
-    local::nextFrame(&c, static_cast<uint8_t*>(start), size, footprint,
+    x86::nextFrame(&c, static_cast<uint8_t*>(start), size, footprint,
                      link, mostRecent, targetParameterFootprint, ip, stack);
   }
 
@@ -3393,7 +3124,7 @@ class MyAssembler: public Assembler {
   {
     lir::Register stack(rsp);
     lir::Memory stackLimit(rbx, stackLimitOffsetFromThread);
-    lir::Constant handlerConstant(resolved(&c, handler));
+    lir::Constant handlerConstant(resolvedPromise(&c, handler));
     branchRM(&c, lir::JumpIfGreaterOrEqual, TargetBytesPerWord, &stack, &stackLimit,
              &handlerConstant);
   }
@@ -3470,7 +3201,7 @@ class MyAssembler: public Assembler {
         OperandInfo(TargetBytesPerWord, lir::RegisterOperand, &base));
     }
 
-    lir::Constant footprintConstant(resolved(&c, footprint * TargetBytesPerWord));
+    lir::Constant footprintConstant(resolvedPromise(&c, footprint * TargetBytesPerWord));
     apply(lir::Subtract,
       OperandInfo(TargetBytesPerWord, lir::ConstantOperand, &footprintConstant),
       OperandInfo(TargetBytesPerWord, lir::RegisterOperand, &stack),
@@ -3479,7 +3210,7 @@ class MyAssembler: public Assembler {
 
   virtual void adjustFrame(unsigned difference) {
     lir::Register stack(rsp);
-    lir::Constant differenceConstant(resolved(&c, difference * TargetBytesPerWord));
+    lir::Constant differenceConstant(resolvedPromise(&c, difference * TargetBytesPerWord));
     apply(lir::Subtract, 
       OperandInfo(TargetBytesPerWord, lir::ConstantOperand, &differenceConstant),
       OperandInfo(TargetBytesPerWord, lir::RegisterOperand, &stack),
@@ -3497,7 +3228,7 @@ class MyAssembler: public Assembler {
       popR(&c, TargetBytesPerWord, &base);
     } else {
       lir::Register stack(rsp);
-      lir::Constant footprint(resolved(&c, frameFootprint * TargetBytesPerWord));
+      lir::Constant footprint(resolvedPromise(&c, frameFootprint * TargetBytesPerWord));
       apply(lir::Add,
         OperandInfo(TargetBytesPerWord, lir::ConstantOperand, &footprint),
         OperandInfo(TargetBytesPerWord, lir::RegisterOperand, &stack),
@@ -3536,7 +3267,7 @@ class MyAssembler: public Assembler {
 
         lir::Register stack(rsp);
         lir::Constant footprint
-          (resolved
+          (resolvedPromise
            (&c, (frameFootprint - offset + baseSize) * TargetBytesPerWord));
 
         addCR(&c, TargetBytesPerWord, &footprint, TargetBytesPerWord, &stack);
@@ -3578,7 +3309,7 @@ class MyAssembler: public Assembler {
 
       lir::Register stack(rsp);
       lir::Constant adjustment
-        (resolved(&c, (argumentFootprint - StackAlignmentInWords)
+        (resolvedPromise(&c, (argumentFootprint - StackAlignmentInWords)
                   * TargetBytesPerWord));
       addCR(&c, TargetBytesPerWord, &adjustment, TargetBytesPerWord, &stack);
 
@@ -3674,7 +3405,7 @@ class MyAssembler: public Assembler {
   }
 
   virtual Promise* offset(bool) {
-    return local::offset(&c);
+    return x86::offsetPromise(&c);
   }
 
   virtual Block* endBlock(bool startNew) {
@@ -3713,17 +3444,12 @@ Assembler* MyArchitecture::makeAssembler(Allocator* allocator, Zone* zone) {
     new(zone) MyAssembler(c.s, allocator, zone, this);
 }
 
-} // namespace local
-
-} // namespace
-
-namespace avian {
-namespace codegen {
+} // namespace x86
 
 Assembler::Architecture* makeArchitectureX86(System* system, bool useNativeFeatures)
 {
-  return new (allocate(system, sizeof(local::MyArchitecture)))
-    local::MyArchitecture(system, useNativeFeatures);
+  return new (allocate(system, sizeof(x86::MyArchitecture)))
+    x86::MyArchitecture(system, useNativeFeatures);
 }
 
 } // namespace codegen
diff --git a/src/codegen/x86/block.cpp b/src/codegen/x86/block.cpp
new file mode 100644
index 0000000000..152fd99a8d
--- /dev/null
+++ b/src/codegen/x86/block.cpp
@@ -0,0 +1,39 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "codegen/x86/block.h"
+#include "common.h"
+
+#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
+#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+unsigned
+padding(AlignmentPadding* p, unsigned index, unsigned offset, AlignmentPadding* limit);
+
+MyBlock::MyBlock(unsigned offset):
+  next(0), firstPadding(0), lastPadding(0), offset(offset), start(~0),
+  size(0)
+{ }
+
+unsigned MyBlock::resolve(unsigned start, Assembler::Block* next) {
+  this->start = start;
+  this->next = static_cast<MyBlock*>(next);
+
+  return start + size + padding(firstPadding, start, offset, lastPadding);
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/block.h b/src/codegen/x86/block.h
new file mode 100644
index 0000000000..76f7ff05a6
--- /dev/null
+++ b/src/codegen/x86/block.h
@@ -0,0 +1,40 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_BLOCK_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_BLOCK_H
+
+#include "codegen/assembler.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+class AlignmentPadding;
+
+class MyBlock: public Assembler::Block {
+ public:
+  MyBlock(unsigned offset);
+
+  virtual unsigned resolve(unsigned start, Assembler::Block* next);
+
+  MyBlock* next;
+  AlignmentPadding* firstPadding;
+  AlignmentPadding* lastPadding;
+  unsigned offset;
+  unsigned start;
+  unsigned size;
+};
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_BLOCK_H
diff --git a/src/codegen/x86/context.cpp b/src/codegen/x86/context.cpp
new file mode 100644
index 0000000000..da73e97606
--- /dev/null
+++ b/src/codegen/x86/context.cpp
@@ -0,0 +1,34 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "codegen/x86/context.h"
+#include "codegen/x86/block.h"
+
+#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
+#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+ArchitectureContext::ArchitectureContext(vm::System* s, bool useNativeFeatures):
+  s(s), useNativeFeatures(useNativeFeatures)
+{ }
+
+Context::Context(vm::System* s, vm::Allocator* a, vm::Zone* zone, ArchitectureContext* ac):
+  s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
+  firstBlock(new(zone) MyBlock(0)),
+  lastBlock(firstBlock), ac(ac)
+{ }
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/context.h b/src/codegen/x86/context.h
new file mode 100644
index 0000000000..5b3a915b9c
--- /dev/null
+++ b/src/codegen/x86/context.h
@@ -0,0 +1,91 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_CONTEXT_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_CONTEXT_H
+
+#include "codegen/lir.h"
+#include "codegen/assembler.h"
+#include "alloc-vector.h"
+
+class Aborter;
+
+namespace vm {
+class System;
+class Allocator;
+class Zone;
+} // namespace vm
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+class Context;
+class MyBlock;
+class Task;
+
+typedef void (*OperationType)(Context*);
+
+typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
+
+typedef void (*BinaryOperationType)
+(Context*, unsigned, lir::Operand*, unsigned, lir::Operand*);
+
+typedef void (*BranchOperationType)
+(Context*, lir::TernaryOperation, unsigned, lir::Operand*,
+ lir::Operand*, lir::Operand*);
+
+class ArchitectureContext {
+ public:
+  ArchitectureContext(vm::System* s, bool useNativeFeatures);
+
+  vm::System* s;
+  bool useNativeFeatures;
+  OperationType operations[lir::OperationCount];
+  UnaryOperationType unaryOperations[lir::UnaryOperationCount
+                                     * lir::OperandTypeCount];
+  BinaryOperationType binaryOperations
+  [(lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount)
+   * lir::OperandTypeCount
+   * lir::OperandTypeCount];
+  BranchOperationType branchOperations
+  [lir::BranchOperationCount
+   * lir::OperandTypeCount
+   * lir::OperandTypeCount];
+};
+
+class Context {
+ public:
+  Context(vm::System* s, vm::Allocator* a, vm::Zone* zone, ArchitectureContext* ac);
+
+  vm::System* s;
+  vm::Zone* zone;
+  Assembler::Client* client;
+  vm::Vector code;
+  Task* tasks;
+  uint8_t* result;
+  MyBlock* firstBlock;
+  MyBlock* lastBlock;
+  ArchitectureContext* ac;
+};
+
+inline Aborter* getAborter(Context* c) {
+  return c->s;
+}
+
+inline Aborter* getAborter(ArchitectureContext* c) {
+  return c->s;
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_CONTEXT_H
diff --git a/src/codegen/x86/fixup.cpp b/src/codegen/x86/fixup.cpp
new file mode 100644
index 0000000000..f739e1b3e4
--- /dev/null
+++ b/src/codegen/x86/fixup.cpp
@@ -0,0 +1,106 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "codegen/assembler.h"
+#include "codegen/x86/context.h"
+#include "codegen/x86/fixup.h"
+#include "codegen/x86/padding.h"
+#include "codegen/x86/block.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+ResolvedPromise* resolvedPromise(Context* c, int64_t value) {
+  return new(c->zone) ResolvedPromise(value);
+}
+
+Offset::Offset(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit):
+  c(c), block(block), offset(offset), limit(limit), value_(-1)
+{ }
+
+bool Offset::resolved() {
+  return block->start != static_cast<unsigned>(~0);
+}
+
+int64_t Offset::value() {
+  assert(c, resolved());
+
+  if (value_ == -1) {
+    value_ = block->start + (offset - block->offset)
+      + padding(block->firstPadding, block->start, block->offset, limit);
+  }
+
+  return value_;
+}
+Promise* offsetPromise(Context* c) {
+  return new(c->zone) Offset(c, c->lastBlock, c->code.length(), c->lastBlock->lastPadding);
+}
+
+void*
+resolveOffset(vm::System* s, uint8_t* instruction, unsigned instructionSize,
+              int64_t value)
+{
+  intptr_t v = reinterpret_cast<uint8_t*>(value)
+    - instruction - instructionSize;
+    
+  expect(s, vm::fitsInInt32(v));
+
+  int32_t v4 = v;
+  memcpy(instruction + instructionSize - 4, &v4, 4);
+  return instruction + instructionSize;
+}
+
+OffsetListener::OffsetListener(vm::System* s, uint8_t* instruction,
+               unsigned instructionSize):
+  s(s),
+  instruction(instruction),
+  instructionSize(instructionSize)
+{ }
+
+bool OffsetListener::resolve(int64_t value, void** location) {
+  void* p = resolveOffset(s, instruction, instructionSize, value);
+  if (location) *location = p;
+  return false;
+}
+
+OffsetTask::OffsetTask(Task* next, Promise* promise, Promise* instructionOffset,
+           unsigned instructionSize):
+  Task(next),
+  promise(promise),
+  instructionOffset(instructionOffset),
+  instructionSize(instructionSize)
+{ }
+
+void OffsetTask::run(Context* c) {
+  if (promise->resolved()) {
+    resolveOffset
+      (c->s, c->result + instructionOffset->value(), instructionSize,
+       promise->value());
+  } else {
+    new (promise->listen(sizeof(OffsetListener)))
+      OffsetListener(c->s, c->result + instructionOffset->value(),
+                     instructionSize);
+  }
+}
+
+void
+appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset,
+                 unsigned instructionSize)
+{
+  OffsetTask* task =
+    new(c->zone) OffsetTask(c->tasks, promise, instructionOffset, instructionSize);
+
+  c->tasks = task;
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/fixup.h b/src/codegen/x86/fixup.h
new file mode 100644
index 0000000000..2b97af9cfb
--- /dev/null
+++ b/src/codegen/x86/fixup.h
@@ -0,0 +1,87 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_FIXUP_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_FIXUP_H
+
+namespace vm {
+class System;
+}
+
+namespace avian {
+namespace codegen {
+
+class Promise;
+
+namespace x86 {
+
+class MyBlock;
+class AlignmentPadding;
+
+ResolvedPromise* resolvedPromise(Context* c, int64_t value);
+
+class Offset: public Promise {
+ public:
+  Offset(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit);
+
+  virtual bool resolved();
+  
+  virtual int64_t value();
+
+  Context* c;
+  MyBlock* block;
+  unsigned offset;
+  AlignmentPadding* limit;
+  int value_;
+};
+
+Promise* offsetPromise(Context* c);
+
+
+class Task {
+ public:
+  Task(Task* next): next(next) { }
+
+  virtual void run(Context* c) = 0;
+
+  Task* next;
+};
+
+void* resolveOffset(vm::System* s, uint8_t* instruction, unsigned instructionSize, int64_t value);
+
+class OffsetListener: public Promise::Listener {
+ public:
+  OffsetListener(vm::System* s, uint8_t* instruction, unsigned instructionSize);
+
+  virtual bool resolve(int64_t value, void** location);
+
+  vm::System* s;
+  uint8_t* instruction;
+  unsigned instructionSize;
+};
+
+class OffsetTask: public Task {
+ public:
+  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset, unsigned instructionSize);
+
+  virtual void run(Context* c);
+
+  Promise* promise;
+  Promise* instructionOffset;
+  unsigned instructionSize;
+};
+
+void appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset, unsigned instructionSize);
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_FIXUP_H
diff --git a/src/codegen/x86/padding.cpp b/src/codegen/x86/padding.cpp
new file mode 100644
index 0000000000..355c30e041
--- /dev/null
+++ b/src/codegen/x86/padding.cpp
@@ -0,0 +1,68 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "codegen/assembler.h"
+#include "codegen/x86/context.h"
+#include "codegen/x86/fixup.h"
+#include "codegen/x86/padding.h"
+#include "codegen/x86/block.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+AlignmentPadding::AlignmentPadding(Context* c, unsigned instructionOffset, unsigned alignment):
+  offset(c->code.length()),
+  instructionOffset(instructionOffset),
+  alignment(alignment),
+  next(0),
+  padding(-1)
+{
+  if (c->lastBlock->firstPadding) {
+    c->lastBlock->lastPadding->next = this;
+  } else {
+    c->lastBlock->firstPadding = this;
+  }
+  c->lastBlock->lastPadding = this;
+}
+
+unsigned
+padding(AlignmentPadding* p, unsigned start, unsigned offset,
+        AlignmentPadding* limit)
+{
+  unsigned padding = 0;
+  if (limit) {
+    if (limit->padding == -1) {
+      for (; p; p = p->next) {
+        if (p->padding == -1) {
+          unsigned index = p->offset - offset;
+          while ((start + index + padding + p->instructionOffset)
+                 % p->alignment)
+          {
+            ++ padding;
+          }
+      
+          p->padding = padding;
+
+          if (p == limit) break;
+        } else {
+          padding = p->padding;
+        }
+      }
+    } else {
+      padding = limit->padding;
+    }
+  }
+  return padding;
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/padding.h b/src/codegen/x86/padding.h
new file mode 100644
index 0000000000..32fc358142
--- /dev/null
+++ b/src/codegen/x86/padding.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_PADDING_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_PADDING_H
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+
+class AlignmentPadding {
+ public:
+  AlignmentPadding(Context* c, unsigned instructionOffset, unsigned alignment);
+
+  unsigned offset;
+  unsigned instructionOffset;
+  unsigned alignment;
+  AlignmentPadding* next;
+  int padding;
+};
+
+unsigned
+padding(AlignmentPadding* p, unsigned start, unsigned offset,
+        AlignmentPadding* limit);
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_PADDING_H
diff --git a/src/common.h b/src/common.h
index 7cf763c4bd..d4334f3733 100644
--- a/src/common.h
+++ b/src/common.h
@@ -359,6 +359,18 @@ nextPowerOfTwo(unsigned n)
   return r;
 }
 
+inline bool fitsInInt8(int64_t v) {
+  return v == static_cast<int8_t>(v);
+}
+
+inline bool fitsInInt16(int64_t v) {
+  return v == static_cast<int16_t>(v);
+}
+
+inline bool fitsInInt32(int64_t v) {
+  return v == static_cast<int32_t>(v);
+}
+
 inline unsigned
 log(unsigned n)
 {

From 984f987e03d884cfbc88afd2b8fc551e6bf179f7 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sat, 16 Feb 2013 21:55:28 -0700
Subject: [PATCH 04/22] further break out x86 assembler

---
 src/codegen/x86/assembler.cpp   | 2320 +------------------------------
 src/codegen/x86/block.cpp       |    4 -
 src/codegen/x86/context.cpp     |    4 -
 src/codegen/x86/context.h       |    4 +
 src/codegen/x86/detect.cpp      |   40 +
 src/codegen/x86/detect.h        |   28 +
 src/codegen/x86/encode.cpp      |  345 +++++
 src/codegen/x86/encode.h        |   93 ++
 src/codegen/x86/fixup.cpp       |   56 +
 src/codegen/x86/fixup.h         |   31 +
 src/codegen/x86/multimethod.cpp |  174 +++
 src/codegen/x86/multimethod.h   |   38 +
 src/codegen/x86/operations.cpp  | 1532 ++++++++++++++++++++
 src/codegen/x86/operations.h    |  261 ++++
 src/codegen/x86/registers.h     |   67 +
 15 files changed, 2674 insertions(+), 2323 deletions(-)
 create mode 100644 src/codegen/x86/detect.cpp
 create mode 100644 src/codegen/x86/detect.h
 create mode 100644 src/codegen/x86/encode.cpp
 create mode 100644 src/codegen/x86/encode.h
 create mode 100644 src/codegen/x86/multimethod.cpp
 create mode 100644 src/codegen/x86/multimethod.h
 create mode 100644 src/codegen/x86/operations.cpp
 create mode 100644 src/codegen/x86/operations.h
 create mode 100644 src/codegen/x86/registers.h

diff --git a/src/codegen/x86/assembler.cpp b/src/codegen/x86/assembler.cpp
index c455f3e963..b52e1ea4ee 100644
--- a/src/codegen/x86/assembler.cpp
+++ b/src/codegen/x86/assembler.cpp
@@ -19,6 +19,11 @@
 #include "codegen/x86/block.h"
 #include "codegen/x86/fixup.h"
 #include "codegen/x86/padding.h"
+#include "codegen/x86/registers.h"
+#include "codegen/x86/encode.h"
+#include "codegen/x86/operations.h"
+#include "codegen/x86/detect.h"
+#include "codegen/x86/multimethod.h"
 
 #include "util/runtime-array.h"
 #include "util/abort.h"
@@ -33,2210 +38,13 @@ namespace avian {
 namespace codegen {
 namespace x86 {
 
-enum {
-  rax = 0,
-  rcx = 1,
-  rdx = 2,
-  rbx = 3,
-  rsp = 4,
-  rbp = 5,
-  rsi = 6,
-  rdi = 7,
-  r8 = 8,
-  r9 = 9,
-  r10 = 10,
-  r11 = 11,
-  r12 = 12,
-  r13 = 13,
-  r14 = 14,
-  r15 = 15,
-};
-
-enum {
-  xmm0 = r15 + 1,
-  xmm1,
-  xmm2,
-  xmm3,
-  xmm4,
-  xmm5,
-  xmm6,
-  xmm7,
-  xmm8,
-  xmm9,
-  xmm10,
-  xmm11,
-  xmm12,
-  xmm13,
-  xmm14,
-  xmm15,
-};
-
-const unsigned GeneralRegisterMask
-= TargetBytesPerWord == 4 ? 0x000000ff : 0x0000ffff;
-
-const unsigned FloatRegisterMask
-= TargetBytesPerWord == 4 ? 0x00ff0000 : 0xffff0000;
-
 const RegisterFile MyRegisterFile(GeneralRegisterMask, FloatRegisterMask);
 
 const unsigned FrameHeaderSize = (UseFramePointer ? 2 : 1);
 
-const int LongJumpRegister = r10;
-
 const unsigned StackAlignmentInBytes = 16;
 const unsigned StackAlignmentInWords = StackAlignmentInBytes / TargetBytesPerWord;
 
-class Task;
-class AlignmentPadding;
-
-unsigned
-padding(AlignmentPadding* p, unsigned index, unsigned offset,
-        AlignmentPadding* limit);
-
-void
-copy(System* s, void* dst, int64_t src, unsigned size)
-{
-  switch (size) {
-  case 4: {
-    int32_t v = src;
-    memcpy(dst, &v, 4);
-  } break;
-
-  case 8: {
-    int64_t v = src;
-    memcpy(dst, &v, 8);
-  } break;
-
-  default: abort(s);
-  }
-}
-
-class ImmediateListener: public Promise::Listener {
- public:
-  ImmediateListener(System* s, void* dst, unsigned size, unsigned offset):
-    s(s), dst(dst), size(size), offset(offset)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    copy(s, dst, value, size);
-    if (location) *location = static_cast<uint8_t*>(dst) + offset;
-    return offset == 0;
-  }
-
-  System* s;
-  void* dst;
-  unsigned size;
-  unsigned offset;
-};
-
-class ImmediateTask: public Task {
- public:
-  ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size,
-                unsigned promiseOffset):
-    Task(next),
-    promise(promise),
-    offset(offset),
-    size(size),
-    promiseOffset(promiseOffset)
-  { }
-
-  virtual void run(Context* c) {
-    if (promise->resolved()) {
-      copy(c->s, c->result + offset->value(), promise->value(), size);
-    } else {
-      new (promise->listen(sizeof(ImmediateListener))) ImmediateListener
-        (c->s, c->result + offset->value(), size, promiseOffset);
-    }
-  }
-
-  Promise* promise;
-  Promise* offset;
-  unsigned size;
-  unsigned promiseOffset;
-};
-
-void
-appendImmediateTask(Context* c, Promise* promise, Promise* offset,
-                    unsigned size, unsigned promiseOffset = 0)
-{
-  c->tasks = new(c->zone) ImmediateTask
-    (c->tasks, promise, offset, size, promiseOffset);
-}
-
-extern "C" bool
-detectFeature(unsigned ecx, unsigned edx);
-
-bool
-useSSE(ArchitectureContext* c)
-{
-  if (TargetBytesPerWord == 8) {
-    // amd64 implies SSE2 support
-    return true;
-  } else if (c->useNativeFeatures) {
-    static int supported = -1;
-    if (supported == -1) {
-      supported = detectFeature(0, 0x2000000) // SSE 1
-        and detectFeature(0, 0x4000000); // SSE 2
-    }
-    return supported;
-  } else {
-    return false;
-  }
-}
-
-#define REX_W 0x48
-#define REX_R 0x44
-#define REX_X 0x42
-#define REX_B 0x41
-#define REX_NONE 0x40
-
-void maybeRex(Context* c, unsigned size, int a, int index, int base,
-              bool always)
-{
-  if (TargetBytesPerWord == 8) {
-    uint8_t byte;
-    if (size == 8) {
-      byte = REX_W;
-    } else {
-      byte = REX_NONE;
-    }
-    if (a != lir::NoRegister and (a & 8)) byte |= REX_R;
-    if (index != lir::NoRegister and (index & 8)) byte |= REX_X;
-    if (base != lir::NoRegister and (base & 8)) byte |= REX_B;
-    if (always or byte != REX_NONE) c->code.append(byte);
-  }
-}
-
-void
-maybeRex(Context* c, unsigned size, lir::Register* a,
-         lir::Register* b)
-{
-  maybeRex(c, size, a->low, lir::NoRegister, b->low, false);
-}
-
-void
-alwaysRex(Context* c, unsigned size, lir::Register* a,
-          lir::Register* b)
-{
-  maybeRex(c, size, a->low, lir::NoRegister, b->low, true);
-}
-
-void
-maybeRex(Context* c, unsigned size, lir::Register* a)
-{
-  maybeRex(c, size, lir::NoRegister, lir::NoRegister, a->low, false);
-}
-
-void
-maybeRex(Context* c, unsigned size, lir::Register* a,
-         lir::Memory* b)
-{
-  maybeRex(c, size, a->low, b->index, b->base, size == 1 and (a->low & 4));
-}
-
-void
-maybeRex(Context* c, unsigned size, lir::Memory* a)
-{
-  maybeRex(c, size, lir::NoRegister, a->index, a->base, false);
-}
-
-int
-regCode(int a)
-{
-  return a & 7;
-}
-
-int
-regCode(lir::Register* a)
-{
-  return regCode(a->low);
-}
-
-void
-modrm(Context* c, uint8_t mod, int a, int b)
-{
-  c->code.append(mod | (regCode(b) << 3) | regCode(a));
-}
-
-void
-modrm(Context* c, uint8_t mod, lir::Register* a, lir::Register* b)
-{
-  modrm(c, mod, a->low, b->low);
-}
-
-void
-sib(Context* c, unsigned scale, int index, int base)
-{
-  c->code.append((log(scale) << 6) | (regCode(index) << 3) | regCode(base));
-}
-
-void
-modrmSib(Context* c, int width, int a, int scale, int index, int base)
-{
-  if (index == lir::NoRegister) {
-    modrm(c, width, base, a);
-    if (regCode(base) == rsp) {
-      sib(c, 0x00, rsp, rsp);
-    }
-  } else {
-    modrm(c, width, rsp, a);
-    sib(c, scale, index, base);
-  }
-}
-
-void
-modrmSibImm(Context* c, int a, int scale, int index, int base, int offset)
-{
-  if (offset == 0 and regCode(base) != rbp) {
-    modrmSib(c, 0x00, a, scale, index, base);
-  } else if (vm::fitsInInt8(offset)) {
-    modrmSib(c, 0x40, a, scale, index, base);
-    c->code.append(offset);
-  } else {
-    modrmSib(c, 0x80, a, scale, index, base);
-    c->code.append4(offset);
-  }
-}
-  
-
-void
-modrmSibImm(Context* c, lir::Register* a, lir::Memory* b)
-{
-  modrmSibImm(c, a->low, b->scale, b->index, b->base, b->offset);
-}
-
-void
-opcode(Context* c, uint8_t op)
-{
-  c->code.append(op);
-}
-
-void
-opcode(Context* c, uint8_t op1, uint8_t op2)
-{
-  c->code.append(op1);
-  c->code.append(op2);
-}
-
-void
-return_(Context* c)
-{
-  opcode(c, 0xc3);
-}
-
-void
-trap(Context* c)
-{
-  opcode(c, 0xcc);
-}
-
-void
-ignore(Context*)
-{ }
-
-void
-storeLoadBarrier(Context* c)
-{
-  if (useSSE(c->ac)) {
-    // mfence:
-    c->code.append(0x0f);
-    c->code.append(0xae);
-    c->code.append(0xf0);
-  } else {
-    // lock addq $0x0,(%rsp):
-    c->code.append(0xf0);
-    if (TargetBytesPerWord == 8) {
-      c->code.append(0x48);
-    }
-    c->code.append(0x83);
-    c->code.append(0x04);
-    c->code.append(0x24);
-    c->code.append(0x00);    
-  }
-}
-
-void
-unconditional(Context* c, unsigned jump, lir::Constant* a)
-{
-  appendOffsetTask(c, a->value, offsetPromise(c), 5);
-
-  opcode(c, jump);
-  c->code.append4(0);
-}
-
-void
-conditional(Context* c, unsigned condition, lir::Constant* a)
-{
-  appendOffsetTask(c, a->value, offsetPromise(c), 6);
-  
-  opcode(c, 0x0f, condition);
-  c->code.append4(0);
-}
-
-unsigned
-index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
-unsigned
-index(ArchitectureContext*, lir::BinaryOperation operation,
-      lir::OperandType operand1,
-      lir::OperandType operand2)
-{
-  return operation
-    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount) * operand1)
-    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount)
-       * lir::OperandTypeCount * operand2);
-}
-
-unsigned
-index(ArchitectureContext* c UNUSED, lir::TernaryOperation operation,
-      lir::OperandType operand1, lir::OperandType operand2)
-{
-  assert(c, not isBranch(operation));
-
-  return lir::BinaryOperationCount + operation
-    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount) * operand1)
-    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount)
-       * lir::OperandTypeCount * operand2);
-}
-
-unsigned
-branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
-            lir::OperandType operand2)
-{
-  return operand1 + (lir::OperandTypeCount * operand2);
-}
-
-void
-moveCR(Context* c, unsigned aSize, lir::Constant* a,
-       unsigned bSize, lir::Register* b);
-
-void
-moveCR2(Context*, unsigned, lir::Constant*, unsigned,
-        lir::Register*, unsigned);
-
-void
-callR(Context*, unsigned, lir::Register*);
-
-void
-callC(Context* c, unsigned size UNUSED, lir::Constant* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  unconditional(c, 0xe8, a);
-}
-
-void
-longCallC(Context* c, unsigned size, lir::Constant* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  if (TargetBytesPerWord == 8) {
-    lir::Register r(LongJumpRegister);
-    moveCR2(c, size, a, size, &r, 11);
-    callR(c, size, &r);
-  } else {
-    callC(c, size, a);
-  }
-}
-
-void
-jumpR(Context* c, unsigned size UNUSED, lir::Register* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  maybeRex(c, 4, a);
-  opcode(c, 0xff, 0xe0 + regCode(a));
-}
-
-void
-jumpC(Context* c, unsigned size UNUSED, lir::Constant* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  unconditional(c, 0xe9, a);
-}
-
-void
-jumpM(Context* c, unsigned size UNUSED, lir::Memory* a)
-{
-  assert(c, size == TargetBytesPerWord);
-  
-  maybeRex(c, 4, a);
-  opcode(c, 0xff);
-  modrmSibImm(c, rsp, a->scale, a->index, a->base, a->offset);
-}
-
-void
-longJumpC(Context* c, unsigned size, lir::Constant* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  if (TargetBytesPerWord == 8) {
-    lir::Register r(LongJumpRegister);
-    moveCR2(c, size, a, size, &r, 11);
-    jumpR(c, size, &r);
-  } else {
-    jumpC(c, size, a);
-  }
-}
-
-void
-callR(Context* c, unsigned size UNUSED, lir::Register* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  // maybeRex.W has no meaning here so we disable it
-  maybeRex(c, 4, a);
-  opcode(c, 0xff, 0xd0 + regCode(a));
-}
-
-void
-callM(Context* c, unsigned size UNUSED, lir::Memory* a)
-{
-  assert(c, size == TargetBytesPerWord);
-  
-  maybeRex(c, 4, a);
-  opcode(c, 0xff);
-  modrmSibImm(c, rdx, a->scale, a->index, a->base, a->offset);
-}
-
-void
-alignedCallC(Context* c, unsigned size, lir::Constant* a)
-{
-  new(c->zone) AlignmentPadding(c, 1, 4);
-  callC(c, size, a);
-}
-
-void
-alignedLongCallC(Context* c, unsigned size, lir::Constant* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  if (TargetBytesPerWord == 8) {
-    new (c->zone) AlignmentPadding(c, 2, 8);
-    longCallC(c, size, a);
-  } else {
-    alignedCallC(c, size, a);
-  }
-}
-
-void
-alignedJumpC(Context* c, unsigned size, lir::Constant* a)
-{
-  new (c->zone) AlignmentPadding(c, 1, 4);
-  jumpC(c, size, a);
-}
-
-void
-alignedLongJumpC(Context* c, unsigned size, lir::Constant* a)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  if (TargetBytesPerWord == 8) {
-    new (c->zone) AlignmentPadding(c, 2, 8);
-    longJumpC(c, size, a);
-  } else {
-    alignedJumpC(c, size, a);
-  }
-}
-
-void
-pushR(Context* c, unsigned size, lir::Register* a)
-{
-  if (TargetBytesPerWord == 4 and size == 8) {
-    lir::Register ah(a->high);
-
-    pushR(c, 4, &ah);
-    pushR(c, 4, a);
-  } else {
-    maybeRex(c, 4, a);
-    opcode(c, 0x50 + regCode(a));
-  }
-}
-
-void
-moveRR(Context* c, unsigned aSize, lir::Register* a,
-       unsigned bSize, lir::Register* b);
-
-void
-popR(Context* c, unsigned size, lir::Register* a)
-{
-  if (TargetBytesPerWord == 4 and size == 8) {
-    lir::Register ah(a->high);
-
-    popR(c, 4, a);
-    popR(c, 4, &ah);
-  } else {
-    maybeRex(c, 4, a);
-    opcode(c, 0x58 + regCode(a));
-    if (TargetBytesPerWord == 8 and size == 4) {
-      moveRR(c, 4, a, 8, a);
-    }
-  }
-}
-
-void
-addCarryCR(Context* c, unsigned size, lir::Constant* a,
-           lir::Register* b);
-
-void
-negateR(Context* c, unsigned size, lir::Register* a)
-{
-  if (TargetBytesPerWord == 4 and size == 8) {
-    assert(c, a->low == rax and a->high == rdx);
-
-    ResolvedPromise zeroPromise(0);
-    lir::Constant zero(&zeroPromise);
-
-    lir::Register ah(a->high);
-
-    negateR(c, 4, a);
-    addCarryCR(c, 4, &zero, &ah);
-    negateR(c, 4, &ah);
-  } else {
-    maybeRex(c, size, a);
-    opcode(c, 0xf7, 0xd8 + regCode(a));
-  }
-}
-
-void
-negateRR(Context* c, unsigned aSize, lir::Register* a,
-         unsigned bSize UNUSED, lir::Register* b UNUSED)
-{
-  assert(c, aSize == bSize);
-
-  negateR(c, aSize, a);
-}
-
-void
-moveCR2(Context* c, UNUSED unsigned aSize, lir::Constant* a,
-        UNUSED unsigned bSize, lir::Register* b, unsigned promiseOffset)
-{
-  if (TargetBytesPerWord == 4 and bSize == 8) {
-    int64_t v = a->value->value();
-
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-
-    moveCR(c, 4, &al, 4, b);
-    moveCR(c, 4, &ah, 4, &bh);
-  } else {
-    maybeRex(c, TargetBytesPerWord, b);
-    opcode(c, 0xb8 + regCode(b));
-    if (a->value->resolved()) {
-      c->code.appendTargetAddress(a->value->value());
-    } else {
-      appendImmediateTask
-        (c, a->value, offsetPromise(c), TargetBytesPerWord, promiseOffset);
-      c->code.appendTargetAddress(static_cast<target_uintptr_t>(0));
-    }
-  }
-}
-
-bool
-floatReg(lir::Register* a)
-{
-  return a->low >= xmm0;
-}
-
-void
-sseMoveRR(Context* c, unsigned aSize, lir::Register* a,
-          unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize >= 4);
-  assert(c, aSize == bSize);
-
-  if (floatReg(a) and floatReg(b)) {
-    if (aSize == 4) {
-      opcode(c, 0xf3);
-      maybeRex(c, 4, a, b);
-      opcode(c, 0x0f, 0x10);
-      modrm(c, 0xc0, a, b);
-    } else {
-      opcode(c, 0xf2);
-      maybeRex(c, 4, b, a);
-      opcode(c, 0x0f, 0x10);
-      modrm(c, 0xc0, a, b);
-    } 
-  } else if (floatReg(a)) {
-    opcode(c, 0x66);
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x0f, 0x7e);
-    modrm(c, 0xc0, b, a);  	
-  } else {
-    opcode(c, 0x66);
-    maybeRex(c, aSize, b, a);
-    opcode(c, 0x0f, 0x6e);
-    modrm(c, 0xc0, a, b);  	
-  }
-}
-
-void
-sseMoveCR(Context* c, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize <= TargetBytesPerWord);
-  lir::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
-  moveCR2(c, aSize, a, aSize, &tmp, 0);
-  sseMoveRR(c, aSize, &tmp, bSize, b);
-  c->client->releaseTemporary(tmp.low);
-}
-
-void
-moveCR(Context* c, unsigned aSize, lir::Constant* a,
-       unsigned bSize, lir::Register* b)
-{
-  if (floatReg(b)) {
-    sseMoveCR(c, aSize, a, bSize, b);
-  } else {
-    moveCR2(c, aSize, a, bSize, b, 0);
-  }
-}
-
-void
-swapRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
-       unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-  assert(c, aSize == TargetBytesPerWord);
-  
-  alwaysRex(c, aSize, a, b);
-  opcode(c, 0x87);
-  modrm(c, 0xc0, b, a);
-}
-
-void
-moveRR(Context* c, unsigned aSize, lir::Register* a,
-       UNUSED unsigned bSize, lir::Register* b)
-{
-  if (floatReg(a) or floatReg(b)) {
-    sseMoveRR(c, aSize, a, bSize, b);
-    return;
-  }
-  
-  if (TargetBytesPerWord == 4 and aSize == 8 and bSize == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    if (a->high == b->low) {
-      if (a->low == b->high) {
-        swapRR(c, 4, a, 4, b);
-      } else {
-        moveRR(c, 4, &ah, 4, &bh);
-        moveRR(c, 4, a, 4, b);
-      }
-    } else {
-      moveRR(c, 4, a, 4, b);
-      moveRR(c, 4, &ah, 4, &bh);
-    }
-  } else {
-    switch (aSize) {
-    case 1:
-      if (TargetBytesPerWord == 4 and a->low > rbx) {
-        assert(c, b->low <= rbx);
-
-        moveRR(c, TargetBytesPerWord, a, TargetBytesPerWord, b);
-        moveRR(c, 1, b, TargetBytesPerWord, b);
-      } else {
-        alwaysRex(c, aSize, b, a);
-        opcode(c, 0x0f, 0xbe);
-        modrm(c, 0xc0, a, b);
-      }
-      break;
-
-    case 2:
-      alwaysRex(c, aSize, b, a);
-      opcode(c, 0x0f, 0xbf);
-      modrm(c, 0xc0, a, b);
-      break;
-
-    case 4:
-      if (bSize == 8) {
-      	if (TargetBytesPerWord == 8) {
-          alwaysRex(c, bSize, b, a);
-          opcode(c, 0x63);
-          modrm(c, 0xc0, a, b);
-      	} else {
-      	  if (a->low == rax and b->low == rax and b->high == rdx) {
-      	  	opcode(c, 0x99); //cdq
-      	  } else {
-            assert(c, b->low == rax and b->high == rdx);
-
-            moveRR(c, 4, a, 4, b);
-            moveRR(c, 4, b, 8, b);
-          }
-        }
-      } else {
-        if (a->low != b->low) {
-          alwaysRex(c, aSize, a, b);
-          opcode(c, 0x89);
-          modrm(c, 0xc0, b, a);
-        }
-      }
-      break; 
-      
-    case 8:
-      if (a->low != b->low){
-        maybeRex(c, aSize, a, b);
-        opcode(c, 0x89);
-        modrm(c, 0xc0, b, a);
-      }
-      break;
-    }
-  }
-}
-
-void
-sseMoveMR(Context* c, unsigned aSize, lir::Memory* a,
-          unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize >= 4);
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    opcode(c, 0xf3);
-    opcode(c, 0x0f, 0x7e);
-    modrmSibImm(c, b, a);
-  } else {
-    opcode(c, 0x66);
-    maybeRex(c, aSize, b, a);
-    opcode(c, 0x0f, 0x6e);
-    modrmSibImm(c, b, a);
-  }
-}
-
-void
-moveMR(Context* c, unsigned aSize, lir::Memory* a,
-       unsigned bSize, lir::Register* b)
-{
-  if (floatReg(b)) {
-    sseMoveMR(c, aSize, a, bSize, b);
-    return;
-  }
-  
-  switch (aSize) {
-  case 1:
-    maybeRex(c, bSize, b, a);
-    opcode(c, 0x0f, 0xbe);
-    modrmSibImm(c, b, a);
-    break;
-
-  case 2:
-    maybeRex(c, bSize, b, a);
-    opcode(c, 0x0f, 0xbf);
-    modrmSibImm(c, b, a);
-    break;
-
-  case 4:
-    if (TargetBytesPerWord == 8) {
-      maybeRex(c, bSize, b, a);
-      opcode(c, 0x63);
-      modrmSibImm(c, b, a);
-    } else {
-      if (bSize == 8) {
-        assert(c, b->low == rax and b->high == rdx);
-        
-        moveMR(c, 4, a, 4, b);
-        moveRR(c, 4, b, 8, b);
-      } else {
-        maybeRex(c, bSize, b, a);
-        opcode(c, 0x8b);
-        modrmSibImm(c, b, a);
-      }
-    }
-    break;
-    
-  case 8:
-    if (TargetBytesPerWord == 4 and bSize == 8) {
-      lir::Memory ah(a->base, a->offset + 4, a->index, a->scale);
-      lir::Register bh(b->high);
-
-      moveMR(c, 4, a, 4, b);    
-      moveMR(c, 4, &ah, 4, &bh);
-    } else {
-      maybeRex(c, bSize, b, a);
-      opcode(c, 0x8b);
-      modrmSibImm(c, b, a);
-    }
-    break;
-
-  default: abort(c);
-  }
-}
-
-void
-sseMoveRM(Context* c, unsigned aSize, lir::Register* a,
-       UNUSED unsigned bSize, lir::Memory* b)
-{
-  assert(c, aSize >= 4);
-  assert(c, aSize == bSize);
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    opcode(c, 0x66);
-    opcode(c, 0x0f, 0xd6);
-    modrmSibImm(c, a, b);
-  } else {
-    opcode(c, 0x66);
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x0f, 0x7e);
-    modrmSibImm(c, a, b);
-  }
-}
-
-void
-moveRM(Context* c, unsigned aSize, lir::Register* a,
-       unsigned bSize UNUSED, lir::Memory* b)
-{
-  assert(c, aSize == bSize);
-  
-  if (floatReg(a)) {
-    sseMoveRM(c, aSize, a, bSize, b);
-    return;
-  }
-  
-  switch (aSize) {
-  case 1:
-    maybeRex(c, bSize, a, b);
-    opcode(c, 0x88);
-    modrmSibImm(c, a, b);
-    break;
-
-  case 2:
-    opcode(c, 0x66);
-    maybeRex(c, bSize, a, b);
-    opcode(c, 0x89);
-    modrmSibImm(c, a, b);
-    break;
-
-  case 4:
-    if (TargetBytesPerWord == 8) {
-      maybeRex(c, bSize, a, b);
-      opcode(c, 0x89);
-      modrmSibImm(c, a, b);
-      break;
-    } else {
-      opcode(c, 0x89);
-      modrmSibImm(c, a, b);
-    }
-    break;
-    
-  case 8:
-    if (TargetBytesPerWord == 8) {
-      maybeRex(c, bSize, a, b);
-      opcode(c, 0x89);
-      modrmSibImm(c, a, b);
-    } else {
-      lir::Register ah(a->high);
-      lir::Memory bh(b->base, b->offset + 4, b->index, b->scale);
-
-      moveRM(c, 4, a, 4, b);    
-      moveRM(c, 4, &ah, 4, &bh);
-    }
-    break;
-
-  default: abort(c);
-  }
-}
-
-void
-moveAR(Context* c, unsigned aSize, lir::Address* a,
-       unsigned bSize, lir::Register* b)
-{
-  assert(c, TargetBytesPerWord == 8 or (aSize == 4 and bSize == 4));
-
-  lir::Constant constant(a->address);
-  lir::Memory memory(b->low, 0, -1, 0);
-
-  moveCR(c, aSize, &constant, bSize, b);
-  moveMR(c, bSize, &memory, bSize, b);
-}
-
-ShiftMaskPromise*
-shiftMaskPromise(Context* c, Promise* base, unsigned shift, int64_t mask)
-{
-  return new(c->zone) ShiftMaskPromise(base, shift, mask);
-}
-
-void
-moveCM(Context* c, unsigned aSize UNUSED, lir::Constant* a,
-       unsigned bSize, lir::Memory* b)
-{
-  switch (bSize) {
-  case 1:
-    maybeRex(c, bSize, b);
-    opcode(c, 0xc6);
-    modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
-    c->code.append(a->value->value());
-    break;
-
-  case 2:
-    opcode(c, 0x66);
-    maybeRex(c, bSize, b);
-    opcode(c, 0xc7);
-    modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
-    c->code.append2(a->value->value());
-    break;
-
-  case 4:
-    maybeRex(c, bSize, b);
-    opcode(c, 0xc7);
-    modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
-    if (a->value->resolved()) {
-      c->code.append4(a->value->value());
-    } else {
-      appendImmediateTask(c, a->value, offsetPromise(c), 4);
-      c->code.append4(0);
-    }
-    break;
-
-  case 8: {
-    if (TargetBytesPerWord == 8) {
-      if (a->value->resolved() and vm::fitsInInt32(a->value->value())) {
-        maybeRex(c, bSize, b);
-        opcode(c, 0xc7);
-        modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
-        c->code.append4(a->value->value());
-      } else {
-        lir::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, 8, a, 8, &tmp);
-        moveRM(c, 8, &tmp, 8, b);
-        c->client->releaseTemporary(tmp.low);
-      }
-    } else {
-      lir::Constant ah(shiftMaskPromise(c, a->value, 32, 0xFFFFFFFF));
-      lir::Constant al(shiftMaskPromise(c, a->value, 0, 0xFFFFFFFF));
-
-      lir::Memory bh(b->base, b->offset + 4, b->index, b->scale);
-
-      moveCM(c, 4, &al, 4, b);
-      moveCM(c, 4, &ah, 4, &bh);
-    }
-  } break;
-
-  default: abort(c);
-  }
-}
-
-void
-moveZRR(Context* c, unsigned aSize, lir::Register* a,
-        unsigned bSize UNUSED, lir::Register* b)
-{
-  switch (aSize) {
-  case 2:
-    alwaysRex(c, aSize, b, a);
-    opcode(c, 0x0f, 0xb7);
-    modrm(c, 0xc0, a, b);
-    break;
-
-  default: abort(c);
-  }
-}
-
-void
-moveZMR(Context* c, unsigned aSize UNUSED, lir::Memory* a,
-        unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, bSize == TargetBytesPerWord);
-  assert(c, aSize == 2);
-  
-  maybeRex(c, bSize, b, a);
-  opcode(c, 0x0f, 0xb7);
-  modrmSibImm(c, b->low, a->scale, a->index, a->base, a->offset);
-}
-
-void
-addCarryRR(Context* c, unsigned size, lir::Register* a,
-           lir::Register* b)
-{
-  assert(c, TargetBytesPerWord == 8 or size == 4);
-  
-  maybeRex(c, size, a, b);
-  opcode(c, 0x11);
-  modrm(c, 0xc0, b, a);
-}
-
-void
-addRR(Context* c, unsigned aSize, lir::Register* a,
-      unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    addRR(c, 4, a, 4, b);
-    addCarryRR(c, 4, &ah, &bh);
-  } else {
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x01);
-    modrm(c, 0xc0, b, a);
-  }
-}
-
-void
-addCarryCR(Context* c, unsigned size, lir::Constant* a,
-           lir::Register* b)
-{
-  
-  int64_t v = a->value->value();
-  maybeRex(c, size, b);
-  if (vm::fitsInInt8(v)) {
-    opcode(c, 0x83, 0xd0 + regCode(b));
-    c->code.append(v);
-  } else {
-    opcode(c, 0x81, 0xd0 + regCode(b));
-    c->code.append4(v);
-  }
-}
-
-void
-addCR(Context* c, unsigned aSize, lir::Constant* a,
-      unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  int64_t v = a->value->value();
-  if (v) {
-    if (TargetBytesPerWord == 4 and bSize == 8) {
-      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-      lir::Constant ah(&high);
-
-      ResolvedPromise low(v & 0xFFFFFFFF);
-      lir::Constant al(&low);
-
-      lir::Register bh(b->high);
-
-      addCR(c, 4, &al, 4, b);
-      addCarryCR(c, 4, &ah, &bh);
-    } else {
-      if (vm::fitsInInt32(v)) {
-        maybeRex(c, aSize, b);
-        if (vm::fitsInInt8(v)) {
-          opcode(c, 0x83, 0xc0 + regCode(b));
-          c->code.append(v);
-        } else {
-          opcode(c, 0x81, 0xc0 + regCode(b));
-          c->code.append4(v);
-        }
-      } else {
-        lir::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, aSize, a, aSize, &tmp);
-        addRR(c, aSize, &tmp, bSize, b);
-        c->client->releaseTemporary(tmp.low);
-      }
-    }
-  }
-}
-
-void
-subtractBorrowCR(Context* c, unsigned size UNUSED, lir::Constant* a,
-                 lir::Register* b)
-{
-  assert(c, TargetBytesPerWord == 8 or size == 4);
-  
-  int64_t v = a->value->value();
-  if (vm::fitsInInt8(v)) {
-    opcode(c, 0x83, 0xd8 + regCode(b));
-    c->code.append(v);
-  } else {
-    opcode(c, 0x81, 0xd8 + regCode(b));
-    c->code.append4(v);
-  }
-}
-
-void
-subtractRR(Context* c, unsigned aSize, lir::Register* a,
-           unsigned bSize, lir::Register* b);
-
-void
-subtractCR(Context* c, unsigned aSize, lir::Constant* a,
-           unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  int64_t v = a->value->value();
-  if (v) {
-    if (TargetBytesPerWord == 4 and bSize == 8) {
-      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-      lir::Constant ah(&high);
-
-      ResolvedPromise low(v & 0xFFFFFFFF);
-      lir::Constant al(&low);
-
-      lir::Register bh(b->high);
-
-      subtractCR(c, 4, &al, 4, b);
-      subtractBorrowCR(c, 4, &ah, &bh);
-    } else {
-      if (vm::fitsInInt32(v)) {
-        maybeRex(c, aSize, b);
-        if (vm::fitsInInt8(v)) {
-          opcode(c, 0x83, 0xe8 + regCode(b));
-          c->code.append(v);
-        } else {
-          opcode(c, 0x81, 0xe8 + regCode(b));
-          c->code.append4(v);
-        }
-      } else {
-        lir::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, aSize, a, aSize, &tmp);
-        subtractRR(c, aSize, &tmp, bSize, b);
-        c->client->releaseTemporary(tmp.low);
-      }
-    }
-  }
-}
-
-void
-subtractBorrowRR(Context* c, unsigned size, lir::Register* a,
-                 lir::Register* b)
-{
-  assert(c, TargetBytesPerWord == 8 or size == 4);
-  
-  maybeRex(c, size, a, b);
-  opcode(c, 0x19);
-  modrm(c, 0xc0, b, a);
-}
-
-void
-subtractRR(Context* c, unsigned aSize, lir::Register* a,
-           unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-  
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    subtractRR(c, 4, a, 4, b);
-    subtractBorrowRR(c, 4, &ah, &bh);
-  } else {
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x29);
-    modrm(c, 0xc0, b, a);
-  }
-}
-
-void
-andRR(Context* c, unsigned aSize, lir::Register* a,
-      unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    andRR(c, 4, a, 4, b);
-    andRR(c, 4, &ah, 4, &bh);
-  } else {
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x21);
-    modrm(c, 0xc0, b, a);
-  }
-}
-
-void
-andCR(Context* c, unsigned aSize, lir::Constant* a,
-      unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  int64_t v = a->value->value();
-
-  if (TargetBytesPerWord == 4 and bSize == 8) {
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-
-    andCR(c, 4, &al, 4, b);
-    andCR(c, 4, &ah, 4, &bh);
-  } else {
-    if (vm::fitsInInt32(v)) {
-      maybeRex(c, aSize, b);
-      if (vm::fitsInInt8(v)) {
-        opcode(c, 0x83, 0xe0 + regCode(b));
-        c->code.append(v);
-      } else {
-        opcode(c, 0x81, 0xe0 + regCode(b));
-        c->code.append4(v);
-      }
-    } else {
-      lir::Register tmp
-        (c->client->acquireTemporary(GeneralRegisterMask));
-      moveCR(c, aSize, a, aSize, &tmp);
-      andRR(c, aSize, &tmp, bSize, b);
-      c->client->releaseTemporary(tmp.low);
-    }
-  }
-}
-
-void
-orRR(Context* c, unsigned aSize, lir::Register* a,
-     unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    orRR(c, 4, a, 4, b);
-    orRR(c, 4, &ah, 4, &bh);
-  } else {
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x09);
-    modrm(c, 0xc0, b, a);
-  }
-}
-
-void
-orCR(Context* c, unsigned aSize, lir::Constant* a,
-     unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  int64_t v = a->value->value();
-  if (v) {
-    if (TargetBytesPerWord == 4 and bSize == 8) {
-      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-      lir::Constant ah(&high);
-
-      ResolvedPromise low(v & 0xFFFFFFFF);
-      lir::Constant al(&low);
-
-      lir::Register bh(b->high);
-
-      orCR(c, 4, &al, 4, b);
-      orCR(c, 4, &ah, 4, &bh);
-    } else {
-      if (vm::fitsInInt32(v)) {
-        maybeRex(c, aSize, b);
-        if (vm::fitsInInt8(v)) {
-          opcode(c, 0x83, 0xc8 + regCode(b));
-          c->code.append(v);
-        } else {
-          opcode(c, 0x81, 0xc8 + regCode(b));
-          c->code.append4(v);        
-        }
-      } else {
-        lir::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, aSize, a, aSize, &tmp);
-        orRR(c, aSize, &tmp, bSize, b);
-        c->client->releaseTemporary(tmp.low);
-      }
-    }
-  }
-}
-
-void
-xorRR(Context* c, unsigned aSize, lir::Register* a,
-      unsigned bSize UNUSED, lir::Register* b)
-{
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    xorRR(c, 4, a, 4, b);
-    xorRR(c, 4, &ah, 4, &bh);
-  } else {
-    maybeRex(c, aSize, a, b);
-    opcode(c, 0x31);
-    modrm(c, 0xc0, b, a);
-  }
-}
-
-void
-xorCR(Context* c, unsigned aSize, lir::Constant* a,
-      unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  int64_t v = a->value->value();
-  if (v) {
-    if (TargetBytesPerWord == 4 and bSize == 8) {
-      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-      lir::Constant ah(&high);
-
-      ResolvedPromise low(v & 0xFFFFFFFF);
-      lir::Constant al(&low);
-
-      lir::Register bh(b->high);
-
-      xorCR(c, 4, &al, 4, b);
-      xorCR(c, 4, &ah, 4, &bh);
-    } else {
-      if (vm::fitsInInt32(v)) {
-        maybeRex(c, aSize, b);
-        if (vm::fitsInInt8(v)) {
-          opcode(c, 0x83, 0xf0 + regCode(b));
-          c->code.append(v);
-        } else {
-          opcode(c, 0x81, 0xf0 + regCode(b));
-          c->code.append4(v);        
-        }
-      } else {
-        lir::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, aSize, a, aSize, &tmp);
-        xorRR(c, aSize, &tmp, bSize, b);
-        c->client->releaseTemporary(tmp.low);
-      }
-    }
-  }
-}
-
-void
-multiplyRR(Context* c, unsigned aSize, lir::Register* a,
-           unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    assert(c, b->high == rdx);
-    assert(c, b->low != rax);
-    assert(c, a->low != rax);
-    assert(c, a->high != rax);
-
-    c->client->save(rax);
-
-    lir::Register axdx(rax, rdx);
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    lir::Register tmp(-1);
-    lir::Register* scratch;
-    if (a->low == b->low) {
-      tmp.low = c->client->acquireTemporary
-        (GeneralRegisterMask & ~(1 << rax));
-      scratch = &tmp;
-      moveRR(c, 4, b, 4, scratch);
-    } else {
-      scratch = b;
-    }
-
-    moveRR(c, 4, b, 4, &axdx);
-    multiplyRR(c, 4, &ah, 4, scratch);
-    multiplyRR(c, 4, a, 4, &bh);
-    addRR(c, 4, &bh, 4, scratch);
-    
-    // mul a->low,%eax%edx
-    opcode(c, 0xf7, 0xe0 + a->low);
-    
-    addRR(c, 4, scratch, 4, &bh);
-    moveRR(c, 4, &axdx, 4, b);
-
-    if (tmp.low != -1) {
-      c->client->releaseTemporary(tmp.low);
-    }
-  } else {
-    maybeRex(c, aSize, b, a);
-    opcode(c, 0x0f, 0xaf);
-    modrm(c, 0xc0, a, b);
-  }
-}
-
-void
-branch(Context* c, lir::TernaryOperation op, lir::Constant* target)
-{
-  switch (op) {
-  case lir::JumpIfEqual:
-    conditional(c, 0x84, target);
-    break;
-
-  case lir::JumpIfNotEqual:
-    conditional(c, 0x85, target);
-    break;
-
-  case lir::JumpIfLess:
-    conditional(c, 0x8c, target);
-    break;
-
-  case lir::JumpIfGreater:
-    conditional(c, 0x8f, target);
-    break;
-
-  case lir::JumpIfLessOrEqual:
-    conditional(c, 0x8e, target);
-    break;
-
-  case lir::JumpIfGreaterOrEqual:
-    conditional(c, 0x8d, target);
-    break;
-
-  default:
-    abort(c);
-  }
-}
-
-void
-branchFloat(Context* c, lir::TernaryOperation op, lir::Constant* target)
-{
-  switch (op) {
-  case lir::JumpIfFloatEqual:
-    conditional(c, 0x84, target);
-    break;
-
-  case lir::JumpIfFloatNotEqual:
-    conditional(c, 0x85, target);
-    break;
-
-  case lir::JumpIfFloatLess:
-    conditional(c, 0x82, target);
-    break;
-
-  case lir::JumpIfFloatGreater:
-    conditional(c, 0x87, target);
-    break;
-
-  case lir::JumpIfFloatLessOrEqual:
-    conditional(c, 0x86, target);
-    break;
-
-  case lir::JumpIfFloatGreaterOrEqual:
-    conditional(c, 0x83, target);
-    break;
-
-  case lir::JumpIfFloatLessOrUnordered:
-    conditional(c, 0x82, target);
-    conditional(c, 0x8a, target);
-    break;
-
-  case lir::JumpIfFloatGreaterOrUnordered:
-    conditional(c, 0x87, target);
-    conditional(c, 0x8a, target);
-    break;
-
-  case lir::JumpIfFloatLessOrEqualOrUnordered:
-    conditional(c, 0x86, target);
-    conditional(c, 0x8a, target);
-    break;
-
-  case lir::JumpIfFloatGreaterOrEqualOrUnordered:
-    conditional(c, 0x83, target);
-    conditional(c, 0x8a, target);
-    break;
-
-  default:
-    abort(c);
-  }
-}
-
-void
-compareRR(Context* c, unsigned aSize, lir::Register* a,
-          unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-  assert(c, aSize <= TargetBytesPerWord);
-
-  maybeRex(c, aSize, a, b);
-  opcode(c, 0x39);
-  modrm(c, 0xc0, b, a);  
-}
-
-void
-compareCR(Context* c, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-  assert(c, TargetBytesPerWord == 8 or aSize == 4);
-  
-  if (a->value->resolved() and vm::fitsInInt32(a->value->value())) {
-    int64_t v = a->value->value();
-    maybeRex(c, aSize, b);
-    if (vm::fitsInInt8(v)) {
-      opcode(c, 0x83, 0xf8 + regCode(b));
-      c->code.append(v);
-    } else {
-      opcode(c, 0x81, 0xf8 + regCode(b));
-      c->code.append4(v);
-    }
-  } else {
-    lir::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
-    moveCR(c, aSize, a, aSize, &tmp);
-    compareRR(c, aSize, &tmp, bSize, b);
-    c->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-compareRM(Context* c, unsigned aSize, lir::Register* a,
-          unsigned bSize UNUSED, lir::Memory* b)
-{
-  assert(c, aSize == bSize);
-  assert(c, TargetBytesPerWord == 8 or aSize == 4);
-  
-  if (TargetBytesPerWord == 8 and aSize == 4) {
-    moveRR(c, 4, a, 8, a);
-  }
-  maybeRex(c, bSize, a, b);
-  opcode(c, 0x39);
-  modrmSibImm(c, a, b);
-}
-
-void
-compareCM(Context* c, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(c, aSize == bSize);
-  assert(c, TargetBytesPerWord == 8 or aSize == 4);
-  
-  if (a->value->resolved()) { 
-    int64_t v = a->value->value();   
-    maybeRex(c, aSize, b);
-    opcode(c, vm::fitsInInt8(v) ? 0x83 : 0x81);
-    modrmSibImm(c, rdi, b->scale, b->index, b->base, b->offset);
-    
-    if (vm::fitsInInt8(v)) {
-      c->code.append(v);
-    } else if (vm::fitsInInt32(v)) {
-      c->code.append4(v);
-    } else {
-      abort(c);
-    }
-  } else {
-    lir::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
-    moveCR(c, aSize, a, bSize, &tmp);
-    compareRM(c, bSize, &tmp, bSize, b);
-    c->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-compareFloatRR(Context* c, unsigned aSize, lir::Register* a,
-               unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  if (aSize == 8) {
-    opcode(c, 0x66);
-  }
-  maybeRex(c, 4, a, b);
-  opcode(c, 0x0f, 0x2e);
-  modrm(c, 0xc0, a, b);
-}
-
-void
-branchLong(Context* c, lir::TernaryOperation op, lir::Operand* al,
-           lir::Operand* ah, lir::Operand* bl,
-           lir::Operand* bh, lir::Constant* target,
-           BinaryOperationType compare)
-{
-  compare(c, 4, ah, 4, bh);
-  
-  unsigned next = 0;
-
-  switch (op) {
-  case lir::JumpIfEqual:
-    opcode(c, 0x75); // jne
-    next = c->code.length();
-    c->code.append(0);
-
-    compare(c, 4, al, 4, bl);
-    conditional(c, 0x84, target); // je
-    break;
-
-  case lir::JumpIfNotEqual:
-    conditional(c, 0x85, target); // jne
-
-    compare(c, 4, al, 4, bl);
-    conditional(c, 0x85, target); // jne
-    break;
-
-  case lir::JumpIfLess:
-    conditional(c, 0x8c, target); // jl
-
-    opcode(c, 0x7f); // jg
-    next = c->code.length();
-    c->code.append(0);
-
-    compare(c, 4, al, 4, bl);
-    conditional(c, 0x82, target); // jb
-    break;
-
-  case lir::JumpIfGreater:
-    conditional(c, 0x8f, target); // jg
-
-    opcode(c, 0x7c); // jl
-    next = c->code.length();
-    c->code.append(0);
-
-    compare(c, 4, al, 4, bl);
-    conditional(c, 0x87, target); // ja
-    break;
-
-  case lir::JumpIfLessOrEqual:
-    conditional(c, 0x8c, target); // jl
-
-    opcode(c, 0x7f); // jg
-    next = c->code.length();
-    c->code.append(0);
-
-    compare(c, 4, al, 4, bl);
-    conditional(c, 0x86, target); // jbe
-    break;
-
-  case lir::JumpIfGreaterOrEqual:
-    conditional(c, 0x8f, target); // jg
-
-    opcode(c, 0x7c); // jl
-    next = c->code.length();
-    c->code.append(0);
-
-    compare(c, 4, al, 4, bl);
-    conditional(c, 0x83, target); // jae
-    break;
-
-  default:
-    abort(c);
-  }  
-
-  if (next) {
-    int8_t nextOffset = c->code.length() - next - 1;
-    c->code.set(next, &nextOffset, 1);
-  }
-}
-
-void
-branchRR(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Register* b,
-         lir::Constant* target)
-{
-  if (isFloatBranch(op)) {
-    compareFloatRR(c, size, a, size, b);
-    branchFloat(c, op, target);
-  } else if (size > TargetBytesPerWord) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    branchLong(c, op, a, &ah, b, &bh, target, CAST2(compareRR));
-  } else {
-    compareRR(c, size, a, size, b);
-    branch(c, op, target);
-  }
-}
-
-void
-branchCR(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Register* b,
-         lir::Constant* target)
-{
-  assert(c, not isFloatBranch(op));
-
-  if (size > TargetBytesPerWord) {
-    int64_t v = a->value->value();
-
-    ResolvedPromise low(v & ~static_cast<uintptr_t>(0));
-    lir::Constant al(&low);
-  
-    ResolvedPromise high((v >> 32) & ~static_cast<uintptr_t>(0));
-    lir::Constant ah(&high);
-  
-    lir::Register bh(b->high);
-
-    branchLong(c, op, &al, &ah, b, &bh, target, CAST2(compareCR));
-  } else {
-    compareCR(c, size, a, size, b);
-    branch(c, op, target);
-  }
-}
-
-void
-branchRM(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(c, not isFloatBranch(op));
-  assert(c, size <= TargetBytesPerWord);
-
-  compareRM(c, size, a, size, b);
-  branch(c, op, target);
-}
-
-void
-branchCM(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(c, not isFloatBranch(op));
-  assert(c, size <= TargetBytesPerWord);
-
-  compareCM(c, size, a, size, b);
-  branch(c, op, target);
-}
-
-void
-multiplyCR(Context* c, unsigned aSize, lir::Constant* a,
-           unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  if (TargetBytesPerWord == 4 and aSize == 8) {
-    const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
-    lir::Register tmp(c->client->acquireTemporary(mask),
-                            c->client->acquireTemporary(mask));
-
-    moveCR(c, aSize, a, aSize, &tmp);
-    multiplyRR(c, aSize, &tmp, bSize, b);
-    c->client->releaseTemporary(tmp.low);
-    c->client->releaseTemporary(tmp.high);
-  } else {
-    int64_t v = a->value->value();
-    if (v != 1) {
-      if (vm::fitsInInt32(v)) {
-        maybeRex(c, bSize, b, b);
-        if (vm::fitsInInt8(v)) {
-          opcode(c, 0x6b);
-          modrm(c, 0xc0, b, b);
-          c->code.append(v);
-        } else {
-          opcode(c, 0x69);
-          modrm(c, 0xc0, b, b);
-          c->code.append4(v);        
-        }
-      } else {
-        lir::Register tmp
-          (c->client->acquireTemporary(GeneralRegisterMask));
-        moveCR(c, aSize, a, aSize, &tmp);
-        multiplyRR(c, aSize, &tmp, bSize, b);
-        c->client->releaseTemporary(tmp.low);      
-      }
-    }
-  }
-}
-
-void
-divideRR(Context* c, unsigned aSize, lir::Register* a,
-         unsigned bSize UNUSED, lir::Register* b UNUSED)
-{
-  assert(c, aSize == bSize);
-
-  assert(c, b->low == rax);
-  assert(c, a->low != rdx);
-
-  c->client->save(rdx);
-
-  maybeRex(c, aSize, a, b);    
-  opcode(c, 0x99); // cdq
-  maybeRex(c, aSize, b, a);
-  opcode(c, 0xf7, 0xf8 + regCode(a));
-}
-
-void
-remainderRR(Context* c, unsigned aSize, lir::Register* a,
-            unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == bSize);
-
-  assert(c, b->low == rax);
-  assert(c, a->low != rdx);
-
-  c->client->save(rdx);
-
-  maybeRex(c, aSize, a, b);    
-  opcode(c, 0x99); // cdq
-  maybeRex(c, aSize, b, a);
-  opcode(c, 0xf7, 0xf8 + regCode(a));
-
-  lir::Register dx(rdx);
-  moveRR(c, TargetBytesPerWord, &dx, TargetBytesPerWord, b);
-}
-
-void
-doShift(Context* c, UNUSED void (*shift)
-        (Context*, unsigned, lir::Register*, unsigned,
-         lir::Register*),
-        int type, UNUSED unsigned aSize, lir::Constant* a,
-        unsigned bSize, lir::Register* b)
-{
-  int64_t v = a->value->value();
-
-  if (TargetBytesPerWord == 4 and bSize == 8) {
-    c->client->save(rcx);
-
-    lir::Register cx(rcx);
-    ResolvedPromise promise(v & 0x3F);
-    lir::Constant masked(&promise);
-    moveCR(c, 4, &masked, 4, &cx);
-    shift(c, aSize, &cx, bSize, b);
-  } else {
-    maybeRex(c, bSize, b);
-    if (v == 1) {
-      opcode(c, 0xd1, type + regCode(b));
-    } else if (vm::fitsInInt8(v)) {
-      opcode(c, 0xc1, type + regCode(b));
-      c->code.append(v);
-    } else {
-      abort(c);
-    }
-  }
-}
-
-void
-shiftLeftRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
-            unsigned bSize, lir::Register* b)
-{
-  if (TargetBytesPerWord == 4 and bSize == 8) {
-    lir::Register cx(rcx);
-    if (a->low != rcx) {
-      c->client->save(rcx);
-      ResolvedPromise promise(0x3F);
-      lir::Constant mask(&promise);
-      moveRR(c, 4, a, 4, &cx);
-      andCR(c, 4, &mask, 4, &cx);
-    }
-
-    // shld
-    opcode(c, 0x0f, 0xa5);
-    modrm(c, 0xc0, b->high, b->low);
-
-    // shl
-    opcode(c, 0xd3, 0xe0 + b->low);
-
-    ResolvedPromise promise(32);
-    lir::Constant constant(&promise);
-    compareCR(c, aSize, &constant, aSize, &cx);
-
-    opcode(c, 0x7c); //jl
-    c->code.append(2 + 2);
-
-    lir::Register bh(b->high);
-    moveRR(c, 4, b, 4, &bh); // 2 bytes
-    xorRR(c, 4, b, 4, b); // 2 bytes
-  } else {
-    assert(c, a->low == rcx);  
-
-    maybeRex(c, bSize, a, b);
-    opcode(c, 0xd3, 0xe0 + regCode(b));
-  }
-}
-
-void
-shiftLeftCR(Context* c, unsigned aSize, lir::Constant* a,
-            unsigned bSize, lir::Register* b)
-{
-  doShift(c, shiftLeftRR, 0xe0, aSize, a, bSize, b);
-}
-
-void
-shiftRightRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
-             unsigned bSize, lir::Register* b)
-{
-  if (TargetBytesPerWord == 4 and bSize == 8) {
-    lir::Register cx(rcx);
-    if (a->low != rcx) {
-      c->client->save(rcx);
-      ResolvedPromise promise(0x3F);
-      lir::Constant mask(&promise);
-      moveRR(c, 4, a, 4, &cx);
-      andCR(c, 4, &mask, 4, &cx);
-    }
-
-    // shrd
-    opcode(c, 0x0f, 0xad);
-    modrm(c, 0xc0, b->low, b->high);
-
-    // sar
-    opcode(c, 0xd3, 0xf8 + b->high);
-
-    ResolvedPromise promise(32);
-    lir::Constant constant(&promise);
-    compareCR(c, aSize, &constant, aSize, &cx);
-
-    opcode(c, 0x7c); //jl
-    c->code.append(2 + 3);
-
-    lir::Register bh(b->high);
-    moveRR(c, 4, &bh, 4, b); // 2 bytes
-
-    // sar 31,high
-    opcode(c, 0xc1, 0xf8 + b->high);
-    c->code.append(31);
-  } else {
-    assert(c, a->low == rcx);
-
-    maybeRex(c, bSize, a, b);
-    opcode(c, 0xd3, 0xf8 + regCode(b));
-  }
-}
-
-void
-shiftRightCR(Context* c, unsigned aSize, lir::Constant* a,
-             unsigned bSize, lir::Register* b)
-{
-  doShift(c, shiftRightRR, 0xf8, aSize, a, bSize, b);
-}
-
-void
-unsignedShiftRightRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
-                     unsigned bSize, lir::Register* b)
-{
-  if (TargetBytesPerWord == 4 and bSize == 8) {
-    lir::Register cx(rcx);
-    if (a->low != rcx) {
-      c->client->save(rcx);
-      ResolvedPromise promise(0x3F);
-      lir::Constant mask(&promise);
-      moveRR(c, 4, a, 4, &cx);
-      andCR(c, 4, &mask, 4, &cx);
-    }
-
-    // shrd
-    opcode(c, 0x0f, 0xad);
-    modrm(c, 0xc0, b->low, b->high);
-
-    // shr
-    opcode(c, 0xd3, 0xe8 + b->high);
-
-    ResolvedPromise promise(32);
-    lir::Constant constant(&promise);
-    compareCR(c, aSize, &constant, aSize, &cx);
-
-    opcode(c, 0x7c); //jl
-    c->code.append(2 + 2);
-
-    lir::Register bh(b->high);
-    moveRR(c, 4, &bh, 4, b); // 2 bytes
-    xorRR(c, 4, &bh, 4, &bh); // 2 bytes
-  } else {
-    assert(c, a->low == rcx);
-
-    maybeRex(c, bSize, a, b);
-    opcode(c, 0xd3, 0xe8 + regCode(b));
-  }
-}
-
-void
-unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, lir::Constant* a,
-                     unsigned bSize, lir::Register* b)
-{
-  doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b);
-}
-
-void
-floatRegOp(Context* c, unsigned aSize, lir::Register* a, unsigned bSize,
-           lir::Register* b, uint8_t op, uint8_t mod = 0xc0)
-{
-  if (aSize == 4) {
-    opcode(c, 0xf3);
-  } else {
-    opcode(c, 0xf2);
-  }
-  maybeRex(c, bSize, b, a);
-  opcode(c, 0x0f, op);
-  modrm(c, mod, a, b);
-}
-
-void
-floatMemOp(Context* c, unsigned aSize, lir::Memory* a, unsigned bSize,
-           lir::Register* b, uint8_t op)
-{
-  if (aSize == 4) {
-    opcode(c, 0xf3);
-  } else {
-    opcode(c, 0xf2);
-  }
-  maybeRex(c, bSize, b, a);
-  opcode(c, 0x0f, op);
-  modrmSibImm(c, b, a);
-}
-
-void
-floatSqrtRR(Context* c, unsigned aSize, lir::Register* a,
-            unsigned bSize UNUSED, lir::Register* b)
-{
-  floatRegOp(c, aSize, a, 4, b, 0x51);
-}
-
-void
-floatSqrtMR(Context* c, unsigned aSize, lir::Memory* a,
-            unsigned bSize UNUSED, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, 4, b, 0x51);
-}
-
-void
-floatAddRR(Context* c, unsigned aSize, lir::Register* a,
-           unsigned bSize UNUSED, lir::Register* b)
-{
-  floatRegOp(c, aSize, a, 4, b, 0x58);
-}
-
-void
-floatAddMR(Context* c, unsigned aSize, lir::Memory* a,
-           unsigned bSize UNUSED, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, 4, b, 0x58);
-}
-
-void
-floatSubtractRR(Context* c, unsigned aSize, lir::Register* a,
-                unsigned bSize UNUSED, lir::Register* b)
-{
-  floatRegOp(c, aSize, a, 4, b, 0x5c);
-}
-
-void
-floatSubtractMR(Context* c, unsigned aSize, lir::Memory* a,
-                unsigned bSize UNUSED, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, 4, b, 0x5c);
-}
-
-void
-floatMultiplyRR(Context* c, unsigned aSize, lir::Register* a,
-                unsigned bSize UNUSED, lir::Register* b)
-{
-  floatRegOp(c, aSize, a, 4, b, 0x59);
-}
-
-void
-floatMultiplyMR(Context* c, unsigned aSize, lir::Memory* a,
-                unsigned bSize UNUSED, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, 4, b, 0x59);
-}
-
-void
-floatDivideRR(Context* c, unsigned aSize, lir::Register* a,
-              unsigned bSize UNUSED, lir::Register* b)
-{
-  floatRegOp(c, aSize, a, 4, b, 0x5e);
-}
-
-void
-floatDivideMR(Context* c, unsigned aSize, lir::Memory* a,
-              unsigned bSize UNUSED, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, 4, b, 0x5e);
-}
-
-void
-float2FloatRR(Context* c, unsigned aSize, lir::Register* a,
-              unsigned bSize UNUSED, lir::Register* b)
-{
-  floatRegOp(c, aSize, a, 4, b, 0x5a);
-}
-
-void
-float2FloatMR(Context* c, unsigned aSize, lir::Memory* a,
-              unsigned bSize UNUSED, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, 4, b, 0x5a);
-}
-
-void
-float2IntRR(Context* c, unsigned aSize, lir::Register* a,
-            unsigned bSize, lir::Register* b)
-{
-  assert(c, not floatReg(b));
-  floatRegOp(c, aSize, a, bSize, b, 0x2c);
-}
-
-void
-float2IntMR(Context* c, unsigned aSize, lir::Memory* a,
-            unsigned bSize, lir::Register* b)
-{
-  floatMemOp(c, aSize, a, bSize, b, 0x2c);
-}
-
-void
-int2FloatRR(Context* c, unsigned aSize, lir::Register* a,
-            unsigned bSize, lir::Register* b)
-{
-  floatRegOp(c, bSize, a, aSize, b, 0x2a);
-}
-
-void
-int2FloatMR(Context* c, unsigned aSize, lir::Memory* a,
-            unsigned bSize, lir::Register* b)
-{
-  floatMemOp(c, bSize, a, aSize, b, 0x2a);
-}
-
-void
-floatNegateRR(Context* c, unsigned aSize, lir::Register* a,
-              unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, floatReg(a) and floatReg(b));
-  // unlike most of the other floating point code, this does NOT
-  // support doubles:
-  assert(c, aSize == 4);
-  ResolvedPromise pcon(0x80000000);
-  lir::Constant con(&pcon);
-  if (a->low == b->low) {
-    lir::Register tmp(c->client->acquireTemporary(FloatRegisterMask));
-    moveCR(c, 4, &con, 4, &tmp);
-    maybeRex(c, 4, a, &tmp);
-    opcode(c, 0x0f, 0x57);
-    modrm(c, 0xc0, &tmp, a);
-    c->client->releaseTemporary(tmp.low);
-  } else {
-    moveCR(c, 4, &con, 4, b);
-    if (aSize == 8) opcode(c, 0x66);
-    maybeRex(c, 4, a, b);
-    opcode(c, 0x0f, 0x57);
-    modrm(c, 0xc0, a, b);
-  }
-}
-
-void
-floatAbsoluteRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
-           unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, floatReg(a) and floatReg(b));
-  // unlike most of the other floating point code, this does NOT
-  // support doubles:
-  assert(c, aSize == 4);
-  ResolvedPromise pcon(0x7fffffff);
-  lir::Constant con(&pcon);
-  if (a->low == b->low) {
-    lir::Register tmp(c->client->acquireTemporary(FloatRegisterMask));
-    moveCR(c, 4, &con, 4, &tmp);
-    maybeRex(c, 4, a, &tmp);
-    opcode(c, 0x0f, 0x54);
-    modrm(c, 0xc0, &tmp, a);
-    c->client->releaseTemporary(tmp.low);
-  } else {
-    moveCR(c, 4, &con, 4, b);
-    maybeRex(c, 4, a, b);
-    opcode(c, 0x0f, 0x54);
-    modrm(c, 0xc0, a, b);
-  }
-}
-
-void
-absoluteRR(Context* c, unsigned aSize, lir::Register* a,
-      unsigned bSize UNUSED, lir::Register* b UNUSED)
-{
-  assert(c, aSize == bSize and a->low == rax and b->low == rax);
-  lir::Register d
-    (c->client->acquireTemporary(static_cast<uint64_t>(1) << rdx));
-  maybeRex(c, aSize, a, b);
-  opcode(c, 0x99);
-  xorRR(c, aSize, &d, aSize, a);
-  subtractRR(c, aSize, &d, aSize, a);
-  c->client->releaseTemporary(rdx);
-}
-
 unsigned
 argumentFootprint(unsigned footprint)
 {
@@ -2327,124 +135,6 @@ nextFrame(ArchitectureContext* c UNUSED, uint8_t* start, unsigned size UNUSED,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-void
-populateTables(ArchitectureContext* c)
-{
-  const lir::OperandType C = lir::ConstantOperand;
-  const lir::OperandType A = lir::AddressOperand;
-  const lir::OperandType R = lir::RegisterOperand;
-  const lir::OperandType M = lir::MemoryOperand;
-
-  OperationType* zo = c->operations;
-  UnaryOperationType* uo = c->unaryOperations;
-  BinaryOperationType* bo = c->binaryOperations;
-  BranchOperationType* bro = c->branchOperations;
-
-  zo[lir::Return] = return_;
-  zo[lir::LoadBarrier] = ignore;
-  zo[lir::StoreStoreBarrier] = ignore;
-  zo[lir::StoreLoadBarrier] = storeLoadBarrier;
-  zo[lir::Trap] = trap;
-
-  uo[index(c, lir::Call, C)] = CAST1(callC);
-  uo[index(c, lir::Call, R)] = CAST1(callR);
-  uo[index(c, lir::Call, M)] = CAST1(callM);
-
-  uo[index(c, lir::AlignedCall, C)] = CAST1(alignedCallC);
-
-  uo[index(c, lir::LongCall, C)] = CAST1(longCallC);
-
-  uo[index(c, lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
-
-  uo[index(c, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(c, lir::Jump, C)] = CAST1(jumpC);
-  uo[index(c, lir::Jump, M)] = CAST1(jumpM);
-
-  uo[index(c, lir::AlignedJump, C)] = CAST1(alignedJumpC);
-
-  uo[index(c, lir::LongJump, C)] = CAST1(longJumpC);
-
-  uo[index(c, lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
-
-  bo[index(c, lir::Negate, R, R)] = CAST2(negateRR);
-
-  bo[index(c, lir::FloatNegate, R, R)] = CAST2(floatNegateRR);
-
-  bo[index(c, lir::Move, R, R)] = CAST2(moveRR);
-  bo[index(c, lir::Move, C, R)] = CAST2(moveCR);
-  bo[index(c, lir::Move, M, R)] = CAST2(moveMR);
-  bo[index(c, lir::Move, R, M)] = CAST2(moveRM);
-  bo[index(c, lir::Move, C, M)] = CAST2(moveCM);
-  bo[index(c, lir::Move, A, R)] = CAST2(moveAR);
-
-  bo[index(c, lir::FloatSquareRoot, R, R)] = CAST2(floatSqrtRR);
-  bo[index(c, lir::FloatSquareRoot, M, R)] = CAST2(floatSqrtMR);
-
-  bo[index(c, lir::MoveZ, R, R)] = CAST2(moveZRR);
-  bo[index(c, lir::MoveZ, M, R)] = CAST2(moveZMR);
-  bo[index(c, lir::MoveZ, C, R)] = CAST2(moveCR);
-
-  bo[index(c, lir::Add, R, R)] = CAST2(addRR);
-  bo[index(c, lir::Add, C, R)] = CAST2(addCR);
-
-  bo[index(c, lir::Subtract, C, R)] = CAST2(subtractCR);
-  bo[index(c, lir::Subtract, R, R)] = CAST2(subtractRR);
-
-  bo[index(c, lir::FloatAdd, R, R)] = CAST2(floatAddRR);
-  bo[index(c, lir::FloatAdd, M, R)] = CAST2(floatAddMR);
-
-  bo[index(c, lir::FloatSubtract, R, R)] = CAST2(floatSubtractRR);
-  bo[index(c, lir::FloatSubtract, M, R)] = CAST2(floatSubtractMR);
-
-  bo[index(c, lir::And, R, R)] = CAST2(andRR);
-  bo[index(c, lir::And, C, R)] = CAST2(andCR);
-
-  bo[index(c, lir::Or, R, R)] = CAST2(orRR);
-  bo[index(c, lir::Or, C, R)] = CAST2(orCR);
-
-  bo[index(c, lir::Xor, R, R)] = CAST2(xorRR);
-  bo[index(c, lir::Xor, C, R)] = CAST2(xorCR);
-
-  bo[index(c, lir::Multiply, R, R)] = CAST2(multiplyRR);
-  bo[index(c, lir::Multiply, C, R)] = CAST2(multiplyCR);
-
-  bo[index(c, lir::Divide, R, R)] = CAST2(divideRR);
-
-  bo[index(c, lir::FloatMultiply, R, R)] = CAST2(floatMultiplyRR);
-  bo[index(c, lir::FloatMultiply, M, R)] = CAST2(floatMultiplyMR);
-
-  bo[index(c, lir::FloatDivide, R, R)] = CAST2(floatDivideRR);
-  bo[index(c, lir::FloatDivide, M, R)] = CAST2(floatDivideMR);
-
-  bo[index(c, lir::Remainder, R, R)] = CAST2(remainderRR);
-
-  bo[index(c, lir::ShiftLeft, R, R)] = CAST2(shiftLeftRR);
-  bo[index(c, lir::ShiftLeft, C, R)] = CAST2(shiftLeftCR);
-
-  bo[index(c, lir::ShiftRight, R, R)] = CAST2(shiftRightRR);
-  bo[index(c, lir::ShiftRight, C, R)] = CAST2(shiftRightCR);
-
-  bo[index(c, lir::UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR);
-  bo[index(c, lir::UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR);
-
-  bo[index(c, lir::Float2Float, R, R)] = CAST2(float2FloatRR);
-  bo[index(c, lir::Float2Float, M, R)] = CAST2(float2FloatMR);
-
-  bo[index(c, lir::Float2Int, R, R)] = CAST2(float2IntRR);
-  bo[index(c, lir::Float2Int, M, R)] = CAST2(float2IntMR);
-
-  bo[index(c, lir::Int2Float, R, R)] = CAST2(int2FloatRR);
-  bo[index(c, lir::Int2Float, M, R)] = CAST2(int2FloatMR);
-
-  bo[index(c, lir::Absolute, R, R)] = CAST2(absoluteRR);
-  bo[index(c, lir::FloatAbsolute, R, R)] = CAST2(floatAbsoluteRR);
-
-  bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR);
-  bro[branchIndex(c, C, R)] = CAST_BRANCH(branchCR);
-  bro[branchIndex(c, C, M)] = CAST_BRANCH(branchCM);
-  bro[branchIndex(c, R, M)] = CAST_BRANCH(branchRM);
-}
-
 class MyArchitecture: public Assembler::Architecture {
  public:
   MyArchitecture(System* system, bool useNativeFeatures):
diff --git a/src/codegen/x86/block.cpp b/src/codegen/x86/block.cpp
index 152fd99a8d..97bac8dbf1 100644
--- a/src/codegen/x86/block.cpp
+++ b/src/codegen/x86/block.cpp
@@ -11,10 +11,6 @@
 #include "codegen/x86/block.h"
 #include "common.h"
 
-#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
-#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
-#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
-
 namespace avian {
 namespace codegen {
 namespace x86 {
diff --git a/src/codegen/x86/context.cpp b/src/codegen/x86/context.cpp
index da73e97606..4c996f2b75 100644
--- a/src/codegen/x86/context.cpp
+++ b/src/codegen/x86/context.cpp
@@ -11,10 +11,6 @@
 #include "codegen/x86/context.h"
 #include "codegen/x86/block.h"
 
-#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
-#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
-#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
-
 namespace avian {
 namespace codegen {
 namespace x86 {
diff --git a/src/codegen/x86/context.h b/src/codegen/x86/context.h
index 5b3a915b9c..70233dd399 100644
--- a/src/codegen/x86/context.h
+++ b/src/codegen/x86/context.h
@@ -11,6 +11,10 @@
 #ifndef AVIAN_CODEGEN_ASSEMBLER_X86_CONTEXT_H
 #define AVIAN_CODEGEN_ASSEMBLER_X86_CONTEXT_H
 
+#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
+#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+
 #include "codegen/lir.h"
 #include "codegen/assembler.h"
 #include "alloc-vector.h"
diff --git a/src/codegen/x86/detect.cpp b/src/codegen/x86/detect.cpp
new file mode 100644
index 0000000000..eaf6af54ac
--- /dev/null
+++ b/src/codegen/x86/detect.cpp
@@ -0,0 +1,40 @@
+
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "codegen/x86/context.h"
+#include "codegen/x86/block.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+extern "C" bool
+detectFeature(unsigned ecx, unsigned edx);
+
+bool useSSE(ArchitectureContext* c) {
+  if (vm::TargetBytesPerWord == 8) {
+    // amd64 implies SSE2 support
+    return true;
+  } else if (c->useNativeFeatures) {
+    static int supported = -1;
+    if (supported == -1) {
+      supported = detectFeature(0, 0x2000000) // SSE 1
+        and detectFeature(0, 0x4000000); // SSE 2
+    }
+    return supported;
+  } else {
+    return false;
+  }
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/detect.h b/src/codegen/x86/detect.h
new file mode 100644
index 0000000000..13c7e58be1
--- /dev/null
+++ b/src/codegen/x86/detect.h
@@ -0,0 +1,28 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_DETECT_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_DETECT_H
+
+#include "codegen/assembler.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+class ArchitectureContext;
+
+bool useSSE(ArchitectureContext* c);
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_DETECT_H
diff --git a/src/codegen/x86/encode.cpp b/src/codegen/x86/encode.cpp
new file mode 100644
index 0000000000..8c34c426f9
--- /dev/null
+++ b/src/codegen/x86/encode.cpp
@@ -0,0 +1,345 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "target.h"
+
+#include "codegen/x86/context.h"
+#include "codegen/x86/encode.h"
+#include "codegen/x86/registers.h"
+#include "codegen/x86/fixup.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+
+#define REX_W 0x48
+#define REX_R 0x44
+#define REX_X 0x42
+#define REX_B 0x41
+#define REX_NONE 0x40
+
+// Emits a REX prefix when targeting 64-bit code.  REX_W selects a 64-bit
+// operand size; REX_R/X/B extend the ModRM reg field ('a'), the SIB index
+// and the base to reach registers 8-15.  The byte is only appended when
+// some bit is set, unless 'always' forces it.  No-op on 32-bit targets.
+void maybeRex(Context* c, unsigned size, int a, int index, int base, bool always) {
+  if (vm::TargetBytesPerWord == 8) {
+    uint8_t byte;
+    if (size == 8) {
+      byte = REX_W;
+    } else {
+      byte = REX_NONE;
+    }
+    if (a != lir::NoRegister and (a & 8)) byte |= REX_R;
+    if (index != lir::NoRegister and (index & 8)) byte |= REX_X;
+    if (base != lir::NoRegister and (base & 8)) byte |= REX_B;
+    if (always or byte != REX_NONE) c->code.append(byte);
+  }
+}
+
+// Convenience overloads that select the REX reg/index/base arguments for
+// the common operand combinations.
+void maybeRex(Context* c, unsigned size, lir::Register* a, lir::Register* b) {
+  maybeRex(c, size, a->low, lir::NoRegister, b->low, false);
+}
+
+// Unconditionally emits the prefix (64-bit targets only) -- presumably to
+// select the low byte of rsp/rbp/rsi/rdi rather than ah/ch/dh/bh in
+// byte-sized encodings; confirm against the call sites in moveRR.
+void alwaysRex(Context* c, unsigned size, lir::Register* a, lir::Register* b) {
+  maybeRex(c, size, a->low, lir::NoRegister, b->low, true);
+}
+
+void maybeRex(Context* c, unsigned size, lir::Register* a) {
+  maybeRex(c, size, lir::NoRegister, lir::NoRegister, a->low, false);
+}
+
+// Byte-sized access to registers 4-7 forces a REX prefix so the encoding
+// names spl/bpl/sil/dil -- hence the (a->low & 4) test.
+void maybeRex(Context* c, unsigned size, lir::Register* a, lir::Memory* b) {
+  maybeRex(c, size, a->low, b->index, b->base, size == 1 and (a->low & 4));
+}
+
+void maybeRex(Context* c, unsigned size, lir::Memory* a) {
+  maybeRex(c, size, lir::NoRegister, a->index, a->base, false);
+}
+
+// ModRM byte: caller-supplied mod bits, 'b' in the reg field, 'a' in r/m.
+void modrm(Context* c, uint8_t mod, int a, int b) {
+  c->code.append(mod | (regCode(b) << 3) | regCode(a));
+}
+
+void modrm(Context* c, uint8_t mod, lir::Register* a, lir::Register* b) {
+  modrm(c, mod, a->low, b->low);
+}
+
+// SIB byte: log2(scale) in the top two bits, then index and base codes.
+void sib(Context* c, unsigned scale, int index, int base) {
+  c->code.append((vm::log(scale) << 6) | (regCode(index) << 3) | regCode(base));
+}
+
+// Emits ModRM plus, when required, a SIB byte for a memory operand.  A
+// base with r/m code 100b (rsp/r12) means "SIB follows", so that case gets
+// an explicit no-index SIB byte even without an index register.
+void modrmSib(Context* c, int width, int a, int scale, int index, int base) {
+  if (index == lir::NoRegister) {
+    modrm(c, width, base, a);
+    if (regCode(base) == rsp) {
+      sib(c, 0x00, rsp, rsp);
+    }
+  } else {
+    modrm(c, width, rsp, a);
+    sib(c, scale, index, base);
+  }
+}
+
+// Appends the displacement in the shortest form: none when the offset is 0
+// and the base is not rbp (mod 00 with base code 101b encodes a different
+// addressing form), disp8 when it fits a signed byte, disp32 otherwise.
+void modrmSibImm(Context* c, int a, int scale, int index, int base, int offset) {
+  if (offset == 0 and regCode(base) != rbp) {
+    modrmSib(c, 0x00, a, scale, index, base);
+  } else if (vm::fitsInInt8(offset)) {
+    modrmSib(c, 0x40, a, scale, index, base);
+    c->code.append(offset);
+  } else {
+    modrmSib(c, 0x80, a, scale, index, base);
+    c->code.append4(offset);
+  }
+}
+
+void modrmSibImm(Context* c, lir::Register* a, lir::Memory* b) {
+  modrmSibImm(c, a->low, b->scale, b->index, b->base, b->offset);
+}
+
+// Raw opcode byte emitters.
+void opcode(Context* c, uint8_t op) {
+  c->code.append(op);
+}
+
+void opcode(Context* c, uint8_t op1, uint8_t op2) {
+  c->code.append(op1);
+  c->code.append(op2);
+}
+
+// 5-byte unconditional jmp/call rel32: the rel32 slot is zero-filled now
+// and patched later via the registered OffsetTask.
+void unconditional(Context* c, unsigned jump, lir::Constant* a) {
+  appendOffsetTask(c, a->value, offsetPromise(c), 5);
+
+  opcode(c, jump);
+  c->code.append4(0);
+}
+
+// 6-byte conditional jump (0x0f <cc> rel32), likewise patched later.
+void conditional(Context* c, unsigned condition, lir::Constant* a) {
+  appendOffsetTask(c, a->value, offsetPromise(c), 6);
+  
+  opcode(c, 0x0f, condition);
+  c->code.append4(0);
+}
+
+// Register-to-register scalar move involving at least one XMM register:
+// xmm<-xmm uses movss/movsd (f3/f2 0f 10), gpr<->xmm uses movd/movq
+// (66 0f 7e / 66 0f 6e).
+void sseMoveRR(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize >= 4);
+  assert(c, aSize == bSize);
+
+  if (isFloatReg(a) and isFloatReg(b)) {
+    if (aSize == 4) {
+      opcode(c, 0xf3);
+      // NOTE(review): the 8-byte branch below passes (b, a) to maybeRex
+      // while this branch passes (a, b).  Since modrm puts b in the reg
+      // field, (b, a) appears to be the order REX_R/REX_B expect --
+      // confirm whether this branch mis-encodes xmm8+ operands.
+      maybeRex(c, 4, a, b);
+      opcode(c, 0x0f, 0x10);
+      modrm(c, 0xc0, a, b);
+    } else {
+      opcode(c, 0xf2);
+      maybeRex(c, 4, b, a);
+      opcode(c, 0x0f, 0x10);
+      modrm(c, 0xc0, a, b);
+    } 
+  } else if (isFloatReg(a)) {
+    opcode(c, 0x66);
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x0f, 0x7e);
+    modrm(c, 0xc0, b, a);   
+  } else {
+    opcode(c, 0x66);
+    maybeRex(c, aSize, b, a);
+    opcode(c, 0x0f, 0x6e);
+    modrm(c, 0xc0, a, b);   
+  }
+}
+
+// Constant-to-XMM move: there is no imm-to-xmm instruction, so the value
+// is first materialized into a temporary general-purpose register and then
+// moved across.  The temporary is released before returning.
+void sseMoveCR(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize <= vm::TargetBytesPerWord);
+  lir::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
+  moveCR2(c, aSize, a, aSize, &tmp, 0);
+  sseMoveRR(c, aSize, &tmp, bSize, b);
+  c->client->releaseTemporary(tmp.low);
+}
+
+// Memory-to-XMM load: an 8-byte load on a 32-bit target uses movq
+// (f3 0f 7e, no REX available); otherwise movd/movq via 66 0f 6e.
+void sseMoveMR(Context* c, unsigned aSize, lir::Memory* a,
+          unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize >= 4);
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    opcode(c, 0xf3);
+    opcode(c, 0x0f, 0x7e);
+    modrmSibImm(c, b, a);
+  } else {
+    opcode(c, 0x66);
+    maybeRex(c, aSize, b, a);
+    opcode(c, 0x0f, 0x6e);
+    modrmSibImm(c, b, a);
+  }
+}
+
+// XMM-to-memory store: an 8-byte store on a 32-bit target uses movq
+// (66 0f d6); otherwise movd/movq via 66 0f 7e.
+void sseMoveRM(Context* c, unsigned aSize, lir::Register* a,
+       UNUSED unsigned bSize, lir::Memory* b)
+{
+  assert(c, aSize >= 4);
+  assert(c, aSize == bSize);
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    opcode(c, 0x66);
+    opcode(c, 0x0f, 0xd6);
+    modrmSibImm(c, a, b);
+  } else {
+    opcode(c, 0x66);
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x0f, 0x7e);
+    modrmSibImm(c, a, b);
+  }
+}
+
+// Emits the conditional jump matching an integer-compare LIR branch
+// operation (0x84 = je, 0x85 = jne, 0x8c = jl, 0x8f = jg, 0x8e = jle,
+// 0x8d = jge).  Non-branch operations abort.
+void branch(Context* c, lir::TernaryOperation op, lir::Constant* target) {
+  switch (op) {
+  case lir::JumpIfEqual:
+    conditional(c, 0x84, target);
+    break;
+
+  case lir::JumpIfNotEqual:
+    conditional(c, 0x85, target);
+    break;
+
+  case lir::JumpIfLess:
+    conditional(c, 0x8c, target);
+    break;
+
+  case lir::JumpIfGreater:
+    conditional(c, 0x8f, target);
+    break;
+
+  case lir::JumpIfLessOrEqual:
+    conditional(c, 0x8e, target);
+    break;
+
+  case lir::JumpIfGreaterOrEqual:
+    conditional(c, 0x8d, target);
+    break;
+
+  default:
+    abort(c);
+  }
+}
+
+// Emits the conditional jump(s) for a floating-point LIR branch, using the
+// unsigned condition codes (jb/ja/jbe/jae) appropriate after ucomiss/sd.
+// The "...OrUnordered" variants emit a second jump, 0x8a (jp), which is
+// taken when the comparison set the parity flag -- i.e. was unordered.
+void branchFloat(Context* c, lir::TernaryOperation op, lir::Constant* target) {
+  switch (op) {
+  case lir::JumpIfFloatEqual:
+    conditional(c, 0x84, target);
+    break;
+
+  case lir::JumpIfFloatNotEqual:
+    conditional(c, 0x85, target);
+    break;
+
+  case lir::JumpIfFloatLess:
+    conditional(c, 0x82, target);
+    break;
+
+  case lir::JumpIfFloatGreater:
+    conditional(c, 0x87, target);
+    break;
+
+  case lir::JumpIfFloatLessOrEqual:
+    conditional(c, 0x86, target);
+    break;
+
+  case lir::JumpIfFloatGreaterOrEqual:
+    conditional(c, 0x83, target);
+    break;
+
+  case lir::JumpIfFloatLessOrUnordered:
+    conditional(c, 0x82, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  case lir::JumpIfFloatGreaterOrUnordered:
+    conditional(c, 0x87, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  case lir::JumpIfFloatLessOrEqualOrUnordered:
+    conditional(c, 0x86, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  case lir::JumpIfFloatGreaterOrEqualOrUnordered:
+    conditional(c, 0x83, target);
+    conditional(c, 0x8a, target);
+    break;
+
+  default:
+    abort(c);
+  }
+}
+
+// Scalar-SSE two-operand helpers: the f3 prefix selects the single-
+// precision form, f2 the double-precision form, followed by 0f <op>.
+// 'mod' defaults to 0xc0 (register-direct) at the declaration site.
+void floatRegOp(Context* c, unsigned aSize, lir::Register* a, unsigned bSize,
+           lir::Register* b, uint8_t op, uint8_t mod)
+{
+  if (aSize == 4) {
+    opcode(c, 0xf3);
+  } else {
+    opcode(c, 0xf2);
+  }
+  maybeRex(c, bSize, b, a);
+  opcode(c, 0x0f, op);
+  modrm(c, mod, a, b);
+}
+
+// Same, with a memory source operand.
+void floatMemOp(Context* c, unsigned aSize, lir::Memory* a, unsigned bSize,
+           lir::Register* b, uint8_t op)
+{
+  if (aSize == 4) {
+    opcode(c, 0xf3);
+  } else {
+    opcode(c, 0xf2);
+  }
+  maybeRex(c, bSize, b, a);
+  opcode(c, 0x0f, op);
+  modrmSibImm(c, b, a);
+}
+
+// Forward declaration -- the full definition lives with the other move
+// operations; moveCR2 recurses through it when splitting 64-bit constants.
+void moveCR(Context* c, unsigned aSize, lir::Constant* a,
+       unsigned bSize, lir::Register* b);
+
+// Materializes a constant into a general-purpose register.  On 32-bit
+// targets a 64-bit constant is split into two 32-bit moves (low into
+// b->low, high into b->high).  Otherwise emits mov r, imm (0xb8+reg); an
+// unresolved promise gets a zero placeholder plus an ImmediateTask that
+// patches the immediate once the value is known.  promiseOffset positions
+// the immediate within a larger instruction sequence for that patching.
+void moveCR2(Context* c, UNUSED unsigned aSize, lir::Constant* a,
+        UNUSED unsigned bSize, lir::Register* b, unsigned promiseOffset)
+{
+  if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+    int64_t v = a->value->value();
+
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+
+    moveCR(c, 4, &al, 4, b);
+    moveCR(c, 4, &ah, 4, &bh);
+  } else {
+    maybeRex(c, vm::TargetBytesPerWord, b);
+    opcode(c, 0xb8 + regCode(b));
+    if (a->value->resolved()) {
+      c->code.appendTargetAddress(a->value->value());
+    } else {
+      appendImmediateTask
+        (c, a->value, offsetPromise(c), vm::TargetBytesPerWord, promiseOffset);
+      c->code.appendTargetAddress(static_cast<vm::target_uintptr_t>(0));
+    }
+  }
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/encode.h b/src/codegen/x86/encode.h
new file mode 100644
index 0000000000..293eeab2a9
--- /dev/null
+++ b/src/codegen/x86/encode.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_ENCODE_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_ENCODE_H
+
+#include "codegen/assembler.h"
+#include "codegen/x86/registers.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+void maybeRex(Context* c, unsigned size, int a, int index, int base, bool always);
+
+void maybeRex(Context* c, unsigned size, lir::Register* a, lir::Register* b);
+
+void alwaysRex(Context* c, unsigned size, lir::Register* a, lir::Register* b);
+
+void maybeRex(Context* c, unsigned size, lir::Register* a);
+
+void maybeRex(Context* c, unsigned size, lir::Register* a, lir::Memory* b);
+
+void maybeRex(Context* c, unsigned size, lir::Memory* a);
+
+// Low three bits of a register number -- the part that fits in a ModRM/SIB
+// field; the fourth bit travels in the REX prefix.
+inline int regCode(int a) {
+  return a & 7;
+}
+
+inline int regCode(lir::Register* a) {
+  return regCode(a->low);
+}
+
+// Register numbers at or above xmm0 denote SSE registers.
+inline bool isFloatReg(lir::Register* a) {
+  return a->low >= xmm0;
+}
+
+void modrm(Context* c, uint8_t mod, int a, int b);
+
+void modrm(Context* c, uint8_t mod, lir::Register* a, lir::Register* b);
+
+void sib(Context* c, unsigned scale, int index, int base);
+
+void modrmSib(Context* c, int width, int a, int scale, int index, int base);
+
+void modrmSibImm(Context* c, int a, int scale, int index, int base, int offset);
+  
+void modrmSibImm(Context* c, lir::Register* a, lir::Memory* b);
+
+void opcode(Context* c, uint8_t op);
+
+void opcode(Context* c, uint8_t op1, uint8_t op2);
+
+void unconditional(Context* c, unsigned jump, lir::Constant* a);
+
+void conditional(Context* c, unsigned condition, lir::Constant* a);
+
+void sseMoveRR(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b);
+
+void sseMoveCR(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b);
+
+void sseMoveMR(Context* c, unsigned aSize, lir::Memory* a,
+          unsigned bSize UNUSED, lir::Register* b);
+
+void sseMoveRM(Context* c, unsigned aSize, lir::Register* a,
+       UNUSED unsigned bSize, lir::Memory* b);
+
+void branch(Context* c, lir::TernaryOperation op, lir::Constant* target);
+
+void branchFloat(Context* c, lir::TernaryOperation op, lir::Constant* target);
+
+void floatRegOp(Context* c, unsigned aSize, lir::Register* a, unsigned bSize,
+           lir::Register* b, uint8_t op, uint8_t mod = 0xc0);
+
+void floatMemOp(Context* c, unsigned aSize, lir::Memory* a, unsigned bSize,
+           lir::Register* b, uint8_t op);
+
+void moveCR2(Context* c, UNUSED unsigned aSize, lir::Constant* a,
+        UNUSED unsigned bSize, lir::Register* b, unsigned promiseOffset);
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_ENCODE_H
diff --git a/src/codegen/x86/fixup.cpp b/src/codegen/x86/fixup.cpp
index f739e1b3e4..5892fbd9e2 100644
--- a/src/codegen/x86/fixup.cpp
+++ b/src/codegen/x86/fixup.cpp
@@ -101,6 +101,62 @@ appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset,
   c->tasks = task;
 }
 
+// Listener that patches a 'size'-byte immediate at 'dst' once its promise
+// resolves; 'offset' is the immediate's position within its instruction.
+ImmediateListener::ImmediateListener(vm::System* s, void* dst, unsigned size, unsigned offset):
+  s(s), dst(dst), size(size), offset(offset)
+{ }
+
+// Writes 'src' to 'dst' as a 4- or 8-byte little-endian-native store;
+// other sizes abort.
+void copy(vm::System* s, void* dst, int64_t src, unsigned size) {
+  switch (size) {
+  case 4: {
+    int32_t v = src;
+    memcpy(dst, &v, 4);
+  } break;
+
+  case 8: {
+    int64_t v = src;
+    memcpy(dst, &v, 8);
+  } break;
+
+  default: abort(s);
+  }
+}
+
+// Patches the immediate in place and reports the patched location back to
+// the promise machinery; returns true only when offset is zero.
+bool ImmediateListener::resolve(int64_t value, void** location) {
+  copy(s, dst, value, size);
+  if (location) *location = static_cast<uint8_t*>(dst) + offset;
+  return offset == 0;
+}
+
+// Deferred fix-up for an instruction immediate: 'offset' locates the
+// immediate in the emitted code, 'promise' supplies its eventual value.
+ImmediateTask::ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size,
+              unsigned promiseOffset):
+  Task(next),
+  promise(promise),
+  offset(offset),
+  size(size),
+  promiseOffset(promiseOffset)
+{ }
+
+// If the value is already known, write it directly into the result buffer;
+// otherwise register a listener to patch it when the promise resolves.
+void ImmediateTask::run(Context* c) {
+  if (promise->resolved()) {
+    copy(c->s, c->result + offset->value(), promise->value(), size);
+  } else {
+    new (promise->listen(sizeof(ImmediateListener))) ImmediateListener
+      (c->s, c->result + offset->value(), size, promiseOffset);
+  }
+}
+
+// Queues an ImmediateTask on the context's task list.
+void
+appendImmediateTask(Context* c, Promise* promise, Promise* offset,
+                    unsigned size, unsigned promiseOffset)
+{
+  c->tasks = new(c->zone) ImmediateTask
+    (c->tasks, promise, offset, size, promiseOffset);
+}
+
+// Zone-allocates a promise whose value is (base >> shift) & mask.
+ShiftMaskPromise* shiftMaskPromise(Context* c, Promise* base, unsigned shift, int64_t mask) {
+  return new(c->zone) ShiftMaskPromise(base, shift, mask);
+}
+
 } // namespace x86
 } // namespace codegen
 } // namespace avian
diff --git a/src/codegen/x86/fixup.h b/src/codegen/x86/fixup.h
index 2b97af9cfb..2bf8ff3481 100644
--- a/src/codegen/x86/fixup.h
+++ b/src/codegen/x86/fixup.h
@@ -80,6 +80,37 @@ class OffsetTask: public Task {
 
 void appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset, unsigned instructionSize);
 
+// Promise listener that patches a 'size'-byte immediate at 'dst' once the
+// promised value becomes known (see fixup.cpp for the definitions).
+class ImmediateListener: public Promise::Listener {
+ public:
+  ImmediateListener(vm::System* s, void* dst, unsigned size, unsigned offset);
+
+  virtual bool resolve(int64_t value, void** location);
+
+  vm::System* s;
+  void* dst;          // address of the immediate to patch
+  unsigned size;      // immediate width in bytes (4 or 8)
+  unsigned offset;    // immediate's offset within its instruction
+};
+
+// Assembly-time task that resolves or defers patching of an instruction
+// immediate located at 'offset' within the emitted code.
+class ImmediateTask: public Task {
+ public:
+  ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size,
+                unsigned promiseOffset);
+
+  virtual void run(Context* c);
+
+  Promise* promise;
+  Promise* offset;
+  unsigned size;
+  unsigned promiseOffset;
+};
+
+void
+appendImmediateTask(Context* c, Promise* promise, Promise* offset,
+                    unsigned size, unsigned promiseOffset = 0);
+
+ShiftMaskPromise* shiftMaskPromise(Context* c, Promise* base, unsigned shift, int64_t mask);
+
+
 } // namespace x86
 } // namespace codegen
 } // namespace avian
diff --git a/src/codegen/x86/multimethod.cpp b/src/codegen/x86/multimethod.cpp
new file mode 100644
index 0000000000..d665fef895
--- /dev/null
+++ b/src/codegen/x86/multimethod.cpp
@@ -0,0 +1,174 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "common.h"
+
+#include "codegen/lir.h"
+
+#include "codegen/x86/context.h"
+#include "codegen/x86/multimethod.h"
+#include "codegen/x86/operations.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+
+// Flattens (operation, operand type[s]) tuples into dense indices for the
+// dispatch tables populated below.
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand) {
+  return operation + (lir::UnaryOperationCount * operand);
+}
+
+// Binary and non-branch ternary operations share one table, hence the
+// combined operation count in the stride.
+unsigned index(ArchitectureContext*, lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2)
+{
+  return operation
+    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount) * operand1)
+    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount)
+       * lir::OperandTypeCount * operand2);
+}
+
+// Ternary entries follow the binary ones within each operand-type slot;
+// branch operations use the separate branch table instead.
+unsigned index(ArchitectureContext* c UNUSED, lir::TernaryOperation operation,
+      lir::OperandType operand1, lir::OperandType operand2)
+{
+  assert(c, not isBranch(operation));
+
+  return lir::BinaryOperationCount + operation
+    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount) * operand1)
+    + ((lir::BinaryOperationCount + lir::NonBranchTernaryOperationCount)
+       * lir::OperandTypeCount * operand2);
+}
+
+// Branch table index keyed only by the two comparison operand types.
+unsigned branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2)
+{
+  return operand1 + (lir::OperandTypeCount * operand2);
+}
+
+
+// Fills the ArchitectureContext dispatch tables, mapping each LIR
+// operation / operand-type combination to the routine that encodes it.
+// Combinations left unset are not supported directly and must be lowered
+// by the compiler before reaching the assembler.
+void populateTables(ArchitectureContext* c) {
+  const lir::OperandType C = lir::ConstantOperand;
+  const lir::OperandType A = lir::AddressOperand;
+  const lir::OperandType R = lir::RegisterOperand;
+  const lir::OperandType M = lir::MemoryOperand;
+
+  OperationType* zo = c->operations;
+  UnaryOperationType* uo = c->unaryOperations;
+  BinaryOperationType* bo = c->binaryOperations;
+  BranchOperationType* bro = c->branchOperations;
+
+  // zero-operand operations
+  zo[lir::Return] = return_;
+  zo[lir::LoadBarrier] = ignore;
+  zo[lir::StoreStoreBarrier] = ignore;
+  zo[lir::StoreLoadBarrier] = storeLoadBarrier;
+  zo[lir::Trap] = trap;
+
+  // one-operand operations (calls and jumps)
+  uo[index(c, lir::Call, C)] = CAST1(callC);
+  uo[index(c, lir::Call, R)] = CAST1(callR);
+  uo[index(c, lir::Call, M)] = CAST1(callM);
+
+  uo[index(c, lir::AlignedCall, C)] = CAST1(alignedCallC);
+
+  uo[index(c, lir::LongCall, C)] = CAST1(longCallC);
+
+  uo[index(c, lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
+
+  uo[index(c, lir::Jump, R)] = CAST1(jumpR);
+  uo[index(c, lir::Jump, C)] = CAST1(jumpC);
+  uo[index(c, lir::Jump, M)] = CAST1(jumpM);
+
+  uo[index(c, lir::AlignedJump, C)] = CAST1(alignedJumpC);
+
+  uo[index(c, lir::LongJump, C)] = CAST1(longJumpC);
+
+  uo[index(c, lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
+
+  // two-operand operations (moves, arithmetic, conversions)
+  bo[index(c, lir::Negate, R, R)] = CAST2(negateRR);
+
+  bo[index(c, lir::FloatNegate, R, R)] = CAST2(floatNegateRR);
+
+  bo[index(c, lir::Move, R, R)] = CAST2(moveRR);
+  bo[index(c, lir::Move, C, R)] = CAST2(moveCR);
+  bo[index(c, lir::Move, M, R)] = CAST2(moveMR);
+  bo[index(c, lir::Move, R, M)] = CAST2(moveRM);
+  bo[index(c, lir::Move, C, M)] = CAST2(moveCM);
+  bo[index(c, lir::Move, A, R)] = CAST2(moveAR);
+
+  bo[index(c, lir::FloatSquareRoot, R, R)] = CAST2(floatSqrtRR);
+  bo[index(c, lir::FloatSquareRoot, M, R)] = CAST2(floatSqrtMR);
+
+  bo[index(c, lir::MoveZ, R, R)] = CAST2(moveZRR);
+  bo[index(c, lir::MoveZ, M, R)] = CAST2(moveZMR);
+  bo[index(c, lir::MoveZ, C, R)] = CAST2(moveCR);
+
+  bo[index(c, lir::Add, R, R)] = CAST2(addRR);
+  bo[index(c, lir::Add, C, R)] = CAST2(addCR);
+
+  bo[index(c, lir::Subtract, C, R)] = CAST2(subtractCR);
+  bo[index(c, lir::Subtract, R, R)] = CAST2(subtractRR);
+
+  bo[index(c, lir::FloatAdd, R, R)] = CAST2(floatAddRR);
+  bo[index(c, lir::FloatAdd, M, R)] = CAST2(floatAddMR);
+
+  bo[index(c, lir::FloatSubtract, R, R)] = CAST2(floatSubtractRR);
+  bo[index(c, lir::FloatSubtract, M, R)] = CAST2(floatSubtractMR);
+
+  bo[index(c, lir::And, R, R)] = CAST2(andRR);
+  bo[index(c, lir::And, C, R)] = CAST2(andCR);
+
+  bo[index(c, lir::Or, R, R)] = CAST2(orRR);
+  bo[index(c, lir::Or, C, R)] = CAST2(orCR);
+
+  bo[index(c, lir::Xor, R, R)] = CAST2(xorRR);
+  bo[index(c, lir::Xor, C, R)] = CAST2(xorCR);
+
+  bo[index(c, lir::Multiply, R, R)] = CAST2(multiplyRR);
+  bo[index(c, lir::Multiply, C, R)] = CAST2(multiplyCR);
+
+  bo[index(c, lir::Divide, R, R)] = CAST2(divideRR);
+
+  bo[index(c, lir::FloatMultiply, R, R)] = CAST2(floatMultiplyRR);
+  bo[index(c, lir::FloatMultiply, M, R)] = CAST2(floatMultiplyMR);
+
+  bo[index(c, lir::FloatDivide, R, R)] = CAST2(floatDivideRR);
+  bo[index(c, lir::FloatDivide, M, R)] = CAST2(floatDivideMR);
+
+  bo[index(c, lir::Remainder, R, R)] = CAST2(remainderRR);
+
+  bo[index(c, lir::ShiftLeft, R, R)] = CAST2(shiftLeftRR);
+  bo[index(c, lir::ShiftLeft, C, R)] = CAST2(shiftLeftCR);
+
+  bo[index(c, lir::ShiftRight, R, R)] = CAST2(shiftRightRR);
+  bo[index(c, lir::ShiftRight, C, R)] = CAST2(shiftRightCR);
+
+  bo[index(c, lir::UnsignedShiftRight, R, R)] = CAST2(unsignedShiftRightRR);
+  bo[index(c, lir::UnsignedShiftRight, C, R)] = CAST2(unsignedShiftRightCR);
+
+  bo[index(c, lir::Float2Float, R, R)] = CAST2(float2FloatRR);
+  bo[index(c, lir::Float2Float, M, R)] = CAST2(float2FloatMR);
+
+  bo[index(c, lir::Float2Int, R, R)] = CAST2(float2IntRR);
+  bo[index(c, lir::Float2Int, M, R)] = CAST2(float2IntMR);
+
+  bo[index(c, lir::Int2Float, R, R)] = CAST2(int2FloatRR);
+  bo[index(c, lir::Int2Float, M, R)] = CAST2(int2FloatMR);
+
+  bo[index(c, lir::Absolute, R, R)] = CAST2(absoluteRR);
+  bo[index(c, lir::FloatAbsolute, R, R)] = CAST2(floatAbsoluteRR);
+
+  // compare-and-branch operations
+  bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR);
+  bro[branchIndex(c, C, R)] = CAST_BRANCH(branchCR);
+  bro[branchIndex(c, C, M)] = CAST_BRANCH(branchCM);
+  bro[branchIndex(c, R, M)] = CAST_BRANCH(branchRM);
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/multimethod.h b/src/codegen/x86/multimethod.h
new file mode 100644
index 0000000000..5a73850f29
--- /dev/null
+++ b/src/codegen/x86/multimethod.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_MULTIMETHOD_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_MULTIMETHOD_H
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+class ArchitectureContext;
+
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
+
+unsigned index(ArchitectureContext*, lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2);
+
+unsigned index(ArchitectureContext* c UNUSED, lir::TernaryOperation operation,
+      lir::OperandType operand1, lir::OperandType operand2);
+
+unsigned branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2);
+
+void populateTables(ArchitectureContext* c);
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_MULTIMETHOD_H
diff --git a/src/codegen/x86/operations.cpp b/src/codegen/x86/operations.cpp
new file mode 100644
index 0000000000..bf92b399b8
--- /dev/null
+++ b/src/codegen/x86/operations.cpp
@@ -0,0 +1,1532 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "codegen/x86/context.h"
+#include "codegen/x86/encode.h"
+#include "codegen/x86/registers.h"
+#include "codegen/x86/detect.h"
+#include "codegen/x86/operations.h"
+#include "codegen/x86/padding.h"
+#include "codegen/x86/fixup.h"
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+// ret
+void return_(Context* c) {
+  opcode(c, 0xc3);
+}
+
+// int3 breakpoint
+void trap(Context* c) {
+  opcode(c, 0xcc);
+}
+
+// Used for barriers that are no-ops under the x86 memory model.
+void ignore(Context*) { }
+
+// Full store-load fence: mfence when SSE2 is available, otherwise the
+// classic locked read-modify-write of the stack top.
+void storeLoadBarrier(Context* c) {
+  if (useSSE(c->ac)) {
+    // mfence:
+    c->code.append(0x0f);
+    c->code.append(0xae);
+    c->code.append(0xf0);
+  } else {
+    // lock addq $0x0,(%rsp):
+    c->code.append(0xf0);
+    if (vm::TargetBytesPerWord == 8) {
+      c->code.append(0x48);
+    }
+    c->code.append(0x83);
+    c->code.append(0x04);
+    c->code.append(0x24);
+    c->code.append(0x00);    
+  }
+}
+
+// call rel32 (e8), patched via the registered offset task.
+void callC(Context* c, unsigned size UNUSED, lir::Constant* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  unconditional(c, 0xe8, a);
+}
+
+// A call whose target may be out of rel32 range: on amd64 the address is
+// materialized into LongJumpRegister and called indirectly; on ia32 a
+// plain rel32 call suffices.
+void longCallC(Context* c, unsigned size, lir::Constant* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  if (vm::TargetBytesPerWord == 8) {
+    lir::Register r(LongJumpRegister);
+    moveCR2(c, size, a, size, &r, 11);
+    callR(c, size, &r);
+  } else {
+    callC(c, size, a);
+  }
+}
+
+// jmp r (ff /4, register-direct)
+void jumpR(Context* c, unsigned size UNUSED, lir::Register* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  maybeRex(c, 4, a);
+  opcode(c, 0xff, 0xe0 + regCode(a));
+}
+
+// jmp rel32 (e9)
+void jumpC(Context* c, unsigned size UNUSED, lir::Constant* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  unconditional(c, 0xe9, a);
+}
+
+// jmp [mem] -- rsp here is the /4 extension digit selecting "jmp",
+// not a register operand.
+void jumpM(Context* c, unsigned size UNUSED, lir::Memory* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+  
+  maybeRex(c, 4, a);
+  opcode(c, 0xff);
+  modrmSibImm(c, rsp, a->scale, a->index, a->base, a->offset);
+}
+
+// Long-range jump: same strategy as longCallC.
+void longJumpC(Context* c, unsigned size, lir::Constant* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  if (vm::TargetBytesPerWord == 8) {
+    lir::Register r(LongJumpRegister);
+    moveCR2(c, size, a, size, &r, 11);
+    jumpR(c, size, &r);
+  } else {
+    jumpC(c, size, a);
+  }
+}
+
+// call r (ff /2, register-direct)
+void callR(Context* c, unsigned size UNUSED, lir::Register* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  // maybeRex.W has no meaning here so we disable it
+  maybeRex(c, 4, a);
+  opcode(c, 0xff, 0xd0 + regCode(a));
+}
+
+// call [mem] -- rdx here is the /2 extension digit selecting "call",
+// not a register operand.
+void callM(Context* c, unsigned size UNUSED, lir::Memory* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+  
+  maybeRex(c, 4, a);
+  opcode(c, 0xff);
+  modrmSibImm(c, rdx, a->scale, a->index, a->base, a->offset);
+}
+
+// The "aligned" variants pad with AlignmentPadding so the patchable
+// immediate/offset of the following instruction lands at a fixed alignment
+// (needed for atomic code patching).
+void alignedCallC(Context* c, unsigned size, lir::Constant* a) {
+  new(c->zone) AlignmentPadding(c, 1, 4);
+  callC(c, size, a);
+}
+
+void alignedLongCallC(Context* c, unsigned size, lir::Constant* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  if (vm::TargetBytesPerWord == 8) {
+    new (c->zone) AlignmentPadding(c, 2, 8);
+    longCallC(c, size, a);
+  } else {
+    alignedCallC(c, size, a);
+  }
+}
+
+void alignedJumpC(Context* c, unsigned size, lir::Constant* a) {
+  new (c->zone) AlignmentPadding(c, 1, 4);
+  jumpC(c, size, a);
+}
+
+void alignedLongJumpC(Context* c, unsigned size, lir::Constant* a) {
+  assert(c, size == vm::TargetBytesPerWord);
+
+  if (vm::TargetBytesPerWord == 8) {
+    new (c->zone) AlignmentPadding(c, 2, 8);
+    longJumpC(c, size, a);
+  } else {
+    alignedJumpC(c, size, a);
+  }
+}
+
+// push r (0x50+reg).  A 64-bit value on a 32-bit target pushes the high
+// half first so the low half ends up at the lower address.
+void pushR(Context* c, unsigned size, lir::Register* a)
+{
+  if (vm::TargetBytesPerWord == 4 and size == 8) {
+    lir::Register ah(a->high);
+
+    pushR(c, 4, &ah);
+    pushR(c, 4, a);
+  } else {
+    maybeRex(c, 4, a);
+    opcode(c, 0x50 + regCode(a));
+  }
+}
+
+// pop r (0x58+reg), mirroring pushR's ordering; a 4-byte pop on amd64 is
+// followed by a widening moveRR so the upper bits are well-defined.
+void popR(Context* c, unsigned size, lir::Register* a)
+{
+  if (vm::TargetBytesPerWord == 4 and size == 8) {
+    lir::Register ah(a->high);
+
+    popR(c, 4, a);
+    popR(c, 4, &ah);
+  } else {
+    maybeRex(c, 4, a);
+    opcode(c, 0x58 + regCode(a));
+    if (vm::TargetBytesPerWord == 8 and size == 4) {
+      moveRR(c, 4, a, 8, a);
+    }
+  }
+}
+
+// neg r (f7 /3).  A 64-bit negate on a 32-bit target requires the pair to
+// be pinned to rax:rdx and is built from neg / adc 0 / neg so the carry
+// out of the low word propagates into the high word.
+void negateR(Context* c, unsigned size, lir::Register* a)
+{
+  if (vm::TargetBytesPerWord == 4 and size == 8) {
+    assert(c, a->low == rax and a->high == rdx);
+
+    ResolvedPromise zeroPromise(0);
+    lir::Constant zero(&zeroPromise);
+
+    lir::Register ah(a->high);
+
+    negateR(c, 4, a);
+    addCarryCR(c, 4, &zero, &ah);
+    negateR(c, 4, &ah);
+  } else {
+    maybeRex(c, size, a);
+    opcode(c, 0xf7, 0xd8 + regCode(a));
+  }
+}
+
+// Two-operand LIR form of negate; a and b are the same register, so only
+// the in-place negate is emitted.
+void negateRR(Context* c, unsigned aSize, lir::Register* a,
+         unsigned bSize UNUSED, lir::Register* b UNUSED)
+{
+  assert(c, aSize == bSize);
+
+  negateR(c, aSize, a);
+}
+
+// Constant-to-register move, dispatching to the SSE path for XMM
+// destinations and to moveCR2 for general-purpose ones.
+void moveCR(Context* c, unsigned aSize, lir::Constant* a,
+       unsigned bSize, lir::Register* b)
+{
+  if (isFloatReg(b)) {
+    sseMoveCR(c, aSize, a, bSize, b);
+  } else {
+    moveCR2(c, aSize, a, bSize, b, 0);
+  }
+}
+
+// xchg r, r (0x87), word-sized only.
+void swapRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+       unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+  assert(c, aSize == vm::TargetBytesPerWord);
+  
+  alwaysRex(c, aSize, a, b);
+  opcode(c, 0x87);
+  modrm(c, 0xc0, b, a);
+}
+
+// Sign-extending register-to-register move.  On 32-bit targets a 64-bit
+// move is done pairwise, ordered so a source half is never clobbered
+// before it is read (a full swap falls back to xchg).
+void moveRR(Context* c, unsigned aSize, lir::Register* a,
+       UNUSED unsigned bSize, lir::Register* b)
+{
+  if (isFloatReg(a) or isFloatReg(b)) {
+    sseMoveRR(c, aSize, a, bSize, b);
+    return;
+  }
+  
+  if (vm::TargetBytesPerWord == 4 and aSize == 8 and bSize == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    if (a->high == b->low) {
+      if (a->low == b->high) {
+        swapRR(c, 4, a, 4, b);
+      } else {
+        moveRR(c, 4, &ah, 4, &bh);
+        moveRR(c, 4, a, 4, b);
+      }
+    } else {
+      moveRR(c, 4, a, 4, b);
+      moveRR(c, 4, &ah, 4, &bh);
+    }
+  } else {
+    switch (aSize) {
+    // 1 byte: movsx (0f be).  On ia32 only rax-rbx have addressable low
+    // bytes, so other sources bounce through the destination first.
+    case 1:
+      if (vm::TargetBytesPerWord == 4 and a->low > rbx) {
+        assert(c, b->low <= rbx);
+
+        moveRR(c, vm::TargetBytesPerWord, a, vm::TargetBytesPerWord, b);
+        moveRR(c, 1, b, vm::TargetBytesPerWord, b);
+      } else {
+        alwaysRex(c, aSize, b, a);
+        opcode(c, 0x0f, 0xbe);
+        modrm(c, 0xc0, a, b);
+      }
+      break;
+
+    // 2 bytes: movsx (0f bf)
+    case 2:
+      alwaysRex(c, aSize, b, a);
+      opcode(c, 0x0f, 0xbf);
+      modrm(c, 0xc0, a, b);
+      break;
+
+    // 4 bytes widening to 8: movsxd (0x63) on amd64; on ia32 the widened
+    // pair must be rax:rdx so cdq can sign-extend into rdx.
+    case 4:
+      if (bSize == 8) {
+        if (vm::TargetBytesPerWord == 8) {
+          alwaysRex(c, bSize, b, a);
+          opcode(c, 0x63);
+          modrm(c, 0xc0, a, b);
+        } else {
+          if (a->low == rax and b->low == rax and b->high == rdx) {
+            opcode(c, 0x99); //cdq
+          } else {
+            assert(c, b->low == rax and b->high == rdx);
+
+            moveRR(c, 4, a, 4, b);
+            moveRR(c, 4, b, 8, b);
+          }
+        }
+      } else {
+        if (a->low != b->low) {
+          alwaysRex(c, aSize, a, b);
+          opcode(c, 0x89);
+          modrm(c, 0xc0, b, a);
+        }
+      }
+      break; 
+      
+    // full word: plain mov (0x89), skipped when source == destination
+    case 8:
+      if (a->low != b->low){
+        maybeRex(c, aSize, a, b);
+        opcode(c, 0x89);
+        modrm(c, 0xc0, b, a);
+      }
+      break;
+    }
+  }
+}
+
+// Sign-extending memory-to-register load.  An 8-byte load on a 32-bit
+// target is split into two 4-byte loads (high word from offset + 4);
+// a 4->8 widening load on ia32 must target the rax:rdx pair.
+void moveMR(Context* c, unsigned aSize, lir::Memory* a,
+       unsigned bSize, lir::Register* b)
+{
+  if (isFloatReg(b)) {
+    sseMoveMR(c, aSize, a, bSize, b);
+    return;
+  }
+  
+  switch (aSize) {
+  case 1:
+    maybeRex(c, bSize, b, a);
+    opcode(c, 0x0f, 0xbe);
+    modrmSibImm(c, b, a);
+    break;
+
+  case 2:
+    maybeRex(c, bSize, b, a);
+    opcode(c, 0x0f, 0xbf);
+    modrmSibImm(c, b, a);
+    break;
+
+  case 4:
+    if (vm::TargetBytesPerWord == 8) {
+      maybeRex(c, bSize, b, a);
+      opcode(c, 0x63);
+      modrmSibImm(c, b, a);
+    } else {
+      if (bSize == 8) {
+        assert(c, b->low == rax and b->high == rdx);
+        
+        moveMR(c, 4, a, 4, b);
+        moveRR(c, 4, b, 8, b);
+      } else {
+        maybeRex(c, bSize, b, a);
+        opcode(c, 0x8b);
+        modrmSibImm(c, b, a);
+      }
+    }
+    break;
+    
+  case 8:
+    if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+      lir::Memory ah(a->base, a->offset + 4, a->index, a->scale);
+      lir::Register bh(b->high);
+
+      moveMR(c, 4, a, 4, b);    
+      moveMR(c, 4, &ah, 4, &bh);
+    } else {
+      maybeRex(c, bSize, b, a);
+      opcode(c, 0x8b);
+      modrmSibImm(c, b, a);
+    }
+    break;
+
+  default: abort(c);
+  }
+}
+
+// Store register `a` to memory operand `b`.  SSE sources go through
+// sseMoveRM.  An 8-byte store on a 32-bit target is split into two
+// 4-byte stores of the low/high register pair.
+void moveRM(Context* c, unsigned aSize, lir::Register* a,
+       unsigned bSize UNUSED, lir::Memory* b)
+{
+  assert(c, aSize == bSize);
+  
+  if (isFloatReg(a)) {
+    sseMoveRM(c, aSize, a, bSize, b);
+    return;
+  }
+  
+  switch (aSize) {
+  case 1:
+    maybeRex(c, bSize, a, b);
+    opcode(c, 0x88); // mov m8, r8
+    modrmSibImm(c, a, b);
+    break;
+
+  case 2:
+    opcode(c, 0x66); // operand-size override for 16-bit store
+    maybeRex(c, bSize, a, b);
+    opcode(c, 0x89);
+    modrmSibImm(c, a, b);
+    break;
+
+  case 4:
+    if (vm::TargetBytesPerWord == 8) {
+      maybeRex(c, bSize, a, b);
+      opcode(c, 0x89);
+      modrmSibImm(c, a, b);
+      break;
+    } else {
+      opcode(c, 0x89); // mov m32, r32 (no REX on 32-bit targets)
+      modrmSibImm(c, a, b);
+    }
+    break;
+    
+  case 8:
+    if (vm::TargetBytesPerWord == 8) {
+      maybeRex(c, bSize, a, b);
+      opcode(c, 0x89); // mov m64, r64
+      modrmSibImm(c, a, b);
+    } else {
+      // split the 8-byte store into two 4-byte stores
+      lir::Register ah(a->high);
+      lir::Memory bh(b->base, b->offset + 4, b->index, b->scale);
+
+      moveRM(c, 4, a, 4, b);    
+      moveRM(c, 4, &ah, 4, &bh);
+    }
+    break;
+
+  default: abort(c);
+  }
+}
+
+// Load the word stored AT address `a`: first materialize the address
+// constant in `b`, then dereference it with a zero-offset memory load
+// through `b` itself.
+void moveAR(Context* c, unsigned aSize, lir::Address* a,
+       unsigned bSize, lir::Register* b)
+{
+  assert(c, vm::TargetBytesPerWord == 8 or (aSize == 4 and bSize == 4));
+
+  lir::Constant constant(a->address);
+  lir::Memory memory(b->low, 0, -1, 0); // [b + 0], no index
+
+  moveCR(c, aSize, &constant, bSize, b);
+  moveMR(c, bSize, &memory, bSize, b);
+}
+
+// Store constant `a` to memory operand `b` (mov m, imm).  Unresolved
+// 4-byte constants are patched later via an ImmediateTask.  8-byte
+// stores use imm32 sign-extension when the value fits, a scratch
+// register otherwise, and two 4-byte stores on 32-bit targets.
+void moveCM(Context* c, unsigned aSize UNUSED, lir::Constant* a,
+       unsigned bSize, lir::Memory* b)
+{
+  switch (bSize) {
+  case 1:
+    maybeRex(c, bSize, b);
+    opcode(c, 0xc6); // mov m8, imm8
+    modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
+    c->code.append(a->value->value());
+    break;
+
+  case 2:
+    opcode(c, 0x66); // operand-size override
+    maybeRex(c, bSize, b);
+    opcode(c, 0xc7); // mov m16, imm16
+    modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
+    c->code.append2(a->value->value());
+    break;
+
+  case 4:
+    maybeRex(c, bSize, b);
+    opcode(c, 0xc7); // mov m32, imm32
+    modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
+    if (a->value->resolved()) {
+      c->code.append4(a->value->value());
+    } else {
+      // value unknown yet: emit a placeholder and patch it later
+      appendImmediateTask(c, a->value, offsetPromise(c), 4);
+      c->code.append4(0);
+    }
+    break;
+
+  case 8: {
+    if (vm::TargetBytesPerWord == 8) {
+      if (a->value->resolved() and vm::fitsInInt32(a->value->value())) {
+        maybeRex(c, bSize, b);
+        opcode(c, 0xc7); // mov m64, imm32 (sign-extended)
+        modrmSibImm(c, 0, b->scale, b->index, b->base, b->offset);
+        c->code.append4(a->value->value());
+      } else {
+        // no mov m64, imm64 form: go through a scratch register
+        lir::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, 8, a, 8, &tmp);
+        moveRM(c, 8, &tmp, 8, b);
+        c->client->releaseTemporary(tmp.low);
+      }
+    } else {
+      // 32-bit target: store the two halves separately
+      lir::Constant ah(shiftMaskPromise(c, a->value, 32, 0xFFFFFFFF));
+      lir::Constant al(shiftMaskPromise(c, a->value, 0, 0xFFFFFFFF));
+
+      lir::Memory bh(b->base, b->offset + 4, b->index, b->scale);
+
+      moveCM(c, 4, &al, 4, b);
+      moveCM(c, 4, &ah, 4, &bh);
+    }
+  } break;
+
+  default: abort(c);
+  }
+}
+
+// Zero-extending register-to-register move; only the 16-bit source
+// form (movzx, 0x0f 0xb7) is implemented — other sizes abort.
+void moveZRR(Context* c, unsigned aSize, lir::Register* a,
+        unsigned bSize UNUSED, lir::Register* b)
+{
+  switch (aSize) {
+  case 2:
+    alwaysRex(c, aSize, b, a);
+    opcode(c, 0x0f, 0xb7); // movzx r, r16
+    modrm(c, 0xc0, a, b);
+    break;
+
+  default: abort(c);
+  }
+}
+
+// Zero-extending load; only 16-bit sources into a word-sized register
+// are supported (movzx r, m16).
+void moveZMR(Context* c, unsigned aSize UNUSED, lir::Memory* a,
+        unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, bSize == vm::TargetBytesPerWord);
+  assert(c, aSize == 2);
+  
+  maybeRex(c, bSize, b, a);
+  opcode(c, 0x0f, 0xb7); // movzx r, m16
+  modrmSibImm(c, b->low, a->scale, a->index, a->base, a->offset);
+}
+
+// adc b, a — add with carry, used for the high word of 64-bit adds on
+// 32-bit targets.
+void addCarryRR(Context* c, unsigned size, lir::Register* a,
+           lir::Register* b)
+{
+  assert(c, vm::TargetBytesPerWord == 8 or size == 4);
+  
+  maybeRex(c, size, a, b);
+  opcode(c, 0x11); // adc r/m, r
+  modrm(c, 0xc0, b, a);
+}
+
+// b += a.  A 64-bit add on a 32-bit target is add on the low words
+// followed by adc on the high words.
+void addRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    addRR(c, 4, a, 4, b);
+    addCarryRR(c, 4, &ah, &bh); // propagate carry into the high word
+  } else {
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x01); // add r/m, r
+    modrm(c, 0xc0, b, a);
+  }
+}
+
+// adc b, imm — add constant with carry (0x83/0x81, /2 extension,
+// imm8 when the value fits).
+void addCarryCR(Context* c, unsigned size, lir::Constant* a,
+           lir::Register* b)
+{
+  
+  int64_t v = a->value->value();
+  maybeRex(c, size, b);
+  if (vm::fitsInInt8(v)) {
+    opcode(c, 0x83, 0xd0 + regCode(b));
+    c->code.append(v);
+  } else {
+    opcode(c, 0x81, 0xd0 + regCode(b));
+    c->code.append4(v);
+  }
+}
+
+// b += constant.  Adding zero emits nothing.  64-bit adds on 32-bit
+// targets split the constant into 32-bit halves (add + adc); values
+// that do not fit in imm32 go through a scratch register.
+void addCR(Context* c, unsigned aSize, lir::Constant* a,
+      unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  int64_t v = a->value->value();
+  if (v) {
+    if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+      lir::Constant ah(&high);
+
+      ResolvedPromise low(v & 0xFFFFFFFF);
+      lir::Constant al(&low);
+
+      lir::Register bh(b->high);
+
+      addCR(c, 4, &al, 4, b);
+      addCarryCR(c, 4, &ah, &bh);
+    } else {
+      if (vm::fitsInInt32(v)) {
+        maybeRex(c, aSize, b);
+        if (vm::fitsInInt8(v)) {
+          opcode(c, 0x83, 0xc0 + regCode(b)); // add r, imm8
+          c->code.append(v);
+        } else {
+          opcode(c, 0x81, 0xc0 + regCode(b)); // add r, imm32
+          c->code.append4(v);
+        }
+      } else {
+        // constant too wide for an immediate: use a scratch register
+        lir::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, aSize, a, aSize, &tmp);
+        addRR(c, aSize, &tmp, bSize, b);
+        c->client->releaseTemporary(tmp.low);
+      }
+    }
+  }
+}
+
+// sbb b, imm — subtract constant with borrow (0x83/0x81, /3
+// extension), used for the high word of 64-bit subtracts.
+void subtractBorrowCR(Context* c, unsigned size UNUSED, lir::Constant* a,
+                 lir::Register* b)
+{
+  assert(c, vm::TargetBytesPerWord == 8 or size == 4);
+  
+  int64_t v = a->value->value();
+  if (vm::fitsInInt8(v)) {
+    opcode(c, 0x83, 0xd8 + regCode(b));
+    c->code.append(v);
+  } else {
+    opcode(c, 0x81, 0xd8 + regCode(b));
+    c->code.append4(v);
+  }
+}
+
+// b -= constant.  Mirrors addCR: no-op for zero, sub + sbb halves for
+// 64-bit on 32-bit targets, scratch register for wide constants.
+void subtractCR(Context* c, unsigned aSize, lir::Constant* a,
+           unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  int64_t v = a->value->value();
+  if (v) {
+    if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+      lir::Constant ah(&high);
+
+      ResolvedPromise low(v & 0xFFFFFFFF);
+      lir::Constant al(&low);
+
+      lir::Register bh(b->high);
+
+      subtractCR(c, 4, &al, 4, b);
+      subtractBorrowCR(c, 4, &ah, &bh);
+    } else {
+      if (vm::fitsInInt32(v)) {
+        maybeRex(c, aSize, b);
+        if (vm::fitsInInt8(v)) {
+          opcode(c, 0x83, 0xe8 + regCode(b)); // sub r, imm8
+          c->code.append(v);
+        } else {
+          opcode(c, 0x81, 0xe8 + regCode(b)); // sub r, imm32
+          c->code.append4(v);
+        }
+      } else {
+        lir::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, aSize, a, aSize, &tmp);
+        subtractRR(c, aSize, &tmp, bSize, b);
+        c->client->releaseTemporary(tmp.low);
+      }
+    }
+  }
+}
+
+// sbb b, a — subtract register with borrow, used for the high word of
+// 64-bit subtracts on 32-bit targets.
+void subtractBorrowRR(Context* c, unsigned size, lir::Register* a,
+                 lir::Register* b)
+{
+  assert(c, vm::TargetBytesPerWord == 8 or size == 4);
+  
+  maybeRex(c, size, a, b);
+  opcode(c, 0x19); // sbb r/m, r
+  modrm(c, 0xc0, b, a);
+}
+
+// b -= a.  A 64-bit subtract on a 32-bit target is sub on the low
+// words followed by sbb on the high words.
+void subtractRR(Context* c, unsigned aSize, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+  
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    subtractRR(c, 4, a, 4, b);
+    subtractBorrowRR(c, 4, &ah, &bh); // propagate borrow
+  } else {
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x29); // sub r/m, r
+    modrm(c, 0xc0, b, a);
+  }
+}
+
+// b &= a, word-wise for 64-bit operands on 32-bit targets (bitwise ops
+// need no carry between halves).
+void andRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    andRR(c, 4, a, 4, b);
+    andRR(c, 4, &ah, 4, &bh);
+  } else {
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x21); // and r/m, r
+    modrm(c, 0xc0, b, a);
+  }
+}
+
+// b &= constant.  Note: unlike addCR/orCR this does NOT skip the
+// identity value, so it always emits code.  Wide constants go through
+// a scratch register.
+void andCR(Context* c, unsigned aSize, lir::Constant* a,
+      unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  int64_t v = a->value->value();
+
+  if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+
+    andCR(c, 4, &al, 4, b);
+    andCR(c, 4, &ah, 4, &bh);
+  } else {
+    if (vm::fitsInInt32(v)) {
+      maybeRex(c, aSize, b);
+      if (vm::fitsInInt8(v)) {
+        opcode(c, 0x83, 0xe0 + regCode(b)); // and r, imm8
+        c->code.append(v);
+      } else {
+        opcode(c, 0x81, 0xe0 + regCode(b)); // and r, imm32
+        c->code.append4(v);
+      }
+    } else {
+      lir::Register tmp
+        (c->client->acquireTemporary(GeneralRegisterMask));
+      moveCR(c, aSize, a, aSize, &tmp);
+      andRR(c, aSize, &tmp, bSize, b);
+      c->client->releaseTemporary(tmp.low);
+    }
+  }
+}
+
+// b |= a, word-wise for 64-bit operands on 32-bit targets.
+void orRR(Context* c, unsigned aSize, lir::Register* a,
+     unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    orRR(c, 4, a, 4, b);
+    orRR(c, 4, &ah, 4, &bh);
+  } else {
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x09); // or r/m, r
+    modrm(c, 0xc0, b, a);
+  }
+}
+
+// b |= constant; or-ing zero emits nothing.  Same shape as addCR:
+// split halves on 32-bit targets, scratch register for wide values.
+void orCR(Context* c, unsigned aSize, lir::Constant* a,
+     unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  int64_t v = a->value->value();
+  if (v) {
+    if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+      lir::Constant ah(&high);
+
+      ResolvedPromise low(v & 0xFFFFFFFF);
+      lir::Constant al(&low);
+
+      lir::Register bh(b->high);
+
+      orCR(c, 4, &al, 4, b);
+      orCR(c, 4, &ah, 4, &bh);
+    } else {
+      if (vm::fitsInInt32(v)) {
+        maybeRex(c, aSize, b);
+        if (vm::fitsInInt8(v)) {
+          opcode(c, 0x83, 0xc8 + regCode(b)); // or r, imm8
+          c->code.append(v);
+        } else {
+          opcode(c, 0x81, 0xc8 + regCode(b)); // or r, imm32
+          c->code.append4(v);        
+        }
+      } else {
+        lir::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, aSize, a, aSize, &tmp);
+        orRR(c, aSize, &tmp, bSize, b);
+        c->client->releaseTemporary(tmp.low);
+      }
+    }
+  }
+}
+
+// b ^= a, word-wise for 64-bit operands on 32-bit targets.
+// NOTE(review): unlike the sibling and/or/sub RR emitters this one has
+// no assert(c, aSize == bSize) — presumably intentional, but worth
+// confirming against callers.
+void xorRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b)
+{
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    xorRR(c, 4, a, 4, b);
+    xorRR(c, 4, &ah, 4, &bh);
+  } else {
+    maybeRex(c, aSize, a, b);
+    opcode(c, 0x31); // xor r/m, r
+    modrm(c, 0xc0, b, a);
+  }
+}
+
+// b ^= constant; xor-ing zero emits nothing.  Same shape as orCR.
+void xorCR(Context* c, unsigned aSize, lir::Constant* a,
+      unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  int64_t v = a->value->value();
+  if (v) {
+    if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+      ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+      lir::Constant ah(&high);
+
+      ResolvedPromise low(v & 0xFFFFFFFF);
+      lir::Constant al(&low);
+
+      lir::Register bh(b->high);
+
+      xorCR(c, 4, &al, 4, b);
+      xorCR(c, 4, &ah, 4, &bh);
+    } else {
+      if (vm::fitsInInt32(v)) {
+        maybeRex(c, aSize, b);
+        if (vm::fitsInInt8(v)) {
+          opcode(c, 0x83, 0xf0 + regCode(b)); // xor r, imm8
+          c->code.append(v);
+        } else {
+          opcode(c, 0x81, 0xf0 + regCode(b)); // xor r, imm32
+          c->code.append4(v);        
+        }
+      } else {
+        lir::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, aSize, a, aSize, &tmp);
+        xorRR(c, aSize, &tmp, bSize, b);
+        c->client->releaseTemporary(tmp.low);
+      }
+    }
+  }
+}
+
+// b *= a.  The common case is imul (0x0f 0xaf).  A 64-bit multiply on
+// a 32-bit target is synthesized from three 32-bit multiplies using
+// the widening mul through eax:edx:
+//   b.hi = a.hi * b.lo + a.lo * b.hi + high32(a.lo * b.lo)
+//   b.lo = low32(a.lo * b.lo)
+// A scratch register stands in for b.lo when a and b alias.
+void multiplyRR(Context* c, unsigned aSize, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    assert(c, b->high == rdx);
+    assert(c, b->low != rax);
+    assert(c, a->low != rax);
+    assert(c, a->high != rax);
+
+    c->client->save(rax); // widening mul clobbers eax:edx
+
+    lir::Register axdx(rax, rdx);
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    lir::Register tmp(-1);
+    lir::Register* scratch;
+    if (a->low == b->low) {
+      tmp.low = c->client->acquireTemporary
+        (GeneralRegisterMask & ~(1 << rax));
+      scratch = &tmp;
+      moveRR(c, 4, b, 4, scratch);
+    } else {
+      scratch = b;
+    }
+
+    moveRR(c, 4, b, 4, &axdx);
+    multiplyRR(c, 4, &ah, 4, scratch);
+    multiplyRR(c, 4, a, 4, &bh);
+    addRR(c, 4, &bh, 4, scratch);
+    
+    // mul a->low,%eax%edx
+    opcode(c, 0xf7, 0xe0 + a->low);
+    
+    addRR(c, 4, scratch, 4, &bh);
+    moveRR(c, 4, &axdx, 4, b);
+
+    if (tmp.low != -1) {
+      c->client->releaseTemporary(tmp.low);
+    }
+  } else {
+    maybeRex(c, aSize, b, a);
+    opcode(c, 0x0f, 0xaf); // imul r, r/m
+    modrm(c, 0xc0, a, b);
+  }
+}
+
+// cmp b, a — sets flags only; word-sized operands at most (callers
+// use branchLong for 64-bit comparisons on 32-bit targets).
+void compareRR(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+  assert(c, aSize <= vm::TargetBytesPerWord);
+
+  maybeRex(c, aSize, a, b);
+  opcode(c, 0x39); // cmp r/m, r
+  modrm(c, 0xc0, b, a);  
+}
+
+// cmp b, imm — compares register against a constant; unresolved or
+// too-wide constants are first materialized in a scratch register.
+void compareCR(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+  assert(c, vm::TargetBytesPerWord == 8 or aSize == 4);
+  
+  if (a->value->resolved() and vm::fitsInInt32(a->value->value())) {
+    int64_t v = a->value->value();
+    maybeRex(c, aSize, b);
+    if (vm::fitsInInt8(v)) {
+      opcode(c, 0x83, 0xf8 + regCode(b)); // cmp r, imm8
+      c->code.append(v);
+    } else {
+      opcode(c, 0x81, 0xf8 + regCode(b)); // cmp r, imm32
+      c->code.append4(v);
+    }
+  } else {
+    lir::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
+    moveCR(c, aSize, a, aSize, &tmp);
+    compareRR(c, aSize, &tmp, bSize, b);
+    c->client->releaseTemporary(tmp.low);
+  }
+}
+
+// cmp m, r — compares register against memory.  On 64-bit targets a
+// 4-byte register is first sign-extended to 8 bytes via moveRR.
+void compareRM(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Memory* b)
+{
+  assert(c, aSize == bSize);
+  assert(c, vm::TargetBytesPerWord == 8 or aSize == 4);
+  
+  if (vm::TargetBytesPerWord == 8 and aSize == 4) {
+    moveRR(c, 4, a, 8, a);
+  }
+  maybeRex(c, bSize, a, b);
+  opcode(c, 0x39); // cmp r/m, r
+  modrmSibImm(c, a, b);
+}
+
+// cmp m, imm — compares memory against a constant.  The `rdi`
+// argument is the /7 opcode-extension field of cmp, not a register
+// reference.  Unresolved constants go through a scratch register.
+void compareCM(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b)
+{
+  assert(c, aSize == bSize);
+  assert(c, vm::TargetBytesPerWord == 8 or aSize == 4);
+  
+  if (a->value->resolved()) { 
+    int64_t v = a->value->value();   
+    maybeRex(c, aSize, b);
+    opcode(c, vm::fitsInInt8(v) ? 0x83 : 0x81);
+    modrmSibImm(c, rdi, b->scale, b->index, b->base, b->offset);
+    
+    if (vm::fitsInInt8(v)) {
+      c->code.append(v);
+    } else if (vm::fitsInInt32(v)) {
+      c->code.append4(v);
+    } else {
+      abort(c); // no cmp m, imm64 form
+    }
+  } else {
+    lir::Register tmp(c->client->acquireTemporary(GeneralRegisterMask));
+    moveCR(c, aSize, a, bSize, &tmp);
+    compareRM(c, bSize, &tmp, bSize, b);
+    c->client->releaseTemporary(tmp.low);
+  }
+}
+
+// ucomiss/ucomisd a, b — unordered SSE compare; the 0x66 prefix
+// selects the double-precision form.
+void compareFloatRR(Context* c, unsigned aSize, lir::Register* a,
+               unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  if (aSize == 8) {
+    opcode(c, 0x66);
+  }
+  maybeRex(c, 4, a, b);
+  opcode(c, 0x0f, 0x2e);
+  modrm(c, 0xc0, a, b);
+}
+
+// Emit a two-word conditional branch: compare the high halves first
+// (signed condition), then — if the high halves were equal or did not
+// decide — compare the low halves with the UNSIGNED counterpart
+// condition.  `next` records the location of an 8-bit forward-jump
+// displacement that skips the low-word compare; it is patched at the
+// end once the fall-through offset is known.
+void branchLong(Context* c, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compare)
+{
+  compare(c, 4, ah, 4, bh);
+  
+  unsigned next = 0;
+
+  switch (op) {
+  case lir::JumpIfEqual:
+    opcode(c, 0x75); // jne
+    next = c->code.length();
+    c->code.append(0); // placeholder displacement, patched below
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x84, target); // je
+    break;
+
+  case lir::JumpIfNotEqual:
+    conditional(c, 0x85, target); // jne
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x85, target); // jne
+    break;
+
+  case lir::JumpIfLess:
+    conditional(c, 0x8c, target); // jl
+
+    opcode(c, 0x7f); // jg
+    next = c->code.length();
+    c->code.append(0);
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x82, target); // jb
+    break;
+
+  case lir::JumpIfGreater:
+    conditional(c, 0x8f, target); // jg
+
+    opcode(c, 0x7c); // jl
+    next = c->code.length();
+    c->code.append(0);
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x87, target); // ja
+    break;
+
+  case lir::JumpIfLessOrEqual:
+    conditional(c, 0x8c, target); // jl
+
+    opcode(c, 0x7f); // jg
+    next = c->code.length();
+    c->code.append(0);
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x86, target); // jbe
+    break;
+
+  case lir::JumpIfGreaterOrEqual:
+    conditional(c, 0x8f, target); // jg
+
+    opcode(c, 0x7c); // jl
+    next = c->code.length();
+    c->code.append(0);
+
+    compare(c, 4, al, 4, bl);
+    conditional(c, 0x83, target); // jae
+    break;
+
+  default:
+    abort(c);
+  }  
+
+  if (next) {
+    // back-patch the short forward jump to land here
+    int8_t nextOffset = c->code.length() - next - 1;
+    c->code.set(next, &nextOffset, 1);
+  }
+}
+
+// Compare-and-branch on two registers: SSE compare for float
+// branches, branchLong for two-word integer operands, plain
+// cmp + jcc otherwise.
+void branchRR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target)
+{
+  if (isFloatBranch(op)) {
+    compareFloatRR(c, size, a, size, b);
+    branchFloat(c, op, target);
+  } else if (size > vm::TargetBytesPerWord) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    branchLong(c, op, a, &ah, b, &bh, target, CAST2(compareRR));
+  } else {
+    compareRR(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+// Compare register `b` against constant `a` and branch to `target`.
+// Two-word operands (64-bit on a 32-bit target) split the constant
+// into halves and use branchLong; otherwise a plain cmp + jcc.
+void branchCR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target)
+{
+  assert(c, not isFloatBranch(op));
+
+  if (size > vm::TargetBytesPerWord) {
+    int64_t v = a->value->value();
+
+    // Split the 64-bit constant into 32-bit halves.  Mask with
+    // 0xFFFFFFFF (as addCR, subtractCR, andCR, orCR and xorCR do)
+    // rather than ~static_cast<uintptr_t>(0): on an LP64 build host
+    // the latter is a 64-bit all-ones mask and fails to truncate the
+    // halves.
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+  
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+  
+    lir::Register bh(b->high);
+
+    branchLong(c, op, &al, &ah, b, &bh, target, CAST2(compareCR));
+  } else {
+    compareCR(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+// Compare register against memory and branch; integer, word-sized
+// operands only.
+void branchRM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target)
+{
+  assert(c, not isFloatBranch(op));
+  assert(c, size <= vm::TargetBytesPerWord);
+
+  compareRM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+// Compare memory against a constant and branch; integer, word-sized
+// operands only.
+void branchCM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target)
+{
+  assert(c, not isFloatBranch(op));
+  assert(c, size <= vm::TargetBytesPerWord);
+
+  compareCM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+// b *= constant.  Multiplying by 1 emits nothing.  imm8/imm32 forms
+// of imul are used when the value fits; otherwise the constant is
+// materialized in a scratch register.  The 64-bit-on-32-bit path
+// avoids rax/rdx because multiplyRR's pair path needs them.
+void multiplyCR(Context* c, unsigned aSize, lir::Constant* a,
+           unsigned bSize, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  if (vm::TargetBytesPerWord == 4 and aSize == 8) {
+    const uint32_t mask = GeneralRegisterMask & ~((1 << rax) | (1 << rdx));
+    lir::Register tmp(c->client->acquireTemporary(mask),
+                            c->client->acquireTemporary(mask));
+
+    moveCR(c, aSize, a, aSize, &tmp);
+    multiplyRR(c, aSize, &tmp, bSize, b);
+    c->client->releaseTemporary(tmp.low);
+    c->client->releaseTemporary(tmp.high);
+  } else {
+    int64_t v = a->value->value();
+    if (v != 1) {
+      if (vm::fitsInInt32(v)) {
+        maybeRex(c, bSize, b, b);
+        if (vm::fitsInInt8(v)) {
+          opcode(c, 0x6b); // imul r, r/m, imm8
+          modrm(c, 0xc0, b, b);
+          c->code.append(v);
+        } else {
+          opcode(c, 0x69); // imul r, r/m, imm32
+          modrm(c, 0xc0, b, b);
+          c->code.append4(v);        
+        }
+      } else {
+        lir::Register tmp
+          (c->client->acquireTemporary(GeneralRegisterMask));
+        moveCR(c, aSize, a, aSize, &tmp);
+        multiplyRR(c, aSize, &tmp, bSize, b);
+        c->client->releaseTemporary(tmp.low);      
+      }
+    }
+  }
+}
+
+// b /= a (signed).  Dividend must already be in rax; cdq sign-extends
+// it into rdx, then idiv (0xf7 /7) leaves the quotient in rax.  rdx
+// is clobbered and therefore saved via the client.
+void divideRR(Context* c, unsigned aSize, lir::Register* a,
+         unsigned bSize UNUSED, lir::Register* b UNUSED)
+{
+  assert(c, aSize == bSize);
+
+  assert(c, b->low == rax);
+  assert(c, a->low != rdx);
+
+  c->client->save(rdx);
+
+  maybeRex(c, aSize, a, b);    
+  opcode(c, 0x99); // cdq
+  maybeRex(c, aSize, b, a);
+  opcode(c, 0xf7, 0xf8 + regCode(a)); // idiv r
+}
+
+// b %= a (signed).  Same cdq + idiv sequence as divideRR, then the
+// remainder is copied out of rdx into b.
+void remainderRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, aSize == bSize);
+
+  assert(c, b->low == rax);
+  assert(c, a->low != rdx);
+
+  c->client->save(rdx);
+
+  maybeRex(c, aSize, a, b);    
+  opcode(c, 0x99); // cdq
+  maybeRex(c, aSize, b, a);
+  opcode(c, 0xf7, 0xf8 + regCode(a)); // idiv r
+
+  lir::Register dx(rdx);
+  moveRR(c, vm::TargetBytesPerWord, &dx, vm::TargetBytesPerWord, b);
+}
+
+// Shift register `b` by constant `a`.  `type` is the opcode-extension
+// base for the shift family (0xe0 shl, 0xf8 sar, 0xe8 shr).  64-bit
+// shifts on a 32-bit target cannot use an immediate: the masked count
+// is loaded into cl and the register-count `shift` emitter is called.
+void doShift(Context* c, UNUSED void (*shift)
+        (Context*, unsigned, lir::Register*, unsigned,
+         lir::Register*),
+        int type, UNUSED unsigned aSize, lir::Constant* a,
+        unsigned bSize, lir::Register* b)
+{
+  int64_t v = a->value->value();
+
+  if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+    c->client->save(rcx); // shift count lives in cl
+
+    lir::Register cx(rcx);
+    ResolvedPromise promise(v & 0x3F);
+    lir::Constant masked(&promise);
+    moveCR(c, 4, &masked, 4, &cx);
+    shift(c, aSize, &cx, bSize, b);
+  } else {
+    maybeRex(c, bSize, b);
+    if (v == 1) {
+      opcode(c, 0xd1, type + regCode(b)); // shift-by-one form
+    } else if (vm::fitsInInt8(v)) {
+      opcode(c, 0xc1, type + regCode(b)); // shift-by-imm8 form
+      c->code.append(v);
+    } else {
+      abort(c); // shift counts wider than imm8 are meaningless
+    }
+  }
+}
+
+// b <<= a (count register).  The 64-bit-on-32-bit path emits
+// shld + shl and then, when the masked count in cl is >= 32, fixes up
+// the result by moving the low word into the high word and zeroing
+// the low word (x86 shifts only use the low 5 bits per register).
+void shiftLeftRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
+            unsigned bSize, lir::Register* b)
+{
+  if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+    lir::Register cx(rcx);
+    if (a->low != rcx) {
+      c->client->save(rcx);
+      ResolvedPromise promise(0x3F);
+      lir::Constant mask(&promise);
+      moveRR(c, 4, a, 4, &cx);
+      andCR(c, 4, &mask, 4, &cx); // clamp count to 0..63
+    }
+
+    // shld
+    opcode(c, 0x0f, 0xa5);
+    modrm(c, 0xc0, b->high, b->low);
+
+    // shl
+    opcode(c, 0xd3, 0xe0 + b->low);
+
+    ResolvedPromise promise(32);
+    lir::Constant constant(&promise);
+    compareCR(c, aSize, &constant, aSize, &cx);
+
+    // skip the fix-up when count < 32
+    opcode(c, 0x7c); //jl
+    c->code.append(2 + 2);
+
+    lir::Register bh(b->high);
+    moveRR(c, 4, b, 4, &bh); // 2 bytes
+    xorRR(c, 4, b, 4, b); // 2 bytes
+  } else {
+    assert(c, a->low == rcx);  
+
+    maybeRex(c, bSize, a, b);
+    opcode(c, 0xd3, 0xe0 + regCode(b)); // shl r, cl
+  }
+}
+
+// b <<= constant (0xe0 = shl opcode extension).
+void shiftLeftCR(Context* c, unsigned aSize, lir::Constant* a,
+            unsigned bSize, lir::Register* b)
+{
+  doShift(c, shiftLeftRR, 0xe0, aSize, a, bSize, b);
+}
+
+// b >>= a, arithmetic (sign-propagating).  The 64-bit-on-32-bit path
+// emits shrd + sar and, when the count is >= 32, moves the high word
+// into the low word and fills the high word with the sign bit
+// (sar 31).
+void shiftRightRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
+             unsigned bSize, lir::Register* b)
+{
+  if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+    lir::Register cx(rcx);
+    if (a->low != rcx) {
+      c->client->save(rcx);
+      ResolvedPromise promise(0x3F);
+      lir::Constant mask(&promise);
+      moveRR(c, 4, a, 4, &cx);
+      andCR(c, 4, &mask, 4, &cx); // clamp count to 0..63
+    }
+
+    // shrd
+    opcode(c, 0x0f, 0xad);
+    modrm(c, 0xc0, b->low, b->high);
+
+    // sar
+    opcode(c, 0xd3, 0xf8 + b->high);
+
+    ResolvedPromise promise(32);
+    lir::Constant constant(&promise);
+    compareCR(c, aSize, &constant, aSize, &cx);
+
+    // skip the fix-up when count < 32
+    opcode(c, 0x7c); //jl
+    c->code.append(2 + 3);
+
+    lir::Register bh(b->high);
+    moveRR(c, 4, &bh, 4, b); // 2 bytes
+
+    // sar 31,high
+    opcode(c, 0xc1, 0xf8 + b->high);
+    c->code.append(31);
+  } else {
+    assert(c, a->low == rcx);
+
+    maybeRex(c, bSize, a, b);
+    opcode(c, 0xd3, 0xf8 + regCode(b)); // sar r, cl
+  }
+}
+
+// b >>= constant, arithmetic (0xf8 = sar opcode extension).
+void shiftRightCR(Context* c, unsigned aSize, lir::Constant* a,
+             unsigned bSize, lir::Register* b)
+{
+  doShift(c, shiftRightRR, 0xf8, aSize, a, bSize, b);
+}
+
+// b >>>= a, logical (zero-filling).  Same structure as shiftRightRR
+// but uses shr, and the count >= 32 fix-up zeroes the high word
+// instead of sign-filling it.
+void unsignedShiftRightRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
+                     unsigned bSize, lir::Register* b)
+{
+  if (vm::TargetBytesPerWord == 4 and bSize == 8) {
+    lir::Register cx(rcx);
+    if (a->low != rcx) {
+      c->client->save(rcx);
+      ResolvedPromise promise(0x3F);
+      lir::Constant mask(&promise);
+      moveRR(c, 4, a, 4, &cx);
+      andCR(c, 4, &mask, 4, &cx); // clamp count to 0..63
+    }
+
+    // shrd
+    opcode(c, 0x0f, 0xad);
+    modrm(c, 0xc0, b->low, b->high);
+
+    // shr
+    opcode(c, 0xd3, 0xe8 + b->high);
+
+    ResolvedPromise promise(32);
+    lir::Constant constant(&promise);
+    compareCR(c, aSize, &constant, aSize, &cx);
+
+    // skip the fix-up when count < 32
+    opcode(c, 0x7c); //jl
+    c->code.append(2 + 2);
+
+    lir::Register bh(b->high);
+    moveRR(c, 4, &bh, 4, b); // 2 bytes
+    xorRR(c, 4, &bh, 4, &bh); // 2 bytes
+  } else {
+    assert(c, a->low == rcx);
+
+    maybeRex(c, bSize, a, b);
+    opcode(c, 0xd3, 0xe8 + regCode(b)); // shr r, cl
+  }
+}
+
+// b >>>= constant, logical (0xe8 = shr opcode extension).
+void unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, lir::Constant* a,
+                     unsigned bSize, lir::Register* b)
+{
+  doShift(c, unsignedShiftRightRR, 0xe8, aSize, a, bSize, b);
+}
+
+// Thin wrappers mapping each SSE scalar operation to its 0x0f-escape
+// opcode via floatRegOp (register source) / floatMemOp (memory
+// source): 0x51 sqrt, 0x58 add, 0x5c sub, 0x59 mul, 0x5e div,
+// 0x5a cvt float<->double, 0x2c cvt float->int (truncating),
+// 0x2a cvt int->float.
+void floatSqrtRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize UNUSED, lir::Register* b)
+{
+  floatRegOp(c, aSize, a, 4, b, 0x51);
+}
+
+void floatSqrtMR(Context* c, unsigned aSize, lir::Memory* a,
+            unsigned bSize UNUSED, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, 4, b, 0x51);
+}
+
+void floatAddRR(Context* c, unsigned aSize, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b)
+{
+  floatRegOp(c, aSize, a, 4, b, 0x58);
+}
+
+void floatAddMR(Context* c, unsigned aSize, lir::Memory* a,
+           unsigned bSize UNUSED, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, 4, b, 0x58);
+}
+
+void floatSubtractRR(Context* c, unsigned aSize, lir::Register* a,
+                unsigned bSize UNUSED, lir::Register* b)
+{
+  floatRegOp(c, aSize, a, 4, b, 0x5c);
+}
+
+void floatSubtractMR(Context* c, unsigned aSize, lir::Memory* a,
+                unsigned bSize UNUSED, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, 4, b, 0x5c);
+}
+
+void floatMultiplyRR(Context* c, unsigned aSize, lir::Register* a,
+                unsigned bSize UNUSED, lir::Register* b)
+{
+  floatRegOp(c, aSize, a, 4, b, 0x59);
+}
+
+void floatMultiplyMR(Context* c, unsigned aSize, lir::Memory* a,
+                unsigned bSize UNUSED, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, 4, b, 0x59);
+}
+
+void floatDivideRR(Context* c, unsigned aSize, lir::Register* a,
+              unsigned bSize UNUSED, lir::Register* b)
+{
+  floatRegOp(c, aSize, a, 4, b, 0x5e);
+}
+
+void floatDivideMR(Context* c, unsigned aSize, lir::Memory* a,
+              unsigned bSize UNUSED, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, 4, b, 0x5e);
+}
+
+void float2FloatRR(Context* c, unsigned aSize, lir::Register* a,
+              unsigned bSize UNUSED, lir::Register* b)
+{
+  floatRegOp(c, aSize, a, 4, b, 0x5a);
+}
+
+void float2FloatMR(Context* c, unsigned aSize, lir::Memory* a,
+              unsigned bSize UNUSED, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, 4, b, 0x5a);
+}
+
+void float2IntRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize, lir::Register* b)
+{
+  assert(c, not isFloatReg(b)); // destination is a GPR
+  floatRegOp(c, aSize, a, bSize, b, 0x2c);
+}
+
+void float2IntMR(Context* c, unsigned aSize, lir::Memory* a,
+            unsigned bSize, lir::Register* b)
+{
+  floatMemOp(c, aSize, a, bSize, b, 0x2c);
+}
+
+// note: the conversions below pass bSize as the "float" size because
+// the destination register is the floating-point operand
+void int2FloatRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize, lir::Register* b)
+{
+  floatRegOp(c, bSize, a, aSize, b, 0x2a);
+}
+
+void int2FloatMR(Context* c, unsigned aSize, lir::Memory* a,
+            unsigned bSize, lir::Register* b)
+{
+  floatMemOp(c, bSize, a, aSize, b, 0x2a);
+}
+
+// Negate a single-precision float by xor-ing in the sign bit
+// (0x80000000) with xorps.  When a and b alias, the mask goes in a
+// scratch XMM register; otherwise b is loaded with the mask and
+// xor-ed with a (b = a ^ mask).
+void floatNegateRR(Context* c, unsigned aSize, lir::Register* a,
+              unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, isFloatReg(a) and isFloatReg(b));
+  // unlike most of the other floating point code, this does NOT
+  // support doubles:
+  assert(c, aSize == 4);
+  ResolvedPromise pcon(0x80000000);
+  lir::Constant con(&pcon);
+  if (a->low == b->low) {
+    lir::Register tmp(c->client->acquireTemporary(FloatRegisterMask));
+    moveCR(c, 4, &con, 4, &tmp);
+    maybeRex(c, 4, a, &tmp);
+    opcode(c, 0x0f, 0x57); // xorps
+    modrm(c, 0xc0, &tmp, a);
+    c->client->releaseTemporary(tmp.low);
+  } else {
+    moveCR(c, 4, &con, 4, b);
+    // NOTE(review): dead given the aSize == 4 assert above
+    if (aSize == 8) opcode(c, 0x66);
+    maybeRex(c, 4, a, b);
+    opcode(c, 0x0f, 0x57); // xorps
+    modrm(c, 0xc0, a, b);
+  }
+}
+
+// Absolute value of a single-precision float by and-ing off the sign
+// bit (mask 0x7fffffff) with andps; same alias handling as
+// floatNegateRR.
+void floatAbsoluteRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(c, isFloatReg(a) and isFloatReg(b));
+  // unlike most of the other floating point code, this does NOT
+  // support doubles:
+  assert(c, aSize == 4);
+  ResolvedPromise pcon(0x7fffffff);
+  lir::Constant con(&pcon);
+  if (a->low == b->low) {
+    lir::Register tmp(c->client->acquireTemporary(FloatRegisterMask));
+    moveCR(c, 4, &con, 4, &tmp);
+    maybeRex(c, 4, a, &tmp);
+    opcode(c, 0x0f, 0x54); // andps
+    modrm(c, 0xc0, &tmp, a);
+    c->client->releaseTemporary(tmp.low);
+  } else {
+    moveCR(c, 4, &con, 4, b);
+    maybeRex(c, 4, a, b);
+    opcode(c, 0x0f, 0x54); // andps
+    modrm(c, 0xc0, a, b);
+  }
+}
+
+// Integer absolute value, branch-free: cdq fills rdx with the sign
+// mask, then a = (a ^ mask) - mask.  Operand must live in rax; rdx is
+// acquired as scratch.
+void absoluteRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b UNUSED)
+{
+  assert(c, aSize == bSize and a->low == rax and b->low == rax);
+  lir::Register d
+    (c->client->acquireTemporary(static_cast<uint64_t>(1) << rdx));
+  maybeRex(c, aSize, a, b);
+  opcode(c, 0x99); // cdq: rdx = sign mask of rax
+  xorRR(c, aSize, &d, aSize, a);
+  subtractRR(c, aSize, &d, aSize, a);
+  c->client->releaseTemporary(rdx);
+}
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/x86/operations.h b/src/codegen/x86/operations.h
new file mode 100644
index 0000000000..52d0d8dc0f
--- /dev/null
+++ b/src/codegen/x86/operations.h
@@ -0,0 +1,261 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_OPERATIONS_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_OPERATIONS_H
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+void return_(Context* c);
+
+void trap(Context* c);
+
+void ignore(Context*);
+
+void storeLoadBarrier(Context* c);
+
+void callC(Context* c, unsigned size UNUSED, lir::Constant* a);
+
+void longCallC(Context* c, unsigned size, lir::Constant* a);
+
+void jumpR(Context* c, unsigned size UNUSED, lir::Register* a);
+
+void jumpC(Context* c, unsigned size UNUSED, lir::Constant* a);
+
+void jumpM(Context* c, unsigned size UNUSED, lir::Memory* a);
+
+void longJumpC(Context* c, unsigned size, lir::Constant* a);
+
+void callR(Context* c, unsigned size UNUSED, lir::Register* a);
+
+void callM(Context* c, unsigned size UNUSED, lir::Memory* a);
+
+void alignedCallC(Context* c, unsigned size, lir::Constant* a);
+
+void alignedLongCallC(Context* c, unsigned size, lir::Constant* a);
+
+void alignedJumpC(Context* c, unsigned size, lir::Constant* a);
+
+void alignedLongJumpC(Context* c, unsigned size, lir::Constant* a);
+
+void pushR(Context* c, unsigned size, lir::Register* a);
+
+void popR(Context* c, unsigned size, lir::Register* a);
+
+void negateR(Context* c, unsigned size, lir::Register* a);
+
+void negateRR(Context* c, unsigned aSize, lir::Register* a,
+         unsigned bSize UNUSED, lir::Register* b UNUSED);
+
+void moveCR(Context* c, unsigned aSize, lir::Constant* a,
+       unsigned bSize, lir::Register* b);
+
+void swapRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+       unsigned bSize UNUSED, lir::Register* b);
+
+void moveRR(Context* c, unsigned aSize, lir::Register* a,
+       UNUSED unsigned bSize, lir::Register* b);
+
+void moveMR(Context* c, unsigned aSize, lir::Memory* a,
+       unsigned bSize, lir::Register* b);
+
+void moveRM(Context* c, unsigned aSize, lir::Register* a,
+       unsigned bSize UNUSED, lir::Memory* b);
+
+void moveAR(Context* c, unsigned aSize, lir::Address* a,
+       unsigned bSize, lir::Register* b);
+
+void moveCM(Context* c, unsigned aSize UNUSED, lir::Constant* a,
+       unsigned bSize, lir::Memory* b);
+
+void moveZRR(Context* c, unsigned aSize, lir::Register* a,
+        unsigned bSize UNUSED, lir::Register* b);
+
+void moveZMR(Context* c, unsigned aSize UNUSED, lir::Memory* a,
+        unsigned bSize UNUSED, lir::Register* b);
+
+void addCarryRR(Context* c, unsigned size, lir::Register* a,
+           lir::Register* b);
+
+void addRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b);
+
+void addCarryCR(Context* c, unsigned size, lir::Constant* a,
+           lir::Register* b);
+
+void addCR(Context* c, unsigned aSize, lir::Constant* a,
+      unsigned bSize, lir::Register* b);
+
+void subtractBorrowCR(Context* c, unsigned size UNUSED, lir::Constant* a,
+                 lir::Register* b);
+
+void subtractCR(Context* c, unsigned aSize, lir::Constant* a,
+           unsigned bSize, lir::Register* b);
+
+void subtractBorrowRR(Context* c, unsigned size, lir::Register* a,
+                 lir::Register* b);
+
+void subtractRR(Context* c, unsigned aSize, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b);
+
+void andRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b);
+
+void andCR(Context* c, unsigned aSize, lir::Constant* a,
+      unsigned bSize, lir::Register* b);
+
+void orRR(Context* c, unsigned aSize, lir::Register* a,
+     unsigned bSize UNUSED, lir::Register* b);
+
+void orCR(Context* c, unsigned aSize, lir::Constant* a,
+     unsigned bSize, lir::Register* b);
+
+void xorRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b);
+
+void xorCR(Context* c, unsigned aSize, lir::Constant* a,
+      unsigned bSize, lir::Register* b);
+
+void multiplyRR(Context* c, unsigned aSize, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b);
+
+void compareRR(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b);
+
+void compareCR(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b);
+
+void compareRM(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Memory* b);
+
+void compareCM(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b);
+
+void compareFloatRR(Context* c, unsigned aSize, lir::Register* a,
+               unsigned bSize UNUSED, lir::Register* b);
+
+void branchLong(Context* c, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compare);
+
+void branchRR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target);
+
+void branchCR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target);
+
+void branchRM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target);
+
+void branchCM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target);
+
+void multiplyCR(Context* c, unsigned aSize, lir::Constant* a,
+           unsigned bSize, lir::Register* b);
+
+void divideRR(Context* c, unsigned aSize, lir::Register* a,
+         unsigned bSize UNUSED, lir::Register* b UNUSED);
+
+void remainderRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize UNUSED, lir::Register* b);
+
+void doShift(Context* c, UNUSED void (*shift)
+        (Context*, unsigned, lir::Register*, unsigned,
+         lir::Register*),
+        int type, UNUSED unsigned aSize, lir::Constant* a,
+        unsigned bSize, lir::Register* b);
+
+void shiftLeftRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
+            unsigned bSize, lir::Register* b);
+
+void shiftLeftCR(Context* c, unsigned aSize, lir::Constant* a,
+            unsigned bSize, lir::Register* b);
+
+void shiftRightRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
+             unsigned bSize, lir::Register* b);
+
+void shiftRightCR(Context* c, unsigned aSize, lir::Constant* a,
+             unsigned bSize, lir::Register* b);
+
+void unsignedShiftRightRR(Context* c, UNUSED unsigned aSize, lir::Register* a,
+                     unsigned bSize, lir::Register* b);
+
+void unsignedShiftRightCR(Context* c, unsigned aSize UNUSED, lir::Constant* a,
+                     unsigned bSize, lir::Register* b);
+
+void floatSqrtRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize UNUSED, lir::Register* b);
+
+void floatSqrtMR(Context* c, unsigned aSize, lir::Memory* a,
+            unsigned bSize UNUSED, lir::Register* b);
+
+void floatAddRR(Context* c, unsigned aSize, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b);
+
+void floatAddMR(Context* c, unsigned aSize, lir::Memory* a,
+           unsigned bSize UNUSED, lir::Register* b);
+
+void floatSubtractRR(Context* c, unsigned aSize, lir::Register* a,
+                unsigned bSize UNUSED, lir::Register* b);
+
+void floatSubtractMR(Context* c, unsigned aSize, lir::Memory* a,
+                unsigned bSize UNUSED, lir::Register* b);
+
+void floatMultiplyRR(Context* c, unsigned aSize, lir::Register* a,
+                unsigned bSize UNUSED, lir::Register* b);
+
+void floatMultiplyMR(Context* c, unsigned aSize, lir::Memory* a,
+                unsigned bSize UNUSED, lir::Register* b);
+
+void floatDivideRR(Context* c, unsigned aSize, lir::Register* a,
+              unsigned bSize UNUSED, lir::Register* b);
+
+void floatDivideMR(Context* c, unsigned aSize, lir::Memory* a,
+              unsigned bSize UNUSED, lir::Register* b);
+
+void float2FloatRR(Context* c, unsigned aSize, lir::Register* a,
+              unsigned bSize UNUSED, lir::Register* b);
+
+void float2FloatMR(Context* c, unsigned aSize, lir::Memory* a,
+              unsigned bSize UNUSED, lir::Register* b);
+
+void float2IntRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize, lir::Register* b);
+
+void float2IntMR(Context* c, unsigned aSize, lir::Memory* a,
+            unsigned bSize, lir::Register* b);
+
+void int2FloatRR(Context* c, unsigned aSize, lir::Register* a,
+            unsigned bSize, lir::Register* b);
+
+void int2FloatMR(Context* c, unsigned aSize, lir::Memory* a,
+            unsigned bSize, lir::Register* b);
+
+void floatNegateRR(Context* c, unsigned aSize, lir::Register* a,
+              unsigned bSize UNUSED, lir::Register* b);
+
+void floatAbsoluteRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+           unsigned bSize UNUSED, lir::Register* b);
+
+void absoluteRR(Context* c, unsigned aSize, lir::Register* a,
+      unsigned bSize UNUSED, lir::Register* b UNUSED);
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_OPERATIONS_H
diff --git a/src/codegen/x86/registers.h b/src/codegen/x86/registers.h
new file mode 100644
index 0000000000..d5f325bca0
--- /dev/null
+++ b/src/codegen/x86/registers.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_X86_REGISTERS_H
+#define AVIAN_CODEGEN_ASSEMBLER_X86_REGISTERS_H
+
+namespace avian {
+namespace codegen {
+namespace x86 {
+
+enum {
+  rax = 0,
+  rcx = 1,
+  rdx = 2,
+  rbx = 3,
+  rsp = 4,
+  rbp = 5,
+  rsi = 6,
+  rdi = 7,
+  r8 = 8,
+  r9 = 9,
+  r10 = 10,
+  r11 = 11,
+  r12 = 12,
+  r13 = 13,
+  r14 = 14,
+  r15 = 15,
+};
+
+enum {
+  xmm0 = r15 + 1,
+  xmm1,
+  xmm2,
+  xmm3,
+  xmm4,
+  xmm5,
+  xmm6,
+  xmm7,
+  xmm8,
+  xmm9,
+  xmm10,
+  xmm11,
+  xmm12,
+  xmm13,
+  xmm14,
+  xmm15,
+};
+
+const int LongJumpRegister = r10;
+
+const unsigned GeneralRegisterMask = vm::TargetBytesPerWord == 4 ? 0x000000ff : 0x0000ffff;
+
+const unsigned FloatRegisterMask = vm::TargetBytesPerWord == 4 ? 0x00ff0000 : 0xffff0000;
+
+
+} // namespace x86
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_X86_REGISTERS_H

From e9be3c4e074664c5691724014521ff703628bec9 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sun, 17 Feb 2013 11:19:07 -0700
Subject: [PATCH 05/22] move arg parser out of bootimage.cpp

---
 makefile                |   2 +-
 src/bootimage.cpp       | 103 ++--------------------------------------
 src/util/arg-parser.cpp |  94 ++++++++++++++++++++++++++++++++++++
 src/util/arg-parser.h   |  46 ++++++++++++++++++
 4 files changed, 145 insertions(+), 100 deletions(-)
 create mode 100644 src/util/arg-parser.cpp
 create mode 100644 src/util/arg-parser.h

diff --git a/makefile b/makefile
index d0d451c9ae..9cbd83b65a 100755
--- a/makefile
+++ b/makefile
@@ -1041,7 +1041,7 @@ ifeq ($(continuations),true)
 	asmflags += -DAVIAN_CONTINUATIONS
 endif
 
-bootimage-generator-sources = $(src)/bootimage.cpp
+bootimage-generator-sources = $(src)/bootimage.cpp $(src)/util/arg-parser.cpp
 ifneq ($(lzma),)
 	bootimage-generator-sources += $(src)/lzma-encode.cpp
 endif
diff --git a/src/bootimage.cpp b/src/bootimage.cpp
index c2879403a2..d32ca69281 100644
--- a/src/bootimage.cpp
+++ b/src/bootimage.cpp
@@ -19,12 +19,16 @@
 #include "binaryToObject/tools.h"
 #include "lzma.h"
 
+#include "util/arg-parser.h"
+#include "util/abort.h"
+
 // since we aren't linking against libstdc++, we must implement this
 // ourselves:
 extern "C" void __cxa_pure_virtual(void) { abort(); }
 
 using namespace vm;
 using namespace avian::tools;
+using namespace avian::util;
 
 namespace {
 
@@ -1714,105 +1718,6 @@ writeBootImage(Thread* t, uintptr_t* arguments)
   return 1;
 }
 
-class Arg;
-
-class ArgParser {
-public:
-  Arg* first;
-  Arg** last;
-
-  ArgParser():
-    first(0),
-    last(&first) {}
-
-  bool parse(int ac, const char** av);
-  void printUsage(const char* exe);
-};
-
-class Arg {
-public:
-  Arg* next;
-  bool required;
-  const char* name;
-  const char* desc;
-
-  const char* value;
-
-  Arg(ArgParser& parser, bool required, const char* name, const char* desc):
-    next(0),
-    required(required),
-    name(name),
-    desc(desc),
-    value(0)
-  {
-    *parser.last = this;
-    parser.last = &next;
-  }
-};
-
-bool ArgParser::parse(int ac, const char** av) {
-  Arg* state = 0;
-
-  for(int i = 1; i < ac; i++) {
-    if(state) {
-      if(state->value) {
-        fprintf(stderr, "duplicate parameter %s: '%s' and '%s'\n", state->name, state->value, av[i]);
-        return false;
-      }
-      state->value = av[i];
-      state = 0;
-    } else {
-      if(av[i][0] != '-') {
-        fprintf(stderr, "expected -parameter\n");
-        return false;
-      }
-      bool found = false;
-      for(Arg* arg = first; arg; arg = arg->next) {
-        if(::strcmp(arg->name, &av[i][1]) == 0) {
-          found = true;
-          if (arg->desc == 0) {
-            arg->value = "true";
-          } else {
-            state = arg;
-          }
-        }
-      }
-      if (not found) {
-        fprintf(stderr, "unrecognized parameter %s\n", av[i]);
-        return false;
-      }
-    }
-  }
-
-  if(state) {
-    fprintf(stderr, "expected argument after -%s\n", state->name);
-    return false;
-  }
-
-  for(Arg* arg = first; arg; arg = arg->next) {
-    if(arg->required && !arg->value) {
-      fprintf(stderr, "expected value for %s\n", arg->name);
-      return false;
-    }
-  }
-
-  return true;
-}
-
-void ArgParser::printUsage(const char* exe) {
-  fprintf(stderr, "usage:\n%s \\\n", exe);
-  for(Arg* arg = first; arg; arg = arg->next) {
-    const char* lineEnd = arg->next ? " \\" : "";
-    if(arg->required) {
-      fprintf(stderr, "  -%s\t%s%s\n", arg->name, arg->desc, lineEnd);
-    } else if (arg->desc) {
-      fprintf(stderr, "  [-%s\t%s]%s\n", arg->name, arg->desc, lineEnd);
-    } else {
-      fprintf(stderr, "  [-%s]%s\n", arg->name, lineEnd);
-    }
-  }
-}
-
 char*
 myStrndup(const char* src, unsigned length)
 {
diff --git a/src/util/arg-parser.cpp b/src/util/arg-parser.cpp
new file mode 100644
index 0000000000..697bfd4fef
--- /dev/null
+++ b/src/util/arg-parser.cpp
@@ -0,0 +1,94 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "util/arg-parser.h"
+
+namespace avian {
+namespace util {
+
+Arg::Arg(ArgParser& parser, bool required, const char* name, const char* desc):
+  next(0),
+  required(required),
+  name(name),
+  desc(desc),
+  value(0)
+{
+  *parser.last = this;
+  parser.last = &next;
+}
+
+bool ArgParser::parse(int ac, const char** av) {
+  Arg* state = 0;
+
+  for(int i = 1; i < ac; i++) {
+    if(state) {
+      if(state->value) {
+        fprintf(stderr, "duplicate parameter %s: '%s' and '%s'\n", state->name, state->value, av[i]);
+        return false;
+      }
+      state->value = av[i];
+      state = 0;
+    } else {
+      if(av[i][0] != '-') {
+        fprintf(stderr, "expected -parameter\n");
+        return false;
+      }
+      bool found = false;
+      for(Arg* arg = first; arg; arg = arg->next) {
+        if(strcmp(arg->name, &av[i][1]) == 0) {
+          found = true;
+          if (arg->desc == 0) {
+            arg->value = "true";
+          } else {
+            state = arg;
+          }
+        }
+      }
+      if (not found) {
+        fprintf(stderr, "unrecognized parameter %s\n", av[i]);
+        return false;
+      }
+    }
+  }
+
+  if(state) {
+    fprintf(stderr, "expected argument after -%s\n", state->name);
+    return false;
+  }
+
+  for(Arg* arg = first; arg; arg = arg->next) {
+    if(arg->required && !arg->value) {
+      fprintf(stderr, "expected value for %s\n", arg->name);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void ArgParser::printUsage(const char* exe) {
+  fprintf(stderr, "usage:\n%s \\\n", exe);
+  for(Arg* arg = first; arg; arg = arg->next) {
+    const char* lineEnd = arg->next ? " \\" : "";
+    if(arg->required) {
+      fprintf(stderr, "  -%s\t%s%s\n", arg->name, arg->desc, lineEnd);
+    } else if (arg->desc) {
+      fprintf(stderr, "  [-%s\t%s]%s\n", arg->name, arg->desc, lineEnd);
+    } else {
+      fprintf(stderr, "  [-%s]%s\n", arg->name, lineEnd);
+    }
+  }
+}
+
+} // namespace util
+} // namespace avian
diff --git a/src/util/arg-parser.h b/src/util/arg-parser.h
new file mode 100644
index 0000000000..bba913c4d9
--- /dev/null
+++ b/src/util/arg-parser.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2008-2011, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_UTIL_ARG_PARSER_H
+#define AVIAN_UTIL_ARG_PARSER_H
+
+namespace avian {
+namespace util {
+
+class Arg;
+
+class ArgParser {
+public:
+  Arg* first;
+  Arg** last;
+
+  ArgParser();
+
+  bool parse(int ac, const char** av);
+  void printUsage(const char* exe);
+};
+
+class Arg {
+public:
+  Arg* next;
+  bool required;
+  const char* name;
+  const char* desc;
+
+  const char* value;
+
+  Arg(ArgParser& parser, bool required, const char* name, const char* desc);
+};
+
+
+} // namespace util
+} // namespace avian
+
+#endif // AVIAN_UTIL_ARG_PARSER_H
\ No newline at end of file

From 24c0fab9bf37e77e1fdad2122783dee332c9a9ef Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sun, 17 Feb 2013 12:10:18 -0700
Subject: [PATCH 06/22] add unit tests for arg parser

---
 makefile                          |  3 +-
 src/util/arg-parser.cpp           |  4 ++
 test/test.sh                      |  1 +
 unittest/test-harness.h           | 10 +++++
 unittest/util/arg-parser-test.cpp | 69 +++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 unittest/util/arg-parser-test.cpp

diff --git a/makefile b/makefile
index 9cbd83b65a..5d1178f77f 100755
--- a/makefile
+++ b/makefile
@@ -1214,6 +1214,7 @@ test-extra-dep = $(test-build)-extra.dep
 
 unittest-sources = \
 	$(wildcard $(unittest)/*.cpp) \
+	$(wildcard $(unittest)/util/*.cpp) \
 	$(wildcard $(unittest)/codegen/*.cpp)
 
 unittest-depends = \
@@ -1583,7 +1584,7 @@ executable-objects = $(vm-objects) $(classpath-objects) $(driver-object) \
 	$(vm-heapwalk-objects) $(boot-object) $(vm-classpath-objects) \
 	$(javahome-object) $(boot-javahome-object) $(lzma-decode-objects)
 
-unittest-executable-objects = $(unittest-objects) $(vm-objects)
+unittest-executable-objects = $(unittest-objects) $(vm-objects) $(build)/util/arg-parser.o
 
 ifeq ($(process),interpret)
 	unittest-executable-objects += $(all-codegen-target-objects)
diff --git a/src/util/arg-parser.cpp b/src/util/arg-parser.cpp
index 697bfd4fef..30e50cadfb 100644
--- a/src/util/arg-parser.cpp
+++ b/src/util/arg-parser.cpp
@@ -27,6 +27,10 @@ Arg::Arg(ArgParser& parser, bool required, const char* name, const char* desc):
   parser.last = &next;
 }
 
+ArgParser::ArgParser():
+  first(0),
+  last(&first) {}
+
 bool ArgParser::parse(int ac, const char** av) {
   Arg* state = 0;
 
diff --git a/test/test.sh b/test/test.sh
index e30e90c1cb..f5de5ff81c 100644
--- a/test/test.sh
+++ b/test/test.sh
@@ -20,6 +20,7 @@ printf "%12s------- Unit tests -------\n" ""
 ${unit_tester} 2>>${log}
 if [ "${?}" != "0" ]; then
   trouble=1
+  echo "unit tests failed!"
 fi
 
 echo
diff --git a/unittest/test-harness.h b/unittest/test-harness.h
index 717cfc317f..d9a5ed81a7 100644
--- a/unittest/test-harness.h
+++ b/unittest/test-harness.h
@@ -11,6 +11,9 @@
 #ifndef TEST_HARNESS_H
 #define TEST_HARNESS_H
 
+#include <stdio.h>
+#include <string.h>
+
 class Test {
 private:
   Test* next;
@@ -53,6 +55,14 @@ protected:
     }
     runs++;
   }
+
+  void assertEqual(const char* expected, const char* actual) {
+    if((expected == 0) != (actual == 0) || (expected != 0 && actual != 0 && strcmp(expected, actual) != 0)) {
+      fprintf(stderr, "assertion failure, expected: \"%s\", actual: \"%s\"\n", expected, actual);
+      failures++;
+    }
+    runs++;
+  }
   
   template<class T>
   void assertNotEqual(T expected, T actual) {
diff --git a/unittest/util/arg-parser-test.cpp b/unittest/util/arg-parser-test.cpp
new file mode 100644
index 0000000000..bd93a04c99
--- /dev/null
+++ b/unittest/util/arg-parser-test.cpp
@@ -0,0 +1,69 @@
+/* Copyright (c) 2008-2011, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+
+#include <stdio.h>
+
+#include "common.h"
+
+#include "util/arg-parser.h"
+
+#include "test-harness.h"
+
+using namespace avian::util;
+
+class ArgParserTest : public Test {
+public:
+  ArgParserTest():
+    Test("ArgParser")
+  {}
+
+  virtual void run() {
+    {
+      ArgParser parser;
+      Arg arg1(parser, false, "arg1", "<value>");
+      Arg required2(parser, true, "required2", "<value>");
+      const char* args[] = {
+        "myExecutable",
+        "-arg1", "myValue1",
+        "-required2", "myRequired2",
+        0
+      };
+      assertTrue(parser.parse(sizeof(args) / sizeof(char*) - 1, args));
+      assertEqual("myValue1", arg1.value);
+      assertEqual("myRequired2", required2.value);
+    }
+
+    {
+      ArgParser parser;
+      Arg arg1(parser, false, "arg1", "<value>");
+      Arg required2(parser, true, "required2", "<value>");
+      const char* args[] = {
+        "myExecutable",
+        "-arg1", "myValue1",
+        "-required2",
+        0
+      };
+      assertFalse(parser.parse(sizeof(args) / sizeof(char*) - 1, args));
+    }
+
+    {
+      ArgParser parser;
+      Arg arg1(parser, false, "arg1", "<value>");
+      Arg required2(parser, true, "required2", "<value>");
+      const char* args[] = {
+        "myExecutable",
+        "-arg1", "myValue1",
+        0
+      };
+      assertFalse(parser.parse(sizeof(args) / sizeof(char*) - 1, args));
+    }
+  }
+} argParserTest;
\ No newline at end of file

From 46029939d375ae1605bcae2fae2da90c89db220c Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Mon, 18 Feb 2013 07:50:37 -0700
Subject: [PATCH 07/22] begin work on audit-codegen

---
 makefile                         | 92 ++++++++++++++++++++------------
 src/tools/audit-codegen/main.cpp | 63 ++++++++++++++++++++++
 src/util/arg-parser.cpp          |  2 +-
 src/util/arg-parser.h            |  2 +-
 4 files changed, 123 insertions(+), 36 deletions(-)
 create mode 100644 src/tools/audit-codegen/main.cpp

diff --git a/makefile b/makefile
index 5d1178f77f..bed84761d5 100755
--- a/makefile
+++ b/makefile
@@ -994,6 +994,8 @@ all-assembler-sources = \
 native-assembler-sources = $($(target-asm)-assembler-sources)
 native-assembler-objects = $($(target-asm)-assembler-objects)
 
+audit-codegen-sources = $(wildcard $(src)/tools/audit-codegen/*.cpp)
+
 all-codegen-target-sources = \
 	$(compiler-sources) \
 	$(native-assembler-sources)
@@ -1154,6 +1156,7 @@ dynamic-library = $(build)/$(so-prefix)jvm$(so-suffix)
 executable-dynamic = $(build)/$(name)-dynamic$(exe-suffix)
 
 unittest-executable = $(build)/$(name)-unittest${exe-suffix}
+audit-codegen-executable = $(build)/audit-codegen${exe-suffix}
 
 ifneq ($(classpath),avian)
 # Assembler, ConstantPool, and Stream are not technically needed for a
@@ -1304,6 +1307,14 @@ else
 	ssh -p$(remote-test-port) $(remote-test-user)@$(remote-test-host) sh "$(remote-test-dir)/$(platform)-$(arch)$(options)/run-tests.sh"
 endif
 
+.PHONY: audit-baseline
+audit-baseline: $(audit-codegen-executable)
+	$(<) -output $(build)/codegen-audit-output/baseline.o -format macho
+
+.PHONY: audit
+audit: $(audit-codegen-executable)
+	$(<) -output $(build)/codegen-audit-output/baseline.o -format macho
+
 .PHONY: tarball
 tarball:
 	@echo "creating build/avian-$(version).tar.bz2"
@@ -1412,6 +1423,9 @@ endif
 $(unittest-objects): $(build)/unittest/%.o: $(unittest)/%.cpp $(vm-depends) $(unittest-depends)
 	$(compile-unittest-object)
 
+$(build)/tools/audit-codegen/main.o: $(build)/%.o: $(src)/%.cpp $(vm-depends)
+	$(compile-object)
+
 $(test-cpp-objects): $(test-build)/%.o: $(test)/%.cpp $(vm-depends)
 	$(compile-object)
 
@@ -1590,43 +1604,53 @@ ifeq ($(process),interpret)
 	unittest-executable-objects += $(all-codegen-target-objects)
 endif
 
-$(executable): $(executable-objects)
-	@echo "linking $(@)"
-ifeq ($(platform),windows)
-ifdef ms_cl_compiler
-	$(ld) $(lflags) $(executable-objects) -out:$(@) \
-		-debug -PDB:$(subst $(exe-suffix),.pdb,$(@)) $(manifest-flags)
-ifdef mt
-	$(mt) -nologo -manifest $(@).manifest -outputresource:"$(@);1"
-endif
-else
-	$(dlltool) -z $(@).def $(executable-objects)
-	$(dlltool) -d $(@).def -e $(@).exp
-	$(ld) $(@).exp $(executable-objects) $(lflags) -o $(@)
-endif
-else
-	$(ld) $(executable-objects) $(rdynamic) $(lflags) $(bootimage-lflags) -o $(@)
-endif
-	$(strip) $(strip-all) $(@)
+audit-codegen-objects = $(call cpp-objects,$(audit-codegen-sources),$(src),$(build))
+audit-codegen-executable-objects = $(audit-codegen-objects) $(vm-objects) $(build)/util/arg-parser.o
 
+.PHONY: print
+print:
+	@echo $(audit-codegen-objects)
+
+# apparently, make does poorly with ifs inside of defines, and indented defines.
+# I suggest re-indenting the following before making edits (and unindenting afterwards):
+ifneq ($(platform),windows)
+define link-executable
+	@echo linking $(@)
+	$(ld) $(^) $(rdynamic) $(lflags) $(bootimage-lflags) -o $(@)
+endef
+else
+ifdef ms_cl_compiler
+ifdef mt
+define link-executable
+	@echo linking $(@)
+	$(ld) $(lflags) $(^) -out:$(@) \
+		-debug -PDB:$(subst $(exe-suffix),.pdb,$(@)) $(manifest-flags)
+	$(mt) -nologo -manifest $(@).manifest -outputresource:"$(@);1"
+endef
+else
+define link-executable
+	@echo linking $(@)
+	$(ld) $(lflags) $(^) -out:$(@) -debug -PDB:$(subst $(exe-suffix),.pdb,$(@)) $(manifest-flags)
+endef
+endif
+else
+define link-executable
+	@echo linking $(@)
+	$(dlltool) -z $(@).def $(^)
+	$(dlltool) -d $(@).def -e $(@).exp
+	$(ld) $(@).exp $(^) $(lflags) -o $(@)
+endef
+endif
+endif
+
+$(executable): $(executable-objects)
+	$(link-executable)
 
 $(unittest-executable): $(unittest-executable-objects)
-	@echo "linking $(@)"
-ifeq ($(platform),windows)
-ifdef ms_cl_compiler
-	$(ld) $(lflags) $(unittest-executable-objects) -out:$(@) \
-		-debug -PDB:$(subst $(exe-suffix),.pdb,$(@)) $(manifest-flags)
-ifdef mt
-	$(mt) -nologo -manifest $(@).manifest -outputresource:"$(@);1"
-endif
-else
-	$(dlltool) -z $(@).def $(unittest-executable-objects)
-	$(dlltool) -d $(@).def -e $(@).exp
-	$(ld) $(@).exp $(unittest-executable-objects) $(lflags) -o $(@)
-endif
-else
-	$(ld) $(unittest-executable-objects) $(rdynamic) $(lflags) $(bootimage-lflags) -o $(@)
-endif
+	$(link-executable)
+
+$(audit-codegen-executable): $(audit-codegen-executable-objects)
+	$(link-executable)
 
 $(bootimage-generator): $(bootimage-generator-objects)
 	echo building $(bootimage-generator) arch=$(build-arch) platform=$(bootimage-platform)
diff --git a/src/tools/audit-codegen/main.cpp b/src/tools/audit-codegen/main.cpp
new file mode 100644
index 0000000000..9fc8d10119
--- /dev/null
+++ b/src/tools/audit-codegen/main.cpp
@@ -0,0 +1,63 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "system.h"
+
+#include "util/arg-parser.h"
+
+#include "codegen/lir.h"
+#include "codegen/assembler.h"
+#include "codegen/targets.h"
+
+// since we aren't linking against libstdc++, we must implement this
+// ourselves:
+extern "C" void __cxa_pure_virtual(void) { abort(); }
+
+using namespace avian::codegen;
+using namespace avian::util;
+
+void generateCode(Assembler::Architecture* arch) {
+  for()
+}
+
+class Arguments {
+public:
+  const char* output;
+  const char* outputFormat;
+
+  Arguments(int argc, char** argv) {
+    ArgParser parser;
+    Arg out(parser, true, "output", "<output object file>");
+    Arg format(parser, true, "format", "<format of output object file>");
+
+    if(!parser.parse(argc, argv)) {
+      exit(1);
+    }
+
+    output = out.value;
+    outputFormat = format.value;
+
+    // TODO: sanitize format values
+  }
+};
+
+int main(int argc, char** argv) {
+  Arguments args(argc, argv);
+
+  vm::System* s = vm::makeSystem(0);
+  Assembler::Architecture* arch = makeArchitectureNative(s, true);
+  arch->acquire();
+
+  generateCode(arch);
+
+  arch->release();
+  s->dispose();
+  return 0;
+}
\ No newline at end of file
diff --git a/src/util/arg-parser.cpp b/src/util/arg-parser.cpp
index 30e50cadfb..ac652563be 100644
--- a/src/util/arg-parser.cpp
+++ b/src/util/arg-parser.cpp
@@ -31,7 +31,7 @@ ArgParser::ArgParser():
   first(0),
   last(&first) {}
 
-bool ArgParser::parse(int ac, const char** av) {
+bool ArgParser::parse(int ac, const char* const* av) {
   Arg* state = 0;
 
   for(int i = 1; i < ac; i++) {
diff --git a/src/util/arg-parser.h b/src/util/arg-parser.h
index bba913c4d9..1f887764f2 100644
--- a/src/util/arg-parser.h
+++ b/src/util/arg-parser.h
@@ -23,7 +23,7 @@ public:
 
   ArgParser();
 
-  bool parse(int ac, const char** av);
+  bool parse(int ac, const char* const* av);
   void printUsage(const char* exe);
 };
 

From aaa076f1dfbf7d289ea3d9e75f054b279622d83c Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Thu, 21 Feb 2013 20:14:17 -0700
Subject: [PATCH 08/22] move arg-parser.h to include

---
 {src => include/avian}/util/arg-parser.h | 0
 src/tools/audit-codegen/main.cpp         | 2 +-
 src/tools/bootimage-generator/main.cpp   | 2 +-
 src/util/arg-parser.cpp                  | 2 +-
 unittest/util/arg-parser-test.cpp        | 2 +-
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename {src => include/avian}/util/arg-parser.h (100%)

diff --git a/src/util/arg-parser.h b/include/avian/util/arg-parser.h
similarity index 100%
rename from src/util/arg-parser.h
rename to include/avian/util/arg-parser.h
diff --git a/src/tools/audit-codegen/main.cpp b/src/tools/audit-codegen/main.cpp
index 9fc8d10119..77f0b36e8a 100644
--- a/src/tools/audit-codegen/main.cpp
+++ b/src/tools/audit-codegen/main.cpp
@@ -10,7 +10,7 @@
 
 #include "system.h"
 
-#include "util/arg-parser.h"
+#include <avian/util/arg-parser.h>
 
 #include "codegen/lir.h"
 #include "codegen/assembler.h"
diff --git a/src/tools/bootimage-generator/main.cpp b/src/tools/bootimage-generator/main.cpp
index 7688613913..6f27cb5d70 100644
--- a/src/tools/bootimage-generator/main.cpp
+++ b/src/tools/bootimage-generator/main.cpp
@@ -21,7 +21,7 @@
 #include <avian/util/runtime-array.h>
 #include "lzma.h"
 
-#include "util/arg-parser.h"
+#include <avian/util/arg-parser.h>
 #include "util/abort.h"
 
 // since we aren't linking against libstdc++, we must implement this
diff --git a/src/util/arg-parser.cpp b/src/util/arg-parser.cpp
index ac652563be..cefc221532 100644
--- a/src/util/arg-parser.cpp
+++ b/src/util/arg-parser.cpp
@@ -11,7 +11,7 @@
 #include <stdio.h>
 #include <string.h>
 
-#include "util/arg-parser.h"
+#include <avian/util/arg-parser.h>
 
 namespace avian {
 namespace util {
diff --git a/unittest/util/arg-parser-test.cpp b/unittest/util/arg-parser-test.cpp
index bd93a04c99..799acba040 100644
--- a/unittest/util/arg-parser-test.cpp
+++ b/unittest/util/arg-parser-test.cpp
@@ -13,7 +13,7 @@
 
 #include "common.h"
 
-#include "util/arg-parser.h"
+#include <avian/util/arg-parser.h>
 
 #include "test-harness.h"
 

From d1a149a0a1e98d99dc521a7f86cc597eb47cb9bb Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Thu, 21 Feb 2013 21:57:53 -0700
Subject: [PATCH 09/22] audit-codegen prototype working

---
 include/avian/vm/codegen/registers.h | 22 ++++++++
 src/tools/audit-codegen/main.cpp     | 77 +++++++++++++++++++++++-----
 unittest/codegen/registers-test.cpp  | 44 ++++++++++++++++
 3 files changed, 131 insertions(+), 12 deletions(-)
 create mode 100644 unittest/codegen/registers-test.cpp

diff --git a/include/avian/vm/codegen/registers.h b/include/avian/vm/codegen/registers.h
index 317bb2215a..32b005d61a 100644
--- a/include/avian/vm/codegen/registers.h
+++ b/include/avian/vm/codegen/registers.h
@@ -45,6 +45,28 @@ public:
   { }
 };
 
+class RegisterIterator {
+public:
+  int index;
+  const RegisterMask& mask;
+
+  inline RegisterIterator(const RegisterMask& mask):
+    index(mask.start),
+    mask(mask) {}
+
+  inline bool hasNext() {
+    return index < mask.limit;
+  }
+
+  inline int next() {
+    int r = index;
+    do {
+      index++;
+    } while(index < mask.limit && !(mask.mask & (static_cast<uint64_t>(1) << index)));
+    return r;
+  }
+};
+
 } // namespace codegen
 } // namespace avian
 
diff --git a/src/tools/audit-codegen/main.cpp b/src/tools/audit-codegen/main.cpp
index 77f0b36e8a..d201cd43a4 100644
--- a/src/tools/audit-codegen/main.cpp
+++ b/src/tools/audit-codegen/main.cpp
@@ -8,23 +8,80 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include "system.h"
+#include <avian/vm/system/system.h>
 
 #include <avian/util/arg-parser.h>
 
-#include "codegen/lir.h"
-#include "codegen/assembler.h"
-#include "codegen/targets.h"
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/targets.h>
+#include <avian/vm/codegen/registers.h>
+
+#include <avian/vm/heap/heap.h>
 
 // since we aren't linking against libstdc++, we must implement this
 // ourselves:
 extern "C" void __cxa_pure_virtual(void) { abort(); }
 
+using namespace vm;
 using namespace avian::codegen;
 using namespace avian::util;
 
-void generateCode(Assembler::Architecture* arch) {
-  for()
+class BasicEnv {
+public:
+  System* s;
+  Heap* heap;
+  Assembler::Architecture* arch;
+
+  BasicEnv():
+    s(makeSystem(0)),
+    heap(makeHeap(s, 32 * 1024)),
+    arch(makeArchitectureNative(s, true))
+  {
+    arch->acquire();
+  }
+
+  ~BasicEnv() {
+    arch->release();
+    s->dispose();
+  }
+};
+
+class Asm {
+public:
+  Zone zone;
+  Assembler* a;
+
+  Asm(BasicEnv& env):
+    zone(env.s, env.heap, 8192),
+    a(env.arch->makeAssembler(env.heap, &zone))
+  { }
+
+  ~Asm() {
+    a->dispose();
+  }
+};
+
+void generateCode(BasicEnv& env) {
+  Asm a(env);
+  for(RegisterIterator it(env.arch->registerFile()->generalRegisters); it.hasNext(); ) {
+    int r = it.next();
+    lir::Register reg(r);
+    a.a->apply(lir::Add,
+      OperandInfo(4, lir::RegisterOperand, &reg),
+      OperandInfo(4, lir::RegisterOperand, &reg),
+      OperandInfo(4, lir::RegisterOperand, &reg));
+  }
+  unsigned length = a.a->endBlock(false)->resolve(0, 0);
+  printf("length: %d\n", length);
+  uint8_t* data = static_cast<uint8_t*>(env.s->tryAllocate(length));
+  a.a->setDestination(data);
+  a.a->write();
+  for(unsigned i = 0; i < length; i++) {
+    printf("%02x ", data[i]);
+  }
+  printf("\n");
+  env.s->free(data);
 }
 
 class Arguments {
@@ -51,13 +108,9 @@ public:
 int main(int argc, char** argv) {
   Arguments args(argc, argv);
 
-  vm::System* s = vm::makeSystem(0);
-  Assembler::Architecture* arch = makeArchitectureNative(s, true);
-  arch->acquire();
+  BasicEnv env;
 
-  generateCode(arch);
+  generateCode(env);
 
-  arch->release();
-  s->dispose();
   return 0;
 }
\ No newline at end of file
diff --git a/unittest/codegen/registers-test.cpp b/unittest/codegen/registers-test.cpp
new file mode 100644
index 0000000000..946e45ebab
--- /dev/null
+++ b/unittest/codegen/registers-test.cpp
@@ -0,0 +1,44 @@
+/* Copyright (c) 2008-2011, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include <stdio.h>
+
+#include <avian/vm/codegen/registers.h>
+
+#include "test-harness.h"
+
+
+using namespace avian::codegen;
+using namespace vm;
+
+
+class RegisterIteratorTest : public Test {
+public:
+  RegisterIteratorTest():
+    Test("RegisterIterator")
+  {}
+
+  virtual void run() {
+    RegisterMask regs(0x55);
+    assertEqual<unsigned>(0, regs.start);
+    assertEqual<unsigned>(7, regs.limit);
+
+    RegisterIterator it(regs);
+    assertTrue(it.hasNext());
+    assertEqual<unsigned>(0, it.next());
+    assertTrue(it.hasNext());
+    assertEqual<unsigned>(2, it.next());
+    assertTrue(it.hasNext());
+    assertEqual<unsigned>(4, it.next());
+    assertTrue(it.hasNext());
+    assertEqual<unsigned>(6, it.next());
+    assertFalse(it.hasNext());
+  }
+} registerIteratorTest;

From fd59e1e08d50ab05a830a1a30f598aed1822edf8 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Fri, 22 Feb 2013 20:47:56 -0700
Subject: [PATCH 10/22] begin splitting out arm assembler

---
 src/codegen/arm/assembler.cpp | 147 ++++++----------------------------
 src/codegen/arm/block.cpp     |  39 +++++++++
 src/codegen/arm/block.h       |  47 +++++++++++
 src/codegen/arm/context.cpp   |  27 +++++++
 src/codegen/arm/context.h     | 108 +++++++++++++++++++++++++
 5 files changed, 244 insertions(+), 124 deletions(-)
 create mode 100644 src/codegen/arm/block.cpp
 create mode 100644 src/codegen/arm/block.h
 create mode 100644 src/codegen/arm/context.cpp
 create mode 100644 src/codegen/arm/context.h

diff --git a/src/codegen/arm/assembler.cpp b/src/codegen/arm/assembler.cpp
index 5053654610..c4b4ca7dfe 100644
--- a/src/codegen/arm/assembler.cpp
+++ b/src/codegen/arm/assembler.cpp
@@ -8,13 +8,17 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
+#include <avian/util/runtime-array.h>
+
 #include <avian/vm/codegen/assembler.h>
 #include <avian/vm/codegen/registers.h>
 
+#include "context.h"
+#include "block.h"
+
 #include "alloc-vector.h"
 #include <avian/util/abort.h>
 
-#include <avian/util/runtime-array.h>
 
 #define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
 #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
@@ -25,7 +29,8 @@ using namespace vm;
 using namespace avian::codegen;
 using namespace avian::util;
 
-namespace local {
+namespace avian {
+namespace codegen {
 
 namespace isa {
 // SYSTEM REGISTERS
@@ -196,6 +201,8 @@ bool vfpSupported() {
 }
 }
 
+namespace arm {
+
 const uint64_t MASK_LO32 = 0xffffffff;
 const unsigned MASK_LO16 = 0xffff;
 const unsigned MASK_LO8  = 0xff;
@@ -239,8 +246,6 @@ const int32_t PoolOffsetMask = 0xFFF;
 
 const bool DebugPool = false;
 
-class Context;
-class MyBlock;
 class PoolOffset;
 class PoolEvent;
 
@@ -250,113 +255,12 @@ resolve(MyBlock*);
 unsigned
 padding(MyBlock*, unsigned);
 
-class MyBlock: public Assembler::Block {
- public:
-  MyBlock(Context* context, unsigned offset):
-    context(context), next(0), poolOffsetHead(0), poolOffsetTail(0),
-    lastPoolOffsetTail(0), poolEventHead(0), poolEventTail(0),
-    lastEventOffset(0), offset(offset), start(~0), size(0)
-  { }
-
-  virtual unsigned resolve(unsigned start, Assembler::Block* next) {
-    this->start = start;
-    this->next = static_cast<MyBlock*>(next);
-
-    local::resolve(this);
-
-    return start + size + padding(this, size);
-  }
-
-  Context* context;
-  MyBlock* next;
-  PoolOffset* poolOffsetHead;
-  PoolOffset* poolOffsetTail;
-  PoolOffset* lastPoolOffsetTail;
-  PoolEvent* poolEventHead;
-  PoolEvent* poolEventTail;
-  unsigned lastEventOffset;
-  unsigned offset;
-  unsigned start;
-  unsigned size;
-};
-
 class Task;
 class ConstantPoolEntry;
 
-class Context {
+class OffsetPromise: public Promise {
  public:
-  Context(System* s, Allocator* a, Zone* zone):
-    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
-    firstBlock(new(zone) MyBlock(this, 0)),
-    lastBlock(firstBlock), poolOffsetHead(0), poolOffsetTail(0),
-    constantPool(0), constantPoolCount(0)
-  { }
-
-  System* s;
-  Zone* zone;
-  Assembler::Client* client;
-  Vector code;
-  Task* tasks;
-  uint8_t* result;
-  MyBlock* firstBlock;
-  MyBlock* lastBlock;
-  PoolOffset* poolOffsetHead;
-  PoolOffset* poolOffsetTail;
-  ConstantPoolEntry* constantPool;
-  unsigned constantPoolCount;
-};
-
-class Task {
- public:
-  Task(Task* next): next(next) { }
-
-  virtual void run(Context* con) = 0;
-
-  Task* next;
-};
-
-typedef void (*OperationType)(Context*);
-
-typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
-
-typedef void (*BinaryOperationType)
-(Context*, unsigned, lir::Operand*, unsigned, lir::Operand*);
-
-typedef void (*TernaryOperationType)
-(Context*, unsigned, lir::Operand*, lir::Operand*,
- lir::Operand*);
-
-typedef void (*BranchOperationType)
-(Context*, lir::TernaryOperation, unsigned, lir::Operand*,
- lir::Operand*, lir::Operand*);
-
-class ArchitectureContext {
- public:
-  ArchitectureContext(System* s): s(s) { }
-
-  System* s;
-  OperationType operations[lir::OperationCount];
-  UnaryOperationType unaryOperations[lir::UnaryOperationCount
-                                     * lir::OperandTypeCount];
-  BinaryOperationType binaryOperations
-  [lir::BinaryOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
-  TernaryOperationType ternaryOperations
-  [lir::NonBranchTernaryOperationCount * lir::OperandTypeCount];
-  BranchOperationType branchOperations
-  [lir::BranchOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
-};
-
-inline Aborter* getAborter(Context* con) {
-  return con->s;
-}
-
-inline Aborter* getAborter(ArchitectureContext* con) {
-  return con->s;
-}
-
-class Offset: public Promise {
- public:
-  Offset(Context* con, MyBlock* block, unsigned offset, bool forTrace):
+  OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace):
     con(con), block(block), offset(offset), forTrace(forTrace)
   { }
 
@@ -378,10 +282,8 @@ class Offset: public Promise {
   bool forTrace;
 };
 
-Promise*
-offset(Context* con, bool forTrace = false)
-{
-  return new(con->zone) Offset(con, con->lastBlock, con->code.length(), forTrace);
+Promise* offsetPromise(Context* con, bool forTrace = false) {
+  return new(con->zone) OffsetPromise(con, con->lastBlock, con->code.length(), forTrace);
 }
 
 bool
@@ -1626,7 +1528,7 @@ branch(Context* con, lir::TernaryOperation op)
 void
 conditional(Context* con, int32_t branch, lir::Constant* target)
 {
-  appendOffsetTask(con, target->value, offset(con));
+  appendOffsetTask(con, target->value, offsetPromise(con));
   emit(con, branch);
 }
 
@@ -1845,7 +1747,7 @@ callC(Context* con, unsigned size UNUSED, lir::Constant* target)
 {
   assert(con, size == TargetBytesPerWord);
 
-  appendOffsetTask(con, target->value, offset(con));
+  appendOffsetTask(con, target->value, offsetPromise(con));
   emit(con, bl(0));
 }
 
@@ -1855,7 +1757,7 @@ longCallC(Context* con, unsigned size UNUSED, lir::Constant* target)
   assert(con, size == TargetBytesPerWord);
 
   lir::Register tmp(4);
-  moveCR2(con, TargetBytesPerWord, target, &tmp, offset(con));
+  moveCR2(con, TargetBytesPerWord, target, &tmp, offsetPromise(con));
   callR(con, TargetBytesPerWord, &tmp);
 }
 
@@ -1865,7 +1767,7 @@ longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
   assert(con, size == TargetBytesPerWord);
 
   lir::Register tmp(4); // a non-arg reg that we don't mind clobbering
-  moveCR2(con, TargetBytesPerWord, target, &tmp, offset(con));
+  moveCR2(con, TargetBytesPerWord, target, &tmp, offsetPromise(con));
   jumpR(con, TargetBytesPerWord, &tmp);
 }
 
@@ -1874,7 +1776,7 @@ jumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
 {
   assert(con, size == TargetBytesPerWord);
 
-  appendOffsetTask(con, target->value, offset(con));
+  appendOffsetTask(con, target->value, offsetPromise(con));
   emit(con, b(0));
 }
 
@@ -2120,7 +2022,7 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual unsigned argumentFootprint(unsigned footprint) {
-    return local::argumentFootprint(footprint);
+    return arm::argumentFootprint(footprint);
   }
 
   virtual bool argumentAlignment() {
@@ -2209,7 +2111,7 @@ class MyArchitecture: public Assembler::Architecture {
                          unsigned targetParameterFootprint, void** ip,
                          void** stack)
   {
-    local::nextFrame(&con, static_cast<uint32_t*>(start), size, footprint, link,
+    arm::nextFrame(&con, static_cast<uint32_t*>(start), size, footprint, link,
                 mostRecent, targetParameterFootprint, ip, stack);
   }
 
@@ -2796,7 +2698,7 @@ class MyAssembler: public Assembler {
   }
 
   virtual Promise* offset(bool forTrace) {
-    return local::offset(&con, forTrace);
+    return arm::offsetPromise(&con, forTrace);
   }
 
   virtual Block* endBlock(bool startNew) {
@@ -2864,15 +2766,12 @@ Assembler* MyArchitecture::makeAssembler(Allocator* allocator, Zone* zone) {
   return new(zone) MyAssembler(this->con.s, allocator, zone, this);
 }
 
-} // namespace
-
-namespace avian {
-namespace codegen {
+} // namespace arm
 
 Assembler::Architecture*
 makeArchitectureArm(System* system, bool)
 {
-  return new (allocate(system, sizeof(local::MyArchitecture))) local::MyArchitecture(system);
+  return new (allocate(system, sizeof(arm::MyArchitecture))) arm::MyArchitecture(system);
 }
 
 } // namespace codegen
diff --git a/src/codegen/arm/block.cpp b/src/codegen/arm/block.cpp
new file mode 100644
index 0000000000..7216e86f55
--- /dev/null
+++ b/src/codegen/arm/block.cpp
@@ -0,0 +1,39 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+void resolve(MyBlock*);
+
+unsigned padding(MyBlock*, unsigned);
+
+MyBlock::MyBlock(Context* context, unsigned offset):
+  context(context), next(0), poolOffsetHead(0), poolOffsetTail(0),
+  lastPoolOffsetTail(0), poolEventHead(0), poolEventTail(0),
+  lastEventOffset(0), offset(offset), start(~0), size(0)
+{ }
+
+unsigned MyBlock::resolve(unsigned start, Assembler::Block* next) {
+  this->start = start;
+  this->next = static_cast<MyBlock*>(next);
+
+  arm::resolve(this);
+
+  return start + size + padding(this, size);
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/block.h b/src/codegen/arm/block.h
new file mode 100644
index 0000000000..42f3cceaa3
--- /dev/null
+++ b/src/codegen/arm/block.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_BLOCK_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_BLOCK_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+#include "alloc-vector.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+class PoolEvent;
+
+class MyBlock: public Assembler::Block {
+ public:
+  MyBlock(Context* context, unsigned offset);
+
+  virtual unsigned resolve(unsigned start, Assembler::Block* next);
+
+  Context* context;
+  MyBlock* next;
+  PoolOffset* poolOffsetHead;
+  PoolOffset* poolOffsetTail;
+  PoolOffset* lastPoolOffsetTail;
+  PoolEvent* poolEventHead;
+  PoolEvent* poolEventTail;
+  unsigned lastEventOffset;
+  unsigned offset;
+  unsigned start;
+  unsigned size;
+};
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_BLOCK_H
diff --git a/src/codegen/arm/context.cpp b/src/codegen/arm/context.cpp
new file mode 100644
index 0000000000..d3619adf85
--- /dev/null
+++ b/src/codegen/arm/context.cpp
@@ -0,0 +1,27 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+Context::Context(vm::System* s, vm::Allocator* a, vm::Zone* zone):
+  s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
+  firstBlock(new(zone) MyBlock(this, 0)),
+  lastBlock(firstBlock), poolOffsetHead(0), poolOffsetTail(0),
+  constantPool(0), constantPoolCount(0)
+{ }
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/context.h b/src/codegen/arm/context.h
new file mode 100644
index 0000000000..ccba7e403d
--- /dev/null
+++ b/src/codegen/arm/context.h
@@ -0,0 +1,108 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_CONTEXT_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_CONTEXT_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+#include "alloc-vector.h"
+
+namespace vm {
+class System;
+class Allocator;
+class Zone;
+} // namespace vm
+
+namespace avian {
+
+namespace util {
+class Aborter;
+} // namespace util
+
+namespace codegen {
+namespace arm {
+
+class Task;
+class MyBlock;
+class PoolOffset;
+class ConstantPoolEntry;
+
+class Context {
+ public:
+  Context(vm::System* s, vm::Allocator* a, vm::Zone* zone);
+
+  vm::System* s;
+  vm::Zone* zone;
+  Assembler::Client* client;
+  vm::Vector code;
+  Task* tasks;
+  uint8_t* result;
+  MyBlock* firstBlock;
+  MyBlock* lastBlock;
+  PoolOffset* poolOffsetHead;
+  PoolOffset* poolOffsetTail;
+  ConstantPoolEntry* constantPool;
+  unsigned constantPoolCount;
+};
+
+class Task {
+ public:
+  Task(Task* next): next(next) { }
+
+  virtual void run(Context* con) = 0;
+
+  Task* next;
+};
+
+typedef void (*OperationType)(Context*);
+
+typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
+
+typedef void (*BinaryOperationType)
+(Context*, unsigned, lir::Operand*, unsigned, lir::Operand*);
+
+typedef void (*TernaryOperationType)
+(Context*, unsigned, lir::Operand*, lir::Operand*,
+ lir::Operand*);
+
+typedef void (*BranchOperationType)
+(Context*, lir::TernaryOperation, unsigned, lir::Operand*,
+ lir::Operand*, lir::Operand*);
+
+class ArchitectureContext {
+ public:
+  ArchitectureContext(vm::System* s): s(s) { }
+
+  vm::System* s;
+  OperationType operations[lir::OperationCount];
+  UnaryOperationType unaryOperations[lir::UnaryOperationCount
+                                     * lir::OperandTypeCount];
+  BinaryOperationType binaryOperations
+  [lir::BinaryOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
+  TernaryOperationType ternaryOperations
+  [lir::NonBranchTernaryOperationCount * lir::OperandTypeCount];
+  BranchOperationType branchOperations
+  [lir::BranchOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
+};
+
+inline avian::util::Aborter* getAborter(Context* c) {
+  return c->s;
+}
+
+inline avian::util::Aborter* getAborter(ArchitectureContext* c) {
+  return c->s;
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_CONTEXT_H

From 22d6ed1becde1d1507b6a7c5ad0ca348a9213b38 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Fri, 22 Feb 2013 21:19:53 -0700
Subject: [PATCH 11/22] further split out arm assembler

---
 src/codegen/arm/assembler.cpp   | 1830 +------------------------------
 src/codegen/arm/block.h         |    1 -
 src/codegen/arm/context.h       |    9 -
 src/codegen/arm/encode.h        |  184 ++++
 src/codegen/arm/fixup.cpp       |  175 +++
 src/codegen/arm/fixup.h         |  140 +++
 src/codegen/arm/multimethod.cpp |  142 +++
 src/codegen/arm/multimethod.h   |   46 +
 src/codegen/arm/operations.cpp  | 1235 +++++++++++++++++++++
 src/codegen/arm/operations.h    |  240 ++++
 src/codegen/arm/registers.h     |   52 +
 src/common.h                    |    6 +
 12 files changed, 2227 insertions(+), 1833 deletions(-)
 create mode 100644 src/codegen/arm/encode.h
 create mode 100644 src/codegen/arm/fixup.cpp
 create mode 100644 src/codegen/arm/fixup.h
 create mode 100644 src/codegen/arm/multimethod.cpp
 create mode 100644 src/codegen/arm/multimethod.h
 create mode 100644 src/codegen/arm/operations.cpp
 create mode 100644 src/codegen/arm/operations.h
 create mode 100644 src/codegen/arm/registers.h

diff --git a/src/codegen/arm/assembler.cpp b/src/codegen/arm/assembler.cpp
index c4b4ca7dfe..ac00fdb5cb 100644
--- a/src/codegen/arm/assembler.cpp
+++ b/src/codegen/arm/assembler.cpp
@@ -15,176 +15,24 @@
 
 #include "context.h"
 #include "block.h"
+#include "fixup.h"
+#include "multimethod.h"
+#include "encode.h"
+#include "operations.h"
+#include "registers.h"
 
 #include "alloc-vector.h"
 #include <avian/util/abort.h>
 
-
-#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
-#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
-#define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
-#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
-
 using namespace vm;
 using namespace avian::codegen;
 using namespace avian::util;
 
 namespace avian {
 namespace codegen {
+namespace arm {
 
 namespace isa {
-// SYSTEM REGISTERS
-const int FPSID = 0x0;
-const int FPSCR = 0x1;
-const int FPEXC = 0x8;
-// INSTRUCTION OPTIONS
-enum CONDITION { EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV };
-enum SHIFTOP { LSL, LSR, ASR, ROR };
-// INSTRUCTION FORMATS
-inline int DATA(int cond, int opcode, int S, int Rn, int Rd, int shift, int Sh, int Rm)
-{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
-inline int DATAS(int cond, int opcode, int S, int Rn, int Rd, int Rs, int Sh, int Rm)
-{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | Rs<<8 | Sh<<5 | 1<<4 | Rm; }
-inline int DATAI(int cond, int opcode, int S, int Rn, int Rd, int rot, int imm)
-{ return cond<<28 | 1<<25 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | rot<<8 | (imm&0xff); }
-inline int BRANCH(int cond, int L, int offset)
-{ return cond<<28 | 5<<25 | L<<24 | (offset&0xffffff); }
-inline int BRANCHX(int cond, int L, int Rm)
-{ return cond<<28 | 0x4bffc<<6 | L<<5 | 1<<4 | Rm; }
-inline int MULTIPLY(int cond, int mul, int S, int Rd, int Rn, int Rs, int Rm)
-{ return cond<<28 | mul<<21 | S<<20 | Rd<<16 | Rn<<12 | Rs<<8 | 9<<4 | Rm; }
-inline int XFER(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int shift, int Sh, int Rm)
-{ return cond<<28 | 3<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
-inline int XFERI(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int offset)
-{ return cond<<28 | 2<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | (offset&0xfff); }
-inline int XFER2(int cond, int P, int U, int W, int L, int Rn, int Rd, int S, int H, int Rm)
-{ return cond<<28 | P<<24 | U<<23 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | 1<<7 | S<<6 | H<<5 | 1<<4 | Rm; }
-inline int XFER2I(int cond, int P, int U, int W, int L, int Rn, int Rd, int offsetH, int S, int H, int offsetL)
-{ return cond<<28 | P<<24 | U<<23 | 1<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offsetH<<8 | 1<<7 | S<<6 | H<<5 | 1<<4 | (offsetL&0xf); }
-inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
-{ return cond<<28 | 0xe<<24 | opcode_1<<20 | CRn<<16 | CRd<<12 | cp_num<<8 | opcode_2<<5 | CRm; }
-inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset) // offset is in words, not bytes
-{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff)>>2; }
-inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
-{ return cond<<28 | 0xe<<24 | opcode_1<<21 | L<<20 | CRn<<16 | Rd<<12 | cp_num<<8 | opcode_2<<5 | 1<<4 | CRm; }
-inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
-{ return cond<<28 | 0xc4<<20 | L<<20 | Rn<<16 | Rd<<12 | cp_num<<8 | opcode<<4 | CRm;}
-// FIELD CALCULATORS
-inline int calcU(int imm) { return imm >= 0 ? 1 : 0; }
-// INSTRUCTIONS
-// The "cond" and "S" fields are set using the SETCOND() and SETS() functions
-inline int b(int offset) { return BRANCH(AL, 0, offset); }
-inline int bl(int offset) { return BRANCH(AL, 1, offset); }
-inline int bx(int Rm) { return BRANCHX(AL, 0, Rm); }
-inline int blx(int Rm) { return BRANCHX(AL, 1, Rm); }
-inline int and_(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x0, 0, Rn, Rd, shift, Sh, Rm); }
-inline int eor(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x1, 0, Rn, Rd, shift, Sh, Rm); }
-inline int rsb(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x3, 0, Rn, Rd, shift, Sh, Rm); }
-inline int add(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x4, 0, Rn, Rd, shift, Sh, Rm); }
-inline int adc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x5, 0, Rn, Rd, shift, Sh, Rm); }
-inline int rsc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x7, 0, Rn, Rd, shift, Sh, Rm); }
-inline int cmp(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xa, 1, Rn, 0, shift, Sh, Rm); }
-inline int orr(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xc, 0, Rn, Rd, shift, Sh, Rm); }
-inline int mov(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xd, 0, 0, Rd, shift, Sh, Rm); }
-inline int mvn(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xf, 0, 0, Rd, shift, Sh, Rm); }
-inline int andi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x0, 0, Rn, Rd, rot, imm); }
-inline int subi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x2, 0, Rn, Rd, rot, imm); }
-inline int rsbi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x3, 0, Rn, Rd, rot, imm); }
-inline int addi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x4, 0, Rn, Rd, rot, imm); }
-inline int adci(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x5, 0, Rn, Rd, rot, imm); }
-inline int bici(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xe, 0, Rn, Rd, rot, imm); }
-inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); }
-inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); }
-inline int orrsh(int Rd, int Rn, int Rm, int Rs, int Sh) { return DATAS(AL, 0xc, 0, Rn, Rd, Rs, Sh, Rm); }
-inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); }
-inline int mul(int Rd, int Rm, int Rs) { return MULTIPLY(AL, 0, 0, Rd, 0, Rs, Rm); }
-inline int mla(int Rd, int Rm, int Rs, int Rn) { return MULTIPLY(AL, 1, 0, Rd, Rn, Rs, Rm); }
-inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, RdHi, RdLo, Rs, Rm); }
-inline int ldr(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 1, Rn, Rd, 0, 0, Rm); }
-inline int ldri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 1, Rn, Rd, abs(imm)); }
-inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); }
-inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 1, Rn, Rd, abs(imm)); }
-inline int str(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 0, Rn, Rd, 0, 0, Rm); }
-inline int stri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 0, Rn, Rd, abs(imm)); }
-inline int strb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 0, Rn, Rd, 0, 0, Rm); }
-inline int strbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 0, Rn, Rd, abs(imm)); }
-inline int ldrh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 0, 1, Rm); }
-inline int ldrhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
-inline int strh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 0, Rn, Rd, 0, 1, Rm); }
-inline int strhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 0, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
-inline int ldrsh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 1, Rm); }
-inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 1, abs(imm)&0xf); }
-inline int ldrsb(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 0, Rm); }
-inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 0, abs(imm)&0xf); }
-// breakpoint instruction, this really has its own instruction format
-inline int bkpt(int16_t immed) { return 0xe1200070 | (((unsigned)immed & 0xffff) >> 4 << 8) | (immed & 0xf); }
-// COPROCESSOR INSTRUCTIONS
-inline int mcr(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 0, CRn, Rd, coproc, opcode_2, CRm); }
-inline int mcrr(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 0, Rn, Rd, coproc, opcode, CRm); }
-inline int mrc(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 1, CRn, Rd, coproc, opcode_2, CRm); }
-inline int mrrc(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 1, Rn, Rd, coproc, opcode, CRm); }
-// VFP FLOATING-POINT INSTRUCTIONS
-inline int fmuls(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|2, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
-inline int fadds(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
-inline int fsubs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1)|2, Sm>>1); }
-inline int fdivs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|8, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
-inline int fmuld(int Dd, int Dn, int Dm) { return COOP(AL, 2, Dn, Dd, 11, 0, Dm); }
-inline int faddd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 0, Dm); }
-inline int fsubd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 2, Dm); }
-inline int fdivd(int Dd, int Dn, int Dm) { return COOP(AL, 8, Dn, Dd, 11, 0, Dm); }
-inline int fcpys(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
-inline int fabss(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int fnegs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
-inline int fsqrts(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int fcmps(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 4, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
-inline int fcvtds(int Dd, int Sm) { return COOP(AL, 0xb, 7, Dd, 10, 6|(Sm&1), Sm>>1); }
-inline int fsitos(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 8, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int ftosizs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int fcpyd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 2, Dm); }
-inline int fabsd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 6, Dm); }
-inline int fnegd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 2, Dm); }
-inline int fsqrtd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 6, Dm); }
-// double-precision comparison instructions
-inline int fcmpd(int Dd, int Dm) { return COOP(AL, 0xb, 4, Dd, 11, 2, Dm); }
-// double-precision conversion instructions
-inline int fcvtsd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 7, Sd>>1, 11, 6, Dm); }
-inline int fsitod(int Dd, int Sm) { return COOP(AL, 0xb, 8, Dd, 11, 6|(Sm&1), Sm>>1); }
-inline int ftosizd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 11, 6, Dm); }
-// single load/store instructions for both precision types
-inline int flds(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 1, Rn, Sd>>1, 10, offset); };
-inline int fldd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 1, Rn, Dd, 11, offset); };
-inline int fsts(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 0, Rn, Sd>>1, 10, offset); };
-inline int fstd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 0, Rn, Dd, 11, offset); };
-// move between GPRs and FPRs
-inline int fmsr(int Sn, int Rd) { return mcr(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
-inline int fmrs(int Rd, int Sn) { return mrc(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
-// move to/from VFP system registers
-inline int fmrx(int Rd, int reg) { return mrc(10, 7, Rd, reg, 0); }
-// these move around pairs of single-precision registers
-inline int fmdrr(int Dm, int Rd, int Rn) { return mcrr(11, 1, Rd, Rn, Dm); }
-inline int fmrrd(int Rd, int Rn, int Dm) { return mrrc(11, 1, Rd, Rn, Dm); }
-// FLAG SETTERS
-inline int SETCOND(int ins, int cond) { return ((ins&0x0fffffff) | (cond<<28)); }
-inline int SETS(int ins) { return ins | 1<<20; }
-// PSEUDO-INSTRUCTIONS
-inline int lsl(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSL); }
-inline int lsli(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSL, imm); }
-inline int lsr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSR); }
-inline int lsri(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSR, imm); }
-inline int asr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, ASR); }
-inline int asri(int Rd, int Rm, int imm) { return mov(Rd, Rm, ASR, imm); }
-inline int beq(int offset) { return SETCOND(b(offset), EQ); }
-inline int bne(int offset) { return SETCOND(b(offset), NE); }
-inline int bls(int offset) { return SETCOND(b(offset), LS); }
-inline int bhi(int offset) { return SETCOND(b(offset), HI); }
-inline int blt(int offset) { return SETCOND(b(offset), LT); }
-inline int bgt(int offset) { return SETCOND(b(offset), GT); }
-inline int ble(int offset) { return SETCOND(b(offset), LE); }
-inline int bge(int offset) { return SETCOND(b(offset), GE); }
-inline int blo(int offset) { return SETCOND(b(offset), CC); }
-inline int bhs(int offset) { return SETCOND(b(offset), CS); }
-inline int bpl(int offset) { return SETCOND(b(offset), PL); }
-inline int fmstat() { return fmrx(15, FPSCR); }
 // HARDWARE FLAGS
 bool vfpSupported() {
   // TODO: Use at runtime detection
@@ -199,1602 +47,31 @@ bool vfpSupported() {
   return false;
 #endif
 }
-}
+} // namespace isa
 
-namespace arm {
-
-const uint64_t MASK_LO32 = 0xffffffff;
-const unsigned MASK_LO16 = 0xffff;
-const unsigned MASK_LO8  = 0xff;
 inline unsigned lo8(int64_t i) { return (unsigned)(i&MASK_LO8); }
 
-inline bool isOfWidth(int64_t i, int size) { return static_cast<uint64_t>(i) >> size == 0; }
-
-const int N_GPRS = 16;
-const int N_FPRS = 16;
-const uint32_t GPR_MASK = 0xffff;
-const uint32_t FPR_MASK = 0xffff0000;
-// for source-to-destination masks
-const uint64_t GPR_MASK64 = GPR_MASK | (uint64_t)GPR_MASK << 32;
-// making the following const somehow breaks debug symbol output in GDB
-/* const */ uint64_t FPR_MASK64 = FPR_MASK | (uint64_t)FPR_MASK << 32;
-
 const RegisterFile MyRegisterFileWithoutFloats(GPR_MASK, 0);
 const RegisterFile MyRegisterFileWithFloats(GPR_MASK, FPR_MASK);
 
-inline bool isFpr(lir::Register* reg) {
-  return reg->low >= N_GPRS;
-}
-
-inline int fpr64(int reg) { return reg - N_GPRS; }
-inline int fpr64(lir::Register* reg) { return fpr64(reg->low); }
-inline int fpr32(int reg) { return fpr64(reg) << 1; }
-inline int fpr32(lir::Register* reg) { return fpr64(reg) << 1; }
-
 const unsigned FrameHeaderSize = 1;
 
 const unsigned StackAlignmentInBytes = 8;
 const unsigned StackAlignmentInWords
 = StackAlignmentInBytes / TargetBytesPerWord;
 
-const int ThreadRegister = 8;
-const int StackRegister = 13;
-const int LinkRegister = 14;
-const int ProgramCounter = 15;
-
-const int32_t PoolOffsetMask = 0xFFF;
-
-const bool DebugPool = false;
-
-class PoolOffset;
-class PoolEvent;
-
 void
 resolve(MyBlock*);
 
 unsigned
 padding(MyBlock*, unsigned);
 
-class Task;
 class ConstantPoolEntry;
 
-class OffsetPromise: public Promise {
- public:
-  OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace):
-    con(con), block(block), offset(offset), forTrace(forTrace)
-  { }
-
-  virtual bool resolved() {
-    return block->start != static_cast<unsigned>(~0);
-  }
-  
-  virtual int64_t value() {
-    assert(con, resolved());
-
-    unsigned o = offset - block->offset;
-    return block->start + padding
-      (block, forTrace ? o - TargetBytesPerWord : o) + o;
-  }
-
-  Context* con;
-  MyBlock* block;
-  unsigned offset;
-  bool forTrace;
-};
-
-Promise* offsetPromise(Context* con, bool forTrace = false) {
-  return new(con->zone) OffsetPromise(con, con->lastBlock, con->code.length(), forTrace);
-}
-
-bool
-bounded(int right, int left, int32_t v)
-{
-  return ((v << left) >> left) == v and ((v >> right) << right) == v;
-}
-
-void*
-updateOffset(System* s, uint8_t* instruction, int64_t value)
-{
-  // ARM's PC is two words ahead, and branches drop the bottom 2 bits.
-  int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
-
-  int32_t mask;
-  expect(s, bounded(0, 8, v));
-  mask = 0xFFFFFF;
-
-  int32_t* p = reinterpret_cast<int32_t*>(instruction);
-  *p = (v & mask) | ((~mask) & *p);
-
-  return instruction + 4;
-}
-
-class OffsetListener: public Promise::Listener {
- public:
-  OffsetListener(System* s, uint8_t* instruction):
-    s(s),
-    instruction(instruction)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    void* p = updateOffset(s, instruction, value);
-    if (location) *location = p;
-    return false;
-  }
-
-  System* s;
-  uint8_t* instruction;
-};
-
-class OffsetTask: public Task {
- public:
-  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset):
-    Task(next),
-    promise(promise),
-    instructionOffset(instructionOffset)
-  { }
-
-  virtual void run(Context* con) {
-    if (promise->resolved()) {
-      updateOffset
-        (con->s, con->result + instructionOffset->value(), promise->value());
-    } else {
-      new (promise->listen(sizeof(OffsetListener)))
-        OffsetListener(con->s, con->result + instructionOffset->value());
-    }
-  }
-
-  Promise* promise;
-  Promise* instructionOffset;
-};
-
-void
-appendOffsetTask(Context* con, Promise* promise, Promise* instructionOffset)
-{
-  con->tasks = new(con->zone) OffsetTask(con->tasks, promise, instructionOffset);
-}
-
-inline unsigned
-index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
-inline unsigned
-index(ArchitectureContext*,
-      lir::BinaryOperation operation,
-      lir::OperandType operand1,
-      lir::OperandType operand2)
-{
-  return operation
-    + (lir::BinaryOperationCount * operand1)
-    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
-}
-
-inline unsigned
-index(ArchitectureContext* con UNUSED,
-      lir::TernaryOperation operation,
-      lir::OperandType operand1)
-{
-  assert(con, not isBranch(operation));
-
-  return operation + (lir::NonBranchTernaryOperationCount * operand1);
-}
-
-unsigned
-branchIndex(ArchitectureContext* con UNUSED, lir::OperandType operand1,
-            lir::OperandType operand2)
-{
-  return operand1 + (lir::OperandTypeCount * operand2);
-}
-
 // BEGIN OPERATION COMPILERS
 
 using namespace isa;
 
-// shortcut functions
-inline void emit(Context* con, int code) { con->code.append4(code); }
-
-inline int newTemp(Context* con) {
-  return con->client->acquireTemporary(GPR_MASK);
-}
-
-inline int newTemp(Context* con, unsigned mask) {
-  return con->client->acquireTemporary(mask);
-}
-
-inline void freeTemp(Context* con, int r) {
-  con->client->releaseTemporary(r);
-}
-
-inline int64_t getValue(lir::Constant* con) {
-  return con->value->value();
-}
-
-inline lir::Register makeTemp(Context* con) {
-  lir::Register tmp(newTemp(con));
-  return tmp;
-}
-
-inline lir::Register makeTemp64(Context* con) {
-  lir::Register tmp(newTemp(con), newTemp(con));
-  return tmp;
-}
-
-inline void freeTemp(Context* con, const lir::Register& tmp) {
-  if (tmp.low != lir::NoRegister) freeTemp(con, tmp.low);
-  if (tmp.high != lir::NoRegister) freeTemp(con, tmp.high);
-}
-
-inline void
-write4(uint8_t* dst, uint32_t v)
-{
-  memcpy(dst, &v, 4);
-}
-
-void
-andC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst);
-
-void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  if (size == 8) {
-    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
-    ResolvedPromise maskPromise(0x3F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp3);
-    andC(con, 4, &mask, a, &dst);
-    emit(con, lsl(tmp1, b->high, tmp3));
-    emit(con, rsbi(tmp2, tmp3, 32));
-    emit(con, orrsh(tmp1, tmp1, b->low, tmp2, LSR));
-    emit(con, SETS(subi(t->high, tmp3, 32)));
-    emit(con, SETCOND(mov(t->high, tmp1), MI));
-    emit(con, SETCOND(lsl(t->high, b->low, t->high), PL));
-    emit(con, lsl(t->low, b->low, tmp3));
-    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
-  } else {
-    int tmp = newTemp(con);
-    ResolvedPromise maskPromise(0x1F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp);
-    andC(con, size, &mask, a, &dst);
-    emit(con, lsl(t->low, b->low, tmp));
-    freeTemp(con, tmp);
-  }
-}
-
-void
-moveRR(Context* con, unsigned srcSize, lir::Register* src,
-       unsigned dstSize, lir::Register* dst);
-
-void shiftLeftC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  assert(con, size == TargetBytesPerWord);
-  if (getValue(a) & 0x1F) {
-    emit(con, lsli(t->low, b->low, getValue(a) & 0x1F));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  if (size == 8) {
-    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
-    ResolvedPromise maskPromise(0x3F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp3);
-    andC(con, 4, &mask, a, &dst);
-    emit(con, lsr(tmp1, b->low, tmp3));
-    emit(con, rsbi(tmp2, tmp3, 32));
-    emit(con, orrsh(tmp1, tmp1, b->high, tmp2, LSL));
-    emit(con, SETS(subi(t->low, tmp3, 32)));
-    emit(con, SETCOND(mov(t->low, tmp1), MI));
-    emit(con, SETCOND(asr(t->low, b->high, t->low), PL));
-    emit(con, asr(t->high, b->high, tmp3));
-    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
-  } else {
-    int tmp = newTemp(con);
-    ResolvedPromise maskPromise(0x1F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp);
-    andC(con, size, &mask, a, &dst);
-    emit(con, asr(t->low, b->low, tmp));
-    freeTemp(con, tmp);
-  }
-}
-
-void shiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  assert(con, size == TargetBytesPerWord);
-  if (getValue(a) & 0x1F) {
-    emit(con, asri(t->low, b->low, getValue(a) & 0x1F));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  int tmpShift = newTemp(con);
-  ResolvedPromise maskPromise(size == 8 ? 0x3F : 0x1F);
-  lir::Constant mask(&maskPromise);
-  lir::Register dst(tmpShift);
-  andC(con, 4, &mask, a, &dst);
-  emit(con, lsr(t->low, b->low, tmpShift));
-  if (size == 8) {
-    int tmpHi = newTemp(con), tmpLo = newTemp(con);
-    emit(con, SETS(rsbi(tmpHi, tmpShift, 32)));
-    emit(con, lsl(tmpLo, b->high, tmpHi));
-    emit(con, orr(t->low, t->low, tmpLo));
-    emit(con, addi(tmpHi, tmpShift, -32));
-    emit(con, lsr(tmpLo, b->high, tmpHi));
-    emit(con, orr(t->low, t->low, tmpLo));
-    emit(con, lsr(t->high, b->high, tmpShift));
-    freeTemp(con, tmpHi); freeTemp(con, tmpLo);
-  }
-  freeTemp(con, tmpShift);
-}
-
-void unsignedShiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  assert(con, size == TargetBytesPerWord);
-  if (getValue(a) & 0x1F) {
-    emit(con, lsri(t->low, b->low, getValue(a) & 0x1F));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-class ConstantPoolEntry: public Promise {
- public:
-  ConstantPoolEntry(Context* con, Promise* constant, ConstantPoolEntry* next,
-                    Promise* callOffset):
-    con(con), constant(constant), next(next), callOffset(callOffset),
-    address(0)
-  { }
-
-  virtual int64_t value() {
-    assert(con, resolved());
-
-    return reinterpret_cast<int64_t>(address);
-  }
-
-  virtual bool resolved() {
-    return address != 0;
-  }
-
-  Context* con;
-  Promise* constant;
-  ConstantPoolEntry* next;
-  Promise* callOffset;
-  void* address;
-  unsigned constantPoolCount;
-};
-
-class ConstantPoolListener: public Promise::Listener {
- public:
-  ConstantPoolListener(System* s, target_uintptr_t* address,
-                       uint8_t* returnAddress):
-    s(s),
-    address(address),
-    returnAddress(returnAddress)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    *address = value;
-    if (location) {
-      *location = returnAddress ? static_cast<void*>(returnAddress) : address;
-    }
-    return true;
-  }
-
-  System* s;
-  target_uintptr_t* address;
-  uint8_t* returnAddress;
-};
-
-class PoolOffset {
- public:
-  PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset):
-    block(block), entry(entry), next(0), offset(offset)
-  { }
-
-  MyBlock* block;
-  ConstantPoolEntry* entry;
-  PoolOffset* next;
-  unsigned offset;
-};
-
-class PoolEvent {
- public:
-  PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail,
-            unsigned offset):
-    poolOffsetHead(poolOffsetHead), poolOffsetTail(poolOffsetTail), next(0),
-    offset(offset)
-  { }
-
-  PoolOffset* poolOffsetHead;
-  PoolOffset* poolOffsetTail;
-  PoolEvent* next;
-  unsigned offset;
-};
-
-void
-appendConstantPoolEntry(Context* con, Promise* constant, Promise* callOffset)
-{
-  if (constant->resolved()) {
-    // make a copy, since the original might be allocated on the
-    // stack, and we need our copy to live until assembly is complete
-    constant = new(con->zone) ResolvedPromise(constant->value());
-  }
-
-  con->constantPool = new(con->zone) ConstantPoolEntry(con, constant, con->constantPool, callOffset);
-
-  ++ con->constantPoolCount;
-
-  PoolOffset* o = new(con->zone) PoolOffset(con->lastBlock, con->constantPool, con->code.length() - con->lastBlock->offset);
-
-  if (DebugPool) {
-    fprintf(stderr, "add pool offset %p %d to block %p\n",
-            o, o->offset, con->lastBlock);
-  }
-
-  if (con->lastBlock->poolOffsetTail) {
-    con->lastBlock->poolOffsetTail->next = o;
-  } else {
-    con->lastBlock->poolOffsetHead = o;
-  }
-  con->lastBlock->poolOffsetTail = o;
-}
-
-void
-appendPoolEvent(Context* con, MyBlock* b, unsigned offset, PoolOffset* head,
-                PoolOffset* tail)
-{
-  PoolEvent* e = new(con->zone) PoolEvent(head, tail, offset);
-
-  if (b->poolEventTail) {
-    b->poolEventTail->next = e;
-  } else {
-    b->poolEventHead = e;
-  }
-  b->poolEventTail = e;
-}
-
-bool
-needJump(MyBlock* b)
-{
-  return b->next or b->size != (b->size & PoolOffsetMask);
-}
-
-unsigned
-padding(MyBlock* b, unsigned offset)
-{
-  unsigned total = 0;
-  for (PoolEvent* e = b->poolEventHead; e; e = e->next) {
-    if (e->offset <= offset) {
-      if (needJump(b)) {
-        total += TargetBytesPerWord;
-      }
-      for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) {
-        total += TargetBytesPerWord;
-      }
-    } else {
-      break;
-    }
-  }
-  return total;
-}
-
-void
-resolve(MyBlock* b)
-{
-  Context* con = b->context;
-
-  if (b->poolOffsetHead) {
-    if (con->poolOffsetTail) {
-      con->poolOffsetTail->next = b->poolOffsetHead;
-    } else {
-      con->poolOffsetHead = b->poolOffsetHead;
-    }
-    con->poolOffsetTail = b->poolOffsetTail;
-  }
-
-  if (con->poolOffsetHead) {
-    bool append;
-    if (b->next == 0 or b->next->poolEventHead) {
-      append = true;
-    } else {
-      int32_t v = (b->start + b->size + b->next->size + TargetBytesPerWord - 8)
-        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
-
-      append = (v != (v & PoolOffsetMask));
-
-      if (DebugPool) {
-        fprintf(stderr,
-                "current %p %d %d next %p %d %d\n",
-                b, b->start, b->size, b->next, b->start + b->size,
-                b->next->size);
-        fprintf(stderr,
-                "offset %p %d is of distance %d to next block; append? %d\n",
-                con->poolOffsetHead, con->poolOffsetHead->offset, v, append);
-      }
-    }
-
-    if (append) {
-#ifndef NDEBUG
-      int32_t v = (b->start + b->size - 8)
-        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
-      
-      expect(con, v == (v & PoolOffsetMask));
-#endif // not NDEBUG
-
-      appendPoolEvent(con, b, b->size, con->poolOffsetHead, con->poolOffsetTail);
-
-      if (DebugPool) {
-        for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) {
-          fprintf(stderr,
-                  "include %p %d in pool event %p at offset %d in block %p\n",
-                  o, o->offset, b->poolEventTail, b->size, b);
-        }
-      }
-
-      con->poolOffsetHead = 0;
-      con->poolOffsetTail = 0;
-    }
-  }
-}
-
-void
-jumpR(Context* con, unsigned size UNUSED, lir::Register* target)
-{
-  assert(con, size == TargetBytesPerWord);
-  emit(con, bx(target->low));
-}
-
-void
-swapRR(Context* con, unsigned aSize, lir::Register* a,
-       unsigned bSize, lir::Register* b)
-{
-  assert(con, aSize == TargetBytesPerWord);
-  assert(con, bSize == TargetBytesPerWord);
-
-  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-  moveRR(con, aSize, a, bSize, &tmp);
-  moveRR(con, bSize, b, aSize, a);
-  moveRR(con, bSize, &tmp, bSize, b);
-  con->client->releaseTemporary(tmp.low);
-}
-
-void
-moveRR(Context* con, unsigned srcSize, lir::Register* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  bool srcIsFpr = isFpr(src);
-  bool dstIsFpr = isFpr(dst);
-  if (srcIsFpr || dstIsFpr) {   // FPR(s) involved
-    assert(con, srcSize == dstSize);
-    const bool dprec = srcSize == 8;
-    if (srcIsFpr && dstIsFpr) { // FPR to FPR
-      if (dprec) emit(con, fcpyd(fpr64(dst), fpr64(src))); // double
-      else       emit(con, fcpys(fpr32(dst), fpr32(src))); // single
-    } else if (srcIsFpr) {      // FPR to GPR
-      if (dprec) emit(con, fmrrd(dst->low, dst->high, fpr64(src)));
-      else       emit(con, fmrs(dst->low, fpr32(src)));
-    } else {                    // GPR to FPR
-      if (dprec) emit(con, fmdrr(fpr64(dst->low), src->low, src->high));
-      else       emit(con, fmsr(fpr32(dst), src->low));
-    }
-    return;
-  }
-
-  switch (srcSize) {
-  case 1:
-    emit(con, lsli(dst->low, src->low, 24));
-    emit(con, asri(dst->low, dst->low, 24));
-    break;
-
-  case 2:
-    emit(con, lsli(dst->low, src->low, 16));
-    emit(con, asri(dst->low, dst->low, 16));
-    break;
-
-  case 4:
-  case 8:
-    if (srcSize == 4 and dstSize == 8) {
-      moveRR(con, 4, src, 4, dst);
-      emit(con, asri(dst->high, src->low, 31));
-    } else if (srcSize == 8 and dstSize == 8) {
-      lir::Register srcHigh(src->high);
-      lir::Register dstHigh(dst->high);
-
-      if (src->high == dst->low) {
-        if (src->low == dst->high) {
-          swapRR(con, 4, src, 4, dst);
-        } else {
-          moveRR(con, 4, &srcHigh, 4, &dstHigh);
-          moveRR(con, 4, src, 4, dst);
-        }
-      } else {
-        moveRR(con, 4, src, 4, dst);
-        moveRR(con, 4, &srcHigh, 4, &dstHigh);
-      }
-    } else if (src->low != dst->low) {
-      emit(con, mov(dst->low, src->low));
-    }
-    break;
-
-  default: abort(con);
-  }
-}
-
-void
-moveZRR(Context* con, unsigned srcSize, lir::Register* src,
-        unsigned, lir::Register* dst)
-{
-  switch (srcSize) {
-  case 2:
-    emit(con, lsli(dst->low, src->low, 16));
-    emit(con, lsri(dst->low, dst->low, 16));
-    break;
-
-  default: abort(con);
-  }
-}
-
-void moveCR(Context* con, unsigned size, lir::Constant* src,
-            unsigned, lir::Register* dst);
-
-void
-moveCR2(Context* con, unsigned size, lir::Constant* src,
-        lir::Register* dst, Promise* callOffset)
-{
-  if (isFpr(dst)) { // floating-point
-    lir::Register tmp = size > 4 ? makeTemp64(con) :
-                                         makeTemp(con);
-    moveCR(con, size, src, size, &tmp);
-    moveRR(con, size, &tmp, size, dst);
-    freeTemp(con, tmp);
-  } else if (size > 4) { 
-    uint64_t value = (uint64_t)src->value->value();
-    ResolvedPromise loBits(value & MASK_LO32);
-    lir::Constant srcLo(&loBits);
-    ResolvedPromise hiBits(value >> 32); 
-    lir::Constant srcHi(&hiBits);
-    lir::Register dstHi(dst->high);
-    moveCR(con, 4, &srcLo, 4, dst);
-    moveCR(con, 4, &srcHi, 4, &dstHi);
-  } else if (src->value->resolved() and isOfWidth(getValue(src), 8)) {
-    emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate
-  } else {
-    appendConstantPoolEntry(con, src->value, callOffset);
-    emit(con, ldri(dst->low, ProgramCounter, 0)); // load 32 bits
-  }
-}
-
-void
-moveCR(Context* con, unsigned size, lir::Constant* src,
-       unsigned, lir::Register* dst)
-{
-  moveCR2(con, size, src, dst, 0);
-}
-
-void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, SETS(add(t->low, a->low, b->low)));
-    emit(con, adc(t->high, a->high, b->high));
-  } else {
-    emit(con, add(t->low, a->low, b->low));
-  }
-}
-
-void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, SETS(rsb(t->low, a->low, b->low)));
-    emit(con, rsc(t->high, a->high, b->high));
-  } else {
-    emit(con, rsb(t->low, a->low, b->low));
-  }
-}
-
-void
-addC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  int32_t v = a->value->value();
-  if (v) {
-    if (v > 0 and v < 256) {
-      emit(con, addi(dst->low, b->low, v));
-    } else if (v > 0 and v < 1024 and v % 4 == 0) {
-      emit(con, addi(dst->low, b->low, v >> 2, 15));
-    } else {
-      // todo
-      abort(con);
-    }
-  } else {
-    moveRR(con, size, b, size, dst);
-  }
-}
-
-void
-subC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  int32_t v = a->value->value();
-  if (v) {
-    if (v > 0 and v < 256) {
-      emit(con, subi(dst->low, b->low, v));
-    } else if (v > 0 and v < 1024 and v % 4 == 0) {
-      emit(con, subi(dst->low, b->low, v >> 2, 15));
-    } else {
-      // todo
-      abort(con);
-    }
-  } else {
-    moveRR(con, size, b, size, dst);
-  }
-}
-
-void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    bool useTemporaries = b->low == t->low;
-    int tmpLow  = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->low;
-    int tmpHigh = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->high;
-
-    emit(con, umull(tmpLow, tmpHigh, a->low, b->low));
-    emit(con, mla(tmpHigh, a->low, b->high, tmpHigh));
-    emit(con, mla(tmpHigh, a->high, b->low, tmpHigh));
-
-    if (useTemporaries) {
-      emit(con, mov(t->low, tmpLow));
-      emit(con, mov(t->high, tmpHigh));
-      con->client->releaseTemporary(tmpLow);
-      con->client->releaseTemporary(tmpHigh);
-    }
-  } else {
-    emit(con, mul(t->low, a->low, b->low));
-  }
-}
-
-void floatAbsoluteRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fabsd(fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fabss(fpr32(b), fpr32(a)));
-  }
-}
-
-void floatNegateRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fnegd(fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fnegs(fpr32(b), fpr32(a)));
-  }
-}
-
-void float2FloatRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fcvtsd(fpr32(b), fpr64(a)));
-  } else {
-    emit(con, fcvtds(fpr64(b), fpr32(a)));
-  }
-}
-
-void float2IntRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  int tmp = newTemp(con, FPR_MASK);
-  int ftmp = fpr32(tmp);
-  if (size == 8) { // double to int
-    emit(con, ftosizd(ftmp, fpr64(a)));
-  } else {         // float to int
-    emit(con, ftosizs(ftmp, fpr32(a)));
-  }                // else thunked
-  emit(con, fmrs(b->low, ftmp));
-  freeTemp(con, tmp);
-}
-
-void int2FloatRR(Context* con, unsigned, lir::Register* a, unsigned size, lir::Register* b) {
-  emit(con, fmsr(fpr32(b), a->low));
-  if (size == 8) { // int to double
-    emit(con, fsitod(fpr64(b), fpr32(b)));
-  } else {         // int to float
-    emit(con, fsitos(fpr32(b), fpr32(b)));
-  }                // else thunked
-}
-
-void floatSqrtRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fsqrtd(fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fsqrts(fpr32(b), fpr32(a)));
-  }
-}
-
-void floatAddR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, faddd(fpr64(t), fpr64(a), fpr64(b)));
-  } else {
-    emit(con, fadds(fpr32(t), fpr32(a), fpr32(b)));
-  }
-}
-
-void floatSubtractR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, fsubd(fpr64(t), fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fsubs(fpr32(t), fpr32(b), fpr32(a)));
-  }
-}
-
-void floatMultiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, fmuld(fpr64(t), fpr64(a), fpr64(b)));
-  } else {
-    emit(con, fmuls(fpr32(t), fpr32(a), fpr32(b)));
-  }
-}
-
-void floatDivideR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) { 
-    emit(con, fdivd(fpr64(t), fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fdivs(fpr32(t), fpr32(b), fpr32(a)));
-  }
-}
-
-int
-normalize(Context* con, int offset, int index, unsigned scale, 
-          bool* preserveIndex, bool* release)
-{
-  if (offset != 0 or scale != 1) {
-    lir::Register normalizedIndex
-      (*preserveIndex ? con->client->acquireTemporary(GPR_MASK) : index);
-    
-    if (*preserveIndex) {
-      *release = true;
-      *preserveIndex = false;
-    } else {
-      *release = false;
-    }
-
-    int scaled;
-
-    if (scale != 1) {
-      lir::Register unscaledIndex(index);
-
-      ResolvedPromise scalePromise(log(scale));
-      lir::Constant scaleConstant(&scalePromise);
-      
-      shiftLeftC(con, TargetBytesPerWord, &scaleConstant,
-                 &unscaledIndex, &normalizedIndex);
-
-      scaled = normalizedIndex.low;
-    } else {
-      scaled = index;
-    }
-
-    if (offset != 0) {
-      lir::Register untranslatedIndex(scaled);
-
-      ResolvedPromise offsetPromise(offset);
-      lir::Constant offsetConstant(&offsetPromise);
-
-      lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-      moveCR(con, TargetBytesPerWord, &offsetConstant, TargetBytesPerWord, &tmp);
-      addR(con, TargetBytesPerWord, &tmp, &untranslatedIndex, &normalizedIndex);
-      con->client->releaseTemporary(tmp.low);
-    }
-
-    return normalizedIndex.low;
-  } else {
-    *release = false;
-    return index;
-  }
-}
-
-void
-store(Context* con, unsigned size, lir::Register* src,
-      int base, int offset, int index, unsigned scale, bool preserveIndex)
-{
-  if (index != lir::NoRegister) {
-    bool release;
-    int normalized = normalize
-      (con, offset, index, scale, &preserveIndex, &release);
-
-    if (!isFpr(src)) { // GPR store
-      switch (size) {
-      case 1:
-        emit(con, strb(src->low, base, normalized));
-        break;
-
-      case 2:
-        emit(con, strh(src->low, base, normalized));
-        break;
-
-      case 4:
-        emit(con, str(src->low, base, normalized));
-        break;
-
-      case 8: { // split into 2 32-bit stores
-        lir::Register srcHigh(src->high);
-        store(con, 4, &srcHigh, base, 0, normalized, 1, preserveIndex);
-        store(con, 4, src, base, 4, normalized, 1, preserveIndex);
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR store
-      lir::Register base_(base),
-                          normalized_(normalized),
-                          absAddr = makeTemp(con);
-      // FPR stores have only bases, so we must add the index
-      addR(con, TargetBytesPerWord, &base_, &normalized_, &absAddr);
-      // double-precision
-      if (size == 8) emit(con, fstd(fpr64(src), absAddr.low));
-      // single-precision
-      else           emit(con, fsts(fpr32(src), absAddr.low));
-      freeTemp(con, absAddr);
-    }
-
-    if (release) con->client->releaseTemporary(normalized);
-  } else if (size == 8
-             or abs(offset) == (abs(offset) & 0xFF)
-             or (size != 2 and abs(offset) == (abs(offset) & 0xFFF)))
-  {
-    if (!isFpr(src)) { // GPR store
-      switch (size) {
-      case 1:
-        emit(con, strbi(src->low, base, offset));
-        break;
-
-      case 2:
-        emit(con, strhi(src->low, base, offset));
-        break;
-
-      case 4:
-        emit(con, stri(src->low, base, offset));
-        break;
-
-      case 8: { // split into 2 32-bit stores
-        lir::Register srcHigh(src->high);
-        store(con, 4, &srcHigh, base, offset, lir::NoRegister, 1, false);
-        store(con, 4, src, base, offset + 4, lir::NoRegister, 1, false);
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR store
-      // double-precision
-      if (size == 8) emit(con, fstd(fpr64(src), base, offset));
-      // single-precision
-      else           emit(con, fsts(fpr32(src), base, offset));
-    }
-  } else {
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    ResolvedPromise offsetPromise(offset);
-    lir::Constant offsetConstant(&offsetPromise);
-    moveCR(con, TargetBytesPerWord, &offsetConstant,
-           TargetBytesPerWord, &tmp);
-    
-    store(con, size, src, base, 0, tmp.low, 1, false);
-
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-moveRM(Context* con, unsigned srcSize, lir::Register* src,
-       unsigned dstSize UNUSED, lir::Memory* dst)
-{
-  assert(con, srcSize == dstSize);
-
-  store(con, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
-}
-
-void
-load(Context* con, unsigned srcSize, int base, int offset, int index,
-     unsigned scale, unsigned dstSize, lir::Register* dst,
-     bool preserveIndex, bool signExtend)
-{
-  if (index != lir::NoRegister) {
-    bool release;
-    int normalized = normalize
-      (con, offset, index, scale, &preserveIndex, &release);
-
-    if (!isFpr(dst)) { // GPR load
-      switch (srcSize) {
-      case 1:
-        if (signExtend) {
-          emit(con, ldrsb(dst->low, base, normalized));
-        } else {
-          emit(con, ldrb(dst->low, base, normalized));
-        }
-        break;
-
-      case 2:
-        if (signExtend) {
-          emit(con, ldrsh(dst->low, base, normalized));
-        } else {
-          emit(con, ldrh(dst->low, base, normalized));
-        }
-        break;
-
-      case 4:
-      case 8: {
-        if (srcSize == 4 and dstSize == 8) {
-          load(con, 4, base, 0, normalized, 1, 4, dst, preserveIndex,
-               false);
-          moveRR(con, 4, dst, 8, dst);
-        } else if (srcSize == 8 and dstSize == 8) {
-          lir::Register dstHigh(dst->high);
-          load(con, 4, base, 0, normalized, 1, 4, &dstHigh,
-              preserveIndex, false);
-          load(con, 4, base, 4, normalized, 1, 4, dst, preserveIndex,
-               false);
-        } else {
-          emit(con, ldr(dst->low, base, normalized));
-        }
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR load
-      lir::Register base_(base),
-                          normalized_(normalized),
-                          absAddr = makeTemp(con);
-      // VFP loads only have bases, so we must add the index
-      addR(con, TargetBytesPerWord, &base_, &normalized_, &absAddr);
-      // double-precision
-      if (srcSize == 8) emit(con, fldd(fpr64(dst), absAddr.low));
-      // single-precision
-      else              emit(con, flds(fpr32(dst), absAddr.low));
-      freeTemp(con, absAddr);
-    }
-
-    if (release) con->client->releaseTemporary(normalized);
-  } else if ((srcSize == 8 and dstSize == 8)
-             or abs(offset) == (abs(offset) & 0xFF)
-             or (srcSize != 2
-                 and (srcSize != 1 or not signExtend)
-                 and abs(offset) == (abs(offset) & 0xFFF)))
-  {
-    if (!isFpr(dst)) { // GPR load
-      switch (srcSize) {
-      case 1:
-        if (signExtend) {
-          emit(con, ldrsbi(dst->low, base, offset));
-        } else {
-          emit(con, ldrbi(dst->low, base, offset));
-        }
-        break;
-
-      case 2:
-        if (signExtend) {
-          emit(con, ldrshi(dst->low, base, offset));
-        } else {
-          emit(con, ldrhi(dst->low, base, offset));
-        }
-        break;
-
-      case 4:
-        emit(con, ldri(dst->low, base, offset));
-        break;
-
-      case 8: {
-        if (dstSize == 8) {
-          lir::Register dstHigh(dst->high);
-          load(con, 4, base, offset, lir::NoRegister, 1, 4, &dstHigh, false,
-               false);
-          load(con, 4, base, offset + 4, lir::NoRegister, 1, 4, dst, false,
-               false);
-        } else {
-          emit(con, ldri(dst->low, base, offset));
-        }
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR load
-      // double-precision
-      if (srcSize == 8) emit(con, fldd(fpr64(dst), base, offset));
-      // single-precision
-      else              emit(con, flds(fpr32(dst), base, offset));
-    }
-  } else {
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    ResolvedPromise offsetPromise(offset);
-    lir::Constant offsetConstant(&offsetPromise);
-    moveCR(con, TargetBytesPerWord, &offsetConstant, TargetBytesPerWord,
-           &tmp);
-    
-    load(con, srcSize, base, 0, tmp.low, 1, dstSize, dst, false,
-         signExtend);
-
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-moveMR(Context* con, unsigned srcSize, lir::Memory* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  load(con, srcSize, src->base, src->offset, src->index, src->scale,
-       dstSize, dst, true, true);
-}
-
-void
-moveZMR(Context* con, unsigned srcSize, lir::Memory* src,
-        unsigned dstSize, lir::Register* dst)
-{
-  load(con, srcSize, src->base, src->offset, src->index, src->scale,
-       dstSize, dst, true, false);
-}
-
-void
-andR(Context* con, unsigned size, lir::Register* a,
-     lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) emit(con, and_(dst->high, a->high, b->high));
-  emit(con, and_(dst->low, a->low, b->low));
-}
-
-void
-andC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  int64_t v = a->value->value();
-
-  if (size == 8) {
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-
-    andC(con, 4, &al, b, dst);
-    andC(con, 4, &ah, &bh, &dh);
-  } else {
-    uint32_t v32 = static_cast<uint32_t>(v);
-    if (v32 != 0xFFFFFFFF) {
-      if ((v32 & 0xFFFFFF00) == 0xFFFFFF00) {
-        emit(con, bici(dst->low, b->low, (~(v32 & 0xFF)) & 0xFF));
-      } else if ((v32 & 0xFFFFFF00) == 0) {
-        emit(con, andi(dst->low, b->low, v32 & 0xFF));
-      } else {
-        // todo: there are other cases we can handle in one
-        // instruction
-
-        bool useTemporary = b->low == dst->low;
-        lir::Register tmp(dst->low);
-        if (useTemporary) {
-          tmp.low = con->client->acquireTemporary(GPR_MASK);
-        }
-
-        moveCR(con, 4, a, 4, &tmp);
-        andR(con, 4, b, &tmp, dst);
-        
-        if (useTemporary) {
-          con->client->releaseTemporary(tmp.low);
-        }
-      }
-    } else {
-      moveRR(con, size, b, size, dst);
-    }
-  }
-}
-
-void
-orR(Context* con, unsigned size, lir::Register* a,
-    lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) emit(con, orr(dst->high, a->high, b->high));
-  emit(con, orr(dst->low, a->low, b->low));
-}
-
-void
-xorR(Context* con, unsigned size, lir::Register* a,
-     lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) emit(con, eor(dst->high, a->high, b->high));
-  emit(con, eor(dst->low, a->low, b->low));
-}
-
-void
-moveAR2(Context* con, unsigned srcSize, lir::Address* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  assert(con, srcSize == 4 and dstSize == 4);
-
-  lir::Constant constant(src->address);
-  moveCR(con, srcSize, &constant, dstSize, dst);
-
-  lir::Memory memory(dst->low, 0, -1, 0);
-  moveMR(con, dstSize, &memory, dstSize, dst);
-}
-
-void
-moveAR(Context* con, unsigned srcSize, lir::Address* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  moveAR2(con, srcSize, src, dstSize, dst);
-}
-
-void
-compareRR(Context* con, unsigned aSize, lir::Register* a,
-          unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(con, !(isFpr(a) ^ isFpr(b))); // regs must be of the same type
-
-  if (!isFpr(a)) { // GPR compare
-    assert(con, aSize == 4 && bSize == 4);
-    /**///assert(con, b->low != a->low);
-    emit(con, cmp(b->low, a->low));
-  } else {         // FPR compare
-    assert(con, aSize == bSize);
-    if (aSize == 8) emit(con, fcmpd(fpr64(b), fpr64(a))); // double
-    else            emit(con, fcmps(fpr32(b), fpr32(a))); // single
-    emit(con, fmstat());
-  }
-}
-
-void
-compareCR(Context* con, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Register* b)
-{
-  assert(con, aSize == 4 and bSize == 4);
-
-  if (!isFpr(b) && a->value->resolved() &&
-      isOfWidth(a->value->value(), 8)) {
-    emit(con, cmpi(b->low, a->value->value()));
-  } else {
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    moveCR(con, aSize, a, bSize, &tmp);
-    compareRR(con, bSize, &tmp, bSize, b);
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-compareCM(Context* con, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(con, aSize == 4 and bSize == 4);
-
-  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-  moveMR(con, bSize, b, bSize, &tmp);
-  compareCR(con, aSize, a, bSize, &tmp);
-  con->client->releaseTemporary(tmp.low);
-}
-
-void
-compareRM(Context* con, unsigned aSize, lir::Register* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(con, aSize == 4 and bSize == 4);
-
-  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-  moveMR(con, bSize, b, bSize, &tmp);
-  compareRR(con, aSize, a, bSize, &tmp);
-  con->client->releaseTemporary(tmp.low);
-}
-
-int32_t
-branch(Context* con, lir::TernaryOperation op)
-{
-  switch (op) {
-  case lir::JumpIfEqual:
-  case lir::JumpIfFloatEqual:
-    return beq(0);
-
-  case lir::JumpIfNotEqual:
-  case lir::JumpIfFloatNotEqual:
-    return bne(0);
-
-  case lir::JumpIfLess:
-  case lir::JumpIfFloatLess:
-  case lir::JumpIfFloatLessOrUnordered:
-    return blt(0);
-
-  case lir::JumpIfGreater:
-  case lir::JumpIfFloatGreater:
-    return bgt(0);
-
-  case lir::JumpIfLessOrEqual:
-  case lir::JumpIfFloatLessOrEqual:
-  case lir::JumpIfFloatLessOrEqualOrUnordered:
-    return ble(0);
-
-  case lir::JumpIfGreaterOrEqual:
-  case lir::JumpIfFloatGreaterOrEqual:
-    return bge(0);
-
-  case lir::JumpIfFloatGreaterOrUnordered:
-    return bhi(0);
-
-  case lir::JumpIfFloatGreaterOrEqualOrUnordered:
-    return bpl(0);
- 
-  default:
-    abort(con);
-  }
-}
-
-void
-conditional(Context* con, int32_t branch, lir::Constant* target)
-{
-  appendOffsetTask(con, target->value, offsetPromise(con));
-  emit(con, branch);
-}
-
-void
-branch(Context* con, lir::TernaryOperation op, lir::Constant* target)
-{
-  conditional(con, branch(con, op), target);
-}
-
-void
-branchLong(Context* con, lir::TernaryOperation op, lir::Operand* al,
-           lir::Operand* ah, lir::Operand* bl,
-           lir::Operand* bh, lir::Constant* target,
-           BinaryOperationType compareSigned,
-           BinaryOperationType compareUnsigned)
-{
-  compareSigned(con, 4, ah, 4, bh);
-
-  unsigned next = 0;
-  
-  switch (op) {
-  case lir::JumpIfEqual:
-  case lir::JumpIfFloatEqual:
-    next = con->code.length();
-    emit(con, bne(0));
-
-    compareSigned(con, 4, al, 4, bl);
-    conditional(con, beq(0), target);
-    break;
-
-  case lir::JumpIfNotEqual:
-  case lir::JumpIfFloatNotEqual:
-    conditional(con, bne(0), target);
-
-    compareSigned(con, 4, al, 4, bl);
-    conditional(con, bne(0), target);
-    break;
-
-  case lir::JumpIfLess:
-  case lir::JumpIfFloatLess:
-    conditional(con, blt(0), target);
-
-    next = con->code.length();
-    emit(con, bgt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, blo(0), target);
-    break;
-
-  case lir::JumpIfGreater:
-  case lir::JumpIfFloatGreater:
-    conditional(con, bgt(0), target);
-
-    next = con->code.length();
-    emit(con, blt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, bhi(0), target);
-    break;
-
-  case lir::JumpIfLessOrEqual:
-  case lir::JumpIfFloatLessOrEqual:
-    conditional(con, blt(0), target);
-
-    next = con->code.length();
-    emit(con, bgt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, bls(0), target);
-    break;
-
-  case lir::JumpIfGreaterOrEqual:
-  case lir::JumpIfFloatGreaterOrEqual:
-    conditional(con, bgt(0), target);
-
-    next = con->code.length();
-    emit(con, blt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, bhs(0), target);
-    break;
-
-  default:
-    abort(con);
-  }
-
-  if (next) {
-    updateOffset
-      (con->s, con->code.data + next, reinterpret_cast<intptr_t>
-       (con->code.data + con->code.length()));
-  }
-}
-
-void
-branchRR(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Register* b,
-         lir::Constant* target)
-{
-  if (!isFpr(a) && size > TargetBytesPerWord) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    branchLong(con, op, a, &ah, b, &bh, target, CAST2(compareRR),
-               CAST2(compareRR));
-  } else {
-    compareRR(con, size, a, size, b);
-    branch(con, op, target);
-  }
-}
-
-void
-branchCR(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Register* b,
-         lir::Constant* target)
-{
-  assert(con, !isFloatBranch(op));
-
-  if (size > TargetBytesPerWord) {
-    int64_t v = a->value->value();
-
-    ResolvedPromise low(v & ~static_cast<target_uintptr_t>(0));
-    lir::Constant al(&low);
-
-    ResolvedPromise high((v >> 32) & ~static_cast<target_uintptr_t>(0));
-    lir::Constant ah(&high);
-
-    lir::Register bh(b->high);
-
-    branchLong(con, op, &al, &ah, b, &bh, target, CAST2(compareCR),
-               CAST2(compareCR));
-  } else {
-    compareCR(con, size, a, size, b);
-    branch(con, op, target);
-  }
-}
-
-void
-branchRM(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(con, !isFloatBranch(op));
-  assert(con, size <= TargetBytesPerWord);
-
-  compareRM(con, size, a, size, b);
-  branch(con, op, target);
-}
-
-void
-branchCM(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(con, !isFloatBranch(op));
-  assert(con, size <= TargetBytesPerWord);
-
-  compareCM(con, size, a, size, b);
-  branch(con, op, target);
-}
-
-ShiftMaskPromise*
-shiftMaskPromise(Context* con, Promise* base, unsigned shift, int64_t mask)
-{
-  return new(con->zone) ShiftMaskPromise(base, shift, mask);
-}
-
-void
-moveCM(Context* con, unsigned srcSize, lir::Constant* src,
-       unsigned dstSize, lir::Memory* dst)
-{
-  switch (dstSize) {
-  case 8: {
-    lir::Constant srcHigh
-      (shiftMaskPromise(con, src->value, 32, 0xFFFFFFFF));
-    lir::Constant srcLow
-      (shiftMaskPromise(con, src->value, 0, 0xFFFFFFFF));
-    
-    lir::Memory dstLow
-      (dst->base, dst->offset + 4, dst->index, dst->scale);
-    
-    moveCM(con, 4, &srcLow, 4, &dstLow);
-    moveCM(con, 4, &srcHigh, 4, dst);
-  } break;
-
-  default:
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    moveCR(con, srcSize, src, dstSize, &tmp);
-    moveRM(con, dstSize, &tmp, dstSize, dst);
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-negateRR(Context* con, unsigned srcSize, lir::Register* src,
-         unsigned dstSize UNUSED, lir::Register* dst)
-{
-  assert(con, srcSize == dstSize);
-
-  emit(con, mvn(dst->low, src->low));
-  emit(con, SETS(addi(dst->low, dst->low, 1)));
-  if (srcSize == 8) {
-    emit(con, mvn(dst->high, src->high));
-    emit(con, adci(dst->high, dst->high, 0));
-  }
-}
-
-void
-callR(Context* con, unsigned size UNUSED, lir::Register* target)
-{
-  assert(con, size == TargetBytesPerWord);
-  emit(con, blx(target->low));
-}
-
-void
-callC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  appendOffsetTask(con, target->value, offsetPromise(con));
-  emit(con, bl(0));
-}
-
-void
-longCallC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  lir::Register tmp(4);
-  moveCR2(con, TargetBytesPerWord, target, &tmp, offsetPromise(con));
-  callR(con, TargetBytesPerWord, &tmp);
-}
-
-void
-longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  lir::Register tmp(4); // a non-arg reg that we don't mind clobbering
-  moveCR2(con, TargetBytesPerWord, target, &tmp, offsetPromise(con));
-  jumpR(con, TargetBytesPerWord, &tmp);
-}
-
-void
-jumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  appendOffsetTask(con, target->value, offsetPromise(con));
-  emit(con, b(0));
-}
-
-void
-return_(Context* con)
-{
-  emit(con, bx(LinkRegister));
-}
-
-void
-trap(Context* con)
-{
-  emit(con, bkpt(0));
-}
-
-void
-memoryBarrier(Context*) {}
-
 // END OPERATION COMPILERS
 
 unsigned
@@ -1861,99 +138,6 @@ nextFrame(ArchitectureContext* con, uint32_t* start, unsigned size UNUSED,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-void
-populateTables(ArchitectureContext* con)
-{
-  const lir::OperandType C = lir::ConstantOperand;
-  const lir::OperandType A = lir::AddressOperand;
-  const lir::OperandType R = lir::RegisterOperand;
-  const lir::OperandType M = lir::MemoryOperand;
-
-  OperationType* zo = con->operations;
-  UnaryOperationType* uo = con->unaryOperations;
-  BinaryOperationType* bo = con->binaryOperations;
-  TernaryOperationType* to = con->ternaryOperations;
-  BranchOperationType* bro = con->branchOperations;
-
-  zo[lir::Return] = return_;
-  zo[lir::LoadBarrier] = memoryBarrier;
-  zo[lir::StoreStoreBarrier] = memoryBarrier;
-  zo[lir::StoreLoadBarrier] = memoryBarrier;
-  zo[lir::Trap] = trap;
-
-  uo[index(con, lir::LongCall, C)] = CAST1(longCallC);
-
-  uo[index(con, lir::AlignedLongCall, C)] = CAST1(longCallC);
-
-  uo[index(con, lir::LongJump, C)] = CAST1(longJumpC);
-
-  uo[index(con, lir::AlignedLongJump, C)] = CAST1(longJumpC);
-
-  uo[index(con, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(con, lir::Jump, C)] = CAST1(jumpC);
-
-  uo[index(con, lir::AlignedJump, R)] = CAST1(jumpR);
-  uo[index(con, lir::AlignedJump, C)] = CAST1(jumpC);
-
-  uo[index(con, lir::Call, C)] = CAST1(callC);
-  uo[index(con, lir::Call, R)] = CAST1(callR);
-
-  uo[index(con, lir::AlignedCall, C)] = CAST1(callC);
-  uo[index(con, lir::AlignedCall, R)] = CAST1(callR);
-
-  bo[index(con, lir::Move, R, R)] = CAST2(moveRR);
-  bo[index(con, lir::Move, C, R)] = CAST2(moveCR);
-  bo[index(con, lir::Move, C, M)] = CAST2(moveCM);
-  bo[index(con, lir::Move, M, R)] = CAST2(moveMR);
-  bo[index(con, lir::Move, R, M)] = CAST2(moveRM);
-  bo[index(con, lir::Move, A, R)] = CAST2(moveAR);
-
-  bo[index(con, lir::MoveZ, R, R)] = CAST2(moveZRR);
-  bo[index(con, lir::MoveZ, M, R)] = CAST2(moveZMR);
-  bo[index(con, lir::MoveZ, C, R)] = CAST2(moveCR);
-
-  bo[index(con, lir::Negate, R, R)] = CAST2(negateRR);
-
-  bo[index(con, lir::FloatAbsolute, R, R)] = CAST2(floatAbsoluteRR);
-  bo[index(con, lir::FloatNegate, R, R)] = CAST2(floatNegateRR);
-  bo[index(con, lir::Float2Float, R, R)] = CAST2(float2FloatRR);
-  bo[index(con, lir::Float2Int, R, R)] = CAST2(float2IntRR);
-  bo[index(con, lir::Int2Float, R, R)] = CAST2(int2FloatRR);
-  bo[index(con, lir::FloatSquareRoot, R, R)] = CAST2(floatSqrtRR);
-
-  to[index(con, lir::Add, R)] = CAST3(addR);
-
-  to[index(con, lir::Subtract, R)] = CAST3(subR);
-
-  to[index(con, lir::Multiply, R)] = CAST3(multiplyR);
-
-  to[index(con, lir::FloatAdd, R)] = CAST3(floatAddR);
-  to[index(con, lir::FloatSubtract, R)] = CAST3(floatSubtractR);
-  to[index(con, lir::FloatMultiply, R)] = CAST3(floatMultiplyR);
-  to[index(con, lir::FloatDivide, R)] = CAST3(floatDivideR);
-
-  to[index(con, lir::ShiftLeft, R)] = CAST3(shiftLeftR);
-  to[index(con, lir::ShiftLeft, C)] = CAST3(shiftLeftC);
-
-  to[index(con, lir::ShiftRight, R)] = CAST3(shiftRightR);
-  to[index(con, lir::ShiftRight, C)] = CAST3(shiftRightC);
-
-  to[index(con, lir::UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR);
-  to[index(con, lir::UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC);
-
-  to[index(con, lir::And, R)] = CAST3(andR);
-  to[index(con, lir::And, C)] = CAST3(andC);
-
-  to[index(con, lir::Or, R)] = CAST3(orR);
-
-  to[index(con, lir::Xor, R)] = CAST3(xorR);
-
-  bro[branchIndex(con, R, R)] = CAST_BRANCH(branchRR);
-  bro[branchIndex(con, C, R)] = CAST_BRANCH(branchCR);
-  bro[branchIndex(con, C, M)] = CAST_BRANCH(branchCM);
-  bro[branchIndex(con, R, M)] = CAST_BRANCH(branchRM);
-}
-
 class MyArchitecture: public Assembler::Architecture {
  public:
   MyArchitecture(System* system): con(system), referenceCount(0) {
diff --git a/src/codegen/arm/block.h b/src/codegen/arm/block.h
index 42f3cceaa3..cc634f7f75 100644
--- a/src/codegen/arm/block.h
+++ b/src/codegen/arm/block.h
@@ -13,7 +13,6 @@
 
 #include <avian/vm/codegen/lir.h>
 #include <avian/vm/codegen/assembler.h>
-#include "alloc-vector.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/arm/context.h b/src/codegen/arm/context.h
index ccba7e403d..a5388527ed 100644
--- a/src/codegen/arm/context.h
+++ b/src/codegen/arm/context.h
@@ -53,15 +53,6 @@ class Context {
   unsigned constantPoolCount;
 };
 
-class Task {
- public:
-  Task(Task* next): next(next) { }
-
-  virtual void run(Context* con) = 0;
-
-  Task* next;
-};
-
 typedef void (*OperationType)(Context*);
 
 typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
diff --git a/src/codegen/arm/encode.h b/src/codegen/arm/encode.h
new file mode 100644
index 0000000000..d6d3e983b9
--- /dev/null
+++ b/src/codegen/arm/encode.h
@@ -0,0 +1,184 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_ENCODE_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_ENCODE_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+namespace isa {
+
+// SYSTEM REGISTERS
+const int FPSID = 0x0;
+const int FPSCR = 0x1;
+const int FPEXC = 0x8;
+// INSTRUCTION OPTIONS
+enum CONDITION { EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV };
+enum SHIFTOP { LSL, LSR, ASR, ROR };
+// INSTRUCTION FORMATS
+inline int DATA(int cond, int opcode, int S, int Rn, int Rd, int shift, int Sh, int Rm)
+{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
+inline int DATAS(int cond, int opcode, int S, int Rn, int Rd, int Rs, int Sh, int Rm)
+{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | Rs<<8 | Sh<<5 | 1<<4 | Rm; }
+inline int DATAI(int cond, int opcode, int S, int Rn, int Rd, int rot, int imm)
+{ return cond<<28 | 1<<25 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | rot<<8 | (imm&0xff); }
+inline int BRANCH(int cond, int L, int offset)
+{ return cond<<28 | 5<<25 | L<<24 | (offset&0xffffff); }
+inline int BRANCHX(int cond, int L, int Rm)
+{ return cond<<28 | 0x4bffc<<6 | L<<5 | 1<<4 | Rm; }
+inline int MULTIPLY(int cond, int mul, int S, int Rd, int Rn, int Rs, int Rm)
+{ return cond<<28 | mul<<21 | S<<20 | Rd<<16 | Rn<<12 | Rs<<8 | 9<<4 | Rm; }
+inline int XFER(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int shift, int Sh, int Rm)
+{ return cond<<28 | 3<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
+inline int XFERI(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int offset)
+{ return cond<<28 | 2<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | (offset&0xfff); }
+inline int XFER2(int cond, int P, int U, int W, int L, int Rn, int Rd, int S, int H, int Rm)
+{ return cond<<28 | P<<24 | U<<23 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | 1<<7 | S<<6 | H<<5 | 1<<4 | Rm; }
+inline int XFER2I(int cond, int P, int U, int W, int L, int Rn, int Rd, int offsetH, int S, int H, int offsetL)
+{ return cond<<28 | P<<24 | U<<23 | 1<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offsetH<<8 | 1<<7 | S<<6 | H<<5 | 1<<4 | (offsetL&0xf); }
+inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
+{ return cond<<28 | 0xe<<24 | opcode_1<<20 | CRn<<16 | CRd<<12 | cp_num<<8 | opcode_2<<5 | CRm; }
+inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset) // offset is in words, not bytes
+{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff)>>2; }
+inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
+{ return cond<<28 | 0xe<<24 | opcode_1<<21 | L<<20 | CRn<<16 | Rd<<12 | cp_num<<8 | opcode_2<<5 | 1<<4 | CRm; }
+inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
+{ return cond<<28 | 0xc4<<20 | L<<20 | Rn<<16 | Rd<<12 | cp_num<<8 | opcode<<4 | CRm;}
+// FIELD CALCULATORS
+inline int calcU(int imm) { return imm >= 0 ? 1 : 0; }
+// INSTRUCTIONS
+// The "cond" and "S" fields are set using the SETCOND() and SETS() functions
+inline int b(int offset) { return BRANCH(AL, 0, offset); }
+inline int bl(int offset) { return BRANCH(AL, 1, offset); }
+inline int bx(int Rm) { return BRANCHX(AL, 0, Rm); }
+inline int blx(int Rm) { return BRANCHX(AL, 1, Rm); }
+inline int and_(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x0, 0, Rn, Rd, shift, Sh, Rm); }
+inline int eor(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x1, 0, Rn, Rd, shift, Sh, Rm); }
+inline int rsb(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x3, 0, Rn, Rd, shift, Sh, Rm); }
+inline int add(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x4, 0, Rn, Rd, shift, Sh, Rm); }
+inline int adc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x5, 0, Rn, Rd, shift, Sh, Rm); }
+inline int rsc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x7, 0, Rn, Rd, shift, Sh, Rm); }
+inline int cmp(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xa, 1, Rn, 0, shift, Sh, Rm); }
+inline int orr(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xc, 0, Rn, Rd, shift, Sh, Rm); }
+inline int mov(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xd, 0, 0, Rd, shift, Sh, Rm); }
+inline int mvn(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xf, 0, 0, Rd, shift, Sh, Rm); }
+inline int andi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x0, 0, Rn, Rd, rot, imm); }
+inline int subi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x2, 0, Rn, Rd, rot, imm); }
+inline int rsbi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x3, 0, Rn, Rd, rot, imm); }
+inline int addi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x4, 0, Rn, Rd, rot, imm); }
+inline int adci(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x5, 0, Rn, Rd, rot, imm); }
+inline int bici(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xe, 0, Rn, Rd, rot, imm); }
+inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); }
+inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); }
+inline int orrsh(int Rd, int Rn, int Rm, int Rs, int Sh) { return DATAS(AL, 0xc, 0, Rn, Rd, Rs, Sh, Rm); }
+inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); }
+inline int mul(int Rd, int Rm, int Rs) { return MULTIPLY(AL, 0, 0, Rd, 0, Rs, Rm); }
+inline int mla(int Rd, int Rm, int Rs, int Rn) { return MULTIPLY(AL, 1, 0, Rd, Rn, Rs, Rm); }
+inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, RdHi, RdLo, Rs, Rm); }
+inline int ldr(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 1, Rn, Rd, 0, 0, Rm); }
+inline int ldri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 1, Rn, Rd, abs(imm)); }
+inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); }
+inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 1, Rn, Rd, abs(imm)); }
+inline int str(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 0, Rn, Rd, 0, 0, Rm); }
+inline int stri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 0, Rn, Rd, abs(imm)); }
+inline int strb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 0, Rn, Rd, 0, 0, Rm); }
+inline int strbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 0, Rn, Rd, abs(imm)); }
+inline int ldrh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 0, 1, Rm); }
+inline int ldrhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
+inline int strh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 0, Rn, Rd, 0, 1, Rm); }
+inline int strhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 0, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
+inline int ldrsh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 1, Rm); }
+inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 1, abs(imm)&0xf); }
+inline int ldrsb(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 0, Rm); }
+inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 0, abs(imm)&0xf); }
+// breakpoint instruction, this really has its own instruction format
+inline int bkpt(int16_t immed) { return 0xe1200070 | (((unsigned)immed & 0xffff) >> 4 << 8) | (immed & 0xf); }
+// COPROCESSOR INSTRUCTIONS
+inline int mcr(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 0, CRn, Rd, coproc, opcode_2, CRm); }
+inline int mcrr(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 0, Rn, Rd, coproc, opcode, CRm); }
+inline int mrc(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 1, CRn, Rd, coproc, opcode_2, CRm); }
+inline int mrrc(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 1, Rn, Rd, coproc, opcode, CRm); }
+// VFP FLOATING-POINT INSTRUCTIONS
+inline int fmuls(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|2, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
+inline int fadds(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
+inline int fsubs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1)|2, Sm>>1); }
+inline int fdivs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|8, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
+inline int fmuld(int Dd, int Dn, int Dm) { return COOP(AL, 2, Dn, Dd, 11, 0, Dm); }
+inline int faddd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 0, Dm); }
+inline int fsubd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 2, Dm); }
+inline int fdivd(int Dd, int Dn, int Dm) { return COOP(AL, 8, Dn, Dd, 11, 0, Dm); }
+inline int fcpys(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
+inline int fabss(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int fnegs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
+inline int fsqrts(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int fcmps(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 4, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
+inline int fcvtds(int Dd, int Sm) { return COOP(AL, 0xb, 7, Dd, 10, 6|(Sm&1), Sm>>1); }
+inline int fsitos(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 8, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int ftosizs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int fcpyd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 2, Dm); }
+inline int fabsd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 6, Dm); }
+inline int fnegd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 2, Dm); }
+inline int fsqrtd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 6, Dm); }
+// double-precision comparison instructions
+inline int fcmpd(int Dd, int Dm) { return COOP(AL, 0xb, 4, Dd, 11, 2, Dm); }
+// double-precision conversion instructions
+inline int fcvtsd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 7, Sd>>1, 11, 6, Dm); }
+inline int fsitod(int Dd, int Sm) { return COOP(AL, 0xb, 8, Dd, 11, 6|(Sm&1), Sm>>1); }
+inline int ftosizd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 11, 6, Dm); }
+// single load/store instructions for both precision types
+inline int flds(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 1, Rn, Sd>>1, 10, offset); };
+inline int fldd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 1, Rn, Dd, 11, offset); };
+inline int fsts(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 0, Rn, Sd>>1, 10, offset); };
+inline int fstd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 0, Rn, Dd, 11, offset); };
+// move between GPRs and FPRs
+inline int fmsr(int Sn, int Rd) { return mcr(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
+inline int fmrs(int Rd, int Sn) { return mrc(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
+// move to/from VFP system registers
+inline int fmrx(int Rd, int reg) { return mrc(10, 7, Rd, reg, 0); }
+// these move around pairs of single-precision registers
+inline int fmdrr(int Dm, int Rd, int Rn) { return mcrr(11, 1, Rd, Rn, Dm); }
+inline int fmrrd(int Rd, int Rn, int Dm) { return mrrc(11, 1, Rd, Rn, Dm); }
+// FLAG SETTERS
+inline int SETCOND(int ins, int cond) { return ((ins&0x0fffffff) | (cond<<28)); }
+inline int SETS(int ins) { return ins | 1<<20; }
+// PSEUDO-INSTRUCTIONS
+inline int lsl(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSL); }
+inline int lsli(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSL, imm); }
+inline int lsr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSR); }
+inline int lsri(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSR, imm); }
+inline int asr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, ASR); }
+inline int asri(int Rd, int Rm, int imm) { return mov(Rd, Rm, ASR, imm); }
+inline int beq(int offset) { return SETCOND(b(offset), EQ); }
+inline int bne(int offset) { return SETCOND(b(offset), NE); }
+inline int bls(int offset) { return SETCOND(b(offset), LS); }
+inline int bhi(int offset) { return SETCOND(b(offset), HI); }
+inline int blt(int offset) { return SETCOND(b(offset), LT); }
+inline int bgt(int offset) { return SETCOND(b(offset), GT); }
+inline int ble(int offset) { return SETCOND(b(offset), LE); }
+inline int bge(int offset) { return SETCOND(b(offset), GE); }
+inline int blo(int offset) { return SETCOND(b(offset), CC); }
+inline int bhs(int offset) { return SETCOND(b(offset), CS); }
+inline int bpl(int offset) { return SETCOND(b(offset), PL); }
+inline int fmstat() { return fmrx(15, FPSCR); }
+
+} // namespace isa
+
+inline void emit(Context* con, int code) { con->code.append4(code); }
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_ENCODE_H
diff --git a/src/codegen/arm/fixup.cpp b/src/codegen/arm/fixup.cpp
new file mode 100644
index 0000000000..2cf0b01216
--- /dev/null
+++ b/src/codegen/arm/fixup.cpp
@@ -0,0 +1,175 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "fixup.h"
+#include "block.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+unsigned padding(MyBlock*, unsigned);
+
+OffsetPromise::OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace):
+  con(con), block(block), offset(offset), forTrace(forTrace)
+{ }
+
+bool OffsetPromise::resolved() {
+  return block->start != static_cast<unsigned>(~0);
+}
+
+int64_t OffsetPromise::value() {
+  assert(con, resolved());
+
+  unsigned o = offset - block->offset;
+  return block->start + padding
+    (block, forTrace ? o - vm::TargetBytesPerWord : o) + o;
+}
+
+
+Promise* offsetPromise(Context* con, bool forTrace) {
+  return new(con->zone) OffsetPromise(con, con->lastBlock, con->code.length(), forTrace);
+}
+
+
+OffsetListener::OffsetListener(vm::System* s, uint8_t* instruction):
+  s(s),
+  instruction(instruction)
+{ }
+
+bool OffsetListener::resolve(int64_t value, void** location) {
+  void* p = updateOffset(s, instruction, value);
+  if (location) *location = p;
+  return false;
+}
+
+
+OffsetTask::OffsetTask(Task* next, Promise* promise, Promise* instructionOffset):
+  Task(next),
+  promise(promise),
+  instructionOffset(instructionOffset)
+{ }
+
+void OffsetTask::run(Context* con) {
+  if (promise->resolved()) {
+    updateOffset
+      (con->s, con->result + instructionOffset->value(), promise->value());
+  } else {
+    new (promise->listen(sizeof(OffsetListener)))
+      OffsetListener(con->s, con->result + instructionOffset->value());
+  }
+}
+
+void appendOffsetTask(Context* con, Promise* promise, Promise* instructionOffset) {
+  con->tasks = new(con->zone) OffsetTask(con->tasks, promise, instructionOffset);
+}
+
+bool bounded(int right, int left, int32_t v) {
+  return ((v << left) >> left) == v and ((v >> right) << right) == v;
+}
+
+void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) {
+  // ARM's PC is two words ahead, and branches drop the bottom 2 bits.
+  int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
+
+  int32_t mask;
+  expect(s, bounded(0, 8, v));
+  mask = 0xFFFFFF;
+
+  int32_t* p = reinterpret_cast<int32_t*>(instruction);
+  *p = (v & mask) | ((~mask) & *p);
+
+  return instruction + 4;
+}
+
+ConstantPoolEntry::ConstantPoolEntry(Context* con, Promise* constant, ConstantPoolEntry* next,
+                  Promise* callOffset):
+  con(con), constant(constant), next(next), callOffset(callOffset),
+  address(0)
+{ }
+
+int64_t ConstantPoolEntry::value() {
+  assert(con, resolved());
+
+  return reinterpret_cast<int64_t>(address);
+}
+
+bool ConstantPoolEntry::resolved() {
+  return address != 0;
+}
+
+ConstantPoolListener::ConstantPoolListener(vm::System* s, vm::target_uintptr_t* address,
+                     uint8_t* returnAddress):
+  s(s),
+  address(address),
+  returnAddress(returnAddress)
+{ }
+
+bool ConstantPoolListener::resolve(int64_t value, void** location) {
+  *address = value;
+  if (location) {
+    *location = returnAddress ? static_cast<void*>(returnAddress) : address;
+  }
+  return true;
+}
+
+PoolOffset::PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset):
+  block(block), entry(entry), next(0), offset(offset)
+{ }
+
+PoolEvent::PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail,
+          unsigned offset):
+  poolOffsetHead(poolOffsetHead), poolOffsetTail(poolOffsetTail), next(0),
+  offset(offset)
+{ }
+
+void appendConstantPoolEntry(Context* con, Promise* constant, Promise* callOffset) {
+  if (constant->resolved()) {
+    // make a copy, since the original might be allocated on the
+    // stack, and we need our copy to live until assembly is complete
+    constant = new(con->zone) ResolvedPromise(constant->value());
+  }
+
+  con->constantPool = new(con->zone) ConstantPoolEntry(con, constant, con->constantPool, callOffset);
+
+  ++ con->constantPoolCount;
+
+  PoolOffset* o = new(con->zone) PoolOffset(con->lastBlock, con->constantPool, con->code.length() - con->lastBlock->offset);
+
+  if (DebugPool) {
+    fprintf(stderr, "add pool offset %p %d to block %p\n",
+            o, o->offset, con->lastBlock);
+  }
+
+  if (con->lastBlock->poolOffsetTail) {
+    con->lastBlock->poolOffsetTail->next = o;
+  } else {
+    con->lastBlock->poolOffsetHead = o;
+  }
+  con->lastBlock->poolOffsetTail = o;
+}
+
+void appendPoolEvent(Context* con, MyBlock* b, unsigned offset, PoolOffset* head,
+                PoolOffset* tail)
+{
+  PoolEvent* e = new(con->zone) PoolEvent(head, tail, offset);
+
+  if (b->poolEventTail) {
+    b->poolEventTail->next = e;
+  } else {
+    b->poolEventHead = e;
+  }
+  b->poolEventTail = e;
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/fixup.h b/src/codegen/arm/fixup.h
new file mode 100644
index 0000000000..77abf003bf
--- /dev/null
+++ b/src/codegen/arm/fixup.h
@@ -0,0 +1,140 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_PROMISE_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_PROMISE_H
+
+#include "target.h"
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+#include "alloc-vector.h"
+
+namespace vm {
+class System;
+}
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+const bool DebugPool = false;
+
+const int32_t PoolOffsetMask = 0xFFF;
+
+class Task {
+ public:
+  Task(Task* next): next(next) { }
+
+  virtual void run(Context* con) = 0;
+
+  Task* next;
+};
+
+class OffsetPromise: public Promise {
+ public:
+  OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace);
+
+  virtual bool resolved();
+  
+  virtual int64_t value();
+
+  Context* con;
+  MyBlock* block;
+  unsigned offset;
+  bool forTrace;
+};
+
+Promise* offsetPromise(Context* con, bool forTrace = false);
+
+class OffsetListener: public Promise::Listener {
+ public:
+  OffsetListener(vm::System* s, uint8_t* instruction);
+
+  virtual bool resolve(int64_t value, void** location);
+
+  vm::System* s;
+  uint8_t* instruction;
+};
+
+class OffsetTask: public Task {
+ public:
+  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset);
+
+  virtual void run(Context* con);
+
+  Promise* promise;
+  Promise* instructionOffset;
+};
+
+void appendOffsetTask(Context* con, Promise* promise, Promise* instructionOffset);
+
+void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value);
+
+class ConstantPoolEntry: public Promise {
+ public:
+  ConstantPoolEntry(Context* con, Promise* constant, ConstantPoolEntry* next,
+                    Promise* callOffset);
+
+  virtual int64_t value();
+
+  virtual bool resolved();
+
+  Context* con;
+  Promise* constant;
+  ConstantPoolEntry* next;
+  Promise* callOffset;
+  void* address;
+  unsigned constantPoolCount;
+};
+
+class ConstantPoolListener: public Promise::Listener {
+ public:
+  ConstantPoolListener(vm::System* s, vm::target_uintptr_t* address,
+                       uint8_t* returnAddress);
+
+  virtual bool resolve(int64_t value, void** location);
+
+  vm::System* s;
+  vm::target_uintptr_t* address;
+  uint8_t* returnAddress;
+};
+
+class PoolOffset {
+ public:
+  PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset);
+
+  MyBlock* block;
+  ConstantPoolEntry* entry;
+  PoolOffset* next;
+  unsigned offset;
+};
+
+class PoolEvent {
+ public:
+  PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail,
+            unsigned offset);
+
+  PoolOffset* poolOffsetHead;
+  PoolOffset* poolOffsetTail;
+  PoolEvent* next;
+  unsigned offset;
+};
+
+void appendConstantPoolEntry(Context* con, Promise* constant, Promise* callOffset);
+
+void appendPoolEvent(Context* con, MyBlock* b, unsigned offset, PoolOffset* head,
+                PoolOffset* tail);
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_PROMISE_H
diff --git a/src/codegen/arm/multimethod.cpp b/src/codegen/arm/multimethod.cpp
new file mode 100644
index 0000000000..76c681a60f
--- /dev/null
+++ b/src/codegen/arm/multimethod.cpp
@@ -0,0 +1,142 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "multimethod.h"
+#include "operations.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
+{
+  return operation + (lir::UnaryOperationCount * operand);
+}
+
+unsigned index(ArchitectureContext*,
+      lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2)
+{
+  return operation
+    + (lir::BinaryOperationCount * operand1)
+    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
+}
+
+unsigned index(ArchitectureContext* con UNUSED,
+      lir::TernaryOperation operation,
+      lir::OperandType operand1)
+{
+  assert(con, not isBranch(operation));
+
+  return operation + (lir::NonBranchTernaryOperationCount * operand1);
+}
+
+unsigned branchIndex(ArchitectureContext* con UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2)
+{
+  return operand1 + (lir::OperandTypeCount * operand2);
+}
+
+void populateTables(ArchitectureContext* con) {
+  const lir::OperandType C = lir::ConstantOperand;
+  const lir::OperandType A = lir::AddressOperand;
+  const lir::OperandType R = lir::RegisterOperand;
+  const lir::OperandType M = lir::MemoryOperand;
+
+  OperationType* zo = con->operations;
+  UnaryOperationType* uo = con->unaryOperations;
+  BinaryOperationType* bo = con->binaryOperations;
+  TernaryOperationType* to = con->ternaryOperations;
+  BranchOperationType* bro = con->branchOperations;
+
+  zo[lir::Return] = return_;
+  zo[lir::LoadBarrier] = memoryBarrier;
+  zo[lir::StoreStoreBarrier] = memoryBarrier;
+  zo[lir::StoreLoadBarrier] = memoryBarrier;
+  zo[lir::Trap] = trap;
+
+  uo[index(con, lir::LongCall, C)] = CAST1(longCallC);
+
+  uo[index(con, lir::AlignedLongCall, C)] = CAST1(longCallC);
+
+  uo[index(con, lir::LongJump, C)] = CAST1(longJumpC);
+
+  uo[index(con, lir::AlignedLongJump, C)] = CAST1(longJumpC);
+
+  uo[index(con, lir::Jump, R)] = CAST1(jumpR);
+  uo[index(con, lir::Jump, C)] = CAST1(jumpC);
+
+  uo[index(con, lir::AlignedJump, R)] = CAST1(jumpR);
+  uo[index(con, lir::AlignedJump, C)] = CAST1(jumpC);
+
+  uo[index(con, lir::Call, C)] = CAST1(callC);
+  uo[index(con, lir::Call, R)] = CAST1(callR);
+
+  uo[index(con, lir::AlignedCall, C)] = CAST1(callC);
+  uo[index(con, lir::AlignedCall, R)] = CAST1(callR);
+
+  bo[index(con, lir::Move, R, R)] = CAST2(moveRR);
+  bo[index(con, lir::Move, C, R)] = CAST2(moveCR);
+  bo[index(con, lir::Move, C, M)] = CAST2(moveCM);
+  bo[index(con, lir::Move, M, R)] = CAST2(moveMR);
+  bo[index(con, lir::Move, R, M)] = CAST2(moveRM);
+  bo[index(con, lir::Move, A, R)] = CAST2(moveAR);
+
+  bo[index(con, lir::MoveZ, R, R)] = CAST2(moveZRR);
+  bo[index(con, lir::MoveZ, M, R)] = CAST2(moveZMR);
+  bo[index(con, lir::MoveZ, C, R)] = CAST2(moveCR);
+
+  bo[index(con, lir::Negate, R, R)] = CAST2(negateRR);
+
+  bo[index(con, lir::FloatAbsolute, R, R)] = CAST2(floatAbsoluteRR);
+  bo[index(con, lir::FloatNegate, R, R)] = CAST2(floatNegateRR);
+  bo[index(con, lir::Float2Float, R, R)] = CAST2(float2FloatRR);
+  bo[index(con, lir::Float2Int, R, R)] = CAST2(float2IntRR);
+  bo[index(con, lir::Int2Float, R, R)] = CAST2(int2FloatRR);
+  bo[index(con, lir::FloatSquareRoot, R, R)] = CAST2(floatSqrtRR);
+
+  to[index(con, lir::Add, R)] = CAST3(addR);
+
+  to[index(con, lir::Subtract, R)] = CAST3(subR);
+
+  to[index(con, lir::Multiply, R)] = CAST3(multiplyR);
+
+  to[index(con, lir::FloatAdd, R)] = CAST3(floatAddR);
+  to[index(con, lir::FloatSubtract, R)] = CAST3(floatSubtractR);
+  to[index(con, lir::FloatMultiply, R)] = CAST3(floatMultiplyR);
+  to[index(con, lir::FloatDivide, R)] = CAST3(floatDivideR);
+
+  to[index(con, lir::ShiftLeft, R)] = CAST3(shiftLeftR);
+  to[index(con, lir::ShiftLeft, C)] = CAST3(shiftLeftC);
+
+  to[index(con, lir::ShiftRight, R)] = CAST3(shiftRightR);
+  to[index(con, lir::ShiftRight, C)] = CAST3(shiftRightC);
+
+  to[index(con, lir::UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR);
+  to[index(con, lir::UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC);
+
+  to[index(con, lir::And, R)] = CAST3(andR);
+  to[index(con, lir::And, C)] = CAST3(andC);
+
+  to[index(con, lir::Or, R)] = CAST3(orR);
+
+  to[index(con, lir::Xor, R)] = CAST3(xorR);
+
+  bro[branchIndex(con, R, R)] = CAST_BRANCH(branchRR);
+  bro[branchIndex(con, C, R)] = CAST_BRANCH(branchCR);
+  bro[branchIndex(con, C, M)] = CAST_BRANCH(branchCM);
+  bro[branchIndex(con, R, M)] = CAST_BRANCH(branchRM);
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/multimethod.h b/src/codegen/arm/multimethod.h
new file mode 100644
index 0000000000..7c574b588c
--- /dev/null
+++ b/src/codegen/arm/multimethod.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_MULTIMETHOD_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_MULTIMETHOD_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+
+#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
+#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
+
+unsigned index(ArchitectureContext*,
+      lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2);
+
+unsigned index(ArchitectureContext* con UNUSED,
+      lir::TernaryOperation operation,
+      lir::OperandType operand1);
+
+unsigned branchIndex(ArchitectureContext* con UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2);
+
+void populateTables(ArchitectureContext* con);
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_MULTIMETHOD_H
diff --git a/src/codegen/arm/operations.cpp b/src/codegen/arm/operations.cpp
new file mode 100644
index 0000000000..b896a88f00
--- /dev/null
+++ b/src/codegen/arm/operations.cpp
@@ -0,0 +1,1235 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "operations.h"
+#include "encode.h"
+#include "block.h"
+#include "fixup.h"
+#include "multimethod.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+using namespace isa;
+using namespace avian::util;
+
+inline bool isOfWidth(int64_t i, int size) { return static_cast<uint64_t>(i) >> size == 0; }
+
+inline unsigned lo8(int64_t i) { return (unsigned)(i&MASK_LO8); }
+
+void andC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
+{
+  if (size == 8) {
+    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
+    ResolvedPromise maskPromise(0x3F);
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp3);
+    andC(con, 4, &mask, a, &dst);
+    emit(con, lsl(tmp1, b->high, tmp3));
+    emit(con, rsbi(tmp2, tmp3, 32));
+    emit(con, orrsh(tmp1, tmp1, b->low, tmp2, LSR));
+    emit(con, SETS(subi(t->high, tmp3, 32)));
+    emit(con, SETCOND(mov(t->high, tmp1), MI));
+    emit(con, SETCOND(lsl(t->high, b->low, t->high), PL));
+    emit(con, lsl(t->low, b->low, tmp3));
+    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
+  } else {
+    int tmp = newTemp(con);
+    ResolvedPromise maskPromise(0x1F);
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp);
+    andC(con, size, &mask, a, &dst);
+    emit(con, lsl(t->low, b->low, tmp));
+    freeTemp(con, tmp);
+  }
+}
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);
+
+void shiftLeftC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+  if (getValue(a) & 0x1F) {
+    emit(con, lsli(t->low, b->low, getValue(a) & 0x1F));
+  } else {
+    moveRR(con, size, b, size, t);
+  }
+}
+
+void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
+{
+  if (size == 8) {
+    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
+    ResolvedPromise maskPromise(0x3F);
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp3);
+    andC(con, 4, &mask, a, &dst);
+    emit(con, lsr(tmp1, b->low, tmp3));
+    emit(con, rsbi(tmp2, tmp3, 32));
+    emit(con, orrsh(tmp1, tmp1, b->high, tmp2, LSL));
+    emit(con, SETS(subi(t->low, tmp3, 32)));
+    emit(con, SETCOND(mov(t->low, tmp1), MI));
+    emit(con, SETCOND(asr(t->low, b->high, t->low), PL));
+    emit(con, asr(t->high, b->high, tmp3));
+    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
+  } else {
+    int tmp = newTemp(con);
+    ResolvedPromise maskPromise(0x1F);
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp);
+    andC(con, size, &mask, a, &dst);
+    emit(con, asr(t->low, b->low, tmp));
+    freeTemp(con, tmp);
+  }
+}
+
+void shiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+  if (getValue(a) & 0x1F) {
+    emit(con, asri(t->low, b->low, getValue(a) & 0x1F));
+  } else {
+    moveRR(con, size, b, size, t);
+  }
+}
+
+void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
+{
+  int tmpShift = newTemp(con);
+  ResolvedPromise maskPromise(size == 8 ? 0x3F : 0x1F);
+  lir::Constant mask(&maskPromise);
+  lir::Register dst(tmpShift);
+  andC(con, 4, &mask, a, &dst);
+  emit(con, lsr(t->low, b->low, tmpShift));
+  if (size == 8) {
+    int tmpHi = newTemp(con), tmpLo = newTemp(con);
+    emit(con, SETS(rsbi(tmpHi, tmpShift, 32)));
+    emit(con, lsl(tmpLo, b->high, tmpHi));
+    emit(con, orr(t->low, t->low, tmpLo));
+    emit(con, addi(tmpHi, tmpShift, -32));
+    emit(con, lsr(tmpLo, b->high, tmpHi));
+    emit(con, orr(t->low, t->low, tmpLo));
+    emit(con, lsr(t->high, b->high, tmpShift));
+    freeTemp(con, tmpHi); freeTemp(con, tmpLo);
+  }
+  freeTemp(con, tmpShift);
+}
+
+void unsignedShiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+  if (getValue(a) & 0x1F) {
+    emit(con, lsri(t->low, b->low, getValue(a) & 0x1F));
+  } else {
+    moveRR(con, size, b, size, t);
+  }
+}
+
+bool
+needJump(MyBlock* b)
+{
+  return b->next or b->size != (b->size & PoolOffsetMask);
+}
+
+unsigned
+padding(MyBlock* b, unsigned offset)
+{
+  unsigned total = 0;
+  for (PoolEvent* e = b->poolEventHead; e; e = e->next) {
+    if (e->offset <= offset) {
+      if (needJump(b)) {
+        total += vm::TargetBytesPerWord;
+      }
+      for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) {
+        total += vm::TargetBytesPerWord;
+      }
+    } else {
+      break;
+    }
+  }
+  return total;
+}
+
+void resolve(MyBlock* b)
+{
+  Context* con = b->context;
+
+  if (b->poolOffsetHead) {
+    if (con->poolOffsetTail) {
+      con->poolOffsetTail->next = b->poolOffsetHead;
+    } else {
+      con->poolOffsetHead = b->poolOffsetHead;
+    }
+    con->poolOffsetTail = b->poolOffsetTail;
+  }
+
+  if (con->poolOffsetHead) {
+    bool append;
+    if (b->next == 0 or b->next->poolEventHead) {
+      append = true;
+    } else {
+      int32_t v = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8)
+        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
+
+      append = (v != (v & PoolOffsetMask));
+
+      if (DebugPool) {
+        fprintf(stderr,
+                "current %p %d %d next %p %d %d\n",
+                b, b->start, b->size, b->next, b->start + b->size,
+                b->next->size);
+        fprintf(stderr,
+                "offset %p %d is of distance %d to next block; append? %d\n",
+                con->poolOffsetHead, con->poolOffsetHead->offset, v, append);
+      }
+    }
+
+    if (append) {
+#ifndef NDEBUG
+      int32_t v = (b->start + b->size - 8)
+        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
+      
+      expect(con, v == (v & PoolOffsetMask));
+#endif // not NDEBUG
+
+      appendPoolEvent(con, b, b->size, con->poolOffsetHead, con->poolOffsetTail);
+
+      if (DebugPool) {
+        for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) {
+          fprintf(stderr,
+                  "include %p %d in pool event %p at offset %d in block %p\n",
+                  o, o->offset, b->poolEventTail, b->size, b);
+        }
+      }
+
+      con->poolOffsetHead = 0;
+      con->poolOffsetTail = 0;
+    }
+  }
+}
+
+void jumpR(Context* con, unsigned size UNUSED, lir::Register* target)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+  emit(con, bx(target->low));
+}
+
+void swapRR(Context* con, unsigned aSize, lir::Register* a,
+       unsigned bSize, lir::Register* b)
+{
+  assert(con, aSize == vm::TargetBytesPerWord);
+  assert(con, bSize == vm::TargetBytesPerWord);
+
+  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+  moveRR(con, aSize, a, bSize, &tmp);
+  moveRR(con, bSize, b, aSize, a);
+  moveRR(con, bSize, &tmp, bSize, b);
+  con->client->releaseTemporary(tmp.low);
+}
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst)
+{
+  bool srcIsFpr = isFpr(src);
+  bool dstIsFpr = isFpr(dst);
+  if (srcIsFpr || dstIsFpr) {   // FPR(s) involved
+    assert(con, srcSize == dstSize);
+    const bool dprec = srcSize == 8;
+    if (srcIsFpr && dstIsFpr) { // FPR to FPR
+      if (dprec) emit(con, fcpyd(fpr64(dst), fpr64(src))); // double
+      else       emit(con, fcpys(fpr32(dst), fpr32(src))); // single
+    } else if (srcIsFpr) {      // FPR to GPR
+      if (dprec) emit(con, fmrrd(dst->low, dst->high, fpr64(src)));
+      else       emit(con, fmrs(dst->low, fpr32(src)));
+    } else {                    // GPR to FPR
+      if (dprec) emit(con, fmdrr(fpr64(dst->low), src->low, src->high));
+      else       emit(con, fmsr(fpr32(dst), src->low));
+    }
+    return;
+  }
+
+  switch (srcSize) {
+  case 1:
+    emit(con, lsli(dst->low, src->low, 24));
+    emit(con, asri(dst->low, dst->low, 24));
+    break;
+
+  case 2:
+    emit(con, lsli(dst->low, src->low, 16));
+    emit(con, asri(dst->low, dst->low, 16));
+    break;
+
+  case 4:
+  case 8:
+    if (srcSize == 4 and dstSize == 8) {
+      moveRR(con, 4, src, 4, dst);
+      emit(con, asri(dst->high, src->low, 31));
+    } else if (srcSize == 8 and dstSize == 8) {
+      lir::Register srcHigh(src->high);
+      lir::Register dstHigh(dst->high);
+
+      if (src->high == dst->low) {
+        if (src->low == dst->high) {
+          swapRR(con, 4, src, 4, dst);
+        } else {
+          moveRR(con, 4, &srcHigh, 4, &dstHigh);
+          moveRR(con, 4, src, 4, dst);
+        }
+      } else {
+        moveRR(con, 4, src, 4, dst);
+        moveRR(con, 4, &srcHigh, 4, &dstHigh);
+      }
+    } else if (src->low != dst->low) {
+      emit(con, mov(dst->low, src->low));
+    }
+    break;
+
+  default: abort(con);
+  }
+}
+
+void moveZRR(Context* con, unsigned srcSize, lir::Register* src,
+        unsigned, lir::Register* dst)
+{
+  switch (srcSize) {
+  case 2:
+    emit(con, lsli(dst->low, src->low, 16));
+    emit(con, lsri(dst->low, dst->low, 16));
+    break;
+
+  default: abort(con);
+  }
+}
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+            unsigned, lir::Register* dst);
+
+void moveCR2(Context* con, unsigned size, lir::Constant* src,
+        lir::Register* dst, Promise* callOffset)
+{
+  if (isFpr(dst)) { // floating-point
+    lir::Register tmp = size > 4 ? makeTemp64(con) :
+                                         makeTemp(con);
+    moveCR(con, size, src, size, &tmp);
+    moveRR(con, size, &tmp, size, dst);
+    freeTemp(con, tmp);
+  } else if (size > 4) { 
+    uint64_t value = (uint64_t)src->value->value();
+    ResolvedPromise loBits(value & MASK_LO32);
+    lir::Constant srcLo(&loBits);
+    ResolvedPromise hiBits(value >> 32); 
+    lir::Constant srcHi(&hiBits);
+    lir::Register dstHi(dst->high);
+    moveCR(con, 4, &srcLo, 4, dst);
+    moveCR(con, 4, &srcHi, 4, &dstHi);
+  } else if (src->value->resolved() and isOfWidth(getValue(src), 8)) {
+    emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate
+  } else {
+    appendConstantPoolEntry(con, src->value, callOffset);
+    emit(con, ldri(dst->low, ProgramCounter, 0)); // load 32 bits
+  }
+}
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+       unsigned, lir::Register* dst)
+{
+  moveCR2(con, size, src, dst, 0);
+}
+
+void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) {
+    emit(con, SETS(add(t->low, a->low, b->low)));
+    emit(con, adc(t->high, a->high, b->high));
+  } else {
+    emit(con, add(t->low, a->low, b->low));
+  }
+}
+
+void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) {
+    emit(con, SETS(rsb(t->low, a->low, b->low)));
+    emit(con, rsc(t->high, a->high, b->high));
+  } else {
+    emit(con, rsb(t->low, a->low, b->low));
+  }
+}
+
+void addC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  int32_t v = a->value->value();
+  if (v) {
+    if (v > 0 and v < 256) {
+      emit(con, addi(dst->low, b->low, v));
+    } else if (v > 0 and v < 1024 and v % 4 == 0) {
+      emit(con, addi(dst->low, b->low, v >> 2, 15));
+    } else {
+      // todo
+      abort(con);
+    }
+  } else {
+    moveRR(con, size, b, size, dst);
+  }
+}
+
+void subC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  int32_t v = a->value->value();
+  if (v) {
+    if (v > 0 and v < 256) {
+      emit(con, subi(dst->low, b->low, v));
+    } else if (v > 0 and v < 1024 and v % 4 == 0) {
+      emit(con, subi(dst->low, b->low, v >> 2, 15));
+    } else {
+      // todo
+      abort(con);
+    }
+  } else {
+    moveRR(con, size, b, size, dst);
+  }
+}
+
+void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) {
+    bool useTemporaries = b->low == t->low;
+    int tmpLow  = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->low;
+    int tmpHigh = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->high;
+
+    emit(con, umull(tmpLow, tmpHigh, a->low, b->low));
+    emit(con, mla(tmpHigh, a->low, b->high, tmpHigh));
+    emit(con, mla(tmpHigh, a->high, b->low, tmpHigh));
+
+    if (useTemporaries) {
+      emit(con, mov(t->low, tmpLow));
+      emit(con, mov(t->high, tmpHigh));
+      con->client->releaseTemporary(tmpLow);
+      con->client->releaseTemporary(tmpHigh);
+    }
+  } else {
+    emit(con, mul(t->low, a->low, b->low));
+  }
+}
+
+void floatAbsoluteRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  if (size == 8) {
+    emit(con, fabsd(fpr64(b), fpr64(a)));
+  } else {
+    emit(con, fabss(fpr32(b), fpr32(a)));
+  }
+}
+
+void floatNegateRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  if (size == 8) {
+    emit(con, fnegd(fpr64(b), fpr64(a)));
+  } else {
+    emit(con, fnegs(fpr32(b), fpr32(a)));
+  }
+}
+
+void float2FloatRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  if (size == 8) {
+    emit(con, fcvtsd(fpr32(b), fpr64(a)));
+  } else {
+    emit(con, fcvtds(fpr64(b), fpr32(a)));
+  }
+}
+
+void float2IntRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  int tmp = newTemp(con, FPR_MASK);
+  int ftmp = fpr32(tmp);
+  if (size == 8) { // double to int
+    emit(con, ftosizd(ftmp, fpr64(a)));
+  } else {         // float to int
+    emit(con, ftosizs(ftmp, fpr32(a)));
+  }                // else thunked
+  emit(con, fmrs(b->low, ftmp));
+  freeTemp(con, tmp);
+}
+
+void int2FloatRR(Context* con, unsigned, lir::Register* a, unsigned size, lir::Register* b) {
+  emit(con, fmsr(fpr32(b), a->low));
+  if (size == 8) { // int to double
+    emit(con, fsitod(fpr64(b), fpr32(b)));
+  } else {         // int to float
+    emit(con, fsitos(fpr32(b), fpr32(b)));
+  }                // else thunked
+}
+
+void floatSqrtRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  if (size == 8) {
+    emit(con, fsqrtd(fpr64(b), fpr64(a)));
+  } else {
+    emit(con, fsqrts(fpr32(b), fpr32(a)));
+  }
+}
+
+void floatAddR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) {
+    emit(con, faddd(fpr64(t), fpr64(a), fpr64(b)));
+  } else {
+    emit(con, fadds(fpr32(t), fpr32(a), fpr32(b)));
+  }
+}
+
+void floatSubtractR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) {
+    emit(con, fsubd(fpr64(t), fpr64(b), fpr64(a)));
+  } else {
+    emit(con, fsubs(fpr32(t), fpr32(b), fpr32(a)));
+  }
+}
+
+void floatMultiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) {
+    emit(con, fmuld(fpr64(t), fpr64(a), fpr64(b)));
+  } else {
+    emit(con, fmuls(fpr32(t), fpr32(a), fpr32(b)));
+  }
+}
+
+void floatDivideR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if (size == 8) { 
+    emit(con, fdivd(fpr64(t), fpr64(b), fpr64(a)));
+  } else {
+    emit(con, fdivs(fpr32(t), fpr32(b), fpr32(a)));
+  }
+}
+
+// Fold (offset, index, scale) into a single register so memory accesses can
+// use a plain [base, index] form: returns a register holding
+// index * scale + offset.  If nothing needs folding the original index is
+// returned unchanged.  *preserveIndex requests that the caller's index
+// register not be clobbered (a temporary is allocated instead); on return
+// *release tells the caller whether the returned register is a temporary it
+// must hand back via releaseTemporary().
+int normalize(Context* con, int offset, int index, unsigned scale, 
+          bool* preserveIndex, bool* release)
+{
+  if (offset != 0 or scale != 1) {
+    lir::Register normalizedIndex
+      (*preserveIndex ? con->client->acquireTemporary(GPR_MASK) : index);
+    
+    if (*preserveIndex) {
+      *release = true;
+      *preserveIndex = false;
+    } else {
+      *release = false;
+    }
+
+    int scaled;
+
+    // Apply the scale first (scale is a power of two, so shift by log(scale)).
+    if (scale != 1) {
+      lir::Register unscaledIndex(index);
+
+      ResolvedPromise scalePromise(log(scale));
+      lir::Constant scaleConstant(&scalePromise);
+      
+      shiftLeftC(con, vm::TargetBytesPerWord, &scaleConstant,
+                 &unscaledIndex, &normalizedIndex);
+
+      scaled = normalizedIndex.low;
+    } else {
+      scaled = index;
+    }
+
+    // Then fold the displacement in via a materialized constant add.
+    if (offset != 0) {
+      lir::Register untranslatedIndex(scaled);
+
+      ResolvedPromise offsetPromise(offset);
+      lir::Constant offsetConstant(&offsetPromise);
+
+      lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+      moveCR(con, vm::TargetBytesPerWord, &offsetConstant, vm::TargetBytesPerWord, &tmp);
+      addR(con, vm::TargetBytesPerWord, &tmp, &untranslatedIndex, &normalizedIndex);
+      con->client->releaseTemporary(tmp.low);
+    }
+
+    return normalizedIndex.low;
+  } else {
+    *release = false;
+    return index;
+  }
+}
+
+// Store the (1/2/4/8-byte) value in src to [base + offset + index * scale].
+// Three strategies, in order:
+//  1. register index present: fold offset/scale via normalize() and use
+//     register-offset store forms (FPRs get an explicit address add, since
+//     VFP stores are base-only);
+//  2. no index and the offset fits the instruction's immediate field
+//     (8 bits for halfwords, 12 bits otherwise);
+//  3. otherwise materialize the offset in a temporary and recurse as case 1.
+// 64-bit values are split into two word stores with the high word at the
+// lower address (offset + 0) and the low word at offset + 4.
+void store(Context* con, unsigned size, lir::Register* src,
+      int base, int offset, int index, unsigned scale, bool preserveIndex)
+{
+  if (index != lir::NoRegister) {
+    bool release;
+    int normalized = normalize
+      (con, offset, index, scale, &preserveIndex, &release);
+
+    if (!isFpr(src)) { // GPR store
+      switch (size) {
+      case 1:
+        emit(con, strb(src->low, base, normalized));
+        break;
+
+      case 2:
+        emit(con, strh(src->low, base, normalized));
+        break;
+
+      case 4:
+        emit(con, str(src->low, base, normalized));
+        break;
+
+      case 8: { // split into 2 32-bit stores
+        lir::Register srcHigh(src->high);
+        store(con, 4, &srcHigh, base, 0, normalized, 1, preserveIndex);
+        store(con, 4, src, base, 4, normalized, 1, preserveIndex);
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR store
+      lir::Register base_(base),
+                          normalized_(normalized),
+                          absAddr = makeTemp(con);
+      // FPR stores have only bases, so we must add the index
+      addR(con, vm::TargetBytesPerWord, &base_, &normalized_, &absAddr);
+      // double-precision
+      if (size == 8) emit(con, fstd(fpr64(src), absAddr.low));
+      // single-precision
+      else           emit(con, fsts(fpr32(src), absAddr.low));
+      freeTemp(con, absAddr);
+    }
+
+    if (release) con->client->releaseTemporary(normalized);
+  } else if (size == 8
+             or abs(offset) == (abs(offset) & 0xFF)
+             or (size != 2 and abs(offset) == (abs(offset) & 0xFFF)))
+  {
+    // Immediate-offset forms (the size == 8 case recurses with size 4, which
+    // re-checks the range for each half).
+    if (!isFpr(src)) { // GPR store
+      switch (size) {
+      case 1:
+        emit(con, strbi(src->low, base, offset));
+        break;
+
+      case 2:
+        emit(con, strhi(src->low, base, offset));
+        break;
+
+      case 4:
+        emit(con, stri(src->low, base, offset));
+        break;
+
+      case 8: { // split into 2 32-bit stores
+        lir::Register srcHigh(src->high);
+        store(con, 4, &srcHigh, base, offset, lir::NoRegister, 1, false);
+        store(con, 4, src, base, offset + 4, lir::NoRegister, 1, false);
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR store
+      // double-precision
+      if (size == 8) emit(con, fstd(fpr64(src), base, offset));
+      // single-precision
+      else           emit(con, fsts(fpr32(src), base, offset));
+    }
+  } else {
+    // Offset too large to encode: load it into a temporary register and
+    // retry as a register-indexed store.
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    ResolvedPromise offsetPromise(offset);
+    lir::Constant offsetConstant(&offsetPromise);
+    moveCR(con, vm::TargetBytesPerWord, &offsetConstant,
+           vm::TargetBytesPerWord, &tmp);
+    
+    store(con, size, src, base, 0, tmp.low, 1, false);
+
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+// Register-to-memory move: thin wrapper over store(), preserving the
+// memory operand's index register (preserveIndex = true).
+void moveRM(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize UNUSED, lir::Memory* dst)
+{
+  assert(con, srcSize == dstSize);
+
+  store(con, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
+}
+
+// Load a (1/2/4/8-byte) value from [base + offset + index * scale] into dst,
+// sign- or zero-extending sub-word values per signExtend.  Mirrors store():
+//  1. register index: normalize() then register-offset loads (FPR loads get
+//     an explicit address add, since VFP loads are base-only);
+//  2. no index and the offset fits the immediate field (8 bits for halfword
+//     and signed-byte forms, 12 bits otherwise);
+//  3. otherwise materialize the offset in a temporary and recurse.
+// A 4-byte source widened to an 8-byte destination is sign-extended via
+// moveRR; 64-bit loads read the high word from offset + 0 and the low word
+// from offset + 4, matching store()'s layout.
+void load(Context* con, unsigned srcSize, int base, int offset, int index,
+     unsigned scale, unsigned dstSize, lir::Register* dst,
+     bool preserveIndex, bool signExtend)
+{
+  if (index != lir::NoRegister) {
+    bool release;
+    int normalized = normalize
+      (con, offset, index, scale, &preserveIndex, &release);
+
+    if (!isFpr(dst)) { // GPR load
+      switch (srcSize) {
+      case 1:
+        if (signExtend) {
+          emit(con, ldrsb(dst->low, base, normalized));
+        } else {
+          emit(con, ldrb(dst->low, base, normalized));
+        }
+        break;
+
+      case 2:
+        if (signExtend) {
+          emit(con, ldrsh(dst->low, base, normalized));
+        } else {
+          emit(con, ldrh(dst->low, base, normalized));
+        }
+        break;
+
+      case 4:
+      case 8: {
+        if (srcSize == 4 and dstSize == 8) {
+          load(con, 4, base, 0, normalized, 1, 4, dst, preserveIndex,
+               false);
+          moveRR(con, 4, dst, 8, dst);
+        } else if (srcSize == 8 and dstSize == 8) {
+          lir::Register dstHigh(dst->high);
+          load(con, 4, base, 0, normalized, 1, 4, &dstHigh,
+              preserveIndex, false);
+          load(con, 4, base, 4, normalized, 1, 4, dst, preserveIndex,
+               false);
+        } else {
+          emit(con, ldr(dst->low, base, normalized));
+        }
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR load
+      lir::Register base_(base),
+                          normalized_(normalized),
+                          absAddr = makeTemp(con);
+      // VFP loads only have bases, so we must add the index
+      addR(con, vm::TargetBytesPerWord, &base_, &normalized_, &absAddr);
+      // double-precision
+      if (srcSize == 8) emit(con, fldd(fpr64(dst), absAddr.low));
+      // single-precision
+      else              emit(con, flds(fpr32(dst), absAddr.low));
+      freeTemp(con, absAddr);
+    }
+
+    if (release) con->client->releaseTemporary(normalized);
+  } else if ((srcSize == 8 and dstSize == 8)
+             or abs(offset) == (abs(offset) & 0xFF)
+             or (srcSize != 2
+                 and (srcSize != 1 or not signExtend)
+                 and abs(offset) == (abs(offset) & 0xFFF)))
+  {
+    // Immediate-offset forms (the 64-bit case recurses with srcSize 4,
+    // which re-checks the range for each half).
+    if (!isFpr(dst)) { // GPR load
+      switch (srcSize) {
+      case 1:
+        if (signExtend) {
+          emit(con, ldrsbi(dst->low, base, offset));
+        } else {
+          emit(con, ldrbi(dst->low, base, offset));
+        }
+        break;
+
+      case 2:
+        if (signExtend) {
+          emit(con, ldrshi(dst->low, base, offset));
+        } else {
+          emit(con, ldrhi(dst->low, base, offset));
+        }
+        break;
+
+      case 4:
+        emit(con, ldri(dst->low, base, offset));
+        break;
+
+      case 8: {
+        if (dstSize == 8) {
+          lir::Register dstHigh(dst->high);
+          load(con, 4, base, offset, lir::NoRegister, 1, 4, &dstHigh, false,
+               false);
+          load(con, 4, base, offset + 4, lir::NoRegister, 1, 4, dst, false,
+               false);
+        } else {
+          // Truncating 8 -> 4 load: only the low word is read.
+          emit(con, ldri(dst->low, base, offset));
+        }
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR load
+      // double-precision
+      if (srcSize == 8) emit(con, fldd(fpr64(dst), base, offset));
+      // single-precision
+      else              emit(con, flds(fpr32(dst), base, offset));
+    }
+  } else {
+    // Offset too large to encode: materialize it and retry register-indexed.
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    ResolvedPromise offsetPromise(offset);
+    lir::Constant offsetConstant(&offsetPromise);
+    moveCR(con, vm::TargetBytesPerWord, &offsetConstant, vm::TargetBytesPerWord,
+           &tmp);
+    
+    load(con, srcSize, base, 0, tmp.low, 1, dstSize, dst, false,
+         signExtend);
+
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+// Memory-to-register move with sign extension (signExtend = true).
+void moveMR(Context* con, unsigned srcSize, lir::Memory* src,
+       unsigned dstSize, lir::Register* dst)
+{
+  load(con, srcSize, src->base, src->offset, src->index, src->scale,
+       dstSize, dst, true, true);
+}
+
+// Memory-to-register move with zero extension (signExtend = false).
+void moveZMR(Context* con, unsigned srcSize, lir::Memory* src,
+        unsigned dstSize, lir::Register* dst)
+{
+  load(con, srcSize, src->base, src->offset, src->index, src->scale,
+       dstSize, dst, true, false);
+}
+
+// dst = a & b, word-by-word (high words first when size == 8).
+void andR(Context* con, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst)
+{
+  if (size == 8) emit(con, and_(dst->high, a->high, b->high));
+  emit(con, and_(dst->low, a->low, b->low));
+}
+
+// dst = b & constant.  64-bit ops are split into two 32-bit andC calls.
+// The 32-bit case picks the cheapest encoding:
+//  - all-ones mask: plain register move (no-op AND);
+//  - ~mask fits in 8 bits: single bic (bit-clear) immediate;
+//  - mask fits in 8 bits: single and immediate;
+//  - otherwise: materialize the constant and use a register AND, via a
+//    temporary when b and dst alias (so moveCR doesn't clobber b first).
+void andC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst)
+{
+  int64_t v = a->value->value();
+
+  if (size == 8) {
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+
+    andC(con, 4, &al, b, dst);
+    andC(con, 4, &ah, &bh, &dh);
+  } else {
+    uint32_t v32 = static_cast<uint32_t>(v);
+    if (v32 != 0xFFFFFFFF) {
+      if ((v32 & 0xFFFFFF00) == 0xFFFFFF00) {
+        emit(con, bici(dst->low, b->low, (~(v32 & 0xFF)) & 0xFF));
+      } else if ((v32 & 0xFFFFFF00) == 0) {
+        emit(con, andi(dst->low, b->low, v32 & 0xFF));
+      } else {
+        // todo: there are other cases we can handle in one
+        // instruction
+
+        bool useTemporary = b->low == dst->low;
+        lir::Register tmp(dst->low);
+        if (useTemporary) {
+          tmp.low = con->client->acquireTemporary(GPR_MASK);
+        }
+
+        moveCR(con, 4, a, 4, &tmp);
+        andR(con, 4, b, &tmp, dst);
+        
+        if (useTemporary) {
+          con->client->releaseTemporary(tmp.low);
+        }
+      }
+    } else {
+      // Mask is all ones: AND is the identity, so just copy b to dst.
+      moveRR(con, size, b, size, dst);
+    }
+  }
+}
+
+// dst = a | b, word-by-word (high words first when size == 8).
+void orR(Context* con, unsigned size, lir::Register* a,
+    lir::Register* b, lir::Register* dst)
+{
+  if (size == 8) emit(con, orr(dst->high, a->high, b->high));
+  emit(con, orr(dst->low, a->low, b->low));
+}
+
+// dst = a ^ b, word-by-word (high words first when size == 8).
+void xorR(Context* con, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst)
+{
+  if (size == 8) emit(con, eor(dst->high, a->high, b->high));
+  emit(con, eor(dst->low, a->low, b->low));
+}
+
+// Load the word stored at an absolute address: materialize the address
+// constant in dst, then load through dst (dst doubles as the base register,
+// so no extra temporary is needed).  Word-sized operands only.
+void moveAR2(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst)
+{
+  assert(con, srcSize == 4 and dstSize == 4);
+
+  lir::Constant constant(src->address);
+  moveCR(con, srcSize, &constant, dstSize, dst);
+
+  lir::Memory memory(dst->low, 0, -1, 0);
+  moveMR(con, dstSize, &memory, dstSize, dst);
+}
+
+// Public entry point for address-to-register moves; delegates to moveAR2.
+void moveAR(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst)
+{
+  moveAR2(con, srcSize, src, dstSize, dst);
+}
+
+// Set condition flags for b compared against a.  Both operands must be the
+// same register class.  GPR compares are word-sized only and emit cmp(b, a),
+// i.e. the flags reflect b - a.  FPR compares use fcmpd/fcmps followed by
+// fmstat to copy the VFP status flags into the CPSR for branching.
+void compareRR(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b)
+{
+  assert(con, !(isFpr(a) ^ isFpr(b))); // regs must be of the same type
+
+  if (!isFpr(a)) { // GPR compare
+    assert(con, aSize == 4 && bSize == 4);
+    /**///assert(con, b->low != a->low);
+    emit(con, cmp(b->low, a->low));
+  } else {         // FPR compare
+    assert(con, aSize == bSize);
+    if (aSize == 8) emit(con, fcmpd(fpr64(b), fpr64(a))); // double
+    else            emit(con, fcmps(fpr32(b), fpr32(a))); // single
+    emit(con, fmstat());
+  }
+}
+
+// Compare register b against constant a.  Uses a single cmp-immediate when
+// b is a GPR and the resolved constant fits in 8 bits; otherwise the
+// constant is materialized in a temporary and compared register-register.
+void compareCR(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b)
+{
+  assert(con, aSize == 4 and bSize == 4);
+
+  if (!isFpr(b) && a->value->resolved() &&
+      isOfWidth(a->value->value(), 8)) {
+    emit(con, cmpi(b->low, a->value->value()));
+  } else {
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    moveCR(con, aSize, a, bSize, &tmp);
+    compareRR(con, bSize, &tmp, bSize, b);
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+// Compare a memory operand against a constant: load the memory value into a
+// temporary GPR, then defer to compareCR.
+void compareCM(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b)
+{
+  assert(con, aSize == 4 and bSize == 4);
+
+  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+  moveMR(con, bSize, b, bSize, &tmp);
+  compareCR(con, aSize, a, bSize, &tmp);
+  con->client->releaseTemporary(tmp.low);
+}
+
+// Compare a memory operand against a register: load the memory value into a
+// temporary GPR, then defer to compareRR.
+void compareRM(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize, lir::Memory* b)
+{
+  assert(con, aSize == 4 and bSize == 4);
+
+  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+  moveMR(con, bSize, b, bSize, &tmp);
+  compareRR(con, aSize, a, bSize, &tmp);
+  con->client->releaseTemporary(tmp.low);
+}
+
+// Map a LIR conditional-jump operation to the corresponding ARM branch
+// instruction word (with a zero displacement to be patched later).  The
+// float variants assume the flags came from fcmp+fmstat (see compareRR):
+// lt/le are also taken on unordered results, which is why the
+// *OrUnordered ops share them, while hi/pl cover the greater-or-unordered
+// cases.  (NOTE(review): mapping assumed correct per ARM condition-code
+// semantics after fmstat — verify against the architecture manual.)
+int32_t
+branch(Context* con, lir::TernaryOperation op)
+{
+  switch (op) {
+  case lir::JumpIfEqual:
+  case lir::JumpIfFloatEqual:
+    return beq(0);
+
+  case lir::JumpIfNotEqual:
+  case lir::JumpIfFloatNotEqual:
+    return bne(0);
+
+  case lir::JumpIfLess:
+  case lir::JumpIfFloatLess:
+  case lir::JumpIfFloatLessOrUnordered:
+    return blt(0);
+
+  case lir::JumpIfGreater:
+  case lir::JumpIfFloatGreater:
+    return bgt(0);
+
+  case lir::JumpIfLessOrEqual:
+  case lir::JumpIfFloatLessOrEqual:
+  case lir::JumpIfFloatLessOrEqualOrUnordered:
+    return ble(0);
+
+  case lir::JumpIfGreaterOrEqual:
+  case lir::JumpIfFloatGreaterOrEqual:
+    return bge(0);
+
+  case lir::JumpIfFloatGreaterOrUnordered:
+    return bhi(0);
+
+  case lir::JumpIfFloatGreaterOrEqualOrUnordered:
+    return bpl(0);
+ 
+  default:
+    abort(con);
+  }
+}
+
+// Emit a pre-encoded branch instruction word and register an offset task so
+// its displacement is patched to `target` once addresses are known.
+void conditional(Context* con, int32_t branch, lir::Constant* target)
+{
+  appendOffsetTask(con, target->value, offsetPromise(con));
+  emit(con, branch);
+}
+
+// Convenience overload: encode the branch for `op` and emit it at `target`.
+void branch(Context* con, lir::TernaryOperation op, lir::Constant* target)
+{
+  conditional(con, branch(con, op), target);
+}
+
+// Conditional branch on a 64-bit comparison performed as two 32-bit
+// compares: first the high words (signed), then — when the high words don't
+// decide the outcome — the low words, which must use unsigned conditions.
+// `next` records the position of a skip-over branch whose displacement is
+// patched at the end (via updateOffset) to jump past the low-word test.
+// compareSigned/compareUnsigned abstract over register vs constant operands
+// so branchRR and branchCR can share this routine.
+void branchLong(Context* con, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compareSigned,
+           BinaryOperationType compareUnsigned)
+{
+  compareSigned(con, 4, ah, 4, bh);
+
+  unsigned next = 0;
+  
+  switch (op) {
+  case lir::JumpIfEqual:
+  case lir::JumpIfFloatEqual:
+    // High words unequal => fall through past the low-word test.
+    next = con->code.length();
+    emit(con, bne(0));
+
+    compareSigned(con, 4, al, 4, bl);
+    conditional(con, beq(0), target);
+    break;
+
+  case lir::JumpIfNotEqual:
+  case lir::JumpIfFloatNotEqual:
+    // Either word differing is enough; no skip branch needed.
+    conditional(con, bne(0), target);
+
+    compareSigned(con, 4, al, 4, bl);
+    conditional(con, bne(0), target);
+    break;
+
+  case lir::JumpIfLess:
+  case lir::JumpIfFloatLess:
+    conditional(con, blt(0), target);
+
+    next = con->code.length();
+    emit(con, bgt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, blo(0), target);
+    break;
+
+  case lir::JumpIfGreater:
+  case lir::JumpIfFloatGreater:
+    conditional(con, bgt(0), target);
+
+    next = con->code.length();
+    emit(con, blt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, bhi(0), target);
+    break;
+
+  case lir::JumpIfLessOrEqual:
+  case lir::JumpIfFloatLessOrEqual:
+    conditional(con, blt(0), target);
+
+    next = con->code.length();
+    emit(con, bgt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, bls(0), target);
+    break;
+
+  case lir::JumpIfGreaterOrEqual:
+  case lir::JumpIfFloatGreaterOrEqual:
+    conditional(con, bgt(0), target);
+
+    next = con->code.length();
+    emit(con, blt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, bhs(0), target);
+    break;
+
+  default:
+    abort(con);
+  }
+
+  // Patch the recorded skip branch to land just past the code emitted above.
+  if (next) {
+    updateOffset
+      (con->s, con->code.data + next, reinterpret_cast<intptr_t>
+       (con->code.data + con->code.length()));
+  }
+}
+
+// Compare-and-branch on two registers.  64-bit GPR comparisons are lowered
+// through branchLong (word-pair compares); FPR or word-sized comparisons go
+// through a single compareRR followed by the mapped branch.
+void branchRR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target)
+{
+  if (!isFpr(a) && size > vm::TargetBytesPerWord) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    branchLong(con, op, a, &ah, b, &bh, target, CAST2(compareRR),
+               CAST2(compareRR));
+  } else {
+    compareRR(con, size, a, size, b);
+    branch(con, op, target);
+  }
+}
+
+// Compare-and-branch of register b against constant a (integer ops only).
+// 64-bit constants are split into low/high word constants for branchLong;
+// the ~static_cast<target_uintptr_t>(0) mask is all-ones of the 32-bit
+// target word, i.e. it keeps the low 32 bits of each half.
+void branchCR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target)
+{
+  assert(con, !isFloatBranch(op));
+
+  if (size > vm::TargetBytesPerWord) {
+    int64_t v = a->value->value();
+
+    ResolvedPromise low(v & ~static_cast<vm::target_uintptr_t>(0));
+    lir::Constant al(&low);
+
+    ResolvedPromise high((v >> 32) & ~static_cast<vm::target_uintptr_t>(0));
+    lir::Constant ah(&high);
+
+    lir::Register bh(b->high);
+
+    branchLong(con, op, &al, &ah, b, &bh, target, CAST2(compareCR),
+               CAST2(compareCR));
+  } else {
+    compareCR(con, size, a, size, b);
+    branch(con, op, target);
+  }
+}
+
+// Compare-and-branch of a memory operand against a register; integer,
+// word-sized-or-smaller comparisons only.
+void branchRM(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target)
+{
+  assert(con, !isFloatBranch(op));
+  assert(con, size <= vm::TargetBytesPerWord);
+
+  compareRM(con, size, a, size, b);
+  branch(con, op, target);
+}
+
+// Compare-and-branch of a memory operand against a constant; integer,
+// word-sized-or-smaller comparisons only.
+void branchCM(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target)
+{
+  assert(con, !isFloatBranch(op));
+  assert(con, size <= vm::TargetBytesPerWord);
+
+  compareCM(con, size, a, size, b);
+  branch(con, op, target);
+}
+
+// Allocate (in the context's zone, so never explicitly freed) a promise that
+// resolves to (base >> shift) & mask — used to pick words out of constants.
+ShiftMaskPromise*
+shiftMaskPromise(Context* con, Promise* base, unsigned shift, int64_t mask)
+{
+  return new(con->zone) ShiftMaskPromise(base, shift, mask);
+}
+
+// Store a constant to memory.  64-bit constants are split via
+// shiftMaskPromise into two word stores: the high word goes to dst->offset
+// and the low word to dst->offset + 4, matching the 64-bit layout used by
+// store().  Smaller sizes stage the constant through a temporary GPR.
+void moveCM(Context* con, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Memory* dst)
+{
+  switch (dstSize) {
+  case 8: {
+    lir::Constant srcHigh
+      (shiftMaskPromise(con, src->value, 32, 0xFFFFFFFF));
+    lir::Constant srcLow
+      (shiftMaskPromise(con, src->value, 0, 0xFFFFFFFF));
+    
+    lir::Memory dstLow
+      (dst->base, dst->offset + 4, dst->index, dst->scale);
+    
+    moveCM(con, 4, &srcLow, 4, &dstLow);
+    moveCM(con, 4, &srcHigh, 4, dst);
+  } break;
+
+  default:
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    moveCR(con, srcSize, src, dstSize, &tmp);
+    moveRM(con, dstSize, &tmp, dstSize, dst);
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+// Two's-complement negate: dst = ~src + 1.  SETS makes the low-word add
+// update the flags so that, for 64-bit values, adci propagates the carry
+// into the complemented high word.
+void negateRR(Context* con, unsigned srcSize, lir::Register* src,
+         unsigned dstSize UNUSED, lir::Register* dst)
+{
+  assert(con, srcSize == dstSize);
+
+  emit(con, mvn(dst->low, src->low));
+  emit(con, SETS(addi(dst->low, dst->low, 1)));
+  if (srcSize == 8) {
+    emit(con, mvn(dst->high, src->high));
+    emit(con, adci(dst->high, dst->high, 0));
+  }
+}
+
+// Indirect call: branch-with-link-and-exchange to the address in `target`.
+void callR(Context* con, unsigned size UNUSED, lir::Register* target)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+  emit(con, blx(target->low));
+}
+
+// Direct call: emit bl with a zero displacement and register an offset task
+// to patch it to `target` once addresses are known.
+void callC(Context* con, unsigned size UNUSED, lir::Constant* target)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  appendOffsetTask(con, target->value, offsetPromise(con));
+  emit(con, bl(0));
+}
+
+// Call a target that may be out of bl range: load the full address into r4
+// via moveCR2 (recording the call-site promise so the constant can be
+// patched), then call indirectly.
+void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  lir::Register tmp(4);
+  moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con));
+  callR(con, vm::TargetBytesPerWord, &tmp);
+}
+
+// Jump to a target that may be out of branch range: like longCallC but
+// without linking.
+void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  lir::Register tmp(4); // a non-arg reg that we don't mind clobbering
+  moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con));
+  jumpR(con, vm::TargetBytesPerWord, &tmp);
+}
+
+// Direct unconditional jump: emit b with a zero displacement and register an
+// offset task to patch it to `target`.
+void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  appendOffsetTask(con, target->value, offsetPromise(con));
+  emit(con, b(0));
+}
+
+// Function return: branch to the link register.
+void return_(Context* con)
+{
+  emit(con, bx(LinkRegister));
+}
+
+// Emit a breakpoint instruction (used for unreachable/debug traps).
+void trap(Context* con)
+{
+  emit(con, bkpt(0));
+}
+
+// Deliberate no-op on this target; kept so all architectures share the
+// same operation table shape.
+void memoryBarrier(Context*) {}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/operations.h b/src/codegen/arm/operations.h
new file mode 100644
index 0000000000..2d598b6d9e
--- /dev/null
+++ b/src/codegen/arm/operations.h
@@ -0,0 +1,240 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_OPERATIONS_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_OPERATIONS_H
+
+#include "registers.h"
+
+namespace vm {
+class System;
+}
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+class Context;
+
+// shortcut functions
+
+// Acquire a scratch general-purpose register from the register allocator.
+inline int newTemp(Context* con) {
+  return con->client->acquireTemporary(GPR_MASK);
+}
+
+// Acquire a scratch register restricted to the given register mask.
+inline int newTemp(Context* con, unsigned mask) {
+  return con->client->acquireTemporary(mask);
+}
+
+// Return a scratch register to the allocator.
+inline void freeTemp(Context* con, int r) {
+  con->client->releaseTemporary(r);
+}
+
+// Resolve a constant operand to its integer value.
+inline int64_t getValue(lir::Constant* con) {
+  return con->value->value();
+}
+
+// Wrap a freshly acquired scratch GPR in a single-word lir::Register.
+inline lir::Register makeTemp(Context* con) {
+  lir::Register tmp(newTemp(con));
+  return tmp;
+}
+
+// Acquire a register pair (low, high) for 64-bit temporaries.
+inline lir::Register makeTemp64(Context* con) {
+  lir::Register tmp(newTemp(con), newTemp(con));
+  return tmp;
+}
+
+// Release both halves of a (possibly pair) temporary; NoRegister slots are
+// skipped so single-word temps are handled too.
+inline void freeTemp(Context* con, const lir::Register& tmp) {
+  if (tmp.low != lir::NoRegister) freeTemp(con, tmp.low);
+  if (tmp.high != lir::NoRegister) freeTemp(con, tmp.high);
+}
+
+void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);
+
+void shiftLeftC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void shiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void unsignedShiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+bool needJump(MyBlock* b);
+
+unsigned padding(MyBlock* b, unsigned offset);
+
+void resolve(MyBlock* b);
+
+void jumpR(Context* con, unsigned size UNUSED, lir::Register* target);
+
+void swapRR(Context* con, unsigned aSize, lir::Register* a,
+       unsigned bSize, lir::Register* b);
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);
+
+void moveZRR(Context* con, unsigned srcSize, lir::Register* src,
+        unsigned, lir::Register* dst);
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+            unsigned, lir::Register* dst);
+
+void moveCR2(Context* con, unsigned size, lir::Constant* src,
+        lir::Register* dst, Promise* callOffset);
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+       unsigned, lir::Register* dst);
+
+void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void addC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void subC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatAbsoluteRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void floatNegateRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void float2FloatRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void float2IntRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void int2FloatRR(Context* con, unsigned, lir::Register* a, unsigned size, lir::Register* b);
+
+void floatSqrtRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void floatAddR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatSubtractR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatMultiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatDivideR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+int normalize(Context* con, int offset, int index, unsigned scale, 
+          bool* preserveIndex, bool* release);
+
+void store(Context* con, unsigned size, lir::Register* src,
+      int base, int offset, int index, unsigned scale, bool preserveIndex);
+
+void moveRM(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize UNUSED, lir::Memory* dst);
+
+void load(Context* con, unsigned srcSize, int base, int offset, int index,
+     unsigned scale, unsigned dstSize, lir::Register* dst,
+     bool preserveIndex, bool signExtend);
+
+void moveMR(Context* con, unsigned srcSize, lir::Memory* src,
+       unsigned dstSize, lir::Register* dst);
+
+void moveZMR(Context* con, unsigned srcSize, lir::Memory* src,
+        unsigned dstSize, lir::Register* dst);
+
+void andR(Context* con, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst);
+
+void andC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void orR(Context* con, unsigned size, lir::Register* a,
+    lir::Register* b, lir::Register* dst);
+
+void xorR(Context* con, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst);
+
+void moveAR2(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst);
+
+void moveAR(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst);
+
+void compareRR(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b);
+
+void compareCR(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b);
+
+void compareCM(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b);
+
+void compareRM(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize, lir::Memory* b);
+
+int32_t
+branch(Context* con, lir::TernaryOperation op);
+
+void conditional(Context* con, int32_t branch, lir::Constant* target);
+
+void branch(Context* con, lir::TernaryOperation op, lir::Constant* target);
+
+void branchLong(Context* con, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compareSigned,
+           BinaryOperationType compareUnsigned);
+
+void branchRR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target);
+
+void branchCR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target);
+
+void branchRM(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target);
+
+void branchCM(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target);
+
+ShiftMaskPromise*
+shiftMaskPromise(Context* con, Promise* base, unsigned shift, int64_t mask);
+
+void moveCM(Context* con, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Memory* dst);
+
+void negateRR(Context* con, unsigned srcSize, lir::Register* src,
+         unsigned dstSize UNUSED, lir::Register* dst);
+
+void callR(Context* con, unsigned size UNUSED, lir::Register* target);
+
+void callC(Context* con, unsigned size UNUSED, lir::Constant* target);
+
+void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target);
+
+void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target);
+
+void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target);
+
+void return_(Context* con);
+
+void trap(Context* con);
+
+void memoryBarrier(Context*);
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_OPERATIONS_H
+
diff --git a/src/codegen/arm/registers.h b/src/codegen/arm/registers.h
new file mode 100644
index 0000000000..85c389b222
--- /dev/null
+++ b/src/codegen/arm/registers.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_REGISTERS_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_REGISTERS_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+
+// Masks for extracting the low 32/16/8 bits of a value.
+const uint64_t MASK_LO32 = 0xffffffff;
+const unsigned MASK_LO16 = 0xffff;
+const unsigned MASK_LO8  = 0xff;
+
+// Register numbering: GPRs occupy indices 0..15 and FPRs 16..31, so a
+// register-set mask has GPR bits in the low half and FPR bits in the
+// high half of a 32-bit word.
+const int N_GPRS = 16;
+const int N_FPRS = 16;
+const uint32_t GPR_MASK = 0xffff;
+const uint32_t FPR_MASK = 0xffff0000;
+
+// 64-bit variants with the same mask replicated in both word halves (used
+// where a mask covers the low and high words of a register pair).
+const uint64_t GPR_MASK64 = GPR_MASK | (uint64_t)GPR_MASK << 32;
+const uint64_t FPR_MASK64 = FPR_MASK | (uint64_t)FPR_MASK << 32;
+
+// A register index at or above N_GPRS denotes a floating-point register.
+inline bool isFpr(lir::Register* reg) {
+  return reg->low >= N_GPRS;
+}
+
+// Translate the combined numbering back to hardware FPR numbers: fpr64
+// yields the double-precision register index, fpr32 the single-precision
+// index (each double overlays two singles, hence the << 1).
+inline int fpr64(int reg) { return reg - N_GPRS; }
+inline int fpr64(lir::Register* reg) { return fpr64(reg->low); }
+inline int fpr32(int reg) { return fpr64(reg) << 1; }
+inline int fpr32(lir::Register* reg) { return fpr64(reg) << 1; }
+
+// Fixed role assignments for specific ARM registers.
+const int ThreadRegister = 8;
+const int StackRegister = 13;
+const int LinkRegister = 14;
+const int ProgramCounter = 15;
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_REGISTERS_H
diff --git a/src/common.h b/src/common.h
index a99d246f67..02e52aeae1 100644
--- a/src/common.h
+++ b/src/common.h
@@ -477,6 +477,12 @@ hash(const uint16_t* s, unsigned length)
   return h;
 }
 
+// Write the 4-byte value v to dst in host byte order; memcpy keeps this
+// well-defined for unaligned destinations.
+inline void
+write4(uint8_t* dst, uint32_t v)
+{
+  memcpy(dst, &v, 4);
+}
+
 inline uint32_t
 floatToBits(float f)
 {

From 73dda9c26e593eea9b314ec0eb4ff554fdcb08d5 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Fri, 22 Feb 2013 23:15:40 -0700
Subject: [PATCH 12/22] begin splitting out powerpc assembler

---
 src/codegen/powerpc/assembler.cpp | 265 +++---------------------------
 src/codegen/powerpc/block.cpp     |  42 +++++
 src/codegen/powerpc/block.h       |  44 +++++
 src/codegen/powerpc/context.cpp   |  29 ++++
 src/codegen/powerpc/context.h     |  95 +++++++++++
 src/codegen/powerpc/encode.h      | 137 +++++++++++++++
 src/codegen/powerpc/fixup.h       |  32 ++++
 7 files changed, 403 insertions(+), 241 deletions(-)
 create mode 100644 src/codegen/powerpc/block.cpp
 create mode 100644 src/codegen/powerpc/block.h
 create mode 100644 src/codegen/powerpc/context.cpp
 create mode 100644 src/codegen/powerpc/context.h
 create mode 100644 src/codegen/powerpc/encode.h
 create mode 100644 src/codegen/powerpc/fixup.h

diff --git a/src/codegen/powerpc/assembler.cpp b/src/codegen/powerpc/assembler.cpp
index e470ad61b5..b1ad5f03ba 100644
--- a/src/codegen/powerpc/assembler.cpp
+++ b/src/codegen/powerpc/assembler.cpp
@@ -14,129 +14,22 @@
 #include "alloc-vector.h"
 #include <avian/util/abort.h>
 
+#include "encode.h"
+#include "context.h"
+#include "fixup.h"
+#include "block.h"
+
 #define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
 #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
 #define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
 #define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
 
 using namespace vm;
-using namespace avian::codegen;
 using namespace avian::util;
 
-namespace {
-
-namespace isa {
-// INSTRUCTION FORMATS
-inline int D(int op, int rt, int ra, int d) { return op<<26|rt<<21|ra<<16|(d & 0xFFFF); }
-// inline int DS(int op, int rt, int ra, int ds, int xo) { return op<<26|rt<<21|ra<<16|ds<<2|xo; }
-inline int I(int op, int li, int aa, int lk) { return op<<26|(li & 0x3FFFFFC)|aa<<1|lk; }
-inline int B(int op, int bo, int bi, int bd, int aa, int lk) { return op<<26|bo<<21|bi<<16|(bd & 0xFFFC)|aa<<1|lk; }
-// inline int SC(int op, int lev) { return op<<26|lev<<5|2; }
-inline int X(int op, int rt, int ra, int rb, int xo, int rc) { return op<<26|rt<<21|ra<<16|rb<<11|xo<<1|rc; }
-inline int XL(int op, int bt, int ba, int bb, int xo, int lk) { return op<<26|bt<<21|ba<<16|bb<<11|xo<<1|lk; }
-inline int XFX(int op, int rt, int spr, int xo) { return op<<26|rt<<21|((spr >> 5) | ((spr << 5) & 0x3E0))<<11|xo<<1; }
-// inline int XFL(int op, int flm, int frb, int xo, int rc) { return op<<26|flm<<17|frb<<11|xo<<1|rc; }
-// inline int XS(int op, int rs, int ra, int sh, int xo, int sh2, int rc) { return op<<26|rs<<21|ra<<16|sh<<11|xo<<2|sh2<<1|rc; }
-inline int XO(int op, int rt, int ra, int rb, int oe, int xo, int rc) { return op<<26|rt<<21|ra<<16|rb<<11|oe<<10|xo<<1|rc; }
-// inline int A(int op, int frt, int fra, int frb, int frc, int xo, int rc) { return op<<26|frt<<21|fra<<16|frb<<11|frc<<6|xo<<1|rc; }
-inline int M(int op, int rs, int ra, int rb, int mb, int me, int rc) { return op<<26|rs<<21|ra<<16|rb<<11|mb<<6|me<<1|rc; }
-// inline int MD(int op, int rs, int ra, int sh, int mb, int xo, int sh2, int rc) { return op<<26|rs<<21|ra<<16|sh<<11|mb<<5|xo<<2|sh2<<1|rc; }
-// inline int MDS(int op, int rs, int ra, int rb, int mb, int xo, int rc) { return op<<26|rs<<21|ra<<16|rb<<11|mb<<5|xo<<1|rc; }
-// INSTRUCTIONS
-inline int lbz(int rt, int ra, int i) { return D(34, rt, ra, i); }
-inline int lbzx(int rt, int ra, int rb) { return X(31, rt, ra, rb, 87, 0); }
-inline int lha(int rt, int ra, int i) { return D(42, rt, ra, i); }
-inline int lhax(int rt, int ra, int rb) { return X(31, rt, ra, rb, 343, 0); }
-// inline int lhz(int rt, int ra, int i) { return D(40, rt, ra, i); }
-inline int lhzx(int rt, int ra, int rb) { return X(31, rt, ra, rb, 279, 0); }
-inline int lwz(int rt, int ra, int i) { return D(32, rt, ra, i); }
-inline int lwzx(int rt, int ra, int rb) { return X(31, rt, ra, rb, 23, 0); }
-inline int stb(int rs, int ra, int i) { return D(38, rs, ra, i); }
-inline int stbx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 215, 0); }
-inline int sth(int rs, int ra, int i) { return D(44, rs, ra, i); }
-inline int sthx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 407, 0); }
-inline int stw(int rs, int ra, int i) { return D(36, rs, ra, i); }
-inline int stwu(int rs, int ra, int i) { return D(37, rs, ra, i); }
-inline int stwux(int rs, int ra, int rb) { return X(31, rs, ra, rb, 183, 0); }
-inline int stwx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 151, 0); }
-inline int add(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 266, 0); }
-inline int addc(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 10, 0); }
-inline int adde(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 138, 0); }
-inline int addi(int rt, int ra, int i) { return D(14, rt, ra, i); }
-inline int addic(int rt, int ra, int i) { return D(12, rt, ra, i); }
-inline int addis(int rt, int ra, int i) { return D(15, rt, ra, i); }
-inline int subf(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 40, 0); }
-inline int subfc(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 8, 0); }
-inline int subfe(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 136, 0); }
-inline int subfic(int rt, int ra, int i) { return D(8, rt, ra, i); }
-inline int subfze(int rt, int ra) { return XO(31, rt, ra, 0, 0, 200, 0); }
-inline int mullw(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 235, 0); }
-// inline int mulhw(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 75, 0); }
-inline int mulhwu(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 11, 0); }
-// inline int mulli(int rt, int ra, int i) { return D(7, rt, ra, i); }
-inline int divw(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 491, 0); }
-// inline int divwu(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 459, 0); }
-// inline int divd(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 489, 0); }
-// inline int divdu(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 457, 0); }
-inline int neg(int rt, int ra) { return XO(31, rt, ra, 0, 0, 104, 0); }
-inline int and_(int rt, int ra, int rb) { return X(31, ra, rt, rb, 28, 0); }
-inline int andi(int rt, int ra, int i) { return D(28, ra, rt, i); }
-inline int andis(int rt, int ra, int i) { return D(29, ra, rt, i); }
-inline int or_(int rt, int ra, int rb) { return X(31, ra, rt, rb, 444, 0); }
-inline int ori(int rt, int ra, int i) { return D(24, rt, ra, i); }
-inline int xor_(int rt, int ra, int rb) { return X(31, ra, rt, rb, 316, 0); }
-inline int oris(int rt, int ra, int i) { return D(25, rt, ra, i); }
-inline int xori(int rt, int ra, int i) { return D(26, rt, ra, i); }
-inline int xoris(int rt, int ra, int i) { return D(27, rt, ra, i); }
-inline int rlwinm(int rt, int ra, int i, int mb, int me) { return M(21, ra, rt, i, mb, me, 0); }
-inline int rlwimi(int rt, int ra, int i, int mb, int me) { return M(20, ra, rt, i, mb, me, 0); }
-inline int slw(int rt, int ra, int sh) { return X(31, ra, rt, sh, 24, 0); }
-// inline int sld(int rt, int ra, int rb) { return X(31, ra, rt, rb, 27, 0); }
-inline int srw(int rt, int ra, int sh) { return X(31, ra, rt, sh, 536, 0); }
-inline int sraw(int rt, int ra, int sh) { return X(31, ra, rt, sh, 792, 0); }
-inline int srawi(int rt, int ra, int sh) { return X(31, ra, rt, sh, 824, 0); }
-inline int extsb(int rt, int rs) { return X(31, rs, rt, 0, 954, 0); }
-inline int extsh(int rt, int rs) { return X(31, rs, rt, 0, 922, 0); }
-inline int mfspr(int rt, int spr) { return XFX(31, rt, spr, 339); }
-inline int mtspr(int spr, int rs) { return XFX(31, rs, spr, 467); }
-inline int b(int i) { return I(18, i, 0, 0); }
-inline int bl(int i) { return I(18, i, 0, 1); }
-inline int bcctr(int bo, int bi, int lk) { return XL(19, bo, bi, 0, 528, lk); }
-inline int bclr(int bo, int bi, int lk) { return XL(19, bo, bi, 0, 16, lk); }
-inline int bc(int bo, int bi, int bd, int lk) { return B(16, bo, bi, bd, 0, lk); }
-inline int cmp(int bf, int ra, int rb) { return X(31, bf << 2, ra, rb, 0, 0); }
-inline int cmpl(int bf, int ra, int rb) { return X(31, bf << 2, ra, rb, 32, 0); }
-inline int cmpi(int bf, int ra, int i) { return D(11, bf << 2, ra, i); }
-inline int cmpli(int bf, int ra, int i) { return D(10, bf << 2, ra, i); }
-inline int sync(int L) { return X(31, L, 0, 0, 598, 0); }
-// PSEUDO-INSTRUCTIONS
-inline int li(int rt, int i) { return addi(rt, 0, i); }
-inline int lis(int rt, int i) { return addis(rt, 0, i); }
-inline int slwi(int rt, int ra, int i) { return rlwinm(rt, ra, i, 0, 31-i); }
-inline int srwi(int rt, int ra, int i) { return rlwinm(rt, ra, 32-i, i, 31); }
-// inline int sub(int rt, int ra, int rb) { return subf(rt, rb, ra); }
-// inline int subc(int rt, int ra, int rb) { return subfc(rt, rb, ra); }
-// inline int subi(int rt, int ra, int i) { return addi(rt, ra, -i); }
-// inline int subis(int rt, int ra, int i) { return addis(rt, ra, -i); }
-inline int mr(int rt, int ra) { return or_(rt, ra, ra); }
-inline int mflr(int rx) { return mfspr(rx, 8); }
-inline int mtlr(int rx) { return mtspr(8, rx); }
-inline int mtctr(int rd) { return mtspr(9, rd); }
-inline int bctr() { return bcctr(20, 0, 0); }
-inline int bctrl() { return bcctr(20, 0, 1); }
-inline int blr() { return bclr(20, 0, 0); }
-inline int blt(int i) { return bc(12, 0, i, 0); }
-inline int bgt(int i) { return bc(12, 1, i, 0); }
-inline int bge(int i) { return bc(4, 0, i, 0); }
-inline int ble(int i) { return bc(4, 1, i, 0); }
-inline int beq(int i) { return bc(12, 2, i, 0); }
-inline int bne(int i) { return bc(4, 2, i, 0); }
-inline int cmpw(int ra, int rb) { return cmp(0, ra, rb); }
-inline int cmplw(int ra, int rb) { return cmpl(0, ra, rb); }
-inline int cmpwi(int ra, int i) { return cmpi(0, ra, i); }
-inline int cmplwi(int ra, int i) { return cmpli(0, ra, i); }
-inline int trap() { return 0x7fe00008; } // todo: macro-ify
-}
+namespace avian {
+namespace codegen {
+namespace powerpc {
 
 const int64_t MASK_LO32 = 0x0ffffffff;
 const int     MASK_LO16 = 0x0ffff;
@@ -189,10 +82,7 @@ const int ThreadRegister = 13;
 
 const bool DebugJumps = false;
 
-class Context;
-class MyBlock;
 class JumpOffset;
-class JumpEvent;
 
 void
 resolve(MyBlock*);
@@ -200,116 +90,12 @@ resolve(MyBlock*);
 unsigned
 padding(MyBlock*, unsigned);
 
-class MyBlock: public Assembler::Block {
- public:
-  MyBlock(Context* context, unsigned offset):
-    context(context), next(0), jumpOffsetHead(0), jumpOffsetTail(0),
-    lastJumpOffsetTail(0), jumpEventHead(0), jumpEventTail(0),
-    lastEventOffset(0), offset(offset), start(~0), size(0), resolved(false)
-  { }
-
-  virtual unsigned resolve(unsigned start, Assembler::Block* next) {
-    this->start = start;
-    this->next = static_cast<MyBlock*>(next);
-
-    ::resolve(this);
-
-    this->resolved = true;
-
-    return start + size + padding(this, size);
-  }
-
-  Context* context;
-  MyBlock* next;
-  JumpOffset* jumpOffsetHead;
-  JumpOffset* jumpOffsetTail;
-  JumpOffset* lastJumpOffsetTail;
-  JumpEvent* jumpEventHead;
-  JumpEvent* jumpEventTail;
-  unsigned lastEventOffset;
-  unsigned offset;
-  unsigned start;
-  unsigned size;
-  bool resolved;
-};
-
 class Task;
 class ConstantPoolEntry;
 
-class Context {
+class OffsetPromise: public Promise {
  public:
-  Context(System* s, Allocator* a, Zone* zone):
-    s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
-    firstBlock(new(zone) MyBlock(this, 0)),
-    lastBlock(firstBlock), jumpOffsetHead(0), jumpOffsetTail(0),
-    constantPool(0), constantPoolCount(0)
-  { }
-
-  System* s;
-  Zone* zone;
-  Assembler::Client* client;
-  Vector code;
-  Task* tasks;
-  uint8_t* result;
-  MyBlock* firstBlock;
-  MyBlock* lastBlock;
-  JumpOffset* jumpOffsetHead;
-  JumpOffset* jumpOffsetTail;
-  ConstantPoolEntry* constantPool;
-  unsigned constantPoolCount;
-};
-
-class Task {
- public:
-  Task(Task* next): next(next) { }
-
-  virtual void run(Context* c) = 0;
-
-  Task* next;
-};
-
-typedef void (*OperationType)(Context*);
-
-typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
-
-typedef void (*BinaryOperationType)
-(Context*, unsigned, lir::Operand*, unsigned, lir::Operand*);
-
-typedef void (*TernaryOperationType)
-(Context*, unsigned, lir::Operand*, lir::Operand*,
- lir::Operand*);
-
-typedef void (*BranchOperationType)
-(Context*, lir::TernaryOperation, unsigned, lir::Operand*,
- lir::Operand*, lir::Operand*);
-
-class ArchitectureContext {
- public:
-  ArchitectureContext(System* s): s(s) { }
-
-  System* s;
-  OperationType operations[lir::OperationCount];
-  UnaryOperationType unaryOperations[lir::UnaryOperationCount
-                                     * lir::OperandTypeCount];
-  BinaryOperationType binaryOperations
-  [lir::BinaryOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
-  TernaryOperationType ternaryOperations
-  [lir::NonBranchTernaryOperationCount * lir::OperandTypeCount];
-  BranchOperationType branchOperations
-  [lir::BranchOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
-};
-
-inline Aborter* getAborter(Context* con) {
-  return con->s;
-}
-
-inline Aborter* getAborter(ArchitectureContext* con) {
-  return con->s;
-}
-
-class Offset: public Promise {
- public:
-  Offset(Context* c, MyBlock* block, unsigned offset):
+  OffsetPromise(Context* c, MyBlock* block, unsigned offset):
     c(c), block(block), offset(offset)
   { }
 
@@ -330,9 +116,9 @@ class Offset: public Promise {
 };
 
 Promise*
-offset(Context* c)
+offsetPromise(Context* c)
 {
-  return new(c->zone) Offset(c, c->lastBlock, c->code.length());
+  return new(c->zone) OffsetPromise(c, c->lastBlock, c->code.length());
 }
 
 bool
@@ -978,7 +764,7 @@ moveCR2(Context* c, unsigned, lir::Constant* src,
       }
     } else {
       appendImmediateTask
-        (c, src->value, offset(c), TargetBytesPerWord, promiseOffset, false);
+        (c, src->value, offsetPromise(c), TargetBytesPerWord, promiseOffset, false);
       emit(c, lis(dst->low, 0));
       emit(c, ori(dst->low, dst->low, 0));
     }
@@ -1505,7 +1291,7 @@ moveAR2(Context* c, unsigned srcSize UNUSED, lir::Address* src,
   lir::Memory memory(dst->low, 0, -1, 0);
   
   appendImmediateTask
-    (c, src->address, offset(c), TargetBytesPerWord, promiseOffset, true);
+    (c, src->address, offsetPromise(c), TargetBytesPerWord, promiseOffset, true);
   
   emit(c, lis(dst->low, 0));
   moveMR(c, dstSize, &memory, dstSize, dst);
@@ -1622,7 +1408,7 @@ branch(Context* c, lir::TernaryOperation op)
 void
 conditional(Context* c, int32_t branch, lir::Constant* target)
 {
-  appendOffsetTask(c, target->value, offset(c), true);
+  appendOffsetTask(c, target->value, offsetPromise(c), true);
   emit(c, branch);
 }
 
@@ -1835,7 +1621,7 @@ callC(Context* c, unsigned size UNUSED, lir::Constant* target)
 {
   assert(c, size == TargetBytesPerWord);
 
-  appendOffsetTask(c, target->value, offset(c), false);
+  appendOffsetTask(c, target->value, offsetPromise(c), false);
   emit(c, bl(0));
 }
 
@@ -1888,7 +1674,7 @@ jumpC(Context* c, unsigned size UNUSED, lir::Constant* target)
 {
   assert(c, size == TargetBytesPerWord);
 
-  appendOffsetTask(c, target->value, offset(c), false);
+  appendOffsetTask(c, target->value, offsetPromise(c), false);
   emit(c, b(0));
 }
 
@@ -1901,7 +1687,7 @@ return_(Context* c)
 void
 trap(Context* c)
 {
-  emit(c, trap());
+  emit(c, isa::trap());
 }
 
 void
@@ -2127,7 +1913,7 @@ class MyArchitecture: public Assembler::Architecture {
   }
 
   virtual unsigned argumentFootprint(unsigned footprint) {
-    return ::argumentFootprint(footprint);
+    return powerpc::argumentFootprint(footprint);
   }
 
   virtual bool argumentAlignment() {
@@ -2213,7 +1999,7 @@ class MyArchitecture: public Assembler::Architecture {
                          unsigned targetParameterFootprint, void** ip,
                          void** stack)
   {
-    ::nextFrame(&c, static_cast<int32_t*>(start), size, footprint, link,
+    powerpc::nextFrame(&c, static_cast<int32_t*>(start), size, footprint, link,
                 mostRecent, targetParameterFootprint, ip, stack);
   }
 
@@ -2714,7 +2500,7 @@ class MyAssembler: public Assembler {
 
         bool jump = needJump(b);
         if (jump) {
-          write4(dst + dstOffset, ::b(jumpTableSize + TargetBytesPerWord));
+          write4(dst + dstOffset, isa::b(jumpTableSize + TargetBytesPerWord));
         }
 
         dstOffset += jumpTableSize + (jump ? TargetBytesPerWord : 0);
@@ -2747,7 +2533,7 @@ class MyAssembler: public Assembler {
   }
 
   virtual Promise* offset(bool) {
-    return ::offset(&c);
+    return powerpc::offsetPromise(&c);
   }
 
   virtual Block* endBlock(bool startNew) {
@@ -2815,15 +2601,12 @@ Assembler* MyArchitecture::makeAssembler(Allocator* allocator, Zone* zone) {
   return new(zone) MyAssembler(this->c.s, allocator, zone, this);
 }
 
-} // namespace
-
-namespace avian {
-namespace codegen {
+} // namespace powerpc
 
 Assembler::Architecture*
 makeArchitecturePowerpc(System* system, bool)
 {
-  return new (allocate(system, sizeof(MyArchitecture))) MyArchitecture(system);
+  return new (allocate(system, sizeof(powerpc::MyArchitecture))) powerpc::MyArchitecture(system);
 }
 
 } // namespace codegen
diff --git a/src/codegen/powerpc/block.cpp b/src/codegen/powerpc/block.cpp
new file mode 100644
index 0000000000..14453c3bde
--- /dev/null
+++ b/src/codegen/powerpc/block.cpp
@@ -0,0 +1,42 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+#include "common.h"
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+void resolve(MyBlock*);
+
+unsigned padding(MyBlock*, unsigned);
+
+MyBlock::MyBlock(Context* context, unsigned offset):
+  context(context), next(0), jumpOffsetHead(0), jumpOffsetTail(0),
+  lastJumpOffsetTail(0), jumpEventHead(0), jumpEventTail(0),
+  lastEventOffset(0), offset(offset), start(~0), size(0), resolved(false)
+{ }
+
+unsigned MyBlock::resolve(unsigned start, Assembler::Block* next) {
+  this->start = start;
+  this->next = static_cast<MyBlock*>(next);
+
+  powerpc::resolve(this);
+
+  this->resolved = true;
+
+  return start + size + padding(this, size);
+}
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/powerpc/block.h b/src/codegen/powerpc/block.h
new file mode 100644
index 0000000000..e0dd563ad7
--- /dev/null
+++ b/src/codegen/powerpc/block.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_BLOCK_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_BLOCK_H
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+class JumpEvent;
+
+class MyBlock: public Assembler::Block {
+ public:
+  MyBlock(Context* context, unsigned offset);
+
+  virtual unsigned resolve(unsigned start, Assembler::Block* next);
+
+  Context* context;
+  MyBlock* next;
+  JumpOffset* jumpOffsetHead;
+  JumpOffset* jumpOffsetTail;
+  JumpOffset* lastJumpOffsetTail;
+  JumpEvent* jumpEventHead;
+  JumpEvent* jumpEventTail;
+  unsigned lastEventOffset;
+  unsigned offset;
+  unsigned start;
+  unsigned size;
+  bool resolved;
+};
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_BLOCK_H
diff --git a/src/codegen/powerpc/context.cpp b/src/codegen/powerpc/context.cpp
new file mode 100644
index 0000000000..c61b55ef55
--- /dev/null
+++ b/src/codegen/powerpc/context.cpp
@@ -0,0 +1,29 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+#include "common.h"
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+
+Context::Context(vm::System* s, vm::Allocator* a, vm::Zone* zone):
+  s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0),
+  firstBlock(new(zone) MyBlock(this, 0)),
+  lastBlock(firstBlock), jumpOffsetHead(0), jumpOffsetTail(0),
+  constantPool(0), constantPoolCount(0)
+{ }
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/powerpc/context.h b/src/codegen/powerpc/context.h
new file mode 100644
index 0000000000..f458c74330
--- /dev/null
+++ b/src/codegen/powerpc/context.h
@@ -0,0 +1,95 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_CONTEXT_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_CONTEXT_H
+
+#include <avian/vm/codegen/assembler.h>
+#include "alloc-vector.h"
+
+
+namespace vm {
+class System;
+class Allocator;
+class Zone;
+} // namespace vm
+
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+class Task;
+class JumpOffset;
+class ConstantPoolEntry;
+class MyBlock;
+
+class Context {
+ public:
+  Context(vm::System* s, vm::Allocator* a, vm::Zone* zone);
+
+  vm::System* s;
+  vm::Zone* zone;
+  Assembler::Client* client;
+  vm::Vector code;
+  Task* tasks;
+  uint8_t* result;
+  MyBlock* firstBlock;
+  MyBlock* lastBlock;
+  JumpOffset* jumpOffsetHead;
+  JumpOffset* jumpOffsetTail;
+  ConstantPoolEntry* constantPool;
+  unsigned constantPoolCount;
+};
+
+typedef void (*OperationType)(Context*);
+
+typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
+
+typedef void (*BinaryOperationType)
+(Context*, unsigned, lir::Operand*, unsigned, lir::Operand*);
+
+typedef void (*TernaryOperationType)
+(Context*, unsigned, lir::Operand*, lir::Operand*,
+ lir::Operand*);
+
+typedef void (*BranchOperationType)
+(Context*, lir::TernaryOperation, unsigned, lir::Operand*,
+ lir::Operand*, lir::Operand*);
+
+class ArchitectureContext {
+ public:
+  ArchitectureContext(vm::System* s): s(s) { }
+
+  vm::System* s;
+  OperationType operations[lir::OperationCount];
+  UnaryOperationType unaryOperations[lir::UnaryOperationCount
+                                     * lir::OperandTypeCount];
+  BinaryOperationType binaryOperations
+  [lir::BinaryOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
+  TernaryOperationType ternaryOperations
+  [lir::NonBranchTernaryOperationCount * lir::OperandTypeCount];
+  BranchOperationType branchOperations
+  [lir::BranchOperationCount * lir::OperandTypeCount * lir::OperandTypeCount];
+};
+
+inline avian::util::Aborter* getAborter(Context* con) {
+  return con->s;
+}
+
+inline avian::util::Aborter* getAborter(ArchitectureContext* con) {
+  return con->s;
+}
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_CONTEXT_H
diff --git a/src/codegen/powerpc/encode.h b/src/codegen/powerpc/encode.h
new file mode 100644
index 0000000000..54f7f7f493
--- /dev/null
+++ b/src/codegen/powerpc/encode.h
@@ -0,0 +1,137 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_ENCODE_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_ENCODE_H
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+namespace isa {
+// INSTRUCTION FORMATS
+inline int D(int op, int rt, int ra, int d) { return op<<26|rt<<21|ra<<16|(d & 0xFFFF); }
+// inline int DS(int op, int rt, int ra, int ds, int xo) { return op<<26|rt<<21|ra<<16|ds<<2|xo; }
+inline int I(int op, int li, int aa, int lk) { return op<<26|(li & 0x3FFFFFC)|aa<<1|lk; }
+inline int B(int op, int bo, int bi, int bd, int aa, int lk) { return op<<26|bo<<21|bi<<16|(bd & 0xFFFC)|aa<<1|lk; }
+// inline int SC(int op, int lev) { return op<<26|lev<<5|2; }
+inline int X(int op, int rt, int ra, int rb, int xo, int rc) { return op<<26|rt<<21|ra<<16|rb<<11|xo<<1|rc; }
+inline int XL(int op, int bt, int ba, int bb, int xo, int lk) { return op<<26|bt<<21|ba<<16|bb<<11|xo<<1|lk; }
+inline int XFX(int op, int rt, int spr, int xo) { return op<<26|rt<<21|((spr >> 5) | ((spr << 5) & 0x3E0))<<11|xo<<1; }
+// inline int XFL(int op, int flm, int frb, int xo, int rc) { return op<<26|flm<<17|frb<<11|xo<<1|rc; }
+// inline int XS(int op, int rs, int ra, int sh, int xo, int sh2, int rc) { return op<<26|rs<<21|ra<<16|sh<<11|xo<<2|sh2<<1|rc; }
+inline int XO(int op, int rt, int ra, int rb, int oe, int xo, int rc) { return op<<26|rt<<21|ra<<16|rb<<11|oe<<10|xo<<1|rc; }
+// inline int A(int op, int frt, int fra, int frb, int frc, int xo, int rc) { return op<<26|frt<<21|fra<<16|frb<<11|frc<<6|xo<<1|rc; }
+inline int M(int op, int rs, int ra, int rb, int mb, int me, int rc) { return op<<26|rs<<21|ra<<16|rb<<11|mb<<6|me<<1|rc; }
+// inline int MD(int op, int rs, int ra, int sh, int mb, int xo, int sh2, int rc) { return op<<26|rs<<21|ra<<16|sh<<11|mb<<5|xo<<2|sh2<<1|rc; }
+// inline int MDS(int op, int rs, int ra, int rb, int mb, int xo, int rc) { return op<<26|rs<<21|ra<<16|rb<<11|mb<<5|xo<<1|rc; }
+// INSTRUCTIONS
+inline int lbz(int rt, int ra, int i) { return D(34, rt, ra, i); }
+inline int lbzx(int rt, int ra, int rb) { return X(31, rt, ra, rb, 87, 0); }
+inline int lha(int rt, int ra, int i) { return D(42, rt, ra, i); }
+inline int lhax(int rt, int ra, int rb) { return X(31, rt, ra, rb, 343, 0); }
+// inline int lhz(int rt, int ra, int i) { return D(40, rt, ra, i); }
+inline int lhzx(int rt, int ra, int rb) { return X(31, rt, ra, rb, 279, 0); }
+inline int lwz(int rt, int ra, int i) { return D(32, rt, ra, i); }
+inline int lwzx(int rt, int ra, int rb) { return X(31, rt, ra, rb, 23, 0); }
+inline int stb(int rs, int ra, int i) { return D(38, rs, ra, i); }
+inline int stbx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 215, 0); }
+inline int sth(int rs, int ra, int i) { return D(44, rs, ra, i); }
+inline int sthx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 407, 0); }
+inline int stw(int rs, int ra, int i) { return D(36, rs, ra, i); }
+inline int stwu(int rs, int ra, int i) { return D(37, rs, ra, i); }
+inline int stwux(int rs, int ra, int rb) { return X(31, rs, ra, rb, 183, 0); }
+inline int stwx(int rs, int ra, int rb) { return X(31, rs, ra, rb, 151, 0); }
+inline int add(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 266, 0); }
+inline int addc(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 10, 0); }
+inline int adde(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 138, 0); }
+inline int addi(int rt, int ra, int i) { return D(14, rt, ra, i); }
+inline int addic(int rt, int ra, int i) { return D(12, rt, ra, i); }
+inline int addis(int rt, int ra, int i) { return D(15, rt, ra, i); }
+inline int subf(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 40, 0); }
+inline int subfc(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 8, 0); }
+inline int subfe(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 136, 0); }
+inline int subfic(int rt, int ra, int i) { return D(8, rt, ra, i); }
+inline int subfze(int rt, int ra) { return XO(31, rt, ra, 0, 0, 200, 0); }
+inline int mullw(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 235, 0); }
+// inline int mulhw(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 75, 0); }
+inline int mulhwu(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 11, 0); }
+// inline int mulli(int rt, int ra, int i) { return D(7, rt, ra, i); }
+inline int divw(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 491, 0); }
+// inline int divwu(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 459, 0); }
+// inline int divd(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 489, 0); }
+// inline int divdu(int rt, int ra, int rb) { return XO(31, rt, ra, rb, 0, 457, 0); }
+inline int neg(int rt, int ra) { return XO(31, rt, ra, 0, 0, 104, 0); }
+inline int and_(int rt, int ra, int rb) { return X(31, ra, rt, rb, 28, 0); }
+inline int andi(int rt, int ra, int i) { return D(28, ra, rt, i); }
+inline int andis(int rt, int ra, int i) { return D(29, ra, rt, i); }
+inline int or_(int rt, int ra, int rb) { return X(31, ra, rt, rb, 444, 0); }
+inline int ori(int rt, int ra, int i) { return D(24, rt, ra, i); }
+inline int xor_(int rt, int ra, int rb) { return X(31, ra, rt, rb, 316, 0); }
+inline int oris(int rt, int ra, int i) { return D(25, rt, ra, i); }
+inline int xori(int rt, int ra, int i) { return D(26, rt, ra, i); }
+inline int xoris(int rt, int ra, int i) { return D(27, rt, ra, i); }
+inline int rlwinm(int rt, int ra, int i, int mb, int me) { return M(21, ra, rt, i, mb, me, 0); }
+inline int rlwimi(int rt, int ra, int i, int mb, int me) { return M(20, ra, rt, i, mb, me, 0); }
+inline int slw(int rt, int ra, int sh) { return X(31, ra, rt, sh, 24, 0); }
+// inline int sld(int rt, int ra, int rb) { return X(31, ra, rt, rb, 27, 0); }
+inline int srw(int rt, int ra, int sh) { return X(31, ra, rt, sh, 536, 0); }
+inline int sraw(int rt, int ra, int sh) { return X(31, ra, rt, sh, 792, 0); }
+inline int srawi(int rt, int ra, int sh) { return X(31, ra, rt, sh, 824, 0); }
+inline int extsb(int rt, int rs) { return X(31, rs, rt, 0, 954, 0); }
+inline int extsh(int rt, int rs) { return X(31, rs, rt, 0, 922, 0); }
+inline int mfspr(int rt, int spr) { return XFX(31, rt, spr, 339); }
+inline int mtspr(int spr, int rs) { return XFX(31, rs, spr, 467); }
+inline int b(int i) { return I(18, i, 0, 0); }
+inline int bl(int i) { return I(18, i, 0, 1); }
+inline int bcctr(int bo, int bi, int lk) { return XL(19, bo, bi, 0, 528, lk); }
+inline int bclr(int bo, int bi, int lk) { return XL(19, bo, bi, 0, 16, lk); }
+inline int bc(int bo, int bi, int bd, int lk) { return B(16, bo, bi, bd, 0, lk); }
+inline int cmp(int bf, int ra, int rb) { return X(31, bf << 2, ra, rb, 0, 0); }
+inline int cmpl(int bf, int ra, int rb) { return X(31, bf << 2, ra, rb, 32, 0); }
+inline int cmpi(int bf, int ra, int i) { return D(11, bf << 2, ra, i); }
+inline int cmpli(int bf, int ra, int i) { return D(10, bf << 2, ra, i); }
+inline int sync(int L) { return X(31, L, 0, 0, 598, 0); }
+// PSEUDO-INSTRUCTIONS
+inline int li(int rt, int i) { return addi(rt, 0, i); }
+inline int lis(int rt, int i) { return addis(rt, 0, i); }
+inline int slwi(int rt, int ra, int i) { return rlwinm(rt, ra, i, 0, 31-i); }
+inline int srwi(int rt, int ra, int i) { return rlwinm(rt, ra, 32-i, i, 31); }
+// inline int sub(int rt, int ra, int rb) { return subf(rt, rb, ra); }
+// inline int subc(int rt, int ra, int rb) { return subfc(rt, rb, ra); }
+// inline int subi(int rt, int ra, int i) { return addi(rt, ra, -i); }
+// inline int subis(int rt, int ra, int i) { return addis(rt, ra, -i); }
+inline int mr(int rt, int ra) { return or_(rt, ra, ra); }
+inline int mflr(int rx) { return mfspr(rx, 8); }
+inline int mtlr(int rx) { return mtspr(8, rx); }
+inline int mtctr(int rd) { return mtspr(9, rd); }
+inline int bctr() { return bcctr(20, 0, 0); }
+inline int bctrl() { return bcctr(20, 0, 1); }
+inline int blr() { return bclr(20, 0, 0); }
+inline int blt(int i) { return bc(12, 0, i, 0); }
+inline int bgt(int i) { return bc(12, 1, i, 0); }
+inline int bge(int i) { return bc(4, 0, i, 0); }
+inline int ble(int i) { return bc(4, 1, i, 0); }
+inline int beq(int i) { return bc(12, 2, i, 0); }
+inline int bne(int i) { return bc(4, 2, i, 0); }
+inline int cmpw(int ra, int rb) { return cmp(0, ra, rb); }
+inline int cmplw(int ra, int rb) { return cmpl(0, ra, rb); }
+inline int cmpwi(int ra, int i) { return cmpi(0, ra, i); }
+inline int cmplwi(int ra, int i) { return cmpli(0, ra, i); }
+inline int trap() { return 0x7fe00008; } // todo: macro-ify
+
+} // namespace isa
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_ENCODE_H
+
diff --git a/src/codegen/powerpc/fixup.h b/src/codegen/powerpc/fixup.h
new file mode 100644
index 0000000000..42fa62afc7
--- /dev/null
+++ b/src/codegen/powerpc/fixup.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_FIXUP_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_FIXUP_H
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+
+class Task {
+ public:
+  Task(Task* next): next(next) { }
+
+  virtual void run(Context* c) = 0;
+
+  Task* next;
+};
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_FIXUP_H

From 900b447e274032a268921d73140bf2d0d247577a Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sat, 23 Feb 2013 18:08:34 -0700
Subject: [PATCH 13/22] further break out powerpc assembler

---
 src/codegen/powerpc/assembler.cpp   | 1609 +--------------------------
 src/codegen/powerpc/fixup.cpp       |  242 ++++
 src/codegen/powerpc/fixup.h         |  128 +++
 src/codegen/powerpc/multimethod.cpp |  111 ++
 src/codegen/powerpc/multimethod.h   |   60 +
 src/codegen/powerpc/operations.cpp  | 1097 ++++++++++++++++++
 src/codegen/powerpc/operations.h    |  197 ++++
 src/codegen/powerpc/registers.h     |   23 +
 8 files changed, 1862 insertions(+), 1605 deletions(-)
 create mode 100644 src/codegen/powerpc/fixup.cpp
 create mode 100644 src/codegen/powerpc/multimethod.cpp
 create mode 100644 src/codegen/powerpc/multimethod.h
 create mode 100644 src/codegen/powerpc/operations.cpp
 create mode 100644 src/codegen/powerpc/operations.h
 create mode 100644 src/codegen/powerpc/registers.h

diff --git a/src/codegen/powerpc/assembler.cpp b/src/codegen/powerpc/assembler.cpp
index b1ad5f03ba..39405769bf 100644
--- a/src/codegen/powerpc/assembler.cpp
+++ b/src/codegen/powerpc/assembler.cpp
@@ -18,11 +18,8 @@
 #include "context.h"
 #include "fixup.h"
 #include "block.h"
-
-#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
-#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
-#define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
-#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+#include "multimethod.h"
+#include "operations.h"
 
 using namespace vm;
 using namespace avian::util;
@@ -31,38 +28,12 @@ namespace avian {
 namespace codegen {
 namespace powerpc {
 
-const int64_t MASK_LO32 = 0x0ffffffff;
-const int     MASK_LO16 = 0x0ffff;
-const int     MASK_LO8  = 0x0ff;
-// inline int lo32(int64_t i) { return (int)(i & MASK_LO32); }
-// inline int hi32(int64_t i) { return lo32(i >> 32); }
-inline int lo16(int64_t i) { return (int)(i & MASK_LO16); }
-inline int hi16(int64_t i) { return lo16(i >> 16); }
-// inline int lo8(int64_t i) { return (int)(i & MASK_LO8); }
-// inline int hi8(int64_t i) { return lo8(i >> 8); }
-
-inline int ha16(int32_t i) { 
-    return ((i >> 16) + ((i & 0x8000) ? 1 : 0)) & 0xffff;
-}
-
 inline int unha16(int32_t high, int32_t low) {
     return ((high - ((low & 0x8000) ? 1 : 0)) << 16) | low; 
 }
 
 const RegisterFile MyRegisterFile(0xFFFFFFFF, 0);
 
-inline bool
-isInt16(target_intptr_t v)
-{
-  return v == static_cast<int16_t>(v);
-}
-
-inline int
-carry16(target_intptr_t v)
-{
-  return static_cast<int16_t>(v) < 0 ? 1 : 0;
-}
-
 #ifdef __APPLE__
 const unsigned FrameFooterSize = 6;
 const unsigned ReturnAddressOffset = 2;
@@ -84,190 +55,13 @@ const bool DebugJumps = false;
 
 class JumpOffset;
 
-void
-resolve(MyBlock*);
+unsigned padding(MyBlock*, unsigned);
 
-unsigned
-padding(MyBlock*, unsigned);
+bool bounded(int right, int left, int32_t v);
 
 class Task;
 class ConstantPoolEntry;
 
-class OffsetPromise: public Promise {
- public:
-  OffsetPromise(Context* c, MyBlock* block, unsigned offset):
-    c(c), block(block), offset(offset)
-  { }
-
-  virtual bool resolved() {
-    return block->resolved;
-  }
-  
-  virtual int64_t value() {
-    assert(c, resolved());
-
-    unsigned o = offset - block->offset;
-    return block->start + padding(block, o) + o;
-  }
-
-  Context* c;
-  MyBlock* block;
-  unsigned offset;
-};
-
-Promise*
-offsetPromise(Context* c)
-{
-  return new(c->zone) OffsetPromise(c, c->lastBlock, c->code.length());
-}
-
-bool
-bounded(int right, int left, int32_t v)
-{
-  return ((v << left) >> left) == v and ((v >> right) << right) == v;
-}
-
-void*
-updateOffset(System* s, uint8_t* instruction, bool conditional, int64_t value,
-             void* jumpAddress)
-{
-  int32_t v = reinterpret_cast<uint8_t*>(value) - instruction;
-   
-  int32_t mask;
-  if (conditional) {
-    if (not bounded(2, 16, v)) {
-      *static_cast<uint32_t*>(jumpAddress) = isa::b(0);
-      updateOffset(s, static_cast<uint8_t*>(jumpAddress), false, value, 0);
-
-      v = static_cast<uint8_t*>(jumpAddress) - instruction;
-
-      expect(s, bounded(2, 16, v));
-    }
-    mask = 0xFFFC;
-  } else {
-    expect(s, bounded(2, 6, v));
-    mask = 0x3FFFFFC;
-  }
-
-  int32_t* p = reinterpret_cast<int32_t*>(instruction);
-  *p = targetV4((v & mask) | ((~mask) & targetV4(*p)));
-
-  return instruction + 4;
-}
-
-class OffsetListener: public Promise::Listener {
- public:
-  OffsetListener(System* s, uint8_t* instruction, bool conditional,
-                 void* jumpAddress):
-    s(s),
-    instruction(instruction),
-    jumpAddress(jumpAddress),
-    conditional(conditional)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    void* p = updateOffset(s, instruction, conditional, value, jumpAddress);
-    if (location) *location = p;
-    return false;
-  }
-
-  System* s;
-  uint8_t* instruction;
-  void* jumpAddress;
-  bool conditional;
-};
-
-class OffsetTask: public Task {
- public:
-  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset,
-             bool conditional):
-    Task(next),
-    promise(promise),
-    instructionOffset(instructionOffset),
-    jumpAddress(0),
-    conditional(conditional)
-  { }
-
-  virtual void run(Context* c) {
-    if (promise->resolved()) {
-      updateOffset
-        (c->s, c->result + instructionOffset->value(), conditional,
-         promise->value(), jumpAddress);
-    } else {
-      new (promise->listen(sizeof(OffsetListener)))
-        OffsetListener(c->s, c->result + instructionOffset->value(),
-                       conditional, jumpAddress);
-    }
-  }
-
-  Promise* promise;
-  Promise* instructionOffset;
-  void* jumpAddress;
-  bool conditional;
-};
-
-class JumpOffset {
- public:
-  JumpOffset(MyBlock* block, OffsetTask* task, unsigned offset):
-    block(block), task(task), next(0), offset(offset)
-  { }
-
-  MyBlock* block;
-  OffsetTask* task;
-  JumpOffset* next;
-  unsigned offset;  
-};
-
-class JumpEvent {
- public:
-  JumpEvent(JumpOffset* jumpOffsetHead, JumpOffset* jumpOffsetTail,
-            unsigned offset):
-    jumpOffsetHead(jumpOffsetHead), jumpOffsetTail(jumpOffsetTail), next(0),
-    offset(offset)
-  { }
-
-  JumpOffset* jumpOffsetHead;
-  JumpOffset* jumpOffsetTail;
-  JumpEvent* next;
-  unsigned offset;
-};
-
-void
-appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset,
-                 bool conditional)
-{
-  OffsetTask* task = new(c->zone) OffsetTask(c->tasks, promise, instructionOffset, conditional);
-
-  c->tasks = task;
-
-  if (conditional) {
-    JumpOffset* offset =
-      new(c->zone) JumpOffset(c->lastBlock, task, c->code.length() - c->lastBlock->offset);
-
-    if (c->lastBlock->jumpOffsetTail) {
-      c->lastBlock->jumpOffsetTail->next = offset;
-    } else {
-      c->lastBlock->jumpOffsetHead = offset;
-    }
-    c->lastBlock->jumpOffsetTail = offset;
-  }
-}
-
-void
-appendJumpEvent(Context* c, MyBlock* b, unsigned offset, JumpOffset* head,
-                JumpOffset* tail)
-{
-  JumpEvent* e = new(c->zone) JumpEvent
-    (head, tail, offset);
-
-  if (b->jumpEventTail) {
-    b->jumpEventTail->next = e;
-  } else {
-    b->jumpEventHead = e;
-  }
-  b->jumpEventTail = e;
-}
-
 bool
 needJump(MyBlock* b)
 {
@@ -380,1322 +174,16 @@ resolve(MyBlock* b)
   }
 }
 
-inline unsigned
-index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
-inline unsigned
-index(ArchitectureContext*,
-      lir::BinaryOperation operation,
-      lir::OperandType operand1,
-      lir::OperandType operand2)
-{
-  return operation
-    + (lir::BinaryOperationCount * operand1)
-    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
-}
-
-inline unsigned
-index(ArchitectureContext* c UNUSED,
-      lir::TernaryOperation operation,
-      lir::OperandType operand1)
-{
-  assert(c, not isBranch(operation));
-
-  return operation + (lir::NonBranchTernaryOperationCount * operand1);
-}
-
-unsigned
-branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
-            lir::OperandType operand2)
-{
-  return operand1 + (lir::OperandTypeCount * operand2);
-}
-
 // BEGIN OPERATION COMPILERS
 
 using namespace isa;
 
-inline void emit(Context* con, int code) { con->code.append4(targetV4(code)); }
-inline int newTemp(Context* con) { return con->client->acquireTemporary(); }
-inline void freeTemp(Context* con, int r) { con->client->releaseTemporary(r); }
-inline int64_t getValue(lir::Constant* c) { return c->value->value(); }
-
 inline void
 write4(uint8_t* dst, uint32_t v)
 {
   memcpy(dst, &v, 4);
 }
 
-void
-andC(Context* c, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst);
-
-void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  if(size == 8) {
-    lir::Register Tmp(newTemp(con), newTemp(con)); lir::Register* tmp = &Tmp;
-    emit(con, subfic(tmp->high, a->low, 32));
-    emit(con, slw(t->high, b->high, a->low));
-    emit(con, srw(tmp->low, b->low, tmp->high));
-    emit(con, or_(t->high, t->high, tmp->low));
-    emit(con, addi(tmp->high, a->low, -32));
-    emit(con, slw(tmp->low, b->low, tmp->high));
-    emit(con, or_(t->high, t->high, tmp->low));
-    emit(con, slw(t->low, b->low, a->low));
-    freeTemp(con, tmp->high); freeTemp(con, tmp->low);
-  } else {
-    emit(con, slw(t->low, b->low, a->low));
-  }
-}
-
-void
-moveRR(Context* c, unsigned srcSize, lir::Register* src,
-       unsigned dstSize, lir::Register* dst);
-
-void shiftLeftC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  int sh = getValue(a);
-  if (size == 8) {
-    sh &= 0x3F;
-    if (sh) {
-      if (sh < 32) {
-        emit(con, rlwinm(t->high,b->high,sh,0,31-sh));
-        emit(con, rlwimi(t->high,b->low,sh,32-sh,31));
-        emit(con, slwi(t->low, b->low, sh));
-      } else {
-        emit(con, rlwinm(t->high,b->low,sh-32,0,63-sh));
-        emit(con, li(t->low,0));
-      }
-    } else {
-      moveRR(con, size, b, size, t);
-    }
-  } else {
-    emit(con, slwi(t->low, b->low, sh & 0x1F));
-  }
-}
-
-void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  if(size == 8) {
-    lir::Register Tmp(newTemp(con), newTemp(con)); lir::Register* tmp = &Tmp;
-    emit(con, subfic(tmp->high, a->low, 32));
-    emit(con, srw(t->low, b->low, a->low));
-    emit(con, slw(tmp->low, b->high, tmp->high));
-    emit(con, or_(t->low, t->low, tmp->low));
-    emit(con, addic(tmp->high, a->low, -32));
-    emit(con, sraw(tmp->low, b->high, tmp->high));
-    emit(con, ble(8));
-    emit(con, ori(t->low, tmp->low, 0));
-    emit(con, sraw(t->high, b->high, a->low));
-    freeTemp(con, tmp->high); freeTemp(con, tmp->low);
-  } else {
-    emit(con, sraw(t->low, b->low, a->low));
-  }
-}
-
-void shiftRightC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  int sh = getValue(a);
-  if(size == 8) {
-    sh &= 0x3F;
-    if (sh) {
-      if (sh < 32) {
-        emit(con, rlwinm(t->low,b->low,32-sh,sh,31));
-        emit(con, rlwimi(t->low,b->high,32-sh,0,sh-1));
-        emit(con, srawi(t->high,b->high,sh));
-      } else {
-        emit(con, srawi(t->high,b->high,31));
-        emit(con, srawi(t->low,b->high,sh-32));
-      }
-    } else {
-      moveRR(con, size, b, size, t);
-    }
-  } else {
-    emit(con, srawi(t->low, b->low, sh & 0x1F));
-  }
-}
-
-void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  emit(con, srw(t->low, b->low, a->low));
-  if(size == 8) {
-    lir::Register Tmp(newTemp(con), newTemp(con)); lir::Register* tmp = &Tmp;
-    emit(con, subfic(tmp->high, a->low, 32));
-    emit(con, slw(tmp->low, b->high, tmp->high));
-    emit(con, or_(t->low, t->low, tmp->low));
-    emit(con, addi(tmp->high, a->low, -32));
-    emit(con, srw(tmp->low, b->high, tmp->high));
-    emit(con, or_(t->low, t->low, tmp->low));
-    emit(con, srw(t->high, b->high, a->low));
-    freeTemp(con, tmp->high); freeTemp(con, tmp->low);
-  }
-}
-
-void unsignedShiftRightC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  int sh = getValue(a);
-  if (size == 8) {
-    if (sh & 0x3F) {
-      if (sh == 32) {
-        lir::Register high(b->high);
-        moveRR(con, 4, &high, 4, t);
-        emit(con, li(t->high,0));
-      } else if (sh < 32) {
-        emit(con, srwi(t->low, b->low, sh));
-        emit(con, rlwimi(t->low,b->high,32-sh,0,sh-1));
-        emit(con, rlwinm(t->high,b->high,32-sh,sh,31));
-      } else {
-        emit(con, rlwinm(t->low,b->high,64-sh,sh-32,31));
-        emit(con, li(t->high,0));
-      }
-    } else {
-      moveRR(con, size, b, size, t);
-    }
-  } else {
-    if (sh & 0x1F) {
-      emit(con, srwi(t->low, b->low, sh & 0x1F));
-    } else {
-      moveRR(con, size, b, size, t);
-    }
-  }
-}
-
-void
-updateImmediate(System* s, void* dst, int32_t src, unsigned size, bool address)
-{
-  switch (size) {
-  case 4: {
-    int32_t* p = static_cast<int32_t*>(dst);
-    int r = (targetV4(p[1]) >> 21) & 31;
-
-    if (address) {
-      p[0] = targetV4(lis(r, ha16(src)));
-      p[1] |= targetV4(src & 0xFFFF);
-    } else {
-      p[0] = targetV4(lis(r, src >> 16));
-      p[1] = targetV4(ori(r, r, src));
-    }
-  } break;
-
-  default: abort(s);
-  }
-}
-
-class ImmediateListener: public Promise::Listener {
- public:
-  ImmediateListener(System* s, void* dst, unsigned size, unsigned offset,
-                    bool address):
-    s(s), dst(dst), size(size), offset(offset), address(address)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    updateImmediate(s, dst, value, size, address);
-    if (location) *location = static_cast<uint8_t*>(dst) + offset;
-    return false;
-  }
-
-  System* s;
-  void* dst;
-  unsigned size;
-  unsigned offset;
-  bool address;
-};
-
-class ImmediateTask: public Task {
- public:
-  ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size,
-                unsigned promiseOffset, bool address):
-    Task(next),
-    promise(promise),
-    offset(offset),
-    size(size),
-    promiseOffset(promiseOffset),
-    address(address)
-  { }
-
-  virtual void run(Context* c) {
-    if (promise->resolved()) {
-      updateImmediate
-        (c->s, c->result + offset->value(), promise->value(), size, address);
-    } else {
-      new (promise->listen(sizeof(ImmediateListener))) ImmediateListener
-        (c->s, c->result + offset->value(), size, promiseOffset, address);
-    }
-  }
-
-  Promise* promise;
-  Promise* offset;
-  unsigned size;
-  unsigned promiseOffset;
-  bool address;
-};
-
-void
-appendImmediateTask(Context* c, Promise* promise, Promise* offset,
-                    unsigned size, unsigned promiseOffset, bool address)
-{
-  c->tasks = new(c->zone) ImmediateTask(c->tasks, promise, offset, size, promiseOffset, address);
-}
-
-class ConstantPoolEntry: public Promise {
- public:
-  ConstantPoolEntry(Context* c, Promise* constant):
-    c(c), constant(constant), next(c->constantPool), address(0)
-  {
-    c->constantPool = this;
-    ++ c->constantPoolCount;
-  }
-
-  virtual int64_t value() {
-    assert(c, resolved());
-
-    return reinterpret_cast<intptr_t>(address);
-  }
-
-  virtual bool resolved() {
-    return address != 0;
-  }
-
-  Context* c;
-  Promise* constant;
-  ConstantPoolEntry* next;
-  void* address;
-};
-
-ConstantPoolEntry*
-appendConstantPoolEntry(Context* c, Promise* constant)
-{
-  return new (c->zone) ConstantPoolEntry(c, constant);
-}
-
-void
-jumpR(Context* c, unsigned size UNUSED, lir::Register* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  emit(c, mtctr(target->low));
-  emit(c, bctr());
-}
-
-void
-swapRR(Context* c, unsigned aSize, lir::Register* a,
-       unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == TargetBytesPerWord);
-  assert(c, bSize == TargetBytesPerWord);
-
-  lir::Register tmp(c->client->acquireTemporary());
-  moveRR(c, aSize, a, bSize, &tmp);
-  moveRR(c, bSize, b, aSize, a);
-  moveRR(c, bSize, &tmp, bSize, b);
-  c->client->releaseTemporary(tmp.low);
-}
-
-void
-moveRR(Context* c, unsigned srcSize, lir::Register* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  switch (srcSize) {
-  case 1:
-    emit(c, extsb(dst->low, src->low));
-    break;
-    
-  case 2:
-    emit(c, extsh(dst->low, src->low));
-    break;
-    
-  case 4:
-  case 8:
-    if (srcSize == 4 and dstSize == 8) {
-      moveRR(c, 4, src, 4, dst);
-      emit(c, srawi(dst->high, src->low, 31));
-    } else if (srcSize == 8 and dstSize == 8) {
-      lir::Register srcHigh(src->high);
-      lir::Register dstHigh(dst->high);
-
-      if (src->high == dst->low) {
-        if (src->low == dst->high) {
-          swapRR(c, 4, src, 4, dst);
-        } else {
-          moveRR(c, 4, &srcHigh, 4, &dstHigh);
-          moveRR(c, 4, src, 4, dst);
-        }
-      } else {
-        moveRR(c, 4, src, 4, dst);
-        moveRR(c, 4, &srcHigh, 4, &dstHigh);
-      }
-    } else if (src->low != dst->low) {
-      emit(c, mr(dst->low, src->low));
-    }
-    break;
-
-  default: abort(c);
-  }
-}
-
-void
-moveZRR(Context* c, unsigned srcSize, lir::Register* src,
-        unsigned, lir::Register* dst)
-{
-  switch (srcSize) {
-  case 2:
-    emit(c, andi(dst->low, src->low, 0xFFFF));
-    break;
-
-  default: abort(c);
-  }
-}
-
-void
-moveCR2(Context* c, unsigned, lir::Constant* src,
-       unsigned dstSize, lir::Register* dst, unsigned promiseOffset)
-{
-  if (dstSize <= 4) {
-    if (src->value->resolved()) {
-      int32_t v = src->value->value();
-      if (isInt16(v)) {
-        emit(c, li(dst->low, v));
-      } else {
-        emit(c, lis(dst->low, v >> 16));
-        emit(c, ori(dst->low, dst->low, v));
-      }
-    } else {
-      appendImmediateTask
-        (c, src->value, offsetPromise(c), TargetBytesPerWord, promiseOffset, false);
-      emit(c, lis(dst->low, 0));
-      emit(c, ori(dst->low, dst->low, 0));
-    }
-  } else {
-    abort(c); // todo
-  }
-}
-
-void
-moveCR(Context* c, unsigned srcSize, lir::Constant* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  moveCR2(c, srcSize, src, dstSize, dst, 0);
-}
-
-void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if(size == 8) {
-    emit(con, addc(t->low, a->low, b->low));
-    emit(con, adde(t->high, a->high, b->high));
-  } else {
-    emit(con, add(t->low, a->low, b->low));
-  }
-}
-
-void addC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
-  assert(con, size == TargetBytesPerWord);
-
-  int32_t i = getValue(a);
-  if(i) {
-    emit(con, addi(t->low, b->low, lo16(i)));
-    if(not isInt16(i))
-      emit(con, addis(t->low, t->low, hi16(i) + carry16(i)));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if(size == 8) {
-    emit(con, subfc(t->low, a->low, b->low));
-    emit(con, subfe(t->high, a->high, b->high));
-  } else {
-    emit(con, subf(t->low, a->low, b->low));
-  }
-}
-
-void subC(Context* c, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
-  assert(c, size == TargetBytesPerWord);
-
-  ResolvedPromise promise(- a->value->value());
-  lir::Constant constant(&promise);
-  addC(c, size, &constant, b, t);
-}
-
-void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if(size == 8) {
-    bool useTemporaries = b->low == t->low;
-    int tmpLow;
-    int tmpHigh;
-    if (useTemporaries) {
-      tmpLow = con->client->acquireTemporary();
-      tmpHigh = con->client->acquireTemporary();
-    } else {
-      tmpLow = t->low;
-      tmpHigh = t->high;
-    }
-
-    emit(con, mullw(tmpHigh, a->high, b->low));
-    emit(con, mullw(tmpLow, a->low, b->high));
-    emit(con, add(t->high, tmpHigh, tmpLow));
-    emit(con, mulhwu(tmpLow, a->low, b->low));
-    emit(con, add(t->high, t->high, tmpLow));
-    emit(con, mullw(t->low, a->low, b->low));
-
-    if (useTemporaries) {
-      con->client->releaseTemporary(tmpLow);
-      con->client->releaseTemporary(tmpHigh);
-    }
-  } else {
-    emit(con, mullw(t->low, a->low, b->low));
-  }
-}
-
-void divideR(Context* con, unsigned size UNUSED, lir::Register* a, lir::Register* b, lir::Register* t) {
-  assert(con, size == 4);
-  emit(con, divw(t->low, b->low, a->low));
-}
-
-void remainderR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  bool useTemporary = b->low == t->low;
-  lir::Register tmp(t->low);
-  if (useTemporary) {
-    tmp.low = con->client->acquireTemporary();
-  }
-
-  divideR(con, size, a, b, &tmp);
-  multiplyR(con, size, a, &tmp, &tmp);
-  subR(con, size, &tmp, b, t);
-
-  if (useTemporary) {
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-int
-normalize(Context* c, int offset, int index, unsigned scale, 
-          bool* preserveIndex, bool* release)
-{
-  if (offset != 0 or scale != 1) {
-    lir::Register normalizedIndex
-      (*preserveIndex ? c->client->acquireTemporary() : index);
-    
-    if (*preserveIndex) {
-      *release = true;
-      *preserveIndex = false;
-    } else {
-      *release = false;
-    }
-
-    int scaled;
-
-    if (scale != 1) {
-      lir::Register unscaledIndex(index);
-
-      ResolvedPromise scalePromise(log(scale));
-      lir::Constant scaleConstant(&scalePromise);
-      
-      shiftLeftC(c, TargetBytesPerWord, &scaleConstant,
-                 &unscaledIndex, &normalizedIndex);
-
-      scaled = normalizedIndex.low;
-    } else {
-      scaled = index;
-    }
-
-    if (offset != 0) {
-      lir::Register untranslatedIndex(scaled);
-
-      ResolvedPromise offsetPromise(offset);
-      lir::Constant offsetConstant(&offsetPromise);
-
-      addC(c, TargetBytesPerWord, &offsetConstant,
-           &untranslatedIndex, &normalizedIndex);
-    }
-
-    return normalizedIndex.low;
-  } else {
-    *release = false;
-    return index;
-  }
-}
-
-void
-store(Context* c, unsigned size, lir::Register* src,
-      int base, int offset, int index, unsigned scale, bool preserveIndex)
-{
-  if (index != lir::NoRegister) {
-    bool release;
-    int normalized = normalize
-      (c, offset, index, scale, &preserveIndex, &release);
-
-    switch (size) {
-    case 1:
-      emit(c, stbx(src->low, base, normalized));
-      break;
-
-    case 2:
-      emit(c, sthx(src->low, base, normalized));
-      break;
-
-    case 4:
-      emit(c, stwx(src->low, base, normalized));
-      break;
-
-    case 8: {
-      lir::Register srcHigh(src->high);
-      store(c, 4, &srcHigh, base, 0, normalized, 1, preserveIndex);
-      store(c, 4, src, base, 4, normalized, 1, preserveIndex);
-    } break;
-
-    default: abort(c);
-    }
-
-    if (release) c->client->releaseTemporary(normalized);
-  } else {
-    switch (size) {
-    case 1:
-      emit(c, stb(src->low, base, offset));
-      break;
-
-    case 2:
-      emit(c, sth(src->low, base, offset));
-      break;
-
-    case 4:
-      emit(c, stw(src->low, base, offset));
-      break;
-
-    case 8: {
-      lir::Register srcHigh(src->high);
-      store(c, 4, &srcHigh, base, offset, lir::NoRegister, 1, false);
-      store(c, 4, src, base, offset + 4, lir::NoRegister, 1, false);
-    } break;
-
-    default: abort(c);
-    }
-  }
-}
-
-void
-moveRM(Context* c, unsigned srcSize, lir::Register* src,
-       unsigned dstSize UNUSED, lir::Memory* dst)
-{
-  assert(c, srcSize == dstSize);
-
-  store(c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
-}
-
-void
-moveAndUpdateRM(Context* c, unsigned srcSize UNUSED, lir::Register* src,
-                unsigned dstSize UNUSED, lir::Memory* dst)
-{
-  assert(c, srcSize == TargetBytesPerWord);
-  assert(c, dstSize == TargetBytesPerWord);
-
-  if (dst->index == lir::NoRegister) {
-    emit(c, stwu(src->low, dst->base, dst->offset));
-  } else {
-    assert(c, dst->offset == 0);
-    assert(c, dst->scale == 1);
-    
-    emit(c, stwux(src->low, dst->base, dst->index));
-  }
-}
-
-void
-load(Context* c, unsigned srcSize, int base, int offset, int index,
-     unsigned scale, unsigned dstSize, lir::Register* dst,
-     bool preserveIndex, bool signExtend)
-{
-  if (index != lir::NoRegister) {
-    bool release;
-    int normalized = normalize
-      (c, offset, index, scale, &preserveIndex, &release);
-
-    switch (srcSize) {
-    case 1:
-      emit(c, lbzx(dst->low, base, normalized));
-      if (signExtend) {
-        emit(c, extsb(dst->low, dst->low));
-      }
-      break;
-
-    case 2:
-      if (signExtend) {
-        emit(c, lhax(dst->low, base, normalized));
-      } else {
-        emit(c, lhzx(dst->low, base, normalized));
-      }
-      break;
-
-    case 4:
-    case 8: {
-      if (srcSize == 4 and dstSize == 8) {
-        load(c, 4, base, 0, normalized, 1, 4, dst, preserveIndex, false);
-        moveRR(c, 4, dst, 8, dst);
-      } else if (srcSize == 8 and dstSize == 8) {
-        lir::Register dstHigh(dst->high);
-        load(c, 4, base, 0, normalized, 1, 4, &dstHigh, preserveIndex, false);
-        load(c, 4, base, 4, normalized, 1, 4, dst, preserveIndex, false);
-      } else {
-        emit(c, lwzx(dst->low, base, normalized));
-      }
-    } break;
-
-    default: abort(c);
-    }
-
-    if (release) c->client->releaseTemporary(normalized);
-  } else {
-    switch (srcSize) {
-    case 1:
-      emit(c, lbz(dst->low, base, offset));
-      if (signExtend) {
-        emit(c, extsb(dst->low, dst->low));
-      }
-      break;
-
-    case 2:
-      if (signExtend) {
-        emit(c, lha(dst->low, base, offset));
-      } else {
-        emit(c, lha(dst->low, base, offset));
-      }
-      break;
-
-    case 4:
-      emit(c, lwz(dst->low, base, offset));
-      break;
-
-    case 8: {
-      if (dstSize == 8) {
-        lir::Register dstHigh(dst->high);
-        load(c, 4, base, offset, lir::NoRegister, 1, 4, &dstHigh, false, false);
-        load(c, 4, base, offset + 4, lir::NoRegister, 1, 4, dst, false, false);
-      } else {
-        emit(c, lwzx(dst->low, base, offset));
-      }
-    } break;
-
-    default: abort(c);
-    }
-  }
-}
-
-void
-moveMR(Context* c, unsigned srcSize, lir::Memory* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  load(c, srcSize, src->base, src->offset, src->index, src->scale,
-       dstSize, dst, true, true);
-}
-
-void
-moveZMR(Context* c, unsigned srcSize, lir::Memory* src,
-        unsigned dstSize, lir::Register* dst)
-{
-  load(c, srcSize, src->base, src->offset, src->index, src->scale,
-       dstSize, dst, true, false);
-}
-
-void
-andR(Context* c, unsigned size, lir::Register* a,
-     lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-    
-    andR(c, 4, a, b, dst);
-    andR(c, 4, &ah, &bh, &dh);
-  } else {
-    emit(c, and_(dst->low, a->low, b->low));
-  }
-}
-
-void
-andC(Context* c, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  int64_t v = a->value->value();
-
-  if (size == 8) {
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-
-    andC(c, 4, &al, b, dst);
-    andC(c, 4, &ah, &bh, &dh);
-  } else {
-    // bitmasks of the form regex 0*1*0* can be handled in a single
-    // rlwinm instruction, hence the following:
-
-    uint32_t v32 = static_cast<uint32_t>(v);
-    unsigned state = 0;
-    unsigned start = 0;
-    unsigned end = 31;
-    for (unsigned i = 0; i < 32; ++i) {
-      unsigned bit = (v32 >> i) & 1;
-      switch (state) {
-      case 0:
-        if (bit) {
-          start = i;
-          state = 1;
-        }
-        break;
-
-      case 1:
-        if (bit == 0) {
-          end = i - 1;
-          state = 2;
-        }
-        break;
-
-      case 2:
-        if (bit) {
-          // not in 0*1*0* form.  We can only use andi(s) if either
-          // the topmost or bottommost 16 bits are zero.
-
-          if ((v32 >> 16) == 0) {
-            emit(c, andi(dst->low, b->low, v32));
-          } else if ((v32 & 0xFFFF) == 0) {
-            emit(c, andis(dst->low, b->low, v32 >> 16));
-          } else {
-            bool useTemporary = b->low == dst->low;
-            lir::Register tmp(dst->low);
-            if (useTemporary) {
-              tmp.low = c->client->acquireTemporary();
-            }
-
-            moveCR(c, 4, a, 4, &tmp);
-            andR(c, 4, b, &tmp, dst);
-
-            if (useTemporary) {
-              c->client->releaseTemporary(tmp.low);
-            }
-          }
-          return;
-        }
-        break;
-      }
-    }
-
-    if (state) {
-      if (start != 0 or end != 31) {
-        emit(c, rlwinm(dst->low, b->low, 0, 31 - end, 31 - start));
-      } else {
-        moveRR(c, 4, b, 4, dst);
-      }
-    } else {
-      emit(c, li(dst->low, 0));
-    }
-  }
-}
-
-void
-orR(Context* c, unsigned size, lir::Register* a,
-    lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-    
-    orR(c, 4, a, b, dst);
-    orR(c, 4, &ah, &bh, &dh);
-  } else {
-    emit(c, or_(dst->low, a->low, b->low));
-  }
-}
-
-void
-orC(Context* c, unsigned size, lir::Constant* a,
-    lir::Register* b, lir::Register* dst)
-{
-  int64_t v = a->value->value();
-
-  if (size == 8) {
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-
-    orC(c, 4, &al, b, dst);
-    orC(c, 4, &ah, &bh, &dh);
-  } else {
-    emit(c, ori(b->low, dst->low, v));
-    if (v >> 16) {
-      emit(c, oris(dst->low, dst->low, v >> 16));
-    }
-  }
-}
-
-void
-xorR(Context* c, unsigned size, lir::Register* a,
-     lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-    
-    xorR(c, 4, a, b, dst);
-    xorR(c, 4, &ah, &bh, &dh);
-  } else {
-    emit(c, xor_(dst->low, a->low, b->low));
-  }
-}
-
-void
-xorC(Context* c, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  uint64_t v = a->value->value();
-
-  if (size == 8) {
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-
-    xorC(c, 4, &al, b, dst);
-    xorC(c, 4, &ah, &bh, &dh);
-  } else {
-    if (v >> 16) {
-      emit(c, xoris(b->low, dst->low, v >> 16));
-      emit(c, xori(dst->low, dst->low, v));
-    } else {
-      emit(c, xori(b->low, dst->low, v));
-    }
-  }
-}
-
-void
-moveAR2(Context* c, unsigned srcSize UNUSED, lir::Address* src,
-        unsigned dstSize, lir::Register* dst, unsigned promiseOffset)
-{
-  assert(c, srcSize == 4 and dstSize == 4);
-
-  lir::Memory memory(dst->low, 0, -1, 0);
-  
-  appendImmediateTask
-    (c, src->address, offsetPromise(c), TargetBytesPerWord, promiseOffset, true);
-  
-  emit(c, lis(dst->low, 0));
-  moveMR(c, dstSize, &memory, dstSize, dst);
-}
-
-void
-moveAR(Context* c, unsigned srcSize, lir::Address* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  moveAR2(c, srcSize, src, dstSize, dst, 0);
-}
-
-void
-compareRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
-          unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == 4 and bSize == 4);
-  
-  emit(c, cmpw(b->low, a->low));
-}
-
-void
-compareCR(Context* c, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == 4 and bSize == 4);
-
-  if (a->value->resolved() and isInt16(a->value->value())) {
-    emit(c, cmpwi(b->low, a->value->value()));
-  } else {
-    lir::Register tmp(c->client->acquireTemporary());
-    moveCR(c, aSize, a, bSize, &tmp);
-    compareRR(c, bSize, &tmp, bSize, b);
-    c->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-compareCM(Context* c, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(c, aSize == 4 and bSize == 4);
-
-  lir::Register tmp(c->client->acquireTemporary());
-  moveMR(c, bSize, b, bSize, &tmp);
-  compareCR(c, aSize, a, bSize, &tmp);
-  c->client->releaseTemporary(tmp.low);
-}
-
-void
-compareRM(Context* c, unsigned aSize, lir::Register* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(c, aSize == 4 and bSize == 4);
-
-  lir::Register tmp(c->client->acquireTemporary());
-  moveMR(c, bSize, b, bSize, &tmp);
-  compareRR(c, aSize, a, bSize, &tmp);
-  c->client->releaseTemporary(tmp.low);
-}
-
-void
-compareUnsignedRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
-                  unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(c, aSize == 4 and bSize == 4);
-  
-  emit(c, cmplw(b->low, a->low));
-}
-
-void
-compareUnsignedCR(Context* c, unsigned aSize, lir::Constant* a,
-                  unsigned bSize, lir::Register* b)
-{
-  assert(c, aSize == 4 and bSize == 4);
-
-  if (a->value->resolved() and (a->value->value() >> 16) == 0) {
-    emit(c, cmplwi(b->low, a->value->value()));
-  } else {
-    lir::Register tmp(c->client->acquireTemporary());
-    moveCR(c, aSize, a, bSize, &tmp);
-    compareUnsignedRR(c, bSize, &tmp, bSize, b);
-    c->client->releaseTemporary(tmp.low);
-  }
-}
-
-int32_t
-branch(Context* c, lir::TernaryOperation op)
-{
-  switch (op) {
-  case lir::JumpIfEqual:
-    return beq(0);
-    
-  case lir::JumpIfNotEqual:
-    return bne(0);
-    
-  case lir::JumpIfLess:
-    return blt(0);
-    
-  case lir::JumpIfGreater:
-    return bgt(0);
-    
-  case lir::JumpIfLessOrEqual:
-    return ble(0);
-    
-  case lir::JumpIfGreaterOrEqual:
-    return bge(0);
-    
-  default:
-    abort(c);
-  }
-}
-
-void
-conditional(Context* c, int32_t branch, lir::Constant* target)
-{
-  appendOffsetTask(c, target->value, offsetPromise(c), true);
-  emit(c, branch);
-}
-
-void
-branch(Context* c, lir::TernaryOperation op, lir::Constant* target)
-{
-  conditional(c, branch(c, op), target);
-}
-
-void
-branchLong(Context* c, lir::TernaryOperation op, lir::Operand* al,
-           lir::Operand* ah, lir::Operand* bl,
-           lir::Operand* bh, lir::Constant* target,
-           BinaryOperationType compareSigned,
-           BinaryOperationType compareUnsigned)
-{
-  compareSigned(c, 4, ah, 4, bh);
-
-  unsigned next = 0;
-  
-  switch (op) {
-  case lir::JumpIfEqual:
-    next = c->code.length();
-    emit(c, bne(0));
-
-    compareSigned(c, 4, al, 4, bl);
-    conditional(c, beq(0), target);
-    break;
-
-  case lir::JumpIfNotEqual:
-    conditional(c, bne(0), target);
-
-    compareSigned(c, 4, al, 4, bl);
-    conditional(c, bne(0), target);
-    break;
-
-  case lir::JumpIfLess:
-    conditional(c, blt(0), target);
-
-    next = c->code.length();
-    emit(c, bgt(0));
-
-    compareUnsigned(c, 4, al, 4, bl);
-    conditional(c, blt(0), target);
-    break;
-
-  case lir::JumpIfGreater:
-    conditional(c, bgt(0), target);
-
-    next = c->code.length();
-    emit(c, blt(0));
-
-    compareUnsigned(c, 4, al, 4, bl);
-    conditional(c, bgt(0), target);
-    break;
-
-  case lir::JumpIfLessOrEqual:
-    conditional(c, blt(0), target);
-
-    next = c->code.length();
-    emit(c, bgt(0));
-
-    compareUnsigned(c, 4, al, 4, bl);
-    conditional(c, ble(0), target);
-    break;
-
-  case lir::JumpIfGreaterOrEqual:
-    conditional(c, bgt(0), target);
-
-    next = c->code.length();
-    emit(c, blt(0));
-
-    compareUnsigned(c, 4, al, 4, bl);
-    conditional(c, bge(0), target);
-    break;
-
-  default:
-    abort(c);
-  }
-
-  if (next) {
-    updateOffset
-      (c->s, c->code.data + next, true, reinterpret_cast<intptr_t>
-       (c->code.data + c->code.length()), 0);
-  }
-}
-
-void
-branchRR(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Register* b,
-         lir::Constant* target)
-{
-  if (size > TargetBytesPerWord) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    branchLong(c, op, a, &ah, b, &bh, target, CAST2(compareRR),
-               CAST2(compareUnsignedRR));
-  } else {
-    compareRR(c, size, a, size, b);
-    branch(c, op, target);
-  }
-}
-
-void
-branchCR(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Register* b,
-         lir::Constant* target)
-{
-  if (size > TargetBytesPerWord) {
-    int64_t v = a->value->value();
-
-    ResolvedPromise low(v & ~static_cast<target_uintptr_t>(0));
-    lir::Constant al(&low);
-
-    ResolvedPromise high((v >> 32) & ~static_cast<target_uintptr_t>(0));
-    lir::Constant ah(&high);
-
-    lir::Register bh(b->high);
-
-    branchLong(c, op, &al, &ah, b, &bh, target, CAST2(compareCR),
-               CAST2(compareUnsignedCR));
-  } else {
-    compareCR(c, size, a, size, b);
-    branch(c, op, target);
-  }
-}
-
-void
-branchRM(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(c, size <= TargetBytesPerWord);
-
-  compareRM(c, size, a, size, b);
-  branch(c, op, target);
-}
-
-void
-branchCM(Context* c, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(c, size <= TargetBytesPerWord);
-
-  compareCM(c, size, a, size, b);
-  branch(c, op, target);
-}
-
-ShiftMaskPromise*
-shiftMaskPromise(Context* c, Promise* base, unsigned shift, int64_t mask)
-{
-  return new (c->zone) ShiftMaskPromise(base, shift, mask);
-}
-
-void
-moveCM(Context* c, unsigned srcSize, lir::Constant* src,
-       unsigned dstSize, lir::Memory* dst)
-{
-  switch (dstSize) {
-  case 8: {
-    lir::Constant srcHigh
-      (shiftMaskPromise(c, src->value, 32, 0xFFFFFFFF));
-    lir::Constant srcLow
-      (shiftMaskPromise(c, src->value, 0, 0xFFFFFFFF));
-    
-    lir::Memory dstLow
-      (dst->base, dst->offset + 4, dst->index, dst->scale);
-    
-    moveCM(c, 4, &srcLow, 4, &dstLow);
-    moveCM(c, 4, &srcHigh, 4, dst);
-  } break;
-
-  default:
-    lir::Register tmp(c->client->acquireTemporary());
-    moveCR(c, srcSize, src, dstSize, &tmp);
-    moveRM(c, dstSize, &tmp, dstSize, dst);
-    c->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-negateRR(Context* c, unsigned srcSize, lir::Register* src,
-         unsigned dstSize UNUSED, lir::Register* dst)
-{
-  assert(c, srcSize == dstSize);
-
-  if (srcSize == 8) {
-    lir::Register dstHigh(dst->high);
-
-    emit(c, subfic(dst->low, src->low, 0));
-    emit(c, subfze(dst->high, src->high));
-  } else {
-    emit(c, neg(dst->low, src->low));
-  }
-}
-
-void
-callR(Context* c, unsigned size UNUSED, lir::Register* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  emit(c, mtctr(target->low));
-  emit(c, bctrl());
-}
-
-void
-callC(Context* c, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  appendOffsetTask(c, target->value, offsetPromise(c), false);
-  emit(c, bl(0));
-}
-
-void
-longCallC(Context* c, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  lir::Register tmp(0);
-  moveCR2(c, TargetBytesPerWord, target, TargetBytesPerWord, &tmp, 12);
-  callR(c, TargetBytesPerWord, &tmp);
-}
-
-void
-alignedLongCallC(Context* c, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  lir::Register tmp(c->client->acquireTemporary());
-  lir::Address address(appendConstantPoolEntry(c, target->value));
-  moveAR2(c, TargetBytesPerWord, &address, TargetBytesPerWord, &tmp, 12);
-  callR(c, TargetBytesPerWord, &tmp);
-  c->client->releaseTemporary(tmp.low);
-}
-
-void
-longJumpC(Context* c, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  lir::Register tmp(0);
-  moveCR2(c, TargetBytesPerWord, target, TargetBytesPerWord, &tmp, 12);
-  jumpR(c, TargetBytesPerWord, &tmp);
-}
-
-void
-alignedLongJumpC(Context* c, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  lir::Register tmp(c->client->acquireTemporary());
-  lir::Address address(appendConstantPoolEntry(c, target->value));
-  moveAR2(c, TargetBytesPerWord, &address, TargetBytesPerWord, &tmp, 12);
-  jumpR(c, TargetBytesPerWord, &tmp);
-  c->client->releaseTemporary(tmp.low);
-}
-
-void
-jumpC(Context* c, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(c, size == TargetBytesPerWord);
-
-  appendOffsetTask(c, target->value, offsetPromise(c), false);
-  emit(c, b(0));
-}
-
-void
-return_(Context* c)
-{
-  emit(c, blr());
-}
-
-void
-trap(Context* c)
-{
-  emit(c, isa::trap());
-}
-
-void
-memoryBarrier(Context* c)
-{
-  emit(c, sync(0));
-}
-
 // END OPERATION COMPILERS
 
 unsigned
@@ -1753,95 +241,6 @@ nextFrame(ArchitectureContext* c UNUSED, int32_t* start, unsigned size,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-void
-populateTables(ArchitectureContext* c)
-{
-  const lir::OperandType C = lir::ConstantOperand;
-  const lir::OperandType A = lir::AddressOperand;
-  const lir::OperandType R = lir::RegisterOperand;
-  const lir::OperandType M = lir::MemoryOperand;
-
-  OperationType* zo = c->operations;
-  UnaryOperationType* uo = c->unaryOperations;
-  BinaryOperationType* bo = c->binaryOperations;
-  TernaryOperationType* to = c->ternaryOperations;
-  BranchOperationType* bro = c->branchOperations;
-
-  zo[lir::Return] = return_;
-  zo[lir::LoadBarrier] = memoryBarrier;
-  zo[lir::StoreStoreBarrier] = memoryBarrier;
-  zo[lir::StoreLoadBarrier] = memoryBarrier;
-  zo[lir::Trap] = trap;
-
-  uo[index(c, lir::LongCall, C)] = CAST1(longCallC);
-
-  uo[index(c, lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
-
-  uo[index(c, lir::LongJump, C)] = CAST1(longJumpC);
-
-  uo[index(c, lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
-
-  uo[index(c, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(c, lir::Jump, C)] = CAST1(jumpC);
-
-  uo[index(c, lir::AlignedJump, R)] = CAST1(jumpR);
-  uo[index(c, lir::AlignedJump, C)] = CAST1(jumpC);
-
-  uo[index(c, lir::Call, C)] = CAST1(callC);
-  uo[index(c, lir::Call, R)] = CAST1(callR);
-
-  uo[index(c, lir::AlignedCall, C)] = CAST1(callC);
-  uo[index(c, lir::AlignedCall, R)] = CAST1(callR);
-
-  bo[index(c, lir::Move, R, R)] = CAST2(moveRR);
-  bo[index(c, lir::Move, C, R)] = CAST2(moveCR);
-  bo[index(c, lir::Move, C, M)] = CAST2(moveCM);
-  bo[index(c, lir::Move, M, R)] = CAST2(moveMR);
-  bo[index(c, lir::Move, R, M)] = CAST2(moveRM);
-  bo[index(c, lir::Move, A, R)] = CAST2(moveAR);
-
-  bo[index(c, lir::MoveZ, R, R)] = CAST2(moveZRR);
-  bo[index(c, lir::MoveZ, M, R)] = CAST2(moveZMR);
-  bo[index(c, lir::MoveZ, C, R)] = CAST2(moveCR);
-
-  bo[index(c, lir::Negate, R, R)] = CAST2(negateRR);
-
-  to[index(c, lir::Add, R)] = CAST3(addR);
-  to[index(c, lir::Add, C)] = CAST3(addC);
-
-  to[index(c, lir::Subtract, R)] = CAST3(subR);
-  to[index(c, lir::Subtract, C)] = CAST3(subC);
-
-  to[index(c, lir::Multiply, R)] = CAST3(multiplyR);
-
-  to[index(c, lir::Divide, R)] = CAST3(divideR);
-
-  to[index(c, lir::Remainder, R)] = CAST3(remainderR);
-
-  to[index(c, lir::ShiftLeft, R)] = CAST3(shiftLeftR);
-  to[index(c, lir::ShiftLeft, C)] = CAST3(shiftLeftC);
-
-  to[index(c, lir::ShiftRight, R)] = CAST3(shiftRightR);
-  to[index(c, lir::ShiftRight, C)] = CAST3(shiftRightC);
-
-  to[index(c, lir::UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR);
-  to[index(c, lir::UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC);
-
-  to[index(c, lir::And, C)] = CAST3(andC);
-  to[index(c, lir::And, R)] = CAST3(andR);
-
-  to[index(c, lir::Or, C)] = CAST3(orC);
-  to[index(c, lir::Or, R)] = CAST3(orR);
-
-  to[index(c, lir::Xor, C)] = CAST3(xorC);
-  to[index(c, lir::Xor, R)] = CAST3(xorR);
-
-  bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR);
-  bro[branchIndex(c, C, R)] = CAST_BRANCH(branchCR);
-  bro[branchIndex(c, C, M)] = CAST_BRANCH(branchCM);
-  bro[branchIndex(c, R, M)] = CAST_BRANCH(branchRM);
-}
-
 class MyArchitecture: public Assembler::Architecture {
  public:
   MyArchitecture(System* system): c(system), referenceCount(0) {
diff --git a/src/codegen/powerpc/fixup.cpp b/src/codegen/powerpc/fixup.cpp
new file mode 100644
index 0000000000..eb8a4aac6d
--- /dev/null
+++ b/src/codegen/powerpc/fixup.cpp
@@ -0,0 +1,242 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+#include "fixup.h"
+#include "encode.h"
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+using namespace isa;
+
+unsigned padding(MyBlock*, unsigned);
+
+inline int ha16(int32_t i);
+
+bool bounded(int right, int left, int32_t v) {
+  return ((v << left) >> left) == v and ((v >> right) << right) == v;
+}
+
+OffsetPromise::OffsetPromise(Context* c, MyBlock* block, unsigned offset):
+  c(c), block(block), offset(offset)
+{ }
+
+bool OffsetPromise::resolved() {
+  return block->resolved;
+}
+
+int64_t OffsetPromise::value() {
+  assert(c, resolved());
+
+  unsigned o = offset - block->offset;
+  return block->start + padding(block, o) + o;
+}
+
+Promise* offsetPromise(Context* c) {
+  return new(c->zone) OffsetPromise(c, c->lastBlock, c->code.length());
+}
+
+void* updateOffset(vm::System* s, uint8_t* instruction, bool conditional, int64_t value,
+             void* jumpAddress)
+{
+  int32_t v = reinterpret_cast<uint8_t*>(value) - instruction;
+   
+  int32_t mask;
+  if (conditional) {
+    if (not bounded(2, 16, v)) {
+      *static_cast<uint32_t*>(jumpAddress) = isa::b(0);
+      updateOffset(s, static_cast<uint8_t*>(jumpAddress), false, value, 0);
+
+      v = static_cast<uint8_t*>(jumpAddress) - instruction;
+
+      expect(s, bounded(2, 16, v));
+    }
+    mask = 0xFFFC;
+  } else {
+    expect(s, bounded(2, 6, v));
+    mask = 0x3FFFFFC;
+  }
+
+  int32_t* p = reinterpret_cast<int32_t*>(instruction);
+  *p = vm::targetV4((v & mask) | ((~mask) & vm::targetV4(*p)));
+
+  return instruction + 4;
+}
+
+OffsetListener::OffsetListener(vm::System* s, uint8_t* instruction, bool conditional,
+               void* jumpAddress):
+  s(s),
+  instruction(instruction),
+  jumpAddress(jumpAddress),
+  conditional(conditional)
+{ }
+
+bool OffsetListener::resolve(int64_t value, void** location) {
+  void* p = updateOffset(s, instruction, conditional, value, jumpAddress);
+  if (location) *location = p;
+  return false;
+}
+
+OffsetTask::OffsetTask(Task* next, Promise* promise, Promise* instructionOffset,
+           bool conditional):
+  Task(next),
+  promise(promise),
+  instructionOffset(instructionOffset),
+  jumpAddress(0),
+  conditional(conditional)
+{ }
+
+void OffsetTask::run(Context* c) {
+  if (promise->resolved()) {
+    updateOffset
+      (c->s, c->result + instructionOffset->value(), conditional,
+       promise->value(), jumpAddress);
+  } else {
+    new (promise->listen(sizeof(OffsetListener)))
+      OffsetListener(c->s, c->result + instructionOffset->value(),
+                     conditional, jumpAddress);
+  }
+}
+
+JumpOffset::JumpOffset(MyBlock* block, OffsetTask* task, unsigned offset):
+  block(block), task(task), next(0), offset(offset)
+{ }
+
+JumpEvent::JumpEvent(JumpOffset* jumpOffsetHead, JumpOffset* jumpOffsetTail,
+          unsigned offset):
+  jumpOffsetHead(jumpOffsetHead), jumpOffsetTail(jumpOffsetTail), next(0),
+  offset(offset)
+{ }
+
+void appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset,
+                 bool conditional)
+{
+  OffsetTask* task = new(c->zone) OffsetTask(c->tasks, promise, instructionOffset, conditional);
+
+  c->tasks = task;
+
+  if (conditional) {
+    JumpOffset* offset =
+      new(c->zone) JumpOffset(c->lastBlock, task, c->code.length() - c->lastBlock->offset);
+
+    if (c->lastBlock->jumpOffsetTail) {
+      c->lastBlock->jumpOffsetTail->next = offset;
+    } else {
+      c->lastBlock->jumpOffsetHead = offset;
+    }
+    c->lastBlock->jumpOffsetTail = offset;
+  }
+}
+
+void appendJumpEvent(Context* c, MyBlock* b, unsigned offset, JumpOffset* head,
+                JumpOffset* tail)
+{
+  JumpEvent* e = new(c->zone) JumpEvent
+    (head, tail, offset);
+
+  if (b->jumpEventTail) {
+    b->jumpEventTail->next = e;
+  } else {
+    b->jumpEventHead = e;
+  }
+  b->jumpEventTail = e;
+}
+
+ShiftMaskPromise* shiftMaskPromise(Context* c, Promise* base, unsigned shift, int64_t mask) {
+  return new (c->zone) ShiftMaskPromise(base, shift, mask);
+}
+
+void
+updateImmediate(vm::System* s, void* dst, int32_t src, unsigned size, bool address)
+{
+  switch (size) {
+  case 4: {
+    int32_t* p = static_cast<int32_t*>(dst);
+    int r = (vm::targetV4(p[1]) >> 21) & 31;
+
+    if (address) {
+      p[0] = vm::targetV4(lis(r, ha16(src)));
+      p[1] |= vm::targetV4(src & 0xFFFF);
+    } else {
+      p[0] = vm::targetV4(lis(r, src >> 16));
+      p[1] = vm::targetV4(ori(r, r, src));
+    }
+  } break;
+
+  default: abort(s);
+  }
+}
+
+ImmediateListener::ImmediateListener(vm::System* s, void* dst, unsigned size, unsigned offset,
+                  bool address):
+  s(s), dst(dst), size(size), offset(offset), address(address)
+{ }
+
+bool ImmediateListener::resolve(int64_t value, void** location) {
+  updateImmediate(s, dst, value, size, address);
+  if (location) *location = static_cast<uint8_t*>(dst) + offset;
+  return false;
+}
+
+ImmediateTask::ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size,
+              unsigned promiseOffset, bool address):
+  Task(next),
+  promise(promise),
+  offset(offset),
+  size(size),
+  promiseOffset(promiseOffset),
+  address(address)
+{ }
+
+void ImmediateTask::run(Context* c) {
+  if (promise->resolved()) {
+    updateImmediate
+      (c->s, c->result + offset->value(), promise->value(), size, address);
+  } else {
+    new (promise->listen(sizeof(ImmediateListener))) ImmediateListener
+      (c->s, c->result + offset->value(), size, promiseOffset, address);
+  }
+}
+
+void
+appendImmediateTask(Context* c, Promise* promise, Promise* offset,
+                    unsigned size, unsigned promiseOffset, bool address)
+{
+  c->tasks = new(c->zone) ImmediateTask(c->tasks, promise, offset, size, promiseOffset, address);
+}
+
+ConstantPoolEntry::ConstantPoolEntry(Context* c, Promise* constant):
+  c(c), constant(constant), next(c->constantPool), address(0)
+{
+  c->constantPool = this;
+  ++ c->constantPoolCount;
+}
+
+int64_t ConstantPoolEntry::value() {
+  assert(c, resolved());
+
+  return reinterpret_cast<intptr_t>(address);
+}
+
+bool ConstantPoolEntry::resolved() {
+  return address != 0;
+}
+
+ConstantPoolEntry* appendConstantPoolEntry(Context* c, Promise* constant) {
+  return new (c->zone) ConstantPoolEntry(c, constant);
+}
+
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/powerpc/fixup.h b/src/codegen/powerpc/fixup.h
index 42fa62afc7..a51e5f6d5b 100644
--- a/src/codegen/powerpc/fixup.h
+++ b/src/codegen/powerpc/fixup.h
@@ -25,6 +25,134 @@ class Task {
   Task* next;
 };
 
+class OffsetPromise: public Promise {
+ public:
+  OffsetPromise(Context* c, MyBlock* block, unsigned offset);
+
+  virtual bool resolved();
+  
+  virtual int64_t value();
+
+  Context* c;
+  MyBlock* block;
+  unsigned offset;
+};
+
+Promise* offsetPromise(Context* c);
+
+void*
+updateOffset(vm::System* s, uint8_t* instruction, bool conditional, int64_t value,
+             void* jumpAddress);
+
+class OffsetListener: public Promise::Listener {
+ public:
+  OffsetListener(vm::System* s, uint8_t* instruction, bool conditional,
+                 void* jumpAddress);
+
+  virtual bool resolve(int64_t value, void** location);
+
+  vm::System* s;
+  uint8_t* instruction;
+  void* jumpAddress;
+  bool conditional;
+};
+
+class OffsetTask: public Task {
+ public:
+  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset,
+             bool conditional);
+
+  virtual void run(Context* c);
+
+  Promise* promise;
+  Promise* instructionOffset;
+  void* jumpAddress;
+  bool conditional;
+};
+
+class JumpOffset {
+ public:
+  JumpOffset(MyBlock* block, OffsetTask* task, unsigned offset);
+
+  MyBlock* block;
+  OffsetTask* task;
+  JumpOffset* next;
+  unsigned offset;  
+};
+
+class JumpEvent {
+ public:
+  JumpEvent(JumpOffset* jumpOffsetHead, JumpOffset* jumpOffsetTail,
+            unsigned offset);
+
+  JumpOffset* jumpOffsetHead;
+  JumpOffset* jumpOffsetTail;
+  JumpEvent* next;
+  unsigned offset;
+};
+
+void appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset,
+                 bool conditional);
+
+void appendJumpEvent(Context* c, MyBlock* b, unsigned offset, JumpOffset* head,
+                JumpOffset* tail);
+
+ShiftMaskPromise* shiftMaskPromise(Context* c, Promise* base, unsigned shift, int64_t mask);
+
+void updateImmediate(vm::System* s, void* dst, int32_t src, unsigned size, bool address);
+
+class ImmediateListener: public Promise::Listener {
+ public:
+  ImmediateListener(vm::System* s, void* dst, unsigned size, unsigned offset,
+                    bool address);
+
+  virtual bool resolve(int64_t value, void** location);
+
+  vm::System* s;
+  void* dst;
+  unsigned size;
+  unsigned offset;
+  bool address;
+};
+
+class ImmediateTask: public Task {
+ public:
+  ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size,
+                unsigned promiseOffset, bool address);
+
+  virtual void run(Context* c);
+
+  Promise* promise;
+  Promise* offset;
+  unsigned size;
+  unsigned promiseOffset;
+  bool address;
+};
+
+void
+appendImmediateTask(Context* c, Promise* promise, Promise* offset,
+                    unsigned size, unsigned promiseOffset, bool address);
+
+class ConstantPoolEntry: public Promise {
+ public:
+  ConstantPoolEntry(Context* c, Promise* constant);
+
+  virtual int64_t value();
+
+  virtual bool resolved();
+
+  Context* c;
+  Promise* constant;
+  ConstantPoolEntry* next;
+  void* address;
+};
+
+ConstantPoolEntry* appendConstantPoolEntry(Context* c, Promise* constant);
+
+inline int ha16(int32_t i) { 
+    return ((i >> 16) + ((i & 0x8000) ? 1 : 0)) & 0xffff;
+}
+
 } // namespace powerpc
 } // namespace codegen
 } // namespace avian
diff --git a/src/codegen/powerpc/multimethod.cpp b/src/codegen/powerpc/multimethod.cpp
new file mode 100644
index 0000000000..79b5f9bf7b
--- /dev/null
+++ b/src/codegen/powerpc/multimethod.cpp
@@ -0,0 +1,111 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+#include "common.h"
+
+#include "operations.h"
+#include "multimethod.h"
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+void populateTables(ArchitectureContext* c) {
+  const lir::OperandType C = lir::ConstantOperand;
+  const lir::OperandType A = lir::AddressOperand;
+  const lir::OperandType R = lir::RegisterOperand;
+  const lir::OperandType M = lir::MemoryOperand;
+
+  OperationType* zo = c->operations;
+  UnaryOperationType* uo = c->unaryOperations;
+  BinaryOperationType* bo = c->binaryOperations;
+  TernaryOperationType* to = c->ternaryOperations;
+  BranchOperationType* bro = c->branchOperations;
+
+  zo[lir::Return] = return_;
+  zo[lir::LoadBarrier] = memoryBarrier;
+  zo[lir::StoreStoreBarrier] = memoryBarrier;
+  zo[lir::StoreLoadBarrier] = memoryBarrier;
+  zo[lir::Trap] = trap;
+
+  uo[index(c, lir::LongCall, C)] = CAST1(longCallC);
+
+  uo[index(c, lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
+
+  uo[index(c, lir::LongJump, C)] = CAST1(longJumpC);
+
+  uo[index(c, lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
+
+  uo[index(c, lir::Jump, R)] = CAST1(jumpR);
+  uo[index(c, lir::Jump, C)] = CAST1(jumpC);
+
+  uo[index(c, lir::AlignedJump, R)] = CAST1(jumpR);
+  uo[index(c, lir::AlignedJump, C)] = CAST1(jumpC);
+
+  uo[index(c, lir::Call, C)] = CAST1(callC);
+  uo[index(c, lir::Call, R)] = CAST1(callR);
+
+  uo[index(c, lir::AlignedCall, C)] = CAST1(callC);
+  uo[index(c, lir::AlignedCall, R)] = CAST1(callR);
+
+  bo[index(c, lir::Move, R, R)] = CAST2(moveRR);
+  bo[index(c, lir::Move, C, R)] = CAST2(moveCR);
+  bo[index(c, lir::Move, C, M)] = CAST2(moveCM);
+  bo[index(c, lir::Move, M, R)] = CAST2(moveMR);
+  bo[index(c, lir::Move, R, M)] = CAST2(moveRM);
+  bo[index(c, lir::Move, A, R)] = CAST2(moveAR);
+
+  bo[index(c, lir::MoveZ, R, R)] = CAST2(moveZRR);
+  bo[index(c, lir::MoveZ, M, R)] = CAST2(moveZMR);
+  bo[index(c, lir::MoveZ, C, R)] = CAST2(moveCR);
+
+  bo[index(c, lir::Negate, R, R)] = CAST2(negateRR);
+
+  to[index(c, lir::Add, R)] = CAST3(addR);
+  to[index(c, lir::Add, C)] = CAST3(addC);
+
+  to[index(c, lir::Subtract, R)] = CAST3(subR);
+  to[index(c, lir::Subtract, C)] = CAST3(subC);
+
+  to[index(c, lir::Multiply, R)] = CAST3(multiplyR);
+
+  to[index(c, lir::Divide, R)] = CAST3(divideR);
+
+  to[index(c, lir::Remainder, R)] = CAST3(remainderR);
+
+  to[index(c, lir::ShiftLeft, R)] = CAST3(shiftLeftR);
+  to[index(c, lir::ShiftLeft, C)] = CAST3(shiftLeftC);
+
+  to[index(c, lir::ShiftRight, R)] = CAST3(shiftRightR);
+  to[index(c, lir::ShiftRight, C)] = CAST3(shiftRightC);
+
+  to[index(c, lir::UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR);
+  to[index(c, lir::UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC);
+
+  to[index(c, lir::And, C)] = CAST3(andC);
+  to[index(c, lir::And, R)] = CAST3(andR);
+
+  to[index(c, lir::Or, C)] = CAST3(orC);
+  to[index(c, lir::Or, R)] = CAST3(orR);
+
+  to[index(c, lir::Xor, C)] = CAST3(xorC);
+  to[index(c, lir::Xor, R)] = CAST3(xorR);
+
+  bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR);
+  bro[branchIndex(c, C, R)] = CAST_BRANCH(branchCR);
+  bro[branchIndex(c, C, M)] = CAST_BRANCH(branchCM);
+  bro[branchIndex(c, R, M)] = CAST_BRANCH(branchRM);
+}
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/powerpc/multimethod.h b/src/codegen/powerpc/multimethod.h
new file mode 100644
index 0000000000..b2fe6f206d
--- /dev/null
+++ b/src/codegen/powerpc/multimethod.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_MULTIMETHOD_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_MULTIMETHOD_H
+
+#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
+#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+
+inline unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
+{
+  return operation + (lir::UnaryOperationCount * operand);
+}
+
+inline unsigned index(ArchitectureContext*,
+      lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2)
+{
+  return operation
+    + (lir::BinaryOperationCount * operand1)
+    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
+}
+
+inline unsigned index(ArchitectureContext* c UNUSED,
+      lir::TernaryOperation operation,
+      lir::OperandType operand1)
+{
+  assert(c, not isBranch(operation));
+
+  return operation + (lir::NonBranchTernaryOperationCount * operand1);
+}
+
+inline unsigned branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2)
+{
+  return operand1 + (lir::OperandTypeCount * operand2);
+}
+
+void populateTables(ArchitectureContext* c);
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_MULTIMETHOD_H
diff --git a/src/codegen/powerpc/operations.cpp b/src/codegen/powerpc/operations.cpp
new file mode 100644
index 0000000000..8127e85591
--- /dev/null
+++ b/src/codegen/powerpc/operations.cpp
@@ -0,0 +1,1097 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "block.h"
+#include "common.h"
+#include "encode.h"
+#include "operations.h"
+#include "fixup.h"
+#include "multimethod.h"
+
+using namespace vm;
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+using namespace isa;
+using namespace util;
+
+const int64_t MASK_LO32 = 0x0ffffffff;
+const int     MASK_LO16 = 0x0ffff;
+const int     MASK_LO8  = 0x0ff;
+// inline int lo32(int64_t i) { return (int)(i & MASK_LO32); }
+// inline int hi32(int64_t i) { return lo32(i >> 32); }
+inline int lo16(int64_t i) { return (int)(i & MASK_LO16); }
+inline int hi16(int64_t i) { return lo16(i >> 16); }
+// inline int lo8(int64_t i) { return (int)(i & MASK_LO8); }
+// inline int hi8(int64_t i) { return lo8(i >> 8); }
+
+inline int carry16(target_intptr_t v) {
+  return static_cast<int16_t>(v) < 0 ? 1 : 0;
+}
+
+void andC(Context* c, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if(size == 8) {
+    lir::Register Tmp(newTemp(con), newTemp(con)); lir::Register* tmp = &Tmp;
+    emit(con, subfic(tmp->high, a->low, 32));
+    emit(con, slw(t->high, b->high, a->low));
+    emit(con, srw(tmp->low, b->low, tmp->high));
+    emit(con, or_(t->high, t->high, tmp->low));
+    emit(con, addi(tmp->high, a->low, -32));
+    emit(con, slw(tmp->low, b->low, tmp->high));
+    emit(con, or_(t->high, t->high, tmp->low));
+    emit(con, slw(t->low, b->low, a->low));
+    freeTemp(con, tmp->high); freeTemp(con, tmp->low);
+  } else {
+    emit(con, slw(t->low, b->low, a->low));
+  }
+}
+
+void moveRR(Context* c, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);
+
+void shiftLeftC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
+  int sh = getValue(a);
+  if (size == 8) {
+    sh &= 0x3F;
+    if (sh) {
+      if (sh < 32) {
+        emit(con, rlwinm(t->high,b->high,sh,0,31-sh));
+        emit(con, rlwimi(t->high,b->low,sh,32-sh,31));
+        emit(con, slwi(t->low, b->low, sh));
+      } else {
+        emit(con, rlwinm(t->high,b->low,sh-32,0,63-sh));
+        emit(con, li(t->low,0));
+      }
+    } else {
+      moveRR(con, size, b, size, t);
+    }
+  } else {
+    emit(con, slwi(t->low, b->low, sh & 0x1F));
+  }
+}
+
+void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if(size == 8) {
+    lir::Register Tmp(newTemp(con), newTemp(con)); lir::Register* tmp = &Tmp;
+    emit(con, subfic(tmp->high, a->low, 32));
+    emit(con, srw(t->low, b->low, a->low));
+    emit(con, slw(tmp->low, b->high, tmp->high));
+    emit(con, or_(t->low, t->low, tmp->low));
+    emit(con, addic(tmp->high, a->low, -32));
+    emit(con, sraw(tmp->low, b->high, tmp->high));
+    emit(con, ble(8));
+    emit(con, ori(t->low, tmp->low, 0));
+    emit(con, sraw(t->high, b->high, a->low));
+    freeTemp(con, tmp->high); freeTemp(con, tmp->low);
+  } else {
+    emit(con, sraw(t->low, b->low, a->low));
+  }
+}
+
+void shiftRightC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
+  int sh = getValue(a);
+  if(size == 8) {
+    sh &= 0x3F;
+    if (sh) {
+      if (sh < 32) {
+        emit(con, rlwinm(t->low,b->low,32-sh,sh,31));
+        emit(con, rlwimi(t->low,b->high,32-sh,0,sh-1));
+        emit(con, srawi(t->high,b->high,sh));
+      } else {
+        emit(con, srawi(t->high,b->high,31));
+        emit(con, srawi(t->low,b->high,sh-32));
+      }
+    } else {
+      moveRR(con, size, b, size, t);
+    }
+  } else {
+    emit(con, srawi(t->low, b->low, sh & 0x1F));
+  }
+}
+
+void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  emit(con, srw(t->low, b->low, a->low));
+  if(size == 8) {
+    lir::Register Tmp(newTemp(con), newTemp(con)); lir::Register* tmp = &Tmp;
+    emit(con, subfic(tmp->high, a->low, 32));
+    emit(con, slw(tmp->low, b->high, tmp->high));
+    emit(con, or_(t->low, t->low, tmp->low));
+    emit(con, addi(tmp->high, a->low, -32));
+    emit(con, srw(tmp->low, b->high, tmp->high));
+    emit(con, or_(t->low, t->low, tmp->low));
+    emit(con, srw(t->high, b->high, a->low));
+    freeTemp(con, tmp->high); freeTemp(con, tmp->low);
+  }
+}
+
+void unsignedShiftRightC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
+  int sh = getValue(a);
+  if (size == 8) {
+    if (sh & 0x3F) {
+      if (sh == 32) {
+        lir::Register high(b->high);
+        moveRR(con, 4, &high, 4, t);
+        emit(con, li(t->high,0));
+      } else if (sh < 32) {
+        emit(con, srwi(t->low, b->low, sh));
+        emit(con, rlwimi(t->low,b->high,32-sh,0,sh-1));
+        emit(con, rlwinm(t->high,b->high,32-sh,sh,31));
+      } else {
+        emit(con, rlwinm(t->low,b->high,64-sh,sh-32,31));
+        emit(con, li(t->high,0));
+      }
+    } else {
+      moveRR(con, size, b, size, t);
+    }
+  } else {
+    if (sh & 0x1F) {
+      emit(con, srwi(t->low, b->low, sh & 0x1F));
+    } else {
+      moveRR(con, size, b, size, t);
+    }
+  }
+}
+
+void jumpR(Context* c, unsigned size UNUSED, lir::Register* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  emit(c, mtctr(target->low));
+  emit(c, bctr());
+}
+
+void swapRR(Context* c, unsigned aSize, lir::Register* a,
+       unsigned bSize, lir::Register* b) {
+  assert(c, aSize == TargetBytesPerWord);
+  assert(c, bSize == TargetBytesPerWord);
+
+  lir::Register tmp(c->client->acquireTemporary());
+  moveRR(c, aSize, a, bSize, &tmp);
+  moveRR(c, bSize, b, aSize, a);
+  moveRR(c, bSize, &tmp, bSize, b);
+  c->client->releaseTemporary(tmp.low);
+}
+
+void moveRR(Context* c, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst) {
+  switch (srcSize) {
+  case 1:
+    emit(c, extsb(dst->low, src->low));
+    break;
+    
+  case 2:
+    emit(c, extsh(dst->low, src->low));
+    break;
+    
+  case 4:
+  case 8:
+    if (srcSize == 4 and dstSize == 8) {
+      moveRR(c, 4, src, 4, dst);
+      emit(c, srawi(dst->high, src->low, 31));
+    } else if (srcSize == 8 and dstSize == 8) {
+      lir::Register srcHigh(src->high);
+      lir::Register dstHigh(dst->high);
+
+      if (src->high == dst->low) {
+        if (src->low == dst->high) {
+          swapRR(c, 4, src, 4, dst);
+        } else {
+          moveRR(c, 4, &srcHigh, 4, &dstHigh);
+          moveRR(c, 4, src, 4, dst);
+        }
+      } else {
+        moveRR(c, 4, src, 4, dst);
+        moveRR(c, 4, &srcHigh, 4, &dstHigh);
+      }
+    } else if (src->low != dst->low) {
+      emit(c, mr(dst->low, src->low));
+    }
+    break;
+
+  default: abort(c);
+  }
+}
+
+void moveZRR(Context* c, unsigned srcSize, lir::Register* src,
+        unsigned, lir::Register* dst) {
+  switch (srcSize) {
+  case 2:
+    emit(c, andi(dst->low, src->low, 0xFFFF));
+    break;
+
+  default: abort(c);
+  }
+}
+
+void moveCR2(Context* c, unsigned, lir::Constant* src,
+       unsigned dstSize, lir::Register* dst, unsigned promiseOffset) {
+  if (dstSize <= 4) {
+    if (src->value->resolved()) {
+      int32_t v = src->value->value();
+      if (fitsInInt16(v)) {
+        emit(c, li(dst->low, v));
+      } else {
+        emit(c, lis(dst->low, v >> 16));
+        emit(c, ori(dst->low, dst->low, v));
+      }
+    } else {
+      appendImmediateTask
+        (c, src->value, offsetPromise(c), TargetBytesPerWord, promiseOffset, false);
+      emit(c, lis(dst->low, 0));
+      emit(c, ori(dst->low, dst->low, 0));
+    }
+  } else {
+    abort(c); // todo
+  }
+}
+
+void moveCR(Context* c, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Register* dst) {
+  moveCR2(c, srcSize, src, dstSize, dst, 0);
+}
+
+void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if(size == 8) {
+    emit(con, addc(t->low, a->low, b->low));
+    emit(con, adde(t->high, a->high, b->high));
+  } else {
+    emit(con, add(t->low, a->low, b->low));
+  }
+}
+
+void addC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
+  assert(con, size == TargetBytesPerWord);
+
+  int32_t i = getValue(a);
+  if(i) {
+    emit(con, addi(t->low, b->low, lo16(i)));
+    if(not fitsInInt16(i))
+      emit(con, addis(t->low, t->low, hi16(i) + carry16(i)));
+  } else {
+    moveRR(con, size, b, size, t);
+  }
+}
+
+void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if(size == 8) {
+    emit(con, subfc(t->low, a->low, b->low));
+    emit(con, subfe(t->high, a->high, b->high));
+  } else {
+    emit(con, subf(t->low, a->low, b->low));
+  }
+}
+
+void subC(Context* c, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t) {
+  assert(c, size == TargetBytesPerWord);
+
+  ResolvedPromise promise(- a->value->value());
+  lir::Constant constant(&promise);
+  addC(c, size, &constant, b, t);
+}
+
+void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  if(size == 8) {
+    bool useTemporaries = b->low == t->low;
+    int tmpLow;
+    int tmpHigh;
+    if (useTemporaries) {
+      tmpLow = con->client->acquireTemporary();
+      tmpHigh = con->client->acquireTemporary();
+    } else {
+      tmpLow = t->low;
+      tmpHigh = t->high;
+    }
+
+    emit(con, mullw(tmpHigh, a->high, b->low));
+    emit(con, mullw(tmpLow, a->low, b->high));
+    emit(con, add(t->high, tmpHigh, tmpLow));
+    emit(con, mulhwu(tmpLow, a->low, b->low));
+    emit(con, add(t->high, t->high, tmpLow));
+    emit(con, mullw(t->low, a->low, b->low));
+
+    if (useTemporaries) {
+      con->client->releaseTemporary(tmpLow);
+      con->client->releaseTemporary(tmpHigh);
+    }
+  } else {
+    emit(con, mullw(t->low, a->low, b->low));
+  }
+}
+
+void divideR(Context* con, unsigned size UNUSED, lir::Register* a, lir::Register* b, lir::Register* t) {
+  assert(con, size == 4);
+  emit(con, divw(t->low, b->low, a->low));
+}
+
+void remainderR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  bool useTemporary = b->low == t->low;
+  lir::Register tmp(t->low);
+  if (useTemporary) {
+    tmp.low = con->client->acquireTemporary();
+  }
+
+  divideR(con, size, a, b, &tmp);
+  multiplyR(con, size, a, &tmp, &tmp);
+  subR(con, size, &tmp, b, t);
+
+  if (useTemporary) {
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+int
+normalize(Context* c, int offset, int index, unsigned scale, 
+          bool* preserveIndex, bool* release) {
+  if (offset != 0 or scale != 1) {
+    lir::Register normalizedIndex
+      (*preserveIndex ? c->client->acquireTemporary() : index);
+    
+    if (*preserveIndex) {
+      *release = true;
+      *preserveIndex = false;
+    } else {
+      *release = false;
+    }
+
+    int scaled;
+
+    if (scale != 1) {
+      lir::Register unscaledIndex(index);
+
+      ResolvedPromise scalePromise(log(scale));
+      lir::Constant scaleConstant(&scalePromise);
+      
+      shiftLeftC(c, TargetBytesPerWord, &scaleConstant,
+                 &unscaledIndex, &normalizedIndex);
+
+      scaled = normalizedIndex.low;
+    } else {
+      scaled = index;
+    }
+
+    if (offset != 0) {
+      lir::Register untranslatedIndex(scaled);
+
+      ResolvedPromise offsetPromise(offset);
+      lir::Constant offsetConstant(&offsetPromise);
+
+      addC(c, TargetBytesPerWord, &offsetConstant,
+           &untranslatedIndex, &normalizedIndex);
+    }
+
+    return normalizedIndex.low;
+  } else {
+    *release = false;
+    return index;
+  }
+}
+
+void store(Context* c, unsigned size, lir::Register* src,
+      int base, int offset, int index, unsigned scale, bool preserveIndex) {
+  if (index != lir::NoRegister) {
+    bool release;
+    int normalized = normalize
+      (c, offset, index, scale, &preserveIndex, &release);
+
+    switch (size) {
+    case 1:
+      emit(c, stbx(src->low, base, normalized));
+      break;
+
+    case 2:
+      emit(c, sthx(src->low, base, normalized));
+      break;
+
+    case 4:
+      emit(c, stwx(src->low, base, normalized));
+      break;
+
+    case 8: {
+      lir::Register srcHigh(src->high);
+      store(c, 4, &srcHigh, base, 0, normalized, 1, preserveIndex);
+      store(c, 4, src, base, 4, normalized, 1, preserveIndex);
+    } break;
+
+    default: abort(c);
+    }
+
+    if (release) c->client->releaseTemporary(normalized);
+  } else {
+    switch (size) {
+    case 1:
+      emit(c, stb(src->low, base, offset));
+      break;
+
+    case 2:
+      emit(c, sth(src->low, base, offset));
+      break;
+
+    case 4:
+      emit(c, stw(src->low, base, offset));
+      break;
+
+    case 8: {
+      lir::Register srcHigh(src->high);
+      store(c, 4, &srcHigh, base, offset, lir::NoRegister, 1, false);
+      store(c, 4, src, base, offset + 4, lir::NoRegister, 1, false);
+    } break;
+
+    default: abort(c);
+    }
+  }
+}
+
+void moveRM(Context* c, unsigned srcSize, lir::Register* src,
+       unsigned dstSize UNUSED, lir::Memory* dst) {
+  assert(c, srcSize == dstSize);
+
+  store(c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
+}
+
+void moveAndUpdateRM(Context* c, unsigned srcSize UNUSED, lir::Register* src,
+                unsigned dstSize UNUSED, lir::Memory* dst) {
+  assert(c, srcSize == TargetBytesPerWord);
+  assert(c, dstSize == TargetBytesPerWord);
+
+  if (dst->index == lir::NoRegister) {
+    emit(c, stwu(src->low, dst->base, dst->offset));
+  } else {
+    assert(c, dst->offset == 0);
+    assert(c, dst->scale == 1);
+    
+    emit(c, stwux(src->low, dst->base, dst->index));
+  }
+}
+
+void load(Context* c, unsigned srcSize, int base, int offset, int index,
+     unsigned scale, unsigned dstSize, lir::Register* dst,
+     bool preserveIndex, bool signExtend) {
+  if (index != lir::NoRegister) {
+    bool release;
+    int normalized = normalize
+      (c, offset, index, scale, &preserveIndex, &release);
+
+    switch (srcSize) {
+    case 1:
+      emit(c, lbzx(dst->low, base, normalized));
+      if (signExtend) {
+        emit(c, extsb(dst->low, dst->low));
+      }
+      break;
+
+    case 2:
+      if (signExtend) {
+        emit(c, lhax(dst->low, base, normalized));
+      } else {
+        emit(c, lhzx(dst->low, base, normalized));
+      }
+      break;
+
+    case 4:
+    case 8: {
+      if (srcSize == 4 and dstSize == 8) {
+        load(c, 4, base, 0, normalized, 1, 4, dst, preserveIndex, false);
+        moveRR(c, 4, dst, 8, dst);
+      } else if (srcSize == 8 and dstSize == 8) {
+        lir::Register dstHigh(dst->high);
+        load(c, 4, base, 0, normalized, 1, 4, &dstHigh, preserveIndex, false);
+        load(c, 4, base, 4, normalized, 1, 4, dst, preserveIndex, false);
+      } else {
+        emit(c, lwzx(dst->low, base, normalized));
+      }
+    } break;
+
+    default: abort(c);
+    }
+
+    if (release) c->client->releaseTemporary(normalized);
+  } else {
+    switch (srcSize) {
+    case 1:
+      emit(c, lbz(dst->low, base, offset));
+      if (signExtend) {
+        emit(c, extsb(dst->low, dst->low));
+      }
+      break;
+
+    case 2:
+      if (signExtend) {
+        emit(c, lha(dst->low, base, offset));
+      } else {
+        emit(c, lhz(dst->low, base, offset));
+      }
+      break;
+
+    case 4:
+      emit(c, lwz(dst->low, base, offset));
+      break;
+
+    case 8: {
+      if (dstSize == 8) {
+        lir::Register dstHigh(dst->high);
+        load(c, 4, base, offset, lir::NoRegister, 1, 4, &dstHigh, false, false);
+        load(c, 4, base, offset + 4, lir::NoRegister, 1, 4, dst, false, false);
+      } else {
+        emit(c, lwz(dst->low, base, offset));
+      }
+    } break;
+
+    default: abort(c);
+    }
+  }
+}
+
+void moveMR(Context* c, unsigned srcSize, lir::Memory* src,
+       unsigned dstSize, lir::Register* dst) {
+  load(c, srcSize, src->base, src->offset, src->index, src->scale,
+       dstSize, dst, true, true);
+}
+
+void moveZMR(Context* c, unsigned srcSize, lir::Memory* src,
+        unsigned dstSize, lir::Register* dst) {
+  load(c, srcSize, src->base, src->offset, src->index, src->scale,
+       dstSize, dst, true, false);
+}
+
+void andR(Context* c, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst) {
+  if (size == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+    
+    andR(c, 4, a, b, dst);
+    andR(c, 4, &ah, &bh, &dh);
+  } else {
+    emit(c, and_(dst->low, a->low, b->low));
+  }
+}
+
+void andC(Context* c, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst) {
+  int64_t v = a->value->value();
+
+  if (size == 8) {
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+
+    andC(c, 4, &al, b, dst);
+    andC(c, 4, &ah, &bh, &dh);
+  } else {
+    // bitmasks of the form regex 0*1*0* can be handled in a single
+    // rlwinm instruction, hence the following:
+
+    uint32_t v32 = static_cast<uint32_t>(v);
+    unsigned state = 0;
+    unsigned start = 0;
+    unsigned end = 31;
+    for (unsigned i = 0; i < 32; ++i) {
+      unsigned bit = (v32 >> i) & 1;
+      switch (state) {
+      case 0:
+        if (bit) {
+          start = i;
+          state = 1;
+        }
+        break;
+
+      case 1:
+        if (bit == 0) {
+          end = i - 1;
+          state = 2;
+        }
+        break;
+
+      case 2:
+        if (bit) {
+          // not in 0*1*0* form.  We can only use andi(s) if either
+          // the topmost or bottommost 16 bits are zero.
+
+          if ((v32 >> 16) == 0) {
+            emit(c, andi(dst->low, b->low, v32));
+          } else if ((v32 & 0xFFFF) == 0) {
+            emit(c, andis(dst->low, b->low, v32 >> 16));
+          } else {
+            bool useTemporary = b->low == dst->low;
+            lir::Register tmp(dst->low);
+            if (useTemporary) {
+              tmp.low = c->client->acquireTemporary();
+            }
+
+            moveCR(c, 4, a, 4, &tmp);
+            andR(c, 4, b, &tmp, dst);
+
+            if (useTemporary) {
+              c->client->releaseTemporary(tmp.low);
+            }
+          }
+          return;
+        }
+        break;
+      }
+    }
+
+    if (state) {
+      if (start != 0 or end != 31) {
+        emit(c, rlwinm(dst->low, b->low, 0, 31 - end, 31 - start));
+      } else {
+        moveRR(c, 4, b, 4, dst);
+      }
+    } else {
+      emit(c, li(dst->low, 0));
+    }
+  }
+}
+
+void orR(Context* c, unsigned size, lir::Register* a,
+    lir::Register* b, lir::Register* dst) {
+  if (size == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+    
+    orR(c, 4, a, b, dst);
+    orR(c, 4, &ah, &bh, &dh);
+  } else {
+    emit(c, or_(dst->low, a->low, b->low));
+  }
+}
+
+void orC(Context* c, unsigned size, lir::Constant* a,
+    lir::Register* b, lir::Register* dst) {
+  int64_t v = a->value->value();
+
+  if (size == 8) {
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+
+    orC(c, 4, &al, b, dst);
+    orC(c, 4, &ah, &bh, &dh);
+  } else {
+    emit(c, ori(dst->low, b->low, v));
+    if (v >> 16) {
+      emit(c, oris(dst->low, dst->low, v >> 16));
+    }
+  }
+}
+
+void xorR(Context* c, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst) {
+  if (size == 8) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+    
+    xorR(c, 4, a, b, dst);
+    xorR(c, 4, &ah, &bh, &dh);
+  } else {
+    emit(c, xor_(dst->low, a->low, b->low));
+  }
+}
+
+void xorC(Context* c, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst) {
+  uint64_t v = a->value->value();
+
+  if (size == 8) {
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+
+    xorC(c, 4, &al, b, dst);
+    xorC(c, 4, &ah, &bh, &dh);
+  } else {
+    if (v >> 16) {
+      emit(c, xoris(dst->low, b->low, v >> 16));
+      emit(c, xori(dst->low, dst->low, v));
+    } else {
+      emit(c, xori(dst->low, b->low, v));
+    }
+  }
+}
+
+void moveAR2(Context* c, unsigned srcSize UNUSED, lir::Address* src,
+        unsigned dstSize, lir::Register* dst, unsigned promiseOffset) {
+  assert(c, srcSize == 4 and dstSize == 4);
+
+  lir::Memory memory(dst->low, 0, -1, 0);
+  
+  appendImmediateTask
+    (c, src->address, offsetPromise(c), TargetBytesPerWord, promiseOffset, true);
+  
+  emit(c, lis(dst->low, 0));
+  moveMR(c, dstSize, &memory, dstSize, dst);
+}
+
+void moveAR(Context* c, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst) {
+  moveAR2(c, srcSize, src, dstSize, dst, 0);
+}
+
+void compareRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b) {
+  assert(c, aSize == 4 and bSize == 4);
+  
+  emit(c, cmpw(b->low, a->low));
+}
+
+void compareCR(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b) {
+  assert(c, aSize == 4 and bSize == 4);
+
+  if (a->value->resolved() and fitsInInt16(a->value->value())) {
+    emit(c, cmpwi(b->low, a->value->value()));
+  } else {
+    lir::Register tmp(c->client->acquireTemporary());
+    moveCR(c, aSize, a, bSize, &tmp);
+    compareRR(c, bSize, &tmp, bSize, b);
+    c->client->releaseTemporary(tmp.low);
+  }
+}
+
+void compareCM(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b) {
+  assert(c, aSize == 4 and bSize == 4);
+
+  lir::Register tmp(c->client->acquireTemporary());
+  moveMR(c, bSize, b, bSize, &tmp);
+  compareCR(c, aSize, a, bSize, &tmp);
+  c->client->releaseTemporary(tmp.low);
+}
+
+void compareRM(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize, lir::Memory* b) {
+  assert(c, aSize == 4 and bSize == 4);
+
+  lir::Register tmp(c->client->acquireTemporary());
+  moveMR(c, bSize, b, bSize, &tmp);
+  compareRR(c, aSize, a, bSize, &tmp);
+  c->client->releaseTemporary(tmp.low);
+}
+
+void compareUnsignedRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+                  unsigned bSize UNUSED, lir::Register* b) {
+  assert(c, aSize == 4 and bSize == 4);
+  
+  emit(c, cmplw(b->low, a->low));
+}
+
+void compareUnsignedCR(Context* c, unsigned aSize, lir::Constant* a,
+                  unsigned bSize, lir::Register* b) {
+  assert(c, aSize == 4 and bSize == 4);
+
+  if (a->value->resolved() and (a->value->value() >> 16) == 0) {
+    emit(c, cmplwi(b->low, a->value->value()));
+  } else {
+    lir::Register tmp(c->client->acquireTemporary());
+    moveCR(c, aSize, a, bSize, &tmp);
+    compareUnsignedRR(c, bSize, &tmp, bSize, b);
+    c->client->releaseTemporary(tmp.low);
+  }
+}
+
+int32_t
+branch(Context* c, lir::TernaryOperation op) {
+  switch (op) {
+  case lir::JumpIfEqual:
+    return beq(0);
+    
+  case lir::JumpIfNotEqual:
+    return bne(0);
+    
+  case lir::JumpIfLess:
+    return blt(0);
+    
+  case lir::JumpIfGreater:
+    return bgt(0);
+    
+  case lir::JumpIfLessOrEqual:
+    return ble(0);
+    
+  case lir::JumpIfGreaterOrEqual:
+    return bge(0);
+    
+  default:
+    abort(c);
+  }
+}
+
+void conditional(Context* c, int32_t branch, lir::Constant* target) {
+  appendOffsetTask(c, target->value, offsetPromise(c), true);
+  emit(c, branch);
+}
+
+void branch(Context* c, lir::TernaryOperation op, lir::Constant* target) {
+  conditional(c, branch(c, op), target);
+}
+
+void branchLong(Context* c, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compareSigned,
+           BinaryOperationType compareUnsigned) {
+  compareSigned(c, 4, ah, 4, bh);
+
+  unsigned next = 0;
+  
+  switch (op) {
+  case lir::JumpIfEqual:
+    next = c->code.length();
+    emit(c, bne(0));
+
+    compareSigned(c, 4, al, 4, bl);
+    conditional(c, beq(0), target);
+    break;
+
+  case lir::JumpIfNotEqual:
+    conditional(c, bne(0), target);
+
+    compareSigned(c, 4, al, 4, bl);
+    conditional(c, bne(0), target);
+    break;
+
+  case lir::JumpIfLess:
+    conditional(c, blt(0), target);
+
+    next = c->code.length();
+    emit(c, bgt(0));
+
+    compareUnsigned(c, 4, al, 4, bl);
+    conditional(c, blt(0), target);
+    break;
+
+  case lir::JumpIfGreater:
+    conditional(c, bgt(0), target);
+
+    next = c->code.length();
+    emit(c, blt(0));
+
+    compareUnsigned(c, 4, al, 4, bl);
+    conditional(c, bgt(0), target);
+    break;
+
+  case lir::JumpIfLessOrEqual:
+    conditional(c, blt(0), target);
+
+    next = c->code.length();
+    emit(c, bgt(0));
+
+    compareUnsigned(c, 4, al, 4, bl);
+    conditional(c, ble(0), target);
+    break;
+
+  case lir::JumpIfGreaterOrEqual:
+    conditional(c, bgt(0), target);
+
+    next = c->code.length();
+    emit(c, blt(0));
+
+    compareUnsigned(c, 4, al, 4, bl);
+    conditional(c, bge(0), target);
+    break;
+
+  default:
+    abort(c);
+  }
+
+  if (next) {
+    updateOffset
+      (c->s, c->code.data + next, true, reinterpret_cast<intptr_t>
+       (c->code.data + c->code.length()), 0);
+  }
+}
+
+void branchRR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target) {
+  if (size > TargetBytesPerWord) {
+    lir::Register ah(a->high);
+    lir::Register bh(b->high);
+
+    branchLong(c, op, a, &ah, b, &bh, target, CAST2(compareRR),
+               CAST2(compareUnsignedRR));
+  } else {
+    compareRR(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+void branchCR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target) {
+  if (size > TargetBytesPerWord) {
+    int64_t v = a->value->value();
+
+    ResolvedPromise low(v & ~static_cast<target_uintptr_t>(0));
+    lir::Constant al(&low);
+
+    ResolvedPromise high((v >> 32) & ~static_cast<target_uintptr_t>(0));
+    lir::Constant ah(&high);
+
+    lir::Register bh(b->high);
+
+    branchLong(c, op, &al, &ah, b, &bh, target, CAST2(compareCR),
+               CAST2(compareUnsignedCR));
+  } else {
+    compareCR(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+void branchRM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target) {
+  assert(c, size <= TargetBytesPerWord);
+
+  compareRM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+void branchCM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target) {
+  assert(c, size <= TargetBytesPerWord);
+
+  compareCM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+void moveCM(Context* c, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Memory* dst) {
+  switch (dstSize) {
+  case 8: {
+    lir::Constant srcHigh
+      (shiftMaskPromise(c, src->value, 32, 0xFFFFFFFF));
+    lir::Constant srcLow
+      (shiftMaskPromise(c, src->value, 0, 0xFFFFFFFF));
+    
+    lir::Memory dstLow
+      (dst->base, dst->offset + 4, dst->index, dst->scale);
+    
+    moveCM(c, 4, &srcLow, 4, &dstLow);
+    moveCM(c, 4, &srcHigh, 4, dst);
+  } break;
+
+  default:
+    lir::Register tmp(c->client->acquireTemporary());
+    moveCR(c, srcSize, src, dstSize, &tmp);
+    moveRM(c, dstSize, &tmp, dstSize, dst);
+    c->client->releaseTemporary(tmp.low);
+  }
+}
+
+void negateRR(Context* c, unsigned srcSize, lir::Register* src,
+         unsigned dstSize UNUSED, lir::Register* dst) {
+  assert(c, srcSize == dstSize);
+
+  if (srcSize == 8) {
+    lir::Register dstHigh(dst->high);
+
+    emit(c, subfic(dst->low, src->low, 0));
+    emit(c, subfze(dst->high, src->high));
+  } else {
+    emit(c, neg(dst->low, src->low));
+  }
+}
+
+void callR(Context* c, unsigned size UNUSED, lir::Register* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  emit(c, mtctr(target->low));
+  emit(c, bctrl());
+}
+
+void callC(Context* c, unsigned size UNUSED, lir::Constant* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  appendOffsetTask(c, target->value, offsetPromise(c), false);
+  emit(c, bl(0));
+}
+
+void longCallC(Context* c, unsigned size UNUSED, lir::Constant* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  lir::Register tmp(0);
+  moveCR2(c, TargetBytesPerWord, target, TargetBytesPerWord, &tmp, 12);
+  callR(c, TargetBytesPerWord, &tmp);
+}
+
+void alignedLongCallC(Context* c, unsigned size UNUSED, lir::Constant* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  lir::Register tmp(c->client->acquireTemporary());
+  lir::Address address(appendConstantPoolEntry(c, target->value));
+  moveAR2(c, TargetBytesPerWord, &address, TargetBytesPerWord, &tmp, 12);
+  callR(c, TargetBytesPerWord, &tmp);
+  c->client->releaseTemporary(tmp.low);
+}
+
+void longJumpC(Context* c, unsigned size UNUSED, lir::Constant* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  lir::Register tmp(0);
+  moveCR2(c, TargetBytesPerWord, target, TargetBytesPerWord, &tmp, 12);
+  jumpR(c, TargetBytesPerWord, &tmp);
+}
+
+void alignedLongJumpC(Context* c, unsigned size UNUSED, lir::Constant* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  lir::Register tmp(c->client->acquireTemporary());
+  lir::Address address(appendConstantPoolEntry(c, target->value));
+  moveAR2(c, TargetBytesPerWord, &address, TargetBytesPerWord, &tmp, 12);
+  jumpR(c, TargetBytesPerWord, &tmp);
+  c->client->releaseTemporary(tmp.low);
+}
+
+void jumpC(Context* c, unsigned size UNUSED, lir::Constant* target) {
+  assert(c, size == TargetBytesPerWord);
+
+  appendOffsetTask(c, target->value, offsetPromise(c), false);
+  emit(c, b(0));
+}
+
+void return_(Context* c) {
+  emit(c, blr());
+}
+
+void trap(Context* c) {
+  emit(c, isa::trap());
+}
+
+void memoryBarrier(Context* c) {
+  emit(c, sync(0));
+}
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/powerpc/operations.h b/src/codegen/powerpc/operations.h
new file mode 100644
index 0000000000..3e16dc5292
--- /dev/null
+++ b/src/codegen/powerpc/operations.h
@@ -0,0 +1,197 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_OPERATIONS_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_OPERATIONS_H
+
+#include "context.h"
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+inline void emit(Context* con, int code) { con->code.append4(vm::targetV4(code)); }
+inline int newTemp(Context* con) { return con->client->acquireTemporary(); }
+inline void freeTemp(Context* con, int r) { con->client->releaseTemporary(r); }
+inline int64_t getValue(lir::Constant* c) { return c->value->value(); }
+
+void andC(Context* c, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void moveRR(Context* c, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);
+
+void shiftLeftC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void shiftRightC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void unsignedShiftRightC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void jumpR(Context* c, unsigned size UNUSED, lir::Register* target);
+
+void swapRR(Context* c, unsigned aSize, lir::Register* a,
+       unsigned bSize, lir::Register* b);
+
+void moveRR(Context* c, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);
+
+void moveZRR(Context* c, unsigned srcSize, lir::Register* src,
+        unsigned, lir::Register* dst);
+
+void moveCR2(Context* c, unsigned, lir::Constant* src,
+       unsigned dstSize, lir::Register* dst, unsigned promiseOffset);
+
+void moveCR(Context* c, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Register* dst);
+
+void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void addC(Context* con, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void subC(Context* c, unsigned size, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void divideR(Context* con, unsigned size UNUSED, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void remainderR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+int
+normalize(Context* c, int offset, int index, unsigned scale, 
+          bool* preserveIndex, bool* release);
+
+void store(Context* c, unsigned size, lir::Register* src,
+      int base, int offset, int index, unsigned scale, bool preserveIndex);
+
+void moveRM(Context* c, unsigned srcSize, lir::Register* src,
+       unsigned dstSize UNUSED, lir::Memory* dst);
+
+void moveAndUpdateRM(Context* c, unsigned srcSize UNUSED, lir::Register* src,
+                unsigned dstSize UNUSED, lir::Memory* dst);
+
+void load(Context* c, unsigned srcSize, int base, int offset, int index,
+     unsigned scale, unsigned dstSize, lir::Register* dst,
+     bool preserveIndex, bool signExtend);
+
+void moveMR(Context* c, unsigned srcSize, lir::Memory* src,
+       unsigned dstSize, lir::Register* dst);
+
+void moveZMR(Context* c, unsigned srcSize, lir::Memory* src,
+        unsigned dstSize, lir::Register* dst);
+
+void andR(Context* c, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst);
+
+void andC(Context* c, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void orR(Context* c, unsigned size, lir::Register* a,
+    lir::Register* b, lir::Register* dst);
+
+void orC(Context* c, unsigned size, lir::Constant* a,
+    lir::Register* b, lir::Register* dst);
+
+void xorR(Context* c, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst);
+
+void xorC(Context* c, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);
+
+void moveAR2(Context* c, unsigned srcSize UNUSED, lir::Address* src,
+        unsigned dstSize, lir::Register* dst, unsigned promiseOffset);
+
+void moveAR(Context* c, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst);
+
+void compareRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b);
+
+void compareCR(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b);
+
+void compareCM(Context* c, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b);
+
+void compareRM(Context* c, unsigned aSize, lir::Register* a,
+          unsigned bSize, lir::Memory* b);
+
+void compareUnsignedRR(Context* c, unsigned aSize UNUSED, lir::Register* a,
+                  unsigned bSize UNUSED, lir::Register* b);
+
+void compareUnsignedCR(Context* c, unsigned aSize, lir::Constant* a,
+                  unsigned bSize, lir::Register* b);
+
+int32_t branch(Context* c, lir::TernaryOperation op);
+
+void conditional(Context* c, int32_t branch, lir::Constant* target);
+
+void branch(Context* c, lir::TernaryOperation op, lir::Constant* target);
+
+void branchLong(Context* c, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compareSigned,
+           BinaryOperationType compareUnsigned);
+
+void branchRR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target);
+
+void branchCR(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target);
+
+void branchRM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target);
+
+void branchCM(Context* c, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target);
+
+void moveCM(Context* c, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Memory* dst);
+
+void negateRR(Context* c, unsigned srcSize, lir::Register* src,
+         unsigned dstSize UNUSED, lir::Register* dst);
+
+void callR(Context* c, unsigned size UNUSED, lir::Register* target);
+
+void callC(Context* c, unsigned size UNUSED, lir::Constant* target);
+
+void longCallC(Context* c, unsigned size UNUSED, lir::Constant* target);
+
+void alignedLongCallC(Context* c, unsigned size UNUSED, lir::Constant* target);
+
+void longJumpC(Context* c, unsigned size UNUSED, lir::Constant* target);
+
+void alignedLongJumpC(Context* c, unsigned size UNUSED, lir::Constant* target);
+
+void jumpC(Context* c, unsigned size UNUSED, lir::Constant* target);
+
+void return_(Context* c);
+
+void trap(Context* c);
+
+void memoryBarrier(Context* c);
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_OPERATIONS_H
diff --git a/src/codegen/powerpc/registers.h b/src/codegen/powerpc/registers.h
new file mode 100644
index 0000000000..ce395a373e
--- /dev/null
+++ b/src/codegen/powerpc/registers.h
@@ -0,0 +1,23 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_REGISTERS_H
+#define AVIAN_CODEGEN_ASSEMBLER_POWERPC_REGISTERS_H
+
+namespace avian {
+namespace codegen {
+namespace powerpc {
+
+
+} // namespace powerpc
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_POWERPC_REGISTERS_H

From 3d1ae1d517c3b1b11b7be4b4a634ea8e3a4a54e8 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sat, 23 Feb 2013 22:00:15 -0700
Subject: [PATCH 14/22] fix include-what-you-use violations in x86 assembler

---
 src/codegen/arm/assembler.cpp     |  6 ++---
 src/codegen/powerpc/assembler.cpp |  6 -----
 src/codegen/powerpc/fixup.cpp     |  2 +-
 src/codegen/x86/assembler.cpp     | 37 ++++++++++++++++++++-----------
 src/codegen/x86/block.cpp         |  5 +++--
 src/codegen/x86/context.cpp       |  7 ++++--
 src/codegen/x86/context.h         |  7 +++++-
 src/codegen/x86/detect.cpp        |  5 +++--
 src/codegen/x86/encode.cpp        | 15 +++++++++----
 src/codegen/x86/encode.h          | 12 ++++++++--
 src/codegen/x86/fixup.cpp         | 20 ++++++++++++-----
 src/codegen/x86/fixup.h           |  8 ++++---
 src/codegen/x86/multimethod.cpp   |  8 ++++---
 src/codegen/x86/multimethod.h     |  4 ++++
 src/codegen/x86/operations.cpp    | 26 ++++++++++++++++------
 src/codegen/x86/operations.h      |  6 +++++
 src/codegen/x86/padding.cpp       |  9 ++++----
 src/codegen/x86/padding.h         |  1 +
 18 files changed, 123 insertions(+), 61 deletions(-)

diff --git a/src/codegen/arm/assembler.cpp b/src/codegen/arm/assembler.cpp
index ac00fdb5cb..511f7ccc32 100644
--- a/src/codegen/arm/assembler.cpp
+++ b/src/codegen/arm/assembler.cpp
@@ -60,11 +60,9 @@ const unsigned StackAlignmentInBytes = 8;
 const unsigned StackAlignmentInWords
 = StackAlignmentInBytes / TargetBytesPerWord;
 
-void
-resolve(MyBlock*);
+void resolve(MyBlock*);
 
-unsigned
-padding(MyBlock*, unsigned);
+unsigned padding(MyBlock*, unsigned);
 
 class ConstantPoolEntry;
 
diff --git a/src/codegen/powerpc/assembler.cpp b/src/codegen/powerpc/assembler.cpp
index 39405769bf..cda4fdbc96 100644
--- a/src/codegen/powerpc/assembler.cpp
+++ b/src/codegen/powerpc/assembler.cpp
@@ -178,12 +178,6 @@ resolve(MyBlock* b)
 
 using namespace isa;
 
-inline void
-write4(uint8_t* dst, uint32_t v)
-{
-  memcpy(dst, &v, 4);
-}
-
 // END OPERATION COMPILERS
 
 unsigned
diff --git a/src/codegen/powerpc/fixup.cpp b/src/codegen/powerpc/fixup.cpp
index eb8a4aac6d..06075c00e6 100644
--- a/src/codegen/powerpc/fixup.cpp
+++ b/src/codegen/powerpc/fixup.cpp
@@ -21,7 +21,7 @@ using namespace isa;
 
 unsigned padding(MyBlock*, unsigned);
 
-inline int ha16(int32_t i);
+int ha16(int32_t i);
 
 bool bounded(int right, int left, int32_t v) {
   return ((v << left) >> left) == v and ((v >> right) << right) == v;
diff --git a/src/codegen/x86/assembler.cpp b/src/codegen/x86/assembler.cpp
index 79b5377959..a568430f91 100644
--- a/src/codegen/x86/assembler.cpp
+++ b/src/codegen/x86/assembler.cpp
@@ -8,25 +8,36 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
+#include <stdarg.h>
+#include <stdint.h>
+#include <string.h>
+
 #include "environment.h"
 #include "target.h"
 #include "alloc-vector.h"
-
-#include <avian/vm/codegen/assembler.h>
-#include <avian/vm/codegen/registers.h>
-
-#include "codegen/x86/context.h"
-#include "codegen/x86/block.h"
-#include "codegen/x86/fixup.h"
-#include "codegen/x86/padding.h"
-#include "codegen/x86/registers.h"
-#include "codegen/x86/encode.h"
-#include "codegen/x86/operations.h"
-#include "codegen/x86/detect.h"
-#include "codegen/x86/multimethod.h"
+#include "common.h"
+#include "allocator.h"
+#include "zone.h"
 
 #include <avian/util/runtime-array.h>
 #include <avian/util/abort.h>
+#include <avian/util/math.h>
+
+#include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/registers.h>
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/promise.h>
+
+#include <avian/vm/system/system.h>
+
+#include "context.h"
+#include "block.h"
+#include "fixup.h"
+#include "padding.h"
+#include "registers.h"
+#include "operations.h"
+#include "detect.h"
+#include "multimethod.h"
 
 #define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
 #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
diff --git a/src/codegen/x86/block.cpp b/src/codegen/x86/block.cpp
index 97bac8dbf1..5c2b125283 100644
--- a/src/codegen/x86/block.cpp
+++ b/src/codegen/x86/block.cpp
@@ -8,8 +8,9 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include "codegen/x86/block.h"
-#include "common.h"
+#include "block.h"
+
+#include <avian/vm/codegen/assembler.h>
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/x86/context.cpp b/src/codegen/x86/context.cpp
index 4c996f2b75..88de52a2d1 100644
--- a/src/codegen/x86/context.cpp
+++ b/src/codegen/x86/context.cpp
@@ -8,8 +8,11 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include "codegen/x86/context.h"
-#include "codegen/x86/block.h"
+#include "allocator.h"
+#include "zone.h"
+
+#include "context.h"
+#include "block.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/x86/context.h b/src/codegen/x86/context.h
index d85eb58758..ce1a3c1e70 100644
--- a/src/codegen/x86/context.h
+++ b/src/codegen/x86/context.h
@@ -15,9 +15,14 @@
 #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
 #define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
 
+#include <stdint.h>
+
+#include "alloc-vector.h"
+
 #include <avian/vm/codegen/lir.h>
 #include <avian/vm/codegen/assembler.h>
-#include "alloc-vector.h"
+
+#include <avian/vm/system/system.h>
 
 namespace vm {
 class System;
diff --git a/src/codegen/x86/detect.cpp b/src/codegen/x86/detect.cpp
index eaf6af54ac..9f154aff0a 100644
--- a/src/codegen/x86/detect.cpp
+++ b/src/codegen/x86/detect.cpp
@@ -9,8 +9,9 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include "codegen/x86/context.h"
-#include "codegen/x86/block.h"
+#include "target.h"
+
+#include "context.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/x86/encode.cpp b/src/codegen/x86/encode.cpp
index 8293550be6..730d71fdc9 100644
--- a/src/codegen/x86/encode.cpp
+++ b/src/codegen/x86/encode.cpp
@@ -9,11 +9,18 @@
    details. */
 
 #include "target.h"
+#include "alloc-vector.h"
 
-#include "codegen/x86/context.h"
-#include "codegen/x86/encode.h"
-#include "codegen/x86/registers.h"
-#include "codegen/x86/fixup.h"
+#include <avian/util/abort.h>
+#include <avian/util/math.h>
+
+#include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/promise.h>
+
+#include "context.h"
+#include "encode.h"
+#include "registers.h"
+#include "fixup.h"
 
 using namespace avian::util;
 
diff --git a/src/codegen/x86/encode.h b/src/codegen/x86/encode.h
index e8fb3f9d7f..853d7324e4 100644
--- a/src/codegen/x86/encode.h
+++ b/src/codegen/x86/encode.h
@@ -11,12 +11,20 @@
 #ifndef AVIAN_CODEGEN_ASSEMBLER_X86_ENCODE_H
 #define AVIAN_CODEGEN_ASSEMBLER_X86_ENCODE_H
 
-#include <avian/vm/codegen/assembler.h>
-#include "codegen/x86/registers.h"
+#include <stdint.h>
+
+#include "common.h"
+
+#include <avian/vm/codegen/lir.h>
+
+#include "registers.h"
 
 namespace avian {
 namespace codegen {
 namespace x86 {
+
+class Context;
+
 void maybeRex(Context* c, unsigned size, int a, int index, int base, bool always);
 
 void maybeRex(Context* c, unsigned size, lir::Register* a, lir::Register* b);
diff --git a/src/codegen/x86/fixup.cpp b/src/codegen/x86/fixup.cpp
index d914a22bdb..5e85d7c062 100644
--- a/src/codegen/x86/fixup.cpp
+++ b/src/codegen/x86/fixup.cpp
@@ -8,12 +8,20 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include <avian/vm/codegen/assembler.h>
-   
-#include "codegen/x86/context.h"
-#include "codegen/x86/fixup.h"
-#include "codegen/x86/padding.h"
-#include "codegen/x86/block.h"
+#include <string.h>
+
+#include "allocator.h"
+#include "alloc-vector.h"
+#include "common.h"
+#include "zone.h"
+
+#include <avian/util/abort.h>
+#include <avian/vm/system/system.h>
+
+#include "context.h"
+#include "fixup.h"
+#include "padding.h"
+#include "block.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/x86/fixup.h b/src/codegen/x86/fixup.h
index 2bf8ff3481..78f77b3044 100644
--- a/src/codegen/x86/fixup.h
+++ b/src/codegen/x86/fixup.h
@@ -11,17 +11,19 @@
 #ifndef AVIAN_CODEGEN_ASSEMBLER_X86_FIXUP_H
 #define AVIAN_CODEGEN_ASSEMBLER_X86_FIXUP_H
 
+#include <stdint.h>
+
+#include <avian/vm/codegen/promise.h>
+
 namespace vm {
 class System;
 }
 
 namespace avian {
 namespace codegen {
-
-class Promise;
-
 namespace x86 {
 
+class Context;
 class MyBlock;
 class AlignmentPadding;
 
diff --git a/src/codegen/x86/multimethod.cpp b/src/codegen/x86/multimethod.cpp
index 5c4663c7f9..f0ad4621b2 100644
--- a/src/codegen/x86/multimethod.cpp
+++ b/src/codegen/x86/multimethod.cpp
@@ -10,11 +10,13 @@
 
 #include "common.h"
 
+#include <avian/util/abort.h>
+
 #include <avian/vm/codegen/lir.h>
 
-#include "codegen/x86/context.h"
-#include "codegen/x86/multimethod.h"
-#include "codegen/x86/operations.h"
+#include "context.h"
+#include "multimethod.h"
+#include "operations.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/x86/multimethod.h b/src/codegen/x86/multimethod.h
index 5a73850f29..6ede17f17a 100644
--- a/src/codegen/x86/multimethod.h
+++ b/src/codegen/x86/multimethod.h
@@ -11,6 +11,10 @@
 #ifndef AVIAN_CODEGEN_ASSEMBLER_X86_MULTIMETHOD_H
 #define AVIAN_CODEGEN_ASSEMBLER_X86_MULTIMETHOD_H
 
+#include "common.h"
+
+#include <avian/vm/codegen/lir.h>
+
 namespace avian {
 namespace codegen {
 namespace x86 {
diff --git a/src/codegen/x86/operations.cpp b/src/codegen/x86/operations.cpp
index 56eb77c127..60d33adedc 100644
--- a/src/codegen/x86/operations.cpp
+++ b/src/codegen/x86/operations.cpp
@@ -8,13 +8,25 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include "codegen/x86/context.h"
-#include "codegen/x86/encode.h"
-#include "codegen/x86/registers.h"
-#include "codegen/x86/detect.h"
-#include "codegen/x86/operations.h"
-#include "codegen/x86/padding.h"
-#include "codegen/x86/fixup.h"
+#include <stdint.h>
+
+#include "target.h"
+#include "alloc-vector.h"
+#include "allocator.h"
+#include "zone.h"
+
+#include <avian/util/abort.h>
+
+#include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/promise.h>
+
+#include "context.h"
+#include "encode.h"
+#include "registers.h"
+#include "detect.h"
+#include "operations.h"
+#include "padding.h"
+#include "fixup.h"
 
 using namespace avian::util;
 
diff --git a/src/codegen/x86/operations.h b/src/codegen/x86/operations.h
index 52d0d8dc0f..2b5081591c 100644
--- a/src/codegen/x86/operations.h
+++ b/src/codegen/x86/operations.h
@@ -11,6 +11,12 @@
 #ifndef AVIAN_CODEGEN_ASSEMBLER_X86_OPERATIONS_H
 #define AVIAN_CODEGEN_ASSEMBLER_X86_OPERATIONS_H
 
+#include "common.h"
+
+#include <avian/vm/codegen/lir.h>
+
+#include "context.h"
+
 namespace avian {
 namespace codegen {
 namespace x86 {
diff --git a/src/codegen/x86/padding.cpp b/src/codegen/x86/padding.cpp
index 7873145bb4..e31e8626e9 100644
--- a/src/codegen/x86/padding.cpp
+++ b/src/codegen/x86/padding.cpp
@@ -8,12 +8,11 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include <avian/vm/codegen/assembler.h>
+#include "alloc-vector.h"
 
-#include "codegen/x86/context.h"
-#include "codegen/x86/fixup.h"
-#include "codegen/x86/padding.h"
-#include "codegen/x86/block.h"
+#include "context.h"
+#include "padding.h"
+#include "block.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/x86/padding.h b/src/codegen/x86/padding.h
index 32fc358142..ef9f834f18 100644
--- a/src/codegen/x86/padding.h
+++ b/src/codegen/x86/padding.h
@@ -15,6 +15,7 @@ namespace avian {
 namespace codegen {
 namespace x86 {
 
+class Context;
 
 class AlignmentPadding {
  public:

From 56625b89d893228e96072842193bd0e66e323a70 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sat, 23 Feb 2013 22:47:52 -0700
Subject: [PATCH 15/22] group x86, arm, and powerpc target directories into
 'target'

---
 makefile                                         | 12 ++++++------
 src/codegen/{ => target}/arm/assembler.cpp       |  0
 src/codegen/{ => target}/arm/block.cpp           |  0
 src/codegen/{ => target}/arm/block.h             |  0
 src/codegen/{ => target}/arm/context.cpp         |  0
 src/codegen/{ => target}/arm/context.h           |  0
 src/codegen/{ => target}/arm/encode.h            |  0
 src/codegen/{ => target}/arm/fixup.cpp           |  0
 src/codegen/{ => target}/arm/fixup.h             |  0
 src/codegen/{ => target}/arm/multimethod.cpp     |  0
 src/codegen/{ => target}/arm/multimethod.h       |  0
 src/codegen/{ => target}/arm/operations.cpp      |  0
 src/codegen/{ => target}/arm/operations.h        |  0
 src/codegen/{ => target}/arm/registers.h         |  0
 src/codegen/{ => target}/powerpc/assembler.cpp   |  0
 src/codegen/{ => target}/powerpc/block.cpp       |  0
 src/codegen/{ => target}/powerpc/block.h         |  0
 src/codegen/{ => target}/powerpc/context.cpp     |  0
 src/codegen/{ => target}/powerpc/context.h       |  0
 src/codegen/{ => target}/powerpc/encode.h        |  0
 src/codegen/{ => target}/powerpc/fixup.cpp       |  0
 src/codegen/{ => target}/powerpc/fixup.h         |  0
 src/codegen/{ => target}/powerpc/multimethod.cpp |  0
 src/codegen/{ => target}/powerpc/multimethod.h   |  0
 src/codegen/{ => target}/powerpc/operations.cpp  |  0
 src/codegen/{ => target}/powerpc/operations.h    |  0
 src/codegen/{ => target}/powerpc/registers.h     |  0
 src/codegen/{ => target}/x86/assembler.cpp       |  0
 src/codegen/{ => target}/x86/block.cpp           |  0
 src/codegen/{ => target}/x86/block.h             |  0
 src/codegen/{ => target}/x86/context.cpp         |  0
 src/codegen/{ => target}/x86/context.h           |  0
 src/codegen/{ => target}/x86/detect.cpp          |  0
 src/codegen/{ => target}/x86/detect.h            |  0
 src/codegen/{ => target}/x86/encode.cpp          |  0
 src/codegen/{ => target}/x86/encode.h            |  0
 src/codegen/{ => target}/x86/fixup.cpp           |  0
 src/codegen/{ => target}/x86/fixup.h             |  0
 src/codegen/{ => target}/x86/multimethod.cpp     |  0
 src/codegen/{ => target}/x86/multimethod.h       |  0
 src/codegen/{ => target}/x86/operations.cpp      |  0
 src/codegen/{ => target}/x86/operations.h        |  0
 src/codegen/{ => target}/x86/padding.cpp         |  0
 src/codegen/{ => target}/x86/padding.h           |  0
 src/codegen/{ => target}/x86/registers.h         |  0
 45 files changed, 6 insertions(+), 6 deletions(-)
 rename src/codegen/{ => target}/arm/assembler.cpp (100%)
 rename src/codegen/{ => target}/arm/block.cpp (100%)
 rename src/codegen/{ => target}/arm/block.h (100%)
 rename src/codegen/{ => target}/arm/context.cpp (100%)
 rename src/codegen/{ => target}/arm/context.h (100%)
 rename src/codegen/{ => target}/arm/encode.h (100%)
 rename src/codegen/{ => target}/arm/fixup.cpp (100%)
 rename src/codegen/{ => target}/arm/fixup.h (100%)
 rename src/codegen/{ => target}/arm/multimethod.cpp (100%)
 rename src/codegen/{ => target}/arm/multimethod.h (100%)
 rename src/codegen/{ => target}/arm/operations.cpp (100%)
 rename src/codegen/{ => target}/arm/operations.h (100%)
 rename src/codegen/{ => target}/arm/registers.h (100%)
 rename src/codegen/{ => target}/powerpc/assembler.cpp (100%)
 rename src/codegen/{ => target}/powerpc/block.cpp (100%)
 rename src/codegen/{ => target}/powerpc/block.h (100%)
 rename src/codegen/{ => target}/powerpc/context.cpp (100%)
 rename src/codegen/{ => target}/powerpc/context.h (100%)
 rename src/codegen/{ => target}/powerpc/encode.h (100%)
 rename src/codegen/{ => target}/powerpc/fixup.cpp (100%)
 rename src/codegen/{ => target}/powerpc/fixup.h (100%)
 rename src/codegen/{ => target}/powerpc/multimethod.cpp (100%)
 rename src/codegen/{ => target}/powerpc/multimethod.h (100%)
 rename src/codegen/{ => target}/powerpc/operations.cpp (100%)
 rename src/codegen/{ => target}/powerpc/operations.h (100%)
 rename src/codegen/{ => target}/powerpc/registers.h (100%)
 rename src/codegen/{ => target}/x86/assembler.cpp (100%)
 rename src/codegen/{ => target}/x86/block.cpp (100%)
 rename src/codegen/{ => target}/x86/block.h (100%)
 rename src/codegen/{ => target}/x86/context.cpp (100%)
 rename src/codegen/{ => target}/x86/context.h (100%)
 rename src/codegen/{ => target}/x86/detect.cpp (100%)
 rename src/codegen/{ => target}/x86/detect.h (100%)
 rename src/codegen/{ => target}/x86/encode.cpp (100%)
 rename src/codegen/{ => target}/x86/encode.h (100%)
 rename src/codegen/{ => target}/x86/fixup.cpp (100%)
 rename src/codegen/{ => target}/x86/fixup.h (100%)
 rename src/codegen/{ => target}/x86/multimethod.cpp (100%)
 rename src/codegen/{ => target}/x86/multimethod.h (100%)
 rename src/codegen/{ => target}/x86/operations.cpp (100%)
 rename src/codegen/{ => target}/x86/operations.h (100%)
 rename src/codegen/{ => target}/x86/padding.cpp (100%)
 rename src/codegen/{ => target}/x86/padding.h (100%)
 rename src/codegen/{ => target}/x86/registers.h (100%)

diff --git a/makefile b/makefile
index 05fe57adba..5029fe0b36 100755
--- a/makefile
+++ b/makefile
@@ -978,17 +978,17 @@ compiler-sources = \
 compiler-objects = $(call cpp-objects,$(compiler-sources),$(src),$(build))
 $(compiler-objects): $(wildcard $(src)/codegen/compiler/*.h) $(vm-depends)
 
-x86-assembler-sources = $(wildcard $(src)/codegen/x86/*.cpp)
+x86-assembler-sources = $(wildcard $(src)/codegen/target/x86/*.cpp)
 x86-assembler-objects = $(call cpp-objects,$(x86-assembler-sources),$(src),$(build))
-$(x86-assembler-objects): $(wildcard $(src)/codegen/x86/*.h) $(vm-depends)
+$(x86-assembler-objects): $(wildcard $(src)/codegen/target/x86/*.h) $(vm-depends)
 
-arm-assembler-sources = $(wildcard $(src)/codegen/arm/*.cpp)
+arm-assembler-sources = $(wildcard $(src)/codegen/target/arm/*.cpp)
 arm-assembler-objects = $(call cpp-objects,$(arm-assembler-sources),$(src),$(build))
-$(arm-assembler-objects): $(wildcard $(src)/codegen/arm/*.h) $(vm-depends)
+$(arm-assembler-objects): $(wildcard $(src)/codegen/target/arm/*.h) $(vm-depends)
 
-powerpc-assembler-sources = $(wildcard $(src)/codegen/powerpc/*.cpp)
+powerpc-assembler-sources = $(wildcard $(src)/codegen/target/powerpc/*.cpp)
 powerpc-assembler-objects = $(call cpp-objects,$(powerpc-assembler-sources),$(src),$(build))
-$(powerpc-assembler-objects): $(wildcard $(src)/codegen/powerpc/*.h) $(vm-depends)
+$(powerpc-assembler-objects): $(wildcard $(src)/codegen/target/powerpc/*.h) $(vm-depends)
 
 all-assembler-sources = \
 	$(x86-assembler-sources) \
diff --git a/src/codegen/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp
similarity index 100%
rename from src/codegen/arm/assembler.cpp
rename to src/codegen/target/arm/assembler.cpp
diff --git a/src/codegen/arm/block.cpp b/src/codegen/target/arm/block.cpp
similarity index 100%
rename from src/codegen/arm/block.cpp
rename to src/codegen/target/arm/block.cpp
diff --git a/src/codegen/arm/block.h b/src/codegen/target/arm/block.h
similarity index 100%
rename from src/codegen/arm/block.h
rename to src/codegen/target/arm/block.h
diff --git a/src/codegen/arm/context.cpp b/src/codegen/target/arm/context.cpp
similarity index 100%
rename from src/codegen/arm/context.cpp
rename to src/codegen/target/arm/context.cpp
diff --git a/src/codegen/arm/context.h b/src/codegen/target/arm/context.h
similarity index 100%
rename from src/codegen/arm/context.h
rename to src/codegen/target/arm/context.h
diff --git a/src/codegen/arm/encode.h b/src/codegen/target/arm/encode.h
similarity index 100%
rename from src/codegen/arm/encode.h
rename to src/codegen/target/arm/encode.h
diff --git a/src/codegen/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp
similarity index 100%
rename from src/codegen/arm/fixup.cpp
rename to src/codegen/target/arm/fixup.cpp
diff --git a/src/codegen/arm/fixup.h b/src/codegen/target/arm/fixup.h
similarity index 100%
rename from src/codegen/arm/fixup.h
rename to src/codegen/target/arm/fixup.h
diff --git a/src/codegen/arm/multimethod.cpp b/src/codegen/target/arm/multimethod.cpp
similarity index 100%
rename from src/codegen/arm/multimethod.cpp
rename to src/codegen/target/arm/multimethod.cpp
diff --git a/src/codegen/arm/multimethod.h b/src/codegen/target/arm/multimethod.h
similarity index 100%
rename from src/codegen/arm/multimethod.h
rename to src/codegen/target/arm/multimethod.h
diff --git a/src/codegen/arm/operations.cpp b/src/codegen/target/arm/operations.cpp
similarity index 100%
rename from src/codegen/arm/operations.cpp
rename to src/codegen/target/arm/operations.cpp
diff --git a/src/codegen/arm/operations.h b/src/codegen/target/arm/operations.h
similarity index 100%
rename from src/codegen/arm/operations.h
rename to src/codegen/target/arm/operations.h
diff --git a/src/codegen/arm/registers.h b/src/codegen/target/arm/registers.h
similarity index 100%
rename from src/codegen/arm/registers.h
rename to src/codegen/target/arm/registers.h
diff --git a/src/codegen/powerpc/assembler.cpp b/src/codegen/target/powerpc/assembler.cpp
similarity index 100%
rename from src/codegen/powerpc/assembler.cpp
rename to src/codegen/target/powerpc/assembler.cpp
diff --git a/src/codegen/powerpc/block.cpp b/src/codegen/target/powerpc/block.cpp
similarity index 100%
rename from src/codegen/powerpc/block.cpp
rename to src/codegen/target/powerpc/block.cpp
diff --git a/src/codegen/powerpc/block.h b/src/codegen/target/powerpc/block.h
similarity index 100%
rename from src/codegen/powerpc/block.h
rename to src/codegen/target/powerpc/block.h
diff --git a/src/codegen/powerpc/context.cpp b/src/codegen/target/powerpc/context.cpp
similarity index 100%
rename from src/codegen/powerpc/context.cpp
rename to src/codegen/target/powerpc/context.cpp
diff --git a/src/codegen/powerpc/context.h b/src/codegen/target/powerpc/context.h
similarity index 100%
rename from src/codegen/powerpc/context.h
rename to src/codegen/target/powerpc/context.h
diff --git a/src/codegen/powerpc/encode.h b/src/codegen/target/powerpc/encode.h
similarity index 100%
rename from src/codegen/powerpc/encode.h
rename to src/codegen/target/powerpc/encode.h
diff --git a/src/codegen/powerpc/fixup.cpp b/src/codegen/target/powerpc/fixup.cpp
similarity index 100%
rename from src/codegen/powerpc/fixup.cpp
rename to src/codegen/target/powerpc/fixup.cpp
diff --git a/src/codegen/powerpc/fixup.h b/src/codegen/target/powerpc/fixup.h
similarity index 100%
rename from src/codegen/powerpc/fixup.h
rename to src/codegen/target/powerpc/fixup.h
diff --git a/src/codegen/powerpc/multimethod.cpp b/src/codegen/target/powerpc/multimethod.cpp
similarity index 100%
rename from src/codegen/powerpc/multimethod.cpp
rename to src/codegen/target/powerpc/multimethod.cpp
diff --git a/src/codegen/powerpc/multimethod.h b/src/codegen/target/powerpc/multimethod.h
similarity index 100%
rename from src/codegen/powerpc/multimethod.h
rename to src/codegen/target/powerpc/multimethod.h
diff --git a/src/codegen/powerpc/operations.cpp b/src/codegen/target/powerpc/operations.cpp
similarity index 100%
rename from src/codegen/powerpc/operations.cpp
rename to src/codegen/target/powerpc/operations.cpp
diff --git a/src/codegen/powerpc/operations.h b/src/codegen/target/powerpc/operations.h
similarity index 100%
rename from src/codegen/powerpc/operations.h
rename to src/codegen/target/powerpc/operations.h
diff --git a/src/codegen/powerpc/registers.h b/src/codegen/target/powerpc/registers.h
similarity index 100%
rename from src/codegen/powerpc/registers.h
rename to src/codegen/target/powerpc/registers.h
diff --git a/src/codegen/x86/assembler.cpp b/src/codegen/target/x86/assembler.cpp
similarity index 100%
rename from src/codegen/x86/assembler.cpp
rename to src/codegen/target/x86/assembler.cpp
diff --git a/src/codegen/x86/block.cpp b/src/codegen/target/x86/block.cpp
similarity index 100%
rename from src/codegen/x86/block.cpp
rename to src/codegen/target/x86/block.cpp
diff --git a/src/codegen/x86/block.h b/src/codegen/target/x86/block.h
similarity index 100%
rename from src/codegen/x86/block.h
rename to src/codegen/target/x86/block.h
diff --git a/src/codegen/x86/context.cpp b/src/codegen/target/x86/context.cpp
similarity index 100%
rename from src/codegen/x86/context.cpp
rename to src/codegen/target/x86/context.cpp
diff --git a/src/codegen/x86/context.h b/src/codegen/target/x86/context.h
similarity index 100%
rename from src/codegen/x86/context.h
rename to src/codegen/target/x86/context.h
diff --git a/src/codegen/x86/detect.cpp b/src/codegen/target/x86/detect.cpp
similarity index 100%
rename from src/codegen/x86/detect.cpp
rename to src/codegen/target/x86/detect.cpp
diff --git a/src/codegen/x86/detect.h b/src/codegen/target/x86/detect.h
similarity index 100%
rename from src/codegen/x86/detect.h
rename to src/codegen/target/x86/detect.h
diff --git a/src/codegen/x86/encode.cpp b/src/codegen/target/x86/encode.cpp
similarity index 100%
rename from src/codegen/x86/encode.cpp
rename to src/codegen/target/x86/encode.cpp
diff --git a/src/codegen/x86/encode.h b/src/codegen/target/x86/encode.h
similarity index 100%
rename from src/codegen/x86/encode.h
rename to src/codegen/target/x86/encode.h
diff --git a/src/codegen/x86/fixup.cpp b/src/codegen/target/x86/fixup.cpp
similarity index 100%
rename from src/codegen/x86/fixup.cpp
rename to src/codegen/target/x86/fixup.cpp
diff --git a/src/codegen/x86/fixup.h b/src/codegen/target/x86/fixup.h
similarity index 100%
rename from src/codegen/x86/fixup.h
rename to src/codegen/target/x86/fixup.h
diff --git a/src/codegen/x86/multimethod.cpp b/src/codegen/target/x86/multimethod.cpp
similarity index 100%
rename from src/codegen/x86/multimethod.cpp
rename to src/codegen/target/x86/multimethod.cpp
diff --git a/src/codegen/x86/multimethod.h b/src/codegen/target/x86/multimethod.h
similarity index 100%
rename from src/codegen/x86/multimethod.h
rename to src/codegen/target/x86/multimethod.h
diff --git a/src/codegen/x86/operations.cpp b/src/codegen/target/x86/operations.cpp
similarity index 100%
rename from src/codegen/x86/operations.cpp
rename to src/codegen/target/x86/operations.cpp
diff --git a/src/codegen/x86/operations.h b/src/codegen/target/x86/operations.h
similarity index 100%
rename from src/codegen/x86/operations.h
rename to src/codegen/target/x86/operations.h
diff --git a/src/codegen/x86/padding.cpp b/src/codegen/target/x86/padding.cpp
similarity index 100%
rename from src/codegen/x86/padding.cpp
rename to src/codegen/target/x86/padding.cpp
diff --git a/src/codegen/x86/padding.h b/src/codegen/target/x86/padding.h
similarity index 100%
rename from src/codegen/x86/padding.h
rename to src/codegen/target/x86/padding.h
diff --git a/src/codegen/x86/registers.h b/src/codegen/target/x86/registers.h
similarity index 100%
rename from src/codegen/x86/registers.h
rename to src/codegen/target/x86/registers.h

From 4c8b593539dd927593a43f1e92f521addf33c8c9 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sat, 23 Feb 2013 23:03:01 -0700
Subject: [PATCH 16/22] break Architecture out of Assembler

---
 include/avian/vm/codegen/architecture.h  | 137 +++++++++++++++++++++++
 include/avian/vm/codegen/assembler.h     | 110 +-----------------
 include/avian/vm/codegen/targets.h       |  14 ++-
 src/codegen/compiler.cpp                 |   1 +
 src/codegen/compiler/context.cpp         |   2 +
 src/codegen/compiler/context.h           |   2 +-
 src/codegen/compiler/frame.cpp           |   2 +
 src/codegen/compiler/site.h              |   2 +
 src/codegen/target/arm/assembler.cpp     |   5 +-
 src/codegen/target/powerpc/assembler.cpp |   5 +-
 src/codegen/target/x86/assembler.cpp     |   5 +-
 src/codegen/targets.cpp                  |   4 +-
 src/compile.cpp                          |   3 +-
 src/tools/audit-codegen/main.cpp         |   2 +-
 unittest/codegen/assembler-test.cpp      |   3 +-
 15 files changed, 172 insertions(+), 125 deletions(-)
 create mode 100644 include/avian/vm/codegen/architecture.h

diff --git a/include/avian/vm/codegen/architecture.h b/include/avian/vm/codegen/architecture.h
new file mode 100644
index 0000000000..07c3fc5e35
--- /dev/null
+++ b/include/avian/vm/codegen/architecture.h
@@ -0,0 +1,137 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ARCHITECTURE_H
+#define AVIAN_CODEGEN_ARCHITECTURE_H
+
+namespace vm {
+class Allocator;
+class Zone;
+}
+
+namespace avian {
+namespace codegen {
+
+class Assembler;
+
+class RegisterFile;
+
+class OperandMask {
+public:
+  uint8_t typeMask;
+  uint64_t registerMask;
+
+  OperandMask(uint8_t typeMask, uint64_t registerMask):
+    typeMask(typeMask),
+    registerMask(registerMask)
+  { }
+
+  OperandMask():
+    typeMask(~0),
+    registerMask(~static_cast<uint64_t>(0))
+  { }
+};
+
+class Architecture {
+public:
+virtual unsigned floatRegisterSize() = 0;
+
+virtual const RegisterFile* registerFile() = 0;
+
+virtual int scratch() = 0;
+virtual int stack() = 0;
+virtual int thread() = 0;
+virtual int returnLow() = 0;
+virtual int returnHigh() = 0;
+virtual int virtualCallTarget() = 0;
+virtual int virtualCallIndex() = 0;
+
+virtual bool bigEndian() = 0;
+
+virtual uintptr_t maximumImmediateJump() = 0;
+
+virtual bool alwaysCondensed(lir::BinaryOperation op) = 0;
+virtual bool alwaysCondensed(lir::TernaryOperation op) = 0;
+
+virtual bool reserved(int register_) = 0;
+
+virtual unsigned frameFootprint(unsigned footprint) = 0;
+virtual unsigned argumentFootprint(unsigned footprint) = 0;
+virtual bool argumentAlignment() = 0;
+virtual bool argumentRegisterAlignment() = 0;
+virtual unsigned argumentRegisterCount() = 0;
+virtual int argumentRegister(unsigned index) = 0;
+
+virtual bool hasLinkRegister() = 0;
+
+virtual unsigned stackAlignmentInWords() = 0;
+
+virtual bool matchCall(void* returnAddress, void* target) = 0;
+
+virtual void updateCall(lir::UnaryOperation op, void* returnAddress,
+                        void* newTarget) = 0;
+
+virtual void setConstant(void* dst, uint64_t constant) = 0;
+
+virtual unsigned alignFrameSize(unsigned sizeInWords) = 0;
+
+virtual void nextFrame(void* start, unsigned size, unsigned footprint,
+                       void* link, bool mostRecent,
+                       unsigned targetParameterFootprint, void** ip,
+                       void** stack) = 0;
+virtual void* frameIp(void* stack) = 0;
+virtual unsigned frameHeaderSize() = 0;
+virtual unsigned frameReturnAddressSize() = 0;
+virtual unsigned frameFooterSize() = 0;
+virtual int returnAddressOffset() = 0;
+virtual int framePointerOffset() = 0;
+
+virtual void plan
+(lir::UnaryOperation op,
+ unsigned aSize, OperandMask& aMask,
+ bool* thunk) = 0;
+
+virtual void planSource
+(lir::BinaryOperation op,
+ unsigned aSize, OperandMask& aMask,
+ unsigned bSize, bool* thunk) = 0;
+
+virtual void planDestination
+(lir::BinaryOperation op,
+ unsigned aSize, const OperandMask& aMask,
+ unsigned bSize, OperandMask& bMask) = 0;
+
+virtual void planMove
+(unsigned size, OperandMask& src,
+ OperandMask& tmp,
+ const OperandMask& dst) = 0;
+
+virtual void planSource
+(lir::TernaryOperation op,
+ unsigned aSize, OperandMask& aMask,
+ unsigned bSize, OperandMask& bMask,
+ unsigned cSize, bool* thunk) = 0;
+
+virtual void planDestination
+(lir::TernaryOperation op,
+ unsigned aSize, const OperandMask& aMask,
+ unsigned bSize, const OperandMask& bMask,
+ unsigned cSize, OperandMask& cMask) = 0;
+
+virtual Assembler* makeAssembler(vm::Allocator*, vm::Zone*) = 0;
+
+virtual void acquire() = 0;
+virtual void release() = 0;
+};
+
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ARCHITECTURE_H
diff --git a/include/avian/vm/codegen/assembler.h b/include/avian/vm/codegen/assembler.h
index 6f370cbe3c..8c18176892 100644
--- a/include/avian/vm/codegen/assembler.h
+++ b/include/avian/vm/codegen/assembler.h
@@ -20,7 +20,7 @@
 namespace avian {
 namespace codegen {
 
-class RegisterFile;
+class Architecture;
 
 class OperandInfo {
 public:
@@ -35,22 +35,6 @@ public:
   { }
 };
 
-class OperandMask {
-public:
-  uint8_t typeMask;
-  uint64_t registerMask;
-
-  OperandMask(uint8_t typeMask, uint64_t registerMask):
-    typeMask(typeMask),
-    registerMask(registerMask)
-  { }
-
-  OperandMask():
-    typeMask(~0),
-    registerMask(~static_cast<uint64_t>(0))
-  { }
-};
-
 #ifdef AVIAN_TAILS
 const bool TailCalls = true;
 #else
@@ -80,98 +64,6 @@ class Assembler {
     virtual unsigned resolve(unsigned start, Block* next) = 0;
   };
 
-  class Architecture {
-   public:
-    virtual unsigned floatRegisterSize() = 0;
-
-    virtual const RegisterFile* registerFile() = 0;
-
-    virtual int scratch() = 0;
-    virtual int stack() = 0;
-    virtual int thread() = 0;
-    virtual int returnLow() = 0;
-    virtual int returnHigh() = 0;
-    virtual int virtualCallTarget() = 0;
-    virtual int virtualCallIndex() = 0;
-
-    virtual bool bigEndian() = 0;
-
-    virtual uintptr_t maximumImmediateJump() = 0;
-
-    virtual bool alwaysCondensed(lir::BinaryOperation op) = 0;
-    virtual bool alwaysCondensed(lir::TernaryOperation op) = 0;
-
-    virtual bool reserved(int register_) = 0;
-
-    virtual unsigned frameFootprint(unsigned footprint) = 0;
-    virtual unsigned argumentFootprint(unsigned footprint) = 0;
-    virtual bool argumentAlignment() = 0;
-    virtual bool argumentRegisterAlignment() = 0;
-    virtual unsigned argumentRegisterCount() = 0;
-    virtual int argumentRegister(unsigned index) = 0;
-
-    virtual bool hasLinkRegister() = 0;
-
-    virtual unsigned stackAlignmentInWords() = 0;
-
-    virtual bool matchCall(void* returnAddress, void* target) = 0;
-
-    virtual void updateCall(lir::UnaryOperation op, void* returnAddress,
-                            void* newTarget) = 0;
-
-    virtual void setConstant(void* dst, uint64_t constant) = 0;
-
-    virtual unsigned alignFrameSize(unsigned sizeInWords) = 0;
-
-    virtual void nextFrame(void* start, unsigned size, unsigned footprint,
-                           void* link, bool mostRecent,
-                           unsigned targetParameterFootprint, void** ip,
-                           void** stack) = 0;
-    virtual void* frameIp(void* stack) = 0;
-    virtual unsigned frameHeaderSize() = 0;
-    virtual unsigned frameReturnAddressSize() = 0;
-    virtual unsigned frameFooterSize() = 0;
-    virtual int returnAddressOffset() = 0;
-    virtual int framePointerOffset() = 0;
-
-    virtual void plan
-    (lir::UnaryOperation op,
-     unsigned aSize, OperandMask& aMask,
-     bool* thunk) = 0;
-
-    virtual void planSource
-    (lir::BinaryOperation op,
-     unsigned aSize, OperandMask& aMask,
-     unsigned bSize, bool* thunk) = 0;
-     
-    virtual void planDestination
-    (lir::BinaryOperation op,
-     unsigned aSize, const OperandMask& aMask,
-     unsigned bSize, OperandMask& bMask) = 0;
-
-    virtual void planMove
-    (unsigned size, OperandMask& src,
-     OperandMask& tmp,
-     const OperandMask& dst) = 0; 
-
-    virtual void planSource
-    (lir::TernaryOperation op,
-     unsigned aSize, OperandMask& aMask,
-     unsigned bSize, OperandMask& bMask,
-     unsigned cSize, bool* thunk) = 0; 
-
-    virtual void planDestination
-    (lir::TernaryOperation op,
-     unsigned aSize, const OperandMask& aMask,
-     unsigned bSize, const OperandMask& bMask,
-     unsigned cSize, OperandMask& cMask) = 0;
-
-    virtual Assembler* makeAssembler(vm::Allocator*, vm::Zone*) = 0;
-
-    virtual void acquire() = 0;
-    virtual void release() = 0;
-  };
-
   virtual void setClient(Client* client) = 0;
 
   virtual Architecture* arch() = 0;
diff --git a/include/avian/vm/codegen/targets.h b/include/avian/vm/codegen/targets.h
index e3ffbd981a..a8adb246ce 100644
--- a/include/avian/vm/codegen/targets.h
+++ b/include/avian/vm/codegen/targets.h
@@ -11,16 +11,20 @@
 #ifndef AVIAN_CODEGEN_TARGETS_H
 #define AVIAN_CODEGEN_TARGETS_H
 
-#include <avian/vm/codegen/assembler.h>
+namespace vm {
+class System;
+}
 
 namespace avian {
 namespace codegen {
 
-Assembler::Architecture* makeArchitectureNative(vm::System* system, bool useNativeFeatures);
+class Architecture;
 
-Assembler::Architecture* makeArchitectureX86(vm::System* system, bool useNativeFeatures);
-Assembler::Architecture* makeArchitectureArm(vm::System* system, bool useNativeFeatures);
-Assembler::Architecture* makeArchitecturePowerpc(vm::System* system, bool useNativeFeatures);
+Architecture* makeArchitectureNative(vm::System* system, bool useNativeFeatures);
+
+Architecture* makeArchitectureX86(vm::System* system, bool useNativeFeatures);
+Architecture* makeArchitectureArm(vm::System* system, bool useNativeFeatures);
+Architecture* makeArchitecturePowerpc(vm::System* system, bool useNativeFeatures);
 
 } // namespace codegen
 } // namespace avian
diff --git a/src/codegen/compiler.cpp b/src/codegen/compiler.cpp
index 1985629cfb..a093c1f5bf 100644
--- a/src/codegen/compiler.cpp
+++ b/src/codegen/compiler.cpp
@@ -14,6 +14,7 @@
 
 #include <avian/vm/codegen/compiler.h>
 #include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/architecture.h>
 #include <avian/vm/codegen/promise.h>
 
 #include "codegen/compiler/regalloc.h"
diff --git a/src/codegen/compiler/context.cpp b/src/codegen/compiler/context.cpp
index dd1f129bad..5d77fdcd77 100644
--- a/src/codegen/compiler/context.cpp
+++ b/src/codegen/compiler/context.cpp
@@ -11,6 +11,8 @@
 #include "codegen/compiler/context.h"
 #include "codegen/compiler/resource.h"
 
+#include <avian/vm/codegen/architecture.h>
+
 namespace avian {
 namespace codegen {
 namespace compiler {
diff --git a/src/codegen/compiler/context.h b/src/codegen/compiler/context.h
index d9894eda8c..cd19097c42 100644
--- a/src/codegen/compiler/context.h
+++ b/src/codegen/compiler/context.h
@@ -75,7 +75,7 @@ class Context {
 
   vm::System* system;
   Assembler* assembler;
-  Assembler::Architecture* arch;
+  Architecture* arch;
   vm::Zone* zone;
   Compiler::Client* client;
   Stack* stack;
diff --git a/src/codegen/compiler/frame.cpp b/src/codegen/compiler/frame.cpp
index e879238b79..cae48b9fee 100644
--- a/src/codegen/compiler/frame.cpp
+++ b/src/codegen/compiler/frame.cpp
@@ -13,6 +13,8 @@
 #include "codegen/compiler/context.h"
 #include "codegen/compiler/frame.h"
 
+#include <avian/vm/codegen/architecture.h>
+
 namespace avian {
 namespace codegen {
 namespace compiler {
diff --git a/src/codegen/compiler/site.h b/src/codegen/compiler/site.h
index bf2cef34b6..aedcc7ab09 100644
--- a/src/codegen/compiler/site.h
+++ b/src/codegen/compiler/site.h
@@ -11,6 +11,8 @@
 #ifndef AVIAN_CODEGEN_COMPILER_SITE_H
 #define AVIAN_CODEGEN_COMPILER_SITE_H
 
+#include <avian/vm/codegen/architecture.h>
+
 #include "codegen/compiler/value.h"
 #include "codegen/compiler/context.h"
 
diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp
index 511f7ccc32..8be76085e1 100644
--- a/src/codegen/target/arm/assembler.cpp
+++ b/src/codegen/target/arm/assembler.cpp
@@ -11,6 +11,7 @@
 #include <avian/util/runtime-array.h>
 
 #include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/architecture.h>
 #include <avian/vm/codegen/registers.h>
 
 #include "context.h"
@@ -136,7 +137,7 @@ nextFrame(ArchitectureContext* con, uint32_t* start, unsigned size UNUSED,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-class MyArchitecture: public Assembler::Architecture {
+class MyArchitecture: public Architecture {
  public:
   MyArchitecture(System* system): con(system), referenceCount(0) {
     populateTables(&con);
@@ -950,7 +951,7 @@ Assembler* MyArchitecture::makeAssembler(Allocator* allocator, Zone* zone) {
 
 } // namespace arm
 
-Assembler::Architecture*
+Architecture*
 makeArchitectureArm(System* system, bool)
 {
   return new (allocate(system, sizeof(arm::MyArchitecture))) arm::MyArchitecture(system);
diff --git a/src/codegen/target/powerpc/assembler.cpp b/src/codegen/target/powerpc/assembler.cpp
index cda4fdbc96..5ad517871a 100644
--- a/src/codegen/target/powerpc/assembler.cpp
+++ b/src/codegen/target/powerpc/assembler.cpp
@@ -9,6 +9,7 @@
    details. */
 
 #include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/architecture.h>
 #include <avian/vm/codegen/registers.h>
 
 #include "alloc-vector.h"
@@ -235,7 +236,7 @@ nextFrame(ArchitectureContext* c UNUSED, int32_t* start, unsigned size,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-class MyArchitecture: public Assembler::Architecture {
+class MyArchitecture: public Architecture {
  public:
   MyArchitecture(System* system): c(system), referenceCount(0) {
     populateTables(&c);
@@ -996,7 +997,7 @@ Assembler* MyArchitecture::makeAssembler(Allocator* allocator, Zone* zone) {
 
 } // namespace powerpc
 
-Assembler::Architecture*
+Architecture*
 makeArchitecturePowerpc(System* system, bool)
 {
   return new (allocate(system, sizeof(powerpc::MyArchitecture))) powerpc::MyArchitecture(system);
diff --git a/src/codegen/target/x86/assembler.cpp b/src/codegen/target/x86/assembler.cpp
index a568430f91..6b1a1679ad 100644
--- a/src/codegen/target/x86/assembler.cpp
+++ b/src/codegen/target/x86/assembler.cpp
@@ -24,6 +24,7 @@
 #include <avian/util/math.h>
 
 #include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/architecture.h>
 #include <avian/vm/codegen/registers.h>
 #include <avian/vm/codegen/lir.h>
 #include <avian/vm/codegen/promise.h>
@@ -147,7 +148,7 @@ nextFrame(ArchitectureContext* c UNUSED, uint8_t* start, unsigned size UNUSED,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-class MyArchitecture: public Assembler::Architecture {
+class MyArchitecture: public Architecture {
  public:
   MyArchitecture(System* system, bool useNativeFeatures):
     c(system, useNativeFeatures),
@@ -1148,7 +1149,7 @@ Assembler* MyArchitecture::makeAssembler(Allocator* allocator, Zone* zone) {
 
 } // namespace x86
 
-Assembler::Architecture* makeArchitectureX86(System* system, bool useNativeFeatures)
+Architecture* makeArchitectureX86(System* system, bool useNativeFeatures)
 {
   return new (allocate(system, sizeof(x86::MyArchitecture)))
     x86::MyArchitecture(system, useNativeFeatures);
diff --git a/src/codegen/targets.cpp b/src/codegen/targets.cpp
index 4b9d44fc7e..af8f66219c 100644
--- a/src/codegen/targets.cpp
+++ b/src/codegen/targets.cpp
@@ -8,6 +8,8 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
+#include "common.h"
+
 #include <avian/vm/codegen/targets.h>
 
 #include "environment.h"
@@ -15,7 +17,7 @@
 namespace avian {
 namespace codegen {
 
-Assembler::Architecture* makeArchitectureNative(vm::System* system, bool useNativeFeatures UNUSED) {
+Architecture* makeArchitectureNative(vm::System* system, bool useNativeFeatures UNUSED) {
 #ifndef AVIAN_TARGET_ARCH
   #error "Must specify native target!"
 #endif
diff --git a/src/compile.cpp b/src/compile.cpp
index 02a87cd4c8..b19b45ca7e 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -16,6 +16,7 @@
 #include "arch.h"
 
 #include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/architecture.h>
 #include <avian/vm/codegen/compiler.h>
 #include <avian/vm/codegen/targets.h>
 
@@ -294,7 +295,7 @@ class MyThread: public Thread {
   void** thunkTable;
   CallTrace* trace;
   Reference* reference;
-  avian::codegen::Assembler::Architecture* arch;
+  avian::codegen::Architecture* arch;
   Context* transition;
   TraceContext* traceContext;
   uintptr_t stackLimit;
diff --git a/src/tools/audit-codegen/main.cpp b/src/tools/audit-codegen/main.cpp
index d201cd43a4..5cc7d79709 100644
--- a/src/tools/audit-codegen/main.cpp
+++ b/src/tools/audit-codegen/main.cpp
@@ -31,7 +31,7 @@ class BasicEnv {
 public:
   System* s;
   Heap* heap;
-  Assembler::Architecture* arch;
+  Architecture* arch;
 
   BasicEnv():
     s(makeSystem(0)),
diff --git a/unittest/codegen/assembler-test.cpp b/unittest/codegen/assembler-test.cpp
index dde1b1b074..0412766b59 100644
--- a/unittest/codegen/assembler-test.cpp
+++ b/unittest/codegen/assembler-test.cpp
@@ -16,6 +16,7 @@
 #include "target.h"
 
 #include <avian/vm/codegen/assembler.h>
+#include <avian/vm/codegen/architecture.h>
 #include <avian/vm/codegen/targets.h>
 #include <avian/vm/codegen/lir.h>
 
@@ -29,7 +30,7 @@ class BasicEnv {
 public:
   System* s;
   Heap* heap;
-  Assembler::Architecture* arch;
+  Architecture* arch;
 
   BasicEnv():
     s(makeSystem(0)),

From d2caf50772201a5aee852589e8985e1431e8d01c Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sun, 24 Feb 2013 16:03:19 -0700
Subject: [PATCH 17/22] fix debug build

---
 src/codegen/target/arm/fixup.cpp           |  2 ++
 src/codegen/target/arm/multimethod.cpp     |  2 ++
 src/codegen/target/powerpc/fixup.cpp       |  1 +
 src/codegen/target/powerpc/multimethod.cpp | 32 ++++++++++++++++++++++
 src/codegen/target/powerpc/multimethod.h   | 30 +++++---------------
 src/codegen/target/x86/fixup.cpp           |  2 ++
 src/codegen/target/x86/multimethod.cpp     |  2 ++
 7 files changed, 48 insertions(+), 23 deletions(-)

diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp
index 2cf0b01216..88b5789f5a 100644
--- a/src/codegen/target/arm/fixup.cpp
+++ b/src/codegen/target/arm/fixup.cpp
@@ -16,6 +16,8 @@ namespace avian {
 namespace codegen {
 namespace arm {
 
+using namespace util;
+
 unsigned padding(MyBlock*, unsigned);
 
 OffsetPromise::OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace):
diff --git a/src/codegen/target/arm/multimethod.cpp b/src/codegen/target/arm/multimethod.cpp
index 76c681a60f..4e789cc18c 100644
--- a/src/codegen/target/arm/multimethod.cpp
+++ b/src/codegen/target/arm/multimethod.cpp
@@ -16,6 +16,8 @@ namespace avian {
 namespace codegen {
 namespace arm {
 
+using namespace util;
+
 unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
 {
   return operation + (lir::UnaryOperationCount * operand);
diff --git a/src/codegen/target/powerpc/fixup.cpp b/src/codegen/target/powerpc/fixup.cpp
index 06075c00e6..6de1698fe2 100644
--- a/src/codegen/target/powerpc/fixup.cpp
+++ b/src/codegen/target/powerpc/fixup.cpp
@@ -18,6 +18,7 @@ namespace codegen {
 namespace powerpc {
 
 using namespace isa;
+using namespace util;
 
 unsigned padding(MyBlock*, unsigned);
 
diff --git a/src/codegen/target/powerpc/multimethod.cpp b/src/codegen/target/powerpc/multimethod.cpp
index 79b5f9bf7b..d8ffba5008 100644
--- a/src/codegen/target/powerpc/multimethod.cpp
+++ b/src/codegen/target/powerpc/multimethod.cpp
@@ -19,6 +19,38 @@ namespace avian {
 namespace codegen {
 namespace powerpc {
 
+using namespace util;
+
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
+{
+  return operation + (lir::UnaryOperationCount * operand);
+}
+
+unsigned index(ArchitectureContext*,
+      lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2)
+{
+  return operation
+    + (lir::BinaryOperationCount * operand1)
+    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
+}
+
+unsigned index(ArchitectureContext* c UNUSED,
+      lir::TernaryOperation operation,
+      lir::OperandType operand1)
+{
+  assert(c, not isBranch(operation));
+
+  return operation + (lir::NonBranchTernaryOperationCount * operand1);
+}
+
+unsigned branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2)
+{
+  return operand1 + (lir::OperandTypeCount * operand2);
+}
+
 void populateTables(ArchitectureContext* c) {
   const lir::OperandType C = lir::ConstantOperand;
   const lir::OperandType A = lir::AddressOperand;
diff --git a/src/codegen/target/powerpc/multimethod.h b/src/codegen/target/powerpc/multimethod.h
index b2fe6f206d..06b881e97a 100644
--- a/src/codegen/target/powerpc/multimethod.h
+++ b/src/codegen/target/powerpc/multimethod.h
@@ -21,35 +21,19 @@ namespace codegen {
 namespace powerpc {
 
 
-inline unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
 
-inline unsigned index(ArchitectureContext*,
+unsigned index(ArchitectureContext*,
       lir::BinaryOperation operation,
       lir::OperandType operand1,
-      lir::OperandType operand2)
-{
-  return operation
-    + (lir::BinaryOperationCount * operand1)
-    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
-}
+      lir::OperandType operand2);
 
-inline unsigned index(ArchitectureContext* c UNUSED,
+unsigned index(ArchitectureContext* c UNUSED,
       lir::TernaryOperation operation,
-      lir::OperandType operand1)
-{
-  assert(c, not isBranch(operation));
+      lir::OperandType operand1);
 
-  return operation + (lir::NonBranchTernaryOperationCount * operand1);
-}
-
-inline unsigned branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
-            lir::OperandType operand2)
-{
-  return operand1 + (lir::OperandTypeCount * operand2);
-}
+unsigned branchIndex(ArchitectureContext* c UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2);
 
 void populateTables(ArchitectureContext* c);
 
diff --git a/src/codegen/target/x86/fixup.cpp b/src/codegen/target/x86/fixup.cpp
index 5e85d7c062..ba0a06f906 100644
--- a/src/codegen/target/x86/fixup.cpp
+++ b/src/codegen/target/x86/fixup.cpp
@@ -27,6 +27,8 @@ namespace avian {
 namespace codegen {
 namespace x86 {
 
+using namespace util;
+
 ResolvedPromise* resolvedPromise(Context* c, int64_t value) {
   return new(c->zone) ResolvedPromise(value);
 }
diff --git a/src/codegen/target/x86/multimethod.cpp b/src/codegen/target/x86/multimethod.cpp
index f0ad4621b2..9146c9d9cb 100644
--- a/src/codegen/target/x86/multimethod.cpp
+++ b/src/codegen/target/x86/multimethod.cpp
@@ -22,6 +22,8 @@ namespace avian {
 namespace codegen {
 namespace x86 {
 
+using namespace util;
+
 
 unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand) {
   return operation + (lir::UnaryOperationCount * operand);

From 82eec288563dc374994aad340da4f8b6fc6ab806 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sun, 24 Feb 2013 20:48:06 -0700
Subject: [PATCH 18/22] rename x86 Offset for consistency with arm and powerpc
 OffsetPromise

---
 src/codegen/target/x86/fixup.cpp |  8 ++++----
 src/codegen/target/x86/fixup.h   | 23 +++++++++++------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/codegen/target/x86/fixup.cpp b/src/codegen/target/x86/fixup.cpp
index ba0a06f906..294780fae5 100644
--- a/src/codegen/target/x86/fixup.cpp
+++ b/src/codegen/target/x86/fixup.cpp
@@ -33,15 +33,15 @@ ResolvedPromise* resolvedPromise(Context* c, int64_t value) {
   return new(c->zone) ResolvedPromise(value);
 }
 
-Offset::Offset(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit):
+OffsetPromise::OffsetPromise(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit):
   c(c), block(block), offset(offset), limit(limit), value_(-1)
 { }
 
-bool Offset::resolved() {
+bool OffsetPromise::resolved() {
   return block->start != static_cast<unsigned>(~0);
 }
 
-int64_t Offset::value() {
+int64_t OffsetPromise::value() {
   assert(c, resolved());
 
   if (value_ == -1) {
@@ -52,7 +52,7 @@ int64_t Offset::value() {
   return value_;
 }
 Promise* offsetPromise(Context* c) {
-  return new(c->zone) Offset(c, c->lastBlock, c->code.length(), c->lastBlock->lastPadding);
+  return new(c->zone) OffsetPromise(c, c->lastBlock, c->code.length(), c->lastBlock->lastPadding);
 }
 
 void*
diff --git a/src/codegen/target/x86/fixup.h b/src/codegen/target/x86/fixup.h
index 78f77b3044..fa170bb542 100644
--- a/src/codegen/target/x86/fixup.h
+++ b/src/codegen/target/x86/fixup.h
@@ -29,9 +29,18 @@ class AlignmentPadding;
 
 ResolvedPromise* resolvedPromise(Context* c, int64_t value);
 
-class Offset: public Promise {
+class Task {
  public:
-  Offset(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit);
+  Task(Task* next): next(next) { }
+
+  virtual void run(Context* c) = 0;
+
+  Task* next;
+};
+
+class OffsetPromise: public Promise {
+ public:
+  OffsetPromise(Context* c, MyBlock* block, unsigned offset, AlignmentPadding* limit);
 
   virtual bool resolved();
   
@@ -46,16 +55,6 @@ class Offset: public Promise {
 
 Promise* offsetPromise(Context* c);
 
-
-class Task {
- public:
-  Task(Task* next): next(next) { }
-
-  virtual void run(Context* c) = 0;
-
-  Task* next;
-};
-
 void* resolveOffset(vm::System* s, uint8_t* instruction, unsigned instructionSize, int64_t value);
 
 class OffsetListener: public Promise::Listener {

From 4d38873096c337aa6471489ba091bebd4dc8da61 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Sun, 24 Feb 2013 22:38:58 -0700
Subject: [PATCH 19/22] begin merging target assembler multimethod code

---
 src/codegen/target/arm/assembler.cpp       |  3 +-
 src/codegen/target/arm/multimethod.cpp     | 33 ++++++++++------------
 src/codegen/target/arm/multimethod.h       |  2 --
 src/codegen/target/multimethod.h           | 29 +++++++++++++++++++
 src/codegen/target/powerpc/assembler.cpp   |  5 ++--
 src/codegen/target/powerpc/multimethod.cpp | 32 +++++++++------------
 src/codegen/target/powerpc/multimethod.h   |  3 --
 src/codegen/target/x86/assembler.cpp       |  3 +-
 src/codegen/target/x86/multimethod.cpp     | 33 ++++++++++------------
 src/codegen/target/x86/multimethod.h       |  2 --
 10 files changed, 80 insertions(+), 65 deletions(-)
 create mode 100644 src/codegen/target/multimethod.h

diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp
index 8be76085e1..089c56379f 100644
--- a/src/codegen/target/arm/assembler.cpp
+++ b/src/codegen/target/arm/assembler.cpp
@@ -21,6 +21,7 @@
 #include "encode.h"
 #include "operations.h"
 #include "registers.h"
+#include "../multimethod.h"
 
 #include "alloc-vector.h"
 #include <avian/util/abort.h>
@@ -769,7 +770,7 @@ class MyAssembler: public Assembler {
 
   virtual void apply(lir::UnaryOperation op, OperandInfo a)
   {
-    arch_->con.unaryOperations[index(&(arch_->con), op, a.type)]
+    arch_->con.unaryOperations[Multimethod::index(op, a.type)]
       (&con, a.size, a.operand);
   }
 
diff --git a/src/codegen/target/arm/multimethod.cpp b/src/codegen/target/arm/multimethod.cpp
index 4e789cc18c..a88180fc9b 100644
--- a/src/codegen/target/arm/multimethod.cpp
+++ b/src/codegen/target/arm/multimethod.cpp
@@ -9,20 +9,17 @@
    details. */
 
 #include "context.h"
-#include "multimethod.h"
 #include "operations.h"
 
+#include "multimethod.h"
+#include "../multimethod.h"
+
 namespace avian {
 namespace codegen {
 namespace arm {
 
 using namespace util;
 
-unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
 unsigned index(ArchitectureContext*,
       lir::BinaryOperation operation,
       lir::OperandType operand1,
@@ -66,25 +63,25 @@ void populateTables(ArchitectureContext* con) {
   zo[lir::StoreLoadBarrier] = memoryBarrier;
   zo[lir::Trap] = trap;
 
-  uo[index(con, lir::LongCall, C)] = CAST1(longCallC);
+  uo[Multimethod::index(lir::LongCall, C)] = CAST1(longCallC);
 
-  uo[index(con, lir::AlignedLongCall, C)] = CAST1(longCallC);
+  uo[Multimethod::index(lir::AlignedLongCall, C)] = CAST1(longCallC);
 
-  uo[index(con, lir::LongJump, C)] = CAST1(longJumpC);
+  uo[Multimethod::index(lir::LongJump, C)] = CAST1(longJumpC);
 
-  uo[index(con, lir::AlignedLongJump, C)] = CAST1(longJumpC);
+  uo[Multimethod::index(lir::AlignedLongJump, C)] = CAST1(longJumpC);
 
-  uo[index(con, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(con, lir::Jump, C)] = CAST1(jumpC);
+  uo[Multimethod::index(lir::Jump, R)] = CAST1(jumpR);
+  uo[Multimethod::index(lir::Jump, C)] = CAST1(jumpC);
 
-  uo[index(con, lir::AlignedJump, R)] = CAST1(jumpR);
-  uo[index(con, lir::AlignedJump, C)] = CAST1(jumpC);
+  uo[Multimethod::index(lir::AlignedJump, R)] = CAST1(jumpR);
+  uo[Multimethod::index(lir::AlignedJump, C)] = CAST1(jumpC);
 
-  uo[index(con, lir::Call, C)] = CAST1(callC);
-  uo[index(con, lir::Call, R)] = CAST1(callR);
+  uo[Multimethod::index(lir::Call, C)] = CAST1(callC);
+  uo[Multimethod::index(lir::Call, R)] = CAST1(callR);
 
-  uo[index(con, lir::AlignedCall, C)] = CAST1(callC);
-  uo[index(con, lir::AlignedCall, R)] = CAST1(callR);
+  uo[Multimethod::index(lir::AlignedCall, C)] = CAST1(callC);
+  uo[Multimethod::index(lir::AlignedCall, R)] = CAST1(callR);
 
   bo[index(con, lir::Move, R, R)] = CAST2(moveRR);
   bo[index(con, lir::Move, C, R)] = CAST2(moveCR);
diff --git a/src/codegen/target/arm/multimethod.h b/src/codegen/target/arm/multimethod.h
index 7c574b588c..cda1daff37 100644
--- a/src/codegen/target/arm/multimethod.h
+++ b/src/codegen/target/arm/multimethod.h
@@ -23,8 +23,6 @@ namespace avian {
 namespace codegen {
 namespace arm {
 
-unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
-
 unsigned index(ArchitectureContext*,
       lir::BinaryOperation operation,
       lir::OperandType operand1,
diff --git a/src/codegen/target/multimethod.h b/src/codegen/target/multimethod.h
new file mode 100644
index 0000000000..750a02d8c8
--- /dev/null
+++ b/src/codegen/target/multimethod.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_TARGET_MULTIMETHOD_H
+#define AVIAN_CODEGEN_TARGET_MULTIMETHOD_H
+
+
+namespace avian {
+namespace codegen {
+
+class Multimethod {
+public:
+  inline static unsigned index(lir::UnaryOperation operation, lir::OperandType operand) {
+    return operation + (lir::UnaryOperationCount * operand);
+  }
+};
+
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_TARGET_MULTIMETHOD_H
+
diff --git a/src/codegen/target/powerpc/assembler.cpp b/src/codegen/target/powerpc/assembler.cpp
index 5ad517871a..414ab71884 100644
--- a/src/codegen/target/powerpc/assembler.cpp
+++ b/src/codegen/target/powerpc/assembler.cpp
@@ -19,8 +19,9 @@
 #include "context.h"
 #include "fixup.h"
 #include "block.h"
-#include "multimethod.h"
 #include "operations.h"
+#include "multimethod.h"
+#include "../multimethod.h"
 
 using namespace vm;
 using namespace avian::util;
@@ -824,7 +825,7 @@ class MyAssembler: public Assembler {
 
   virtual void apply(lir::UnaryOperation op, OperandInfo a)
   {
-    arch_->c.unaryOperations[index(&(arch_->c), op, a.type)]
+    arch_->c.unaryOperations[Multimethod::index(op, a.type)]
       (&c, a.size, a.operand);
   }
 
diff --git a/src/codegen/target/powerpc/multimethod.cpp b/src/codegen/target/powerpc/multimethod.cpp
index d8ffba5008..234a4e055c 100644
--- a/src/codegen/target/powerpc/multimethod.cpp
+++ b/src/codegen/target/powerpc/multimethod.cpp
@@ -11,9 +11,10 @@
 #include "context.h"
 #include "block.h"
 #include "common.h"
-
 #include "operations.h"
+
 #include "multimethod.h"
+#include "../multimethod.h"
 
 namespace avian {
 namespace codegen {
@@ -21,11 +22,6 @@ namespace powerpc {
 
 using namespace util;
 
-unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
 unsigned index(ArchitectureContext*,
       lir::BinaryOperation operation,
       lir::OperandType operand1,
@@ -69,25 +65,25 @@ void populateTables(ArchitectureContext* c) {
   zo[lir::StoreLoadBarrier] = memoryBarrier;
   zo[lir::Trap] = trap;
 
-  uo[index(c, lir::LongCall, C)] = CAST1(longCallC);
+  uo[Multimethod::index(lir::LongCall, C)] = CAST1(longCallC);
 
-  uo[index(c, lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
+  uo[Multimethod::index(lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
 
-  uo[index(c, lir::LongJump, C)] = CAST1(longJumpC);
+  uo[Multimethod::index(lir::LongJump, C)] = CAST1(longJumpC);
 
-  uo[index(c, lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
+  uo[Multimethod::index(lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
 
-  uo[index(c, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(c, lir::Jump, C)] = CAST1(jumpC);
+  uo[Multimethod::index(lir::Jump, R)] = CAST1(jumpR);
+  uo[Multimethod::index(lir::Jump, C)] = CAST1(jumpC);
 
-  uo[index(c, lir::AlignedJump, R)] = CAST1(jumpR);
-  uo[index(c, lir::AlignedJump, C)] = CAST1(jumpC);
+  uo[Multimethod::index(lir::AlignedJump, R)] = CAST1(jumpR);
+  uo[Multimethod::index(lir::AlignedJump, C)] = CAST1(jumpC);
 
-  uo[index(c, lir::Call, C)] = CAST1(callC);
-  uo[index(c, lir::Call, R)] = CAST1(callR);
+  uo[Multimethod::index(lir::Call, C)] = CAST1(callC);
+  uo[Multimethod::index(lir::Call, R)] = CAST1(callR);
 
-  uo[index(c, lir::AlignedCall, C)] = CAST1(callC);
-  uo[index(c, lir::AlignedCall, R)] = CAST1(callR);
+  uo[Multimethod::index(lir::AlignedCall, C)] = CAST1(callC);
+  uo[Multimethod::index(lir::AlignedCall, R)] = CAST1(callR);
 
   bo[index(c, lir::Move, R, R)] = CAST2(moveRR);
   bo[index(c, lir::Move, C, R)] = CAST2(moveCR);
diff --git a/src/codegen/target/powerpc/multimethod.h b/src/codegen/target/powerpc/multimethod.h
index 06b881e97a..b3565d0ed9 100644
--- a/src/codegen/target/powerpc/multimethod.h
+++ b/src/codegen/target/powerpc/multimethod.h
@@ -20,9 +20,6 @@ namespace avian {
 namespace codegen {
 namespace powerpc {
 
-
-unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
-
 unsigned index(ArchitectureContext*,
       lir::BinaryOperation operation,
       lir::OperandType operand1,
diff --git a/src/codegen/target/x86/assembler.cpp b/src/codegen/target/x86/assembler.cpp
index 6b1a1679ad..deeb6879f2 100644
--- a/src/codegen/target/x86/assembler.cpp
+++ b/src/codegen/target/x86/assembler.cpp
@@ -39,6 +39,7 @@
 #include "operations.h"
 #include "detect.h"
 #include "multimethod.h"
+#include "../multimethod.h"
 
 #define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
 #define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
@@ -1043,7 +1044,7 @@ class MyAssembler: public Assembler {
 
   virtual void apply(lir::UnaryOperation op, OperandInfo a)
   {
-    arch_->c.unaryOperations[index(&(arch_->c), op, a.type)]
+    arch_->c.unaryOperations[Multimethod::index(op, a.type)]
       (&c, a.size, a.operand);
   }
 
diff --git a/src/codegen/target/x86/multimethod.cpp b/src/codegen/target/x86/multimethod.cpp
index 9146c9d9cb..ddd6a1fe1d 100644
--- a/src/codegen/target/x86/multimethod.cpp
+++ b/src/codegen/target/x86/multimethod.cpp
@@ -15,20 +15,17 @@
 #include <avian/vm/codegen/lir.h>
 
 #include "context.h"
-#include "multimethod.h"
 #include "operations.h"
 
+#include "multimethod.h"
+#include "../multimethod.h"
+
 namespace avian {
 namespace codegen {
 namespace x86 {
 
 using namespace util;
 
-
-unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand) {
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
 unsigned index(ArchitectureContext*, lir::BinaryOperation operation,
       lir::OperandType operand1,
       lir::OperandType operand2)
@@ -74,25 +71,25 @@ void populateTables(ArchitectureContext* c) {
   zo[lir::StoreLoadBarrier] = storeLoadBarrier;
   zo[lir::Trap] = trap;
 
-  uo[index(c, lir::Call, C)] = CAST1(callC);
-  uo[index(c, lir::Call, R)] = CAST1(callR);
-  uo[index(c, lir::Call, M)] = CAST1(callM);
+  uo[Multimethod::index(lir::Call, C)] = CAST1(callC);
+  uo[Multimethod::index(lir::Call, R)] = CAST1(callR);
+  uo[Multimethod::index(lir::Call, M)] = CAST1(callM);
 
-  uo[index(c, lir::AlignedCall, C)] = CAST1(alignedCallC);
+  uo[Multimethod::index(lir::AlignedCall, C)] = CAST1(alignedCallC);
 
-  uo[index(c, lir::LongCall, C)] = CAST1(longCallC);
+  uo[Multimethod::index(lir::LongCall, C)] = CAST1(longCallC);
 
-  uo[index(c, lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
+  uo[Multimethod::index(lir::AlignedLongCall, C)] = CAST1(alignedLongCallC);
 
-  uo[index(c, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(c, lir::Jump, C)] = CAST1(jumpC);
-  uo[index(c, lir::Jump, M)] = CAST1(jumpM);
+  uo[Multimethod::index(lir::Jump, R)] = CAST1(jumpR);
+  uo[Multimethod::index(lir::Jump, C)] = CAST1(jumpC);
+  uo[Multimethod::index(lir::Jump, M)] = CAST1(jumpM);
 
-  uo[index(c, lir::AlignedJump, C)] = CAST1(alignedJumpC);
+  uo[Multimethod::index(lir::AlignedJump, C)] = CAST1(alignedJumpC);
 
-  uo[index(c, lir::LongJump, C)] = CAST1(longJumpC);
+  uo[Multimethod::index(lir::LongJump, C)] = CAST1(longJumpC);
 
-  uo[index(c, lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
+  uo[Multimethod::index(lir::AlignedLongJump, C)] = CAST1(alignedLongJumpC);
 
   bo[index(c, lir::Negate, R, R)] = CAST2(negateRR);
 
diff --git a/src/codegen/target/x86/multimethod.h b/src/codegen/target/x86/multimethod.h
index 6ede17f17a..6846f2f042 100644
--- a/src/codegen/target/x86/multimethod.h
+++ b/src/codegen/target/x86/multimethod.h
@@ -21,8 +21,6 @@ namespace x86 {
 
 class ArchitectureContext;
 
-unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
-
 unsigned index(ArchitectureContext*, lir::BinaryOperation operation,
       lir::OperandType operand1,
       lir::OperandType operand2);

From 0b01dd565aa730b395911a06a555aa4f819bfea0 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshua.warner@readytalk.com>
Date: Thu, 28 Feb 2013 15:57:09 -0700
Subject: [PATCH 20/22] fix multiple-include-of-stdint errors

---
 src/avian/common.h                    | 8 ++++----
 src/codegen/target/x86/assembler.cpp  | 1 -
 src/codegen/target/x86/operations.cpp | 2 --
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/avian/common.h b/src/avian/common.h
index dc5281511c..4fad202fe4 100644
--- a/src/avian/common.h
+++ b/src/avian/common.h
@@ -8,8 +8,8 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#ifndef COMMON_H
-#define COMMON_H
+#ifndef AVIAN_COMMON_H
+#define AVIAN_COMMON_H
 
 #ifndef __STDC_CONSTANT_MACROS
 #  define __STDC_CONSTANT_MACROS
@@ -113,7 +113,7 @@ typedef intptr_t intptr_alias_t;
 
 #else // not _MSC_VER
 
-#  include "stdint.h"
+#  include <stdint.h>
 
 #  define BYTES_PER_WORD __SIZEOF_POINTER__
 
@@ -554,4 +554,4 @@ equal(const void* a, unsigned al, const void* b, unsigned bl)
 
 } // namespace vm
 
-#endif // COMMON_H
+#endif // AVIAN_COMMON_H
diff --git a/src/codegen/target/x86/assembler.cpp b/src/codegen/target/x86/assembler.cpp
index d441c0030b..af289135cf 100644
--- a/src/codegen/target/x86/assembler.cpp
+++ b/src/codegen/target/x86/assembler.cpp
@@ -9,7 +9,6 @@
    details. */
 
 #include <stdarg.h>
-#include <stdint.h>
 #include <string.h>
 
 #include "avian/environment.h"
diff --git a/src/codegen/target/x86/operations.cpp b/src/codegen/target/x86/operations.cpp
index 13f6f1b0d6..477c3a8859 100644
--- a/src/codegen/target/x86/operations.cpp
+++ b/src/codegen/target/x86/operations.cpp
@@ -8,8 +8,6 @@
    There is NO WARRANTY for this software.  See license.txt for
    details. */
 
-#include <stdint.h>
-
 #include "avian/target.h"
 #include "avian/alloc-vector.h"
 #include "avian/allocator.h"

From add029ad3cf81a19df2d20226ac3c59060cab480 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshua.warner@readytalk.com>
Date: Thu, 28 Feb 2013 16:41:44 -0700
Subject: [PATCH 21/22] fix powerpc build

Evidently, the powerpc gcc compiler feels it necessary to #define powerpc to 1. Seriously?  SERIOUSLY????
---
 src/avian/common.h                   | 8 ++++++++
 src/codegen/target/powerpc/context.h | 3 +++
 src/codegen/target/powerpc/encode.h  | 4 ++++
 3 files changed, 15 insertions(+)

diff --git a/src/avian/common.h b/src/avian/common.h
index 4fad202fe4..f8a0e22168 100644
--- a/src/avian/common.h
+++ b/src/avian/common.h
@@ -27,6 +27,14 @@
 
 #include "float.h"
 
+#ifdef powerpc
+#  undef powerpc
+#endif
+
+#ifdef linux
+#  undef linux
+#endif
+
 // don't complain about using 'this' in member initializers:
 #  pragma warning(disable:4355)
 
diff --git a/src/codegen/target/powerpc/context.h b/src/codegen/target/powerpc/context.h
index 4e12f4882c..7726b88cf5 100644
--- a/src/codegen/target/powerpc/context.h
+++ b/src/codegen/target/powerpc/context.h
@@ -14,6 +14,9 @@
 #include <avian/vm/codegen/assembler.h>
 #include "avian/alloc-vector.h"
 
+#ifdef powerpc
+#undef powerpc
+#endif
 
 namespace vm {
 class System;
diff --git a/src/codegen/target/powerpc/encode.h b/src/codegen/target/powerpc/encode.h
index 54f7f7f493..811ddce402 100644
--- a/src/codegen/target/powerpc/encode.h
+++ b/src/codegen/target/powerpc/encode.h
@@ -11,6 +11,10 @@
 #ifndef AVIAN_CODEGEN_ASSEMBLER_POWERPC_ENCODE_H
 #define AVIAN_CODEGEN_ASSEMBLER_POWERPC_ENCODE_H
 
+#ifdef powerpc
+#undef powerpc
+#endif
+
 namespace avian {
 namespace codegen {
 namespace powerpc {

From df23c379bfebce0d493db41df456c399fc2fdb90 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshua.warner@readytalk.com>
Date: Fri, 1 Mar 2013 14:52:27 -0700
Subject: [PATCH 22/22] fix windows embed-loader build

---
 makefile | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/makefile b/makefile
index cfbcabd9b2..911861cfce 100755
--- a/makefile
+++ b/makefile
@@ -1564,19 +1564,18 @@ $(embed-loader-o): $(embed-loader) $(converter)
 	$(converter) $(<) $(@) _binary_loader_start \
 		_binary_loader_end $(target-format) $(arch)
 
-$(embed-loader): $(embed-loader-objects) $(static-library)
-	@mkdir -p $(dir $(@))
-	cd $(dir $(@)) && $(ar) x ../../../$(static-library)
+$(embed-loader): $(embed-loader-objects) $(vm-objects) $(classpath-objects) $(vm-heapwalk-objects) \
+		$(javahome-object) $(boot-javahome-object) $(lzma-decode-objects)
 ifdef ms_cl_compiler
-	$(ld) $(lflags) $(dir $(@))/*.o -out:$(@) \
+	$(ld) $(lflags) $(^) -out:$(@) \
 		-debug -PDB:$(subst $(exe-suffix),.pdb,$(@)) $(manifest-flags)
 ifdef mt
 	$(mt) -nologo -manifest $(@).manifest -outputresource:"$(@);1"
 endif
 else
-	$(dlltool) -z $(addsuffix .def,$(basename $(@))) $(dir $(@))/*.o
+	$(dlltool) -z $(addsuffix .def,$(basename $(@))) $(^)
 	$(dlltool) -d $(addsuffix .def,$(basename $(@))) -e $(addsuffix .exp,$(basename $(@))) 
-	$(ld) $(addsuffix .exp,$(basename $(@))) $(dir $(@))/*.o \
+	$(ld) $(addsuffix .exp,$(basename $(@))) $(^) \
 		$(lflags) $(bootimage-lflags) -o $(@)
 endif
 	$(strip) $(strip-all) $(@)