From 22d6ed1becde1d1507b6a7c5ad0ca348a9213b38 Mon Sep 17 00:00:00 2001
From: Joshua Warner <joshuawarner32@gmail.com>
Date: Fri, 22 Feb 2013 21:19:53 -0700
Subject: [PATCH] further split out arm assembler

---
 src/codegen/arm/assembler.cpp   | 1830 +------------------------------
 src/codegen/arm/block.h         |    1 -
 src/codegen/arm/context.h       |    9 -
 src/codegen/arm/encode.h        |  184 ++++
 src/codegen/arm/fixup.cpp       |  175 +++
 src/codegen/arm/fixup.h         |  140 +++
 src/codegen/arm/multimethod.cpp |  142 +++
 src/codegen/arm/multimethod.h   |   46 +
 src/codegen/arm/operations.cpp  | 1235 +++++++++++++++++++++
 src/codegen/arm/operations.h    |  240 ++++
 src/codegen/arm/registers.h     |   52 +
 src/common.h                    |    6 +
 12 files changed, 2227 insertions(+), 1833 deletions(-)
 create mode 100644 src/codegen/arm/encode.h
 create mode 100644 src/codegen/arm/fixup.cpp
 create mode 100644 src/codegen/arm/fixup.h
 create mode 100644 src/codegen/arm/multimethod.cpp
 create mode 100644 src/codegen/arm/multimethod.h
 create mode 100644 src/codegen/arm/operations.cpp
 create mode 100644 src/codegen/arm/operations.h
 create mode 100644 src/codegen/arm/registers.h

diff --git a/src/codegen/arm/assembler.cpp b/src/codegen/arm/assembler.cpp
index c4b4ca7dfe..ac00fdb5cb 100644
--- a/src/codegen/arm/assembler.cpp
+++ b/src/codegen/arm/assembler.cpp
@@ -15,176 +15,24 @@
 
 #include "context.h"
 #include "block.h"
+#include "fixup.h"
+#include "multimethod.h"
+#include "encode.h"
+#include "operations.h"
+#include "registers.h"
 
 #include "alloc-vector.h"
 #include <avian/util/abort.h>
 
-
-#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
-#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
-#define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
-#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
-
 using namespace vm;
 using namespace avian::codegen;
 using namespace avian::util;
 
 namespace avian {
 namespace codegen {
+namespace arm {
 
 namespace isa {
-// SYSTEM REGISTERS
-const int FPSID = 0x0;
-const int FPSCR = 0x1;
-const int FPEXC = 0x8;
-// INSTRUCTION OPTIONS
-enum CONDITION { EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV };
-enum SHIFTOP { LSL, LSR, ASR, ROR };
-// INSTRUCTION FORMATS
-inline int DATA(int cond, int opcode, int S, int Rn, int Rd, int shift, int Sh, int Rm)
-{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
-inline int DATAS(int cond, int opcode, int S, int Rn, int Rd, int Rs, int Sh, int Rm)
-{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | Rs<<8 | Sh<<5 | 1<<4 | Rm; }
-inline int DATAI(int cond, int opcode, int S, int Rn, int Rd, int rot, int imm)
-{ return cond<<28 | 1<<25 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | rot<<8 | (imm&0xff); }
-inline int BRANCH(int cond, int L, int offset)
-{ return cond<<28 | 5<<25 | L<<24 | (offset&0xffffff); }
-inline int BRANCHX(int cond, int L, int Rm)
-{ return cond<<28 | 0x4bffc<<6 | L<<5 | 1<<4 | Rm; }
-inline int MULTIPLY(int cond, int mul, int S, int Rd, int Rn, int Rs, int Rm)
-{ return cond<<28 | mul<<21 | S<<20 | Rd<<16 | Rn<<12 | Rs<<8 | 9<<4 | Rm; }
-inline int XFER(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int shift, int Sh, int Rm)
-{ return cond<<28 | 3<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
-inline int XFERI(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int offset)
-{ return cond<<28 | 2<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | (offset&0xfff); }
-inline int XFER2(int cond, int P, int U, int W, int L, int Rn, int Rd, int S, int H, int Rm)
-{ return cond<<28 | P<<24 | U<<23 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | 1<<7 | S<<6 | H<<5 | 1<<4 | Rm; }
-inline int XFER2I(int cond, int P, int U, int W, int L, int Rn, int Rd, int offsetH, int S, int H, int offsetL)
-{ return cond<<28 | P<<24 | U<<23 | 1<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offsetH<<8 | 1<<7 | S<<6 | H<<5 | 1<<4 | (offsetL&0xf); }
-inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
-{ return cond<<28 | 0xe<<24 | opcode_1<<20 | CRn<<16 | CRd<<12 | cp_num<<8 | opcode_2<<5 | CRm; }
-inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset) // offset is in words, not bytes
-{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | (offset&0xff)>>2; }
-inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
-{ return cond<<28 | 0xe<<24 | opcode_1<<21 | L<<20 | CRn<<16 | Rd<<12 | cp_num<<8 | opcode_2<<5 | 1<<4 | CRm; }
-inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
-{ return cond<<28 | 0xc4<<20 | L<<20 | Rn<<16 | Rd<<12 | cp_num<<8 | opcode<<4 | CRm;}
-// FIELD CALCULATORS
-inline int calcU(int imm) { return imm >= 0 ? 1 : 0; }
-// INSTRUCTIONS
-// The "cond" and "S" fields are set using the SETCOND() and SETS() functions
-inline int b(int offset) { return BRANCH(AL, 0, offset); }
-inline int bl(int offset) { return BRANCH(AL, 1, offset); }
-inline int bx(int Rm) { return BRANCHX(AL, 0, Rm); }
-inline int blx(int Rm) { return BRANCHX(AL, 1, Rm); }
-inline int and_(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x0, 0, Rn, Rd, shift, Sh, Rm); }
-inline int eor(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x1, 0, Rn, Rd, shift, Sh, Rm); }
-inline int rsb(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x3, 0, Rn, Rd, shift, Sh, Rm); }
-inline int add(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x4, 0, Rn, Rd, shift, Sh, Rm); }
-inline int adc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x5, 0, Rn, Rd, shift, Sh, Rm); }
-inline int rsc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x7, 0, Rn, Rd, shift, Sh, Rm); }
-inline int cmp(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xa, 1, Rn, 0, shift, Sh, Rm); }
-inline int orr(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xc, 0, Rn, Rd, shift, Sh, Rm); }
-inline int mov(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xd, 0, 0, Rd, shift, Sh, Rm); }
-inline int mvn(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xf, 0, 0, Rd, shift, Sh, Rm); }
-inline int andi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x0, 0, Rn, Rd, rot, imm); }
-inline int subi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x2, 0, Rn, Rd, rot, imm); }
-inline int rsbi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x3, 0, Rn, Rd, rot, imm); }
-inline int addi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x4, 0, Rn, Rd, rot, imm); }
-inline int adci(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x5, 0, Rn, Rd, rot, imm); }
-inline int bici(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xe, 0, Rn, Rd, rot, imm); }
-inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); }
-inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); }
-inline int orrsh(int Rd, int Rn, int Rm, int Rs, int Sh) { return DATAS(AL, 0xc, 0, Rn, Rd, Rs, Sh, Rm); }
-inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); }
-inline int mul(int Rd, int Rm, int Rs) { return MULTIPLY(AL, 0, 0, Rd, 0, Rs, Rm); }
-inline int mla(int Rd, int Rm, int Rs, int Rn) { return MULTIPLY(AL, 1, 0, Rd, Rn, Rs, Rm); }
-inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, RdHi, RdLo, Rs, Rm); }
-inline int ldr(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 1, Rn, Rd, 0, 0, Rm); }
-inline int ldri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 1, Rn, Rd, abs(imm)); }
-inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); }
-inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 1, Rn, Rd, abs(imm)); }
-inline int str(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 0, Rn, Rd, 0, 0, Rm); }
-inline int stri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 0, Rn, Rd, abs(imm)); }
-inline int strb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 0, Rn, Rd, 0, 0, Rm); }
-inline int strbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 0, Rn, Rd, abs(imm)); }
-inline int ldrh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 0, 1, Rm); }
-inline int ldrhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
-inline int strh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 0, Rn, Rd, 0, 1, Rm); }
-inline int strhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 0, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
-inline int ldrsh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 1, Rm); }
-inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 1, abs(imm)&0xf); }
-inline int ldrsb(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 0, Rm); }
-inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 0, abs(imm)&0xf); }
-// breakpoint instruction, this really has its own instruction format
-inline int bkpt(int16_t immed) { return 0xe1200070 | (((unsigned)immed & 0xffff) >> 4 << 8) | (immed & 0xf); }
-// COPROCESSOR INSTRUCTIONS
-inline int mcr(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 0, CRn, Rd, coproc, opcode_2, CRm); }
-inline int mcrr(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 0, Rn, Rd, coproc, opcode, CRm); }
-inline int mrc(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 1, CRn, Rd, coproc, opcode_2, CRm); }
-inline int mrrc(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 1, Rn, Rd, coproc, opcode, CRm); }
-// VFP FLOATING-POINT INSTRUCTIONS
-inline int fmuls(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|2, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
-inline int fadds(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
-inline int fsubs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1)|2, Sm>>1); }
-inline int fdivs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|8, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
-inline int fmuld(int Dd, int Dn, int Dm) { return COOP(AL, 2, Dn, Dd, 11, 0, Dm); }
-inline int faddd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 0, Dm); }
-inline int fsubd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 2, Dm); }
-inline int fdivd(int Dd, int Dn, int Dm) { return COOP(AL, 8, Dn, Dd, 11, 0, Dm); }
-inline int fcpys(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
-inline int fabss(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int fnegs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
-inline int fsqrts(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int fcmps(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 4, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
-inline int fcvtds(int Dd, int Sm) { return COOP(AL, 0xb, 7, Dd, 10, 6|(Sm&1), Sm>>1); }
-inline int fsitos(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 8, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int ftosizs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
-inline int fcpyd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 2, Dm); }
-inline int fabsd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 6, Dm); }
-inline int fnegd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 2, Dm); }
-inline int fsqrtd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 6, Dm); }
-// double-precision comparison instructions
-inline int fcmpd(int Dd, int Dm) { return COOP(AL, 0xb, 4, Dd, 11, 2, Dm); }
-// double-precision conversion instructions
-inline int fcvtsd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 7, Sd>>1, 11, 6, Dm); }
-inline int fsitod(int Dd, int Sm) { return COOP(AL, 0xb, 8, Dd, 11, 6|(Sm&1), Sm>>1); }
-inline int ftosizd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 11, 6, Dm); }
-// single load/store instructions for both precision types
-inline int flds(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 1, Rn, Sd>>1, 10, offset); };
-inline int fldd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 1, Rn, Dd, 11, offset); };
-inline int fsts(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 0, Rn, Sd>>1, 10, offset); };
-inline int fstd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 0, Rn, Dd, 11, offset); };
-// move between GPRs and FPRs
-inline int fmsr(int Sn, int Rd) { return mcr(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
-inline int fmrs(int Rd, int Sn) { return mrc(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
-// move to/from VFP system registers
-inline int fmrx(int Rd, int reg) { return mrc(10, 7, Rd, reg, 0); }
-// these move around pairs of single-precision registers
-inline int fmdrr(int Dm, int Rd, int Rn) { return mcrr(11, 1, Rd, Rn, Dm); }
-inline int fmrrd(int Rd, int Rn, int Dm) { return mrrc(11, 1, Rd, Rn, Dm); }
-// FLAG SETTERS
-inline int SETCOND(int ins, int cond) { return ((ins&0x0fffffff) | (cond<<28)); }
-inline int SETS(int ins) { return ins | 1<<20; }
-// PSEUDO-INSTRUCTIONS
-inline int lsl(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSL); }
-inline int lsli(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSL, imm); }
-inline int lsr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSR); }
-inline int lsri(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSR, imm); }
-inline int asr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, ASR); }
-inline int asri(int Rd, int Rm, int imm) { return mov(Rd, Rm, ASR, imm); }
-inline int beq(int offset) { return SETCOND(b(offset), EQ); }
-inline int bne(int offset) { return SETCOND(b(offset), NE); }
-inline int bls(int offset) { return SETCOND(b(offset), LS); }
-inline int bhi(int offset) { return SETCOND(b(offset), HI); }
-inline int blt(int offset) { return SETCOND(b(offset), LT); }
-inline int bgt(int offset) { return SETCOND(b(offset), GT); }
-inline int ble(int offset) { return SETCOND(b(offset), LE); }
-inline int bge(int offset) { return SETCOND(b(offset), GE); }
-inline int blo(int offset) { return SETCOND(b(offset), CC); }
-inline int bhs(int offset) { return SETCOND(b(offset), CS); }
-inline int bpl(int offset) { return SETCOND(b(offset), PL); }
-inline int fmstat() { return fmrx(15, FPSCR); }
 // HARDWARE FLAGS
 bool vfpSupported() {
   // TODO: Use at runtime detection
@@ -199,1602 +47,31 @@ bool vfpSupported() {
   return false;
 #endif
 }
-}
+} // namespace isa
 
-namespace arm {
-
-const uint64_t MASK_LO32 = 0xffffffff;
-const unsigned MASK_LO16 = 0xffff;
-const unsigned MASK_LO8  = 0xff;
 inline unsigned lo8(int64_t i) { return (unsigned)(i&MASK_LO8); }
 
-inline bool isOfWidth(int64_t i, int size) { return static_cast<uint64_t>(i) >> size == 0; }
-
-const int N_GPRS = 16;
-const int N_FPRS = 16;
-const uint32_t GPR_MASK = 0xffff;
-const uint32_t FPR_MASK = 0xffff0000;
-// for source-to-destination masks
-const uint64_t GPR_MASK64 = GPR_MASK | (uint64_t)GPR_MASK << 32;
-// making the following const somehow breaks debug symbol output in GDB
-/* const */ uint64_t FPR_MASK64 = FPR_MASK | (uint64_t)FPR_MASK << 32;
-
 const RegisterFile MyRegisterFileWithoutFloats(GPR_MASK, 0);
 const RegisterFile MyRegisterFileWithFloats(GPR_MASK, FPR_MASK);
 
-inline bool isFpr(lir::Register* reg) {
-  return reg->low >= N_GPRS;
-}
-
-inline int fpr64(int reg) { return reg - N_GPRS; }
-inline int fpr64(lir::Register* reg) { return fpr64(reg->low); }
-inline int fpr32(int reg) { return fpr64(reg) << 1; }
-inline int fpr32(lir::Register* reg) { return fpr64(reg) << 1; }
-
 const unsigned FrameHeaderSize = 1;
 
 const unsigned StackAlignmentInBytes = 8;
 const unsigned StackAlignmentInWords
 = StackAlignmentInBytes / TargetBytesPerWord;
 
-const int ThreadRegister = 8;
-const int StackRegister = 13;
-const int LinkRegister = 14;
-const int ProgramCounter = 15;
-
-const int32_t PoolOffsetMask = 0xFFF;
-
-const bool DebugPool = false;
-
-class PoolOffset;
-class PoolEvent;
-
 void
 resolve(MyBlock*);
 
 unsigned
 padding(MyBlock*, unsigned);
 
-class Task;
 class ConstantPoolEntry;
 
-class OffsetPromise: public Promise {
- public:
-  OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace):
-    con(con), block(block), offset(offset), forTrace(forTrace)
-  { }
-
-  virtual bool resolved() {
-    return block->start != static_cast<unsigned>(~0);
-  }
-  
-  virtual int64_t value() {
-    assert(con, resolved());
-
-    unsigned o = offset - block->offset;
-    return block->start + padding
-      (block, forTrace ? o - TargetBytesPerWord : o) + o;
-  }
-
-  Context* con;
-  MyBlock* block;
-  unsigned offset;
-  bool forTrace;
-};
-
-Promise* offsetPromise(Context* con, bool forTrace = false) {
-  return new(con->zone) OffsetPromise(con, con->lastBlock, con->code.length(), forTrace);
-}
-
-bool
-bounded(int right, int left, int32_t v)
-{
-  return ((v << left) >> left) == v and ((v >> right) << right) == v;
-}
-
-void*
-updateOffset(System* s, uint8_t* instruction, int64_t value)
-{
-  // ARM's PC is two words ahead, and branches drop the bottom 2 bits.
-  int32_t v = (reinterpret_cast<uint8_t*>(value) - (instruction + 8)) >> 2;
-
-  int32_t mask;
-  expect(s, bounded(0, 8, v));
-  mask = 0xFFFFFF;
-
-  int32_t* p = reinterpret_cast<int32_t*>(instruction);
-  *p = (v & mask) | ((~mask) & *p);
-
-  return instruction + 4;
-}
-
-class OffsetListener: public Promise::Listener {
- public:
-  OffsetListener(System* s, uint8_t* instruction):
-    s(s),
-    instruction(instruction)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    void* p = updateOffset(s, instruction, value);
-    if (location) *location = p;
-    return false;
-  }
-
-  System* s;
-  uint8_t* instruction;
-};
-
-class OffsetTask: public Task {
- public:
-  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset):
-    Task(next),
-    promise(promise),
-    instructionOffset(instructionOffset)
-  { }
-
-  virtual void run(Context* con) {
-    if (promise->resolved()) {
-      updateOffset
-        (con->s, con->result + instructionOffset->value(), promise->value());
-    } else {
-      new (promise->listen(sizeof(OffsetListener)))
-        OffsetListener(con->s, con->result + instructionOffset->value());
-    }
-  }
-
-  Promise* promise;
-  Promise* instructionOffset;
-};
-
-void
-appendOffsetTask(Context* con, Promise* promise, Promise* instructionOffset)
-{
-  con->tasks = new(con->zone) OffsetTask(con->tasks, promise, instructionOffset);
-}
-
-inline unsigned
-index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
-{
-  return operation + (lir::UnaryOperationCount * operand);
-}
-
-inline unsigned
-index(ArchitectureContext*,
-      lir::BinaryOperation operation,
-      lir::OperandType operand1,
-      lir::OperandType operand2)
-{
-  return operation
-    + (lir::BinaryOperationCount * operand1)
-    + (lir::BinaryOperationCount * lir::OperandTypeCount * operand2);
-}
-
-inline unsigned
-index(ArchitectureContext* con UNUSED,
-      lir::TernaryOperation operation,
-      lir::OperandType operand1)
-{
-  assert(con, not isBranch(operation));
-
-  return operation + (lir::NonBranchTernaryOperationCount * operand1);
-}
-
-unsigned
-branchIndex(ArchitectureContext* con UNUSED, lir::OperandType operand1,
-            lir::OperandType operand2)
-{
-  return operand1 + (lir::OperandTypeCount * operand2);
-}
-
 // BEGIN OPERATION COMPILERS
 
 using namespace isa;
 
-// shortcut functions
-inline void emit(Context* con, int code) { con->code.append4(code); }
-
-inline int newTemp(Context* con) {
-  return con->client->acquireTemporary(GPR_MASK);
-}
-
-inline int newTemp(Context* con, unsigned mask) {
-  return con->client->acquireTemporary(mask);
-}
-
-inline void freeTemp(Context* con, int r) {
-  con->client->releaseTemporary(r);
-}
-
-inline int64_t getValue(lir::Constant* con) {
-  return con->value->value();
-}
-
-inline lir::Register makeTemp(Context* con) {
-  lir::Register tmp(newTemp(con));
-  return tmp;
-}
-
-inline lir::Register makeTemp64(Context* con) {
-  lir::Register tmp(newTemp(con), newTemp(con));
-  return tmp;
-}
-
-inline void freeTemp(Context* con, const lir::Register& tmp) {
-  if (tmp.low != lir::NoRegister) freeTemp(con, tmp.low);
-  if (tmp.high != lir::NoRegister) freeTemp(con, tmp.high);
-}
-
-inline void
-write4(uint8_t* dst, uint32_t v)
-{
-  memcpy(dst, &v, 4);
-}
-
-void
-andC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst);
-
-void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  if (size == 8) {
-    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
-    ResolvedPromise maskPromise(0x3F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp3);
-    andC(con, 4, &mask, a, &dst);
-    emit(con, lsl(tmp1, b->high, tmp3));
-    emit(con, rsbi(tmp2, tmp3, 32));
-    emit(con, orrsh(tmp1, tmp1, b->low, tmp2, LSR));
-    emit(con, SETS(subi(t->high, tmp3, 32)));
-    emit(con, SETCOND(mov(t->high, tmp1), MI));
-    emit(con, SETCOND(lsl(t->high, b->low, t->high), PL));
-    emit(con, lsl(t->low, b->low, tmp3));
-    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
-  } else {
-    int tmp = newTemp(con);
-    ResolvedPromise maskPromise(0x1F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp);
-    andC(con, size, &mask, a, &dst);
-    emit(con, lsl(t->low, b->low, tmp));
-    freeTemp(con, tmp);
-  }
-}
-
-void
-moveRR(Context* con, unsigned srcSize, lir::Register* src,
-       unsigned dstSize, lir::Register* dst);
-
-void shiftLeftC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  assert(con, size == TargetBytesPerWord);
-  if (getValue(a) & 0x1F) {
-    emit(con, lsli(t->low, b->low, getValue(a) & 0x1F));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  if (size == 8) {
-    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
-    ResolvedPromise maskPromise(0x3F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp3);
-    andC(con, 4, &mask, a, &dst);
-    emit(con, lsr(tmp1, b->low, tmp3));
-    emit(con, rsbi(tmp2, tmp3, 32));
-    emit(con, orrsh(tmp1, tmp1, b->high, tmp2, LSL));
-    emit(con, SETS(subi(t->low, tmp3, 32)));
-    emit(con, SETCOND(mov(t->low, tmp1), MI));
-    emit(con, SETCOND(asr(t->low, b->high, t->low), PL));
-    emit(con, asr(t->high, b->high, tmp3));
-    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
-  } else {
-    int tmp = newTemp(con);
-    ResolvedPromise maskPromise(0x1F);
-    lir::Constant mask(&maskPromise);
-    lir::Register dst(tmp);
-    andC(con, size, &mask, a, &dst);
-    emit(con, asr(t->low, b->low, tmp));
-    freeTemp(con, tmp);
-  }
-}
-
-void shiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  assert(con, size == TargetBytesPerWord);
-  if (getValue(a) & 0x1F) {
-    emit(con, asri(t->low, b->low, getValue(a) & 0x1F));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
-{
-  int tmpShift = newTemp(con);
-  ResolvedPromise maskPromise(size == 8 ? 0x3F : 0x1F);
-  lir::Constant mask(&maskPromise);
-  lir::Register dst(tmpShift);
-  andC(con, 4, &mask, a, &dst);
-  emit(con, lsr(t->low, b->low, tmpShift));
-  if (size == 8) {
-    int tmpHi = newTemp(con), tmpLo = newTemp(con);
-    emit(con, SETS(rsbi(tmpHi, tmpShift, 32)));
-    emit(con, lsl(tmpLo, b->high, tmpHi));
-    emit(con, orr(t->low, t->low, tmpLo));
-    emit(con, addi(tmpHi, tmpShift, -32));
-    emit(con, lsr(tmpLo, b->high, tmpHi));
-    emit(con, orr(t->low, t->low, tmpLo));
-    emit(con, lsr(t->high, b->high, tmpShift));
-    freeTemp(con, tmpHi); freeTemp(con, tmpLo);
-  }
-  freeTemp(con, tmpShift);
-}
-
-void unsignedShiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
-{
-  assert(con, size == TargetBytesPerWord);
-  if (getValue(a) & 0x1F) {
-    emit(con, lsri(t->low, b->low, getValue(a) & 0x1F));
-  } else {
-    moveRR(con, size, b, size, t);
-  }
-}
-
-class ConstantPoolEntry: public Promise {
- public:
-  ConstantPoolEntry(Context* con, Promise* constant, ConstantPoolEntry* next,
-                    Promise* callOffset):
-    con(con), constant(constant), next(next), callOffset(callOffset),
-    address(0)
-  { }
-
-  virtual int64_t value() {
-    assert(con, resolved());
-
-    return reinterpret_cast<int64_t>(address);
-  }
-
-  virtual bool resolved() {
-    return address != 0;
-  }
-
-  Context* con;
-  Promise* constant;
-  ConstantPoolEntry* next;
-  Promise* callOffset;
-  void* address;
-  unsigned constantPoolCount;
-};
-
-class ConstantPoolListener: public Promise::Listener {
- public:
-  ConstantPoolListener(System* s, target_uintptr_t* address,
-                       uint8_t* returnAddress):
-    s(s),
-    address(address),
-    returnAddress(returnAddress)
-  { }
-
-  virtual bool resolve(int64_t value, void** location) {
-    *address = value;
-    if (location) {
-      *location = returnAddress ? static_cast<void*>(returnAddress) : address;
-    }
-    return true;
-  }
-
-  System* s;
-  target_uintptr_t* address;
-  uint8_t* returnAddress;
-};
-
-class PoolOffset {
- public:
-  PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset):
-    block(block), entry(entry), next(0), offset(offset)
-  { }
-
-  MyBlock* block;
-  ConstantPoolEntry* entry;
-  PoolOffset* next;
-  unsigned offset;
-};
-
-class PoolEvent {
- public:
-  PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail,
-            unsigned offset):
-    poolOffsetHead(poolOffsetHead), poolOffsetTail(poolOffsetTail), next(0),
-    offset(offset)
-  { }
-
-  PoolOffset* poolOffsetHead;
-  PoolOffset* poolOffsetTail;
-  PoolEvent* next;
-  unsigned offset;
-};
-
-void
-appendConstantPoolEntry(Context* con, Promise* constant, Promise* callOffset)
-{
-  if (constant->resolved()) {
-    // make a copy, since the original might be allocated on the
-    // stack, and we need our copy to live until assembly is complete
-    constant = new(con->zone) ResolvedPromise(constant->value());
-  }
-
-  con->constantPool = new(con->zone) ConstantPoolEntry(con, constant, con->constantPool, callOffset);
-
-  ++ con->constantPoolCount;
-
-  PoolOffset* o = new(con->zone) PoolOffset(con->lastBlock, con->constantPool, con->code.length() - con->lastBlock->offset);
-
-  if (DebugPool) {
-    fprintf(stderr, "add pool offset %p %d to block %p\n",
-            o, o->offset, con->lastBlock);
-  }
-
-  if (con->lastBlock->poolOffsetTail) {
-    con->lastBlock->poolOffsetTail->next = o;
-  } else {
-    con->lastBlock->poolOffsetHead = o;
-  }
-  con->lastBlock->poolOffsetTail = o;
-}
-
-void
-appendPoolEvent(Context* con, MyBlock* b, unsigned offset, PoolOffset* head,
-                PoolOffset* tail)
-{
-  PoolEvent* e = new(con->zone) PoolEvent(head, tail, offset);
-
-  if (b->poolEventTail) {
-    b->poolEventTail->next = e;
-  } else {
-    b->poolEventHead = e;
-  }
-  b->poolEventTail = e;
-}
-
-bool
-needJump(MyBlock* b)
-{
-  return b->next or b->size != (b->size & PoolOffsetMask);
-}
-
-unsigned
-padding(MyBlock* b, unsigned offset)
-{
-  unsigned total = 0;
-  for (PoolEvent* e = b->poolEventHead; e; e = e->next) {
-    if (e->offset <= offset) {
-      if (needJump(b)) {
-        total += TargetBytesPerWord;
-      }
-      for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) {
-        total += TargetBytesPerWord;
-      }
-    } else {
-      break;
-    }
-  }
-  return total;
-}
-
-void
-resolve(MyBlock* b)
-{
-  Context* con = b->context;
-
-  if (b->poolOffsetHead) {
-    if (con->poolOffsetTail) {
-      con->poolOffsetTail->next = b->poolOffsetHead;
-    } else {
-      con->poolOffsetHead = b->poolOffsetHead;
-    }
-    con->poolOffsetTail = b->poolOffsetTail;
-  }
-
-  if (con->poolOffsetHead) {
-    bool append;
-    if (b->next == 0 or b->next->poolEventHead) {
-      append = true;
-    } else {
-      int32_t v = (b->start + b->size + b->next->size + TargetBytesPerWord - 8)
-        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
-
-      append = (v != (v & PoolOffsetMask));
-
-      if (DebugPool) {
-        fprintf(stderr,
-                "current %p %d %d next %p %d %d\n",
-                b, b->start, b->size, b->next, b->start + b->size,
-                b->next->size);
-        fprintf(stderr,
-                "offset %p %d is of distance %d to next block; append? %d\n",
-                con->poolOffsetHead, con->poolOffsetHead->offset, v, append);
-      }
-    }
-
-    if (append) {
-#ifndef NDEBUG
-      int32_t v = (b->start + b->size - 8)
-        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
-      
-      expect(con, v == (v & PoolOffsetMask));
-#endif // not NDEBUG
-
-      appendPoolEvent(con, b, b->size, con->poolOffsetHead, con->poolOffsetTail);
-
-      if (DebugPool) {
-        for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) {
-          fprintf(stderr,
-                  "include %p %d in pool event %p at offset %d in block %p\n",
-                  o, o->offset, b->poolEventTail, b->size, b);
-        }
-      }
-
-      con->poolOffsetHead = 0;
-      con->poolOffsetTail = 0;
-    }
-  }
-}
-
-void
-jumpR(Context* con, unsigned size UNUSED, lir::Register* target)
-{
-  assert(con, size == TargetBytesPerWord);
-  emit(con, bx(target->low));
-}
-
-void
-swapRR(Context* con, unsigned aSize, lir::Register* a,
-       unsigned bSize, lir::Register* b)
-{
-  assert(con, aSize == TargetBytesPerWord);
-  assert(con, bSize == TargetBytesPerWord);
-
-  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-  moveRR(con, aSize, a, bSize, &tmp);
-  moveRR(con, bSize, b, aSize, a);
-  moveRR(con, bSize, &tmp, bSize, b);
-  con->client->releaseTemporary(tmp.low);
-}
-
-void
-moveRR(Context* con, unsigned srcSize, lir::Register* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  bool srcIsFpr = isFpr(src);
-  bool dstIsFpr = isFpr(dst);
-  if (srcIsFpr || dstIsFpr) {   // FPR(s) involved
-    assert(con, srcSize == dstSize);
-    const bool dprec = srcSize == 8;
-    if (srcIsFpr && dstIsFpr) { // FPR to FPR
-      if (dprec) emit(con, fcpyd(fpr64(dst), fpr64(src))); // double
-      else       emit(con, fcpys(fpr32(dst), fpr32(src))); // single
-    } else if (srcIsFpr) {      // FPR to GPR
-      if (dprec) emit(con, fmrrd(dst->low, dst->high, fpr64(src)));
-      else       emit(con, fmrs(dst->low, fpr32(src)));
-    } else {                    // GPR to FPR
-      if (dprec) emit(con, fmdrr(fpr64(dst->low), src->low, src->high));
-      else       emit(con, fmsr(fpr32(dst), src->low));
-    }
-    return;
-  }
-
-  switch (srcSize) {
-  case 1:
-    emit(con, lsli(dst->low, src->low, 24));
-    emit(con, asri(dst->low, dst->low, 24));
-    break;
-
-  case 2:
-    emit(con, lsli(dst->low, src->low, 16));
-    emit(con, asri(dst->low, dst->low, 16));
-    break;
-
-  case 4:
-  case 8:
-    if (srcSize == 4 and dstSize == 8) {
-      moveRR(con, 4, src, 4, dst);
-      emit(con, asri(dst->high, src->low, 31));
-    } else if (srcSize == 8 and dstSize == 8) {
-      lir::Register srcHigh(src->high);
-      lir::Register dstHigh(dst->high);
-
-      if (src->high == dst->low) {
-        if (src->low == dst->high) {
-          swapRR(con, 4, src, 4, dst);
-        } else {
-          moveRR(con, 4, &srcHigh, 4, &dstHigh);
-          moveRR(con, 4, src, 4, dst);
-        }
-      } else {
-        moveRR(con, 4, src, 4, dst);
-        moveRR(con, 4, &srcHigh, 4, &dstHigh);
-      }
-    } else if (src->low != dst->low) {
-      emit(con, mov(dst->low, src->low));
-    }
-    break;
-
-  default: abort(con);
-  }
-}
-
-void
-moveZRR(Context* con, unsigned srcSize, lir::Register* src,
-        unsigned, lir::Register* dst)
-{
-  switch (srcSize) {
-  case 2:
-    emit(con, lsli(dst->low, src->low, 16));
-    emit(con, lsri(dst->low, dst->low, 16));
-    break;
-
-  default: abort(con);
-  }
-}
-
-void moveCR(Context* con, unsigned size, lir::Constant* src,
-            unsigned, lir::Register* dst);
-
-void
-moveCR2(Context* con, unsigned size, lir::Constant* src,
-        lir::Register* dst, Promise* callOffset)
-{
-  if (isFpr(dst)) { // floating-point
-    lir::Register tmp = size > 4 ? makeTemp64(con) :
-                                         makeTemp(con);
-    moveCR(con, size, src, size, &tmp);
-    moveRR(con, size, &tmp, size, dst);
-    freeTemp(con, tmp);
-  } else if (size > 4) { 
-    uint64_t value = (uint64_t)src->value->value();
-    ResolvedPromise loBits(value & MASK_LO32);
-    lir::Constant srcLo(&loBits);
-    ResolvedPromise hiBits(value >> 32); 
-    lir::Constant srcHi(&hiBits);
-    lir::Register dstHi(dst->high);
-    moveCR(con, 4, &srcLo, 4, dst);
-    moveCR(con, 4, &srcHi, 4, &dstHi);
-  } else if (src->value->resolved() and isOfWidth(getValue(src), 8)) {
-    emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate
-  } else {
-    appendConstantPoolEntry(con, src->value, callOffset);
-    emit(con, ldri(dst->low, ProgramCounter, 0)); // load 32 bits
-  }
-}
-
-void
-moveCR(Context* con, unsigned size, lir::Constant* src,
-       unsigned, lir::Register* dst)
-{
-  moveCR2(con, size, src, dst, 0);
-}
-
-void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, SETS(add(t->low, a->low, b->low)));
-    emit(con, adc(t->high, a->high, b->high));
-  } else {
-    emit(con, add(t->low, a->low, b->low));
-  }
-}
-
-void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, SETS(rsb(t->low, a->low, b->low)));
-    emit(con, rsc(t->high, a->high, b->high));
-  } else {
-    emit(con, rsb(t->low, a->low, b->low));
-  }
-}
-
-void
-addC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  int32_t v = a->value->value();
-  if (v) {
-    if (v > 0 and v < 256) {
-      emit(con, addi(dst->low, b->low, v));
-    } else if (v > 0 and v < 1024 and v % 4 == 0) {
-      emit(con, addi(dst->low, b->low, v >> 2, 15));
-    } else {
-      // todo
-      abort(con);
-    }
-  } else {
-    moveRR(con, size, b, size, dst);
-  }
-}
-
-void
-subC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  int32_t v = a->value->value();
-  if (v) {
-    if (v > 0 and v < 256) {
-      emit(con, subi(dst->low, b->low, v));
-    } else if (v > 0 and v < 1024 and v % 4 == 0) {
-      emit(con, subi(dst->low, b->low, v >> 2, 15));
-    } else {
-      // todo
-      abort(con);
-    }
-  } else {
-    moveRR(con, size, b, size, dst);
-  }
-}
-
-void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    bool useTemporaries = b->low == t->low;
-    int tmpLow  = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->low;
-    int tmpHigh = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->high;
-
-    emit(con, umull(tmpLow, tmpHigh, a->low, b->low));
-    emit(con, mla(tmpHigh, a->low, b->high, tmpHigh));
-    emit(con, mla(tmpHigh, a->high, b->low, tmpHigh));
-
-    if (useTemporaries) {
-      emit(con, mov(t->low, tmpLow));
-      emit(con, mov(t->high, tmpHigh));
-      con->client->releaseTemporary(tmpLow);
-      con->client->releaseTemporary(tmpHigh);
-    }
-  } else {
-    emit(con, mul(t->low, a->low, b->low));
-  }
-}
-
-void floatAbsoluteRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fabsd(fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fabss(fpr32(b), fpr32(a)));
-  }
-}
-
-void floatNegateRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fnegd(fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fnegs(fpr32(b), fpr32(a)));
-  }
-}
-
-void float2FloatRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fcvtsd(fpr32(b), fpr64(a)));
-  } else {
-    emit(con, fcvtds(fpr64(b), fpr32(a)));
-  }
-}
-
-void float2IntRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  int tmp = newTemp(con, FPR_MASK);
-  int ftmp = fpr32(tmp);
-  if (size == 8) { // double to int
-    emit(con, ftosizd(ftmp, fpr64(a)));
-  } else {         // float to int
-    emit(con, ftosizs(ftmp, fpr32(a)));
-  }                // else thunked
-  emit(con, fmrs(b->low, ftmp));
-  freeTemp(con, tmp);
-}
-
-void int2FloatRR(Context* con, unsigned, lir::Register* a, unsigned size, lir::Register* b) {
-  emit(con, fmsr(fpr32(b), a->low));
-  if (size == 8) { // int to double
-    emit(con, fsitod(fpr64(b), fpr32(b)));
-  } else {         // int to float
-    emit(con, fsitos(fpr32(b), fpr32(b)));
-  }                // else thunked
-}
-
-void floatSqrtRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
-  if (size == 8) {
-    emit(con, fsqrtd(fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fsqrts(fpr32(b), fpr32(a)));
-  }
-}
-
-void floatAddR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, faddd(fpr64(t), fpr64(a), fpr64(b)));
-  } else {
-    emit(con, fadds(fpr32(t), fpr32(a), fpr32(b)));
-  }
-}
-
-void floatSubtractR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, fsubd(fpr64(t), fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fsubs(fpr32(t), fpr32(b), fpr32(a)));
-  }
-}
-
-void floatMultiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) {
-    emit(con, fmuld(fpr64(t), fpr64(a), fpr64(b)));
-  } else {
-    emit(con, fmuls(fpr32(t), fpr32(a), fpr32(b)));
-  }
-}
-
-void floatDivideR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
-  if (size == 8) { 
-    emit(con, fdivd(fpr64(t), fpr64(b), fpr64(a)));
-  } else {
-    emit(con, fdivs(fpr32(t), fpr32(b), fpr32(a)));
-  }
-}
-
-int
-normalize(Context* con, int offset, int index, unsigned scale, 
-          bool* preserveIndex, bool* release)
-{
-  if (offset != 0 or scale != 1) {
-    lir::Register normalizedIndex
-      (*preserveIndex ? con->client->acquireTemporary(GPR_MASK) : index);
-    
-    if (*preserveIndex) {
-      *release = true;
-      *preserveIndex = false;
-    } else {
-      *release = false;
-    }
-
-    int scaled;
-
-    if (scale != 1) {
-      lir::Register unscaledIndex(index);
-
-      ResolvedPromise scalePromise(log(scale));
-      lir::Constant scaleConstant(&scalePromise);
-      
-      shiftLeftC(con, TargetBytesPerWord, &scaleConstant,
-                 &unscaledIndex, &normalizedIndex);
-
-      scaled = normalizedIndex.low;
-    } else {
-      scaled = index;
-    }
-
-    if (offset != 0) {
-      lir::Register untranslatedIndex(scaled);
-
-      ResolvedPromise offsetPromise(offset);
-      lir::Constant offsetConstant(&offsetPromise);
-
-      lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-      moveCR(con, TargetBytesPerWord, &offsetConstant, TargetBytesPerWord, &tmp);
-      addR(con, TargetBytesPerWord, &tmp, &untranslatedIndex, &normalizedIndex);
-      con->client->releaseTemporary(tmp.low);
-    }
-
-    return normalizedIndex.low;
-  } else {
-    *release = false;
-    return index;
-  }
-}
-
-void
-store(Context* con, unsigned size, lir::Register* src,
-      int base, int offset, int index, unsigned scale, bool preserveIndex)
-{
-  if (index != lir::NoRegister) {
-    bool release;
-    int normalized = normalize
-      (con, offset, index, scale, &preserveIndex, &release);
-
-    if (!isFpr(src)) { // GPR store
-      switch (size) {
-      case 1:
-        emit(con, strb(src->low, base, normalized));
-        break;
-
-      case 2:
-        emit(con, strh(src->low, base, normalized));
-        break;
-
-      case 4:
-        emit(con, str(src->low, base, normalized));
-        break;
-
-      case 8: { // split into 2 32-bit stores
-        lir::Register srcHigh(src->high);
-        store(con, 4, &srcHigh, base, 0, normalized, 1, preserveIndex);
-        store(con, 4, src, base, 4, normalized, 1, preserveIndex);
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR store
-      lir::Register base_(base),
-                          normalized_(normalized),
-                          absAddr = makeTemp(con);
-      // FPR stores have only bases, so we must add the index
-      addR(con, TargetBytesPerWord, &base_, &normalized_, &absAddr);
-      // double-precision
-      if (size == 8) emit(con, fstd(fpr64(src), absAddr.low));
-      // single-precision
-      else           emit(con, fsts(fpr32(src), absAddr.low));
-      freeTemp(con, absAddr);
-    }
-
-    if (release) con->client->releaseTemporary(normalized);
-  } else if (size == 8
-             or abs(offset) == (abs(offset) & 0xFF)
-             or (size != 2 and abs(offset) == (abs(offset) & 0xFFF)))
-  {
-    if (!isFpr(src)) { // GPR store
-      switch (size) {
-      case 1:
-        emit(con, strbi(src->low, base, offset));
-        break;
-
-      case 2:
-        emit(con, strhi(src->low, base, offset));
-        break;
-
-      case 4:
-        emit(con, stri(src->low, base, offset));
-        break;
-
-      case 8: { // split into 2 32-bit stores
-        lir::Register srcHigh(src->high);
-        store(con, 4, &srcHigh, base, offset, lir::NoRegister, 1, false);
-        store(con, 4, src, base, offset + 4, lir::NoRegister, 1, false);
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR store
-      // double-precision
-      if (size == 8) emit(con, fstd(fpr64(src), base, offset));
-      // single-precision
-      else           emit(con, fsts(fpr32(src), base, offset));
-    }
-  } else {
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    ResolvedPromise offsetPromise(offset);
-    lir::Constant offsetConstant(&offsetPromise);
-    moveCR(con, TargetBytesPerWord, &offsetConstant,
-           TargetBytesPerWord, &tmp);
-    
-    store(con, size, src, base, 0, tmp.low, 1, false);
-
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-moveRM(Context* con, unsigned srcSize, lir::Register* src,
-       unsigned dstSize UNUSED, lir::Memory* dst)
-{
-  assert(con, srcSize == dstSize);
-
-  store(con, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
-}
-
-void
-load(Context* con, unsigned srcSize, int base, int offset, int index,
-     unsigned scale, unsigned dstSize, lir::Register* dst,
-     bool preserveIndex, bool signExtend)
-{
-  if (index != lir::NoRegister) {
-    bool release;
-    int normalized = normalize
-      (con, offset, index, scale, &preserveIndex, &release);
-
-    if (!isFpr(dst)) { // GPR load
-      switch (srcSize) {
-      case 1:
-        if (signExtend) {
-          emit(con, ldrsb(dst->low, base, normalized));
-        } else {
-          emit(con, ldrb(dst->low, base, normalized));
-        }
-        break;
-
-      case 2:
-        if (signExtend) {
-          emit(con, ldrsh(dst->low, base, normalized));
-        } else {
-          emit(con, ldrh(dst->low, base, normalized));
-        }
-        break;
-
-      case 4:
-      case 8: {
-        if (srcSize == 4 and dstSize == 8) {
-          load(con, 4, base, 0, normalized, 1, 4, dst, preserveIndex,
-               false);
-          moveRR(con, 4, dst, 8, dst);
-        } else if (srcSize == 8 and dstSize == 8) {
-          lir::Register dstHigh(dst->high);
-          load(con, 4, base, 0, normalized, 1, 4, &dstHigh,
-              preserveIndex, false);
-          load(con, 4, base, 4, normalized, 1, 4, dst, preserveIndex,
-               false);
-        } else {
-          emit(con, ldr(dst->low, base, normalized));
-        }
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR load
-      lir::Register base_(base),
-                          normalized_(normalized),
-                          absAddr = makeTemp(con);
-      // VFP loads only have bases, so we must add the index
-      addR(con, TargetBytesPerWord, &base_, &normalized_, &absAddr);
-      // double-precision
-      if (srcSize == 8) emit(con, fldd(fpr64(dst), absAddr.low));
-      // single-precision
-      else              emit(con, flds(fpr32(dst), absAddr.low));
-      freeTemp(con, absAddr);
-    }
-
-    if (release) con->client->releaseTemporary(normalized);
-  } else if ((srcSize == 8 and dstSize == 8)
-             or abs(offset) == (abs(offset) & 0xFF)
-             or (srcSize != 2
-                 and (srcSize != 1 or not signExtend)
-                 and abs(offset) == (abs(offset) & 0xFFF)))
-  {
-    if (!isFpr(dst)) { // GPR load
-      switch (srcSize) {
-      case 1:
-        if (signExtend) {
-          emit(con, ldrsbi(dst->low, base, offset));
-        } else {
-          emit(con, ldrbi(dst->low, base, offset));
-        }
-        break;
-
-      case 2:
-        if (signExtend) {
-          emit(con, ldrshi(dst->low, base, offset));
-        } else {
-          emit(con, ldrhi(dst->low, base, offset));
-        }
-        break;
-
-      case 4:
-        emit(con, ldri(dst->low, base, offset));
-        break;
-
-      case 8: {
-        if (dstSize == 8) {
-          lir::Register dstHigh(dst->high);
-          load(con, 4, base, offset, lir::NoRegister, 1, 4, &dstHigh, false,
-               false);
-          load(con, 4, base, offset + 4, lir::NoRegister, 1, 4, dst, false,
-               false);
-        } else {
-          emit(con, ldri(dst->low, base, offset));
-        }
-      } break;
-
-      default: abort(con);
-      }
-    } else { // FPR load
-      // double-precision
-      if (srcSize == 8) emit(con, fldd(fpr64(dst), base, offset));
-      // single-precision
-      else              emit(con, flds(fpr32(dst), base, offset));
-    }
-  } else {
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    ResolvedPromise offsetPromise(offset);
-    lir::Constant offsetConstant(&offsetPromise);
-    moveCR(con, TargetBytesPerWord, &offsetConstant, TargetBytesPerWord,
-           &tmp);
-    
-    load(con, srcSize, base, 0, tmp.low, 1, dstSize, dst, false,
-         signExtend);
-
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-moveMR(Context* con, unsigned srcSize, lir::Memory* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  load(con, srcSize, src->base, src->offset, src->index, src->scale,
-       dstSize, dst, true, true);
-}
-
-void
-moveZMR(Context* con, unsigned srcSize, lir::Memory* src,
-        unsigned dstSize, lir::Register* dst)
-{
-  load(con, srcSize, src->base, src->offset, src->index, src->scale,
-       dstSize, dst, true, false);
-}
-
-void
-andR(Context* con, unsigned size, lir::Register* a,
-     lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) emit(con, and_(dst->high, a->high, b->high));
-  emit(con, and_(dst->low, a->low, b->low));
-}
-
-void
-andC(Context* con, unsigned size, lir::Constant* a,
-     lir::Register* b, lir::Register* dst)
-{
-  int64_t v = a->value->value();
-
-  if (size == 8) {
-    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
-    lir::Constant ah(&high);
-
-    ResolvedPromise low(v & 0xFFFFFFFF);
-    lir::Constant al(&low);
-
-    lir::Register bh(b->high);
-    lir::Register dh(dst->high);
-
-    andC(con, 4, &al, b, dst);
-    andC(con, 4, &ah, &bh, &dh);
-  } else {
-    uint32_t v32 = static_cast<uint32_t>(v);
-    if (v32 != 0xFFFFFFFF) {
-      if ((v32 & 0xFFFFFF00) == 0xFFFFFF00) {
-        emit(con, bici(dst->low, b->low, (~(v32 & 0xFF)) & 0xFF));
-      } else if ((v32 & 0xFFFFFF00) == 0) {
-        emit(con, andi(dst->low, b->low, v32 & 0xFF));
-      } else {
-        // todo: there are other cases we can handle in one
-        // instruction
-
-        bool useTemporary = b->low == dst->low;
-        lir::Register tmp(dst->low);
-        if (useTemporary) {
-          tmp.low = con->client->acquireTemporary(GPR_MASK);
-        }
-
-        moveCR(con, 4, a, 4, &tmp);
-        andR(con, 4, b, &tmp, dst);
-        
-        if (useTemporary) {
-          con->client->releaseTemporary(tmp.low);
-        }
-      }
-    } else {
-      moveRR(con, size, b, size, dst);
-    }
-  }
-}
-
-void
-orR(Context* con, unsigned size, lir::Register* a,
-    lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) emit(con, orr(dst->high, a->high, b->high));
-  emit(con, orr(dst->low, a->low, b->low));
-}
-
-void
-xorR(Context* con, unsigned size, lir::Register* a,
-     lir::Register* b, lir::Register* dst)
-{
-  if (size == 8) emit(con, eor(dst->high, a->high, b->high));
-  emit(con, eor(dst->low, a->low, b->low));
-}
-
-void
-moveAR2(Context* con, unsigned srcSize, lir::Address* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  assert(con, srcSize == 4 and dstSize == 4);
-
-  lir::Constant constant(src->address);
-  moveCR(con, srcSize, &constant, dstSize, dst);
-
-  lir::Memory memory(dst->low, 0, -1, 0);
-  moveMR(con, dstSize, &memory, dstSize, dst);
-}
-
-void
-moveAR(Context* con, unsigned srcSize, lir::Address* src,
-       unsigned dstSize, lir::Register* dst)
-{
-  moveAR2(con, srcSize, src, dstSize, dst);
-}
-
-void
-compareRR(Context* con, unsigned aSize, lir::Register* a,
-          unsigned bSize UNUSED, lir::Register* b)
-{
-  assert(con, !(isFpr(a) ^ isFpr(b))); // regs must be of the same type
-
-  if (!isFpr(a)) { // GPR compare
-    assert(con, aSize == 4 && bSize == 4);
-    /**///assert(con, b->low != a->low);
-    emit(con, cmp(b->low, a->low));
-  } else {         // FPR compare
-    assert(con, aSize == bSize);
-    if (aSize == 8) emit(con, fcmpd(fpr64(b), fpr64(a))); // double
-    else            emit(con, fcmps(fpr32(b), fpr32(a))); // single
-    emit(con, fmstat());
-  }
-}
-
-void
-compareCR(Context* con, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Register* b)
-{
-  assert(con, aSize == 4 and bSize == 4);
-
-  if (!isFpr(b) && a->value->resolved() &&
-      isOfWidth(a->value->value(), 8)) {
-    emit(con, cmpi(b->low, a->value->value()));
-  } else {
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    moveCR(con, aSize, a, bSize, &tmp);
-    compareRR(con, bSize, &tmp, bSize, b);
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-compareCM(Context* con, unsigned aSize, lir::Constant* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(con, aSize == 4 and bSize == 4);
-
-  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-  moveMR(con, bSize, b, bSize, &tmp);
-  compareCR(con, aSize, a, bSize, &tmp);
-  con->client->releaseTemporary(tmp.low);
-}
-
-void
-compareRM(Context* con, unsigned aSize, lir::Register* a,
-          unsigned bSize, lir::Memory* b)
-{
-  assert(con, aSize == 4 and bSize == 4);
-
-  lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-  moveMR(con, bSize, b, bSize, &tmp);
-  compareRR(con, aSize, a, bSize, &tmp);
-  con->client->releaseTemporary(tmp.low);
-}
-
-int32_t
-branch(Context* con, lir::TernaryOperation op)
-{
-  switch (op) {
-  case lir::JumpIfEqual:
-  case lir::JumpIfFloatEqual:
-    return beq(0);
-
-  case lir::JumpIfNotEqual:
-  case lir::JumpIfFloatNotEqual:
-    return bne(0);
-
-  case lir::JumpIfLess:
-  case lir::JumpIfFloatLess:
-  case lir::JumpIfFloatLessOrUnordered:
-    return blt(0);
-
-  case lir::JumpIfGreater:
-  case lir::JumpIfFloatGreater:
-    return bgt(0);
-
-  case lir::JumpIfLessOrEqual:
-  case lir::JumpIfFloatLessOrEqual:
-  case lir::JumpIfFloatLessOrEqualOrUnordered:
-    return ble(0);
-
-  case lir::JumpIfGreaterOrEqual:
-  case lir::JumpIfFloatGreaterOrEqual:
-    return bge(0);
-
-  case lir::JumpIfFloatGreaterOrUnordered:
-    return bhi(0);
-
-  case lir::JumpIfFloatGreaterOrEqualOrUnordered:
-    return bpl(0);
- 
-  default:
-    abort(con);
-  }
-}
-
-void
-conditional(Context* con, int32_t branch, lir::Constant* target)
-{
-  appendOffsetTask(con, target->value, offsetPromise(con));
-  emit(con, branch);
-}
-
-void
-branch(Context* con, lir::TernaryOperation op, lir::Constant* target)
-{
-  conditional(con, branch(con, op), target);
-}
-
-void
-branchLong(Context* con, lir::TernaryOperation op, lir::Operand* al,
-           lir::Operand* ah, lir::Operand* bl,
-           lir::Operand* bh, lir::Constant* target,
-           BinaryOperationType compareSigned,
-           BinaryOperationType compareUnsigned)
-{
-  compareSigned(con, 4, ah, 4, bh);
-
-  unsigned next = 0;
-  
-  switch (op) {
-  case lir::JumpIfEqual:
-  case lir::JumpIfFloatEqual:
-    next = con->code.length();
-    emit(con, bne(0));
-
-    compareSigned(con, 4, al, 4, bl);
-    conditional(con, beq(0), target);
-    break;
-
-  case lir::JumpIfNotEqual:
-  case lir::JumpIfFloatNotEqual:
-    conditional(con, bne(0), target);
-
-    compareSigned(con, 4, al, 4, bl);
-    conditional(con, bne(0), target);
-    break;
-
-  case lir::JumpIfLess:
-  case lir::JumpIfFloatLess:
-    conditional(con, blt(0), target);
-
-    next = con->code.length();
-    emit(con, bgt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, blo(0), target);
-    break;
-
-  case lir::JumpIfGreater:
-  case lir::JumpIfFloatGreater:
-    conditional(con, bgt(0), target);
-
-    next = con->code.length();
-    emit(con, blt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, bhi(0), target);
-    break;
-
-  case lir::JumpIfLessOrEqual:
-  case lir::JumpIfFloatLessOrEqual:
-    conditional(con, blt(0), target);
-
-    next = con->code.length();
-    emit(con, bgt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, bls(0), target);
-    break;
-
-  case lir::JumpIfGreaterOrEqual:
-  case lir::JumpIfFloatGreaterOrEqual:
-    conditional(con, bgt(0), target);
-
-    next = con->code.length();
-    emit(con, blt(0));
-
-    compareUnsigned(con, 4, al, 4, bl);
-    conditional(con, bhs(0), target);
-    break;
-
-  default:
-    abort(con);
-  }
-
-  if (next) {
-    updateOffset
-      (con->s, con->code.data + next, reinterpret_cast<intptr_t>
-       (con->code.data + con->code.length()));
-  }
-}
-
-void
-branchRR(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Register* b,
-         lir::Constant* target)
-{
-  if (!isFpr(a) && size > TargetBytesPerWord) {
-    lir::Register ah(a->high);
-    lir::Register bh(b->high);
-
-    branchLong(con, op, a, &ah, b, &bh, target, CAST2(compareRR),
-               CAST2(compareRR));
-  } else {
-    compareRR(con, size, a, size, b);
-    branch(con, op, target);
-  }
-}
-
-void
-branchCR(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Register* b,
-         lir::Constant* target)
-{
-  assert(con, !isFloatBranch(op));
-
-  if (size > TargetBytesPerWord) {
-    int64_t v = a->value->value();
-
-    ResolvedPromise low(v & ~static_cast<target_uintptr_t>(0));
-    lir::Constant al(&low);
-
-    ResolvedPromise high((v >> 32) & ~static_cast<target_uintptr_t>(0));
-    lir::Constant ah(&high);
-
-    lir::Register bh(b->high);
-
-    branchLong(con, op, &al, &ah, b, &bh, target, CAST2(compareCR),
-               CAST2(compareCR));
-  } else {
-    compareCR(con, size, a, size, b);
-    branch(con, op, target);
-  }
-}
-
-void
-branchRM(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Register* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(con, !isFloatBranch(op));
-  assert(con, size <= TargetBytesPerWord);
-
-  compareRM(con, size, a, size, b);
-  branch(con, op, target);
-}
-
-void
-branchCM(Context* con, lir::TernaryOperation op, unsigned size,
-         lir::Constant* a, lir::Memory* b,
-         lir::Constant* target)
-{
-  assert(con, !isFloatBranch(op));
-  assert(con, size <= TargetBytesPerWord);
-
-  compareCM(con, size, a, size, b);
-  branch(con, op, target);
-}
-
-ShiftMaskPromise*
-shiftMaskPromise(Context* con, Promise* base, unsigned shift, int64_t mask)
-{
-  return new(con->zone) ShiftMaskPromise(base, shift, mask);
-}
-
-void
-moveCM(Context* con, unsigned srcSize, lir::Constant* src,
-       unsigned dstSize, lir::Memory* dst)
-{
-  switch (dstSize) {
-  case 8: {
-    lir::Constant srcHigh
-      (shiftMaskPromise(con, src->value, 32, 0xFFFFFFFF));
-    lir::Constant srcLow
-      (shiftMaskPromise(con, src->value, 0, 0xFFFFFFFF));
-    
-    lir::Memory dstLow
-      (dst->base, dst->offset + 4, dst->index, dst->scale);
-    
-    moveCM(con, 4, &srcLow, 4, &dstLow);
-    moveCM(con, 4, &srcHigh, 4, dst);
-  } break;
-
-  default:
-    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
-    moveCR(con, srcSize, src, dstSize, &tmp);
-    moveRM(con, dstSize, &tmp, dstSize, dst);
-    con->client->releaseTemporary(tmp.low);
-  }
-}
-
-void
-negateRR(Context* con, unsigned srcSize, lir::Register* src,
-         unsigned dstSize UNUSED, lir::Register* dst)
-{
-  assert(con, srcSize == dstSize);
-
-  emit(con, mvn(dst->low, src->low));
-  emit(con, SETS(addi(dst->low, dst->low, 1)));
-  if (srcSize == 8) {
-    emit(con, mvn(dst->high, src->high));
-    emit(con, adci(dst->high, dst->high, 0));
-  }
-}
-
-void
-callR(Context* con, unsigned size UNUSED, lir::Register* target)
-{
-  assert(con, size == TargetBytesPerWord);
-  emit(con, blx(target->low));
-}
-
-void
-callC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  appendOffsetTask(con, target->value, offsetPromise(con));
-  emit(con, bl(0));
-}
-
-void
-longCallC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  lir::Register tmp(4);
-  moveCR2(con, TargetBytesPerWord, target, &tmp, offsetPromise(con));
-  callR(con, TargetBytesPerWord, &tmp);
-}
-
-void
-longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  lir::Register tmp(4); // a non-arg reg that we don't mind clobbering
-  moveCR2(con, TargetBytesPerWord, target, &tmp, offsetPromise(con));
-  jumpR(con, TargetBytesPerWord, &tmp);
-}
-
-void
-jumpC(Context* con, unsigned size UNUSED, lir::Constant* target)
-{
-  assert(con, size == TargetBytesPerWord);
-
-  appendOffsetTask(con, target->value, offsetPromise(con));
-  emit(con, b(0));
-}
-
-void
-return_(Context* con)
-{
-  emit(con, bx(LinkRegister));
-}
-
-void
-trap(Context* con)
-{
-  emit(con, bkpt(0));
-}
-
-void
-memoryBarrier(Context*) {}
-
 // END OPERATION COMPILERS
 
 unsigned
@@ -1861,99 +138,6 @@ nextFrame(ArchitectureContext* con, uint32_t* start, unsigned size UNUSED,
   *stack = static_cast<void**>(*stack) + offset;
 }
 
-void
-populateTables(ArchitectureContext* con)
-{
-  const lir::OperandType C = lir::ConstantOperand;
-  const lir::OperandType A = lir::AddressOperand;
-  const lir::OperandType R = lir::RegisterOperand;
-  const lir::OperandType M = lir::MemoryOperand;
-
-  OperationType* zo = con->operations;
-  UnaryOperationType* uo = con->unaryOperations;
-  BinaryOperationType* bo = con->binaryOperations;
-  TernaryOperationType* to = con->ternaryOperations;
-  BranchOperationType* bro = con->branchOperations;
-
-  zo[lir::Return] = return_;
-  zo[lir::LoadBarrier] = memoryBarrier;
-  zo[lir::StoreStoreBarrier] = memoryBarrier;
-  zo[lir::StoreLoadBarrier] = memoryBarrier;
-  zo[lir::Trap] = trap;
-
-  uo[index(con, lir::LongCall, C)] = CAST1(longCallC);
-
-  uo[index(con, lir::AlignedLongCall, C)] = CAST1(longCallC);
-
-  uo[index(con, lir::LongJump, C)] = CAST1(longJumpC);
-
-  uo[index(con, lir::AlignedLongJump, C)] = CAST1(longJumpC);
-
-  uo[index(con, lir::Jump, R)] = CAST1(jumpR);
-  uo[index(con, lir::Jump, C)] = CAST1(jumpC);
-
-  uo[index(con, lir::AlignedJump, R)] = CAST1(jumpR);
-  uo[index(con, lir::AlignedJump, C)] = CAST1(jumpC);
-
-  uo[index(con, lir::Call, C)] = CAST1(callC);
-  uo[index(con, lir::Call, R)] = CAST1(callR);
-
-  uo[index(con, lir::AlignedCall, C)] = CAST1(callC);
-  uo[index(con, lir::AlignedCall, R)] = CAST1(callR);
-
-  bo[index(con, lir::Move, R, R)] = CAST2(moveRR);
-  bo[index(con, lir::Move, C, R)] = CAST2(moveCR);
-  bo[index(con, lir::Move, C, M)] = CAST2(moveCM);
-  bo[index(con, lir::Move, M, R)] = CAST2(moveMR);
-  bo[index(con, lir::Move, R, M)] = CAST2(moveRM);
-  bo[index(con, lir::Move, A, R)] = CAST2(moveAR);
-
-  bo[index(con, lir::MoveZ, R, R)] = CAST2(moveZRR);
-  bo[index(con, lir::MoveZ, M, R)] = CAST2(moveZMR);
-  bo[index(con, lir::MoveZ, C, R)] = CAST2(moveCR);
-
-  bo[index(con, lir::Negate, R, R)] = CAST2(negateRR);
-
-  bo[index(con, lir::FloatAbsolute, R, R)] = CAST2(floatAbsoluteRR);
-  bo[index(con, lir::FloatNegate, R, R)] = CAST2(floatNegateRR);
-  bo[index(con, lir::Float2Float, R, R)] = CAST2(float2FloatRR);
-  bo[index(con, lir::Float2Int, R, R)] = CAST2(float2IntRR);
-  bo[index(con, lir::Int2Float, R, R)] = CAST2(int2FloatRR);
-  bo[index(con, lir::FloatSquareRoot, R, R)] = CAST2(floatSqrtRR);
-
-  to[index(con, lir::Add, R)] = CAST3(addR);
-
-  to[index(con, lir::Subtract, R)] = CAST3(subR);
-
-  to[index(con, lir::Multiply, R)] = CAST3(multiplyR);
-
-  to[index(con, lir::FloatAdd, R)] = CAST3(floatAddR);
-  to[index(con, lir::FloatSubtract, R)] = CAST3(floatSubtractR);
-  to[index(con, lir::FloatMultiply, R)] = CAST3(floatMultiplyR);
-  to[index(con, lir::FloatDivide, R)] = CAST3(floatDivideR);
-
-  to[index(con, lir::ShiftLeft, R)] = CAST3(shiftLeftR);
-  to[index(con, lir::ShiftLeft, C)] = CAST3(shiftLeftC);
-
-  to[index(con, lir::ShiftRight, R)] = CAST3(shiftRightR);
-  to[index(con, lir::ShiftRight, C)] = CAST3(shiftRightC);
-
-  to[index(con, lir::UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR);
-  to[index(con, lir::UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC);
-
-  to[index(con, lir::And, R)] = CAST3(andR);
-  to[index(con, lir::And, C)] = CAST3(andC);
-
-  to[index(con, lir::Or, R)] = CAST3(orR);
-
-  to[index(con, lir::Xor, R)] = CAST3(xorR);
-
-  bro[branchIndex(con, R, R)] = CAST_BRANCH(branchRR);
-  bro[branchIndex(con, C, R)] = CAST_BRANCH(branchCR);
-  bro[branchIndex(con, C, M)] = CAST_BRANCH(branchCM);
-  bro[branchIndex(con, R, M)] = CAST_BRANCH(branchRM);
-}
-
 class MyArchitecture: public Assembler::Architecture {
  public:
   MyArchitecture(System* system): con(system), referenceCount(0) {
diff --git a/src/codegen/arm/block.h b/src/codegen/arm/block.h
index 42f3cceaa3..cc634f7f75 100644
--- a/src/codegen/arm/block.h
+++ b/src/codegen/arm/block.h
@@ -13,7 +13,6 @@
 
 #include <avian/vm/codegen/lir.h>
 #include <avian/vm/codegen/assembler.h>
-#include "alloc-vector.h"
 
 namespace avian {
 namespace codegen {
diff --git a/src/codegen/arm/context.h b/src/codegen/arm/context.h
index ccba7e403d..a5388527ed 100644
--- a/src/codegen/arm/context.h
+++ b/src/codegen/arm/context.h
@@ -53,15 +53,6 @@ class Context {
   unsigned constantPoolCount;
 };
 
-class Task {
- public:
-  Task(Task* next): next(next) { }
-
-  virtual void run(Context* con) = 0;
-
-  Task* next;
-};
-
 typedef void (*OperationType)(Context*);
 
 typedef void (*UnaryOperationType)(Context*, unsigned, lir::Operand*);
diff --git a/src/codegen/arm/encode.h b/src/codegen/arm/encode.h
new file mode 100644
index 0000000000..d6d3e983b9
--- /dev/null
+++ b/src/codegen/arm/encode.h
@@ -0,0 +1,184 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_ENCODE_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_ENCODE_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+namespace isa {
+
+// SYSTEM REGISTERS (VFP system register numbers; fmstat below passes FPSCR to fmrx)
+const int FPSID = 0x0;
+const int FPSCR = 0x1;
+const int FPEXC = 0x8;
+// INSTRUCTION OPTIONS (enumerators number from 0 and are used directly as encoded field values)
+enum CONDITION { EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV }; // the "cond" argument, shifted into bits 31:28
+enum SHIFTOP { LSL, LSR, ASR, ROR }; // the "Sh" argument of DATA/DATAS, shifted into bits 6:5
+// INSTRUCTION FORMATS
+inline int DATA(int cond, int opcode, int S, int Rn, int Rd, int shift, int Sh, int Rm)
+{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
+inline int DATAS(int cond, int opcode, int S, int Rn, int Rd, int Rs, int Sh, int Rm)
+{ return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | Rs<<8 | Sh<<5 | 1<<4 | Rm; }
+inline int DATAI(int cond, int opcode, int S, int Rn, int Rd, int rot, int imm)
+{ return cond<<28 | 1<<25 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | rot<<8 | (imm&0xff); }
+inline int BRANCH(int cond, int L, int offset)
+{ return cond<<28 | 5<<25 | L<<24 | (offset&0xffffff); }
+inline int BRANCHX(int cond, int L, int Rm)
+{ return cond<<28 | 0x4bffc<<6 | L<<5 | 1<<4 | Rm; }
+inline int MULTIPLY(int cond, int mul, int S, int Rd, int Rn, int Rs, int Rm)
+{ return cond<<28 | mul<<21 | S<<20 | Rd<<16 | Rn<<12 | Rs<<8 | 9<<4 | Rm; }
+inline int XFER(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int shift, int Sh, int Rm)
+{ return cond<<28 | 3<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; }
+inline int XFERI(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int offset)
+{ return cond<<28 | 2<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | (offset&0xfff); }
+inline int XFER2(int cond, int P, int U, int W, int L, int Rn, int Rd, int S, int H, int Rm)
+{ return cond<<28 | P<<24 | U<<23 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | 1<<7 | S<<6 | H<<5 | 1<<4 | Rm; }
+inline int XFER2I(int cond, int P, int U, int W, int L, int Rn, int Rd, int offsetH, int S, int H, int offsetL)
+{ return cond<<28 | P<<24 | U<<23 | 1<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offsetH<<8 | 1<<7 | S<<6 | H<<5 | 1<<4 | (offsetL&0xf); }
+inline int COOP(int cond, int opcode_1, int CRn, int CRd, int cp_num, int opcode_2, int CRm)
+{ return cond<<28 | 0xe<<24 | opcode_1<<20 | CRn<<16 | CRd<<12 | cp_num<<8 | opcode_2<<5 | CRm; }
+inline int COXFER(int cond, int P, int U, int N, int W, int L, int Rn, int CRd, int cp_num, int offset) // offset is a byte offset; it is encoded as an 8-bit word count (offset / 4), so scale first and mask afterwards
+{ return cond<<28 | 0x6<<25 | P<<24 | U<<23 | N<<22 | W<<21 | L<<20 | Rn<<16 | CRd<<12 | cp_num<<8 | ((offset>>2)&0xff); }
+inline int COREG(int cond, int opcode_1, int L, int CRn, int Rd, int cp_num, int opcode_2, int CRm)
+{ return cond<<28 | 0xe<<24 | opcode_1<<21 | L<<20 | CRn<<16 | Rd<<12 | cp_num<<8 | opcode_2<<5 | 1<<4 | CRm; }
+inline int COREG2(int cond, int L, int Rn, int Rd, int cp_num, int opcode, int CRm)
+{ return cond<<28 | 0xc4<<20 | L<<20 | Rn<<16 | Rd<<12 | cp_num<<8 | opcode<<4 | CRm;}
+// FIELD CALCULATORS
+inline int calcU(int imm) { return imm >= 0 ? 1 : 0; }
+// INSTRUCTIONS
+// The "cond" and "S" fields are set using the SETCOND() and SETS() functions
+inline int b(int offset) { return BRANCH(AL, 0, offset); }
+inline int bl(int offset) { return BRANCH(AL, 1, offset); }
+inline int bx(int Rm) { return BRANCHX(AL, 0, Rm); }
+inline int blx(int Rm) { return BRANCHX(AL, 1, Rm); }
+inline int and_(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x0, 0, Rn, Rd, shift, Sh, Rm); }
+inline int eor(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x1, 0, Rn, Rd, shift, Sh, Rm); }
+inline int rsb(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x3, 0, Rn, Rd, shift, Sh, Rm); }
+inline int add(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x4, 0, Rn, Rd, shift, Sh, Rm); }
+inline int adc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x5, 0, Rn, Rd, shift, Sh, Rm); }
+inline int rsc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x7, 0, Rn, Rd, shift, Sh, Rm); }
+inline int cmp(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xa, 1, Rn, 0, shift, Sh, Rm); }
+inline int orr(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xc, 0, Rn, Rd, shift, Sh, Rm); }
+inline int mov(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xd, 0, 0, Rd, shift, Sh, Rm); }
+inline int mvn(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xf, 0, 0, Rd, shift, Sh, Rm); }
+inline int andi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x0, 0, Rn, Rd, rot, imm); }
+inline int subi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x2, 0, Rn, Rd, rot, imm); }
+inline int rsbi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x3, 0, Rn, Rd, rot, imm); }
+inline int addi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x4, 0, Rn, Rd, rot, imm); }
+inline int adci(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x5, 0, Rn, Rd, rot, imm); }
+inline int bici(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xe, 0, Rn, Rd, rot, imm); }
+inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); }
+inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); }
+inline int orrsh(int Rd, int Rn, int Rm, int Rs, int Sh) { return DATAS(AL, 0xc, 0, Rn, Rd, Rs, Sh, Rm); }
+inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); }
+inline int mul(int Rd, int Rm, int Rs) { return MULTIPLY(AL, 0, 0, Rd, 0, Rs, Rm); }
+inline int mla(int Rd, int Rm, int Rs, int Rn) { return MULTIPLY(AL, 1, 0, Rd, Rn, Rs, Rm); }
+inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, RdHi, RdLo, Rs, Rm); }
+inline int ldr(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 1, Rn, Rd, 0, 0, Rm); }
+inline int ldri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 1, Rn, Rd, abs(imm)); }
+inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); }
+inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 1, Rn, Rd, abs(imm)); }
+inline int str(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 0, Rn, Rd, 0, 0, Rm); }
+inline int stri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 0, Rn, Rd, abs(imm)); }
+inline int strb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 0, Rn, Rd, 0, 0, Rm); }
+inline int strbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 0, Rn, Rd, abs(imm)); }
+inline int ldrh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 0, 1, Rm); }
+inline int ldrhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
+inline int strh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 0, Rn, Rd, 0, 1, Rm); }
+inline int strhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 0, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); }
+inline int ldrsh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 1, Rm); }
+inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 1, abs(imm)&0xf); }
+inline int ldrsb(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 0, Rm); }
+inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 0, abs(imm)&0xf); }
+// breakpoint instruction, this really has its own instruction format
+inline int bkpt(int16_t immed) { return 0xe1200070 | (((unsigned)immed & 0xffff) >> 4 << 8) | (immed & 0xf); }
+// COPROCESSOR INSTRUCTIONS
+inline int mcr(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 0, CRn, Rd, coproc, opcode_2, CRm); }
+inline int mcrr(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 0, Rn, Rd, coproc, opcode, CRm); }
+inline int mrc(int coproc, int opcode_1, int Rd, int CRn, int CRm, int opcode_2=0) { return COREG(AL, opcode_1, 1, CRn, Rd, coproc, opcode_2, CRm); }
+inline int mrrc(int coproc, int opcode, int Rd, int Rn, int CRm) { return COREG2(AL, 1, Rn, Rd, coproc, opcode, CRm); }
+// VFP FLOATING-POINT INSTRUCTIONS
+inline int fmuls(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|2, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
+inline int fadds(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
+inline int fsubs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|3, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1)|2, Sm>>1); }
+inline int fdivs(int Sd, int Sn, int Sm) { return COOP(AL, (Sd&1)<<2|8, Sn>>1, Sd>>1, 10, (Sn&1)<<2|(Sm&1), Sm>>1); }
+inline int fmuld(int Dd, int Dn, int Dm) { return COOP(AL, 2, Dn, Dd, 11, 0, Dm); }
+inline int faddd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 0, Dm); }
+inline int fsubd(int Dd, int Dn, int Dm) { return COOP(AL, 3, Dn, Dd, 11, 2, Dm); }
+inline int fdivd(int Dd, int Dn, int Dm) { return COOP(AL, 8, Dn, Dd, 11, 0, Dm); }
+inline int fcpys(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
+inline int fabss(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int fnegs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
+inline int fsqrts(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 1, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int fcmps(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 4, Sd>>1, 10, 2|(Sm&1), Sm>>1); }
+inline int fcvtds(int Dd, int Sm) { return COOP(AL, 0xb, 7, Dd, 10, 6|(Sm&1), Sm>>1); }
+inline int fsitos(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 8, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int ftosizs(int Sd, int Sm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 10, 6|(Sm&1), Sm>>1); }
+inline int fcpyd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 2, Dm); }
+inline int fabsd(int Dd, int Dm) { return COOP(AL, 0xb, 0, Dd, 11, 6, Dm); }
+inline int fnegd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 2, Dm); }
+inline int fsqrtd(int Dd, int Dm) { return COOP(AL, 0xb, 1, Dd, 11, 6, Dm); }
+// double-precision comparison instructions
+inline int fcmpd(int Dd, int Dm) { return COOP(AL, 0xb, 4, Dd, 11, 2, Dm); }
+// double-precision conversion instructions
+inline int fcvtsd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 7, Sd>>1, 11, 6, Dm); }
+inline int fsitod(int Dd, int Sm) { return COOP(AL, 0xb, 8, Dd, 11, 6|(Sm&1), Sm>>1); }
+inline int ftosizd(int Sd, int Dm) { return COOP(AL, 0xb|(Sd&1)<<2, 0xd, Sd>>1, 11, 6, Dm); }
+// single load/store instructions for both precision types
+inline int flds(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 1, Rn, Sd>>1, 10, offset); } // load (L=1) single Sd through coprocessor 10
+inline int fldd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 1, Rn, Dd, 11, offset); } // load (L=1) double Dd through coprocessor 11
+inline int fsts(int Sd, int Rn, int offset=0) { return COXFER(AL, 1, 1, Sd&1, 0, 0, Rn, Sd>>1, 10, offset); } // store (L=0) single Sd through coprocessor 10
+inline int fstd(int Dd, int Rn, int offset=0) { return COXFER(AL, 1, 1, 0, 0, 0, Rn, Dd, 11, offset); } // store (L=0) double Dd through coprocessor 11
+// move between GPRs and FPRs
+inline int fmsr(int Sn, int Rd) { return mcr(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
+inline int fmrs(int Rd, int Sn) { return mrc(10, 0, Rd, Sn>>1, 0, (Sn&1)<<2); }
+// move to/from VFP system registers
+inline int fmrx(int Rd, int reg) { return mrc(10, 7, Rd, reg, 0); }
+// these move around pairs of single-precision registers
+inline int fmdrr(int Dm, int Rd, int Rn) { return mcrr(11, 1, Rd, Rn, Dm); }
+inline int fmrrd(int Rd, int Rn, int Dm) { return mrrc(11, 1, Rd, Rn, Dm); }
+// FLAG SETTERS
+inline int SETCOND(int ins, int cond) { return ((ins&0x0fffffff) | (cond<<28)); }
+inline int SETS(int ins) { return ins | 1<<20; }
+// PSEUDO-INSTRUCTIONS
+inline int lsl(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSL); }
+inline int lsli(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSL, imm); }
+inline int lsr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, LSR); }
+inline int lsri(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSR, imm); }
+inline int asr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, ASR); }
+inline int asri(int Rd, int Rm, int imm) { return mov(Rd, Rm, ASR, imm); }
+inline int beq(int offset) { return SETCOND(b(offset), EQ); }
+inline int bne(int offset) { return SETCOND(b(offset), NE); }
+inline int bls(int offset) { return SETCOND(b(offset), LS); }
+inline int bhi(int offset) { return SETCOND(b(offset), HI); }
+inline int blt(int offset) { return SETCOND(b(offset), LT); }
+inline int bgt(int offset) { return SETCOND(b(offset), GT); }
+inline int ble(int offset) { return SETCOND(b(offset), LE); }
+inline int bge(int offset) { return SETCOND(b(offset), GE); }
+inline int blo(int offset) { return SETCOND(b(offset), CC); }
+inline int bhs(int offset) { return SETCOND(b(offset), CS); }
+inline int bpl(int offset) { return SETCOND(b(offset), PL); }
+inline int fmstat() { return fmrx(15, FPSCR); }
+
+} // namespace isa
+
+inline void emit(Context* con, int code) { con->code.append4(code); }
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_ENCODE_H
diff --git a/src/codegen/arm/fixup.cpp b/src/codegen/arm/fixup.cpp
new file mode 100644
index 0000000000..2cf0b01216
--- /dev/null
+++ b/src/codegen/arm/fixup.cpp
@@ -0,0 +1,175 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "fixup.h"
+#include "block.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+unsigned padding(MyBlock*, unsigned);
+
+OffsetPromise::OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace):
+  con(con), block(block), offset(offset), forTrace(forTrace)
+{ }
+
+bool OffsetPromise::resolved() {
+  return block->start != static_cast<unsigned>(~0);
+}
+
+int64_t OffsetPromise::value() {
+  assert(con, resolved());
+  // position of this promise measured from block->offset
+  const unsigned relative = offset - block->offset;
+  const unsigned paddingAt = forTrace ? relative - vm::TargetBytesPerWord : relative;
+  return block->start + padding(block, paddingAt) + relative;
+}
+
+
+Promise* offsetPromise(Context* con, bool forTrace) {
+  return new(con->zone) OffsetPromise(con, con->lastBlock, con->code.length(), forTrace);
+}
+
+
+OffsetListener::OffsetListener(vm::System* s, uint8_t* instruction):
+  s(s),
+  instruction(instruction)
+{ }
+
+bool OffsetListener::resolve(int64_t value, void** location) {
+  void* const next = updateOffset(s, instruction, value); // address just past the patched instruction
+  if (location != 0) *location = next;
+  return false;
+}
+
+
+OffsetTask::OffsetTask(Task* next, Promise* promise, Promise* instructionOffset):
+  Task(next),
+  promise(promise),
+  instructionOffset(instructionOffset)
+{ }
+
+void OffsetTask::run(Context* con) {
+  uint8_t* const instruction = con->result + instructionOffset->value();
+  if (not promise->resolved()) {
+    // defer: OffsetListener::resolve patches the instruction later
+    new (promise->listen(sizeof(OffsetListener))) OffsetListener(con->s, instruction);
+  } else {
+    updateOffset(con->s, instruction, promise->value());
+  }
+}
+
+void appendOffsetTask(Context* con, Promise* promise, Promise* instructionOffset) {
+  con->tasks = new(con->zone) OffsetTask(con->tasks, promise, instructionOffset);
+}
+
+bool bounded(int right, int left, int32_t v) { // true iff v fits in (32 - left) signed bits and its low `right` bits are zero; left shifts are done on uint32_t because shifting a negative int left is undefined
+  return (static_cast<int32_t>(static_cast<uint32_t>(v) << left) >> left) == v and static_cast<int32_t>(static_cast<uint32_t>(v >> right) << right) == v;
+}
+
+void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) {
+  // Branch offsets count words from the ARM PC, which reads two words past the instruction.
+  uint8_t* const target = reinterpret_cast<uint8_t*>(value);
+  const int32_t wordOffset = (target - (instruction + 8)) >> 2;
+
+  // the offset is written into the low 24 bits of the instruction word
+  const int32_t fieldMask = 0xFFFFFF;
+  expect(s, bounded(0, 8, wordOffset));
+
+  int32_t* const word = reinterpret_cast<int32_t*>(instruction);
+  *word = (*word & ~fieldMask) | (wordOffset & fieldMask);
+  return instruction + 4;
+}
+
+ConstantPoolEntry::ConstantPoolEntry(Context* con, Promise* constant, ConstantPoolEntry* next,
+                  Promise* callOffset):
+  con(con), constant(constant), next(next), callOffset(callOffset),
+  address(0), constantPoolCount(0) // address == 0 marks the entry unresolved; zero constantPoolCount so no member is left indeterminate
+{ }
+
+int64_t ConstantPoolEntry::value() {
+  assert(con, resolved());
+
+  return reinterpret_cast<int64_t>(address);
+}
+
+bool ConstantPoolEntry::resolved() {
+  return address != 0;
+}
+
+ConstantPoolListener::ConstantPoolListener(vm::System* s, vm::target_uintptr_t* address,
+                     uint8_t* returnAddress):
+  s(s),
+  address(address),
+  returnAddress(returnAddress)
+{ }
+
+bool ConstantPoolListener::resolve(int64_t value, void** location) {
+  *address = value; // write the resolved constant into its slot
+  if (location == 0) return true;
+  // report the return address when there is one, otherwise the slot itself
+  *location = returnAddress ? static_cast<void*>(returnAddress) : address;
+  return true;
+}
+
+PoolOffset::PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset):
+  block(block), entry(entry), next(0), offset(offset)
+{ }
+
+PoolEvent::PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail,
+          unsigned offset):
+  poolOffsetHead(poolOffsetHead), poolOffsetTail(poolOffsetTail), next(0),
+  offset(offset)
+{ }
+
+void appendConstantPoolEntry(Context* con, Promise* constant, Promise* callOffset) { // adds a pool entry for `constant` and records its position in the last block
+  if (constant->resolved()) {
+    // make a copy, since the original might be allocated on the
+    // stack, and we need our copy to live until assembly is complete
+    constant = new(con->zone) ResolvedPromise(constant->value());
+  }
+
+  // push a new entry onto the front of the context's constant pool list
+  con->constantPool = new(con->zone) ConstantPoolEntry(con, constant, con->constantPool, callOffset);
+  ++ con->constantPoolCount;
+
+  PoolOffset* o = new(con->zone) PoolOffset(con->lastBlock, con->constantPool, con->code.length() - con->lastBlock->offset);
+
+  if (DebugPool) {
+    fprintf(stderr, "add pool offset %p %u to block %p\n",
+            static_cast<void*>(o), o->offset, static_cast<void*>(con->lastBlock));
+  }
+  // append o to the tail of the last block's pool-offset list
+  if (con->lastBlock->poolOffsetTail) {
+    con->lastBlock->poolOffsetTail->next = o;
+  } else {
+    con->lastBlock->poolOffsetHead = o;
+  }
+  con->lastBlock->poolOffsetTail = o;
+}
+
+void appendPoolEvent(Context* con, MyBlock* b, unsigned offset, PoolOffset* head,
+                PoolOffset* tail)
+{
+  // Append a new event to the block's singly linked list of pool events.
+  PoolEvent* event = new(con->zone) PoolEvent(head, tail, offset);
+  if (b->poolEventTail == 0) {
+    b->poolEventHead = event;
+  } else {
+    b->poolEventTail->next = event;
+  }
+  b->poolEventTail = event;
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/fixup.h b/src/codegen/arm/fixup.h
new file mode 100644
index 0000000000..77abf003bf
--- /dev/null
+++ b/src/codegen/arm/fixup.h
@@ -0,0 +1,140 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_PROMISE_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_PROMISE_H
+
+#include "target.h"
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+#include "alloc-vector.h"
+
+namespace vm {
+class System;
+}
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+const bool DebugPool = false;
+
+const int32_t PoolOffsetMask = 0xFFF;
+
+class Task { // a unit of deferred work; tasks chain into a singly linked list through `next`
+ public:
+  Task(Task* next): next(next) { }
+
+  virtual void run(Context* con) = 0; // implemented by subclasses such as OffsetTask
+
+  Task* next; // following task in the list (appendOffsetTask passes the previous con->tasks head here)
+};
+
+class OffsetPromise: public Promise { // promise for a code position inside a block; see value() in fixup.cpp
+ public:
+  OffsetPromise(Context* con, MyBlock* block, unsigned offset, bool forTrace);
+
+  virtual bool resolved(); // true once block->start is no longer ~0
+  
+  virtual int64_t value(); // block->start plus padding plus (offset - block->offset); asserts resolved()
+
+  Context* con;
+  MyBlock* block;
+  unsigned offset;
+  bool forTrace; // when set, value() looks up the padding one target word earlier
+};
+
+Promise* offsetPromise(Context* con, bool forTrace = false);
+
+class OffsetListener: public Promise::Listener { // patches one instruction when the promise it listens on resolves
+ public:
+  OffsetListener(vm::System* s, uint8_t* instruction);
+
+  virtual bool resolve(int64_t value, void** location); // calls updateOffset; always returns false
+
+  vm::System* s;
+  uint8_t* instruction; // address of the instruction to patch
+};
+
+class OffsetTask: public Task { // patches an instruction's offset field now, or registers an OffsetListener if the promise is unresolved
+ public:
+  OffsetTask(Task* next, Promise* promise, Promise* instructionOffset);
+
+  virtual void run(Context* con);
+
+  Promise* promise; // yields the value handed to updateOffset
+  Promise* instructionOffset; // added to con->result to locate the instruction
+};
+
+void appendOffsetTask(Context* con, Promise* promise, Promise* instructionOffset);
+
+void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value);
+
+class ConstantPoolEntry: public Promise { // one constant pool entry; as a Promise it yields `address`
+ public:
+  ConstantPoolEntry(Context* con, Promise* constant, ConstantPoolEntry* next,
+                    Promise* callOffset);
+
+  virtual int64_t value(); // address as an integer; asserts resolved()
+
+  virtual bool resolved(); // address != 0
+
+  Context* con;
+  Promise* constant;
+  ConstantPoolEntry* next; // next entry in the list (appendConstantPoolEntry passes the previous con->constantPool head)
+  Promise* callOffset;
+  void* address; // 0 until assigned; the assignment is not in fixup.cpp
+  unsigned constantPoolCount; // NOTE(review): never read in fixup.h/fixup.cpp - confirm whether it is still needed
+};
+
+class ConstantPoolListener: public Promise::Listener { // writes a resolved value into a constant slot
+ public:
+  ConstantPoolListener(vm::System* s, vm::target_uintptr_t* address,
+                       uint8_t* returnAddress);
+
+  virtual bool resolve(int64_t value, void** location); // stores value to *address; always returns true
+
+  vm::System* s;
+  vm::target_uintptr_t* address; // slot that receives the resolved value
+  uint8_t* returnAddress; // if non-null, reported through `location` instead of address
+};
+
+class PoolOffset { // list node tying a constant pool entry to a position in a block
+ public:
+  PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset);
+
+  MyBlock* block;
+  ConstantPoolEntry* entry;
+  PoolOffset* next; // the constructor sets this to 0; appendConstantPoolEntry links the nodes
+  unsigned offset; // code length minus block->offset at creation (see appendConstantPoolEntry)
+};
+
+class PoolEvent { // list node holding a head/tail pair of PoolOffsets together with an offset
+ public:
+  PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail,
+            unsigned offset);
+
+  PoolOffset* poolOffsetHead;
+  PoolOffset* poolOffsetTail;
+  PoolEvent* next; // the constructor sets this to 0; appendPoolEvent links events per block
+  unsigned offset;
+};
+
+void appendConstantPoolEntry(Context* con, Promise* constant, Promise* callOffset);
+
+void appendPoolEvent(Context* con, MyBlock* b, unsigned offset, PoolOffset* head,
+                PoolOffset* tail);
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_PROMISE_H
diff --git a/src/codegen/arm/multimethod.cpp b/src/codegen/arm/multimethod.cpp
new file mode 100644
index 0000000000..76c681a60f
--- /dev/null
+++ b/src/codegen/arm/multimethod.cpp
@@ -0,0 +1,142 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "multimethod.h"
+#include "operations.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand)
+{
+  return (lir::UnaryOperationCount * operand) + operation; // one run of UnaryOperationCount slots per operand type
+}
+
+unsigned index(ArchitectureContext*,
+      lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2)
+{
+  // flat slot in a table laid out as [operand2][operand1][operation]
+  const unsigned operandSlot = operand1 + (lir::OperandTypeCount * operand2);
+  return operation + (lir::BinaryOperationCount * operandSlot);
+}
+
+unsigned index(ArchitectureContext* con UNUSED,
+      lir::TernaryOperation operation,
+      lir::OperandType operand1)
+{
+  // only non-branch operations have a slot in this table
+  assert(con, not isBranch(operation));
+  return (lir::NonBranchTernaryOperationCount * operand1) + operation;
+}
+
+unsigned branchIndex(ArchitectureContext* con UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2)
+{
+  return (lir::OperandTypeCount * operand2) + operand1; // one run of OperandTypeCount slots per operand2 type
+}
+
+void populateTables(ArchitectureContext* con) { // fills con's dispatch tables with the handler for each supported operation/operand-type combination
+  const lir::OperandType C = lir::ConstantOperand;
+  const lir::OperandType A = lir::AddressOperand;
+  const lir::OperandType R = lir::RegisterOperand;
+  const lir::OperandType M = lir::MemoryOperand;
+
+  OperationType* zo = con->operations;
+  UnaryOperationType* uo = con->unaryOperations;
+  BinaryOperationType* bo = con->binaryOperations;
+  TernaryOperationType* to = con->ternaryOperations;
+  BranchOperationType* bro = con->branchOperations;
+  // operations without operands, indexed directly by the lir operation
+  zo[lir::Return] = return_;
+  zo[lir::LoadBarrier] = memoryBarrier;
+  zo[lir::StoreStoreBarrier] = memoryBarrier;
+  zo[lir::StoreLoadBarrier] = memoryBarrier;
+  zo[lir::Trap] = trap;
+  // unary operations; each Aligned* variant reuses the handler of its unaligned counterpart
+  uo[index(con, lir::LongCall, C)] = CAST1(longCallC);
+
+  uo[index(con, lir::AlignedLongCall, C)] = CAST1(longCallC);
+
+  uo[index(con, lir::LongJump, C)] = CAST1(longJumpC);
+
+  uo[index(con, lir::AlignedLongJump, C)] = CAST1(longJumpC);
+
+  uo[index(con, lir::Jump, R)] = CAST1(jumpR);
+  uo[index(con, lir::Jump, C)] = CAST1(jumpC);
+
+  uo[index(con, lir::AlignedJump, R)] = CAST1(jumpR);
+  uo[index(con, lir::AlignedJump, C)] = CAST1(jumpC);
+
+  uo[index(con, lir::Call, C)] = CAST1(callC);
+  uo[index(con, lir::Call, R)] = CAST1(callR);
+
+  uo[index(con, lir::AlignedCall, C)] = CAST1(callC);
+  uo[index(con, lir::AlignedCall, R)] = CAST1(callR);
+  // binary operations, keyed by the operation and both operand types
+  bo[index(con, lir::Move, R, R)] = CAST2(moveRR);
+  bo[index(con, lir::Move, C, R)] = CAST2(moveCR);
+  bo[index(con, lir::Move, C, M)] = CAST2(moveCM);
+  bo[index(con, lir::Move, M, R)] = CAST2(moveMR);
+  bo[index(con, lir::Move, R, M)] = CAST2(moveRM);
+  bo[index(con, lir::Move, A, R)] = CAST2(moveAR);
+
+  bo[index(con, lir::MoveZ, R, R)] = CAST2(moveZRR);
+  bo[index(con, lir::MoveZ, M, R)] = CAST2(moveZMR);
+  bo[index(con, lir::MoveZ, C, R)] = CAST2(moveCR); // same handler as Move C,R
+
+  bo[index(con, lir::Negate, R, R)] = CAST2(negateRR);
+
+  bo[index(con, lir::FloatAbsolute, R, R)] = CAST2(floatAbsoluteRR);
+  bo[index(con, lir::FloatNegate, R, R)] = CAST2(floatNegateRR);
+  bo[index(con, lir::Float2Float, R, R)] = CAST2(float2FloatRR);
+  bo[index(con, lir::Float2Int, R, R)] = CAST2(float2IntRR);
+  bo[index(con, lir::Int2Float, R, R)] = CAST2(int2FloatRR);
+  bo[index(con, lir::FloatSquareRoot, R, R)] = CAST2(floatSqrtRR);
+  // non-branch ternary operations, keyed by the operation and the first operand's type
+  to[index(con, lir::Add, R)] = CAST3(addR);
+
+  to[index(con, lir::Subtract, R)] = CAST3(subR);
+
+  to[index(con, lir::Multiply, R)] = CAST3(multiplyR);
+
+  to[index(con, lir::FloatAdd, R)] = CAST3(floatAddR);
+  to[index(con, lir::FloatSubtract, R)] = CAST3(floatSubtractR);
+  to[index(con, lir::FloatMultiply, R)] = CAST3(floatMultiplyR);
+  to[index(con, lir::FloatDivide, R)] = CAST3(floatDivideR);
+
+  to[index(con, lir::ShiftLeft, R)] = CAST3(shiftLeftR);
+  to[index(con, lir::ShiftLeft, C)] = CAST3(shiftLeftC);
+
+  to[index(con, lir::ShiftRight, R)] = CAST3(shiftRightR);
+  to[index(con, lir::ShiftRight, C)] = CAST3(shiftRightC);
+
+  to[index(con, lir::UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR);
+  to[index(con, lir::UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC);
+
+  to[index(con, lir::And, R)] = CAST3(andR);
+  to[index(con, lir::And, C)] = CAST3(andC);
+
+  to[index(con, lir::Or, R)] = CAST3(orR);
+
+  to[index(con, lir::Xor, R)] = CAST3(xorR);
+  // branches, keyed by the two operand types only
+  bro[branchIndex(con, R, R)] = CAST_BRANCH(branchRR);
+  bro[branchIndex(con, C, R)] = CAST_BRANCH(branchCR);
+  bro[branchIndex(con, C, M)] = CAST_BRANCH(branchCM);
+  bro[branchIndex(con, R, M)] = CAST_BRANCH(branchRM);
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/multimethod.h b/src/codegen/arm/multimethod.h
new file mode 100644
index 0000000000..7c574b588c
--- /dev/null
+++ b/src/codegen/arm/multimethod.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_MULTIMETHOD_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_MULTIMETHOD_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+
+#define CAST1(x) reinterpret_cast<UnaryOperationType>(x)
+#define CAST2(x) reinterpret_cast<BinaryOperationType>(x)
+#define CAST3(x) reinterpret_cast<TernaryOperationType>(x)
+#define CAST_BRANCH(x) reinterpret_cast<BranchOperationType>(x)
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+unsigned index(ArchitectureContext*, lir::UnaryOperation operation, lir::OperandType operand);
+
+unsigned index(ArchitectureContext*,
+      lir::BinaryOperation operation,
+      lir::OperandType operand1,
+      lir::OperandType operand2);
+
+unsigned index(ArchitectureContext* con UNUSED,
+      lir::TernaryOperation operation,
+      lir::OperandType operand1);
+
+unsigned branchIndex(ArchitectureContext* con UNUSED, lir::OperandType operand1,
+            lir::OperandType operand2);
+
+void populateTables(ArchitectureContext* con);
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_MULTIMETHOD_H
diff --git a/src/codegen/arm/operations.cpp b/src/codegen/arm/operations.cpp
new file mode 100644
index 0000000000..b896a88f00
--- /dev/null
+++ b/src/codegen/arm/operations.cpp
@@ -0,0 +1,1235 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#include "context.h"
+#include "operations.h"
+#include "encode.h"
+#include "block.h"
+#include "fixup.h"
+#include "multimethod.h"
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+using namespace isa;
+using namespace avian::util;
+
+inline bool isOfWidth(int64_t i, int size) { return not (static_cast<uint64_t>(i) >> size); } // true when i, viewed as unsigned 64-bit, has no bit set at or above position `size`
+
+inline unsigned lo8(int64_t i) { return static_cast<unsigned>(i & MASK_LO8); } // i masked with MASK_LO8 (presumably the low 8 bits -- confirm where MASK_LO8 is defined)
+
+void andC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst); // forward declaration: the shift helpers below call andC before its definition later in this file
+
+void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
+{ // emits t = b << a, with the shift count taken from register a; notes below assume the emit helpers follow the ARM mnemonics they are named after
+  if (size == 8) { // 64-bit operand held in a low/high register pair
+    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
+    ResolvedPromise maskPromise(0x3F); // only the low 6 bits of the count are used
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp3);
+    andC(con, 4, &mask, a, &dst); // tmp3 = count & 0x3F
+    emit(con, lsl(tmp1, b->high, tmp3)); // tmp1 = high << count
+    emit(con, rsbi(tmp2, tmp3, 32)); // tmp2 = 32 - count (reverse subtract)
+    emit(con, orrsh(tmp1, tmp1, b->low, tmp2, LSR)); // merge in the bits of the low word that cross into the high word
+    emit(con, SETS(subi(t->high, tmp3, 32))); // t->high = count - 32, updating the condition flags
+    emit(con, SETCOND(mov(t->high, tmp1), MI)); // negative (count < 32): use the merged word
+    emit(con, SETCOND(lsl(t->high, b->low, t->high), PL)); // otherwise (count >= 32): high = low << (count - 32)
+    emit(con, lsl(t->low, b->low, tmp3)); // low word of the result
+    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
+  } else { // single-register operand
+    int tmp = newTemp(con);
+    ResolvedPromise maskPromise(0x1F); // only the low 5 bits of the count are used
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp);
+    andC(con, size, &mask, a, &dst); // tmp = count & 0x1F
+    emit(con, lsl(t->low, b->low, tmp));
+    freeTemp(con, tmp);
+  }
+}
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst); // forward declaration: used by the constant-shift helpers below, defined later in this file
+
+void shiftLeftC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
+{ // t = b << (a & 0x1F) for a constant count; only word-sized operands are accepted
+  assert(con, size == vm::TargetBytesPerWord);
+  if ((getValue(a) & 0x1F) == 0) {
+    moveRR(con, size, b, size, t); // effective count of zero: plain register move
+    return;
+  }
+  emit(con, lsli(t->low, b->low, getValue(a) & 0x1F));
+}
+
+void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
+{ // emits t = b >> a using arithmetic (asr) shifts for the top word; notes below assume the emit helpers follow the ARM mnemonics they are named after
+  if (size == 8) { // 64-bit operand held in a low/high register pair
+    int tmp1 = newTemp(con), tmp2 = newTemp(con), tmp3 = newTemp(con);
+    ResolvedPromise maskPromise(0x3F); // only the low 6 bits of the count are used
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp3);
+    andC(con, 4, &mask, a, &dst); // tmp3 = count & 0x3F
+    emit(con, lsr(tmp1, b->low, tmp3)); // tmp1 = low >> count (logical)
+    emit(con, rsbi(tmp2, tmp3, 32)); // tmp2 = 32 - count (reverse subtract)
+    emit(con, orrsh(tmp1, tmp1, b->high, tmp2, LSL)); // merge in the bits of the high word that cross into the low word
+    emit(con, SETS(subi(t->low, tmp3, 32))); // t->low = count - 32, updating the condition flags
+    emit(con, SETCOND(mov(t->low, tmp1), MI)); // negative (count < 32): use the merged word
+    emit(con, SETCOND(asr(t->low, b->high, t->low), PL)); // otherwise (count >= 32): low = high asr (count - 32)
+    emit(con, asr(t->high, b->high, tmp3)); // high word of the result
+    freeTemp(con, tmp1); freeTemp(con, tmp2); freeTemp(con, tmp3);
+  } else { // single-register operand
+    int tmp = newTemp(con);
+    ResolvedPromise maskPromise(0x1F); // only the low 5 bits of the count are used
+    lir::Constant mask(&maskPromise);
+    lir::Register dst(tmp);
+    andC(con, size, &mask, a, &dst); // tmp = count & 0x1F
+    emit(con, asr(t->low, b->low, tmp));
+    freeTemp(con, tmp);
+  }
+}
+
+void shiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
+{ // t = b >> (a & 0x1F) via asri for a constant count; only word-sized operands are accepted
+  assert(con, size == vm::TargetBytesPerWord);
+  if ((getValue(a) & 0x1F) == 0) {
+    moveRR(con, size, b, size, t); // effective count of zero: plain register move
+    return;
+  }
+  emit(con, asri(t->low, b->low, getValue(a) & 0x1F));
+}
+
+void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t)
+{ // emits t = b >> a using logical (lsr) shifts; notes below assume the emit helpers follow the ARM mnemonics they are named after
+  int tmpShift = newTemp(con);
+  ResolvedPromise maskPromise(size == 8 ? 0x3F : 0x1F); // 6 count bits for 8-byte operands, 5 otherwise
+  lir::Constant mask(&maskPromise);
+  lir::Register dst(tmpShift);
+  andC(con, 4, &mask, a, &dst); // tmpShift = count & mask
+  emit(con, lsr(t->low, b->low, tmpShift)); // low word shifted by the count
+  if (size == 8) { // fold the contribution of the high word into the low word, then shift the high word
+    int tmpHi = newTemp(con), tmpLo = newTemp(con);
+    emit(con, SETS(rsbi(tmpHi, tmpShift, 32))); // tmpHi = 32 - count
+    emit(con, lsl(tmpLo, b->high, tmpHi)); // high-word bits that land in the low word when count < 32
+    emit(con, orr(t->low, t->low, tmpLo));
+    emit(con, addi(tmpHi, tmpShift, -32)); // tmpHi = count - 32
+    emit(con, lsr(tmpLo, b->high, tmpHi)); // high-word bits that land in the low word when count >= 32
+    emit(con, orr(t->low, t->low, tmpLo));
+    emit(con, lsr(t->high, b->high, tmpShift)); // high word of the result
+    freeTemp(con, tmpHi); freeTemp(con, tmpLo);
+  }
+  freeTemp(con, tmpShift);
+}
+
+void unsignedShiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t)
+{ // t = b >> (a & 0x1F) via lsri for a constant count; only word-sized operands are accepted
+  assert(con, size == vm::TargetBytesPerWord);
+  if ((getValue(a) & 0x1F) == 0) {
+    moveRR(con, size, b, size, t); // effective count of zero: plain register move
+    return;
+  }
+  emit(con, lsri(t->low, b->low, getValue(a) & 0x1F));
+}
+
+bool
+needJump(MyBlock* b)
+{ // true if b has a successor block, or if b->size has bits outside PoolOffsetMask
+  return b->next ? true : (b->size & PoolOffsetMask) != b->size;
+}
+
+// Walks b's pool events in list order up to the first one whose offset exceeds
+// `offset`: one word per pooled entry, plus one word per event if needJump(b).
+unsigned
+padding(MyBlock* b, unsigned offset)
+{
+  unsigned bytes = 0;
+  PoolEvent* event = b->poolEventHead;
+  while (event and event->offset <= offset) {
+    if (needJump(b)) bytes += vm::TargetBytesPerWord;
+    PoolOffset* entry = event->poolOffsetHead;
+    while (entry) {
+      bytes += vm::TargetBytesPerWord;
+      entry = entry->next;
+    }
+    event = event->next;
+  }
+  return bytes;
+}
+
+void resolve(MyBlock* b)
+{ // moves b's pending pool offsets onto the context-wide list, then decides whether to flush that list as a pool event at the end of b
+  Context* con = b->context;
+
+  if (b->poolOffsetHead) { // splice b's list onto the tail of the context's list
+    if (con->poolOffsetTail) {
+      con->poolOffsetTail->next = b->poolOffsetHead;
+    } else {
+      con->poolOffsetHead = b->poolOffsetHead;
+    }
+    con->poolOffsetTail = b->poolOffsetTail;
+  }
+
+  if (con->poolOffsetHead) {
+    bool append;
+    if (b->next == 0 or b->next->poolEventHead) { // no following block, or the following block already has pool events: flush now
+      append = true;
+    } else {
+      int32_t v = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8) // distance from the oldest pending offset to the end of the next block plus one word; the -8 presumably reflects the ARM pc read-ahead -- confirm
+        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
+
+      append = (v != (v & PoolOffsetMask)); // flush now if that distance has bits outside PoolOffsetMask
+
+      if (DebugPool) {
+        fprintf(stderr,
+                "current %p %d %d next %p %d %d\n",
+                b, b->start, b->size, b->next, b->start + b->size,
+                b->next->size);
+        fprintf(stderr,
+                "offset %p %d is of distance %d to next block; append? %d\n",
+                con->poolOffsetHead, con->poolOffsetHead->offset, v, append);
+      }
+    }
+
+    if (append) {
+#ifndef NDEBUG
+      int32_t v = (b->start + b->size - 8) // sanity check: the distance to the end of this block must itself fit within PoolOffsetMask
+        - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start);
+      
+      expect(con, v == (v & PoolOffsetMask));
+#endif // not NDEBUG
+
+      appendPoolEvent(con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); // record a pool event at the end of b carrying the whole pending list
+
+      if (DebugPool) {
+        for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) {
+          fprintf(stderr,
+                  "include %p %d in pool event %p at offset %d in block %p\n",
+                  o, o->offset, b->poolEventTail, b->size, b);
+        }
+      }
+
+      con->poolOffsetHead = 0; // the pending list now belongs to the pool event
+      con->poolOffsetTail = 0;
+    }
+  }
+}
+
+void jumpR(Context* con, unsigned size UNUSED, lir::Register* target)
+{ // emits bx on target->low, i.e. an indirect jump through that register
+  assert(con, size == vm::TargetBytesPerWord); // only word-sized operands are accepted
+  emit(con, bx(target->low));
+}
+
+void swapRR(Context* con, unsigned aSize, lir::Register* a,
+       unsigned bSize, lir::Register* b)
+{ // exchanges two word-sized registers by way of a scratch general-purpose register
+  assert(con, aSize == vm::TargetBytesPerWord);
+  assert(con, bSize == vm::TargetBytesPerWord);
+
+  lir::Register scratch(con->client->acquireTemporary(GPR_MASK));
+  moveRR(con, aSize, a, bSize, &scratch); // scratch <- a
+  moveRR(con, bSize, b, aSize, a);        // a <- b
+  moveRR(con, bSize, &scratch, bSize, b); // b <- scratch
+  con->client->releaseTemporary(scratch.low);
+}
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst)
+{ // register-to-register move, covering FPR<->GPR transfers and sign extension of 1-, 2- and 4-byte sources
+  bool srcIsFpr = isFpr(src);
+  bool dstIsFpr = isFpr(dst);
+  if (srcIsFpr || dstIsFpr) {   // FPR(s) involved
+    assert(con, srcSize == dstSize); // no width conversion on this path
+    const bool dprec = srcSize == 8;
+    if (srcIsFpr && dstIsFpr) { // FPR to FPR
+      if (dprec) emit(con, fcpyd(fpr64(dst), fpr64(src))); // double
+      else       emit(con, fcpys(fpr32(dst), fpr32(src))); // single
+    } else if (srcIsFpr) {      // FPR to GPR
+      if (dprec) emit(con, fmrrd(dst->low, dst->high, fpr64(src))); // double into a GPR pair
+      else       emit(con, fmrs(dst->low, fpr32(src)));
+    } else {                    // GPR to FPR
+      if (dprec) emit(con, fmdrr(fpr64(dst->low), src->low, src->high)); // GPR pair into a double
+      else       emit(con, fmsr(fpr32(dst), src->low));
+    }
+    return;
+  }
+
+  switch (srcSize) {
+  case 1: // sign-extend a byte: shift it to the top of the register, then arithmetic-shift back
+    emit(con, lsli(dst->low, src->low, 24));
+    emit(con, asri(dst->low, dst->low, 24));
+    break;
+
+  case 2: // sign-extend a halfword the same way
+    emit(con, lsli(dst->low, src->low, 16));
+    emit(con, asri(dst->low, dst->low, 16));
+    break;
+
+  case 4:
+  case 8:
+    if (srcSize == 4 and dstSize == 8) { // widen: copy the low word, then fill the high word with the sign bit
+      moveRR(con, 4, src, 4, dst);
+      emit(con, asri(dst->high, src->low, 31));
+    } else if (srcSize == 8 and dstSize == 8) {
+      lir::Register srcHigh(src->high);
+      lir::Register dstHigh(dst->high);
+
+      if (src->high == dst->low) { // writing dst->low first would clobber src->high
+        if (src->low == dst->high) { // the pair is exactly crossed: swap the two registers
+          swapRR(con, 4, src, 4, dst);
+        } else { // move the high word out of the way first
+          moveRR(con, 4, &srcHigh, 4, &dstHigh);
+          moveRR(con, 4, src, 4, dst);
+        }
+      } else {
+        moveRR(con, 4, src, 4, dst);
+        moveRR(con, 4, &srcHigh, 4, &dstHigh);
+      }
+    } else if (src->low != dst->low) { // plain word move; skipped when source and destination coincide
+      emit(con, mov(dst->low, src->low));
+    }
+    break;
+
+  default: abort(con);
+  }
+}
+
+// Zero-extending register move. Only 2-byte sources are implemented (shift up
+// by 16, then logical shift back down); any other source size aborts.
+void moveZRR(Context* con, unsigned srcSize, lir::Register* src,
+        unsigned, lir::Register* dst)
+{
+  if (srcSize == 2) {
+    emit(con, lsli(dst->low, src->low, 16));
+    emit(con, lsri(dst->low, dst->low, 16));
+  } else {
+    abort(con);
+  }
+}
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+            unsigned, lir::Register* dst); // forward declaration: moveCR2 below calls moveCR, which is defined right after it
+
+void moveCR2(Context* con, unsigned size, lir::Constant* src,
+        lir::Register* dst, Promise* callOffset)
+{ // loads constant src into dst; callOffset is handed on to the constant pool entry when one is needed
+  if (isFpr(dst)) { // floating-point
+    lir::Register tmp = size > 4 ? makeTemp64(con) :
+                                         makeTemp(con);
+    moveCR(con, size, src, size, &tmp); // materialise in temporary register(s) first
+    moveRR(con, size, &tmp, size, dst); // then transfer into the floating-point register
+    freeTemp(con, tmp);
+  } else if (size > 4) { // wider than one word: load each 32-bit half separately
+    uint64_t value = (uint64_t)src->value->value();
+    ResolvedPromise loBits(value & MASK_LO32);
+    lir::Constant srcLo(&loBits);
+    ResolvedPromise hiBits(value >> 32); 
+    lir::Constant srcHi(&hiBits);
+    lir::Register dstHi(dst->high);
+    moveCR(con, 4, &srcLo, 4, dst);
+    moveCR(con, 4, &srcHi, 4, &dstHi);
+  } else if (src->value->resolved() and isOfWidth(getValue(src), 8)) {
+    emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate
+  } else { // unresolved or too wide for the immediate form: go through the constant pool
+    appendConstantPoolEntry(con, src->value, callOffset);
+    emit(con, ldri(dst->low, ProgramCounter, 0)); // load 32 bits
+  }
+}
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+       unsigned, lir::Register* dst)
+{ // constant-to-register move; delegates to moveCR2 with no call offset
+  moveCR2(con, size, src, dst, 0);
+}
+
+void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) { // t = a + b
+  if (size != 8) {
+    emit(con, add(t->low, a->low, b->low));
+    return;
+  }
+  emit(con, SETS(add(t->low, a->low, b->low))); // low words first, in the flag-setting form
+  emit(con, adc(t->high, a->high, b->high));    // then the high words through adc
+}
+
+void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) { // reverse-subtract forms, so presumably t = b - a
+  if (size != 8) {
+    emit(con, rsb(t->low, a->low, b->low));
+    return;
+  }
+  emit(con, SETS(rsb(t->low, a->low, b->low))); // low words first, in the flag-setting form
+  emit(con, rsc(t->high, a->high, b->high));    // then the high words through rsc
+}
+
+// dst = b + a for a word-sized constant a. Zero degenerates to a register move;
+// 1..255 is passed to addi directly; positive multiples of 4 below 1024 are
+// passed as v >> 2 with a rotate argument of 15; any other value aborts.
+void addC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  const int32_t imm = a->value->value();
+  if (imm == 0) {
+    moveRR(con, size, b, size, dst);
+  } else if (imm > 0 and imm < 256) {
+    emit(con, addi(dst->low, b->low, imm));
+  } else if (imm > 0 and imm < 1024 and imm % 4 == 0) {
+    emit(con, addi(dst->low, b->low, imm >> 2, 15));
+  } else {
+    abort(con); // todo
+  }
+}
+
+// dst = b - a for a word-sized constant a. Zero degenerates to a register move;
+// 1..255 is passed to subi directly; positive multiples of 4 below 1024 are
+// passed as v >> 2 with a rotate argument of 15; any other value aborts.
+void subC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst)
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  const int32_t imm = a->value->value();
+  if (imm == 0) {
+    moveRR(con, size, b, size, dst);
+  } else if (imm > 0 and imm < 256) {
+    emit(con, subi(dst->low, b->low, imm));
+  } else if (imm > 0 and imm < 1024 and imm % 4 == 0) {
+    emit(con, subi(dst->low, b->low, imm >> 2, 15));
+  } else {
+    abort(con); // todo
+  }
+}
+
+void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) { // t = a * b; 8-byte operands are built from umull plus two mla cross terms
+  if (size == 8) {
+    bool useTemporaries = b->low == t->low; // the destination's low word aliases b's low word, so accumulate in scratch registers
+    int tmpLow  = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->low;
+    int tmpHigh = useTemporaries ? con->client->acquireTemporary(GPR_MASK) : t->high;
+
+    emit(con, umull(tmpLow, tmpHigh, a->low, b->low)); // presumably the full 64-bit product of the two low words -- confirm against encode.h
+    emit(con, mla(tmpHigh, a->low, b->high, tmpHigh)); // high += a.low * b.high
+    emit(con, mla(tmpHigh, a->high, b->low, tmpHigh)); // high += a.high * b.low
+
+    if (useTemporaries) { // copy the result into place and give the scratch registers back
+      emit(con, mov(t->low, tmpLow));
+      emit(con, mov(t->high, tmpHigh));
+      con->client->releaseTemporary(tmpLow);
+      con->client->releaseTemporary(tmpHigh);
+    }
+  } else {
+    emit(con, mul(t->low, a->low, b->low));
+  }
+}
+
+// Writes the floating-point absolute value of a to b: fabsd when size is 8,
+// fabss otherwise.
+void floatAbsoluteRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, fabsd(fpr64(b), fpr64(a)));
+  else                 emit(con, fabss(fpr32(b), fpr32(a)));
+}
+
+// Writes the floating-point negation of a to b: fnegd when size is 8, fnegs
+// otherwise.
+void floatNegateRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, fnegd(fpr64(b), fpr64(a)));
+  else                 emit(con, fnegs(fpr32(b), fpr32(a)));
+}
+
+// Converts between float widths, keyed on the source size: an 8-byte source is
+// narrowed with fcvtsd, anything else is widened with fcvtds.
+void float2FloatRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  const bool sourceIsDouble = (size == 8);
+  if (sourceIsDouble) emit(con, fcvtsd(fpr32(b), fpr64(a)));
+  else                emit(con, fcvtds(fpr64(b), fpr32(a)));
+}
+
+void float2IntRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) { // converts the float/double in a to an integer in b->low
+  int tmp = newTemp(con, FPR_MASK); // the conversion result lands in a floating-point temporary first
+  int ftmp = fpr32(tmp);
+  if (size == 8) { // double to int
+    emit(con, ftosizd(ftmp, fpr64(a)));
+  } else {         // float to int
+    emit(con, ftosizs(ftmp, fpr32(a)));
+  }                // else thunked
+  emit(con, fmrs(b->low, ftmp)); // move the converted value into the general-purpose destination
+  freeTemp(con, tmp);
+}
+
+void int2FloatRR(Context* con, unsigned, lir::Register* a, unsigned size, lir::Register* b) { // converts the integer in a->low to a float/double in b; note `size` is the destination size here
+  emit(con, fmsr(fpr32(b), a->low)); // first move the integer into the destination's single-precision view
+  if (size == 8) { // int to double
+    emit(con, fsitod(fpr64(b), fpr32(b)));
+  } else {         // int to float
+    emit(con, fsitos(fpr32(b), fpr32(b)));
+  }                // else thunked
+}
+
+// Writes the floating-point square root of a to b: fsqrtd when size is 8,
+// fsqrts otherwise.
+void floatSqrtRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, fsqrtd(fpr64(b), fpr64(a)));
+  else                 emit(con, fsqrts(fpr32(b), fpr32(a)));
+}
+
+// Floating-point t = a + b: faddd when size is 8, fadds otherwise. Operands are
+// handed to the encoder as (t, a, b).
+void floatAddR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, faddd(fpr64(t), fpr64(a), fpr64(b)));
+  else                 emit(con, fadds(fpr32(t), fpr32(a), fpr32(b)));
+}
+
+// Floating-point subtraction: fsubd when size is 8, fsubs otherwise. Operands
+// are handed to the encoder as (t, b, a), i.e. presumably t = b - a.
+void floatSubtractR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, fsubd(fpr64(t), fpr64(b), fpr64(a)));
+  else                 emit(con, fsubs(fpr32(t), fpr32(b), fpr32(a)));
+}
+
+// Floating-point t = a * b: fmuld when size is 8, fmuls otherwise. Operands are
+// handed to the encoder as (t, a, b).
+void floatMultiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, fmuld(fpr64(t), fpr64(a), fpr64(b)));
+  else                 emit(con, fmuls(fpr32(t), fpr32(a), fpr32(b)));
+}
+
+// Floating-point division: fdivd when size is 8, fdivs otherwise. Operands are
+// handed to the encoder as (t, b, a), i.e. presumably t = b / a.
+void floatDivideR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t) {
+  const bool doublePrecision = (size == 8);
+  if (doublePrecision) emit(con, fdivd(fpr64(t), fpr64(b), fpr64(a)));
+  else                 emit(con, fdivs(fpr32(t), fpr32(b), fpr32(a)));
+}
+
+int normalize(Context* con, int offset, int index, unsigned scale, 
+          bool* preserveIndex, bool* release)
+{ // returns a register holding (index scaled by `scale`) + offset; *release reports whether that register is a temporary the caller must release
+  if (offset != 0 or scale != 1) {
+    lir::Register normalizedIndex
+      (*preserveIndex ? con->client->acquireTemporary(GPR_MASK) : index); // work in a fresh temporary only when index itself must survive
+    
+    if (*preserveIndex) {
+      *release = true;
+      *preserveIndex = false; // the value now lives in a temporary, so later uses need not preserve it
+    } else {
+      *release = false;
+    }
+
+    int scaled;
+
+    if (scale != 1) {
+      lir::Register unscaledIndex(index);
+
+      ResolvedPromise scalePromise(log(scale)); // shift count for the scale; log is presumably an integer base-2 logarithm defined elsewhere -- confirm
+      lir::Constant scaleConstant(&scalePromise);
+      
+      shiftLeftC(con, vm::TargetBytesPerWord, &scaleConstant,
+                 &unscaledIndex, &normalizedIndex);
+
+      scaled = normalizedIndex.low;
+    } else {
+      scaled = index;
+    }
+
+    if (offset != 0) { // add the displacement by loading it into a scratch register
+      lir::Register untranslatedIndex(scaled);
+
+      ResolvedPromise offsetPromise(offset);
+      lir::Constant offsetConstant(&offsetPromise);
+
+      lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+      moveCR(con, vm::TargetBytesPerWord, &offsetConstant, vm::TargetBytesPerWord, &tmp);
+      addR(con, vm::TargetBytesPerWord, &tmp, &untranslatedIndex, &normalizedIndex);
+      con->client->releaseTemporary(tmp.low);
+    }
+
+    return normalizedIndex.low;
+  } else { // nothing to fold in: the index register is usable as-is
+    *release = false;
+    return index;
+  }
+}
+
+void store(Context* con, unsigned size, lir::Register* src,
+      int base, int offset, int index, unsigned scale, bool preserveIndex)
+{ // stores src to memory at base + offset (+ index * scale), choosing among register-indexed, immediate-offset and offset-in-temporary forms
+  if (index != lir::NoRegister) { // register-indexed addressing
+    bool release;
+    int normalized = normalize
+      (con, offset, index, scale, &preserveIndex, &release); // fold offset and scale into a single index register
+
+    if (!isFpr(src)) { // GPR store
+      switch (size) {
+      case 1:
+        emit(con, strb(src->low, base, normalized));
+        break;
+
+      case 2:
+        emit(con, strh(src->low, base, normalized));
+        break;
+
+      case 4:
+        emit(con, str(src->low, base, normalized));
+        break;
+
+      case 8: { // split into 2 32-bit stores
+        lir::Register srcHigh(src->high);
+        store(con, 4, &srcHigh, base, 0, normalized, 1, preserveIndex); // high word at displacement 0
+        store(con, 4, src, base, 4, normalized, 1, preserveIndex); // low word at displacement 4
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR store
+      lir::Register base_(base),
+                          normalized_(normalized),
+                          absAddr = makeTemp(con);
+      // FPR stores have only bases, so we must add the index
+      addR(con, vm::TargetBytesPerWord, &base_, &normalized_, &absAddr);
+      // double-precision
+      if (size == 8) emit(con, fstd(fpr64(src), absAddr.low));
+      // single-precision
+      else           emit(con, fsts(fpr32(src), absAddr.low));
+      freeTemp(con, absAddr);
+    }
+
+    if (release) con->client->releaseTemporary(normalized); // normalize handed back a temporary
+  } else if (size == 8
+             or abs(offset) == (abs(offset) & 0xFF)
+             or (size != 2 and abs(offset) == (abs(offset) & 0xFFF))) // immediate-offset form: magnitude within 0xFF, or within 0xFFF for non-halfword sizes; 8-byte stores always take this path
+  {
+    if (!isFpr(src)) { // GPR store
+      switch (size) {
+      case 1:
+        emit(con, strbi(src->low, base, offset));
+        break;
+
+      case 2:
+        emit(con, strhi(src->low, base, offset));
+        break;
+
+      case 4:
+        emit(con, stri(src->low, base, offset));
+        break;
+
+      case 8: { // split into 2 32-bit stores
+        lir::Register srcHigh(src->high);
+        store(con, 4, &srcHigh, base, offset, lir::NoRegister, 1, false); // high word at offset
+        store(con, 4, src, base, offset + 4, lir::NoRegister, 1, false); // low word at offset + 4
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR store
+      // double-precision
+      if (size == 8) emit(con, fstd(fpr64(src), base, offset));
+      // single-precision
+      else           emit(con, fsts(fpr32(src), base, offset));
+    }
+  } else { // offset does not fit the immediate form: load it into a temporary and use that as the index
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    ResolvedPromise offsetPromise(offset);
+    lir::Constant offsetConstant(&offsetPromise);
+    moveCR(con, vm::TargetBytesPerWord, &offsetConstant,
+           vm::TargetBytesPerWord, &tmp);
+    
+    store(con, size, src, base, 0, tmp.low, 1, false);
+
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+void moveRM(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize UNUSED, lir::Memory* dst)
+{ // register-to-memory move; forwards to store() with the index register preserved
+  assert(con, srcSize == dstSize); // no width conversion on stores
+
+  store(con, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true);
+}
+
+void load(Context* con, unsigned srcSize, int base, int offset, int index,
+     unsigned scale, unsigned dstSize, lir::Register* dst,
+     bool preserveIndex, bool signExtend)
+{ // loads from base + offset (+ index * scale) into dst, choosing among register-indexed, immediate-offset and offset-in-temporary forms
+  if (index != lir::NoRegister) { // register-indexed addressing
+    bool release;
+    int normalized = normalize
+      (con, offset, index, scale, &preserveIndex, &release); // fold offset and scale into a single index register
+
+    if (!isFpr(dst)) { // GPR load
+      switch (srcSize) {
+      case 1:
+        if (signExtend) {
+          emit(con, ldrsb(dst->low, base, normalized));
+        } else {
+          emit(con, ldrb(dst->low, base, normalized));
+        }
+        break;
+
+      case 2:
+        if (signExtend) {
+          emit(con, ldrsh(dst->low, base, normalized));
+        } else {
+          emit(con, ldrh(dst->low, base, normalized));
+        }
+        break;
+
+      case 4:
+      case 8: {
+        if (srcSize == 4 and dstSize == 8) { // load one word, then widen it in registers
+          load(con, 4, base, 0, normalized, 1, 4, dst, preserveIndex,
+               false);
+          moveRR(con, 4, dst, 8, dst);
+        } else if (srcSize == 8 and dstSize == 8) { // two word loads: high word at displacement 0, low word at displacement 4
+          lir::Register dstHigh(dst->high);
+          load(con, 4, base, 0, normalized, 1, 4, &dstHigh,
+              preserveIndex, false);
+          load(con, 4, base, 4, normalized, 1, 4, dst, preserveIndex,
+               false);
+        } else {
+          emit(con, ldr(dst->low, base, normalized));
+        }
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR load
+      lir::Register base_(base),
+                          normalized_(normalized),
+                          absAddr = makeTemp(con);
+      // VFP loads only have bases, so we must add the index
+      addR(con, vm::TargetBytesPerWord, &base_, &normalized_, &absAddr);
+      // double-precision
+      if (srcSize == 8) emit(con, fldd(fpr64(dst), absAddr.low));
+      // single-precision
+      else              emit(con, flds(fpr32(dst), absAddr.low));
+      freeTemp(con, absAddr);
+    }
+
+    if (release) con->client->releaseTemporary(normalized); // normalize handed back a temporary
+  } else if ((srcSize == 8 and dstSize == 8)
+             or abs(offset) == (abs(offset) & 0xFF)
+             or (srcSize != 2
+                 and (srcSize != 1 or not signExtend)
+                 and abs(offset) == (abs(offset) & 0xFFF))) // immediate-offset form: magnitude within 0xFF, or within 0xFFF except for halfword and sign-extending byte loads; 8-to-8-byte loads always take this path
+  {
+    if (!isFpr(dst)) { // GPR load
+      switch (srcSize) {
+      case 1:
+        if (signExtend) {
+          emit(con, ldrsbi(dst->low, base, offset));
+        } else {
+          emit(con, ldrbi(dst->low, base, offset));
+        }
+        break;
+
+      case 2:
+        if (signExtend) {
+          emit(con, ldrshi(dst->low, base, offset));
+        } else {
+          emit(con, ldrhi(dst->low, base, offset));
+        }
+        break;
+
+      case 4:
+        emit(con, ldri(dst->low, base, offset));
+        break;
+
+      case 8: {
+        if (dstSize == 8) { // two word loads: high word at offset, low word at offset + 4
+          lir::Register dstHigh(dst->high);
+          load(con, 4, base, offset, lir::NoRegister, 1, 4, &dstHigh, false,
+               false);
+          load(con, 4, base, offset + 4, lir::NoRegister, 1, 4, dst, false,
+               false);
+        } else { // narrower destination: a single word load from `offset`
+          emit(con, ldri(dst->low, base, offset));
+        }
+      } break;
+
+      default: abort(con);
+      }
+    } else { // FPR load
+      // double-precision
+      if (srcSize == 8) emit(con, fldd(fpr64(dst), base, offset));
+      // single-precision
+      else              emit(con, flds(fpr32(dst), base, offset));
+    }
+  } else { // offset does not fit the immediate form: load it into a temporary and use that as the index
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    ResolvedPromise offsetPromise(offset);
+    lir::Constant offsetConstant(&offsetPromise);
+    moveCR(con, vm::TargetBytesPerWord, &offsetConstant, vm::TargetBytesPerWord,
+           &tmp);
+    
+    load(con, srcSize, base, 0, tmp.low, 1, dstSize, dst, false,
+         signExtend);
+
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+void moveMR(Context* con, unsigned srcSize, lir::Memory* src,
+       unsigned dstSize, lir::Register* dst)
+{ // memory-to-register move: load() with the index register preserved and sign extension enabled
+  load(con, srcSize, src->base, src->offset, src->index, src->scale,
+       dstSize, dst, true, true);
+}
+
+void moveZMR(Context* con, unsigned srcSize, lir::Memory* src,
+        unsigned dstSize, lir::Register* dst)
+{ // memory-to-register move without sign extension: same as moveMR but with signExtend = false
+  load(con, srcSize, src->base, src->offset, src->index, src->scale,
+       dstSize, dst, true, false);
+}
+
+void andR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* dst)
+{ // dst = a & b, one and_ per 32-bit word
+  const bool wide = (size == 8);
+  if (wide) emit(con, and_(dst->high, a->high, b->high)); // high word is emitted first
+  emit(con, and_(dst->low, a->low, b->low));
+}
+
+void andC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst)
+{ // dst = b & a for a constant a, using an immediate form when the mask allows it
+  int64_t v = a->value->value();
+
+  if (size == 8) { // handle each 32-bit half with a recursive call
+    ResolvedPromise high((v >> 32) & 0xFFFFFFFF);
+    lir::Constant ah(&high);
+
+    ResolvedPromise low(v & 0xFFFFFFFF);
+    lir::Constant al(&low);
+
+    lir::Register bh(b->high);
+    lir::Register dh(dst->high);
+
+    andC(con, 4, &al, b, dst);
+    andC(con, 4, &ah, &bh, &dh);
+  } else {
+    uint32_t v32 = static_cast<uint32_t>(v);
+    if (v32 != 0xFFFFFFFF) {
+      if ((v32 & 0xFFFFFF00) == 0xFFFFFF00) { // only low-byte bits are cleared: bit-clear with the inverted low byte
+        emit(con, bici(dst->low, b->low, (~(v32 & 0xFF)) & 0xFF));
+      } else if ((v32 & 0xFFFFFF00) == 0) { // mask fits in the low byte: immediate and
+        emit(con, andi(dst->low, b->low, v32 & 0xFF));
+      } else {
+        // todo: there are other cases we can handle in one
+        // instruction
+
+        bool useTemporary = b->low == dst->low; // loading the mask into dst would clobber b
+        lir::Register tmp(dst->low);
+        if (useTemporary) {
+          tmp.low = con->client->acquireTemporary(GPR_MASK);
+        }
+
+        moveCR(con, 4, a, 4, &tmp); // materialise the mask, then and register-to-register
+        andR(con, 4, b, &tmp, dst);
+        
+        if (useTemporary) {
+          con->client->releaseTemporary(tmp.low);
+        }
+      }
+    } else { // all-ones mask: the result is just b
+      moveRR(con, size, b, size, dst);
+    }
+  }
+}
+
+void orR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* dst)
+{ // dst = a | b, one orr per 32-bit word
+  const bool wide = (size == 8);
+  if (wide) emit(con, orr(dst->high, a->high, b->high)); // high word is emitted first
+  emit(con, orr(dst->low, a->low, b->low));
+}
+
+void xorR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* dst)
+{ // dst = a ^ b, one eor per 32-bit word
+  const bool wide = (size == 8);
+  if (wide) emit(con, eor(dst->high, a->high, b->high)); // high word is emitted first
+  emit(con, eor(dst->low, a->low, b->low));
+}
+
+void moveAR2(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst)
+{
+  assert(con, srcSize == 4 and dstSize == 4);
+  // first materialise the address itself in dst ...
+  lir::Constant addressValue(src->address);
+  moveCR(con, srcSize, &addressValue, dstSize, dst);
+  // ... then load through it, overwriting dst with the word found there
+  lir::Memory pointee(dst->low, 0, -1, 0);
+  moveMR(con, dstSize, &pointee, dstSize, dst);
+}
+
+void moveAR(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst)
+{ // address-to-register move; simply forwards to moveAR2
+  moveAR2(con, srcSize, src, dstSize, dst);
+}
+
+void compareRR(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b)
+{ // emits a compare of b against a (b is the first operand), for two GPRs or two FPRs
+  assert(con, !(isFpr(a) ^ isFpr(b))); // regs must be of the same type
+
+  if (!isFpr(a)) { // GPR compare
+    assert(con, aSize == 4 && bSize == 4);
+    /**///assert(con, b->low != a->low);
+    emit(con, cmp(b->low, a->low));
+  } else {         // FPR compare
+    assert(con, aSize == bSize);
+    if (aSize == 8) emit(con, fcmpd(fpr64(b), fpr64(a))); // double
+    else            emit(con, fcmps(fpr32(b), fpr32(a))); // single
+    emit(con, fmstat()); // presumably copies the floating-point status flags to the core flags so ordinary conditional branches can follow -- confirm against encode.h
+  }
+}
+
+void compareCR(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b)
+{ // emits a compare of register b against constant a
+  assert(con, aSize == 4 and bSize == 4);
+
+  if (!isFpr(b) && a->value->resolved() &&
+      isOfWidth(a->value->value(), 8)) { // GPR and a resolved constant that fits in 8 bits: immediate compare
+    emit(con, cmpi(b->low, a->value->value()));
+  } else { // otherwise load the constant into a scratch register and compare registers
+    lir::Register tmp(con->client->acquireTemporary(GPR_MASK));
+    moveCR(con, aSize, a, bSize, &tmp);
+    compareRR(con, bSize, &tmp, bSize, b);
+    con->client->releaseTemporary(tmp.low);
+  }
+}
+
+void compareCM(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b)
+{
+  assert(con, aSize == 4 and bSize == 4);
+  // bring the memory operand into a scratch GPR, then reuse the constant/register compare
+  lir::Register loaded(con->client->acquireTemporary(GPR_MASK));
+  moveMR(con, bSize, b, bSize, &loaded);
+  compareCR(con, aSize, a, bSize, &loaded);
+  con->client->releaseTemporary(loaded.low);
+}
+
+void compareRM(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize, lir::Memory* b)
+{
+  assert(con, aSize == 4 and bSize == 4);
+  // bring the memory operand into a scratch GPR, then reuse the register/register compare
+  lir::Register loaded(con->client->acquireTemporary(GPR_MASK));
+  moveMR(con, bSize, b, bSize, &loaded);
+  compareRR(con, aSize, a, bSize, &loaded);
+  con->client->releaseTemporary(loaded.low);
+}
+
+int32_t
+branch(Context* con, lir::TernaryOperation op)
+{ // maps a conditional-jump operation to a branch instruction word built with target argument 0 (the caller records the real target separately)
+  switch (op) {
+  case lir::JumpIfEqual:
+  case lir::JumpIfFloatEqual:
+    return beq(0);
+
+  case lir::JumpIfNotEqual:
+  case lir::JumpIfFloatNotEqual:
+    return bne(0);
+
+  case lir::JumpIfLess:
+  case lir::JumpIfFloatLess:
+  case lir::JumpIfFloatLessOrUnordered:
+    return blt(0);
+
+  case lir::JumpIfGreater:
+  case lir::JumpIfFloatGreater:
+    return bgt(0);
+
+  case lir::JumpIfLessOrEqual:
+  case lir::JumpIfFloatLessOrEqual:
+  case lir::JumpIfFloatLessOrEqualOrUnordered:
+    return ble(0);
+
+  case lir::JumpIfGreaterOrEqual:
+  case lir::JumpIfFloatGreaterOrEqual:
+    return bge(0);
+
+  case lir::JumpIfFloatGreaterOrUnordered: // the two remaining unordered variants use their own condition codes
+    return bhi(0);
+
+  case lir::JumpIfFloatGreaterOrEqualOrUnordered:
+    return bpl(0);
+ 
+  default:
+    abort(con); // any other operation is rejected; no return follows, so abort presumably does not return -- confirm
+  }
+}
+
+void conditional(Context* con, int32_t branch, lir::Constant* target)
+{ // emits the given branch instruction word and registers an offset task tying it to target
+  appendOffsetTask(con, target->value, offsetPromise(con)); // registered before the emit, so offsetPromise(con) is taken at the position the branch is about to occupy
+  emit(con, branch);
+}
+
+void branch(Context* con, lir::TernaryOperation op, lir::Constant* target)
+{ // emits the conditional branch for op (see the branch(con, op) overload) aimed at target
+  conditional(con, branch(con, op), target);
+}
+
+void branchLong(Context* con, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compareSigned,
+           BinaryOperationType compareUnsigned)
+{ // compare-and-branch on two-word operands: compare the high words first, then the low words only if the high words do not decide
+  compareSigned(con, 4, ah, 4, bh);
+
+  unsigned next = 0; // code position of a local "skip the low-word test" branch, patched at the end; 0 means none was emitted
+  
+  switch (op) {
+  case lir::JumpIfEqual:
+  case lir::JumpIfFloatEqual:
+    next = con->code.length();
+    emit(con, bne(0)); // high words differ: skip the low-word test
+
+    compareSigned(con, 4, al, 4, bl);
+    conditional(con, beq(0), target);
+    break;
+
+  case lir::JumpIfNotEqual:
+  case lir::JumpIfFloatNotEqual:
+    conditional(con, bne(0), target); // high words differ: jump immediately
+
+    compareSigned(con, 4, al, 4, bl);
+    conditional(con, bne(0), target);
+    break;
+
+  case lir::JumpIfLess:
+  case lir::JumpIfFloatLess:
+    conditional(con, blt(0), target); // decided by the high words
+
+    next = con->code.length();
+    emit(con, bgt(0)); // decided the other way: skip the low-word test
+
+    compareUnsigned(con, 4, al, 4, bl); // high words equal: the low words decide, tested with an unsigned condition
+    conditional(con, blo(0), target);
+    break;
+
+  case lir::JumpIfGreater:
+  case lir::JumpIfFloatGreater:
+    conditional(con, bgt(0), target);
+
+    next = con->code.length();
+    emit(con, blt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, bhi(0), target);
+    break;
+
+  case lir::JumpIfLessOrEqual:
+  case lir::JumpIfFloatLessOrEqual:
+    conditional(con, blt(0), target);
+
+    next = con->code.length();
+    emit(con, bgt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, bls(0), target);
+    break;
+
+  case lir::JumpIfGreaterOrEqual:
+  case lir::JumpIfFloatGreaterOrEqual:
+    conditional(con, bgt(0), target);
+
+    next = con->code.length();
+    emit(con, blt(0));
+
+    compareUnsigned(con, 4, al, 4, bl);
+    conditional(con, bhs(0), target);
+    break;
+
+  default:
+    abort(con);
+  }
+
+  if (next) { // point the local skip branch at the instruction following this sequence
+    updateOffset
+      (con->s, con->code.data + next, reinterpret_cast<intptr_t>
+       (con->code.data + con->code.length()));
+  }
+}
+
+void branchRR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target)
+{
+  // Floating-point registers and word-sized values: one compare, one branch.
+  if (isFpr(a) || size <= vm::TargetBytesPerWord) {
+    compareRR(con, size, a, size, b);
+    branch(con, op, target);
+    return;
+  }
+
+  lir::Register highA(a->high), highB(b->high);  // high halves of the two register pairs
+  branchLong(con, op, a, &highA, b, &highB, target,
+             CAST2(compareRR), CAST2(compareRR));
+}
+
+void branchCR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target)
+{
+  assert(con, !isFloatBranch(op));
+
+  if (size <= vm::TargetBytesPerWord) {
+    compareCR(con, size, a, size, b);
+    branch(con, op, target);
+    return;
+  }
+
+  // Wider than a word: split the constant into two word-sized promises and
+  // pair them with the low/high halves of b.
+  const int64_t value = a->value->value();
+  const vm::target_uintptr_t wordMask = ~static_cast<vm::target_uintptr_t>(0);
+  ResolvedPromise lowPromise(value & wordMask);
+  ResolvedPromise highPromise((value >> 32) & wordMask);
+  lir::Constant lowConst(&lowPromise);
+  lir::Constant highConst(&highPromise);
+  lir::Register highReg(b->high);
+  branchLong(con, op, &lowConst, &highConst, b, &highReg, target,
+             CAST2(compareCR), CAST2(compareCR));
+}
+
+void branchRM(Context* con, lir::TernaryOperation op, unsigned size,  // compare register a with memory b, then branch on op
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target)
+{
+  assert(con, !isFloatBranch(op));  // float branches are not handled on this path
+  assert(con, size <= vm::TargetBytesPerWord);  // no two-word (branchLong) variant for memory operands
+
+  compareRM(con, size, a, size, b);
+  branch(con, op, target);
+}
+
+void branchCM(Context* con, lir::TernaryOperation op, unsigned size,  // compare constant a with memory b, then branch on op
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target)
+{
+  assert(con, !isFloatBranch(op));  // float branches are not handled on this path
+  assert(con, size <= vm::TargetBytesPerWord);  // no two-word (branchLong) variant for memory operands
+
+  compareCM(con, size, a, size, b);
+  branch(con, op, target);
+}
+
+ShiftMaskPromise*
+shiftMaskPromise(Context* con, Promise* base, unsigned shift, int64_t mask)  // factory for a ShiftMaskPromise wrapping `base`
+{
+  return new(con->zone) ShiftMaskPromise(base, shift, mask);  // placement-new into con->zone; no matching delete here (presumably zone-owned)
+}
+
+void moveCM(Context* con, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Memory* dst)
+{
+  if (dstSize != 8) {
+    // Not a double word: stage the constant in a scratch GPR and store it.
+    lir::Register scratch(con->client->acquireTemporary(GPR_MASK));
+    moveCR(con, srcSize, src, dstSize, &scratch);
+    moveRM(con, dstSize, &scratch, dstSize, dst);
+    con->client->releaseTemporary(scratch.low);
+    return;
+  }
+
+  // Double word: carve the constant into two 32-bit promises and store each
+  // half through the path above.
+  // NOTE(review): the low half lands at offset + 4 and the high half at
+  // offset + 0 -- confirm this word order against store()/moveRM.
+  lir::Constant upperHalf(shiftMaskPromise(con, src->value, 32, 0xFFFFFFFF));
+  lir::Constant lowerHalf(shiftMaskPromise(con, src->value, 0, 0xFFFFFFFF));
+
+  lir::Memory secondWord(dst->base, dst->offset + 4, dst->index, dst->scale);
+
+  moveCM(con, 4, &lowerHalf, 4, &secondWord);
+  moveCM(con, 4, &upperHalf, 4, dst);
+}
+
+void negateRR(Context* con, unsigned srcSize, lir::Register* src,
+         unsigned dstSize UNUSED, lir::Register* dst)
+{
+  assert(con, srcSize == dstSize);
+
+  emit(con, mvn(dst->low, src->low));  // negate as invert-then-add-one
+  emit(con, SETS(addi(dst->low, dst->low, 1)));  // SETS: presumably makes the add update flags for adci below
+  if (srcSize != 8) return;
+
+  emit(con, mvn(dst->high, src->high));  // high word: invert ...
+  emit(con, adci(dst->high, dst->high, 0));  // ... then add 0 with carry from the low word
+}
+
+void callR(Context* con, unsigned size UNUSED, lir::Register* target)  // call through the address held in target->low
+{
+  assert(con, size == vm::TargetBytesPerWord);  // size is only read by this assert (hence UNUSED, presumably for builds where assert compiles away)
+  emit(con, blx(target->low));
+}
+
+void callC(Context* con, unsigned size UNUSED, lir::Constant* target)  // direct call to a constant target
+{
+  assert(con, size == vm::TargetBytesPerWord);  // only word-sized call targets are accepted
+
+  appendOffsetTask(con, target->value, offsetPromise(con));  // registered before the bl is emitted, same pattern as conditional()
+  emit(con, bl(0));  // emitted with a 0 offset; presumably fixed up by the offset task
+}
+
+void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target)  // call a constant target indirectly, via a register
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  lir::Register tmp(4);  // hard-coded register 4; longJumpC describes it as a non-arg register that may be clobbered
+  moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con));  // current offset promise is passed as moveCR2's callOffset
+  callR(con, vm::TargetBytesPerWord, &tmp);
+}
+
+void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target)  // jump to a constant target indirectly, via a register
+{
+  assert(con, size == vm::TargetBytesPerWord);
+
+  lir::Register tmp(4); // a non-arg reg that we don't mind clobbering
+  moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con));  // current offset promise is passed as moveCR2's callOffset
+  jumpR(con, vm::TargetBytesPerWord, &tmp);
+}
+
+void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target)  // direct unconditional jump to a constant target
+{
+  assert(con, size == vm::TargetBytesPerWord);  // only word-sized jump targets are accepted
+
+  appendOffsetTask(con, target->value, offsetPromise(con));  // registered before the b is emitted, same pattern as callC()
+  emit(con, b(0));  // emitted with a 0 offset; presumably fixed up by the offset task
+}
+
+void return_(Context* con)  // emit a return: branch-exchange to the link register
+{
+  emit(con, bx(LinkRegister));  // LinkRegister is register 14 (registers.h)
+}
+
+void trap(Context* con)  // emit a breakpoint instruction
+{
+  emit(con, bkpt(0));  // breakpoint with immediate 0
+}
+
+void memoryBarrier(Context*) {}  // intentionally emits nothing; NOTE(review): confirm no barrier instruction is required on the supported ARM targets
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
diff --git a/src/codegen/arm/operations.h b/src/codegen/arm/operations.h
new file mode 100644
index 0000000000..2d598b6d9e
--- /dev/null
+++ b/src/codegen/arm/operations.h
@@ -0,0 +1,240 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_OPERATIONS_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_OPERATIONS_H
+
+#include "registers.h"
+
+namespace vm {
+class System;  // forward declaration only; no declaration in this header refers to it
+}
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+class Context;  // forward declaration only: the inline helpers below dereference con, so includers must already have the full definition (context.h) in scope
+
+// shortcut functions
+
+inline int newTemp(Context* con, unsigned mask) {  // acquire a temporary register allowed by `mask`
+  return con->client->acquireTemporary(mask);
+}
+
+inline int newTemp(Context* con) {  // same, restricted to the general-purpose registers
+  return newTemp(con, GPR_MASK);
+}
+
+inline void freeTemp(Context* con, int r) {  // hand register r back to the client; counterpart of newTemp()
+  con->client->releaseTemporary(r);
+}
+
+inline int64_t getValue(lir::Constant* con) {  // note: `con` is the constant here, not a Context
+  return con->value->value();  // asks the constant's promise for its value
+}
+
+inline lir::Register makeTemp(Context* con) {  // wrap one freshly acquired GPR in a lir::Register
+  const int reg = newTemp(con);
+  return lir::Register(reg);  // single-argument constructor: only the low register is supplied
+}
+
+inline lir::Register makeTemp64(Context* con) {  // wrap two freshly acquired GPRs as a (low, high) register pair
+  const int low = newTemp(con), high = newTemp(con);  // declarators are sequenced: low is always acquired before high
+  return lir::Register(low, high);  // two calls written as constructor arguments would run in unspecified order
+}
+
+inline void freeTemp(Context* con, const lir::Register& tmp) {  // release whichever halves of tmp are actually set
+  const int halves[2] = { tmp.low, tmp.high };  // low is released first, then high
+  for (unsigned i = 0; i < 2; ++i) if (halves[i] != lir::NoRegister) freeTemp(con, halves[i]);
+}
+
+void shiftLeftR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);  // R suffix: operand a is a register
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);  // identical declaration is repeated further down in this header
+
+void shiftLeftC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t);  // C suffix: operand a is a constant
+
+void shiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void shiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t);
+
+void unsignedShiftRightR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void unsignedShiftRightC(Context* con, unsigned size UNUSED, lir::Constant* a, lir::Register* b, lir::Register* t);  // presumably t receives the result in all six shifts -- confirm in operations.cpp
+
+bool needJump(MyBlock* b);  // MyBlock is not declared in this header; presumably supplied by block.h, which includers pull in first
+
+unsigned padding(MyBlock* b, unsigned offset);
+
+void resolve(MyBlock* b);
+
+void jumpR(Context* con, unsigned size UNUSED, lir::Register* target);  // jump through a register; used by longJumpC
+
+void swapRR(Context* con, unsigned aSize, lir::Register* a,
+       unsigned bSize, lir::Register* b);
+
+void moveRR(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize, lir::Register* dst);  // second declaration of moveRR (same signature as the one near the top)
+
+void moveZRR(Context* con, unsigned srcSize, lir::Register* src,
+        unsigned, lir::Register* dst);  // destination size parameter is unnamed; Z presumably means zero-extending
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+            unsigned, lir::Register* dst);  // constant-to-register move; declared twice (see below)
+
+void moveCR2(Context* con, unsigned size, lir::Constant* src,
+        lir::Register* dst, Promise* callOffset);  // longCallC/longJumpC pass offsetPromise(con) as callOffset
+
+void moveCR(Context* con, unsigned size, lir::Constant* src,
+       unsigned, lir::Register* dst);  // second declaration of moveCR (same signature as above)
+
+void addR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);  // integer arithmetic; third register is named t here and dst in the C forms
+
+void subR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void addC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);  // constant operand a, register operand b
+
+void subC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);  // operand order of the subtraction is not visible here -- confirm in operations.cpp
+
+void multiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatAbsoluteRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);  // unary float ops: only the first size parameter is named
+
+void floatNegateRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void float2FloatRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void float2IntRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void int2FloatRR(Context* con, unsigned, lir::Register* a, unsigned size, lir::Register* b);  // unlike its siblings, the named size is the second one (the one paired with b)
+
+void floatSqrtRR(Context* con, unsigned size, lir::Register* a, unsigned, lir::Register* b);
+
+void floatAddR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);  // binary float ops: same parameter shape as addR/subR
+
+void floatSubtractR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatMultiplyR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+void floatDivideR(Context* con, unsigned size, lir::Register* a, lir::Register* b, lir::Register* t);
+
+int normalize(Context* con, int offset, int index, unsigned scale, 
+          bool* preserveIndex, bool* release);  // preserveIndex and release are out-parameters; exact meaning not visible here
+
+void store(Context* con, unsigned size, lir::Register* src,
+      int base, int offset, int index, unsigned scale, bool preserveIndex);  // store taking base/offset/index/scale as separate values rather than a lir::Memory
+
+void moveRM(Context* con, unsigned srcSize, lir::Register* src,
+       unsigned dstSize UNUSED, lir::Memory* dst);  // register-to-memory move; moveCM calls it after staging a constant
+
+void load(Context* con, unsigned srcSize, int base, int offset, int index,
+     unsigned scale, unsigned dstSize, lir::Register* dst,
+     bool preserveIndex, bool signExtend);  // load counterpart of store(), with a signExtend flag
+
+void moveMR(Context* con, unsigned srcSize, lir::Memory* src,
+       unsigned dstSize, lir::Register* dst);  // memory-to-register move
+
+void moveZMR(Context* con, unsigned srcSize, lir::Memory* src,
+        unsigned dstSize, lir::Register* dst);  // Z presumably means zero-extending -- confirm in operations.cpp
+
+void andR(Context* con, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst);  // bitwise ops: two operands plus a dst register
+
+void andC(Context* con, unsigned size, lir::Constant* a,
+     lir::Register* b, lir::Register* dst);  // the only bitwise op declared here with a constant-operand form
+
+void orR(Context* con, unsigned size, lir::Register* a,
+    lir::Register* b, lir::Register* dst);
+
+void xorR(Context* con, unsigned size, lir::Register* a,
+     lir::Register* b, lir::Register* dst);
+
+void moveAR2(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst);  // A: source operand is a lir::Address
+
+void moveAR(Context* con, unsigned srcSize, lir::Address* src,
+       unsigned dstSize, lir::Register* dst);  // same signature as moveAR2; how they differ is not visible here
+
+void compareRR(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize UNUSED, lir::Register* b);  // compares feed the following branch; branchRR also passes this to branchLong via CAST2
+
+void compareCR(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Register* b);  // branchCR passes this to branchLong via CAST2
+
+void compareCM(Context* con, unsigned aSize, lir::Constant* a,
+          unsigned bSize, lir::Memory* b);  // used by branchCM
+
+void compareRM(Context* con, unsigned aSize, lir::Register* a,
+          unsigned bSize, lir::Memory* b);  // used by branchRM
+
+int32_t
+branch(Context* con, lir::TernaryOperation op);  // returns the branch instruction word for op; the three-argument overload hands it to conditional()
+
+void conditional(Context* con, int32_t branch, lir::Constant* target);  // registers an offset task for target, then emits the given word
+
+void branch(Context* con, lir::TernaryOperation op, lir::Constant* target);  // conditional(con, branch(con, op), target)
+
+void branchLong(Context* con, lir::TernaryOperation op, lir::Operand* al,
+           lir::Operand* ah, lir::Operand* bl,
+           lir::Operand* bh, lir::Constant* target,
+           BinaryOperationType compareSigned,
+           BinaryOperationType compareUnsigned);  // two-word compare-and-branch; high words are compared first
+
+void branchRR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Register* b,
+         lir::Constant* target);  // uses branchLong for non-FPR operands wider than a target word
+
+void branchCR(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Register* b,
+         lir::Constant* target);  // asserts op is not a float branch; uses branchLong for sizes wider than a target word
+
+void branchRM(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Register* a, lir::Memory* b,
+         lir::Constant* target);  // asserts a non-float op and size <= target word
+
+void branchCM(Context* con, lir::TernaryOperation op, unsigned size,
+         lir::Constant* a, lir::Memory* b,
+         lir::Constant* target);  // asserts a non-float op and size <= target word
+
+ShiftMaskPromise*
+shiftMaskPromise(Context* con, Promise* base, unsigned shift, int64_t mask);  // constructs the promise in con->zone
+
+void moveCM(Context* con, unsigned srcSize, lir::Constant* src,
+       unsigned dstSize, lir::Memory* dst);  // dstSize 8 is split into two 4-byte stores; other sizes go through a temporary register
+
+void negateRR(Context* con, unsigned srcSize, lir::Register* src,
+         unsigned dstSize UNUSED, lir::Register* dst);  // asserts srcSize == dstSize
+
+void callR(Context* con, unsigned size UNUSED, lir::Register* target);  // emits blx on target->low
+
+void callC(Context* con, unsigned size UNUSED, lir::Constant* target);  // emits bl(0) plus an offset task for target
+
+void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target);  // moveCR2 into register 4, then callR
+
+void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target);  // moveCR2 into register 4, then jumpR
+
+void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target);  // emits b(0) plus an offset task for target
+
+void return_(Context* con);  // emits bx(LinkRegister)
+
+void trap(Context* con);  // emits bkpt(0)
+
+void memoryBarrier(Context*);  // currently defined with an empty body
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_OPERATIONS_H
+
diff --git a/src/codegen/arm/registers.h b/src/codegen/arm/registers.h
new file mode 100644
index 0000000000..85c389b222
--- /dev/null
+++ b/src/codegen/arm/registers.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2008-2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software.  See license.txt for
+   details. */
+
+#ifndef AVIAN_CODEGEN_ASSEMBLER_ARM_REGISTERS_H
+#define AVIAN_CODEGEN_ASSEMBLER_ARM_REGISTERS_H
+
+#include <avian/vm/codegen/lir.h>
+#include <avian/vm/codegen/assembler.h>
+
+namespace avian {
+namespace codegen {
+namespace arm {
+
+
+const uint64_t MASK_LO32 = 0xFFFFFFFFu;  // low 32 bits set
+const unsigned MASK_LO16 = 0xFFFFu;      // low 16 bits set
+const unsigned MASK_LO8  = 0xFFu;        // low 8 bits set
+
+const int N_GPRS = 16;  // register numbers below this are GPRs (see isFpr)
+const int N_FPRS = 16;
+const uint32_t GPR_MASK = 0x0000FFFFu;  // bits 0..15
+const uint32_t FPR_MASK = 0xFFFF0000u;  // bits 16..31
+
+const uint64_t GPR_MASK64 = (static_cast<uint64_t>(GPR_MASK) << 32) | GPR_MASK;  // 32-bit mask in both halves
+const uint64_t FPR_MASK64 = (static_cast<uint64_t>(FPR_MASK) << 32) | FPR_MASK;  // 32-bit mask in both halves
+
+inline bool isFpr(lir::Register* reg) {  // true when reg names a floating-point register
+  return reg->low >= N_GPRS;  // only the low register number is examined; numbers from N_GPRS upward count as FPRs
+}
+
+inline int fpr64(int reg) { return reg - N_GPRS; }  // register number rebased so the first FPR is 0
+inline int fpr64(lir::Register* reg) { return fpr64(reg->low); }  // same, taken from the register's low number
+inline int fpr32(int reg) { return fpr64(reg) << 1; }  // twice the fpr64 index; presumably the single-precision register aliasing that double
+inline int fpr32(lir::Register* reg) { return fpr64(reg) << 1; }
+
+const int ThreadRegister = 8;  // register number reserved for the thread; presumably holds the current thread pointer
+const int StackRegister = 13;  // r13, the ARM stack pointer
+const int LinkRegister = 14;  // r14, the ARM link register; return_() emits bx on it
+const int ProgramCounter = 15;  // r15, the ARM program counter
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // AVIAN_CODEGEN_ASSEMBLER_ARM_REGISTERS_H
diff --git a/src/common.h b/src/common.h
index a99d246f67..02e52aeae1 100644
--- a/src/common.h
+++ b/src/common.h
@@ -477,6 +477,12 @@ hash(const uint16_t* s, unsigned length)
   return h;
 }
 
+inline void
+write4(uint8_t* dst, uint32_t v)  // store v at dst in host byte order; dst may be unaligned
+{
+  memcpy(dst, &v, sizeof(uint32_t));  // bytewise copy of all four bytes
+}
+
 inline uint32_t
 floatToBits(float f)
 {