From d9aac52b3d8d456473181713ccf6cccc72416fc9 Mon Sep 17 00:00:00 2001 From: jet Date: Mon, 12 Jul 2010 14:18:36 -0600 Subject: [PATCH 01/23] First version; interpreted mode works and JIT mode compiles. --- src/arm.S | 2 +- src/arm.cpp | 773 ++++++++++++++++++++++++++++++++-------------- src/arm.h | 2 +- src/compile-arm.S | 110 +++++++ 4 files changed, 650 insertions(+), 237 deletions(-) create mode 100644 src/compile-arm.S diff --git a/src/arm.S b/src/arm.S index f3dd2b146f..9df1b14ac3 100644 --- a/src/arm.S +++ b/src/arm.S @@ -53,4 +53,4 @@ vmJump: mov sp, r2 mov r4, r3 ldmia sp, {r0,r1} - mov pc, lr + bx lr diff --git a/src/arm.cpp b/src/arm.cpp index 66af8a37d3..387c4ee688 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, Avian Contributors +/* Copyright (c) 2010, Avian Contributors Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided @@ -14,6 +14,7 @@ #define CAST1(x) reinterpret_cast(x) #define CAST2(x) reinterpret_cast(x) #define CAST3(x) reinterpret_cast(x) +#define CAST_BRANCH(x) reinterpret_cast(x) using namespace vm; @@ -111,7 +112,7 @@ inline int stmib(int Rn, int rlist) { return BLOCKXFER(AL, 1, 1, 0, 0, 0, Rn, rl inline int stmdb(int Rn, int rlist) { return BLOCKXFER(AL, 1, 0, 0, 0, 0, Rn, rlist); } inline int swp(int Rd, int Rm, int Rn) { return SWAP(AL, 0, Rn, Rd, Rm); } inline int swpb(int Rd, int Rm, int Rn) { return SWAP(AL, 1, Rn, Rd, Rm); } -inline int SETCOND(int ins, int cond) { return ins&0x0fffffff | cond<<28; } +inline int SETCOND(int ins, int cond) { return ((ins&0x0fffffff) | (cond<<28)); } inline int SETS(int ins) { return ins | 1<<20; } // PSEUDO-INSTRUCTIONS inline int nop() { return mov(0, 0); } @@ -122,6 +123,14 @@ inline int lsri(int Rd, int Rm, int imm) { return mov(Rd, Rm, LSR, imm); } inline int asr(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, ASR); } inline int asri(int Rd, int Rm, int imm) { return mov(Rd, Rm, 
ASR, imm); } inline int ror(int Rd, int Rm, int Rs) { return movsh(Rd, Rm, Rs, ROR); } +inline int beq(int offset) { return SETCOND(b(offset), EQ); } +inline int bne(int offset) { return SETCOND(b(offset), NE); } +inline int bls(int offset) { return SETCOND(b(offset), LS); } +inline int bhi(int offset) { return SETCOND(b(offset), HI); } +inline int blt(int offset) { return SETCOND(b(offset), LT); } +inline int bgt(int offset) { return SETCOND(b(offset), GT); } +inline int ble(int offset) { return SETCOND(b(offset), LE); } +inline int bge(int offset) { return SETCOND(b(offset), GE); } } const uint64_t MASK_LO32 = 0xffffffff; @@ -134,17 +143,27 @@ inline unsigned hi16(int64_t i) { return lo16(i>>16); } inline unsigned lo8(int64_t i) { return (unsigned)(i&MASK_LO8); } inline unsigned hi8(int64_t i) { return lo8(i>>8); } +inline int ha16(int32_t i) { + return ((i >> 16) + ((i & 0x8000) ? 1 : 0)) & 0xffff; +} +inline int unha16(int32_t high, int32_t low) { + return ((high - ((low & 0x8000) ? 1 : 0)) << 16) | low; +} + inline bool isInt8(intptr_t v) { return v == static_cast(v); } inline bool isInt16(intptr_t v) { return v == static_cast(v); } -inline bool isInt24(intptr_t v) { return v == v & 0xffffff; } +inline bool isInt24(intptr_t v) { return v == (v & 0xffffff); } inline bool isInt32(intptr_t v) { return v == static_cast(v); } inline int carry16(intptr_t v) { return static_cast(v) < 0 ? 
1 : 0; } -const unsigned FrameFooterSize = 0; +const unsigned FrameFooterSize = 2; +const unsigned FrameHeaderSize = 0; + const unsigned StackAlignmentInBytes = 8; const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord; const int StackRegister = 13; +const int BaseRegister = 11; const int ThreadRegister = 12; class MyBlock: public Assembler::Block { @@ -167,13 +186,14 @@ class MyBlock: public Assembler::Block { }; class Task; +class ConstantPoolEntry; class Context { public: Context(System* s, Allocator* a, Zone* zone): s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0), firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)), - lastBlock(firstBlock) + lastBlock(firstBlock), constantPool(0), constantPoolCount(0) { } System* s; @@ -184,6 +204,8 @@ class Context { uint8_t* result; MyBlock* firstBlock; MyBlock* lastBlock; + ConstantPoolEntry* constantPool; + unsigned constantPoolCount; }; class Task { @@ -206,6 +228,10 @@ typedef void (*TernaryOperationType) (Context*, unsigned, Assembler::Operand*, Assembler::Operand*, Assembler::Operand*); +typedef void (*BranchOperationType) +(Context*, TernaryOperation, unsigned, Assembler::Operand*, + Assembler::Operand*, Assembler::Operand*); + class ArchitectureContext { public: ArchitectureContext(System* s): s(s) { } @@ -217,7 +243,9 @@ class ArchitectureContext { BinaryOperationType binaryOperations [BinaryOperationCount * OperandTypeCount * OperandTypeCount]; TernaryOperationType ternaryOperations - [TernaryOperationCount * OperandTypeCount]; + [NonBranchTernaryOperationCount * OperandTypeCount]; + BranchOperationType branchOperations + [BranchOperationCount * OperandTypeCount * OperandTypeCount]; }; inline void NO_RETURN @@ -356,13 +384,14 @@ appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset, } inline unsigned -index(UnaryOperation operation, OperandType operand) +index(ArchitectureContext*, UnaryOperation operation, OperandType operand) { return operation + 
(UnaryOperationCount * operand); } inline unsigned -index(BinaryOperation operation, +index(ArchitectureContext*, + BinaryOperation operation, OperandType operand1, OperandType operand2) { @@ -371,13 +400,34 @@ index(BinaryOperation operation, + (BinaryOperationCount * OperandTypeCount * operand2); } -inline unsigned -index(TernaryOperation operation, - OperandType operand1) +bool +isBranch(TernaryOperation op) { - return operation + (TernaryOperationCount * operand1); + return op > FloatMin; } +bool +isFloatBranch(TernaryOperation op) +{ + return op > JumpIfNotEqual; +} + +inline unsigned +index(ArchitectureContext* c UNUSED, + TernaryOperation operation, + OperandType operand1) +{ + assert(c, not isBranch(operation)); + + return operation + (NonBranchTernaryOperationCount * operand1); +} + +unsigned +branchIndex(ArchitectureContext* c UNUSED, OperandType operand1, + OperandType operand2) +{ + return operand1 + (OperandTypeCount * operand2); +} // BEGIN OPERATION COMPILERS @@ -387,10 +437,9 @@ using namespace isa; inline void emit(Context* con, int code) { con->code.append4(code); } inline int newTemp(Context* con) { return con->client->acquireTemporary(); } inline void freeTemp(Context* con, int r) { con->client->releaseTemporary(r); } -inline int64_t getValue(Assembler::Constant c) { return c->value->value(); } +inline int64_t getValue(Assembler::Constant* c) { return c->value->value(); } - -void shiftLeftR(Context* con, unsigned size, Assembler::Register a, Assembler::Register b, Assembler::Register t) +void shiftLeftR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { int tmpHi = newTemp(con), tmpLo = newTemp(con); @@ -406,13 +455,13 @@ void shiftLeftR(Context* con, unsigned size, Assembler::Register a, Assembler::R emit(con, lsl(t->low, b->low, a->low)); } -void shiftLeftC(Context* con, unsigned size, Assembler::Constant a, Assembler::Register b, Assembler::Register t) +void 
shiftLeftC(Context* con, unsigned, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(con, size == BytesPerWord); emit(con, lsli(t->low, b->low, getValue(a))); } -void shiftRightR(Context* con, unsigned size, Assembler::Register a, Assembler::Register b, Assembler::Register t) +void shiftRightR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { int tmpHi = newTemp(con), tmpLo = newTemp(con); @@ -422,7 +471,7 @@ void shiftRightR(Context* con, unsigned size, Assembler::Register a, Assembler:: emit(con, orr(t->low, t->low, tmpLo)); emit(con, SETS(addi(tmpHi, a->low, -32))); emit(con, asr(tmpLo, b->high, tmpHi)); - emit(con, SETCOND(b(8), LE)); + emit(con, SETCOND(::b(8), LE)); emit(con, orri(t->low, tmpLo, 0)); emit(con, asr(t->high, b->high, a->low)); freeTemp(con, tmpHi); freeTemp(con, tmpLo); @@ -431,13 +480,13 @@ void shiftRightR(Context* con, unsigned size, Assembler::Register a, Assembler:: } } -void shiftRightC(Context* con, unsigned size, Assembler::Constant a, Assembler::Register b, Assembler::Register t) +void shiftRightC(Context* con, unsigned, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(con, size == BytesPerWord); emit(con, asri(t->low, b->low, getValue(a))); } -void unsignedShiftRightR(Context* con, unsigned size, Assembler::Register a, Assembler::Register b, Assembler::Register t) +void unsignedShiftRightR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { emit(con, lsr(t->low, b->low, a->low)); if (size == 8) { @@ -453,14 +502,14 @@ void unsignedShiftRightR(Context* con, unsigned size, Assembler::Register a, Ass } } -void unsignedShiftRightC(Context* con, unsigned size, Assembler::Constant a, Assembler::Register b, Assembler::Register t) +void unsignedShiftRightC(Context* con, unsigned, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { 
assert(con, size == BytesPerWord); emit(con, lsri(t->low, b->low, getValue(a))); } void -updateImmediate(System* s, void* dst, int64_t src, unsigned size) +updateImmediate(System* s, void* dst, int64_t src, unsigned size, bool) { switch (size) { case 4: { @@ -479,12 +528,13 @@ updateImmediate(System* s, void* dst, int64_t src, unsigned size) class ImmediateListener: public Promise::Listener { public: - ImmediateListener(System* s, void* dst, unsigned size, unsigned offset): - s(s), dst(dst), size(size), offset(offset) + ImmediateListener(System* s, void* dst, unsigned size, unsigned offset, + bool address): + s(s), dst(dst), size(size), offset(offset), address(address) { } virtual bool resolve(int64_t value, void** location) { - updateImmediate(s, dst, value, size); + updateImmediate(s, dst, value, size, address); if (location) *location = static_cast(dst) + offset; return false; } @@ -493,26 +543,28 @@ class ImmediateListener: public Promise::Listener { void* dst; unsigned size; unsigned offset; + bool address; }; class ImmediateTask: public Task { public: ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size, - unsigned promiseOffset): + unsigned promiseOffset, bool address): Task(next), promise(promise), offset(offset), size(size), - promiseOffset(promiseOffset) + promiseOffset(promiseOffset), + address(address) { } virtual void run(Context* c) { if (promise->resolved()) { updateImmediate - (c->s, c->result + offset->value(), promise->value(), size); + (c->s, c->result + offset->value(), promise->value(), size, address); } else { new (promise->listen(sizeof(ImmediateListener))) ImmediateListener - (c->s, c->result + offset->value(), size, promiseOffset); + (c->s, c->result + offset->value(), size, promiseOffset, address); } } @@ -520,14 +572,48 @@ class ImmediateTask: public Task { Promise* offset; unsigned size; unsigned promiseOffset; + bool address; }; void appendImmediateTask(Context* c, Promise* promise, Promise* offset, - unsigned 
size, unsigned promiseOffset = 0) + unsigned size, unsigned promiseOffset, bool address) { c->tasks = new (c->zone->allocate(sizeof(ImmediateTask))) ImmediateTask - (c->tasks, promise, offset, size, promiseOffset); + (c->tasks, promise, offset, size, promiseOffset, address); +} + +class ConstantPoolEntry: public Promise { + public: + ConstantPoolEntry(Context* c, Promise* constant): + c(c), constant(constant), next(c->constantPool), address(0) + { + c->constantPool = this; + ++ c->constantPoolCount; + } + + virtual int64_t value() { + assert(c, resolved()); + + return reinterpret_cast(address); + } + + virtual bool resolved() { + return address != 0; + } + + Context* c; + Promise* constant; + ConstantPoolEntry* next; + void* address; + unsigned constantPoolCount; +}; + +ConstantPoolEntry* +appendConstantPoolEntry(Context* c, Promise* constant) +{ + return new (c->zone->allocate(sizeof(ConstantPoolEntry))) + ConstantPoolEntry(c, constant); } void @@ -619,7 +705,7 @@ moveCR2(Context* c, unsigned, Assembler::Constant* src, { if (dstSize <= 4) { if (src->value->resolved()) { - int32_t i = getValue(c); + int32_t i = getValue(src); emit(c, movi(dst->low, lo8(i))); if (!isInt8(i)) { emit(c, orri(dst->low, dst->low, hi8(i), 12)); @@ -632,7 +718,7 @@ moveCR2(Context* c, unsigned, Assembler::Constant* src, } } else { appendImmediateTask - (c, src->value, offset(c), BytesPerWord, promiseOffset); + (c, src->value, offset(c), BytesPerWord, promiseOffset, false); emit(c, movi(dst->low, 0)); emit(c, orri(dst->low, dst->low, 0, 12)); emit(c, orri(dst->low, dst->low, 0, 8)); @@ -650,16 +736,16 @@ moveCR(Context* c, unsigned srcSize, Assembler::Constant* src, moveCR2(c, srcSize, src, dstSize, dst, 0); } -void addR(Context* con, unsigned size, Assembler::Register a, Assembler::Register b, Assembler::Register t) { +void addR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { - emit(con, SETS(addc(t->low, a->low, 
b->low))); + emit(con, SETS(adc(t->low, a->low, b->low))); emit(con, adc(t->high, a->high, b->high)); } else { emit(con, add(t->low, a->low, b->low)); } } -void addC(Context* con, unsigned size, Assembler::Constant a, Assembler::Register b, Assembler::Register t) { +void addC(Context* con, unsigned size, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(con, size == BytesPerWord); int32_t i = getValue(a); @@ -679,7 +765,7 @@ void addC(Context* con, unsigned size, Assembler::Constant a, Assembler::Registe } } -void subR(Context* con, unsigned size, Assembler::Register a, Assembler::Register b, Assembler::Register t) { +void subR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { emit(con, SETS(rsb(t->low, a->low, b->low))); emit(con, rsc(t->high, a->high, b->high)); @@ -688,7 +774,7 @@ void subR(Context* con, unsigned size, Assembler::Register a, Assembler::Registe } } -void subC(Context* c, unsigned size, Assembler::Constant a, Assembler::Register b, Assembler::Register t) { +void subC(Context* c, unsigned size, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(c, size == BytesPerWord); ResolvedPromise promise(- a->value->value()); @@ -696,7 +782,7 @@ void subC(Context* c, unsigned size, Assembler::Constant a, Assembler::Register addC(c, size, &constant, b, t); } -void multiplyR(Context* con, unsigned size, Assembler::Register a, Assembler::Register b, Assembler::Register t) { +void multiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { emit(con, mul(t->high, a->low, b->high)); emit(con, mla(t->high, a->high, b->low, t->high)); @@ -944,19 +1030,19 @@ andR(Context* c, unsigned size, Assembler::Register* a, } void -andC(Context* c, unsigned size, Assembler::Constant* a, +andC(Context* con, unsigned size, Assembler::Constant* a, Assembler::Register* b, 
Assembler::Register* dst) { assert(con, size == BytesPerWord); int32_t i = getValue(a); if (i) { - emit(con, andi(t->low, b->low, lo8(i))); - emit(con, andi(t->low, b->low, hi8(i), 12)); - emit(con, andi(t->low, b->low, lo8(hi16(i)), 8)); - emit(con, andi(t->low, b->low, hi8(hi16(i)), 4)); + emit(con, andi(dst->low, b->low, lo8(i))); + emit(con, andi(dst->low, b->low, hi8(i), 12)); + emit(con, andi(dst->low, b->low, lo8(hi16(i)), 8)); + emit(con, andi(dst->low, b->low, hi8(hi16(i)), 4)); } else { - moveRR(con, size, b, size, t); + moveRR(con, size, b, size, dst); } } @@ -969,72 +1055,82 @@ orR(Context* c, unsigned size, Assembler::Register* a, } void -orC(Context* c, unsigned size, Assembler::Constant* a, +orC(Context* con, unsigned size, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* dst) { assert(con, size == BytesPerWord); int32_t i = getValue(a); if (i) { - emit(con, orri(t->low, b->low, lo8(i))); + emit(con, orri(dst->low, b->low, lo8(i))); if (!isInt8(i)) { - emit(con, orri(t->low, b->low, hi8(i), 12)); + emit(con, orri(dst->low, b->low, hi8(i), 12)); if (!isInt16(i)) { - emit(con, orri(t->low, b->low, lo8(hi16(i)), 8)); + emit(con, orri(dst->low, b->low, lo8(hi16(i)), 8)); if (!isInt24(i)) { - emit(con, orri(t->low, b->low, hi8(hi16(i)), 4)); + emit(con, orri(dst->low, b->low, hi8(hi16(i)), 4)); } } } } else { - moveRR(con, size, b, size, t); + moveRR(con, size, b, size, dst); } } void -xorR(Context* com, unsigned size, Assembler::Register* a, +xorR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* dst) { - if (size == 8) emit(com, eor(dst->high, a->high, b->high)); - emit(com, eor(dst->low, a->low, b->low)); + if (size == 8) emit(con, eor(dst->high, a->high, b->high)); + emit(con, eor(dst->low, a->low, b->low)); } void -xorC(Context* com, unsigned size, Assembler::Constant* a, +xorC(Context* con, unsigned size, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* dst) { 
assert(con, size == BytesPerWord); int32_t i = getValue(a); if (i) { - emit(con, eori(t->low, b->low, lo8(i))); + emit(con, eori(dst->low, b->low, lo8(i))); if (!isInt8(i)) { - emit(con, eori(t->low, b->low, hi8(i), 12)); + emit(con, eori(dst->low, b->low, hi8(i), 12)); if (!isInt16(i)) { - emit(con, eori(t->low, b->low, lo8(hi16(i)), 8)); + emit(con, eori(dst->low, b->low, lo8(hi16(i)), 8)); if (!isInt24(i)) { - emit(con, eori(t->low, b->low, hi8(hi16(i)), 4)); + emit(con, eori(dst->low, b->low, hi8(hi16(i)), 4)); } } } } else { - moveRR(con, size, b, size, t); + moveRR(con, size, b, size, dst); } } void -moveAR(Context* c, unsigned srcSize, Assembler::Address* src, - unsigned dstSize, Assembler::Register* dst) +moveAR2(Context* c, unsigned srcSize, Assembler::Address* src, + unsigned dstSize, Assembler::Register* dst, unsigned promiseOffset) { assert(c, srcSize == 4 and dstSize == 4); Assembler::Constant constant(src->address); Assembler::Memory memory(dst->low, 0, -1, 0); + appendImmediateTask + (c, src->address, offset(c), BytesPerWord, promiseOffset, true); + moveCR(c, srcSize, &constant, dstSize, dst); moveMR(c, dstSize, &memory, dstSize, dst); } +void +moveAR(Context* c, unsigned srcSize, Assembler::Address* src, + unsigned dstSize, Assembler::Register* dst) +{ + moveAR2(c, srcSize, src, dstSize, dst, 0); +} + void compareRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a, unsigned bSize UNUSED, Assembler::Register* b) @@ -1083,105 +1179,185 @@ compareRM(Context* c, unsigned aSize, Assembler::Register* a, c->client->releaseTemporary(tmp.low); } -void -longCompare(Context* c, Assembler::Operand* al, Assembler::Operand* ah, - Assembler::Operand* bl, Assembler::Operand* bh, - Assembler::Register* dst, BinaryOperationType compareSigned, - BinaryOperationType compareUnsigned) +int32_t +branch(Context* c, TernaryOperation op) { - ResolvedPromise negativePromise(-1); - Assembler::Constant negative(&negativePromise); + switch (op) { + case JumpIfEqual: + 
return beq(0); + + case JumpIfNotEqual: + return bne(0); + + case JumpIfLess: + return blt(0); + + case JumpIfGreater: + return bgt(0); + + case JumpIfLessOrEqual: + return ble(0); + + case JumpIfGreaterOrEqual: + return bge(0); + + default: + abort(c); + } +} - ResolvedPromise zeroPromise(0); - Assembler::Constant zero(&zeroPromise); +void +conditional(Context* c, int32_t branch, Assembler::Constant* target) +{ + appendOffsetTask(c, target->value, offset(c), true); + emit(c, branch); +} - ResolvedPromise positivePromise(1); - Assembler::Constant positive(&positivePromise); +void +branch(Context* c, TernaryOperation op, Assembler::Constant* target) +{ + conditional(c, branch(c, op), target); +} +void +branchLong(Context* c, TernaryOperation op, Assembler::Operand* al, + Assembler::Operand* ah, Assembler::Operand* bl, + Assembler::Operand* bh, Assembler::Constant* target, + BinaryOperationType compareSigned, + BinaryOperationType compareUnsigned) +{ compareSigned(c, 4, ah, 4, bh); - unsigned less = c->code.length(); - emit(c, blt(0)); + unsigned next = 0; + + switch (op) { + case JumpIfEqual: + next = c->code.length(); + emit(c, bne(0)); - unsigned greater = c->code.length(); - emit(c, bgt(0)); + compareSigned(c, 4, al, 4, bl); + conditional(c, beq(0), target); + break; - compareUnsigned(c, 4, al, 4, bl); + case JumpIfNotEqual: + conditional(c, bne(0), target); - unsigned above = c->code.length(); - emit(c, bgt(0)); + compareSigned(c, 4, al, 4, bl); + conditional(c, bne(0), target); + break; - unsigned below = c->code.length(); - emit(c, blt(0)); + case JumpIfLess: + conditional(c, blt(0), target); - moveCR(c, 4, &zero, 4, dst); + next = c->code.length(); + emit(c, bgt(0)); - unsigned nextFirst = c->code.length(); - emit(c, b(0)); + compareUnsigned(c, 4, al, 4, bl); + conditional(c, blt(0), target); + break; - updateOffset - (c->s, c->code.data + less, true, reinterpret_cast - (c->code.data + c->code.length())); + case JumpIfGreater: + conditional(c, bgt(0), 
target); - updateOffset - (c->s, c->code.data + below, true, reinterpret_cast - (c->code.data + c->code.length())); + next = c->code.length(); + emit(c, blt(0)); - moveCR(c, 4, &negative, 4, dst); + compareUnsigned(c, 4, al, 4, bl); + conditional(c, bgt(0), target); + break; - unsigned nextSecond = c->code.length(); - emit(c, b(0)); + case JumpIfLessOrEqual: + conditional(c, blt(0), target); - updateOffset - (c->s, c->code.data + greater, true, reinterpret_cast - (c->code.data + c->code.length())); + next = c->code.length(); + emit(c, bgt(0)); - updateOffset - (c->s, c->code.data + above, true, reinterpret_cast - (c->code.data + c->code.length())); + compareUnsigned(c, 4, al, 4, bl); + conditional(c, ble(0), target); + break; - moveCR(c, 4, &positive, 4, dst); + case JumpIfGreaterOrEqual: + conditional(c, bgt(0), target); - updateOffset - (c->s, c->code.data + nextFirst, false, reinterpret_cast - (c->code.data + c->code.length())); + next = c->code.length(); + emit(c, blt(0)); - updateOffset - (c->s, c->code.data + nextSecond, false, reinterpret_cast - (c->code.data + c->code.length())); + compareUnsigned(c, 4, al, 4, bl); + conditional(c, bge(0), target); + break; + + default: + abort(c); + } + + if (next) { + updateOffset + (c->s, c->code.data + next, true, reinterpret_cast + (c->code.data + c->code.length())); + } } void -longCompareR(Context* c, unsigned size UNUSED, Assembler::Register* a, - Assembler::Register* b, Assembler::Register* dst) +branchRR(Context* c, TernaryOperation op, unsigned size, + Assembler::Register* a, Assembler::Register* b, + Assembler::Constant* target) { - assert(c, size == 8); - - Assembler::Register ah(a->high); - Assembler::Register bh(b->high); - - longCompare(c, a, &ah, b, &bh, dst, CAST2(compareRR), - CAST2(compareUnsignedRR)); + if (size > BytesPerWord) { + Assembler::Register ah(a->high); + Assembler::Register bh(b->high); + + branchLong(c, op, a, &ah, b, &bh, target, CAST2(compareRR), + CAST2(compareRR)); + } else { + 
compareRR(c, size, a, size, b); + branch(c, op, target); + } } void -longCompareC(Context* c, unsigned size UNUSED, Assembler::Constant* a, - Assembler::Register* b, Assembler::Register* dst) +branchCR(Context* c, TernaryOperation op, unsigned size, + Assembler::Constant* a, Assembler::Register* b, + Assembler::Constant* target) { - assert(c, size == 8); + if (size > BytesPerWord) { + int64_t v = a->value->value(); - int64_t v = a->value->value(); + ResolvedPromise low(v & ~static_cast(0)); + Assembler::Constant al(&low); - ResolvedPromise low(v & ~static_cast(0)); - Assembler::Constant al(&low); - - ResolvedPromise high((v >> 32) & ~static_cast(0)); - Assembler::Constant ah(&high); - - Assembler::Register bh(b->high); - - longCompare(c, &al, &ah, b, &bh, dst, CAST2(compareCR), - CAST2(compareUnsignedCR)); + ResolvedPromise high((v >> 32) & ~static_cast(0)); + Assembler::Constant ah(&high); + + Assembler::Register bh(b->high); + + branchLong(c, op, &al, &ah, b, &bh, target, CAST2(compareCR), + CAST2(compareCR)); + } else { + compareCR(c, size, a, size, b); + branch(c, op, target); + } +} + +void +branchRM(Context* c, TernaryOperation op, unsigned size, + Assembler::Register* a, Assembler::Memory* b, + Assembler::Constant* target) +{ + assert(c, size <= BytesPerWord); + + compareRM(c, size, a, size, b); + branch(c, op, target); +} + +void +branchCM(Context* c, TernaryOperation op, unsigned size, + Assembler::Constant* a, Assembler::Memory* b, + Assembler::Constant* target) +{ + assert(c, size <= BytesPerWord); + + compareCM(c, size, a, size, b); + branch(c, op, target); } ShiftMaskPromise* @@ -1257,6 +1433,18 @@ longCallC(Context* c, unsigned size UNUSED, Assembler::Constant* target) callR(c, BytesPerWord, &tmp); } +void +alignedLongCallC(Context* c, unsigned size UNUSED, Assembler::Constant* target) +{ + assert(c, size == BytesPerWord); + + Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Address address(appendConstantPoolEntry(c, 
target->value)); + moveAR2(c, BytesPerWord, &address, BytesPerWord, &tmp, 12); + callR(c, BytesPerWord, &tmp); + c->client->releaseTemporary(tmp.low); +} + void longJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { @@ -1267,6 +1455,18 @@ longJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) jumpR(c, BytesPerWord, &tmp); } +void +alignedLongJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) +{ + assert(c, size == BytesPerWord); + + Assembler::Register tmp(c->client->acquireTemporary()); + Assembler::Address address(appendConstantPoolEntry(c, target->value)); + moveAR2(c, BytesPerWord, &address, BytesPerWord, &tmp, 12); + jumpR(c, BytesPerWord, &tmp); + c->client->releaseTemporary(tmp.low); +} + void jumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { @@ -1339,7 +1539,7 @@ return_(Context* c) } void -memoryBarrier(Context* c) {} +memoryBarrier(Context*) {} // END OPERATION COMPILERS @@ -1355,96 +1555,94 @@ populateTables(ArchitectureContext* c) UnaryOperationType* uo = c->unaryOperations; BinaryOperationType* bo = c->binaryOperations; TernaryOperationType* to = c->ternaryOperations; + BranchOperationType* bro = c->branchOperations; zo[Return] = return_; zo[LoadBarrier] = memoryBarrier; zo[StoreStoreBarrier] = memoryBarrier; zo[StoreLoadBarrier] = memoryBarrier; - uo[index(LongCall, C)] = CAST1(longCallC); + uo[index(c, LongCall, C)] = CAST1(longCallC); - uo[index(LongJump, C)] = CAST1(longJumpC); + uo[index(c, AlignedLongCall, C)] = CAST1(alignedLongCallC); - uo[index(Jump, R)] = CAST1(jumpR); - uo[index(Jump, C)] = CAST1(jumpC); + uo[index(c, LongJump, C)] = CAST1(longJumpC); - uo[index(AlignedJump, R)] = CAST1(jumpR); - uo[index(AlignedJump, C)] = CAST1(jumpC); + uo[index(c, AlignedLongJump, C)] = CAST1(alignedLongJumpC); - uo[index(JumpIfEqual, C)] = CAST1(jumpIfEqualC); - uo[index(JumpIfNotEqual, C)] = CAST1(jumpIfNotEqualC); - uo[index(JumpIfGreater, C)] = CAST1(jumpIfGreaterC); - 
uo[index(JumpIfGreaterOrEqual, C)] = CAST1(jumpIfGreaterOrEqualC); - uo[index(JumpIfLess, C)] = CAST1(jumpIfLessC); - uo[index(JumpIfLessOrEqual, C)] = CAST1(jumpIfLessOrEqualC); + uo[index(c, Jump, R)] = CAST1(jumpR); + uo[index(c, Jump, C)] = CAST1(jumpC); - uo[index(Call, C)] = CAST1(callC); - uo[index(Call, R)] = CAST1(callR); + uo[index(c, AlignedJump, R)] = CAST1(jumpR); + uo[index(c, AlignedJump, C)] = CAST1(jumpC); - uo[index(AlignedCall, C)] = CAST1(callC); - uo[index(AlignedCall, R)] = CAST1(callR); + uo[index(c, Call, C)] = CAST1(callC); + uo[index(c, Call, R)] = CAST1(callR); - bo[index(Move, R, R)] = CAST2(moveRR); - bo[index(Move, C, R)] = CAST2(moveCR); - bo[index(Move, C, M)] = CAST2(moveCM); - bo[index(Move, M, R)] = CAST2(moveMR); - bo[index(Move, R, M)] = CAST2(moveRM); - bo[index(Move, A, R)] = CAST2(moveAR); + uo[index(c, AlignedCall, C)] = CAST1(callC); + uo[index(c, AlignedCall, R)] = CAST1(callR); - bo[index(MoveZ, R, R)] = CAST2(moveZRR); - bo[index(MoveZ, M, R)] = CAST2(moveZMR); - bo[index(MoveZ, C, R)] = CAST2(moveCR); + bo[index(c, Move, R, R)] = CAST2(moveRR); + bo[index(c, Move, C, R)] = CAST2(moveCR); + bo[index(c, Move, C, M)] = CAST2(moveCM); + bo[index(c, Move, M, R)] = CAST2(moveMR); + bo[index(c, Move, R, M)] = CAST2(moveRM); + bo[index(c, Move, A, R)] = CAST2(moveAR); - bo[index(Compare, R, R)] = CAST2(compareRR); - bo[index(Compare, C, R)] = CAST2(compareCR); - bo[index(Compare, R, M)] = CAST2(compareRM); - bo[index(Compare, C, M)] = CAST2(compareCM); + bo[index(c, MoveZ, R, R)] = CAST2(moveZRR); + bo[index(c, MoveZ, M, R)] = CAST2(moveZMR); + bo[index(c, MoveZ, C, R)] = CAST2(moveCR); - bo[index(Negate, R, R)] = CAST2(negateRR); + bo[index(c, Negate, R, R)] = CAST2(negateRR); - to[index(Add, R)] = CAST3(addR); - to[index(Add, C)] = CAST3(addC); + to[index(c, Add, R)] = CAST3(addR); + to[index(c, Add, C)] = CAST3(addC); - to[index(Subtract, R)] = CAST3(subR); - to[index(Subtract, C)] = CAST3(subC); + to[index(c, Subtract, R)] 
= CAST3(subR); + to[index(c, Subtract, C)] = CAST3(subC); - to[index(Multiply, R)] = CAST3(multiplyR); + to[index(c, Multiply, R)] = CAST3(multiplyR); - to[index(Divide, R)] = CAST3(divideR); + to[index(c, ShiftLeft, R)] = CAST3(shiftLeftR); + to[index(c, ShiftLeft, C)] = CAST3(shiftLeftC); - to[index(Remainder, R)] = CAST3(remainderR); + to[index(c, ShiftRight, R)] = CAST3(shiftRightR); + to[index(c, ShiftRight, C)] = CAST3(shiftRightC); - to[index(ShiftLeft, R)] = CAST3(shiftLeftR); - to[index(ShiftLeft, C)] = CAST3(shiftLeftC); + to[index(c, UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR); + to[index(c, UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC); - to[index(ShiftRight, R)] = CAST3(shiftRightR); - to[index(ShiftRight, C)] = CAST3(shiftRightC); + to[index(c, And, C)] = CAST3(andC); + to[index(c, And, R)] = CAST3(andR); - to[index(UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR); - to[index(UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC); + to[index(c, Or, C)] = CAST3(orC); + to[index(c, Or, R)] = CAST3(orR); - to[index(And, C)] = CAST3(andC); - to[index(And, R)] = CAST3(andR); + to[index(c, Xor, C)] = CAST3(xorC); + to[index(c, Xor, R)] = CAST3(xorR); - to[index(Or, C)] = CAST3(orC); - to[index(Or, R)] = CAST3(orR); - - to[index(Xor, C)] = CAST3(xorC); - to[index(Xor, R)] = CAST3(xorR); - - to[index(LongCompare, R)] = CAST3(longCompareR); - to[index(LongCompare, C)] = CAST3(longCompareC); + bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR); + bro[branchIndex(c, C, R)] = CAST_BRANCH(branchCR); + bro[branchIndex(c, C, M)] = CAST_BRANCH(branchCM); + bro[branchIndex(c, R, M)] = CAST_BRANCH(branchRM); } -// TODO class MyArchitecture: public Assembler::Architecture { public: MyArchitecture(System* system): c(system), referenceCount(0) { populateTables(&c); } - virtual unsigned registerCount() { - return 16; + virtual unsigned floatRegisterSize() { + return 0; + } + + virtual uint32_t generalRegisterMask() { + return 0xFFFFFFFF; + } + + virtual 
uint32_t floatRegisterMask() { + return 0; } virtual int stack() { @@ -1456,11 +1654,11 @@ class MyArchitecture: public Assembler::Architecture { } virtual int returnLow() { - return 4; + return 0; } virtual int returnHigh() { - return (BytesPerWord == 4 ? 3 : NoRegister); + return 1; } virtual int virtualCallTarget() { @@ -1471,12 +1669,12 @@ class MyArchitecture: public Assembler::Architecture { return 3; } - virtual bool condensedAddressing() { + virtual bool bigEndian() { return false; } - virtual bool bigEndian() { - return false; + virtual uintptr_t maximumImmediateJump() { + return 0x7FFFFF; } virtual bool reserved(int register_) { @@ -1508,7 +1706,7 @@ class MyArchitecture: public Assembler::Architecture { return index + 0; } - + virtual unsigned stackAlignmentInWords() { return StackAlignmentInWords; } @@ -1522,12 +1720,14 @@ class MyArchitecture: public Assembler::Architecture { } virtual void updateCall(UnaryOperation op UNUSED, - bool assertAlignment UNUSED, void* returnAddress, + void* returnAddress, void* newTarget) { switch (op) { case Call: - case Jump: { + case Jump: + case AlignedCall: + case AlignedJump: { updateOffset(c.s, static_cast(returnAddress) - 4, false, reinterpret_cast(newTarget)); } break; @@ -1535,7 +1735,15 @@ class MyArchitecture: public Assembler::Architecture { case LongCall: case LongJump: { updateImmediate(c.s, static_cast(returnAddress) - 12, - reinterpret_cast(newTarget), BytesPerWord); + reinterpret_cast(newTarget), BytesPerWord, + false); + } break; + + case AlignedLongCall: + case AlignedLongJump: { + uint32_t* p = static_cast(returnAddress) - 4; + *reinterpret_cast(unha16(p[0] & 0xFFFF, p[1] & 0xFFFF)) + = newTarget; } break; default: abort(&c); @@ -1546,13 +1754,8 @@ class MyArchitecture: public Assembler::Architecture { return 4; } - virtual uintptr_t getConstant(const void* src) { - const int32_t* p = static_cast(src); - return (p[0] << 16) | (p[1] & 0xFFFF); - } - virtual void setConstant(void* dst, uintptr_t constant) 
{ - updateImmediate(c.s, dst, constant, BytesPerWord); + updateImmediate(c.s, dst, constant, BytesPerWord, false); } virtual unsigned alignFrameSize(unsigned sizeInWords) { @@ -1565,11 +1768,11 @@ class MyArchitecture: public Assembler::Architecture { } virtual unsigned frameHeaderSize() { - return 0; + return FrameHeaderSize; } virtual unsigned frameReturnAddressSize() { - return 0; + return 1; } virtual unsigned frameFooterSize() { @@ -1577,7 +1780,7 @@ class MyArchitecture: public Assembler::Architecture { } virtual int returnAddressOffset() { - return 8 / BytesPerWord; + return 1; } virtual int framePointerOffset() { @@ -1590,6 +1793,22 @@ class MyArchitecture: public Assembler::Architecture { *stack = *static_cast(*stack); } + virtual BinaryOperation hasBinaryIntrinsic(Thread*, object) { + return NoBinaryOperation; + } + + virtual TernaryOperation hasTernaryIntrinsic(Thread*, object) { + return NoTernaryOperation; + } + + virtual bool alwaysCondensed(BinaryOperation) { + return false; + } + + virtual bool alwaysCondensed(TernaryOperation) { + return false; + } + virtual void plan (UnaryOperation, unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask, @@ -1600,28 +1819,46 @@ class MyArchitecture: public Assembler::Architecture { *thunk = false; } - virtual void plan + virtual void planSource (BinaryOperation op, unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask, - unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, - bool* thunk) + unsigned, bool* thunk) { *aTypeMask = ~0; *aRegisterMask = ~static_cast(0); - *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); - *bRegisterMask = ~static_cast(0); - *thunk = false; switch (op) { - case Compare: - *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); - *bTypeMask = (1 << RegisterOperand); - break; - case Negate: *aTypeMask = (1 << RegisterOperand); + break; + + case Absolute: + case FloatAbsolute: + case FloatSquareRoot: + case FloatNegate: + case Float2Float: + case Float2Int: + case 
Int2Float: + *thunk = true; + break; + + default: + break; + } + } + + virtual void planDestination + (BinaryOperation op, + unsigned, uint8_t, uint64_t, + unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask) + { + *bTypeMask = (1 << RegisterOperand) | (1 << MemoryOperand); + *bRegisterMask = ~static_cast(0); + + switch (op) { + case Negate: *bTypeMask = (1 << RegisterOperand); break; @@ -1630,12 +1867,30 @@ class MyArchitecture: public Assembler::Architecture { } } - virtual void plan + virtual void planMove + (unsigned, uint8_t* srcTypeMask, uint64_t* srcRegisterMask, + uint8_t* tmpTypeMask, uint64_t* tmpRegisterMask, + uint8_t dstTypeMask, uint64_t) + { + *srcTypeMask = ~0; + *srcRegisterMask = ~static_cast(0); + + *tmpTypeMask = 0; + *tmpRegisterMask = 0; + + if (dstTypeMask & (1 << MemoryOperand)) { + // can't move directly from memory or constant to memory + *srcTypeMask = 1 << RegisterOperand; + *tmpTypeMask = 1 << RegisterOperand; + *tmpRegisterMask = ~static_cast(0); + } + } + + virtual void planSource (TernaryOperation op, unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, - unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask, - bool* thunk) + unsigned, bool* thunk) { *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); *aRegisterMask = ~static_cast(0); @@ -1657,22 +1912,44 @@ class MyArchitecture: public Assembler::Architecture { *aTypeMask = *bTypeMask = (1 << RegisterOperand); break; - case LongCompare: - *bTypeMask = (1 << RegisterOperand); - break; - case Divide: case Remainder: - *bTypeMask = ~0; + case FloatAdd: + case FloatSubtract: + case FloatMultiply: + case FloatDivide: + case FloatRemainder: + case JumpIfFloatEqual: + case JumpIfFloatNotEqual: + case JumpIfFloatLess: + case JumpIfFloatGreater: + case JumpIfFloatLessOrEqual: + case JumpIfFloatGreaterOrEqual: + case JumpIfFloatLessOrUnordered: + case JumpIfFloatGreaterOrUnordered: + case 
JumpIfFloatLessOrEqualOrUnordered: + case JumpIfFloatGreaterOrEqualOrUnordered: *thunk = true; break; default: break; } + } - *cTypeMask = *bTypeMask; - *cRegisterMask = *bRegisterMask; + virtual void planDestination + (TernaryOperation op, + unsigned, uint8_t, uint64_t, + unsigned, uint8_t, const uint64_t, + unsigned, uint8_t* cTypeMask, uint64_t* cRegisterMask) + { + if (isBranch(op)) { + *cTypeMask = (1 << ConstantOperand); + *cRegisterMask = 0; + } else { + *cTypeMask = (1 << RegisterOperand); + *cRegisterMask = ~static_cast(0); + } } virtual void acquire() { @@ -1874,30 +2151,40 @@ class MyAssembler: public Assembler { virtual void apply(UnaryOperation op, unsigned aSize, OperandType aType, Operand* aOperand) { - arch_->c.unaryOperations[index(op, aType)](&c, aSize, aOperand); + arch_->c.unaryOperations[index(&(arch_->c), op, aType)] + (&c, aSize, aOperand); } virtual void apply(BinaryOperation op, unsigned aSize, OperandType aType, Operand* aOperand, unsigned bSize, OperandType bType, Operand* bOperand) { - arch_->c.binaryOperations[index(op, aType, bType)] + arch_->c.binaryOperations[index(&(arch_->c), op, aType, bType)] (&c, aSize, aOperand, bSize, bOperand); } virtual void apply(TernaryOperation op, - unsigned, OperandType aType, Operand* aOperand, + unsigned aSize, OperandType aType, Operand* aOperand, unsigned bSize, OperandType bType UNUSED, Operand* bOperand, unsigned cSize UNUSED, OperandType cType UNUSED, Operand* cOperand) { - assert(&c, bSize == cSize); - assert(&c, bType == RegisterOperand); - assert(&c, cType == RegisterOperand); + if (isBranch(op)) { + assert(&c, aSize == bSize); + assert(&c, cSize == BytesPerWord); + assert(&c, cType == ConstantOperand); - arch_->c.ternaryOperations[index(op, aType)] - (&c, bSize, aOperand, bOperand, cOperand); + arch_->c.branchOperations[branchIndex(&(arch_->c), aType, bType)] + (&c, op, aSize, aOperand, bOperand, cOperand); + } else { + assert(&c, bSize == cSize); + assert(&c, bType == RegisterOperand); + 
assert(&c, cType == RegisterOperand); + + arch_->c.ternaryOperations[index(&(arch_->c), op, aType)] + (&c, bSize, aOperand, bOperand, cOperand); + } } virtual void writeTo(uint8_t* dst) { @@ -1907,9 +2194,21 @@ class MyAssembler: public Assembler { memcpy(dst + b->start, c.code.data + b->offset, b->size); } + unsigned index = c.code.length(); + assert(&c, index % BytesPerWord == 0); + for (ConstantPoolEntry* e = c.constantPool; e; e = e->next) { + e->address = dst + index; + index += BytesPerWord; + } + for (Task* t = c.tasks; t; t = t->next) { t->run(&c); } + + for (ConstantPoolEntry* e = c.constantPool; e; e = e->next) { + *static_cast(e->address) = e->constant->value(); +// fprintf(stderr, "constant %p at %p\n", reinterpret_cast(e->constant->value()), e->address); + } } virtual Promise* offset() { @@ -1932,6 +2231,10 @@ class MyAssembler: public Assembler { return c.code.length(); } + virtual unsigned scratchSize() { + return c.constantPoolCount * BytesPerWord; + } + virtual void dispose() { c.code.dispose(); } @@ -1945,7 +2248,7 @@ class MyAssembler: public Assembler { namespace vm { Assembler::Architecture* -makeArchitecture(System* system) +makeArchitecture(System* system, bool) { return new (allocate(system, sizeof(MyArchitecture))) MyArchitecture(system); } diff --git a/src/arm.h b/src/arm.h index b7b844685d..f510d3e960 100644 --- a/src/arm.h +++ b/src/arm.h @@ -27,7 +27,7 @@ namespace vm { inline void trap() { - asm("nop"); + asm("bkpt"); } inline void diff --git a/src/compile-arm.S b/src/compile-arm.S new file mode 100644 index 0000000000..5ddf7140f2 --- /dev/null +++ b/src/compile-arm.S @@ -0,0 +1,110 @@ +/* Copyright (c) 2010, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. 
*/ + +#ifdef AVIAN_CONTINUATIONS +# error "Continuations not yet supported on ARM port" +#endif + +#include "types.h" + +.text + +#define BYTES_PER_WORD 4 + +#define LOCAL(x) L##x + +#ifdef __APPLE__ +# define GLOBAL(x) _##x +#else +# define GLOBAL(x) x +#endif + +.globl GLOBAL(vmInvoke) +GLOBAL(vmInvoke): + /* + arguments + r0 : thread + r1 : function + r2 : arguments + r3 : argumentFootprint + [sp, #0] : frameSize (not used) + [sp, #4] : returnType + */ + + // save stack frame + mov ip, sp + + // save all non-volatile registers + stmfd sp!, {r4-r11, lr} + + // save return type + ldr r4, [ip, #4] + str r4, [sp, #-4]! + + // we're at the bottom of our local stack frame; save it + mov ip, sp + + // align stack, if necessary + eor r4, sp, r3 + tst r4, #4 + subne sp, sp, #4 + + // copy arguments into place + sub sp, r3 + mov r4, #0 + b LOCAL(vmInvoke_argumentTest) + +LOCAL(vmInvoke_argumentLoop): + ldr r5, [r2, r4] + str r5, [sp, r4] + add r4, r4, #BYTES_PER_WORD + +LOCAL(vmInvoke_argumentTest): + cmp r4, r3 + blt LOCAL(vmInvoke_argumentLoop) + + // save the beginning of our stack frame + str ip, [sp, #-8]! + + // we use ip (r12) to hold the thread pointer, by convention + mov ip, r0 + + // load and call function address + blx r1 + +LOCAL(vmInvoke_returnAddress): + // restore stack pointer + ldr sp, [sp] + + // restore return type + ldr ip, [sp] + + // restore callee-saved registers + ldmfd sp!, {r4-r11, lr} + +LOCAL(vmInvoke_void): + cmp ip, #VOID_TYPE + beq LOCAL(vmInvoke_return) + +LOCAL(vmInvoke_int64): + cmp ip, #INT64_TYPE + beq LOCAL(vmInvoke_return) + +LOCAL(vmInvoke_int32): + mov r1, #0 + +LOCAL(vmInvoke_return): + bx lr + +.globl GLOBAL(vmJumpAndInvoke) +GLOBAL(vmJumpAndInvoke): + // vmJumpAndInvoke should only be called when continuations are + // enabled + bkpt From 5c00cfac6fe349688042668a99901866aabb2def Mon Sep 17 00:00:00 2001 From: jet Date: Tue, 24 Aug 2010 17:59:01 -0600 Subject: [PATCH 02/23] Incomplete debugging of "Hello World!" on ARM. 
--- src/arm.cpp | 46 +++++++++++++++++++++++++--------------------- src/compile-arm.S | 5 ++++- src/compile.cpp | 2 +- src/compiler.cpp | 4 ++-- 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 387c4ee688..b6968127dd 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -30,9 +30,9 @@ inline int DATA(int cond, int opcode, int S, int Rn, int Rd, int shift, int Sh, inline int DATAS(int cond, int opcode, int S, int Rn, int Rd, int Rs, int Sh, int Rm) { return cond<<28 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | Rs<<8 | Sh<<5 | 1<<4 | Rm; } inline int DATAI(int cond, int opcode, int S, int Rn, int Rd, int rot, int imm) -{ return cond<<28 | 1<<25 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | rot<<8 | imm; } +{ return cond<<28 | 1<<25 | opcode<<21 | S<<20 | Rn<<16 | Rd<<12 | rot<<8 | (imm&0xff); } inline int BRANCH(int cond, int L, int offset) -{ return cond<<28 | 5<<25 | L<<24 | offset; } +{ return cond<<28 | 5<<25 | L<<24 | (offset&0xffffff); } inline int BRANCHX(int cond, int L, int Rm) { return cond<<28 | 0x4bffc<<6 | L<<5 | 1<<4 | Rm; } inline int MULTIPLY(int cond, int mul, int S, int Rd, int Rn, int Rs, int Rm) @@ -40,7 +40,7 @@ inline int MULTIPLY(int cond, int mul, int S, int Rd, int Rn, int Rs, int Rm) inline int XFER(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int shift, int Sh, int Rm) { return cond<<28 | 3<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | shift<<7 | Sh<<5 | Rm; } inline int XFERI(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, int offset) -{ return cond<<28 | 2<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offset; } +{ return cond<<28 | 2<<25 | P<<24 | U<<23 | B<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | (offset&0xfff); } inline int XFER2(int cond, int P, int U, int W, int L, int Rn, int Rd, int S, int H, int Rm) { return cond<<28 | P<<24 | U<<23 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | 1<<7 | S<<6 | H<<5 | 1<<4 | Rm; } inline int XFER2I(int cond, int P, int 
U, int W, int L, int Rn, int Rd, int offsetH, int S, int H, int offsetL) @@ -48,9 +48,11 @@ inline int XFER2I(int cond, int P, int U, int W, int L, int Rn, int Rd, int offs inline int BLOCKXFER(int cond, int P, int U, int S, int W, int L, int Rn, int rlist) { return cond<<28 | 4<<25 | P<<24 | U<<23 | S<<22 | W<<21 | L<<20 | Rn<<16 | rlist; } inline int SWI(int cond, int imm) -{ return cond<<28 | 0x0f<<24 | imm; } +{ return cond<<28 | 0x0f<<24 | (imm&0xffffff); } inline int SWAP(int cond, int B, int Rn, int Rd, int Rm) { return cond<<28 | 1<<24 | B<<22 | Rn<<16 | Rd<<12 | 9<<4 | Rm; } +// FIELD CALCULATORS +inline int calcU(int imm) { return imm >= 0 ? 1 : 0; } // INSTRUCTIONS // The "cond" and "S" fields are set using the SETCOND() and SETS() functions inline int b(int offset) { return BRANCH(AL, 0, offset); } @@ -91,21 +93,21 @@ inline int umlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 5, 0, inline int smull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 6, 0, RdLo, RdHi, Rs, Rm); } inline int smlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 7, 0, RdLo, RdHi, Rs, Rm); } inline int ldr(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 0, 0, 1, Rn, Rd, 0, 0, Rm); } -inline int ldri(int Rd, int Rn, int imm) { return XFERI(AL, 1, 1, 0, 0, 1, Rn, Rd, imm); } +inline int ldri(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 0, 0, 1, Rn, Rd, abs(imm)); } inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); } -inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, 1, 1, 0, 1, Rn, Rd, imm); } +inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 1, Rn, Rd, abs(imm)); } inline int str(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 0, Rn, Rd, 0, 0, Rm); } -inline int stri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, 1, 0, W, 0, Rn, Rd, imm); } +inline int stri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, 
calcU(imm), 0, W, 0, Rn, Rd, abs(imm)); } inline int strb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 0, Rn, Rd, 0, 0, Rm); } -inline int strbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, 1, 1, 0, 0, Rn, Rd, imm); } +inline int strbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 0, Rn, Rd, abs(imm)); } inline int ldrh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 0, 1, Rm); } -inline int ldrhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, 1, 0, 1, Rn, Rd, imm>>4 & 0xf, 0, 1, imm&0xf); } +inline int ldrhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); } inline int strh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 0, Rn, Rd, 0, 1, Rm); } -inline int strhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, 1, 0, 0, Rn, Rd, imm>>4 & 0xf, 0, 1, imm&0xf); } +inline int strhi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 0, Rn, Rd, abs(imm)>>4 & 0xf, 0, 1, abs(imm)&0xf); } inline int ldrsh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 1, Rm); } -inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, 1, 0, 1, Rn, Rd, imm>>4 & 0xf, 1, 1, imm&0xf); } +inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 1, abs(imm)&0xf); } inline int ldrsb(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 0, Rm); } -inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, 1, 0, 1, Rn, Rd, imm>>4 & 0xf, 1, 0, imm&0xf); } +inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 0, abs(imm)&0xf); } inline int ldmib(int Rn, int rlist) { return BLOCKXFER(AL, 1, 1, 0, 0, 1, Rn, rlist); } inline int ldmia(int Rn, int rlist) { return BLOCKXFER(AL, 0, 1, 0, 0, 1, Rn, rlist); } inline int stmib(int Rn, int rlist) { return BLOCKXFER(AL, 1, 1, 0, 0, 0, Rn, rlist); } @@ -165,6 +167,7 @@ const 
unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord; const int StackRegister = 13; const int BaseRegister = 11; const int ThreadRegister = 12; +const int ProgramCounter = 15; class MyBlock: public Assembler::Block { public: @@ -317,8 +320,9 @@ bounded(int right, int left, int32_t v) void* updateOffset(System* s, uint8_t* instruction, bool conditional UNUSED, int64_t value) { - int32_t v = reinterpret_cast(value) - instruction; - + // ARM's PC is two words ahead, and branches drop the bottom 2 bits. + int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; + int32_t mask; expect(s, bounded(0, 8, v)); mask = 0xFFFFFF; @@ -455,7 +459,7 @@ void shiftLeftR(Context* con, unsigned size, Assembler::Register* a, Assembler:: emit(con, lsl(t->low, b->low, a->low)); } -void shiftLeftC(Context* con, unsigned, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) +void shiftLeftC(Context* con, unsigned size UNUSED, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(con, size == BytesPerWord); emit(con, lsli(t->low, b->low, getValue(a))); @@ -480,7 +484,7 @@ void shiftRightR(Context* con, unsigned size, Assembler::Register* a, Assembler: } } -void shiftRightC(Context* con, unsigned, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) +void shiftRightC(Context* con, unsigned size UNUSED, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(con, size == BytesPerWord); emit(con, asri(t->low, b->low, getValue(a))); @@ -502,7 +506,7 @@ void unsignedShiftRightR(Context* con, unsigned size, Assembler::Register* a, As } } -void unsignedShiftRightC(Context* con, unsigned, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) +void unsignedShiftRightC(Context* con, unsigned size UNUSED, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { assert(con, size == BytesPerWord); emit(con, lsri(t->low, b->low, getValue(a))); @@ -1450,7 
+1454,7 @@ longJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { assert(c, size == BytesPerWord); - Assembler::Register tmp(0); + Assembler::Register tmp(5); // a non-arg reg that we don't mind clobbering moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, 12); jumpR(c, BytesPerWord, &tmp); } @@ -1638,7 +1642,7 @@ class MyArchitecture: public Assembler::Architecture { } virtual uint32_t generalRegisterMask() { - return 0xFFFFFFFF; + return 0xFFFF; } virtual uint32_t floatRegisterMask() { @@ -1674,14 +1678,14 @@ class MyArchitecture: public Assembler::Architecture { } virtual uintptr_t maximumImmediateJump() { - return 0x7FFFFF; + return 0x1FFFFFF; } virtual bool reserved(int register_) { switch (register_) { case StackRegister: case ThreadRegister: - case 15: + case ProgramCounter: return true; default: diff --git a/src/compile-arm.S b/src/compile-arm.S index 5ddf7140f2..ca70ca8fd2 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -79,10 +79,13 @@ LOCAL(vmInvoke_argumentTest): // load and call function address blx r1 -LOCAL(vmInvoke_returnAddress): +.globl GLOBAL(vmInvoke_returnAddress) +GLOBAL(vmInvoke_returnAddress): // restore stack pointer ldr sp, [sp] +.globl GLOBAL(vmInvoke_safeStack) +GLOBAL(vmInvoke_safeStack): // restore return type ldr ip, [sp] diff --git a/src/compile.cpp b/src/compile.cpp index b84d2ae6e8..fe193d00d7 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -40,7 +40,7 @@ namespace { namespace local { -const bool DebugCompile = false; +const bool DebugCompile = true; const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; diff --git a/src/compiler.cpp b/src/compiler.cpp index 44217336a2..84860cea2a 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -19,8 +19,8 @@ namespace local { const bool DebugAppend = false; const bool DebugCompile = false; -const bool DebugResources = false; -const bool DebugFrame = false; +const bool DebugResources = true; +const bool 
DebugFrame = true; const bool DebugControl = false; const bool DebugReads = false; const bool DebugSites = false; From f740570ff671d39305238b81fab91f01726e31ed Mon Sep 17 00:00:00 2001 From: jet Date: Fri, 27 Aug 2010 18:52:33 -0600 Subject: [PATCH 03/23] Further debugging of ARM "Hello World!" JIT functionality. --- src/arm.S | 6 +- src/arm.cpp | 157 ++++++++++++++-------------------------------- src/compile-arm.S | 28 +++++++-- src/compiler.cpp | 4 +- 4 files changed, 74 insertions(+), 121 deletions(-) diff --git a/src/arm.S b/src/arm.S index 9df1b14ac3..fe068824b0 100644 --- a/src/arm.S +++ b/src/arm.S @@ -50,7 +50,9 @@ vmNativeCall: .globl vmJump vmJump: + mov lr, r0 + ldr r0, [sp] + ldr r1, [sp, #4] mov sp, r2 - mov r4, r3 - ldmia sp, {r0,r1} + mov r8, r3 bx lr diff --git a/src/arm.cpp b/src/arm.cpp index b6968127dd..564e3b9f23 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -68,10 +68,10 @@ inline int add(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, inline int adc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x5, 0, Rn, Rd, shift, Sh, Rm); } inline int sbc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x6, 0, Rn, Rd, shift, Sh, Rm); } inline int rsc(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x7, 0, Rn, Rd, shift, Sh, Rm); } -inline int tst(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x8, 0, Rn, 0, shift, Sh, Rm); } -inline int teq(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x9, 0, Rn, 0, shift, Sh, Rm); } -inline int cmp(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xa, 0, Rn, 0, shift, Sh, Rm); } -inline int cmn(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xb, 0, Rn, 0, shift, Sh, Rm); } +inline int tst(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x8, 1, Rn, 0, shift, Sh, Rm); } +inline int teq(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0x9, 1, Rn, 0, shift, Sh, Rm); } +inline int cmp(int Rn, int Rm, 
int Sh=0, int shift=0) { return DATA(AL, 0xa, 1, Rn, 0, shift, Sh, Rm); } +inline int cmn(int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xb, 1, Rn, 0, shift, Sh, Rm); } inline int orr(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xc, 0, Rn, Rd, shift, Sh, Rm); } inline int mov(int Rd, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xd, 0, 0, Rd, shift, Sh, Rm); } inline int bic(int Rd, int Rn, int Rm, int Sh=0, int shift=0) { return DATA(AL, 0xe, 0, Rn, Rd, shift, Sh, Rm); } @@ -82,7 +82,7 @@ inline int subi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x2, 0, R inline int rsbi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x3, 0, Rn, Rd, rot, imm); } inline int addi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x4, 0, Rn, Rd, rot, imm); } inline int adci(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x5, 0, Rn, Rd, rot, imm); } -inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0x0, 0, Rn, 0, rot, imm); } +inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); } inline int orri(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xc, 0, Rn, Rd, rot, imm); } inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); } inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); } @@ -92,8 +92,8 @@ inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, inline int umlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 5, 0, RdLo, RdHi, Rs, Rm); } inline int smull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 6, 0, RdLo, RdHi, Rs, Rm); } inline int smlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 7, 0, RdLo, RdHi, Rs, Rm); } -inline int ldr(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 0, 0, 1, Rn, Rd, 0, 0, Rm); } -inline int ldri(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 0, 0, 1, Rn, Rd, abs(imm)); } 
+inline int ldr(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 1, Rn, Rd, 0, 0, Rm); } +inline int ldri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 1, Rn, Rd, abs(imm)); } inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); } inline int ldrbi(int Rd, int Rn, int imm) { return XFERI(AL, 1, calcU(imm), 1, 0, 1, Rn, Rd, abs(imm)); } inline int str(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 0, Rn, Rd, 0, 0, Rm); } @@ -108,10 +108,9 @@ inline int ldrsh(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, inline int ldrshi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 1, abs(imm)&0xf); } inline int ldrsb(int Rd, int Rn, int Rm) { return XFER2(AL, 1, 1, 0, 1, Rn, Rd, 1, 0, Rm); } inline int ldrsbi(int Rd, int Rn, int imm) { return XFER2I(AL, 1, calcU(imm), 0, 1, Rn, Rd, abs(imm)>>4 & 0xf, 1, 0, abs(imm)&0xf); } -inline int ldmib(int Rn, int rlist) { return BLOCKXFER(AL, 1, 1, 0, 0, 1, Rn, rlist); } -inline int ldmia(int Rn, int rlist) { return BLOCKXFER(AL, 0, 1, 0, 0, 1, Rn, rlist); } -inline int stmib(int Rn, int rlist) { return BLOCKXFER(AL, 1, 1, 0, 0, 0, Rn, rlist); } -inline int stmdb(int Rn, int rlist) { return BLOCKXFER(AL, 1, 0, 0, 0, 0, Rn, rlist); } +inline int pop(int Rd) { return XFERI(AL, 0, 1, 0, 0, 1, 13, Rd, 4); } +inline int ldmfd(int Rn, int rlist) { return BLOCKXFER(AL, 0, 1, 0, 1, 1, Rn, rlist); } +inline int stmfd(int Rn, int rlist) { return BLOCKXFER(AL, 1, 0, 0, 1, 0, Rn, rlist); } inline int swp(int Rd, int Rm, int Rn) { return SWAP(AL, 0, Rn, Rd, Rm); } inline int swpb(int Rd, int Rm, int Rn) { return SWAP(AL, 1, Rn, Rd, Rm); } inline int SETCOND(int ins, int cond) { return ((ins&0x0fffffff) | (cond<<28)); } @@ -158,15 +157,18 @@ inline bool isInt24(intptr_t v) { return v == (v & 0xffffff); } inline bool isInt32(intptr_t v) { return v == static_cast(v); } inline int carry16(intptr_t 
v) { return static_cast(v) < 0 ? 1 : 0; } +inline bool isOfWidth(int i, int size) { return static_cast(i) >> size == 0; } + const unsigned FrameFooterSize = 2; const unsigned FrameHeaderSize = 0; const unsigned StackAlignmentInBytes = 8; const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord; -const int StackRegister = 13; +const int ThreadRegister = 8; const int BaseRegister = 11; -const int ThreadRegister = 12; +const int StackRegister = 13; +const int LinkRegister = 14; const int ProgramCounter = 15; class MyBlock: public Assembler::Block { @@ -711,11 +713,11 @@ moveCR2(Context* c, unsigned, Assembler::Constant* src, if (src->value->resolved()) { int32_t i = getValue(src); emit(c, movi(dst->low, lo8(i))); - if (!isInt8(i)) { + if (!isOfWidth(i, 8)) { emit(c, orri(dst->low, dst->low, hi8(i), 12)); - if (!isInt16(i)) { + if (!isOfWidth(i, 16)) { emit(c, orri(dst->low, dst->low, lo8(hi16(i)), 8)); - if (!isInt24(i)) { + if (!isOfWidth(i, 24)) { emit(c, orri(dst->low, dst->low, hi8(hi16(i)), 4)); } } @@ -755,11 +757,11 @@ void addC(Context* con, unsigned size, Assembler::Constant* a, Assembler::Regist int32_t i = getValue(a); if (i) { emit(con, addi(t->low, b->low, lo8(i))); - if (!isInt8(i)) { + if (!isOfWidth(i, 8)) { emit(con, addi(t->low, b->low, hi8(i), 12)); - if (!isInt16(i)) { + if (!isOfWidth(i, 16)) { emit(con, addi(t->low, b->low, lo8(hi16(i)), 8)); - if (!isInt24(i)) { + if (!isOfWidth(i, 24)) { emit(con, addi(t->low, b->low, hi8(hi16(i)), 4)); } } @@ -918,12 +920,12 @@ moveAndUpdateRM(Context* c, unsigned srcSize UNUSED, Assembler::Register* src, assert(c, dstSize == BytesPerWord); if (dst->index == NoRegister) { - emit(c, stri(src->low, dst->base, dst->offset, 1)); + emit(c, stri(src->low, dst->base, dst->offset, dst->offset ? 1 : 0)); } else { assert(c, dst->offset == 0); assert(c, dst->scale == 1); - emit(c, str(src->low, dst->base, dst->index, 1)); + emit(c, str(src->low, dst->base, dst->index, dst->offset ? 
1 : 0)); } } @@ -1041,12 +1043,12 @@ andC(Context* con, unsigned size, Assembler::Constant* a, int32_t i = getValue(a); if (i) { - emit(con, andi(dst->low, b->low, lo8(i))); - emit(con, andi(dst->low, b->low, hi8(i), 12)); - emit(con, andi(dst->low, b->low, lo8(hi16(i)), 8)); - emit(con, andi(dst->low, b->low, hi8(hi16(i)), 4)); + Assembler::Register tmp(con->client->acquireTemporary()); + moveCR(con, size, a, size, &tmp); + andR(con, size, &tmp, b, dst); + con->client->releaseTemporary(tmp.low); } else { - moveRR(con, size, b, size, dst); + emit(con, mov(dst->low, 0)); } } @@ -1067,11 +1069,11 @@ orC(Context* con, unsigned size, Assembler::Constant* a, int32_t i = getValue(a); if (i) { emit(con, orri(dst->low, b->low, lo8(i))); - if (!isInt8(i)) { + if (!isOfWidth(i, 8)) { emit(con, orri(dst->low, b->low, hi8(i), 12)); - if (!isInt16(i)) { + if (!isOfWidth(i, 16)) { emit(con, orri(dst->low, b->low, lo8(hi16(i)), 8)); - if (!isInt24(i)) { + if (!isOfWidth(i, 24)) { emit(con, orri(dst->low, b->low, hi8(hi16(i)), 4)); } } @@ -1097,18 +1099,10 @@ xorC(Context* con, unsigned size, Assembler::Constant* a, int32_t i = getValue(a); if (i) { - emit(con, eori(dst->low, b->low, lo8(i))); - if (!isInt8(i)) { - emit(con, eori(dst->low, b->low, hi8(i), 12)); - if (!isInt16(i)) { - emit(con, eori(dst->low, b->low, lo8(hi16(i)), 8)); - if (!isInt24(i)) { - emit(con, eori(dst->low, b->low, hi8(hi16(i)), 4)); - } - } - } - } else { - moveRR(con, size, b, size, dst); + Assembler::Register tmp(con->client->acquireTemporary()); + moveCR(con, size, a, size, &tmp); + xorR(con, size, &tmp, b, dst); + con->client->releaseTemporary(tmp.low); } } @@ -1432,7 +1426,7 @@ longCallC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { assert(c, size == BytesPerWord); - Assembler::Register tmp(0); + Assembler::Register tmp(4); moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, 12); callR(c, BytesPerWord, &tmp); } @@ -1454,7 +1448,7 @@ longJumpC(Context* c, unsigned size UNUSED, 
Assembler::Constant* target) { assert(c, size == BytesPerWord); - Assembler::Register tmp(5); // a non-arg reg that we don't mind clobbering + Assembler::Register tmp(4); // a non-arg reg that we don't mind clobbering moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, 12); jumpR(c, BytesPerWord, &tmp); } @@ -1480,66 +1474,10 @@ jumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) emit(c, b(0)); } -void -jumpIfEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - appendOffsetTask(c, target->value, offset(c), true); - emit(c, SETCOND(b(0), EQ)); -} - -void -jumpIfNotEqualC(Context* c, unsigned size UNUSED, Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - appendOffsetTask(c, target->value, offset(c), true); - emit(c, SETCOND(b(0), NE)); -} - -void -jumpIfGreaterC(Context* c, unsigned size UNUSED, Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - appendOffsetTask(c, target->value, offset(c), true); - emit(c, SETCOND(b(0), GT)); -} - -void -jumpIfGreaterOrEqualC(Context* c, unsigned size UNUSED, - Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - appendOffsetTask(c, target->value, offset(c), true); - emit(c, SETCOND(b(0), GE)); -} - -void -jumpIfLessC(Context* c, unsigned size UNUSED, Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - appendOffsetTask(c, target->value, offset(c), true); - emit(c, SETCOND(b(0), LS)); -} - -void -jumpIfLessOrEqualC(Context* c, unsigned size UNUSED, - Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - appendOffsetTask(c, target->value, offset(c), true); - emit(c, SETCOND(b(0), LE)); -} - void return_(Context* c) { - emit(c, mov(15, 14)); + emit(c, bx(LinkRegister)); } void @@ -1768,7 +1706,7 @@ class MyArchitecture: public Assembler::Architecture { } virtual void* frameIp(void* stack) { - return stack ? static_cast(stack)[2] : 0; + return stack ? 
static_cast(stack)[returnAddressOffset()] : 0; } virtual unsigned frameHeaderSize() { @@ -1776,7 +1714,7 @@ class MyArchitecture: public Assembler::Architecture { } virtual unsigned frameReturnAddressSize() { - return 1; + return 0; } virtual unsigned frameFooterSize() { @@ -2033,10 +1971,9 @@ class MyAssembler: public Assembler { } virtual void allocateFrame(unsigned footprint) { - Register returnAddress(0); - emit(&c, mov(returnAddress.low, 14)); + Register returnAddress(LinkRegister); - Memory returnAddressDst(StackRegister, 8); + Memory returnAddressDst(StackRegister, arch_->returnAddressOffset() * BytesPerWord); moveRM(&c, BytesPerWord, &returnAddress, BytesPerWord, &returnAddressDst); Register stack(StackRegister); @@ -2055,14 +1992,12 @@ class MyAssembler: public Assembler { virtual void popFrame() { Register stack(StackRegister); - Memory stackSrc(StackRegister, 0); + Memory stackSrc(StackRegister, arch_->framePointerOffset() * BytesPerWord); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &stack); - Register returnAddress(0); - Memory returnAddressSrc(StackRegister, 8); + Register returnAddress(LinkRegister); + Memory returnAddressSrc(StackRegister, arch_->returnAddressOffset() * BytesPerWord); moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &returnAddress); - - emit(&c, mov(14, returnAddress.low)); } virtual void popFrameForTailCall(unsigned footprint, @@ -2076,7 +2011,7 @@ class MyAssembler: public Assembler { Memory returnAddressSrc(StackRegister, 8 + (footprint * BytesPerWord)); moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &tmp); - emit(&c, mov(14, tmp.low)); + emit(&c, mov(LinkRegister, tmp.low)); Memory stackSrc(StackRegister, footprint * BytesPerWord); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp); diff --git a/src/compile-arm.S b/src/compile-arm.S index ca70ca8fd2..3ac3310030 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -25,7 +25,14 @@ #else # define GLOBAL(x) x #endif - + +#define THREAD_STACK 2144 
+#define THREAD_CONTINUATION 2148 +#define THREAD_EXCEPTION 44 +#define THREAD_EXCEPTION_STACK_ADJUSTMENT 2152 +#define THREAD_EXCEPTION_OFFSET 2156 +#define THREAD_EXCEPTION_HANDLER 2160 + .globl GLOBAL(vmInvoke) GLOBAL(vmInvoke): /* @@ -70,24 +77,33 @@ LOCAL(vmInvoke_argumentTest): cmp r4, r3 blt LOCAL(vmInvoke_argumentLoop) - // save the beginning of our stack frame + // save frame str ip, [sp, #-8]! - // we use ip (r12) to hold the thread pointer, by convention - mov ip, r0 + // we use r8 to hold the thread pointer, by convention + mov r8, r0 +.global GLOBAL(beforecall) +GLOBAL(beforecall): // load and call function address blx r1 +.global GLOBAL(aftercall) +GLOBAL(aftercall): .globl GLOBAL(vmInvoke_returnAddress) GLOBAL(vmInvoke_returnAddress): - // restore stack pointer + + // restore frame ldr sp, [sp] .globl GLOBAL(vmInvoke_safeStack) GLOBAL(vmInvoke_safeStack): + + mov ip, #0 + str ip, [r8, #THREAD_STACK] + // restore return type - ldr ip, [sp] + ldr ip, [sp], #4 // restore callee-saved registers ldmfd sp!, {r4-r11, lr} diff --git a/src/compiler.cpp b/src/compiler.cpp index 84860cea2a..44217336a2 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -19,8 +19,8 @@ namespace local { const bool DebugAppend = false; const bool DebugCompile = false; -const bool DebugResources = true; -const bool DebugFrame = true; +const bool DebugResources = false; +const bool DebugFrame = false; const bool DebugControl = false; const bool DebugReads = false; const bool DebugSites = false; From 56b59cef5c5c0d19bc988fb1d0d234bcc929b2ff Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 30 Aug 2010 16:16:02 +0100 Subject: [PATCH 04/23] use r6 instead of r0 in popFrameAndUpdateStackAndReturn This avoids clobbering the return value. 
--- src/arm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arm.cpp b/src/arm.cpp index 564e3b9f23..266f95ed97 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -2066,7 +2066,7 @@ class MyAssembler: public Assembler { { popFrame(); - Register tmp1(0); + Register tmp1(6); Memory stackSrc(StackRegister, 0); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp1); From b6a839950f2ff6a6a4352f4a16fda05352a8182f Mon Sep 17 00:00:00 2001 From: jet Date: Mon, 30 Aug 2010 16:13:10 -0600 Subject: [PATCH 05/23] Nine tests (including float and integer calculations) are now passing. --- src/arm.cpp | 107 +++++----------------------------------------------- 1 file changed, 10 insertions(+), 97 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 266f95ed97..95e07bc9b6 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -157,6 +157,7 @@ inline bool isInt24(intptr_t v) { return v == (v & 0xffffff); } inline bool isInt32(intptr_t v) { return v == static_cast<int32_t>(v); } inline int carry16(intptr_t v) { return static_cast<int16_t>(v) < 0 ?
1 : 0; } +inline bool isOfWidth(long long i, int size) { return static_cast(i) >> size == 0; } inline bool isOfWidth(int i, int size) { return static_cast(i) >> size == 0; } const unsigned FrameFooterSize = 2; @@ -751,26 +752,6 @@ void addR(Context* con, unsigned size, Assembler::Register* a, Assembler::Regist } } -void addC(Context* con, unsigned size, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { - assert(con, size == BytesPerWord); - - int32_t i = getValue(a); - if (i) { - emit(con, addi(t->low, b->low, lo8(i))); - if (!isOfWidth(i, 8)) { - emit(con, addi(t->low, b->low, hi8(i), 12)); - if (!isOfWidth(i, 16)) { - emit(con, addi(t->low, b->low, lo8(hi16(i)), 8)); - if (!isOfWidth(i, 24)) { - emit(con, addi(t->low, b->low, hi8(hi16(i)), 4)); - } - } - } - } else { - moveRR(con, size, b, size, t); - } -} - void subR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { emit(con, SETS(rsb(t->low, a->low, b->low))); @@ -780,14 +761,6 @@ void subR(Context* con, unsigned size, Assembler::Register* a, Assembler::Regist } } -void subC(Context* c, unsigned size, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) { - assert(c, size == BytesPerWord); - - ResolvedPromise promise(- a->value->value()); - Assembler::Constant constant(&promise); - addC(c, size, &constant, b, t); -} - void multiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { emit(con, mul(t->high, a->low, b->high)); @@ -835,8 +808,10 @@ normalize(Context* c, int offset, int index, unsigned scale, ResolvedPromise offsetPromise(offset); Assembler::Constant offsetConstant(&offsetPromise); - addC(c, BytesPerWord, &offsetConstant, - &untranslatedIndex, &normalizedIndex); + Assembler::Register tmp(c->client->acquireTemporary()); + moveCR(c, BytesPerWord, &offsetConstant, BytesPerWord, &tmp); + addR(c, BytesPerWord, &tmp, 
&untranslatedIndex, &normalizedIndex); + c->client->releaseTemporary(tmp.low); } return normalizedIndex.low; @@ -1035,23 +1010,6 @@ andR(Context* c, unsigned size, Assembler::Register* a, emit(c, and_(dst->low, a->low, b->low)); } -void -andC(Context* con, unsigned size, Assembler::Constant* a, - Assembler::Register* b, Assembler::Register* dst) -{ - assert(con, size == BytesPerWord); - - int32_t i = getValue(a); - if (i) { - Assembler::Register tmp(con->client->acquireTemporary()); - moveCR(con, size, a, size, &tmp); - andR(con, size, &tmp, b, dst); - con->client->releaseTemporary(tmp.low); - } else { - emit(con, mov(dst->low, 0)); - } -} - void orR(Context* c, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* dst) @@ -1060,29 +1018,6 @@ orR(Context* c, unsigned size, Assembler::Register* a, emit(c, orr(dst->low, a->low, b->low)); } -void -orC(Context* con, unsigned size, Assembler::Constant* a, - Assembler::Register* b, Assembler::Register* dst) -{ - assert(con, size == BytesPerWord); - - int32_t i = getValue(a); - if (i) { - emit(con, orri(dst->low, b->low, lo8(i))); - if (!isOfWidth(i, 8)) { - emit(con, orri(dst->low, b->low, hi8(i), 12)); - if (!isOfWidth(i, 16)) { - emit(con, orri(dst->low, b->low, lo8(hi16(i)), 8)); - if (!isOfWidth(i, 24)) { - emit(con, orri(dst->low, b->low, hi8(hi16(i)), 4)); - } - } - } - } else { - moveRR(con, size, b, size, dst); - } -} - void xorR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* dst) @@ -1091,21 +1026,6 @@ xorR(Context* con, unsigned size, Assembler::Register* a, emit(con, eor(dst->low, a->low, b->low)); } -void -xorC(Context* con, unsigned size, Assembler::Constant* a, - Assembler::Register* b, Assembler::Register* dst) -{ - assert(con, size == BytesPerWord); - - int32_t i = getValue(a); - if (i) { - Assembler::Register tmp(con->client->acquireTemporary()); - moveCR(con, size, a, size, &tmp); - xorR(con, size, &tmp, b, dst); - 
con->client->releaseTemporary(tmp.low); - } -} - void moveAR2(Context* c, unsigned srcSize, Assembler::Address* src, unsigned dstSize, Assembler::Register* dst, unsigned promiseOffset) @@ -1143,7 +1063,7 @@ compareCR(Context* c, unsigned aSize, Assembler::Constant* a, { assert(c, aSize == 4 and bSize == 4); - if (a->value->resolved() and isInt16(a->value->value())) { + if (a->value->resolved() and isOfWidth(a->value->value(), 8)) { emit(c, cmpi(b->low, a->value->value())); } else { Assembler::Register tmp(c->client->acquireTemporary()); @@ -1538,10 +1458,8 @@ populateTables(ArchitectureContext* c) bo[index(c, Negate, R, R)] = CAST2(negateRR); to[index(c, Add, R)] = CAST3(addR); - to[index(c, Add, C)] = CAST3(addC); to[index(c, Subtract, R)] = CAST3(subR); - to[index(c, Subtract, C)] = CAST3(subC); to[index(c, Multiply, R)] = CAST3(multiplyR); @@ -1554,13 +1472,10 @@ populateTables(ArchitectureContext* c) to[index(c, UnsignedShiftRight, R)] = CAST3(unsignedShiftRightR); to[index(c, UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC); - to[index(c, And, C)] = CAST3(andC); to[index(c, And, R)] = CAST3(andR); - to[index(c, Or, C)] = CAST3(orC); to[index(c, Or, R)] = CAST3(orR); - to[index(c, Xor, C)] = CAST3(xorC); to[index(c, Xor, R)] = CAST3(xorR); bro[branchIndex(c, R, R)] = CAST_BRANCH(branchRR); @@ -1830,7 +1745,7 @@ class MyArchitecture: public Assembler::Architecture { virtual void planSource (TernaryOperation op, - unsigned aSize, uint8_t* aTypeMask, uint64_t* aRegisterMask, + unsigned aSize UNUSED, uint8_t* aTypeMask, uint64_t* aRegisterMask, unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, unsigned, bool* thunk) { @@ -1845,11 +1760,9 @@ class MyArchitecture: public Assembler::Architecture { switch (op) { case Add: case Subtract: - if (aSize == 8) { - *aTypeMask = *bTypeMask = (1 << RegisterOperand); - } - break; - + case And: + case Or: + case Xor: case Multiply: *aTypeMask = *bTypeMask = (1 << RegisterOperand); break; From 
b26dd4abf1e25127e5bd1542ccba5482145f6ccb Mon Sep 17 00:00:00 2001 From: jet Date: Tue, 31 Aug 2010 18:35:55 -0600 Subject: [PATCH 06/23] All but 6 tests are now passing in JIT mode on ARM. --- src/arm.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 95e07bc9b6..43885f57ab 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -132,6 +132,8 @@ inline int blt(int offset) { return SETCOND(b(offset), LT); } inline int bgt(int offset) { return SETCOND(b(offset), GT); } inline int ble(int offset) { return SETCOND(b(offset), LE); } inline int bge(int offset) { return SETCOND(b(offset), GE); } +inline int blo(int offset) { return SETCOND(b(offset), CC); } +inline int bhs(int offset) { return SETCOND(b(offset), CS); } } const uint64_t MASK_LO32 = 0xffffffff; @@ -699,7 +701,7 @@ moveZRR(Context* c, unsigned srcSize, Assembler::Register* src, switch (srcSize) { case 2: emit(c, lsli(dst->low, src->low, 16)); - emit(c, lsri(dst->low, src->low, 16)); + emit(c, lsri(dst->low, dst->low, 16)); break; default: abort(c); @@ -745,7 +747,7 @@ moveCR(Context* c, unsigned srcSize, Assembler::Constant* src, void addR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { - emit(con, SETS(adc(t->low, a->low, b->low))); + emit(con, SETS(add(t->low, a->low, b->low))); emit(con, adc(t->high, a->high, b->high)); } else { emit(con, add(t->low, a->low, b->low)); @@ -1171,7 +1173,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al, emit(c, bgt(0)); compareUnsigned(c, 4, al, 4, bl); - conditional(c, blt(0), target); + conditional(c, blo(0), target); break; case JumpIfGreater: @@ -1181,7 +1183,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al, emit(c, blt(0)); compareUnsigned(c, 4, al, 4, bl); - conditional(c, bgt(0), target); + conditional(c, bhi(0), target); break; case JumpIfLessOrEqual: @@ -1191,7 +1193,7 @@ 
branchLong(Context* c, TernaryOperation op, Assembler::Operand* al, emit(c, bgt(0)); compareUnsigned(c, 4, al, 4, bl); - conditional(c, ble(0), target); + conditional(c, bls(0), target); break; case JumpIfGreaterOrEqual: @@ -1201,7 +1203,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al, emit(c, blt(0)); compareUnsigned(c, 4, al, 4, bl); - conditional(c, bge(0), target); + conditional(c, bhs(0), target); break; default: @@ -1745,8 +1747,8 @@ class MyArchitecture: public Assembler::Architecture { virtual void planSource (TernaryOperation op, - unsigned aSize UNUSED, uint8_t* aTypeMask, uint64_t* aRegisterMask, - unsigned, uint8_t* bTypeMask, uint64_t* bRegisterMask, + unsigned, uint8_t* aTypeMask, uint64_t* aRegisterMask, + unsigned bSize, uint8_t* bTypeMask, uint64_t* bRegisterMask, unsigned, bool* thunk) { *aTypeMask = (1 << RegisterOperand) | (1 << ConstantOperand); @@ -1758,6 +1760,12 @@ class MyArchitecture: public Assembler::Architecture { *thunk = false; switch (op) { + case ShiftLeft: + case ShiftRight: + case UnsignedShiftRight: + if (bSize == 8) *aTypeMask = *bTypeMask = (1 << RegisterOperand); + break; + case Add: case Subtract: case And: From a20d7e028b13788ce1abc9f7ed469115c665e19d Mon Sep 17 00:00:00 2001 From: jet Date: Thu, 2 Sep 2010 16:09:01 -0600 Subject: [PATCH 07/23] Longs.java test now progresses further before failure. 
--- src/arm.cpp | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 43885f57ab..5551042148 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -88,10 +88,10 @@ inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, ro inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); } inline int mul(int Rd, int Rm, int Rs) { return MULTIPLY(AL, 0, 0, Rd, 0, Rs, Rm); } inline int mla(int Rd, int Rm, int Rs, int Rn) { return MULTIPLY(AL, 1, 0, Rd, Rn, Rs, Rm); } -inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, RdLo, RdHi, Rs, Rm); } -inline int umlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 5, 0, RdLo, RdHi, Rs, Rm); } -inline int smull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 6, 0, RdLo, RdHi, Rs, Rm); } -inline int smlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 7, 0, RdLo, RdHi, Rs, Rm); } +inline int umull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 4, 0, RdHi, RdLo, Rs, Rm); } +inline int umlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 5, 0, RdHi, RdLo, Rs, Rm); } +inline int smull(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 6, 0, RdHi, RdLo, Rs, Rm); } +inline int smlal(int RdLo, int RdHi, int Rm, int Rs) { return MULTIPLY(AL, 7, 0, RdHi, RdLo, Rs, Rm); } inline int ldr(int Rd, int Rn, int Rm, int W=0) { return XFER(AL, 1, 1, 0, W, 1, Rn, Rd, 0, 0, Rm); } inline int ldri(int Rd, int Rn, int imm, int W=0) { return XFERI(AL, 1, calcU(imm), 0, W, 1, Rn, Rd, abs(imm)); } inline int ldrb(int Rd, int Rn, int Rm) { return XFER(AL, 1, 1, 1, 0, 1, Rn, Rd, 0, 0, Rm); } @@ -479,8 +479,8 @@ void shiftRightR(Context* con, unsigned size, Assembler::Register* a, Assembler: emit(con, lsl(tmpLo, b->high, tmpHi)); emit(con, orr(t->low, t->low, tmpLo)); emit(con, SETS(addi(tmpHi, a->low, -32))); - emit(con, asr(tmpLo, b->high, 
tmpHi)); - emit(con, SETCOND(::b(8), LE)); + emit(con, SETS(asr(tmpLo, b->high, tmpHi))); + emit(con, ble(4)); emit(con, orri(t->low, tmpLo, 0)); emit(con, asr(t->high, b->high, a->low)); freeTemp(con, tmpHi); freeTemp(con, tmpLo); @@ -765,9 +765,20 @@ void subR(Context* con, unsigned size, Assembler::Register* a, Assembler::Regist void multiplyR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { - emit(con, mul(t->high, a->low, b->high)); - emit(con, mla(t->high, a->high, b->low, t->high)); - emit(con, smlal(t->low, t->high, a->low, b->low)); + bool useTemporaries = b->low == t->low; + int tmpLow = useTemporaries ? con->client->acquireTemporary() : t->low; + int tmpHigh = useTemporaries ? con->client->acquireTemporary() : t->high; + + emit(con, umull(tmpLow, tmpHigh, a->low, b->low)); + emit(con, mla(tmpHigh, a->low, b->high, tmpHigh)); + emit(con, mla(tmpHigh, a->high, b->low, tmpHigh)); + + if (useTemporaries) { + emit(con, mov(t->low, tmpLow)); + emit(con, mov(t->high, tmpHigh)); + con->client->releaseTemporary(tmpLow); + con->client->releaseTemporary(tmpHigh); + } } else { emit(con, mul(t->low, a->low, b->low)); } From bd01784249bb5b0e4f500b560ea71727f841c9b9 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Fri, 3 Sep 2010 00:18:19 +0100 Subject: [PATCH 08/23] save return address in arm.cpp's MyAssembler::saveFrame This is necessary to allow safe stack unwinding (e.g. for exception handling and garbage collection) from native code. 
--- src/arm.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/arm.cpp b/src/arm.cpp index 5551042148..7bb209f16e 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -1856,6 +1856,11 @@ class MyAssembler: public Assembler { } virtual void saveFrame(unsigned stackOffset, unsigned) { + Register returnAddress(LinkRegister); + Memory returnAddressDst + (StackRegister, arch_->returnAddressOffset() * BytesPerWord); + moveRM(&c, BytesPerWord, &returnAddress, BytesPerWord, &returnAddressDst); + Register stack(StackRegister); Memory stackDst(ThreadRegister, stackOffset); moveRM(&c, BytesPerWord, &stack, BytesPerWord, &stackDst); From dd0a696932ab7c6eb004235958efe42cccc9c0ea Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Fri, 3 Sep 2010 18:32:22 +0100 Subject: [PATCH 09/23] handle logical AND with a constant in a single instruction where possible --- src/arm.cpp | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/arm.cpp b/src/arm.cpp index 7bb209f16e..2014c9fd40 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -82,6 +82,7 @@ inline int subi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x2, 0, R inline int rsbi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x3, 0, Rn, Rd, rot, imm); } inline int addi(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x4, 0, Rn, Rd, rot, imm); } inline int adci(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0x5, 0, Rn, Rd, rot, imm); } +inline int bici(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xe, 0, Rn, Rd, rot, imm); } inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); } inline int orri(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xc, 0, Rn, Rd, rot, imm); } inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); } @@ -1023,6 +1024,54 @@ andR(Context* c, unsigned size, Assembler::Register* a, emit(c, and_(dst->low, a->low, b->low)); } +void 
+andC(Context* c, unsigned size, Assembler::Constant* a, + Assembler::Register* b, Assembler::Register* dst) +{ + int64_t v = a->value->value(); + + if (size == 8) { + ResolvedPromise high((v >> 32) & 0xFFFFFFFF); + Assembler::Constant ah(&high); + + ResolvedPromise low(v & 0xFFFFFFFF); + Assembler::Constant al(&low); + + Assembler::Register bh(b->high); + Assembler::Register dh(dst->high); + + andC(c, 4, &al, b, dst); + andC(c, 4, &ah, &bh, &dh); + } else { + uint32_t v32 = static_cast<uint32_t>(v); + if (v32 != 0xFFFFFFFF) { + if ((v32 & 0xFFFFFF00) == 0xFFFFFF00) { + emit(c, bici(dst->low, b->low, (~(v32 & 0xFF)) & 0xFF)); + } else if ((v32 & 0xFFFFFF00) == 0) { + emit(c, andi(dst->low, b->low, v32 & 0xFF)); + } else { + // todo: there are other cases we can handle in one + // instruction + + bool useTemporary = b->low == dst->low; + Assembler::Register tmp(dst->low); + if (useTemporary) { + tmp.low = c->client->acquireTemporary(); + } + + moveCR(c, 4, a, 4, &tmp); + andR(c, 4, b, &tmp, dst); + + if (useTemporary) { + c->client->releaseTemporary(tmp.low); + } + } + } else { + moveRR(c, size, b, size, dst); + } + } +} + void orR(Context* c, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* dst) @@ -1486,6 +1535,7 @@ populateTables(ArchitectureContext* c) to[index(c, UnsignedShiftRight, C)] = CAST3(unsignedShiftRightC); to[index(c, And, R)] = CAST3(andR); + to[index(c, And, C)] = CAST3(andC); to[index(c, Or, R)] = CAST3(orR); @@ -1779,7 +1829,6 @@ class MyArchitecture: public Assembler::Architecture { case Add: case Subtract: - case And: case Or: case Xor: case Multiply: From a1f5456451ef2caf3990a5aa5257566a87634eae Mon Sep 17 00:00:00 2001 From: jet Date: Fri, 3 Sep 2010 12:52:11 -0600 Subject: [PATCH 10/23] All tests passing for ARM port in JIT mode.
--- src/arm.cpp | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 2014c9fd40..6fca682e8f 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -86,6 +86,7 @@ inline int bici(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xe, 0, R inline int cmpi(int Rn, int imm, int rot=0) { return DATAI(AL, 0xa, 1, Rn, 0, rot, imm); } inline int orri(int Rd, int Rn, int imm, int rot=0) { return DATAI(AL, 0xc, 0, Rn, Rd, rot, imm); } inline int movi(int Rd, int imm, int rot=0) { return DATAI(AL, 0xd, 0, 0, Rd, rot, imm); } +inline int orrsh(int Rd, int Rn, int Rm, int Rs, int Sh) { return DATAS(AL, 0xc, 0, Rn, Rd, Rs, Sh, Rm); } inline int movsh(int Rd, int Rm, int Rs, int Sh) { return DATAS(AL, 0xd, 0, 0, Rd, Rs, Sh, Rm); } inline int mul(int Rd, int Rm, int Rs) { return MULTIPLY(AL, 0, 0, Rd, 0, Rs, Rm); } inline int mla(int Rd, int Rm, int Rs, int Rn) { return MULTIPLY(AL, 1, 0, Rd, Rn, Rs, Rm); } @@ -452,17 +453,18 @@ inline int64_t getValue(Assembler::Constant* c) { return c->value->value(); } void shiftLeftR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { - int tmpHi = newTemp(con), tmpLo = newTemp(con); - emit(con, SETS(rsbi(tmpHi, a->low, 32))); - emit(con, lsl(t->high, b->high, a->low)); - emit(con, lsr(tmpLo, b->low, tmpHi)); - emit(con, orr(t->high, t->high, tmpLo)); - emit(con, addi(tmpHi, a->low, -32)); - emit(con, lsl(tmpLo, b->low, tmpHi)); - emit(con, orr(t->high, t->high, tmpLo)); - freeTemp(con, tmpHi); freeTemp(con, tmpLo); + int tmp1 = newTemp(con), tmp2 = newTemp(con); + emit(con, lsl(tmp1, b->high, a->low)); + emit(con, rsbi(tmp2, a->low, 32)); + emit(con, orrsh(tmp1, tmp1, b->low, tmp2, LSR)); + emit(con, SETS(subi(t->high, a->low, 32))); + emit(con, SETCOND(mov(t->high, tmp1), MI)); + emit(con, SETCOND(lsl(t->high, b->low, t->high), PL)); + emit(con, lsl(t->low, b->low, a->low)); + freeTemp(con, 
tmp1); freeTemp(con, tmp2); + } else { + emit(con, lsl(t->low, b->low, a->low)); } - emit(con, lsl(t->low, b->low, a->low)); } void shiftLeftC(Context* con, unsigned size UNUSED, Assembler::Constant* a, Assembler::Register* b, Assembler::Register* t) @@ -474,17 +476,15 @@ void shiftLeftC(Context* con, unsigned size UNUSED, Assembler::Constant* a, Asse void shiftRightR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { - int tmpHi = newTemp(con), tmpLo = newTemp(con); - emit(con, SETS(rsbi(tmpHi, a->low, 32))); - emit(con, lsr(t->low, b->low, a->low)); - emit(con, lsl(tmpLo, b->high, tmpHi)); - emit(con, orr(t->low, t->low, tmpLo)); - emit(con, SETS(addi(tmpHi, a->low, -32))); - emit(con, SETS(asr(tmpLo, b->high, tmpHi))); - emit(con, ble(4)); - emit(con, orri(t->low, tmpLo, 0)); + int tmp1 = newTemp(con), tmp2 = newTemp(con); + emit(con, lsr(tmp1, b->low, a->low)); + emit(con, rsbi(tmp2, a->low, 32)); + emit(con, orrsh(tmp1, tmp1, b->high, tmp2, LSL)); + emit(con, SETS(subi(t->low, a->low, 32))); + emit(con, SETCOND(mov(t->low, tmp1), MI)); + emit(con, SETCOND(asr(t->low, b->high, t->low), PL)); emit(con, asr(t->high, b->high, a->low)); - freeTemp(con, tmpHi); freeTemp(con, tmpLo); + freeTemp(con, tmp1); freeTemp(con, tmp2); } else { emit(con, asr(t->low, b->low, a->low)); } @@ -1076,7 +1076,7 @@ void orR(Context* c, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* dst) { - if (size == 8) orr(dst->high, a->high, b->high); + if (size == 8) emit(c, orr(dst->high, a->high, b->high)); emit(c, orr(dst->low, a->low, b->low)); } From 5d5dbd860b1b2cee5054b2c3d94bb4d73391e371 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 8 Nov 2010 00:41:44 +0000 Subject: [PATCH 11/23] fix ARM tails=true build This requires adding LinkRegister to the list of reserved registers, since it must be preserved in the thunk code generated by compileDirectInvoke. 
An alternative would be to explicitly preserve it in that special case, but that would complicate the code quite a bit. --- src/arm.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 6fca682e8f..13dc298032 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -171,7 +171,6 @@ const unsigned StackAlignmentInBytes = 8; const unsigned StackAlignmentInWords = StackAlignmentInBytes / BytesPerWord; const int ThreadRegister = 8; -const int BaseRegister = 11; const int StackRegister = 13; const int LinkRegister = 14; const int ProgramCounter = 15; @@ -914,7 +913,7 @@ moveAndUpdateRM(Context* c, unsigned srcSize UNUSED, Assembler::Register* src, assert(c, dst->offset == 0); assert(c, dst->scale == 1); - emit(c, str(src->low, dst->base, dst->index, dst->offset ? 1 : 0)); + emit(c, str(src->low, dst->base, dst->index, 1)); } } @@ -1599,6 +1598,7 @@ class MyArchitecture: public Assembler::Architecture { virtual bool reserved(int register_) { switch (register_) { + case LinkRegister: case StackRegister: case ThreadRegister: case ProgramCounter: @@ -1624,7 +1624,7 @@ class MyArchitecture: public Assembler::Architecture { virtual int argumentRegister(unsigned index) { assert(&c, index < argumentRegisterCount()); - return index + 0; + return index; } virtual unsigned stackAlignmentInWords() { @@ -1968,7 +1968,7 @@ class MyAssembler: public Assembler { } virtual void adjustFrame(unsigned footprint) { - Register nextStack(0); + Register nextStack(5); Memory stackSrc(StackRegister, 0); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &nextStack); @@ -1993,23 +1993,25 @@ class MyAssembler: public Assembler { { if (TailCalls) { if (offset) { - Register tmp(0); - Memory returnAddressSrc(StackRegister, 8 + (footprint * BytesPerWord)); - moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &tmp); + Register link(LinkRegister); + Memory returnAddressSrc + (StackRegister, BytesPerWord + (footprint * BytesPerWord)); 
+ moveMR(&c, BytesPerWord, &returnAddressSrc, BytesPerWord, &link); - emit(&c, mov(LinkRegister, tmp.low)); - + Register tmp(c.client->acquireTemporary()); Memory stackSrc(StackRegister, footprint * BytesPerWord); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp); Memory stackDst(StackRegister, (footprint - offset) * BytesPerWord); moveAndUpdateRM(&c, BytesPerWord, &tmp, BytesPerWord, &stackDst); + c.client->releaseTemporary(tmp.low); + if (returnAddressSurrogate != NoRegister) { assert(&c, offset > 0); Register ras(returnAddressSurrogate); - Memory dst(StackRegister, 8 + (offset * BytesPerWord)); + Memory dst(StackRegister, BytesPerWord + (offset * BytesPerWord)); moveRM(&c, BytesPerWord, &ras, BytesPerWord, &dst); } @@ -2035,7 +2037,7 @@ class MyAssembler: public Assembler { assert(&c, (argumentFootprint % StackAlignmentInWords) == 0); if (TailCalls and argumentFootprint > StackAlignmentInWords) { - Register tmp(0); + Register tmp(5); Memory stackSrc(StackRegister, 0); moveMR(&c, BytesPerWord, &stackSrc, BytesPerWord, &tmp); From 36a8ba28e5513c147121342cea91d5f4588001b6 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 8 Nov 2010 04:15:31 +0000 Subject: [PATCH 12/23] disable debug logging in compile.cpp --- src/compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compile.cpp b/src/compile.cpp index 7cf8cb700e..e5d7a7df2d 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -40,7 +40,7 @@ namespace { namespace local { -const bool DebugCompile = true; +const bool DebugCompile = false; const bool DebugNatives = false; const bool DebugCallTable = false; const bool DebugMethodTree = false; From 0f0427f23b4c6731104f8f41f8f96353eb425256 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 8 Nov 2010 04:18:10 +0000 Subject: [PATCH 13/23] implement continuations support for ARM --- src/compile-arm.S | 159 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 147 insertions(+), 12 deletions(-) diff --git a/src/compile-arm.S 
b/src/compile-arm.S index 3ac3310030..dcf1aec53b 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -8,17 +8,13 @@ There is NO WARRANTY for this software. See license.txt for details. */ -#ifdef AVIAN_CONTINUATIONS -# error "Continuations not yet supported on ARM port" -#endif - #include "types.h" .text #define BYTES_PER_WORD 4 -#define LOCAL(x) L##x +#define LOCAL(x) .L##x #ifdef __APPLE__ # define GLOBAL(x) _##x @@ -33,6 +29,15 @@ #define THREAD_EXCEPTION_OFFSET 2156 #define THREAD_EXCEPTION_HANDLER 2160 +#define CONTINUATION_NEXT 4 +#define CONTINUATION_ADDRESS 16 +#define CONTINUATION_RETURN_ADDRESS_OFFSET 20 +#define CONTINUATION_FRAME_POINTER_OFFSET 24 +#define CONTINUATION_LENGTH 28 +#define CONTINUATION_BODY 32 + +#define ARGUMENT_BASE (BYTES_PER_WORD * 2) + .globl GLOBAL(vmInvoke) GLOBAL(vmInvoke): /* @@ -83,12 +88,8 @@ LOCAL(vmInvoke_argumentTest): // we use r8 to hold the thread pointer, by convention mov r8, r0 -.global GLOBAL(beforecall) -GLOBAL(beforecall): // load and call function address blx r1 -.global GLOBAL(aftercall) -GLOBAL(aftercall): .globl GLOBAL(vmInvoke_returnAddress) GLOBAL(vmInvoke_returnAddress): @@ -99,6 +100,75 @@ GLOBAL(vmInvoke_returnAddress): .globl GLOBAL(vmInvoke_safeStack) GLOBAL(vmInvoke_safeStack): +#ifdef AVIAN_CONTINUATIONS + // call the next continuation, if any + ldr r5,[r8,#THREAD_CONTINUATION] + cmp r5,#0 + beq LOCAL(vmInvoke_exit) + + ldr r6,[r5,#CONTINUATION_LENGTH] + lsl r6,r6,#2 + neg r7,r6 + add r7,r7,#-80 + mov r4,sp + str r4,[sp,r7]! 
+ + add r7,r5,#CONTINUATION_BODY + + mov r11,#0 + add r10,sp,#ARGUMENT_BASE + b LOCAL(vmInvoke_continuationTest) + +LOCAL(vmInvoke_continuationLoop): + ldr r9,[r7,r11] + str r9,[r10,r11] + add r11,r11,#4 + +LOCAL(vmInvoke_continuationTest): + cmp r11,r6 + ble LOCAL(vmInvoke_continuationLoop) + + ldr r7,[r5,#CONTINUATION_RETURN_ADDRESS_OFFSET] + ldr r10,LOCAL(vmInvoke_returnAddress_word) + ldr r11,LOCAL(vmInvoke_getAddress_word) +LOCAL(vmInvoke_getAddress): + add r11,pc,r11 + ldr r11,[r11,r10] + str r11,[sp,r7] + + ldr r7,[r5,#CONTINUATION_FRAME_POINTER_OFFSET] + ldr r11,[sp] + add r7,r7,sp + str r11,[r7] + str r7,[sp] + + ldr r7,[r5,#CONTINUATION_NEXT] + str r7,[r8,#THREAD_CONTINUATION] + + // call the continuation unless we're handling an exception + ldr r7,[r8,#THREAD_EXCEPTION] + cmp r7,#0 + bne LOCAL(vmInvoke_handleException) + ldr r7,[r5,#CONTINUATION_ADDRESS] + bx r7 + +LOCAL(vmInvoke_handleException): + // we're handling an exception - call the exception handler instead + mov r11,#0 + str r11,[r8,#THREAD_EXCEPTION] + ldr r11,[r8,#THREAD_EXCEPTION_STACK_ADJUSTMENT] + ldr r9,[sp] + neg r11,r11 + str r9,[sp,r11]! 
+ ldr r11,[r8,#THREAD_EXCEPTION_OFFSET] + str r7,[sp,r11] + + ldr r7,[r8,#THREAD_EXCEPTION_HANDLER] + bx r7 + +LOCAL(vmInvoke_exit): +#endif // AVIAN_CONTINUATIONS + mov ip, #0 str ip, [r8, #THREAD_STACK] @@ -124,6 +194,71 @@ LOCAL(vmInvoke_return): .globl GLOBAL(vmJumpAndInvoke) GLOBAL(vmJumpAndInvoke): - // vmJumpAndInvoke should only be called when continuations are - // enabled - bkpt +#ifdef AVIAN_CONTINUATIONS + // r0: thread + // r1: address + // r2: (unused) + // r3: stack + // [sp,#0]: argumentFootprint + // [sp,#4]: arguments + // [sp,#8]: frameSize + + ldr r4,[sp] + ldr r5,[sp,#4] + ldr r6,[sp,#8] + + // restore (pseudo)-stack pointer (we don't want to touch the real + // stack pointer, since we haven't copied the arguments yet) + ldr r3,[r3] + + // make everything between sp and r3 one big stack frame while we + // shuffle things around + str r3,[sp] + + // allocate new frame, adding room for callee-saved registers + neg r10,r6 + add r10,r10,#-80 + mov r2,r3 + str r2,[r3,r10]! 
+ + mov r8,r0 + + // copy arguments into place + mov r6,#0 + add r9,r3,#ARGUMENT_BASE + b LOCAL(vmJumpAndInvoke_argumentTest) + +LOCAL(vmJumpAndInvoke_argumentLoop): + ldr r12,[r5,r6] + str r12,[r9,r6] + add r6,r6,#4 + +LOCAL(vmJumpAndInvoke_argumentTest): + cmp r6,r4 + ble LOCAL(vmJumpAndInvoke_argumentLoop) + + // the arguments have been copied, so we can set the real stack + // pointer now + mov sp,r3 + + // set return address to vmInvoke_returnAddress + ldr r10,LOCAL(vmInvoke_returnAddress_word) + ldr r11,LOCAL(vmJumpAndInvoke_getAddress_word) +LOCAL(vmJumpAndInvoke_getAddress): + add r11,pc,r11 + ldr lr,[r11,r10] + + bx r1 +#else // not AVIAN_CONTINUATIONS + // vmJumpAndInvoke should only be called when continuations are + // enabled + bkpt +#endif // not AVIAN_CONTINUATIONS + +LOCAL(vmInvoke_returnAddress_word): + .word GLOBAL(vmInvoke_returnAddress)(GOT) +LOCAL(vmInvoke_getAddress_word): + .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmInvoke_getAddress)+8) +LOCAL(vmJumpAndInvoke_getAddress_word): + .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8) + From 632e50efe61c6b82fcf290efa992f452a7255643 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 9 Nov 2010 02:13:23 +0000 Subject: [PATCH 14/23] fix non-continuations ARM build --- src/compile-arm.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/compile-arm.S b/src/compile-arm.S index dcf1aec53b..555c61476b 100644 --- a/src/compile-arm.S +++ b/src/compile-arm.S @@ -249,11 +249,6 @@ LOCAL(vmJumpAndInvoke_getAddress): ldr lr,[r11,r10] bx r1 -#else // not AVIAN_CONTINUATIONS - // vmJumpAndInvoke should only be called when continuations are - // enabled - bkpt -#endif // not AVIAN_CONTINUATIONS LOCAL(vmInvoke_returnAddress_word): .word GLOBAL(vmInvoke_returnAddress)(GOT) @@ -262,3 +257,8 @@ LOCAL(vmInvoke_getAddress_word): LOCAL(vmJumpAndInvoke_getAddress_word): .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8) +#else // not AVIAN_CONTINUATIONS + // 
vmJumpAndInvoke should only be called when continuations are + // enabled + bkpt +#endif // not AVIAN_CONTINUATIONS From 51ba49def6dda47153a116523b830b2650983b65 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 8 Nov 2010 20:48:08 -0700 Subject: [PATCH 15/23] enable ARM cross builds --- makefile | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/makefile b/makefile index f0dcab31a5..9031e7a628 100644 --- a/makefile +++ b/makefile @@ -145,8 +145,9 @@ ifeq ($(arch),powerpc) pointer-size = 4 endif ifeq ($(arch),arm) - asm = arm - pointer-size = 4 + asm = arm + pointer-size = 4 + cflags += -Wno-psabi -march=armv5t endif ifeq ($(platform),darwin) @@ -179,6 +180,16 @@ ifeq ($(platform),darwin) endif endif +ifeq ($(arch),arm) + ifneq ($(arch),$(build-arch)) + cxx = arm-linux-gnueabi-g++ + cc = arm-linux-gnueabi-gcc + ar = arm-linux-gnueabi-ar + ranlib = arm-linux-gnueabi-ranlib + strip = arm-linux-gnueabi-strip + endif +endif + ifeq ($(platform),windows) inc = "$(root)/win32/include" lib = "$(root)/win32/lib" From 110f41b7a07e71d66858e812bdfe99719da2ff02 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 9 Nov 2010 11:28:58 -0700 Subject: [PATCH 16/23] ARM makefile tweaks --- makefile | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/makefile b/makefile index 9031e7a628..8c2f666220 100644 --- a/makefile +++ b/makefile @@ -147,7 +147,15 @@ endif ifeq ($(arch),arm) asm = arm pointer-size = 4 - cflags += -Wno-psabi -march=armv5t + cflags += -Wno-psabi + + ifneq ($(arch),$(build-arch)) + cxx = arm-linux-gnueabi-g++ + cc = arm-linux-gnueabi-gcc + ar = arm-linux-gnueabi-ar + ranlib = arm-linux-gnueabi-ranlib + strip = arm-linux-gnueabi-strip + endif endif ifeq ($(platform),darwin) @@ -180,16 +188,6 @@ ifeq ($(platform),darwin) endif endif -ifeq ($(arch),arm) - ifneq ($(arch),$(build-arch)) - cxx = arm-linux-gnueabi-g++ - cc = arm-linux-gnueabi-gcc - ar = arm-linux-gnueabi-ar - ranlib = 
arm-linux-gnueabi-ranlib - strip = arm-linux-gnueabi-strip - endif -endif - ifeq ($(platform),windows) inc = "$(root)/win32/include" lib = "$(root)/win32/lib" From 70fcbc2788f6304bcc99b5761d1f92b7ca165414 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 9 Nov 2010 11:34:56 -0700 Subject: [PATCH 17/23] freeze index site in BoundsCheckEvent::compile This ensures we don't use it as a temporary register when generating the comparison. --- src/compiler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler.cpp b/src/compiler.cpp index 44217336a2..dcfd325d42 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -4885,10 +4885,14 @@ class BoundsCheckEvent: public Event { lengthOffset, NoRegister, 1); length.acquired = true; + index->source->freeze(c, index); + ConstantSite next(nextPromise); apply(c, JumpIfGreater, 4, index->source, index->source, 4, &length, &length, BytesPerWord, &next, &next); + index->source->thaw(c, index); + if (constant == 0) { outOfBoundsPromise->offset = a->offset(); } From 7978102cb6e3dbe00435183cadaa13ee82f1ba9c Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 9 Nov 2010 11:36:38 -0700 Subject: [PATCH 18/23] use register for indexing if constant offset is too large (or too small) Immediate indexes on ARM must be no more than 12 bits, so we must use a temporary register for values which don't fit. 
--- src/arm.cpp | 26 ++++++++++++++++++++++++-- test/Arrays.java | 6 ++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index 13dc298032..fd3ce34ee6 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -866,7 +866,7 @@ store(Context* c, unsigned size, Assembler::Register* src, } if (release) c->client->releaseTemporary(normalized); - } else { + } else if (size == 8 or abs(offset) == (abs(offset) & 0xFFF)) { switch (size) { case 1: emit(c, strbi(src->low, base, offset)); @@ -888,6 +888,15 @@ store(Context* c, unsigned size, Assembler::Register* src, default: abort(c); } + } else { + Assembler::Register tmp(c->client->acquireTemporary()); + ResolvedPromise offsetPromise(offset); + Assembler::Constant offsetConstant(&offsetPromise); + moveCR(c, BytesPerWord, &offsetConstant, BytesPerWord, &tmp); + + store(c, size, src, base, 0, tmp.low, 1, false); + + c->client->releaseTemporary(tmp.low); } } @@ -962,7 +971,9 @@ load(Context* c, unsigned srcSize, int base, int offset, int index, } if (release) c->client->releaseTemporary(normalized); - } else { + } else if ((srcSize == 8 and dstSize == 8) + or abs(offset) == (abs(offset) & 0xFFF)) + { switch (srcSize) { case 1: if (signExtend) { @@ -996,6 +1007,15 @@ load(Context* c, unsigned srcSize, int base, int offset, int index, default: abort(c); } + } else { + Assembler::Register tmp(c->client->acquireTemporary()); + ResolvedPromise offsetPromise(offset); + Assembler::Constant offsetConstant(&offsetPromise); + moveCR(c, BytesPerWord, &offsetConstant, BytesPerWord, &tmp); + + load(c, srcSize, base, 0, tmp.low, 1, dstSize, dst, false, signExtend); + + c->client->releaseTemporary(tmp.low); } } @@ -1115,6 +1135,8 @@ compareRR(Context* c, unsigned aSize UNUSED, Assembler::Register* a, unsigned bSize UNUSED, Assembler::Register* b) { assert(c, aSize == 4 and bSize == 4); + assert(c, b->low != a->low); + emit(c, cmp(b->low, a->low)); } diff --git a/test/Arrays.java b/test/Arrays.java index 
2dd2bcfc30..1c620d17b3 100644 --- a/test/Arrays.java +++ b/test/Arrays.java @@ -54,5 +54,11 @@ public class Arrays { p = false; expect(array[1] == array[p ? 0 : 1]); } + + { int[] array = new int[1024]; + array[1023] = -1; + expect(array[1023] == -1); + expect(array[1022] == 0); + } } } From 6f555d42026af74feed5b0a61a47eafd6d7809c8 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 9 Nov 2010 17:31:42 -0700 Subject: [PATCH 19/23] minor code cleanup in compile.cpp --- src/compile.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index e5d7a7df2d..2b1688e63a 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -5995,14 +5995,6 @@ compileMethod2(MyThread* t, void* ip) t->trace->targetMethod = 0; } - if (false) { - compile(t, codeAllocator(t), 0, resolveMethod - (t, t->m->loader, - "org/eclipse/swt/widgets/TableItem", - "getBounds", - "(IIZZZZJ)Lorg/eclipse/swt/internal/win32/RECT;")); - } - if (UNLIKELY(t->exception)) { return 0; } else { @@ -7485,12 +7477,11 @@ class MyProcessor: public Processor { if (false) { compile(static_cast(t), - local::codeAllocator(static_cast(t)), 0, - resolveMethod(t, t->m->loader, - "com/ecovate/nat/logic/Cache", - "findInCache", - "(Ljava/lang/String;Ljava/lang/String;JZ)Lcom/ecovate/shared/xmlrpc/Resource;")); - trap(); + local::codeAllocator(static_cast(t)), 0, resolveMethod + (t, t->m->loader, + "org/eclipse/swt/widgets/Display", + "runSettings", + "()Z")); } compile(static_cast(t), From f21d2b68b8a0dfbef4013fa9cf04d9dbc87de7d0 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 9 Nov 2010 17:31:52 -0700 Subject: [PATCH 20/23] fix another ARM immediate offset bug Some memory operations can only handle 8-bit immediate values, so we need to use a temporary register for those which don't fit. 
--- src/arm.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index fd3ce34ee6..ea7b458531 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -44,7 +44,7 @@ inline int XFERI(int cond, int P, int U, int B, int W, int L, int Rn, int Rd, in inline int XFER2(int cond, int P, int U, int W, int L, int Rn, int Rd, int S, int H, int Rm) { return cond<<28 | P<<24 | U<<23 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | 1<<7 | S<<6 | H<<5 | 1<<4 | Rm; } inline int XFER2I(int cond, int P, int U, int W, int L, int Rn, int Rd, int offsetH, int S, int H, int offsetL) -{ return cond<<28 | P<<24 | U<<23 | 1<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offsetH<<8 | 1<<7 | S<<6 | H<<5 | 1<<4 | offsetL; } +{ return cond<<28 | P<<24 | U<<23 | 1<<22 | W<<21 | L<<20 | Rn<<16 | Rd<<12 | offsetH<<8 | 1<<7 | S<<6 | H<<5 | 1<<4 | (offsetL&0xf); } inline int BLOCKXFER(int cond, int P, int U, int S, int W, int L, int Rn, int rlist) { return cond<<28 | 4<<25 | P<<24 | U<<23 | S<<22 | W<<21 | L<<20 | Rn<<16 | rlist; } inline int SWI(int cond, int imm) @@ -866,7 +866,10 @@ store(Context* c, unsigned size, Assembler::Register* src, } if (release) c->client->releaseTemporary(normalized); - } else if (size == 8 or abs(offset) == (abs(offset) & 0xFFF)) { + } else if (size == 8 + or abs(offset) == (abs(offset) & 0xFF) + or (size != 2 and abs(offset) == (abs(offset) & 0xFFF))) + { switch (size) { case 1: emit(c, strbi(src->low, base, offset)); @@ -972,7 +975,10 @@ load(Context* c, unsigned srcSize, int base, int offset, int index, if (release) c->client->releaseTemporary(normalized); } else if ((srcSize == 8 and dstSize == 8) - or abs(offset) == (abs(offset) & 0xFFF)) + or abs(offset) == (abs(offset) & 0xFF) + or (srcSize != 2 + and (srcSize != 1 or not signExtend) + and abs(offset) == (abs(offset) & 0xFFF))) { switch (srcSize) { case 1: From 6bf74bf380d1fc2d89a24a4620c13e962c62a5a9 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 13 Nov 2010 19:28:05 -0700 
Subject: [PATCH 21/23] optimize loads of constant values by using PC-relative addressing on ARM Previously, loading an arbitrary 32-bit constant required up to four instructions (16 bytes), since we did so one byte at a time via immediate-mode operations. The preferred way to load constants on ARM is via PC-relative addressing, but this is challenging because immediate memory offsets are limited to 4096 bytes in either direction. We frequently need to compile methods which are larger than 4096, or even 8192, bytes, so we must intersperse code and data if we want to use PC-relative loads everywhere. This commit enables pervasive PC-relative loads by handling the following cases: 1. Method is shorter than 4096 bytes: append data table to end 2. Method is longer than 4096 bytes, but no basic block is longer than 4096 bytes: insert data tables as necessary after blocks, taking care to minimize the total number of tables 3. Method is longer than 4096 bytes, and some blocks are longer than 4096 bytes: split large basic blocks and insert data tables as above --- src/arm.cpp | 507 +++++++++++++++++++++++++++++------------------ src/assembler.h | 4 +- src/compile.cpp | 44 ++-- src/compiler.cpp | 4 +- src/posix.cpp | 1 + src/powerpc.cpp | 8 +- src/x86.cpp | 8 +- 7 files changed, 348 insertions(+), 228 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index ea7b458531..27ba7d4e10 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -175,20 +175,46 @@ const int StackRegister = 13; const int LinkRegister = 14; const int ProgramCounter = 15; +const unsigned PoolOffsetMask = 0xFFF; + +const bool DebugPool = false; + +class Context; +class MyBlock; +class PoolOffset; +class PoolEvent; + +void +resolve(MyBlock*); + +unsigned +padding(MyBlock*, unsigned); + class MyBlock: public Assembler::Block { public: - MyBlock(unsigned offset): - next(0), offset(offset), start(~0), size(0) + MyBlock(Context* context, unsigned offset): + context(context), next(0), poolOffsetHead(0),
poolOffsetTail(0), + lastPoolOffsetTail(0), poolEventHead(0), poolEventTail(0), + lastEventOffset(0), offset(offset), start(~0), size(0) { } virtual unsigned resolve(unsigned start, Assembler::Block* next) { this->start = start; this->next = static_cast(next); - return start + size; + ::resolve(this); + + return start + size + padding(this, size); } + Context* context; MyBlock* next; + PoolOffset* poolOffsetHead; + PoolOffset* poolOffsetTail; + PoolOffset* lastPoolOffsetTail; + PoolEvent* poolEventHead; + PoolEvent* poolEventTail; + unsigned lastEventOffset; unsigned offset; unsigned start; unsigned size; @@ -201,8 +227,9 @@ class Context { public: Context(System* s, Allocator* a, Zone* zone): s(s), zone(zone), client(0), code(s, a, 1024), tasks(0), result(0), - firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(0)), - lastBlock(firstBlock), constantPool(0), constantPoolCount(0) + firstBlock(new (zone->allocate(sizeof(MyBlock))) MyBlock(this, 0)), + lastBlock(firstBlock), poolOffsetHead(0), poolOffsetTail(0), + constantPool(0), constantPoolCount(0) { } System* s; @@ -213,6 +240,8 @@ class Context { uint8_t* result; MyBlock* firstBlock; MyBlock* lastBlock; + PoolOffset* poolOffsetHead; + PoolOffset* poolOffsetTail; ConstantPoolEntry* constantPool; unsigned constantPoolCount; }; @@ -302,7 +331,8 @@ class Offset: public Promise { virtual int64_t value() { assert(c, resolved()); - return block->start + (offset - block->offset); + unsigned o = offset - block->offset; + return block->start + padding(block, o) + o; } Context* c; @@ -324,7 +354,7 @@ bounded(int right, int left, int32_t v) } void* -updateOffset(System* s, uint8_t* instruction, bool conditional UNUSED, int64_t value) +updateOffset(System* s, uint8_t* instruction, int64_t value) { // ARM's PC is two words ahead, and branches drop the bottom 2 bits. 
int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; @@ -341,56 +371,48 @@ updateOffset(System* s, uint8_t* instruction, bool conditional UNUSED, int64_t v class OffsetListener: public Promise::Listener { public: - OffsetListener(System* s, uint8_t* instruction, bool conditional): + OffsetListener(System* s, uint8_t* instruction): s(s), - instruction(instruction), - conditional(conditional) + instruction(instruction) { } virtual bool resolve(int64_t value, void** location) { - void* p = updateOffset(s, instruction, conditional, value); + void* p = updateOffset(s, instruction, value); if (location) *location = p; return false; } System* s; uint8_t* instruction; - bool conditional; }; class OffsetTask: public Task { public: - OffsetTask(Task* next, Promise* promise, Promise* instructionOffset, - bool conditional): + OffsetTask(Task* next, Promise* promise, Promise* instructionOffset): Task(next), promise(promise), - instructionOffset(instructionOffset), - conditional(conditional) + instructionOffset(instructionOffset) { } virtual void run(Context* c) { if (promise->resolved()) { updateOffset - (c->s, c->result + instructionOffset->value(), conditional, - promise->value()); + (c->s, c->result + instructionOffset->value(), promise->value()); } else { new (promise->listen(sizeof(OffsetListener))) - OffsetListener(c->s, c->result + instructionOffset->value(), - conditional); + OffsetListener(c->s, c->result + instructionOffset->value()); } } Promise* promise; Promise* instructionOffset; - bool conditional; }; void -appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset, - bool conditional) +appendOffsetTask(Context* c, Promise* promise, Promise* instructionOffset) { c->tasks = new (c->zone->allocate(sizeof(OffsetTask))) OffsetTask - (c->tasks, promise, instructionOffset, conditional); + (c->tasks, promise, instructionOffset); } inline unsigned @@ -449,6 +471,12 @@ inline int newTemp(Context* con) { return con->client->acquireTemporary(); } 
inline void freeTemp(Context* con, int r) { con->client->releaseTemporary(r); } inline int64_t getValue(Assembler::Constant* c) { return c->value->value(); } +inline void +write4(uint8_t* dst, uint32_t v) +{ + memcpy(dst, &v, 4); +} + void shiftLeftR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { if (size == 8) { @@ -517,89 +545,11 @@ void unsignedShiftRightC(Context* con, unsigned size UNUSED, Assembler::Constant emit(con, lsri(t->low, b->low, getValue(a))); } -void -updateImmediate(System* s, void* dst, int64_t src, unsigned size, bool) -{ - switch (size) { - case 4: { - int32_t* p = static_cast(dst); - int r = (p[0] >> 12) & 15; - - p[0] = movi(r, lo8(src)); - p[1] = orri(r, r, hi8(src), 12); - p[2] = orri(r, r, lo8(hi16(src)), 8); - p[3] = orri(r, r, hi8(hi16(src)), 4); - } break; - - default: abort(s); - } -} - -class ImmediateListener: public Promise::Listener { - public: - ImmediateListener(System* s, void* dst, unsigned size, unsigned offset, - bool address): - s(s), dst(dst), size(size), offset(offset), address(address) - { } - - virtual bool resolve(int64_t value, void** location) { - updateImmediate(s, dst, value, size, address); - if (location) *location = static_cast(dst) + offset; - return false; - } - - System* s; - void* dst; - unsigned size; - unsigned offset; - bool address; -}; - -class ImmediateTask: public Task { - public: - ImmediateTask(Task* next, Promise* promise, Promise* offset, unsigned size, - unsigned promiseOffset, bool address): - Task(next), - promise(promise), - offset(offset), - size(size), - promiseOffset(promiseOffset), - address(address) - { } - - virtual void run(Context* c) { - if (promise->resolved()) { - updateImmediate - (c->s, c->result + offset->value(), promise->value(), size, address); - } else { - new (promise->listen(sizeof(ImmediateListener))) ImmediateListener - (c->s, c->result + offset->value(), size, promiseOffset, address); - } - } - - Promise* promise; - 
Promise* offset; - unsigned size; - unsigned promiseOffset; - bool address; -}; - -void -appendImmediateTask(Context* c, Promise* promise, Promise* offset, - unsigned size, unsigned promiseOffset, bool address) -{ - c->tasks = new (c->zone->allocate(sizeof(ImmediateTask))) ImmediateTask - (c->tasks, promise, offset, size, promiseOffset, address); -} - class ConstantPoolEntry: public Promise { public: - ConstantPoolEntry(Context* c, Promise* constant): - c(c), constant(constant), next(c->constantPool), address(0) - { - c->constantPool = this; - ++ c->constantPoolCount; - } + ConstantPoolEntry(Context* c, Promise* constant, ConstantPoolEntry* next): + c(c), constant(constant), next(next), address(0) + { } virtual int64_t value() { assert(c, resolved()); @@ -618,11 +568,158 @@ class ConstantPoolEntry: public Promise { unsigned constantPoolCount; }; -ConstantPoolEntry* +class ConstantPoolListener: public Promise::Listener { + public: + ConstantPoolListener(System* s, uintptr_t* address): + s(s), + address(address) + { } + + virtual bool resolve(int64_t value, void** location) { + *address = value; + if (location) *location = address; + return true; + } + + System* s; + uintptr_t* address; +}; + +class PoolOffset { + public: + PoolOffset(MyBlock* block, ConstantPoolEntry* entry, unsigned offset): + block(block), entry(entry), next(0), offset(offset) + { } + + MyBlock* block; + ConstantPoolEntry* entry; + PoolOffset* next; + unsigned offset; +}; + +class PoolEvent { + public: + PoolEvent(PoolOffset* poolOffsetHead, PoolOffset* poolOffsetTail, + unsigned offset): + poolOffsetHead(poolOffsetHead), poolOffsetTail(poolOffsetTail), next(0), + offset(offset) + { } + + PoolOffset* poolOffsetHead; + PoolOffset* poolOffsetTail; + PoolEvent* next; + unsigned offset; +}; + +void appendConstantPoolEntry(Context* c, Promise* constant) { - return new (c->zone->allocate(sizeof(ConstantPoolEntry))) - ConstantPoolEntry(c, constant); + if (constant->resolved()) { + // make a copy, since 
the original might be allocated on the + // stack, and we need our copy to live until assembly is complete + constant = new (c->zone->allocate(sizeof(ResolvedPromise))) + ResolvedPromise(constant->value()); + } + + c->constantPool = new (c->zone->allocate(sizeof(ConstantPoolEntry))) + ConstantPoolEntry(c, constant, c->constantPool); + + ++ c->constantPoolCount; + + PoolOffset* o = new (c->zone->allocate(sizeof(PoolOffset))) PoolOffset + (c->lastBlock, c->constantPool, c->code.length() - c->lastBlock->offset); + + if (DebugPool) { + fprintf(stderr, "add pool offset %p %d to block %p\n", + o, o->offset, c->lastBlock); + } + + if (c->lastBlock->poolOffsetTail) { + c->lastBlock->poolOffsetTail->next = o; + } else { + c->lastBlock->poolOffsetHead = o; + } + c->lastBlock->poolOffsetTail = o; +} + +void +appendPoolEvent(Context* c, MyBlock* b, unsigned offset, PoolOffset* head, + PoolOffset* tail) +{ + PoolEvent* e = new (c->zone->allocate(sizeof(PoolEvent))) PoolEvent + (head, tail, offset); + + if (b->poolEventTail) { + b->poolEventTail->next = e; + } else { + b->poolEventHead = e; + } + b->poolEventTail = e; +} + +unsigned +padding(MyBlock* b, unsigned offset) +{ + unsigned total = 0; + for (PoolEvent* e = b->poolEventHead; e; e = e->next) { + if (e->offset <= offset) { + total += BytesPerWord; + for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { + total += BytesPerWord; + } + } else { + break; + } + } + return total; +} + +void +resolve(MyBlock* b) +{ + Context* c = b->context; + + if (b->poolOffsetHead) { + if (c->poolOffsetTail) { + c->poolOffsetTail->next = b->poolOffsetHead; + } else { + c->poolOffsetHead = b->poolOffsetHead; + } + c->poolOffsetTail = b->poolOffsetTail; + } + + if (c->poolOffsetHead) { + bool append; + if (b->next == 0 or b->next->poolEventHead) { + append = true; + } else { + int32_t v = (b->offset + b->size + b->next->size + BytesPerWord - 8) + - (c->poolOffsetHead->offset + c->poolOffsetHead->block->offset); + + append = (v != (v & 
PoolOffsetMask)); + + if (DebugPool) { + fprintf(stderr, + "offset %p %d is of distance %d to next block; append? %d\n", + c->poolOffsetHead, c->poolOffsetHead->offset, v, append); + } + } + + if (append) { + appendPoolEvent(c, b, b->size, c->poolOffsetHead, c->poolOffsetTail); + + if (DebugPool) { + for (PoolOffset* o = c->poolOffsetHead; o; o = o->next) { + fprintf(stderr, + "include %p %d in pool event %p at offset %d in block %p\n", + o, o->offset, b->poolEventTail, b->size, b); + } + } + + c->poolOffsetHead = 0; + c->poolOffsetTail = 0; + } + } } void @@ -710,28 +807,14 @@ moveZRR(Context* c, unsigned srcSize, Assembler::Register* src, void moveCR2(Context* c, unsigned, Assembler::Constant* src, - unsigned dstSize, Assembler::Register* dst, unsigned promiseOffset) + unsigned dstSize, Assembler::Register* dst) { if (dstSize <= 4) { - if (src->value->resolved()) { - int32_t i = getValue(src); - emit(c, movi(dst->low, lo8(i))); - if (!isOfWidth(i, 8)) { - emit(c, orri(dst->low, dst->low, hi8(i), 12)); - if (!isOfWidth(i, 16)) { - emit(c, orri(dst->low, dst->low, lo8(hi16(i)), 8)); - if (!isOfWidth(i, 24)) { - emit(c, orri(dst->low, dst->low, hi8(hi16(i)), 4)); - } - } - } + if (src->value->resolved() and isOfWidth(getValue(src), 8)) { + emit(c, movi(dst->low, lo8(getValue(src)))); } else { - appendImmediateTask - (c, src->value, offset(c), BytesPerWord, promiseOffset, false); - emit(c, movi(dst->low, 0)); - emit(c, orri(dst->low, dst->low, 0, 12)); - emit(c, orri(dst->low, dst->low, 0, 8)); - emit(c, orri(dst->low, dst->low, 0, 4)); + appendConstantPoolEntry(c, src->value); + emit(c, ldri(dst->low, ProgramCounter, 0)); } } else { abort(c); // todo @@ -742,7 +825,7 @@ void moveCR(Context* c, unsigned srcSize, Assembler::Constant* src, unsigned dstSize, Assembler::Register* dst) { - moveCR2(c, srcSize, src, dstSize, dst, 0); + moveCR2(c, srcSize, src, dstSize, dst); } void addR(Context* con, unsigned size, Assembler::Register* a, Assembler::Register* b, 
Assembler::Register* t) { @@ -1115,17 +1198,14 @@ xorR(Context* con, unsigned size, Assembler::Register* a, void moveAR2(Context* c, unsigned srcSize, Assembler::Address* src, - unsigned dstSize, Assembler::Register* dst, unsigned promiseOffset) + unsigned dstSize, Assembler::Register* dst) { assert(c, srcSize == 4 and dstSize == 4); Assembler::Constant constant(src->address); - Assembler::Memory memory(dst->low, 0, -1, 0); - - appendImmediateTask - (c, src->address, offset(c), BytesPerWord, promiseOffset, true); - moveCR(c, srcSize, &constant, dstSize, dst); + + Assembler::Memory memory(dst->low, 0, -1, 0); moveMR(c, dstSize, &memory, dstSize, dst); } @@ -1133,7 +1213,7 @@ void moveAR(Context* c, unsigned srcSize, Assembler::Address* src, unsigned dstSize, Assembler::Register* dst) { - moveAR2(c, srcSize, src, dstSize, dst, 0); + moveAR2(c, srcSize, src, dstSize, dst); } void @@ -1216,7 +1296,7 @@ branch(Context* c, TernaryOperation op) void conditional(Context* c, int32_t branch, Assembler::Constant* target) { - appendOffsetTask(c, target->value, offset(c), true); + appendOffsetTask(c, target->value, offset(c)); emit(c, branch); } @@ -1299,7 +1379,7 @@ branchLong(Context* c, TernaryOperation op, Assembler::Operand* al, if (next) { updateOffset - (c->s, c->code.data + next, true, reinterpret_cast + (c->s, c->code.data + next, reinterpret_cast (c->code.data + c->code.length())); } } @@ -1426,7 +1506,7 @@ callC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { assert(c, size == BytesPerWord); - appendOffsetTask(c, target->value, offset(c), false); + appendOffsetTask(c, target->value, offset(c)); emit(c, bl(0)); } @@ -1436,50 +1516,26 @@ longCallC(Context* c, unsigned size UNUSED, Assembler::Constant* target) assert(c, size == BytesPerWord); Assembler::Register tmp(4); - moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, 12); + moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp); callR(c, BytesPerWord, &tmp); } -void -alignedLongCallC(Context* c, 
unsigned size UNUSED, Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - Assembler::Register tmp(c->client->acquireTemporary()); - Assembler::Address address(appendConstantPoolEntry(c, target->value)); - moveAR2(c, BytesPerWord, &address, BytesPerWord, &tmp, 12); - callR(c, BytesPerWord, &tmp); - c->client->releaseTemporary(tmp.low); -} - void longJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { assert(c, size == BytesPerWord); Assembler::Register tmp(4); // a non-arg reg that we don't mind clobbering - moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, 12); + moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp); jumpR(c, BytesPerWord, &tmp); } -void -alignedLongJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) -{ - assert(c, size == BytesPerWord); - - Assembler::Register tmp(c->client->acquireTemporary()); - Assembler::Address address(appendConstantPoolEntry(c, target->value)); - moveAR2(c, BytesPerWord, &address, BytesPerWord, &tmp, 12); - jumpR(c, BytesPerWord, &tmp); - c->client->releaseTemporary(tmp.low); -} - void jumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) { assert(c, size == BytesPerWord); - appendOffsetTask(c, target->value, offset(c), false); + appendOffsetTask(c, target->value, offset(c)); emit(c, b(0)); } @@ -1515,11 +1571,11 @@ populateTables(ArchitectureContext* c) uo[index(c, LongCall, C)] = CAST1(longCallC); - uo[index(c, AlignedLongCall, C)] = CAST1(alignedLongCallC); + uo[index(c, AlignedLongCall, C)] = CAST1(longCallC); uo[index(c, LongJump, C)] = CAST1(longJumpC); - uo[index(c, AlignedLongJump, C)] = CAST1(alignedLongJumpC); + uo[index(c, AlignedLongJump, C)] = CAST1(longJumpC); uo[index(c, Jump, R)] = CAST1(jumpR); uo[index(c, Jump, C)] = CAST1(jumpC); @@ -1674,19 +1730,14 @@ class MyArchitecture: public Assembler::Architecture { switch (op) { case Call: case Jump: + case LongCall: + case LongJump: case AlignedCall: case AlignedJump: { - updateOffset(c.s, 
static_cast(returnAddress) - 4, false, + updateOffset(c.s, static_cast(returnAddress) - 4, reinterpret_cast(newTarget)); } break; - case LongCall: - case LongJump: { - updateImmediate(c.s, static_cast(returnAddress) - 12, - reinterpret_cast(newTarget), BytesPerWord, - false); - } break; - case AlignedLongCall: case AlignedLongJump: { uint32_t* p = static_cast(returnAddress) - 4; @@ -1703,7 +1754,7 @@ class MyArchitecture: public Assembler::Architecture { } virtual void setConstant(void* dst, uintptr_t constant) { - updateImmediate(c.s, dst, constant, BytesPerWord, false); + *static_cast(dst) = constant; } virtual unsigned alignFrameSize(unsigned sizeInWords) { @@ -2145,23 +2196,67 @@ class MyAssembler: public Assembler { virtual void writeTo(uint8_t* dst) { c.result = dst; + unsigned dstOffset = 0; for (MyBlock* b = c.firstBlock; b; b = b->next) { - memcpy(dst + b->start, c.code.data + b->offset, b->size); + if (DebugPool) { + fprintf(stderr, "write block %p\n", b); + } + + unsigned blockOffset = 0; + for (PoolEvent* e = b->poolEventHead; e; e = e->next) { + unsigned size = e->offset - blockOffset; + memcpy(dst + dstOffset, c.code.data + b->offset + blockOffset, size); + blockOffset = e->offset; + dstOffset += size; + + unsigned poolSize = 0; + for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { + if (DebugPool) { + fprintf(stderr, "visit pool offset %p %d in block %p\n", + o, o->offset, b); + } + + poolSize += BytesPerWord; + + unsigned entry = dstOffset + poolSize; + + o->entry->address = dst + entry; + + unsigned instruction = o->block->start + + padding(o->block, o->offset) + o->offset; + + int32_t v = (entry - 8) - instruction; + expect(&c, v == (v & PoolOffsetMask)); + + int32_t* p = reinterpret_cast(dst + instruction); + *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); + } + + write4(dst + dstOffset, ::b((poolSize + BytesPerWord - 8) >> 2)); + + dstOffset += poolSize + BytesPerWord; + } + + unsigned size = b->size - blockOffset; + + memcpy(dst + 
dstOffset, + c.code.data + b->offset + blockOffset, + size); + + dstOffset += size; } - - unsigned index = c.code.length(); - assert(&c, index % BytesPerWord == 0); - for (ConstantPoolEntry* e = c.constantPool; e; e = e->next) { - e->address = dst + index; - index += BytesPerWord; - } - + for (Task* t = c.tasks; t; t = t->next) { t->run(&c); } for (ConstantPoolEntry* e = c.constantPool; e; e = e->next) { - *static_cast(e->address) = e->constant->value(); + if (e->constant->resolved()) { + *static_cast(e->address) = e->constant->value(); + } else { + new (e->constant->listen(sizeof(ConstantPoolListener))) + ConstantPoolListener(c.s, static_cast(e->address)); + } // fprintf(stderr, "constant %p at %p\n", reinterpret_cast(e->constant->value()), e->address); } } @@ -2175,19 +2270,49 @@ class MyAssembler: public Assembler { b->size = c.code.length() - b->offset; if (startNew) { c.lastBlock = new (c.zone->allocate(sizeof(MyBlock))) - MyBlock(c.code.length()); + MyBlock(&c, c.code.length()); } else { c.lastBlock = 0; } return b; } - virtual unsigned length() { - return c.code.length(); + virtual void endEvent() { + MyBlock* b = c.lastBlock; + unsigned thisEventOffset = c.code.length() - b->offset; + if (b->poolOffsetHead) { + int32_t v = (thisEventOffset + BytesPerWord - 8) + - b->poolOffsetHead->offset; + + if (v > 0 and v != (v & PoolOffsetMask)) { + appendPoolEvent + (&c, b, b->lastEventOffset, b->poolOffsetHead, + b->lastPoolOffsetTail); + + if (DebugPool) { + for (PoolOffset* o = b->poolOffsetHead; + o != b->lastPoolOffsetTail->next; o = o->next) + { + fprintf(stderr, + "in endEvent, include %p %d in pool event %p at offset %d " + "in block %p\n", + o, o->offset, b->poolEventTail, b->lastEventOffset, b); + } + } + + b->poolOffsetHead = b->lastPoolOffsetTail->next; + b->lastPoolOffsetTail->next = 0; + if (b->poolOffsetHead == 0) { + b->poolOffsetTail = 0; + } + } + } + b->lastEventOffset = thisEventOffset; + b->lastPoolOffsetTail = b->poolOffsetTail; } - virtual 
unsigned scratchSize() { - return c.constantPoolCount * BytesPerWord; + virtual unsigned length() { + return c.code.length(); } virtual void dispose() { diff --git a/src/assembler.h b/src/assembler.h index 918c548acb..70002768ca 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -418,9 +418,9 @@ class Assembler { virtual Block* endBlock(bool startNew) = 0; - virtual unsigned length() = 0; + virtual void endEvent() = 0; - virtual unsigned scratchSize() = 0; + virtual unsigned length() = 0; virtual void dispose() = 0; }; diff --git a/src/compile.cpp b/src/compile.cpp index 2b1688e63a..a3b93b1564 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -5391,14 +5391,14 @@ codeSingletonSizeInBytes(MyThread*, unsigned codeSizeInBytes) } uint8_t* -finish(MyThread* t, Allocator* allocator, Assembler* a, const char* name) +finish(MyThread* t, Allocator* allocator, Assembler* a, const char* name, + unsigned length) { - uint8_t* start = static_cast - (allocator->allocate(pad(a->length()))); + uint8_t* start = static_cast(allocator->allocate(pad(length))); a->writeTo(start); - logCompile(t, start, a->length(), 0, name, 0); + logCompile(t, start, length, 0, name, 0); return start; } @@ -8289,9 +8289,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Register result(t->arch->returnLow()); a->apply(Jump, BytesPerWord, RegisterOperand, &result); - a->endBlock(false)->resolve(0, 0); - - p->thunks.default_.length = a->length(); + p->thunks.default_.length = a->endBlock(false)->resolve(0, 0); } ThunkContext defaultVirtualContext(t, &zone); @@ -8335,9 +8333,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Register result(t->arch->returnLow()); a->apply(Jump, BytesPerWord, RegisterOperand, &result); - a->endBlock(false)->resolve(0, 0); - - p->thunks.defaultVirtual.length = a->length(); + p->thunks.defaultVirtual.length = a->endBlock(false)->resolve(0, 0); } ThunkContext nativeContext(t, &zone); @@ -8356,9 +8352,7 @@ 
compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) a->popFrameAndUpdateStackAndReturn(difference(&(t->stack), t)); - a->endBlock(false)->resolve(0, 0); - - p->thunks.native.length = a->length(); + p->thunks.native.length = a->endBlock(false)->resolve(0, 0); } ThunkContext aioobContext(t, &zone); @@ -8375,9 +8369,7 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Constant proc(&(aioobContext.promise)); a->apply(LongCall, BytesPerWord, ConstantOperand, &proc); - a->endBlock(false)->resolve(0, 0); - - p->thunks.aioob.length = a->length(); + p->thunks.aioob.length = a->endBlock(false)->resolve(0, 0); } ThunkContext tableContext(t, &zone); @@ -8391,13 +8383,12 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) Assembler::Constant proc(&(tableContext.promise)); a->apply(LongJump, BytesPerWord, ConstantOperand, &proc); - a->endBlock(false)->resolve(0, 0); - - p->thunks.table.length = a->length(); + p->thunks.table.length = a->endBlock(false)->resolve(0, 0); } p->thunks.default_.start = finish - (t, allocator, defaultContext.context.assembler, "default"); + (t, allocator, defaultContext.context.assembler, "default", + p->thunks.default_.length); BootImage* image = p->bootImage; uint8_t* imageBase = p->codeAllocator.base; @@ -8412,7 +8403,8 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) } p->thunks.defaultVirtual.start = finish - (t, allocator, defaultVirtualContext.context.assembler, "defaultVirtual"); + (t, allocator, defaultVirtualContext.context.assembler, "defaultVirtual", + p->thunks.defaultVirtual.length); { void* call; defaultVirtualContext.promise.listener->resolve @@ -8425,7 +8417,8 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) } p->thunks.native.start = finish - (t, allocator, nativeContext.context.assembler, "native"); + (t, allocator, nativeContext.context.assembler, "native", + p->thunks.native.length); { void* call; nativeContext.promise.listener->resolve 
@@ -8437,7 +8430,8 @@ compileThunks(MyThread* t, Allocator* allocator, MyProcessor* p) } p->thunks.aioob.start = finish - (t, allocator, aioobContext.context.assembler, "aioob"); + (t, allocator, aioobContext.context.assembler, "aioob", + p->thunks.aioob.length); { void* call; aioobContext.promise.listener->resolve @@ -8573,9 +8567,7 @@ compileVirtualThunk(MyThread* t, unsigned index, unsigned* size) Assembler::Constant thunk(&defaultVirtualThunkPromise); a->apply(Jump, BytesPerWord, ConstantOperand, &thunk); - a->endBlock(false)->resolve(0, 0); - - *size = a->length(); + *size = a->endBlock(false)->resolve(0, 0); uint8_t* start = static_cast(codeAllocator(t)->allocate(*size)); diff --git a/src/compiler.cpp b/src/compiler.cpp index dcfd325d42..2650fb7970 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -5698,6 +5698,8 @@ compile(Context* c) p->offset = a->offset(); } + a->endEvent(); + LogicalInstruction* nextInstruction = next(c, e->logicalInstruction); if (e->next == 0 or (e->next->logicalInstruction != e->logicalInstruction @@ -5735,7 +5737,7 @@ compile(Context* c) block = next; } - return block->assemblerBlock->resolve(block->start, 0) + a->scratchSize(); + return block->assemblerBlock->resolve(block->start, 0); } unsigned diff --git a/src/posix.cpp b/src/posix.cpp index da0397c107..a8e92eb8e4 100644 --- a/src/posix.cpp +++ b/src/posix.cpp @@ -782,6 +782,7 @@ class MySystem: public System { } virtual void abort() { + *static_cast(0) = 0; ::abort(); } diff --git a/src/powerpc.cpp b/src/powerpc.cpp index eb1d5ef33b..ce8a6bbb4b 100644 --- a/src/powerpc.cpp +++ b/src/powerpc.cpp @@ -2393,12 +2393,12 @@ class MyAssembler: public Assembler { return b; } - virtual unsigned length() { - return c.code.length(); + virtual void endEvent() { + // ignore } - virtual unsigned scratchSize() { - return c.constantPoolCount * BytesPerWord; + virtual unsigned length() { + return c.code.length(); } virtual void dispose() { diff --git a/src/x86.cpp b/src/x86.cpp index 
2d4d3e55f7..3ff4f716bb 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -3532,12 +3532,12 @@ class MyAssembler: public Assembler { return b; } - virtual unsigned length() { - return c.code.length(); + virtual void endEvent() { + // ignore } - virtual unsigned scratchSize() { - return 0; + virtual unsigned length() { + return c.code.length(); } virtual void dispose() { From 3fb834d00886c6aed10cc8ecf7c1cf565aae497a Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Mon, 15 Nov 2010 23:56:34 +0000 Subject: [PATCH 22/23] fix pre-GCC-4.4 ARM build --- src/arm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arm.cpp b/src/arm.cpp index 27ba7d4e10..ccb0ece4d6 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -175,7 +175,7 @@ const int StackRegister = 13; const int LinkRegister = 14; const int ProgramCounter = 15; -const unsigned PoolOffsetMask = 0xFFF; +const int32_t PoolOffsetMask = 0xFFF; const bool DebugPool = false; From bc326fb5e97f11005ff604193abff1d9e522b192 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 16 Nov 2010 02:38:36 +0000 Subject: [PATCH 23/23] fix ARM bootimage=true build --- src/arm.cpp | 44 +++++++++++++++++++++++--------------- src/binaryToObject/elf.cpp | 2 +- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/arm.cpp b/src/arm.cpp index ccb0ece4d6..46e87a3b23 100644 --- a/src/arm.cpp +++ b/src/arm.cpp @@ -547,8 +547,10 @@ void unsignedShiftRightC(Context* con, unsigned size UNUSED, Assembler::Constant class ConstantPoolEntry: public Promise { public: - ConstantPoolEntry(Context* c, Promise* constant, ConstantPoolEntry* next): - c(c), constant(constant), next(next), address(0) + ConstantPoolEntry(Context* c, Promise* constant, ConstantPoolEntry* next, + Promise* callOffset): + c(c), constant(constant), next(next), callOffset(callOffset), + address(0) { } virtual int64_t value() { @@ -564,25 +566,30 @@ class ConstantPoolEntry: public Promise { Context* c; Promise* constant; ConstantPoolEntry* next; + Promise* 
callOffset; void* address; unsigned constantPoolCount; }; class ConstantPoolListener: public Promise::Listener { public: - ConstantPoolListener(System* s, uintptr_t* address): + ConstantPoolListener(System* s, uintptr_t* address, uint8_t* returnAddress): s(s), - address(address) + address(address), + returnAddress(returnAddress) { } virtual bool resolve(int64_t value, void** location) { *address = value; - if (location) *location = address; + if (location) { + *location = returnAddress ? static_cast(returnAddress) : address; + } return true; } System* s; uintptr_t* address; + uint8_t* returnAddress; }; class PoolOffset { @@ -612,7 +619,7 @@ class PoolEvent { }; void -appendConstantPoolEntry(Context* c, Promise* constant) +appendConstantPoolEntry(Context* c, Promise* constant, Promise* callOffset) { if (constant->resolved()) { // make a copy, since the original might be allocated on the @@ -622,7 +629,7 @@ appendConstantPoolEntry(Context* c, Promise* constant) } c->constantPool = new (c->zone->allocate(sizeof(ConstantPoolEntry))) - ConstantPoolEntry(c, constant, c->constantPool); + ConstantPoolEntry(c, constant, c->constantPool, callOffset); ++ c->constantPoolCount; @@ -807,13 +814,13 @@ moveZRR(Context* c, unsigned srcSize, Assembler::Register* src, void moveCR2(Context* c, unsigned, Assembler::Constant* src, - unsigned dstSize, Assembler::Register* dst) + unsigned dstSize, Assembler::Register* dst, Promise* callOffset) { if (dstSize <= 4) { if (src->value->resolved() and isOfWidth(getValue(src), 8)) { emit(c, movi(dst->low, lo8(getValue(src)))); } else { - appendConstantPoolEntry(c, src->value); + appendConstantPoolEntry(c, src->value, callOffset); emit(c, ldri(dst->low, ProgramCounter, 0)); } } else { @@ -825,7 +832,7 @@ void moveCR(Context* c, unsigned srcSize, Assembler::Constant* src, unsigned dstSize, Assembler::Register* dst) { - moveCR2(c, srcSize, src, dstSize, dst); + moveCR2(c, srcSize, src, dstSize, dst, 0); } void addR(Context* con, unsigned size, 
Assembler::Register* a, Assembler::Register* b, Assembler::Register* t) { @@ -1516,7 +1523,7 @@ longCallC(Context* c, unsigned size UNUSED, Assembler::Constant* target) assert(c, size == BytesPerWord); Assembler::Register tmp(4); - moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp); + moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, offset(c)); callR(c, BytesPerWord, &tmp); } @@ -1526,7 +1533,7 @@ longJumpC(Context* c, unsigned size UNUSED, Assembler::Constant* target) assert(c, size == BytesPerWord); Assembler::Register tmp(4); // a non-arg reg that we don't mind clobbering - moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp); + moveCR2(c, BytesPerWord, target, BytesPerWord, &tmp, offset(c)); jumpR(c, BytesPerWord, &tmp); } @@ -1730,18 +1737,18 @@ class MyArchitecture: public Assembler::Architecture { switch (op) { case Call: case Jump: - case LongCall: - case LongJump: case AlignedCall: case AlignedJump: { updateOffset(c.s, static_cast(returnAddress) - 4, reinterpret_cast(newTarget)); } break; + case LongCall: + case LongJump: case AlignedLongCall: case AlignedLongJump: { - uint32_t* p = static_cast(returnAddress) - 4; - *reinterpret_cast(unha16(p[0] & 0xFFFF, p[1] & 0xFFFF)) + uint32_t* p = static_cast(returnAddress) - 2; + *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) = newTarget; } break; @@ -2255,7 +2262,10 @@ class MyAssembler: public Assembler { *static_cast(e->address) = e->constant->value(); } else { new (e->constant->listen(sizeof(ConstantPoolListener))) - ConstantPoolListener(c.s, static_cast(e->address)); + ConstantPoolListener(c.s, static_cast(e->address), + e->callOffset + ? 
dst + e->callOffset->value() + 8 + : 0); } // fprintf(stderr, "constant %p at %p\n", reinterpret_cast(e->constant->value()), e->address); } diff --git a/src/binaryToObject/elf.cpp b/src/binaryToObject/elf.cpp index 8a6c2417d1..2e58f08e27 100644 --- a/src/binaryToObject/elf.cpp +++ b/src/binaryToObject/elf.cpp @@ -232,7 +232,7 @@ writeObject(const uint8_t* data, unsigned size, FILE* out, fileHeader.e_entry = 0; fileHeader.e_phoff = 0; fileHeader.e_shoff = sizeof(FileHeader); - fileHeader.e_flags = 0; + fileHeader.e_flags = (machine == EM_ARM ? 0x04000000 : 0); fileHeader.e_ehsize = sizeof(FileHeader); fileHeader.e_phentsize = 0; fileHeader.e_phnum = 0;