diff --git a/classpath/avian/Classes.java b/classpath/avian/Classes.java index 34877d5ef2..3fc9f37144 100644 --- a/classpath/avian/Classes.java +++ b/classpath/avian/Classes.java @@ -38,7 +38,7 @@ public class Classes { public static native VMClass primitiveClass(char name); public static native void initialize(VMClass vmClass); - + public static native boolean isAssignableFrom(VMClass a, VMClass b); public static native VMClass getVMClass(Object o); @@ -134,7 +134,7 @@ public class Classes { array[i] = parseAnnotationValue(loader, pool, in); } return array; - } + } default: throw new AssertionError(); } @@ -207,7 +207,7 @@ public class Classes { while (spec[end] != ';') ++ end; ++ end; break; - + default: ++ end; } @@ -295,9 +295,9 @@ public class Classes { } Class c = loader.loadClass(name); VMClass vmc = SystemClassLoader.vmClass(c); - Classes.link(vmc, loader); + link(vmc, loader); if (initialize) { - Classes.initialize(vmc); + initialize(vmc); } return c; } @@ -315,7 +315,7 @@ public class Classes { } else { if (name.length() == 1) { return SystemClassLoader.getClass - (Classes.primitiveClass(name.charAt(0))); + (primitiveClass(name.charAt(0))); } else { throw new ClassNotFoundException(name); } @@ -378,7 +378,7 @@ public class Classes { public static int findField(VMClass vmClass, String name) { if (vmClass.fieldTable != null) { - Classes.link(vmClass); + link(vmClass); for (int i = 0; i < vmClass.fieldTable.length; ++i) { if (toString(vmClass.fieldTable[i].name).equals(name)) { @@ -426,7 +426,7 @@ public class Classes { { VMMethod[] methodTable = vmClass.methodTable; if (methodTable != null) { - Classes.link(vmClass); + link(vmClass); if (parameterTypes == null) { parameterTypes = new Class[0]; @@ -464,7 +464,7 @@ public class Classes { Method[] array = new Method[countMethods(vmClass, publicOnly)]; VMMethod[] methodTable = vmClass.methodTable; if (methodTable != null) { - Classes.link(vmClass); + link(vmClass); int ai = 0; for (int i = 0, j = declaredMethodCount(vmClass); i < j; ++i) { @@ -498,7 +498,7 @@ public class Classes { public static Field[] getFields(VMClass vmClass, boolean publicOnly) { Field[] array = new Field[countFields(vmClass, publicOnly)]; if (vmClass.fieldTable != null) { - Classes.link(vmClass); + link(vmClass); int ai = 0; for (int i = 0; i < vmClass.fieldTable.length; ++i) { @@ -568,9 +568,9 @@ public class Classes { return new ProtectionDomain(source, p); } - + public static native Method makeMethod(Class c, int slot); - + public static native Field makeField(Class c, int slot); private static native void acquireClassLock(); diff --git a/include/avian/codegen/architecture.h b/include/avian/codegen/architecture.h index 47687aefaf..528be74858 100644 --- a/include/avian/codegen/architecture.h +++ b/include/avian/codegen/architecture.h @@ -28,8 +28,6 @@ namespace codegen { class Assembler; -class RegisterFile; - class OperandMask { public: uint8_t typeMask; diff --git a/src/arm64.S b/src/arm64.S index 6953ea0cf6..b5ce9a5000 100644 --- a/src/arm64.S +++ b/src/arm64.S @@ -35,6 +35,7 @@ GLOBAL(vmNativeCall): // allocate frame stp x29, x30, [sp,#-64]! + mov x29, sp // save callee-saved register values so we can clobber them stp x19, x20, [sp,#16] @@ -118,6 +119,7 @@ GLOBAL(vmRun): // allocate frame stp x29, x30, [sp,#-96]! 
+ mov x29, sp // save callee-saved register values stp x19, x20, [sp,#16] diff --git a/src/codegen/compiler/site.h b/src/codegen/compiler/site.h index b2c10ddc39..5099704a34 100644 --- a/src/codegen/compiler/site.h +++ b/src/codegen/compiler/site.h @@ -123,7 +123,7 @@ class Site { virtual RegisterMask registerMask(Context*) { - return 0; + return RegisterMask(0); } virtual bool isVolatile(Context*) diff --git a/src/codegen/target/arm/CMakeLists.txt b/src/codegen/target/arm/CMakeLists.txt index bc26352adc..23faf6694f 100644 --- a/src/codegen/target/arm/CMakeLists.txt +++ b/src/codegen/target/arm/CMakeLists.txt @@ -4,5 +4,6 @@ add_library(avian_codegen_arm context.cpp fixup.cpp multimethod.cpp - operations.cpp + operations32.cpp + operations64.cpp ) diff --git a/src/codegen/target/arm/assembler.cpp b/src/codegen/target/arm/assembler.cpp index a6c7491279..3130662073 100644 --- a/src/codegen/target/arm/assembler.cpp +++ b/src/codegen/target/arm/assembler.cpp @@ -39,7 +39,7 @@ namespace isa { bool vfpSupported() { // TODO: Use at runtime detection -#if defined(__ARM_PCS_VFP) +#if (defined __ARM_PCS_VFP) || (defined ARCH_arm64) // armhf return true; #else @@ -55,9 +55,9 @@ bool vfpSupported() const RegisterFile MyRegisterFileWithoutFloats(GPR_MASK, 0); const RegisterFile MyRegisterFileWithFloats(GPR_MASK, FPR_MASK); -const unsigned FrameHeaderSize = 1; +const unsigned FrameHeaderSize = TargetBytesPerWord / 4; -const unsigned StackAlignmentInBytes = 8; +const unsigned StackAlignmentInBytes = TargetBytesPerWord * 2; const unsigned StackAlignmentInWords = StackAlignmentInBytes / TargetBytesPerWord; @@ -89,11 +89,11 @@ void nextFrame(ArchitectureContext* con, void** stack) { assertT(con, *ip >= start); - assertT(con, *ip <= start + (size / TargetBytesPerWord)); + assertT(con, *ip <= start + (size / 4)); uint32_t* instruction = static_cast(*ip); - if ((*start >> 20) == 0xe59) { + if ((*start >> 20) == (TargetBytesPerWord == 8 ? 0xf94 : 0xe59)) { // skip stack overflow check start += 3; } @@ -111,7 +111,8 @@ void nextFrame(ArchitectureContext* con, return; } - if (*instruction == 0xe12fff1e) { // return + if (*instruction == (TargetBytesPerWord == 8 ? 
0xd61f03c0 : 0xe12fff1e)) { + // return *ip = link; return; } @@ -124,7 +125,20 @@ void nextFrame(ArchitectureContext* con, // check for post-non-tail-call stack adjustment of the form "sub // sp, sp, #offset": - if ((*instruction >> 12) == 0xe24dd) { + if (TargetBytesPerWord == 8 and (*instruction & 0xff0003ff) == 0xd10003ff) { + unsigned value = (*instruction >> 10) & 0xfff; + unsigned shift = (*instruction >> 22) & 1; + switch (shift) { + case 0: + offset -= value / TargetBytesPerWord; + break; + case 1: + offset -= (value << 12) / TargetBytesPerWord; + break; + default: + abort(con); + } + } else if (TargetBytesPerWord == 4 and (*instruction >> 12) == 0xe24dd) { unsigned value = *instruction & 0xff; unsigned rotation = (*instruction >> 8) & 0xf; switch (rotation) { @@ -218,6 +232,7 @@ class MyArchitecture : public Architecture { { switch (register_.index()) { case LinkRegister.index(): + case FrameRegister.index(): case StackRegister.index(): case ThreadRegister.index(): case ProgramCounter.index(): @@ -258,7 +273,7 @@ class MyArchitecture : public Architecture { virtual unsigned argumentRegisterCount() { - return 4; + return TargetBytesPerWord; } virtual Register argumentRegister(unsigned index) @@ -306,8 +321,13 @@ class MyArchitecture : public Architecture { case lir::AlignedLongCall: case lir::AlignedLongJump: { uint32_t* p = static_cast(returnAddress) - 2; - *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) - = newTarget; + if (TargetBytesPerWord == 8) { + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *reinterpret_cast(p + ((*p & mask) >> 5)) = newTarget; + } else { + *reinterpret_cast(p + (((*p & PoolOffsetMask) + 8) / 4)) + = newTarget; + } } break; default: @@ -434,11 +454,11 @@ class MyArchitecture : public Architecture { break; case lir::Float2Int: - // todo: Java requires different semantics than SSE for + // todo: Java requires different semantics than VFP for // converting floats to integers, we we need to either use // thunks or produce inline machine code which handles edge // cases properly. - if (false && vfpSupported() && bSize == 4) { + if (false && vfpSupported() && bSize <= TargetBytesPerWord) { aMask.typeMask = lir::Operand::RegisterPairMask; aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK); } else { @@ -447,7 +467,7 @@ class MyArchitecture : public Architecture { break; case lir::Int2Float: - if (vfpSupported() && aSize == 4) { + if (vfpSupported() && aSize <= TargetBytesPerWord) { aMask.typeMask = lir::Operand::RegisterPairMask; aMask.setLowHighRegisterMasks(GPR_MASK, GPR_MASK); } else { @@ -544,7 +564,7 @@ class MyArchitecture : public Architecture { case lir::ShiftLeft: case lir::ShiftRight: case lir::UnsignedShiftRight: - if (bSize == 8) + if (bSize > TargetBytesPerWord) aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask; break; @@ -556,6 +576,11 @@ class MyArchitecture : public Architecture { aMask.typeMask = bMask.typeMask = lir::Operand::RegisterPairMask; break; + // todo: Although ARM has instructions for integer division and + // remainder, they don't trap on division by zero, which is why + // we use thunks. Alternatively, we could generate inline code + // with an explicit zero check, which would probably be a bit + // faster. 
case lir::Divide: case lir::Remainder: case lir::FloatRemainder: @@ -567,7 +592,7 @@ class MyArchitecture : public Architecture { case lir::FloatMultiply: case lir::FloatDivide: if (vfpSupported()) { - bMask.typeMask = lir::Operand::RegisterPairMask; + aMask.typeMask = lir::Operand::RegisterPairMask; aMask.setLowHighRegisterMasks(FPR_MASK, FPR_MASK); bMask = aMask; } else { @@ -745,19 +770,45 @@ class MyAssembler : public Assembler { // how to handle them: assertT(&con, footprint < 256); - lir::RegisterPair stack(StackRegister); - ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); - lir::Constant footprintConstant(&footprintPromise); - subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + // todo: the ARM ABI says the frame preamble should be of the form + // + // stp x29, x30, [sp,#-footprint]! + // mov x29, sp + // + // and the frame should be popped with e.g. + // + // ldp x29, x30, [sp],#footprint + // br x30 + // + // However, that will invalidate a lot of assumptions elsewhere + // about the return address being stored at the opposite end of + // the frame, so lots of other code will need to change before we + // can do that. The code below can be enabled as a starting point + // when we're ready to tackle that. + if (false and TargetBytesPerWord == 8) { + // stp x29, x30, [sp,#-footprint]! + con.code.append4(0xa9800000 | ((-footprint & 0x7f) << 15) + | (StackRegister.index() << 5) + | (LinkRegister.index() << 10) | FrameRegister.index()); - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressDst(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveRM(&con, - TargetBytesPerWord, - &returnAddress, - TargetBytesPerWord, - &returnAddressDst); + lir::RegisterPair stack(StackRegister); + lir::RegisterPair frame(FrameRegister); + moveRR(&con, TargetBytesPerWord, &stack, TargetBytesPerWord, &frame); + } else { + lir::RegisterPair stack(StackRegister); + ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); + lir::Constant footprintConstant(&footprintPromise); + subC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressDst(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveRM(&con, + TargetBytesPerWord, + &returnAddress, + TargetBytesPerWord, + &returnAddressDst); + } } virtual void adjustFrame(unsigned difference) @@ -772,19 +823,26 @@ class MyAssembler : public Assembler { { footprint += FrameHeaderSize; - lir::RegisterPair returnAddress(LinkRegister); - lir::Memory returnAddressSrc(StackRegister, - (footprint - 1) * TargetBytesPerWord); - moveMR(&con, - TargetBytesPerWord, - &returnAddressSrc, - TargetBytesPerWord, - &returnAddress); + // see comment regarding the ARM64 ABI in allocateFrame + if (false and TargetBytesPerWord == 8) { + // ldp x29, x30, [sp],#footprint + con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10) + | 29); + } else { + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); - lir::RegisterPair stack(StackRegister); - ResolvedPromise footprintPromise(footprint * TargetBytesPerWord); - lir::Constant footprintConstant(&footprintPromise); - addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + lir::RegisterPair stack(StackRegister); + ResolvedPromise footprintPromise(footprint * 
TargetBytesPerWord); + lir::Constant footprintConstant(&footprintPromise); + addC(&con, TargetBytesPerWord, &footprintConstant, &stack, &stack); + } } virtual void popFrameForTailCall(unsigned footprint, @@ -851,10 +909,26 @@ class MyAssembler : public Assembler { return_(&con); } - virtual void popFrameAndUpdateStackAndReturn(unsigned frameFootprint, + virtual void popFrameAndUpdateStackAndReturn(unsigned footprint, unsigned stackOffsetFromThread) { - popFrame(frameFootprint); + footprint += FrameHeaderSize; + + // see comment regarding the ARM64 ABI in allocateFrame + if (false and TargetBytesPerWord == 8) { + // ldp x29, x30, [sp],#footprint + con.code.append4(0xa8c00000 | (footprint << 15) | (31 << 5) | (30 << 10) + | 29); + } else { + lir::RegisterPair returnAddress(LinkRegister); + lir::Memory returnAddressSrc(StackRegister, + (footprint - 1) * TargetBytesPerWord); + moveMR(&con, + TargetBytesPerWord, + &returnAddressSrc, + TargetBytesPerWord, + &returnAddress); + } lir::RegisterPair stack(StackRegister); lir::Memory newStackSrc(ThreadRegister, stackOffsetFromThread); @@ -946,17 +1020,28 @@ class MyAssembler : public Assembler { unsigned instruction = o->block->start + padding(o->block, o->offset) + o->offset; - int32_t v = (entry - 8) - instruction; - expect(&con, v == (v & PoolOffsetMask)); - int32_t* p = reinterpret_cast(dst + instruction); - *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); + + if (TargetBytesPerWord == 8) { + int32_t v = entry - instruction; + expect(&con, v == (v & PoolOffsetMask)); + + const int32_t mask = (PoolOffsetMask >> 2) << 5; + *p = (((v >> 2) << 5) & mask) | ((~mask) & *p); + } else { + int32_t v = (entry - 8) - instruction; + expect(&con, v == (v & PoolOffsetMask)); + + *p = (v & PoolOffsetMask) | ((~PoolOffsetMask) & *p); + } poolSize += TargetBytesPerWord; } bool jump = needJump(b); if (jump) { + expect(&con, TargetBytesPerWord == 4); + write4(dst + dstOffset, isa::b((poolSize + TargetBytesPerWord - 8) >> 2)); } diff --git a/src/codegen/target/arm/fixup.cpp b/src/codegen/target/arm/fixup.cpp index e1d41b6eb4..3117688b15 100644 --- a/src/codegen/target/arm/fixup.cpp +++ b/src/codegen/target/arm/fixup.cpp @@ -12,6 +12,12 @@ #include "fixup.h" #include "block.h" +namespace { + +const unsigned InstructionSize = 4; + +} // namespace + namespace avian { namespace codegen { namespace arm { @@ -38,8 +44,7 @@ int64_t OffsetPromise::value() assertT(con, resolved()); unsigned o = offset - block->offset; - return block->start - + padding(block, forTrace ? o - vm::TargetBytesPerWord : o) + o; + return block->start + padding(block, forTrace ? o - InstructionSize : o) + o; } Promise* offsetPromise(Context* con, bool forTrace) @@ -92,17 +97,30 @@ bool bounded(int right, int left, int32_t v) void* updateOffset(vm::System* s, uint8_t* instruction, int64_t value) { - // ARM's PC is two words ahead, and branches drop the bottom 2 bits. 
- int32_t v = (reinterpret_cast(value) - (instruction + 8)) >> 2; - - int32_t mask; - expect(s, bounded(0, 8, v)); - mask = 0xFFFFFF; - int32_t* p = reinterpret_cast(instruction); + + int32_t v; + int32_t mask; + if (vm::TargetBytesPerWord == 8) { + if ((*p >> 24) == 0x54) { + // conditional branch + v = ((reinterpret_cast(value) - instruction) >> 2) << 5; + mask = 0xFFFFE0; + } else { + // unconditional branch + v = (reinterpret_cast(value) - instruction) >> 2; + mask = 0x3FFFFFF; + } + } else { + v = (reinterpret_cast(value) - (instruction + 8)) >> 2; + mask = 0xFFFFFF; + } + + expect(s, bounded(0, 8, v)); + *p = (v & mask) | ((~mask) & *p); - return instruction + 4; + return instruction + InstructionSize; } ConstantPoolEntry::ConstantPoolEntry(Context* con, @@ -214,6 +232,101 @@ void appendPoolEvent(Context* con, b->poolEventTail = e; } +bool needJump(MyBlock* b) +{ + return b->next or b->size != (b->size & PoolOffsetMask); +} + +unsigned padding(MyBlock* b, unsigned offset) +{ + unsigned total = 0; + for (PoolEvent* e = b->poolEventHead; e; e = e->next) { + if (e->offset <= offset) { + if (needJump(b)) { + total += vm::TargetBytesPerWord; + } + for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { + total += vm::TargetBytesPerWord; + } + } else { + break; + } + } + return total; +} + +void resolve(MyBlock* b) +{ + Context* con = b->context; + + if (b->poolOffsetHead) { + if (con->poolOffsetTail) { + con->poolOffsetTail->next = b->poolOffsetHead; + } else { + con->poolOffsetHead = b->poolOffsetHead; + } + con->poolOffsetTail = b->poolOffsetTail; + } + + if (con->poolOffsetHead) { + bool append; + if (b->next == 0 or b->next->poolEventHead) { + append = true; + } else { + int32_t v + = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8) + - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); + + append = (v != (v & PoolOffsetMask)); + + if (DebugPool) { + fprintf(stderr, + "current %p %d %d next %p %d %d\n", + b, + b->start, + b->size, + b->next, + b->start + b->size, + b->next->size); + fprintf(stderr, + "offset %p %d is of distance %d to next block; append? %d\n", + con->poolOffsetHead, + con->poolOffsetHead->offset, + v, + append); + } + } + + if (append) { +#ifndef NDEBUG + int32_t v + = (b->start + b->size - 8) + - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); + + expect(con, v == (v & PoolOffsetMask)); +#endif // not NDEBUG + + appendPoolEvent( + con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); + + if (DebugPool) { + for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) { + fprintf(stderr, + "include %p %d in pool event %p at offset %d in block %p\n", + o, + o->offset, + b->poolEventTail, + b->size, + b); + } + } + + con->poolOffsetHead = 0; + con->poolOffsetTail = 0; + } + } +} + } // namespace arm } // namespace codegen } // namespace avian diff --git a/src/codegen/target/arm/fixup.h b/src/codegen/target/arm/fixup.h index 5460295d95..cce2b59dce 100644 --- a/src/codegen/target/arm/fixup.h +++ b/src/codegen/target/arm/fixup.h @@ -27,7 +27,7 @@ namespace arm { const bool DebugPool = false; -const int32_t PoolOffsetMask = 0xFFF; +const int32_t PoolOffsetMask = vm::TargetBytesPerWord == 8 ? 
0x1FFFFF : 0xFFF; class Task { public: diff --git a/src/codegen/target/arm/operations.cpp b/src/codegen/target/arm/operations32.cpp similarity index 92% rename from src/codegen/target/arm/operations.cpp rename to src/codegen/target/arm/operations32.cpp index 87d88613fd..e9cd601fe3 100644 --- a/src/codegen/target/arm/operations.cpp +++ b/src/codegen/target/arm/operations32.cpp @@ -15,6 +15,8 @@ #include "fixup.h" #include "multimethod.h" +#if TARGET_BYTES_PER_WORD == 4 + namespace avian { namespace codegen { namespace arm { @@ -179,101 +181,6 @@ void unsignedShiftRightC(Context* con, } } -bool needJump(MyBlock* b) -{ - return b->next or b->size != (b->size & PoolOffsetMask); -} - -unsigned padding(MyBlock* b, unsigned offset) -{ - unsigned total = 0; - for (PoolEvent* e = b->poolEventHead; e; e = e->next) { - if (e->offset <= offset) { - if (needJump(b)) { - total += vm::TargetBytesPerWord; - } - for (PoolOffset* o = e->poolOffsetHead; o; o = o->next) { - total += vm::TargetBytesPerWord; - } - } else { - break; - } - } - return total; -} - -void resolve(MyBlock* b) -{ - Context* con = b->context; - - if (b->poolOffsetHead) { - if (con->poolOffsetTail) { - con->poolOffsetTail->next = b->poolOffsetHead; - } else { - con->poolOffsetHead = b->poolOffsetHead; - } - con->poolOffsetTail = b->poolOffsetTail; - } - - if (con->poolOffsetHead) { - bool append; - if (b->next == 0 or b->next->poolEventHead) { - append = true; - } else { - int32_t v - = (b->start + b->size + b->next->size + vm::TargetBytesPerWord - 8) - - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); - - append = (v != (v & PoolOffsetMask)); - - if (DebugPool) { - fprintf(stderr, - "current %p %d %d next %p %d %d\n", - b, - b->start, - b->size, - b->next, - b->start + b->size, - b->next->size); - fprintf(stderr, - "offset %p %d is of distance %d to next block; append? %d\n", - con->poolOffsetHead, - con->poolOffsetHead->offset, - v, - append); - } - } - - if (append) { -#ifndef NDEBUG - int32_t v - = (b->start + b->size - 8) - - (con->poolOffsetHead->offset + con->poolOffsetHead->block->start); - - expect(con, v == (v & PoolOffsetMask)); -#endif // not NDEBUG - - appendPoolEvent( - con, b, b->size, con->poolOffsetHead, con->poolOffsetTail); - - if (DebugPool) { - for (PoolOffset* o = con->poolOffsetHead; o; o = o->next) { - fprintf(stderr, - "include %p %d in pool event %p at offset %d in block %p\n", - o, - o->offset, - b->poolEventTail, - b->size, - b); - } - } - - con->poolOffsetHead = 0; - con->poolOffsetTail = 0; - } - } -} - void jumpR(Context* con, unsigned size UNUSED, lir::RegisterPair* target) { assertT(con, size == vm::TargetBytesPerWord); @@ -410,7 +317,8 @@ void moveCR2(Context* con, lir::RegisterPair dstHi(dst->high); moveCR(con, 4, &srcLo, 4, dst); moveCR(con, 4, &srcHi, 4, &dstHi); - } else if (src->value->resolved() and isOfWidth(getValue(src), 8)) { + } else if (callOffset == 0 and src->value->resolved() + and isOfWidth(getValue(src), 8)) { emit(con, movi(dst->low, lo8(getValue(src)))); // fits in immediate } else { appendConstantPoolEntry(con, src->value, callOffset); @@ -510,9 +418,9 @@ void multiplyR(Context* con, if (size == 8) { bool useTemporaries = b->low == t->low; Register tmpLow = useTemporaries ? con->client->acquireTemporary(GPR_MASK) - : t->low; + : t->low; Register tmpHigh = useTemporaries ? 
con->client->acquireTemporary(GPR_MASK) - : t->high; + : t->high; emit(con, umull(tmpLow, tmpHigh, a->low, b->low)); emit(con, mla(tmpHigh, a->low, b->high, tmpHigh)); @@ -665,11 +573,11 @@ void floatDivideR(Context* con, } Register normalize(Context* con, - int offset, - Register index, - unsigned scale, - bool* preserveIndex, - bool* release) + int offset, + Register index, + unsigned scale, + bool* preserveIndex, + bool* release) { if (offset != 0 or scale != 1) { lir::RegisterPair normalizedIndex( @@ -947,26 +855,8 @@ void load(Context* con, case 8: { if (dstSize == 8) { lir::RegisterPair dstHigh(dst->high); - load(con, - 4, - base, - offset, - NoRegister, - 1, - 4, - &dstHigh, - false, - false); - load(con, - 4, - base, - offset + 4, - NoRegister, - 1, - 4, - dst, - false, - false); + load(con, 4, base, offset, NoRegister, 1, 4, &dstHigh, false, false); + load(con, 4, base, offset + 4, NoRegister, 1, 4, dst, false, false); } else { emit(con, ldri(dst->low, base, offset)); } @@ -1496,15 +1386,26 @@ void longCallC(Context* con, unsigned size UNUSED, lir::Constant* target) callR(con, vm::TargetBytesPerWord, &tmp); } +void alignedLongCallC(Context* con, unsigned size, lir::Constant* target) +{ + longCallC(con, size, target); +} + void longJumpC(Context* con, unsigned size UNUSED, lir::Constant* target) { assertT(con, size == vm::TargetBytesPerWord); - lir::RegisterPair tmp(Register(4)); // a non-arg reg that we don't mind clobbering + lir::RegisterPair tmp( + Register(4)); // a non-arg reg that we don't mind clobbering moveCR2(con, vm::TargetBytesPerWord, target, &tmp, offsetPromise(con)); jumpR(con, vm::TargetBytesPerWord, &tmp); } +void alignedLongJumpC(Context* con, unsigned size, lir::Constant* target) +{ + longJumpC(con, size, target); +} + void jumpC(Context* con, unsigned size UNUSED, lir::Constant* target) { assertT(con, size == vm::TargetBytesPerWord); @@ -1554,3 +1455,5 @@ void storeLoadBarrier(Context* con) } // namespace arm } // namespace codegen } // namespace avian + +#endif // TARGET_BYTES_PER_WORD == 4 diff --git a/src/codegen/target/arm/operations64.cpp b/src/codegen/target/arm/operations64.cpp new file mode 100644 index 0000000000..e0c4a69ed6 --- /dev/null +++ b/src/codegen/target/arm/operations64.cpp @@ -0,0 +1,1625 @@ +/* Copyright (c) 2008-2014, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. */ + +#include "context.h" +#include "operations.h" +#include "block.h" +#include "fixup.h" +#include "multimethod.h" + +#if TARGET_BYTES_PER_WORD == 8 + +namespace { + +using namespace avian::codegen; +using namespace avian::codegen::arm; + +Register fpr(Register reg) +{ + return Register(reg.index() - N_GPRS); +} + +Register fpr(lir::RegisterPair* reg) +{ + return fpr(reg->low); +} + +void append(Context* c, uint32_t instruction) +{ + c->code.append4(instruction); +} + +uint32_t lslv(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0x9ac02000 : 0x1ac02000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ubfm(Register Rd, Register Rn, int r, int s, unsigned size) +{ + return (size == 8 ? 
0xd3400000 : 0x53000000) | (r << 16) | (s << 10) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t sbfm(Register Rd, Register Rn, int r, int s, unsigned size) +{ + return (size == 8 ? 0x93400000 : 0x13000000) | (r << 16) | (s << 10) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t lsli(Register Rd, Register Rn, int shift, unsigned size) +{ + if (size == 4) { + return ubfm(Rd, Rn, (32 - shift) & 0x1f, 31 - shift, size); + } else { + return ubfm(Rd, Rn, (64 - shift) & 0x3f, 63 - shift, size); + } +} + +uint32_t asrv(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0x9ac02800 : 0x1ac02800) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t lsrv(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0x9ac02400 : 0x1ac02400) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t lsri(Register Rd, Register Rn, int shift, unsigned size) +{ + return ubfm(Rd, Rn, shift, size == 8 ? 63 : 31, size); +} + +uint32_t asri(Register Rd, Register Rn, int shift, unsigned size) +{ + return sbfm(Rd, Rn, shift, size == 8 ? 63 : 31, size); +} + +uint32_t sxtb(Register Rd, Register Rn) +{ + return sbfm(Rd, Rn, 0, 7, 8); +} + +uint32_t sxth(Register Rd, Register Rn) +{ + return sbfm(Rd, Rn, 0, 15, 8); +} + +uint32_t uxth(Register Rd, Register Rn) +{ + return ubfm(Rd, Rn, 0, 15, 4); +} + +uint32_t sxtw(Register Rd, Register Rn) +{ + return sbfm(Rd, Rn, 0, 31, 8); +} + +uint32_t br(Register Rn) +{ + return 0xd61f0000 | (Rn.index() << 5); +} + +uint32_t fmovFdFn(Register Fd, Register Fn, unsigned size) +{ + return (size == 8 ? 0x1e604000 : 0x1e204000) | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fmovRdFn(Register Rd, Register Fn, unsigned size) +{ + return (size == 8 ? 0x9e660000 : 0x1e260000) | (Fn.index() << 5) | Rd.index(); +} + +uint32_t fmovFdRn(Register Fd, Register Rn, unsigned size) +{ + return (size == 8 ? 0x9e670000 : 0x1e270000) | (Rn.index() << 5) | Fd.index(); +} + +uint32_t orr(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xaa000000 : 0x2a000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t addi(Register Rd, Register Rn, int value, int shift, unsigned size) +{ + return (size == 8 ? 0x91000000 : 0x11000000) | (shift ? 0x400000 : 0) + | (value << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t mov(Register Rd, Register Rn, unsigned size) +{ + return Rn.index() == 31 or Rd.index() == 31 ? addi(Rd, Rn, 0, 0, size) + : orr(Rd, Register(31), Rn, size); +} + +uint32_t movz(Register Rd, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0xd2800000 : 0x52800000) | ((shift >> 4) << 21) + | (value << 5) | Rd.index(); +} + +uint32_t movn(Register Rd, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0x92800000 : 0x12800000) | ((shift >> 4) << 21) + | (value << 5) | Rd.index(); +} + +uint32_t movk(Register Rd, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0xf2800000 : 0x72800000) | ((shift >> 4) << 21) + | (value << 5) | Rd.index(); +} + +uint32_t ldrPCRel(Register Rd, int offset, unsigned size) +{ + return (size == 8 ? 0x58000000 : 0x18000000) | ((offset >> 2) << 5) + | Rd.index(); +} + +uint32_t add(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0x8b000000 : 0x0b000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t sub(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 
0xcb000000 : 0x4b000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t and_(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0x8a000000 : 0x0a000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t eor(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xca000000 : 0x4a000000) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t madd(Register Rd, Register Rn, Register Rm, Register Ra, unsigned size) +{ + return (size == 8 ? 0x9b000000 : 0x1b000000) | (Rm.index() << 16) + | (Ra.index() << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t mul(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return madd(Rd, Rn, Rm, Register(31), size); +} + +uint32_t subi(Register Rd, Register Rn, int value, int shift, unsigned size) +{ + return (size == 8 ? 0xd1000000 : 0x51000000) | (shift ? 0x400000 : 0) + | (value << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t fabs_(Register Fd, Register Fn, unsigned size) +{ + return (size == 8 ? 0x1e60c000 : 0x1e20c000) | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fneg(Register Fd, Register Fn, unsigned size) +{ + return (size == 8 ? 0x1e614000 : 0x1e214000) | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fsqrt(Register Fd, Register Fn, unsigned size) +{ + return (size == 8 ? 0x1e61c000 : 0x1e21c000) | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fadd(Register Fd, Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e602800 : 0x1e202800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fsub(Register Fd, Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e603800 : 0x1e203800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fmul(Register Fd, Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e600800 : 0x1e200800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fdiv(Register Fd, Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e601800 : 0x1e201800) | (Fm.index() << 16) + | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fcvtSdDn(Register Fd, Register Fn) +{ + return 0x1e624000 | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fcvtDdSn(Register Fd, Register Fn) +{ + return 0x1e22c000 | (Fn.index() << 5) | Fd.index(); +} + +uint32_t fcvtasXdDn(Register Rd, Register Fn) +{ + return 0x9e640000 | (Fn.index() << 5) | Rd.index(); +} + +uint32_t fcvtasWdSn(Register Rd, Register Fn) +{ + return 0x1e240000 | (Fn.index() << 5) | Rd.index(); +} + +uint32_t scvtfDdXn(Register Fd, Register Rn) +{ + return 0x9e620000 | (Rn.index() << 5) | Fd.index(); +} + +uint32_t scvtfSdWn(Register Fd, Register Rn) +{ + return 0x1e220000 | (Rn.index() << 5) | Fd.index(); +} + +uint32_t strFs(Register Fs, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xfc206800 : 0xbc206800) | (Rm.index() << 16) + | (Rn.index() << 5) | Fs.index(); +} + +uint32_t strb(Register Rs, Register Rn, Register Rm) +{ + return 0x38206800 | (Rm.index() << 16) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t strh(Register Rs, Register Rn, Register Rm) +{ + return 0x78206800 | (Rm.index() << 16) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t striFs(Register Fs, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xfd000000 : 0xbd000000) + | ((offset >> (size == 8 ? 
3 : 2)) << 10) | (Rn.index() << 5) + | Fs.index(); +} + +uint32_t str(Register Rs, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xf8206800 : 0xb8206800) | (Rm.index() << 16) + | (Rn.index() << 5) | Rs.index(); +} + +uint32_t strbi(Register Rs, Register Rn, int offset) +{ + return 0x39000000 | (offset << 10) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t strhi(Register Rs, Register Rn, int offset) +{ + return 0x79000000 | ((offset >> 1) << 10) | (Rn.index() << 5) | Rs.index(); +} + +uint32_t stri(Register Rs, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xf9000000 : 0xb9000000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) + | Rs.index(); +} + +uint32_t ldrFd(Register Fd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xfc606800 : 0xbc606800) | (Rm.index() << 16) + | (Rn.index() << 5) | Fd.index(); +} + +uint32_t ldrb(Register Rd, Register Rn, Register Rm) +{ + return 0x38606800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsb(Register Rd, Register Rn, Register Rm) +{ + return 0x38e06800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrh(Register Rd, Register Rn, Register Rm) +{ + return 0x78606800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsh(Register Rd, Register Rn, Register Rm) +{ + return 0x78e06800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsw(Register Rd, Register Rn, Register Rm) +{ + return 0xb8a06800 | (Rm.index() << 16) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldr(Register Rd, Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xf8606800 : 0xb8606800) | (Rm.index() << 16) + | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldriFd(Register Fd, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xfd400000 : 0xbd400000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) + | Fd.index(); +} + +uint32_t ldrbi(Register Rd, Register Rn, int offset) +{ + return 0x39400000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrsbi(Register Rd, Register Rn, int offset) +{ + return 0x39c00000 | (offset << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrhi(Register Rd, Register Rn, int offset) +{ + return 0x79400000 | ((offset >> 1) << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrshi(Register Rd, Register Rn, int offset) +{ + return 0x79c00000 | ((offset >> 1) << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldrswi(Register Rd, Register Rn, int offset) +{ + return 0xb9800000 | ((offset >> 2) << 10) | (Rn.index() << 5) | Rd.index(); +} + +uint32_t ldri(Register Rd, Register Rn, int offset, unsigned size) +{ + return (size == 8 ? 0xf9400000 : 0xb9400000) + | ((offset >> (size == 8 ? 3 : 2)) << 10) | (Rn.index() << 5) + | Rd.index(); +} + +uint32_t fcmp(Register Fn, Register Fm, unsigned size) +{ + return (size == 8 ? 0x1e602000 : 0x1e202000) | (Fm.index() << 16) + | (Fn.index() << 5); +} + +uint32_t neg(Register Rd, Register Rm, unsigned size) +{ + return (size == 8 ? 0xcb0003e0 : 0x4b0003e0) | (Rm.index() << 16) + | Rd.index(); +} + +uint32_t cmp(Register Rn, Register Rm, unsigned size) +{ + return (size == 8 ? 0xeb00001f : 0x6b00001f) | (Rm.index() << 16) + | (Rn.index() == 31 ? 0x2063ff : (Rn.index() << 5)); +} + +uint32_t cmpi(Register Rn, int value, unsigned shift, unsigned size) +{ + return (size == 8 ? 0xf100001f : 0x7100001f) | (shift == 12 ? 
0x400000 : 0) + | (value << 10) | (Rn.index() << 5); +} + +uint32_t b(int offset) +{ + return 0x14000000 | (offset >> 2); +} + +uint32_t bl(int offset) +{ + return 0x94000000 | (offset >> 2); +} + +uint32_t blr(Register Rn) +{ + return 0xd63f0000 | (Rn.index() << 5); +} + +uint32_t beq(int offset) +{ + return 0x54000000 | ((offset >> 2) << 5); +} + +uint32_t bne(int offset) +{ + return 0x54000001 | ((offset >> 2) << 5); +} + +uint32_t blt(int offset) +{ + return 0x5400000b | ((offset >> 2) << 5); +} + +uint32_t bgt(int offset) +{ + return 0x5400000c | ((offset >> 2) << 5); +} + +uint32_t ble(int offset) +{ + return 0x5400000d | ((offset >> 2) << 5); +} + +uint32_t bge(int offset) +{ + return 0x5400000a | ((offset >> 2) << 5); +} + +uint32_t bhi(int offset) +{ + return 0x54000008 | ((offset >> 2) << 5); +} + +uint32_t bpl(int offset) +{ + return 0x54000005 | ((offset >> 2) << 5); +} + +uint32_t brk(int flag) +{ + return 0xd4200020 | (flag << 5); +} + +uint32_t dmb(int flag) +{ + return 0xd50330bf | (flag << 8); +} + +} // namespace + +namespace avian { +namespace codegen { +namespace arm { + +using namespace avian::util; + +void shiftLeftR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, lslv(dst->low, b->low, a->low, size)); +} + +void shiftLeftC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + uint64_t value = a->value->value(); + if (size == 4 and (value & 0x1F)) { + append(c, lsli(dst->low, b->low, value & 0x1F, 4)); + } else if (size == 8 and (value & 0x3F)) { + append(c, lsli(dst->low, b->low, value & 0x3F, 8)); + } else { + moveRR(c, size, b, size, dst); + } +} + +void shiftRightR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, asrv(dst->low, b->low, a->low, size)); +} + +void shiftRightC(Context* c, + unsigned size UNUSED, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + uint64_t value = a->value->value(); + if (size == 4 and (value & 0x1F)) { + append(c, asri(dst->low, b->low, value & 0x1F, 4)); + } else if (size == 8 and (value & 0x3F)) { + append(c, asri(dst->low, b->low, value & 0x3F, 8)); + } else { + moveRR(c, size, b, size, dst); + } +} + +void unsignedShiftRightR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, lsrv(dst->low, b->low, a->low, size)); +} + +void unsignedShiftRightC(Context* c, + unsigned size UNUSED, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + uint64_t value = a->value->value(); + if (size == 4 and (value & 0x1F)) { + append(c, lsri(dst->low, b->low, value & 0x1F, 4)); + } else if (size == 8 and (value & 0x3F)) { + append(c, lsri(dst->low, b->low, value & 0x3F, 8)); + } else { + moveRR(c, size, b, size, dst); + } +} + +void jumpR(Context* c, unsigned size UNUSED, lir::RegisterPair* target) +{ + assertT(c, size == vm::TargetBytesPerWord); + append(c, br(target->low)); +} + +void moveRR(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + bool srcIsFpr = isFpr(src); + bool dstIsFpr = isFpr(dst); + if (srcIsFpr or dstIsFpr) { + assertT(c, srcSize == dstSize); + + if (srcIsFpr and dstIsFpr) { + append(c, fmovFdFn(fpr(dst), fpr(src), srcSize)); + } else if (srcIsFpr) { + append(c, fmovRdFn(dst->low, fpr(src), srcSize)); + } else { + append(c, fmovFdRn(fpr(dst), src->low, srcSize)); + } + } else { 
+ switch (srcSize) { + case 1: + append(c, sxtb(dst->low, src->low)); + break; + + case 2: + append(c, sxth(dst->low, src->low)); + break; + + case 4: + if (dstSize == 4) { + append(c, mov(dst->low, src->low, srcSize)); + } else { + append(c, sxtw(dst->low, src->low)); + } + break; + + case 8: + append(c, mov(dst->low, src->low, srcSize)); + break; + + default: + abort(c); + } + } +} + +void moveZRR(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned, + lir::RegisterPair* dst) +{ + switch (srcSize) { + case 2: + append(c, uxth(dst->low, src->low)); + break; + + default: + abort(c); + } +} + +void moveCR2(Context* c, + unsigned size, + lir::Constant* src, + lir::RegisterPair* dst, + Promise* callOffset) +{ + if (isFpr(dst)) { + // todo: could use a single fmov here and avoid the temporary for + // constants that fit + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveCR(c, size, src, size, &tmp); + moveRR(c, size, &tmp, size, dst); + c->client->releaseTemporary(tmp.low); + } else if (callOffset == 0 and src->value->resolved()) { + // todo: Is it better performance-wise to load using immediate + // moves or via a PC-relative constant pool? Does it depend on + // how many significant bits there are? + + int64_t value = src->value->value(); + if (value >= 0) { + append(c, movz(dst->low, value & 0xFFFF, 0, size)); + if (value >> 16) { + if ((value >> 16) & 0xFFFF) { + append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); + } + if (value >> 32) { + if ((value >> 32) & 0xFFFF) { + append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); + } + if (value >> 48) { + append(c, movk(dst->low, (value >> 48) & 0xFFFF, 48, size)); + } + } + } + } else { + append(c, movn(dst->low, (~value) & 0xFFFF, 0, size)); + if (~(value >> 16)) { + if (((value >> 16) & 0xFFFF) != 0xFFFF) { + append(c, movk(dst->low, (value >> 16) & 0xFFFF, 16, size)); + } + if (~(value >> 32)) { + if (((value >> 32) & 0xFFFF) != 0xFFFF) { + append(c, movk(dst->low, (value >> 32) & 0xFFFF, 32, size)); + } + if (~(value >> 48)) { + append(c, movk(dst->low, (value >> 48) & 0xFFFF, 48, size)); + } + } + } + } + } else { + appendConstantPoolEntry(c, src->value, callOffset); + append(c, ldrPCRel(dst->low, 0, size)); + } +} + +void moveCR(Context* c, + unsigned size, + lir::Constant* src, + unsigned, + lir::RegisterPair* dst) +{ + moveCR2(c, size, src, dst, 0); +} + +void addR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, add(dst->low, a->low, b->low, size)); +} + +void subR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, sub(dst->low, b->low, a->low, size)); +} + +void addC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + int64_t v = a->value->value(); + if (v) { + if (v > 0 and v < 0x1000) { + append(c, addi(dst->low, b->low, v, 0, size)); + } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { + append(c, addi(dst->low, b->low, v >> 12, 12, size)); + } else { + // todo + abort(c); + } + } else { + moveRR(c, size, b, size, dst); + } +} + +void subC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + int64_t v = a->value->value(); + if (v) { + if (v > 0 and v < 0x1000) { + append(c, subi(dst->low, b->low, v, 0, size)); + } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { + append(c, subi(dst->low, b->low, v >> 12, 12, size)); + } else { + 
// todo + abort(c); + } + } else { + moveRR(c, size, b, size, dst); + } +} + +void multiplyR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, mul(dst->low, a->low, b->low, size)); +} + +void floatAbsoluteRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + append(c, fabs_(fpr(b), fpr(a), size)); +} + +void floatNegateRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + append(c, fneg(fpr(b), fpr(a), size)); +} + +void float2FloatRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + if (size == 8) { + append(c, fcvtSdDn(fpr(b), fpr(a))); + } else { + append(c, fcvtDdSn(fpr(b), fpr(a))); + } +} + +void float2IntRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + if (size == 8) { + append(c, fcvtasXdDn(b->low, fpr(a))); + } else { + append(c, fcvtasWdSn(b->low, fpr(a))); + } +} + +void int2FloatRR(Context* c, + unsigned, + lir::RegisterPair* a, + unsigned size, + lir::RegisterPair* b) +{ + if (size == 8) { + append(c, scvtfDdXn(fpr(b), a->low)); + } else { + append(c, scvtfSdWn(fpr(b), a->low)); + } +} + +void floatSqrtRR(Context* c, + unsigned size, + lir::RegisterPair* a, + unsigned, + lir::RegisterPair* b) +{ + append(c, fsqrt(fpr(b), fpr(a), size)); +} + +void floatAddR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fadd(fpr(dst), fpr(b), fpr(a), size)); +} + +void floatSubtractR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fsub(fpr(dst), fpr(b), fpr(a), size)); +} + +void floatMultiplyR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fmul(fpr(dst), fpr(b), fpr(a), size)); +} + +void floatDivideR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, fdiv(fpr(dst), fpr(b), fpr(a), size)); +} + +Register normalize(Context* c, + int offset, + Register index, + unsigned scale, + bool* preserveIndex, + bool* release) +{ + if (offset != 0 or scale != 1) { + lir::RegisterPair normalizedIndex( + *preserveIndex ? 
c->client->acquireTemporary(GPR_MASK) : index); + + if (*preserveIndex) { + *release = true; + *preserveIndex = false; + } else { + *release = false; + } + + Register scaled; + + if (scale != 1) { + lir::RegisterPair unscaledIndex(index); + + ResolvedPromise scalePromise(log(scale)); + lir::Constant scaleConstant(&scalePromise); + + shiftLeftC(c, + vm::TargetBytesPerWord, + &scaleConstant, + &unscaledIndex, + &normalizedIndex); + + scaled = normalizedIndex.low; + } else { + scaled = index; + } + + if (offset != 0) { + lir::RegisterPair untranslatedIndex(scaled); + + ResolvedPromise offsetPromise(offset); + lir::Constant offsetConstant(&offsetPromise); + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveCR(c, + vm::TargetBytesPerWord, + &offsetConstant, + vm::TargetBytesPerWord, + &tmp); + addR(c, + vm::TargetBytesPerWord, + &tmp, + &untranslatedIndex, + &normalizedIndex); + c->client->releaseTemporary(tmp.low); + } + + return normalizedIndex.low; + } else { + *release = false; + return index; + } +} + +void store(Context* c, + unsigned size, + lir::RegisterPair* src, + Register base, + int offset, + Register index, + unsigned scale, + bool preserveIndex) +{ + if (index != NoRegister) { + bool release; + + // todo: browsing the instruction set, it looks like we could do a + // scaled store or load in a single instruction if the offset is + // zero, and we could simplify things for the case of non-zero + // offsets also + + Register normalized + = normalize(c, offset, index, scale, &preserveIndex, &release); + + if (isFpr(src)) { + switch (size) { + case 4: + case 8: + append(c, strFs(fpr(src->low), base, normalized, size)); + break; + + default: + abort(c); + } + } else { + switch (size) { + case 1: + append(c, strb(src->low, base, normalized)); + break; + + case 2: + append(c, strh(src->low, base, normalized)); + break; + + case 4: + case 8: + append(c, str(src->low, base, normalized, size)); + break; + + default: + abort(c); + } + } + + if (release) { + c->client->releaseTemporary(normalized); + } + } else if (abs(offset) == (abs(offset) & 0xFFF)) { + if (isFpr(src)) { + switch (size) { + case 4: + case 8: + assertT(c, offset == (offset & (size == 8 ? 
(~7) : (~3)))); + append(c, striFs(fpr(src->low), base, offset, size)); + break; + + default: + abort(c); + } + } else { // FPR store + switch (size) { + case 1: + append(c, strbi(src->low, base, offset)); + break; + + case 2: + assertT(c, offset == (offset & (~1))); + append(c, strhi(src->low, base, offset)); + break; + + case 4: + assertT(c, offset == (offset & (~3))); + append(c, stri(src->low, base, offset, size)); + break; + + case 8: + assertT(c, offset == (offset & (~7))); + append(c, stri(src->low, base, offset, size)); + break; + + default: + abort(c); + } + } + } else { + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + ResolvedPromise offsetPromise(offset); + lir::Constant offsetConstant(&offsetPromise); + moveCR(c, + vm::TargetBytesPerWord, + &offsetConstant, + vm::TargetBytesPerWord, + &tmp); + + store(c, size, src, base, 0, tmp.low, 1, false); + + c->client->releaseTemporary(tmp.low); + } +} + +void moveRM(Context* c, + unsigned srcSize, + lir::RegisterPair* src, + unsigned dstSize UNUSED, + lir::Memory* dst) +{ + assertT(c, srcSize == dstSize); + + if (src->low.index() == 31) { + assertT(c, c->client == 0); // the compiler should never ask us to + // store the SP; we'll only get here + // when assembling a thunk + + lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can + // clobber this + + moveRR(c, srcSize, src, srcSize, &tmp); + store( + c, srcSize, &tmp, dst->base, dst->offset, dst->index, dst->scale, true); + } else { + store( + c, srcSize, src, dst->base, dst->offset, dst->index, dst->scale, true); + } +} + +void load(Context* c, + unsigned srcSize, + Register base, + int offset, + Register index, + unsigned scale, + unsigned dstSize, + lir::RegisterPair* dst, + bool preserveIndex, + bool signExtend) +{ + if (index != NoRegister) { + bool release; + Register normalized + = normalize(c, offset, index, scale, &preserveIndex, &release); + + if (isFpr(dst)) { // FPR load + switch (srcSize) { + case 4: + case 8: + append(c, ldrFd(fpr(dst->low), base, normalized, srcSize)); + break; + + default: + abort(c); + } + } else { + switch (srcSize) { + case 1: + if (signExtend) { + append(c, ldrsb(dst->low, base, normalized)); + } else { + append(c, ldrb(dst->low, base, normalized)); + } + break; + + case 2: + if (signExtend) { + append(c, ldrsh(dst->low, base, normalized)); + } else { + append(c, ldrh(dst->low, base, normalized)); + } + break; + + case 4: + case 8: + if (signExtend and srcSize == 4 and dstSize == 8) { + append(c, ldrsw(dst->low, base, normalized)); + } else { + append(c, ldr(dst->low, base, normalized, srcSize)); + } + break; + + default: + abort(c); + } + } + + if (release) { + c->client->releaseTemporary(normalized); + } + } else if (abs(offset) == (abs(offset) & 0xFFF)) { + if (isFpr(dst)) { + switch (srcSize) { + case 4: + case 8: + assertT(c, offset == (offset & (srcSize == 8 ? 
(~7) : (~3)))); + append(c, ldriFd(fpr(dst->low), base, offset, srcSize)); + break; + + default: + abort(c); + } + } else { + switch (srcSize) { + case 1: + if (signExtend) { + append(c, ldrsbi(dst->low, base, offset)); + } else { + append(c, ldrbi(dst->low, base, offset)); + } + break; + + case 2: + assertT(c, offset == (offset & (~1))); + if (signExtend) { + append(c, ldrshi(dst->low, base, offset)); + } else { + append(c, ldrhi(dst->low, base, offset)); + } + break; + + case 4: + case 8: + if (signExtend and srcSize == 4 and dstSize == 8) { + assertT(c, offset == (offset & (~3))); + append(c, ldrswi(dst->low, base, offset)); + } else { + assertT(c, offset == (offset & (srcSize == 8 ? (~7) : (~3)))); + append(c, ldri(dst->low, base, offset, srcSize)); + } + break; + + default: + abort(c); + } + } + } else { + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + ResolvedPromise offsetPromise(offset); + lir::Constant offsetConstant(&offsetPromise); + moveCR(c, + vm::TargetBytesPerWord, + &offsetConstant, + vm::TargetBytesPerWord, + &tmp); + + load(c, srcSize, base, 0, tmp.low, 1, dstSize, dst, false, signExtend); + + c->client->releaseTemporary(tmp.low); + } +} + +void moveMR(Context* c, + unsigned srcSize, + lir::Memory* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + if (dst->low.index() == 31) { + assertT(c, c->client == 0); // the compiler should never ask us to + // load the SP; we'll only get here + // when assembling a thunk + + lir::RegisterPair tmp(Register(9)); // we're in a thunk, so we can + // clobber this + + load(c, srcSize, src->base, src->offset, src->index, src->scale, dstSize, &tmp, true, true); + moveRR(c, dstSize, &tmp, dstSize, dst); + } else { + load(c, + srcSize, + src->base, + src->offset, + src->index, + src->scale, + dstSize, + dst, + true, + true); + } +} + +void moveZMR(Context* c, + unsigned srcSize, + lir::Memory* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + load(c, + srcSize, + src->base, + src->offset, + src->index, + src->scale, + dstSize, + dst, + true, + false); +} + +void andR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, and_(dst->low, a->low, b->low, size)); +} + +void andC(Context* c, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + int64_t v = a->value->value(); + + if (~v) { + bool useTemporary = b->low == dst->low; + lir::RegisterPair tmp(dst->low); + if (useTemporary) { + tmp.low = c->client->acquireTemporary(GPR_MASK); + } + + moveCR(c, size, a, size, &tmp); + andR(c, size, b, &tmp, dst); + + if (useTemporary) { + c->client->releaseTemporary(tmp.low); + } + } else { + moveRR(c, size, b, size, dst); + } +} + +void orR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, orr(dst->low, a->low, b->low, size)); +} + +void xorR(Context* c, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::RegisterPair* dst) +{ + append(c, eor(dst->low, a->low, b->low, size)); +} + +void moveAR(Context* c, + unsigned srcSize, + lir::Address* src, + unsigned dstSize, + lir::RegisterPair* dst) +{ + assertT( + c, + srcSize == vm::TargetBytesPerWord and dstSize == vm::TargetBytesPerWord); + + lir::Constant constant(src->address); + moveCR(c, srcSize, &constant, dstSize, dst); + + lir::Memory memory(dst->low, 0, NoRegister, 0); + moveMR(c, dstSize, &memory, dstSize, dst); +} + +void compareRR(Context* c, + unsigned aSize, + lir::RegisterPair* a, 
+ unsigned bSize UNUSED, + lir::RegisterPair* b) +{ + assertT(c, not(isFpr(a) xor isFpr(b))); + assertT(c, aSize == bSize); + + if (isFpr(a)) { + append(c, fcmp(fpr(b), fpr(a), aSize)); + } else { + append(c, cmp(b->low, a->low, aSize)); + } +} + +void compareCR(Context* c, + unsigned aSize, + lir::Constant* a, + unsigned bSize UNUSED, + lir::RegisterPair* b) +{ + assertT(c, aSize == bSize); + + if (!isFpr(b) && a->value->resolved()) { + int64_t v = a->value->value(); + if (v == 0) { + append(c, cmp(b->low, Register(31), aSize)); + return; + } else if (v > 0 and v < 0x1000) { + append(c, cmpi(b->low, v, 0, aSize)); + return; + } else if (v > 0 and v < 0x1000000 and v % 0x1000 == 0) { + append(c, cmpi(b->low, v >> 12, 12, aSize)); + return; + } + } + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveCR(c, aSize, a, bSize, &tmp); + compareRR(c, bSize, &tmp, bSize, b); + c->client->releaseTemporary(tmp.low); +} + +void compareCM(Context* c, + unsigned aSize, + lir::Constant* a, + unsigned bSize, + lir::Memory* b) +{ + assertT(c, aSize == bSize); + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveMR(c, bSize, b, bSize, &tmp); + compareCR(c, aSize, a, bSize, &tmp); + c->client->releaseTemporary(tmp.low); +} + +void compareRM(Context* c, + unsigned aSize, + lir::RegisterPair* a, + unsigned bSize, + lir::Memory* b) +{ + assertT(c, aSize == bSize); + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveMR(c, bSize, b, bSize, &tmp); + compareRR(c, aSize, a, bSize, &tmp); + c->client->releaseTemporary(tmp.low); +} + +void compareMR(Context* c, + unsigned aSize, + lir::Memory* a, + unsigned bSize, + lir::RegisterPair* b) +{ + assertT(c, aSize == bSize); + + lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK)); + moveMR(c, aSize, a, aSize, &tmp); + compareRR(c, aSize, &tmp, bSize, b); + c->client->releaseTemporary(tmp.low); +} + +int32_t branch(Context* c, lir::TernaryOperation op) +{ + switch (op) { + case lir::JumpIfEqual: + case lir::JumpIfFloatEqual: + return beq(0); + + case lir::JumpIfNotEqual: + case lir::JumpIfFloatNotEqual: + return bne(0); + + case lir::JumpIfLess: + case lir::JumpIfFloatLess: + case lir::JumpIfFloatLessOrUnordered: + return blt(0); + + case lir::JumpIfGreater: + case lir::JumpIfFloatGreater: + return bgt(0); + + case lir::JumpIfLessOrEqual: + case lir::JumpIfFloatLessOrEqual: + case lir::JumpIfFloatLessOrEqualOrUnordered: + return ble(0); + + case lir::JumpIfGreaterOrEqual: + case lir::JumpIfFloatGreaterOrEqual: + return bge(0); + + case lir::JumpIfFloatGreaterOrUnordered: + return bhi(0); + + case lir::JumpIfFloatGreaterOrEqualOrUnordered: + return bpl(0); + + default: + abort(c); + } +} + +void conditional(Context* c, int32_t branch, lir::Constant* target) +{ + appendOffsetTask(c, target->value, offsetPromise(c)); + append(c, branch); +} + +void branch(Context* c, lir::TernaryOperation op, lir::Constant* target) +{ + conditional(c, branch(c, op), target); +} + +void branchRR(Context* c, + lir::TernaryOperation op, + unsigned size, + lir::RegisterPair* a, + lir::RegisterPair* b, + lir::Constant* target) +{ + compareRR(c, size, a, size, b); + branch(c, op, target); +} + +void branchCR(Context* c, + lir::TernaryOperation op, + unsigned size, + lir::Constant* a, + lir::RegisterPair* b, + lir::Constant* target) +{ + assertT(c, not isFloatBranch(op)); + + compareCR(c, size, a, size, b); + branch(c, op, target); +} + +void branchRM(Context* c, + lir::TernaryOperation op, + unsigned size, + lir::RegisterPair* a, + 
+              lir::Memory* b,
+              lir::Constant* target)
+{
+  assertT(c, not isFloatBranch(op));
+  assertT(c, size <= vm::TargetBytesPerWord);
+
+  if (a->low.index() == 31) {
+    // stack overflow checks need to compare to the stack pointer, but
+    // we can only encode that in the opposite operand order we're
+    // given, so we need to reverse everything:
+    assertT(c, op == lir::JumpIfGreaterOrEqual);
+    compareMR(c, size, b, size, a);
+    branch(c, lir::JumpIfLess, target);
+  } else {
+    compareRM(c, size, a, size, b);
+    branch(c, op, target);
+  }
+}
+
+void branchCM(Context* c,
+              lir::TernaryOperation op,
+              unsigned size,
+              lir::Constant* a,
+              lir::Memory* b,
+              lir::Constant* target)
+{
+  assertT(c, not isFloatBranch(op));
+  assertT(c, size <= vm::TargetBytesPerWord);
+
+  compareCM(c, size, a, size, b);
+  branch(c, op, target);
+}
+
+ShiftMaskPromise* shiftMaskPromise(Context* c,
+                                   Promise* base,
+                                   unsigned shift,
+                                   int64_t mask)
+{
+  return new (c->zone) ShiftMaskPromise(base, shift, mask);
+}
+
+void moveCM(Context* c,
+            unsigned srcSize,
+            lir::Constant* src,
+            unsigned dstSize,
+            lir::Memory* dst)
+{
+  lir::RegisterPair tmp(c->client->acquireTemporary(GPR_MASK));
+  moveCR(c, srcSize, src, dstSize, &tmp);
+  moveRM(c, dstSize, &tmp, dstSize, dst);
+  c->client->releaseTemporary(tmp.low);
+}
+
+void negateRR(Context* c,
+              unsigned srcSize,
+              lir::RegisterPair* src,
+              unsigned dstSize UNUSED,
+              lir::RegisterPair* dst)
+{
+  assertT(c, srcSize == dstSize);
+
+  append(c, neg(dst->low, src->low, srcSize));
+}
+
+void callR(Context* c, unsigned size UNUSED, lir::RegisterPair* target)
+{
+  assertT(c, size == vm::TargetBytesPerWord);
+  append(c, blr(target->low));
+}
+
+void callC(Context* c, unsigned size UNUSED, lir::Constant* target)
+{
+  assertT(c, size == vm::TargetBytesPerWord);
+
+  appendOffsetTask(c, target->value, offsetPromise(c));
+  append(c, bl(0));
+}
+
+void longCallC(Context* c, unsigned size UNUSED, lir::Constant* target)
+{
+  assertT(c, size == vm::TargetBytesPerWord);
+
+  lir::RegisterPair tmp(
+      Register(9));  // a non-arg reg that we don't mind clobbering
+  moveCR2(c, vm::TargetBytesPerWord, target, &tmp, offsetPromise(c));
+  callR(c, vm::TargetBytesPerWord, &tmp);
+}
+
+void longJumpC(Context* c, unsigned size UNUSED, lir::Constant* target)
+{
+  assertT(c, size == vm::TargetBytesPerWord);
+
+  lir::RegisterPair tmp(
+      Register(9));  // a non-arg reg that we don't mind clobbering
+  moveCR2(c, vm::TargetBytesPerWord, target, &tmp, offsetPromise(c));
+  jumpR(c, vm::TargetBytesPerWord, &tmp);
+}
+
+void jumpC(Context* c, unsigned size UNUSED, lir::Constant* target)
+{
+  assertT(c, size == vm::TargetBytesPerWord);
+
+  appendOffsetTask(c, target->value, offsetPromise(c));
+  append(c, b(0));
+}
+
+void return_(Context* c)
+{
+  append(c, br(LinkRegister));
+}
+
+void trap(Context* c)
+{
+  append(c, brk(0));
+}
+
+// todo: determine the minimal operation types and domains needed to
+// implement the following barriers (see
+// http://community.arm.com/groups/processors/blog/2011/10/19/memory-access-ordering-part-3--memory-access-ordering-in-the-arm-architecture).
+// For now, we just use DMB SY as a conservative but not necessarily
+// performant choice.
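+//
+// A possible refinement (not part of this patch): dmb(0xF) below encodes
+// DMB SY (CRm = 0b1111).  Assuming dmb() simply takes the CRm option value,
+// a less conservative mapping could plausibly be:
+//
+//   void loadBarrier(Context* c)       { append(c, dmb(0xD)); }  // DMB LD
+//   void storeStoreBarrier(Context* c) { append(c, dmb(0xE)); }  // DMB ST
+//   void storeLoadBarrier(Context* c)  { append(c, dmb(0xF)); }  // store-load still needs a full DMB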
+
+void memoryBarrier(Context* c)
+{
+  append(c, dmb(0xF));
+}
+
+void loadBarrier(Context* c)
+{
+  memoryBarrier(c);
+}
+
+void storeStoreBarrier(Context* c)
+{
+  memoryBarrier(c);
+}
+
+void storeLoadBarrier(Context* c)
+{
+  memoryBarrier(c);
+}
+
+} // namespace arm
+} // namespace codegen
+} // namespace avian
+
+#endif // TARGET_BYTES_PER_WORD == 8
diff --git a/src/codegen/target/arm/registers.h b/src/codegen/target/arm/registers.h
index ad13db466a..3bf4dc4041 100644
--- a/src/codegen/target/arm/registers.h
+++ b/src/codegen/target/arm/registers.h
@@ -14,6 +14,8 @@
 #include
 #include
 
+#include "avian/environment.h"
+
 namespace avian {
 namespace codegen {
 namespace arm {
@@ -21,16 +23,30 @@ namespace arm {
 const uint64_t MASK_LO32 = 0xffffffff;
 const unsigned MASK_LO8 = 0xff;
 
+#if TARGET_BYTES_PER_WORD == 8
+constexpr Register ThreadRegister(19);
+constexpr Register StackRegister(31);
+constexpr Register LinkRegister(30);
+constexpr Register FrameRegister(29);
+constexpr Register ProgramCounter(0xFE); // i.e. unaddressable
+
+const int N_GPRS = 32;
+const int N_FPRS = 32;
+const RegisterMask GPR_MASK = 0xffffffff;
+const RegisterMask FPR_MASK = 0xffffffff00000000;
+
+#else
+constexpr Register ThreadRegister(8);
+constexpr Register StackRegister(13);
+constexpr Register LinkRegister(14);
+constexpr Register FrameRegister(0xFE); // i.e. there is none
+constexpr Register ProgramCounter(15);
+
 const int N_GPRS = 16;
 const int N_FPRS = 16;
 const RegisterMask GPR_MASK = 0xffff;
 const RegisterMask FPR_MASK = 0xffff0000;
 
-inline bool isFpr(lir::RegisterPair* reg)
-{
-  return reg->low.index() >= N_GPRS;
-}
-
 inline int fpr64(Register reg)
 {
   return reg.index() - N_GPRS;
@@ -47,19 +63,13 @@ inline int fpr32(lir::RegisterPair* reg)
 {
   return fpr64(reg) << 1;
 }
-
-#ifdef ARCH_arm64
-constexpr Register ThreadRegister(19);
-constexpr Register StackRegister(31);
-constexpr Register LinkRegister(30);
-constexpr Register ProgramCounter(0xFE); // i.e. unaddressable
-#else
-constexpr Register ThreadRegister(8);
-constexpr Register StackRegister(13);
-constexpr Register LinkRegister(14);
-constexpr Register ProgramCounter(15);
 #endif
 
+inline bool isFpr(lir::RegisterPair* reg)
+{
+  return reg->low.index() >= N_GPRS;
+}
+
 } // namespace arm
 } // namespace codegen
 } // namespace avian
diff --git a/src/compile-arm.S b/src/compile-arm.S
index 37b61da454..83703af607 100644
--- a/src/compile-arm.S
+++ b/src/compile-arm.S
@@ -16,11 +16,11 @@
 #define BYTES_PER_WORD 4
 
 #define LOCAL(x) .L##x
-
+
 #ifdef __APPLE__
 # define GLOBAL(x) _##x
 #else
-# define GLOBAL(x) x
+# define GLOBAL(x) x
 #endif
 
 #define CONTINUATION_NEXT 4
@@ -29,7 +29,7 @@
 #define CONTINUATION_FRAME_POINTER_OFFSET 24
 #define CONTINUATION_LENGTH 28
 #define CONTINUATION_BODY 32
-
+
 .globl GLOBAL(vmInvoke)
 .align 2
 GLOBAL(vmInvoke):
@@ -56,7 +56,7 @@ GLOBAL(vmInvoke):
   eor r4, sp, r3
   tst r4, #4
   subne sp, sp, #4
-
+
   // copy arguments into place
   sub sp, r3
   mov r4, #0
@@ -87,7 +87,7 @@ LOCAL(vmInvoke_argumentTest):
 GLOBAL(vmInvoke_returnAddress):
   // restore stack pointer
   ldr sp, [r8, #TARGET_THREAD_SCRATCH]
-
+
   // clear MyThread::stack to avoid confusing another thread calling
   // java.lang.Thread.getStackTrace on this one. See
   // MyProcess::getStackTrace in compile.cpp for details on how we get
@@ -109,7 +109,7 @@ GLOBAL(vmInvoke_safeStack):
   ldr r6,[r5,#CONTINUATION_LENGTH]
   lsl r6,r6,#2
   neg r7,r6
-  add r7,r7,#-80
+  add r7,r7,#-80 // 80 bytes for callee-saved register values
   mov r4,sp
   str r4,[sp,r7]!
@@ -167,10 +167,10 @@ LOCAL(vmInvoke_handleException):
   bx r7
 
 LOCAL(vmInvoke_exit):
-#endif // AVIAN_CONTINUATIONS
 
   mov ip, #0
   str ip, [r8, #TARGET_THREAD_STACK]
+#endif // AVIAN_CONTINUATIONS
 
   // restore return type
   ldr ip, [sp], #4
@@ -201,7 +201,7 @@ GLOBAL(vmJumpAndInvoke):
   // which is not true in this case
   sub r2,r2,r6
   sub r2,r2,#84
-
+
   mov r8,r0
 
   // copy arguments into place
@@ -220,7 +220,7 @@ LOCAL(vmJumpAndInvoke_argumentTest):
   // the arguments have been copied, so we can set the real stack
   // pointer now
   mov sp,r2
-
+
   // set return address to vmInvoke_returnAddress
 #ifdef __APPLE__
   movw r11, :lower16:(GLOBAL(vmInvoke_returnAddress)-(LOCAL(vmJumpAndInvoke_getAddress)+8))
@@ -246,7 +246,7 @@ LOCAL(vmInvoke_getAddress_word):
 LOCAL(vmJumpAndInvoke_getAddress_word):
   .word _GLOBAL_OFFSET_TABLE_-(LOCAL(vmJumpAndInvoke_getAddress)+8)
 #endif // not __APPLE__
-
+
 #else // not AVIAN_CONTINUATIONS
   // vmJumpAndInvoke should only be called when continuations are
   // enabled, so we force a crash if we reach here:
diff --git a/src/compile-arm64.S b/src/compile-arm64.S
index 65f76df6f3..c1c9c942b2 100644
--- a/src/compile-arm64.S
+++ b/src/compile-arm64.S
@@ -13,23 +13,23 @@
 
 .text
 
-#define BYTES_PER_WORD 4
+#define BYTES_PER_WORD 8
 
 #define LOCAL(x) .L##x
-
+
 #ifdef __APPLE__
 # define GLOBAL(x) _##x
 #else
-# define GLOBAL(x) x
+# define GLOBAL(x) x
 #endif
 
-#define CONTINUATION_NEXT 4
-#define CONTINUATION_ADDRESS 16
-#define CONTINUATION_RETURN_ADDRESS_OFFSET 20
-#define CONTINUATION_FRAME_POINTER_OFFSET 24
-#define CONTINUATION_LENGTH 28
-#define CONTINUATION_BODY 32
-
+#define CONTINUATION_NEXT 8
+#define CONTINUATION_ADDRESS 32
+#define CONTINUATION_RETURN_ADDRESS_OFFSET 40
+#define CONTINUATION_FRAME_POINTER_OFFSET 48
+#define CONTINUATION_LENGTH 56
+#define CONTINUATION_BODY 64
+
 .globl GLOBAL(vmInvoke)
 .align 2
 GLOBAL(vmInvoke):
@@ -43,6 +43,7 @@ GLOBAL(vmInvoke):
 
   // allocate frame
   stp x29, x30, [sp,#-96]!
+  mov x29, sp
 
   // save callee-saved register values
   stp x19, x20, [sp,#16]
@@ -59,7 +60,7 @@ GLOBAL(vmInvoke):
 
   // copy arguments into place
   sub sp, sp, w3, uxtw
-  mov x5, #0
+  mov x4, #0
   b LOCAL(vmInvoke_argumentTest)
 
 LOCAL(vmInvoke_argumentLoop):
@@ -89,22 +90,74 @@ GLOBAL(vmInvoke_returnAddress):
   // MyProcess::getStackTrace in compile.cpp for details on how we get
   // a reliable stack trace from a thread that might be interrupted at
   // any point in its execution.
-  mov x5, #0
-  str x5, [x19, #TARGET_THREAD_STACK]
+  str xzr, [x19, #TARGET_THREAD_STACK]
 
 .globl GLOBAL(vmInvoke_safeStack)
 .align 2
 GLOBAL(vmInvoke_safeStack):
 
 #ifdef AVIAN_CONTINUATIONS
-#error todo
+  // call the next continuation, if any
+  ldr x5, [x19,#TARGET_THREAD_CONTINUATION]
+  cmp x5, xzr
+  b.eq LOCAL(vmInvoke_exit)
+
+  ldr x6, [x5,#CONTINUATION_LENGTH]
+  lsl x6, x6, #3
+  neg x7, x6
+  add x7, x7, #-128 // 128 bytes for callee-saved register values
+  mov x4, sp
+  add sp, sp, x7
+  str x4, [sp]
+
+  add x7, x5, #CONTINUATION_BODY
+  mov x11, xzr
+  b LOCAL(vmInvoke_continuationTest)
+
+LOCAL(vmInvoke_continuationLoop):
+  ldr x9, [x7,x11]
+  str x9, [sp,x11]
+  add x11, x11, #8
+
+LOCAL(vmInvoke_continuationTest):
+  cmp x11, x6
+  b.le LOCAL(vmInvoke_continuationLoop)
+
+  ldr x7, [x5,#CONTINUATION_RETURN_ADDRESS_OFFSET]
+  adr x11, GLOBAL(vmInvoke_returnAddress)
+  str x11, [sp,x7]
+
+  ldr x7, [x5,#CONTINUATION_NEXT]
+  str x7, [x19,#TARGET_THREAD_CONTINUATION]
+
+  // call the continuation unless we're handling an exception
+  ldr x7, [x19,#TARGET_THREAD_EXCEPTION]
+  cmp x7, xzr
+  b.ne LOCAL(vmInvoke_handleException)
+  ldr x7, [x5,#CONTINUATION_ADDRESS]
+  br x7
+
+LOCAL(vmInvoke_handleException):
+  // we're handling an exception - call the exception handler instead
+  str xzr, [x19,#TARGET_THREAD_EXCEPTION]
+  ldr x11, [x19,#TARGET_THREAD_EXCEPTIONSTACKADJUSTMENT]
+  ldr x9, [sp]
+  neg x11, x11
+  add sp, sp, x11
+  str x9, [sp]
+  ldr x11, [x19,#TARGET_THREAD_EXCEPTIONOFFSET]
+  str x7, [sp,x11]
+
+  ldr x7, [x19,#TARGET_THREAD_EXCEPTIONHANDLER]
+  br x7
+
+LOCAL(vmInvoke_exit):
+  str xzr, [x19, #TARGET_THREAD_STACK]
+
 #endif // AVIAN_CONTINUATIONS
 
-  mov x5, #0
-  str x5, [x19, #TARGET_THREAD_STACK]
-
   // restore return type
-  ldr w5, [sp], #4
+  ldr w5, [sp],#16
 
   // restore callee-saved register values
   ldp x19, x20, [sp,#16]
@@ -121,7 +174,44 @@ LOCAL(vmInvoke_return):
 .align 2
 GLOBAL(vmJumpAndInvoke):
 #ifdef AVIAN_CONTINUATIONS
-#error todo
+  // x0: thread
+  // x1: address
+  // x2: stack
+  // x3: argumentFootprint
+  // x4: arguments
+  // x5: frameSize
+
+  // allocate new frame, adding room for callee-saved registers, plus
+  // 8 bytes of padding since the calculation of frameSize assumes 8
+  // bytes have already been allocated to save the return address,
+  // which is not true in this case
+  sub x2, x2, x5
+  sub x2, x2, #136
+
+  mov x19, x0
+
+  // copy arguments into place
+  mov x6, xzr
+  b LOCAL(vmJumpAndInvoke_argumentTest)
+
+LOCAL(vmJumpAndInvoke_argumentLoop):
+  ldr x12, [x4,x6]
+  str x12, [x2,x6]
+  add x6, x6, #4
+
+LOCAL(vmJumpAndInvoke_argumentTest):
+  cmp x6, x3
+  ble LOCAL(vmJumpAndInvoke_argumentLoop)
+
+  // the arguments have been copied, so we can set the real stack
+  // pointer now
+  mov sp, x2
+
+  // set return address to vmInvoke_returnAddress
+  adr x30, GLOBAL(vmInvoke_returnAddress)
+
+  br x1
+
 #else // not AVIAN_CONTINUATIONS
   // vmJumpAndInvoke should only be called when continuations are
   // enabled, so we force a crash if we reach here:
diff --git a/src/compile.cpp b/src/compile.cpp
index 47b55574e7..51790bb0b3 100644
--- a/src/compile.cpp
+++ b/src/compile.cpp
@@ -2189,6 +2189,8 @@ GcContinuation* makeCurrentContinuation(MyThread* t,
 
   *targetIp = 0;
   while (*targetIp == 0) {
+    assertT(t, ip);
+
     GcMethod* method = methodForIp(t, ip);
     if (method) {
       PROTECT(t, method);
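A note on the constant paths in compareCR above: AArch64 cmp (an alias of subs) only accepts a 12-bit unsigned immediate, optionally shifted left by 12 bits, which is why the patch emits cmpi only for values below 0x1000, or for multiples of 0x1000 below 0x1000000, and otherwise materializes the constant in a temporary register. A minimal standalone sketch of that encodability check follows; the helper name is illustrative and not part of the patch.

    #include <cstdint>

    // Mirrors the conditions used in compareCR: returns true if `v` can be
    // encoded directly as a cmp/subs immediate, along with the 12-bit value
    // and the shift (0 or 12) to use.  Zero is handled separately in the
    // patch by comparing against the zero register.
    bool encodableAsCmpImmediate(int64_t v, unsigned* imm12, unsigned* shift)
    {
      if (v > 0 && v < 0x1000) {  // fits in imm12, LSL #0
        *imm12 = static_cast<unsigned>(v);
        *shift = 0;
        return true;
      }
      if (v > 0 && v < 0x1000000 && v % 0x1000 == 0) {  // imm12, LSL #12
        *imm12 = static_cast<unsigned>(v >> 12);
        *shift = 12;
        return true;
      }
      return false;  // otherwise move the constant into a register first
    }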