From 2b1177039ea0033b9aa6468fd9e1fc63d1e447d1 Mon Sep 17 00:00:00 2001
From: Joel Dice
Date: Fri, 10 Jan 2014 14:38:57 -0700
Subject: [PATCH] use armv7 memory barriers by default

armv7 and later provide weaker cache coherency models than armv6 and
earlier, so we cannot just implement memory barriers as no-ops.  This
patch uses the DMB instruction (or the equivalent OS-provided barrier
function) to implement barriers.  This should fix concurrency issues
on newer chips such as the Apple A6 and A7.

If you still need to support ARMv6 devices, you should pass
"armv6=true" to make when building Avian.  Ideally, the VM would
detect what kind of CPU it was executing on at runtime and direct the
JIT compiler accordingly, but I don't know how to do that on ARM.
Patches are welcome, though!
---
 README.md                              |  4 ++++
 makefile                               |  5 +++++
 src/avian/arm.h                        | 18 +++++++++++++++--
 src/codegen/target/arm/encode.h        |  2 ++
 src/codegen/target/arm/multimethod.cpp |  6 +++---
 src/codegen/target/arm/operations.cpp  | 28 +++++++++++++++++++++++++-
 src/codegen/target/arm/operations.h    |  6 +++++-
 7 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 9bdf164297..8f722634be 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,10 @@ devices.  See [here](https://github.com/ReadyTalk/hello-ios) for an
 example of an Xcode project for iOS which uses Avian.
   * _default:_ false
 
+ * `armv6` - if true, don't use any instructions newer than armv6.  By
+default, we assume the target is armv7 or later, and thus requires explicit
+memory barrier instructions to ensure cache coherency
+
  * `bootimage` - if true, create a boot image containing the pre-parsed
 class library and ahead-of-time compiled methods.  This option is only
 valid for process=compile builds.  Note that you may need to
diff --git a/makefile b/makefile
index 5f22bc19ca..0adfb9b2f8 100755
--- a/makefile
+++ b/makefile
@@ -470,6 +470,10 @@ ifeq ($(arch),arm)
   endif
 endif
 
+ifeq ($(armv6),true)
+  cflags += -DAVIAN_ASSUME_ARMV6
+endif
+
 ifeq ($(ios),true)
   cflags += -DAVIAN_IOS
   use-lto = false
@@ -1895,6 +1899,7 @@ $(bootimage-generator): $(bootimage-generator-objects) $(vm-objects)
 		arch=$(build-arch) \
 		aot-only=false \
 		target-arch=$(arch) \
+		armv6=$(armv6) \
 		platform=$(bootimage-platform) \
 		target-format=$(target-format) \
 		openjdk=$(openjdk) \
diff --git a/src/avian/arm.h b/src/avian/arm.h
index c2a88b608c..165c2d603c 100644
--- a/src/avian/arm.h
+++ b/src/avian/arm.h
@@ -79,11 +79,25 @@ trap()
 #endif
 }
 
+// todo: determine the minimal operation types and domains needed to
+// implement the following barriers (see
+// http://community.arm.com/groups/processors/blog/2011/10/19/memory-access-ordering-part-3--memory-access-ordering-in-the-arm-architecture).
+// For now, we just use DMB SY as a conservative but not necessarily
+// performant choice.
+
 #ifndef _MSC_VER
 inline void
 memoryBarrier()
 {
-  asm("nop");
+#ifdef __APPLE__
+  OSMemoryBarrier();
+#elif (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 1)
+  return __sync_synchronize();
+#elif (! defined AVIAN_ASSUME_ARMV6)
+  __asm__ __volatile__ ("dmb" : : : "memory");
+#else
+  __asm__ __volatile__ ("" : : : "memory");
+#endif
 }
 #endif
 
@@ -148,7 +162,7 @@ inline bool
 atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
 {
 #ifdef __APPLE__
-  return OSAtomicCompareAndSwap32(old, new_, reinterpret_cast<int32_t*>(p));
+  return OSAtomicCompareAndSwap32Barrier(old, new_, reinterpret_cast<int32_t*>(p));
 #elif (defined __QNX__)
   return old == _smp_cmpxchg(p, old, new_);
 #else
diff --git a/src/codegen/target/arm/encode.h b/src/codegen/target/arm/encode.h
index 2e45bccebb..857148b837 100644
--- a/src/codegen/target/arm/encode.h
+++ b/src/codegen/target/arm/encode.h
@@ -172,6 +172,8 @@ inline int blo(int offset) { return SETCOND(b(offset), CC); }
 inline int bhs(int offset) { return SETCOND(b(offset), CS); }
 inline int bpl(int offset) { return SETCOND(b(offset), PL); }
 inline int fmstat() { return fmrx(15, FPSCR); }
+// todo: make this pretty:
+inline int dmb() { return 0xf57ff05f; }
 
 } // namespace isa
 
diff --git a/src/codegen/target/arm/multimethod.cpp b/src/codegen/target/arm/multimethod.cpp
index 35b10aefd2..1c17574967 100644
--- a/src/codegen/target/arm/multimethod.cpp
+++ b/src/codegen/target/arm/multimethod.cpp
@@ -58,9 +58,9 @@ void populateTables(ArchitectureContext* con) {
   BranchOperationType* bro = con->branchOperations;
 
   zo[lir::Return] = return_;
-  zo[lir::LoadBarrier] = memoryBarrier;
-  zo[lir::StoreStoreBarrier] = memoryBarrier;
-  zo[lir::StoreLoadBarrier] = memoryBarrier;
+  zo[lir::LoadBarrier] = loadBarrier;
+  zo[lir::StoreStoreBarrier] = storeStoreBarrier;
+  zo[lir::StoreLoadBarrier] = storeLoadBarrier;
   zo[lir::Trap] = trap;
 
   uo[Multimethod::index(lir::LongCall, C)] = CAST1(longCallC);
diff --git a/src/codegen/target/arm/operations.cpp b/src/codegen/target/arm/operations.cpp
index 6d37231ca7..6a4ff5ccaa 100644
--- a/src/codegen/target/arm/operations.cpp
+++ b/src/codegen/target/arm/operations.cpp
@@ -1228,7 +1228,33 @@ void trap(Context* con)
   emit(con, bkpt(0));
 }
 
-void memoryBarrier(Context*) {}
+// todo: determine the minimal operation types and domains needed to
+// implement the following barriers (see
+// http://community.arm.com/groups/processors/blog/2011/10/19/memory-access-ordering-part-3--memory-access-ordering-in-the-arm-architecture).
+// For now, we just use DMB SY as a conservative but not necessarily
+// performant choice.
+
+void memoryBarrier(Context* con UNUSED)
+{
+#ifndef AVIAN_ASSUME_ARMV6
+  emit(con, dmb());
+#endif
+}
+
+void loadBarrier(Context* con)
+{
+  memoryBarrier(con);
+}
+
+void storeStoreBarrier(Context* con)
+{
+  memoryBarrier(con);
+}
+
+void storeLoadBarrier(Context* con)
+{
+  memoryBarrier(con);
+}
 
 } // namespace arm
 } // namespace codegen
diff --git a/src/codegen/target/arm/operations.h b/src/codegen/target/arm/operations.h
index 7d97e5a75e..7c5d063f80 100644
--- a/src/codegen/target/arm/operations.h
+++ b/src/codegen/target/arm/operations.h
@@ -230,7 +230,11 @@ void return_(Context* con);
 
 void trap(Context* con);
 
-void memoryBarrier(Context*);
+void loadBarrier(Context*);
+
+void storeStoreBarrier(Context*);
+
+void storeLoadBarrier(Context*);
 
 } // namespace arm
 } // namespace codegen