use armv7 memory barriers by default

armv7 and later provide weaker cache coherency models than armv6 and
earlier, so we cannot just implement memory barriers as no-ops.  This
patch uses the DMB instruction (or the equivalent OS-provided barrier
function) to implement barriers.  This should fix concurrency issues
on newer chips such as the Apple A6 and A7.

If you still need to support ARMv6 devices, you should pass
"armv6=true" to make when building Avian.  Ideally, the VM would
detect what kind of CPU it was executing on at runtime and direct the
JIT compiler accordingly, but I don't know how to do that on ARM.
Patches are welcome, though!
This commit is contained in:
Joel Dice 2014-01-10 14:38:57 -07:00
parent 43af2bf260
commit 2b1177039e
7 changed files with 62 additions and 7 deletions

View File

@ -127,6 +127,10 @@ devices. See [here](https://github.com/ReadyTalk/hello-ios) for an
example of an Xcode project for iOS which uses Avian. example of an Xcode project for iOS which uses Avian.
* _default:_ false * _default:_ false
* `armv6` - if true, don't use any instructions newer than armv6. By
default, we assume the target is armv7 or later, which requires explicit
memory barrier instructions to ensure cache coherency.
* `bootimage` - if true, create a boot image containing the pre-parsed * `bootimage` - if true, create a boot image containing the pre-parsed
class library and ahead-of-time compiled methods. This option is class library and ahead-of-time compiled methods. This option is
only valid for process=compile builds. Note that you may need to only valid for process=compile builds. Note that you may need to

View File

@ -470,6 +470,10 @@ ifeq ($(arch),arm)
endif endif
endif endif
ifeq ($(armv6),true)
cflags += -DAVIAN_ASSUME_ARMV6
endif
ifeq ($(ios),true) ifeq ($(ios),true)
cflags += -DAVIAN_IOS cflags += -DAVIAN_IOS
use-lto = false use-lto = false
@ -1895,6 +1899,7 @@ $(bootimage-generator): $(bootimage-generator-objects) $(vm-objects)
arch=$(build-arch) \ arch=$(build-arch) \
aot-only=false \ aot-only=false \
target-arch=$(arch) \ target-arch=$(arch) \
armv6=$(armv6) \
platform=$(bootimage-platform) \ platform=$(bootimage-platform) \
target-format=$(target-format) \ target-format=$(target-format) \
openjdk=$(openjdk) \ openjdk=$(openjdk) \

View File

@ -79,11 +79,25 @@ trap()
#endif #endif
} }
// todo: determine the minimal operation types and domains needed to
// implement the following barriers (see
// http://community.arm.com/groups/processors/blog/2011/10/19/memory-access-ordering-part-3--memory-access-ordering-in-the-arm-architecture).
// For now, we just use DMB SY as a conservative but not necessarily
// performant choice.
#ifndef _MSC_VER #ifndef _MSC_VER
// Full memory barrier for the VM's own (non-JIT) code.
//
// The implementation is selected at compile time:
//  - Apple platforms: OSMemoryBarrier().
//  - GCC-compatible compilers, version 4.1 or later: __sync_synchronize().
//  - otherwise, unless AVIAN_ASSUME_ARMV6 is defined: an explicit DMB.
//  - otherwise: a compiler-only barrier (no instruction is emitted).
inline void
memoryBarrier()
{
#ifdef __APPLE__
  OSMemoryBarrier();
#elif (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 1))
  // Major and minor must be compared together: testing them
  // independently rejects releases such as 5.0 whose minor number is 0.
  __sync_synchronize();
#elif (! defined AVIAN_ASSUME_ARMV6)
  __asm__ __volatile__ ("dmb" : : : "memory");
#else
  __asm__ __volatile__ ("" : : : "memory");
#endif
}
#endif #endif
@ -148,7 +162,7 @@ inline bool
atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_) atomicCompareAndSwap32(uint32_t* p, uint32_t old, uint32_t new_)
{ {
#ifdef __APPLE__ #ifdef __APPLE__
return OSAtomicCompareAndSwap32(old, new_, reinterpret_cast<int32_t*>(p)); return OSAtomicCompareAndSwap32Barrier(old, new_, reinterpret_cast<int32_t*>(p));
#elif (defined __QNX__) #elif (defined __QNX__)
return old == _smp_cmpxchg(p, old, new_); return old == _smp_cmpxchg(p, old, new_);
#else #else

View File

@ -172,6 +172,8 @@ inline int blo(int offset) { return SETCOND(b(offset), CC); }
inline int bhs(int offset) { return SETCOND(b(offset), CS); } inline int bhs(int offset) { return SETCOND(b(offset), CS); }
inline int bpl(int offset) { return SETCOND(b(offset), PL); } inline int bpl(int offset) { return SETCOND(b(offset), PL); }
inline int fmstat() { return fmrx(15, FPSCR); } inline int fmstat() { return fmrx(15, FPSCR); }
// Raw encoding of the "DMB SY" instruction: 0xf57ff050 is the DMB opcode
// with an empty option field, and the low nibble selects the barrier
// option (0xf = SY, the full-system barrier).
// todo: make this pretty:
inline int dmb()
{
  return 0xf57ff050 | 0xf;
}
} // namespace isa } // namespace isa

View File

@ -58,9 +58,9 @@ void populateTables(ArchitectureContext* con) {
BranchOperationType* bro = con->branchOperations; BranchOperationType* bro = con->branchOperations;
zo[lir::Return] = return_; zo[lir::Return] = return_;
zo[lir::LoadBarrier] = memoryBarrier; zo[lir::LoadBarrier] = loadBarrier;
zo[lir::StoreStoreBarrier] = memoryBarrier; zo[lir::StoreStoreBarrier] = storeStoreBarrier;
zo[lir::StoreLoadBarrier] = memoryBarrier; zo[lir::StoreLoadBarrier] = storeLoadBarrier;
zo[lir::Trap] = trap; zo[lir::Trap] = trap;
uo[Multimethod::index(lir::LongCall, C)] = CAST1(longCallC); uo[Multimethod::index(lir::LongCall, C)] = CAST1(longCallC);

View File

@ -1228,7 +1228,33 @@ void trap(Context* con)
emit(con, bkpt(0)); emit(con, bkpt(0));
} }
void memoryBarrier(Context*) {} // todo: determine the minimal operation types and domains needed to
// implement the following barriers (see
// http://community.arm.com/groups/processors/blog/2011/10/19/memory-access-ordering-part-3--memory-access-ordering-in-the-arm-architecture).
// For now, we just use DMB SY as a conservative but not necessarily
// performant choice.
// Emits a DMB instruction into the generated code.  When
// AVIAN_ASSUME_ARMV6 is defined (armv6=true builds) nothing is emitted,
// which is why the parameter is tagged UNUSED.
void memoryBarrier(Context* con UNUSED)
{
#ifdef AVIAN_ASSUME_ARMV6
  // armv6 build: no barrier instruction is generated.
#else
  emit(con, dmb());
#endif
}
// Load barrier: currently just forwards to memoryBarrier(), so it emits
// the same barrier as every other barrier kind.
void loadBarrier(Context* con) { memoryBarrier(con); }
// Store-store barrier: currently just forwards to memoryBarrier(), so it
// emits the same barrier as every other barrier kind.
void storeStoreBarrier(Context* con) { memoryBarrier(con); }
// Store-load barrier: currently just forwards to memoryBarrier(), so it
// emits the same barrier as every other barrier kind.
void storeLoadBarrier(Context* con) { memoryBarrier(con); }
} // namespace arm } // namespace arm
} // namespace codegen } // namespace codegen

View File

@ -230,7 +230,11 @@ void return_(Context* con);
void trap(Context* con); void trap(Context* con);
void memoryBarrier(Context*); void loadBarrier(Context*);
void storeStoreBarrier(Context*);
void storeLoadBarrier(Context*);
} // namespace arm } // namespace arm
} // namespace codegen } // namespace codegen