fix aliasing bug in util.cpp

We use a template function called "cast" to get raw access to fields
in in the VM.  In particular, we use this function in util.cpp to
treat reference fields as intptr_t fields so we can use the least
significant bit as the red/black flag in red/black tree nodes.
Unfortunately, this runs afoul of the type aliasing rules in C/C++,
and the compiler is permitted to optimize in a way that assumes such
aliasing cannot occur.  Such optimization caused all the nodes in the
tree to be black, leading to extremely unbalanced trees and thus slow
performance.

The fix in this case is to use the __may_alias__ attribute to tell the
compiler we're doing something devious.  I've also used this technique
to avoid other potential aliasing problems.  There may be others
lurking, so a complete audit of the VM might be a good idea.
This commit is contained in:
Joel Dice 2011-02-11 21:57:27 -07:00
parent cb7dc1aeef
commit 8d50d0fd76
4 changed files with 29 additions and 17 deletions

View File

@ -59,6 +59,8 @@ typedef uint64_t uintptr_t;
# error "unsupported architecture"
# endif
typedef intptr_t alias_t;
#else // not _MSC_VER
# include "stdint.h"
@ -86,6 +88,8 @@ typedef uint64_t uintptr_t;
# error "unsupported architecture"
# endif
typedef intptr_t __attribute__((__may_alias__)) alias_t;
#endif // not _MSC_VER
#undef JNIEXPORT

View File

@ -2145,8 +2145,8 @@ class HeapClient: public Heap::Client {
memcpy(dst, src, n * BytesPerWord);
if (hashTaken(t, src)) {
cast<uintptr_t>(dst, 0) &= PointerMask;
cast<uintptr_t>(dst, 0) |= ExtendedMark;
cast<alias_t>(dst, 0) &= PointerMask;
cast<alias_t>(dst, 0) |= ExtendedMark;
extendedWord(t, dst, base) = takeHash(t, src);
}
}
@ -2781,7 +2781,7 @@ allocate3(Thread* t, Allocator* allocator, Machine::AllocationType type,
memset(o, 0, sizeInBytes);
cast<uintptr_t>(o, 0) = FixedMark;
cast<alias_t>(o, 0) = FixedMark;
t->m->fixedFootprint += total;
@ -2796,7 +2796,7 @@ allocate3(Thread* t, Allocator* allocator, Machine::AllocationType type,
memset(o, 0, sizeInBytes);
cast<uintptr_t>(o, 0) = FixedMark;
cast<alias_t>(o, 0) = FixedMark;
return o;
}

View File

@ -1878,8 +1878,8 @@ setObjectClass(Thread*, object o, object value)
{
cast<object>(o, 0)
= reinterpret_cast<object>
(reinterpret_cast<uintptr_t>(value)
| (reinterpret_cast<uintptr_t>(cast<object>(o, 0)) & (~PointerMask)));
(reinterpret_cast<alias_t>(value)
| (reinterpret_cast<alias_t>(cast<object>(o, 0)) & (~PointerMask)));
}
inline const char*
@ -2094,19 +2094,19 @@ setType(Thread* t, Machine::Type type, object value)
inline bool
objectFixed(Thread*, object o)
{
return (cast<uintptr_t>(o, 0) & (~PointerMask)) == FixedMark;
return (cast<alias_t>(o, 0) & (~PointerMask)) == FixedMark;
}
inline bool
objectExtended(Thread*, object o)
{
return (cast<uintptr_t>(o, 0) & (~PointerMask)) == ExtendedMark;
return (cast<alias_t>(o, 0) & (~PointerMask)) == ExtendedMark;
}
inline bool
hashTaken(Thread*, object o)
{
return (cast<uintptr_t>(o, 0) & (~PointerMask)) == HashTakenMark;
return (cast<alias_t>(o, 0) & (~PointerMask)) == HashTakenMark;
}
inline unsigned
@ -2237,7 +2237,7 @@ markHashTaken(Thread* t, object o)
ACQUIRE_RAW(t, t->m->heapLock);
cast<uintptr_t>(o, 0) |= HashTakenMark;
cast<alias_t>(o, 0) |= HashTakenMark;
t->m->heap->pad(o);
}

View File

@ -66,32 +66,32 @@ inline object
getTreeNodeValue(Thread*, object n)
{
return reinterpret_cast<object>
(cast<intptr_t>(n, TreeNodeValue) & PointerMask);
(cast<alias_t>(n, TreeNodeValue) & PointerMask);
}
inline void
setTreeNodeValue(Thread* t, object n, object value)
{
intptr_t red = cast<intptr_t>(n, TreeNodeValue) & (~PointerMask);
alias_t red = cast<alias_t>(n, TreeNodeValue) & (~PointerMask);
set(t, n, TreeNodeValue, value);
cast<intptr_t>(n, TreeNodeValue) |= red;
cast<alias_t>(n, TreeNodeValue) |= red;
}
inline bool
treeNodeRed(Thread*, object n)
{
return (cast<intptr_t>(n, TreeNodeValue) & (~PointerMask)) == 1;
return (cast<alias_t>(n, TreeNodeValue) & (~PointerMask)) == 1;
}
inline void
setTreeNodeRed(Thread*, object n, bool red)
{
if (red) {
cast<intptr_t>(n, TreeNodeValue) |= 1;
cast<alias_t>(n, TreeNodeValue) |= 1;
} else {
cast<intptr_t>(n, TreeNodeValue) &= PointerMask;
cast<alias_t>(n, TreeNodeValue) &= PointerMask;
}
}
@ -108,7 +108,7 @@ cloneTreeNode(Thread* t, object n)
object
treeFind(Thread* t, object tree, intptr_t key, object sentinal,
intptr_t (*compare)(Thread* t, intptr_t key, object b))
intptr_t (*compare)(Thread* t, intptr_t key, object b))
{
object node = tree;
while (node != sentinal) {
@ -140,6 +140,7 @@ treeFind(Thread* t, TreeContext* c, object old, intptr_t key, object node,
object new_ = newRoot;
PROTECT(t, new_);
int count = 0;
while (old != sentinal) {
c->ancestors = path(c, new_, c->ancestors);
@ -162,6 +163,13 @@ treeFind(Thread* t, TreeContext* c, object old, intptr_t key, object node,
c->ancestors = c->ancestors->next;
return;
}
if (++ count > 100) {
// if we've gone this deep, we probably have an unbalanced tree,
// which should only happen if there's a serious bug somewhere
// in our insertion process
abort(t);
}
}
setTreeNodeValue(t, new_, getTreeNodeValue(t, node));