diff --git a/repos/base/include/spec/arm/cpu/string.h b/repos/base/include/spec/arm/cpu/string.h index 2e65f39021..1cb8d3a15d 100644 --- a/repos/base/include/spec/arm/cpu/string.h +++ b/repos/base/include/spec/arm/cpu/string.h @@ -30,21 +30,25 @@ namespace Genode { { unsigned char *d = (unsigned char *)dst, *s = (unsigned char *)src; - /* check 4 byte; alignment */ - size_t d_align = (size_t)d & 0x3; - size_t s_align = (size_t)s & 0x3; + /* fetch the first cache line */ + asm volatile ("pld [%0, #0]\n\t" : "+r" (s)); - /* only same alignments work for the following LDM/STM loop */ - if (d_align != s_align) + /* check 32-byte (cache line) alignment */ + size_t d_align = (size_t)d & 0x1f; + size_t s_align = (size_t)s & 0x1f; + + /* only same word-alignments work for the following LDM/STM loop */ + if ((d_align & 0x3) != (s_align & 0x3)) return size; - /* copy to 4 byte alignment */ - for (; (size > 0) && (s_align > 0) && (s_align < 4); + /* copy to 32-byte alignment */ + for (; (size > 0) && (s_align > 0) && (s_align < 32); s_align++, *d++ = *s++, size--); /* copy 32 byte chunks */ for (; size >= 32; size -= 32) { asm volatile ("ldmia %0!, {r3 - r10} \n\t" + "pld [%0, #160]\n\t" "stmia %1!, {r3 - r10} \n\t" : "+r" (s), "+r" (d) :: "r3","r4","r5","r6","r7","r8","r9","r10");