From 9409f814a41fef978c73733c7ae988571656b624 Mon Sep 17 00:00:00 2001
From: Johannes Schlatow
Date: Fri, 25 Mar 2022 13:30:53 +0100
Subject: [PATCH] memcpy (x86): implement memcpy_cpu

By changing the bytewise copy into a wordwise copy, we get a speedup of ~3
(on base-linux x86_64).

genodelabs/genode#4456
---
 repos/base/include/spec/x86/cpu/string.h | 32 +++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/repos/base/include/spec/x86/cpu/string.h b/repos/base/include/spec/x86/cpu/string.h
index cd45a6d4fd..fdb3e50dec 100644
--- a/repos/base/include/spec/x86/cpu/string.h
+++ b/repos/base/include/spec/x86/cpu/string.h
@@ -25,7 +25,37 @@ namespace Genode {
 	 *
 	 * \return number of bytes not copied
 	 */
-	inline size_t memcpy_cpu(void *, const void *, size_t size) { return size; }
+	inline size_t memcpy_cpu(void * dst, const void * src, size_t size)
+	{
+		/* 'may_alias' keeps word accesses to arbitrarily typed memory valid */
+		typedef unsigned long __attribute__((may_alias)) word_t;
+
+		enum { LEN = sizeof(word_t), MASK = LEN-1 };
+
+		unsigned char       *d = (unsigned char       *)dst;
+		unsigned char const *s = (unsigned char const *)src;
+
+		/* byte offset of each pointer within a machine word */
+		size_t d_align = (size_t)d & MASK;
+		size_t s_align = (size_t)s & MASK;
+
+		/*
+		 * Both pointers must reach a word boundary at the same time,
+		 * otherwise nothing is copied and 'size' is reported as not copied
+		 */
+		if (d_align != s_align)
+			return size;
+
+		/* copy leading bytes up to the next word boundary */
+		for (; (size > 0) && (s_align > 0) && (s_align < LEN); s_align++, size--)
+			*d++ = *s++;
+
+		/* copy whole words, fewer than LEN bytes remain uncopied afterwards */
+		for (; size >= LEN; size -= LEN, d += LEN, s += LEN)
+			*(word_t *)d = *(word_t const *)s;
+
+		return size;
+	}
 }
 
 #endif /* _INCLUDE__SPEC__X86__CPU__STRING_H_ */