memcpy (x86): implement memcpy_cpu

By changing the bytewise copy into a wordwise copy, we get a speedup of
roughly a factor of 3 (on base-linux x86_64).

genodelabs/genode#4456
This commit is contained in:
Johannes Schlatow 2022-03-25 13:30:53 +01:00 committed by Christian Helmuth
parent 0104a74028
commit 9409f814a4

View File

@ -25,7 +25,37 @@ namespace Genode {
*
* \return number of bytes not copied
*/
/* fallback without a CPU-specific copy: no byte is touched, all of 'size' is left over */
inline size_t memcpy_cpu(void *, const void *, size_t size)
{
	return size;
}
/*
 * Word-wise copy of 'size' bytes from 'src' to 'dst'
 *
 * The copy is only attempted if both pointers share the same offset within
 * a machine word. In this case, the leading bytes up to the next word
 * boundary are copied byte-wise, followed by as many whole words as fit
 * into the remaining size. Trailing bytes that do not fill a whole word
 * are not copied but reported via the return value.
 *
 * If the alignments of 'dst' and 'src' differ, nothing is copied and
 * 'size' is returned unmodified.
 *
 * Returns the number of bytes not copied.
 */
inline size_t memcpy_cpu(void * dst, const void * src, size_t size)
{
	/*
	 * 'may_alias' allows us to access the buffers - whatever their
	 * declared type is - through 'word_t' without violating the
	 * strict-aliasing rule.
	 */
	typedef unsigned long __attribute__((may_alias)) word_t;

	enum {
		LEN  = sizeof(word_t),
		MASK = LEN-1
	};

	/* keep the source const-qualified, we never write through it */
	unsigned char       *d = (unsigned char *)dst;
	unsigned char const *s = (unsigned char const *)src;

	/* check byte alignment */
	size_t const d_align = (size_t)d & MASK;
	size_t       s_align = (size_t)s & MASK;

	/* only same alignments work */
	if (d_align != s_align)
		return size;

	/* copy to word alignment (skipped if already aligned) */
	for (; (size > 0) && (s_align > 0) && (s_align < LEN); s_align++, size--)
		*d++ = *s++;

	/* copy words, both pointers are word-aligned from here on */
	for (; size >= LEN; size -= LEN, d += LEN, s += LEN)
		*(word_t *)d = *(word_t const *)s;

	/* remaining tail (< LEN bytes) is left to the caller */
	return size;
}
}
#endif /* _INCLUDE__SPEC__X86__CPU__STRING_H_ */