memcpy (x86): implement memcpy_cpu

By changing the byte-wise copy into a word-wise copy, we get a speedup of roughly a factor of 3 (on base-linux x86_64). genodelabs/genode#4456
2024-12-19 05:37:54 +00:00 · 2022-03-25 13:30:53 +01:00 · 2022-03-25 13:30:53 +01:00 · 9409f814a4
commit 9409f814a4
parent 0104a74028
1 changed file with 31 additions and 1 deletion
--- a/repos/base/include/spec/x86/cpu/string.h
+++ b/repos/base/include/spec/x86/cpu/string.h
@@ -25,7 +25,37 @@ namespace Genode {
 	 *
 	 * \return      number of bytes not copied
 	 */
-	inline size_t memcpy_cpu(void *, const void *, size_t size) { return size; }
+	inline size_t memcpy_cpu(void * dst, const void * src, size_t size)
+	{
+		typedef unsigned long word_t; /* unit used by the word-wise copy loop */
+
+		enum {
+			LEN  = sizeof(word_t), /* bytes per word */
+			MASK = LEN-1           /* mask yielding the byte offset of an address within a word */
+		};
+
+		unsigned char *d = (unsigned char *)dst, *s = (unsigned char *)src; /* NOTE(review): cast drops const from src; s is only read below */
+
+		/* byte offsets of destination and source within a word */
+		size_t d_align = (size_t)d & MASK;
+		size_t s_align = (size_t)s & MASK;
+
+		/* both pointers must share the same offset, otherwise nothing is copied and all bytes are reported as not copied */
+		if (d_align != s_align)
+			return size;
+
+		/* copy single bytes until the next word boundary is reached or size is exhausted (skipped if already aligned) */
+		for (; (size > 0) && (s_align > 0) && (s_align < LEN);
+		     s_align++, *d++ = *s++, size--);
+
+		/* copy whole words as long as at least LEN bytes remain */
+		for (; size >= LEN; size -= LEN,
+		                       d += LEN,
+		                       s += LEN)
+			*(word_t*)d = *(word_t*)s; /* NOTE(review): type-punned access via word_t pointers - confirm the build tolerates this aliasing */
+
+		return size; /* number of bytes not copied, fewer than LEN at this point */
+	}
 }

 #endif /* _INCLUDE__SPEC__X86__CPU__STRING_H_ */