diff --git a/repos/os/src/lib/blit/arm/blit_helper.h b/repos/os/src/lib/blit/arm/blit_helper.h index 33e86d510c..4e10e865c0 100644 --- a/repos/os/src/lib/blit/arm/blit_helper.h +++ b/repos/os/src/lib/blit/arm/blit_helper.h @@ -37,9 +37,9 @@ static inline void copy_16bit_column(char const *src, int src_w, * \param src_w width of source buffer in bytes * \param dst_w width of destination buffer in bytes */ -static void copy_block_32bit(char const *src, int src_w, - char *dst, int dst_w, - int w, int h) +static inline void copy_block_32bit(char const *src, int src_w, + char *dst, int dst_w, + int w, int h) { src_w -= w*4; dst_w -= w*4; @@ -60,17 +60,28 @@ static inline void copy_block_32byte(char const *src, int src_w, char *dst, int dst_w, int w, int h) { - if (((long)src & 3) || ((long)dst & 3)) - copy_block_32bit(src, src_w, dst, dst_w, w*8, h); - else { - src_w -= w*32; - dst_w -= w*32; - for (; h--; src += src_w, dst += dst_w) - for (int i = w; i--;) + for (; h > 0; h--) { + /* + * Depending on 'src_w' and 'dst_w', some lines may be properly aligned, + * while others may not be, so we need to check each line. + */ + if (((long)src & 3) || ((long)dst & 3)) { + copy_block_32bit(src, src_w, dst, dst_w, w*8, 1); + src += src_w; + dst += dst_w; + } else { + for (int i = w; i > 0; i--) asm volatile ("ldmia %0!, {r3 - r10} \n\t" "stmia %1!, {r3 - r10} \n\t" : "+r" (src), "+r" (dst) :: "r3","r4","r5","r6","r7","r8","r9","r10"); + /* + * 'src' and 'dst' got auto-incremented by the copy code, so only + * the remainder needs to be added + */ + src += (src_w - w*32); + dst += (dst_w - w*32); + } } }