diff --git a/repos/os/include/blit/internal/neon.h b/repos/os/include/blit/internal/neon.h index 7dfaf18a1a..f599832ed1 100644 --- a/repos/os/include/blit/internal/neon.h +++ b/repos/os/include/blit/internal/neon.h @@ -186,9 +186,11 @@ struct Blit::Neon::B2f }; -void Blit::Neon::B2f::r0(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Neon::B2f::r0(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + uint32x4_t const *s = (uint32x4_t const *)src; uint32x4_t *d = (uint32x4_t *)dst; @@ -200,10 +202,12 @@ void Blit::Neon::B2f::r0(uint32_t *dst, unsigned const line_w, } -void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, unsigned const h) +void Blit::Neon::B2f::r90(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { -2*int(src_w), 8*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src + 2*src_w*(8*h - 1), steps.src_y); @@ -213,9 +217,11 @@ void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w, } -void Blit::Neon::B2f::r180(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Neon::B2f::r180(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + uint32x4_t *d = (uint32x4_t *)dst; uint32x4_t const *s = (uint32x4_t const *)src + 2*line_w*8*h; @@ -227,10 +233,12 @@ void Blit::Neon::B2f::r180(uint32_t *dst, unsigned const line_w, } -void Blit::Neon::B2f::r270(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, const unsigned h) +void Blit::Neon::B2f::r270(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 
3; + Steps const steps { 2*int(src_w), -8*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y); @@ -249,9 +257,11 @@ struct Blit::Neon::B2f_flip }; -void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + uint32x4_t const *s = (uint32x4_t const *)src; uint32x4_t *d = (uint32x4_t *)dst; @@ -263,10 +273,12 @@ void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned const line_w, } -void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, unsigned const h) +void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { 2*int(src_w), 8*int(dst_w) }; Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y); @@ -276,9 +288,11 @@ void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w, } -void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned const line_w, - uint32_t const *src, unsigned const w, unsigned const h) +void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned line_w, + uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + uint32x4_t const *s = (uint32x4_t const *)src + 2*line_w*8*h; uint32x4_t *d = (uint32x4_t *)dst; @@ -290,10 +304,12 @@ void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned const line_w, } -void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned const dst_w, - uint32_t const *src, unsigned const src_w, - unsigned const w, const unsigned h) +void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned dst_w, + uint32_t const *src, unsigned src_w, + unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { -2*int(src_w), -8*int(dst_w) }; Src_ptr4 
src_ptr4 ((uint32x4_t *)src + 2*src_w*(8*h - 1), steps.src_y); diff --git a/repos/os/include/blit/internal/slow.h b/repos/os/include/blit/internal/slow.h index 39a1e6a038..e003546fec 100644 --- a/repos/os/include/blit/internal/slow.h +++ b/repos/os/include/blit/internal/slow.h @@ -31,9 +31,9 @@ namespace Blit { uint32_t *dst, unsigned w, unsigned h, int dx, int dy) { - for (unsigned lines = h*8; lines; lines--) { - _write_line(src, dst, 8*w, dx); - src += 8*src_w; + for (unsigned lines = h; lines; lines--) { + _write_line(src, dst, w, dx); + src += src_w; dst += dy; } }; @@ -60,7 +60,7 @@ struct Blit::Slow::B2f void Blit::Slow::B2f::r0(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { - _write_lines(src, line_w, dst, w, h, 1, 8*line_w); + _write_lines(src, line_w, dst, w, h, 1, line_w); } @@ -68,15 +68,15 @@ void Blit::Slow::B2f::r90(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { - _write_lines(src, src_w, dst + 8*h - 1, w, h, 8*dst_w, -1); + _write_lines(src, src_w, dst + h - 1, w, h, dst_w, -1); } void Blit::Slow::B2f::r180(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { - dst += 8*w - 1 + (8*h - 1)*8*line_w; - _write_lines(src, line_w, dst, w, h, -1, -8*line_w); + dst += w - 1 + (h - 1)*line_w; + _write_lines(src, line_w, dst, w, h, -1, -line_w); } @@ -84,8 +84,8 @@ void Blit::Slow::B2f::r270(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { - dst += 8*dst_w*(8*w - 1); - _write_lines(src, src_w, dst, w, h, -8*dst_w, 1); + dst += dst_w*(w - 1); + _write_lines(src, src_w, dst, w, h, -dst_w, 1); } @@ -101,7 +101,7 @@ struct Blit::Slow::B2f_flip void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { - _write_lines(src, line_w, dst + 8*w - 1, w, h, -1, 8*line_w); + _write_lines(src, line_w, dst + w - 1, w, h, -1, line_w); } @@ -109,15 +109,15 @@ void 
Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { - _write_lines(src, src_w, dst, w, h, 8*dst_w, 1); + _write_lines(src, src_w, dst, w, h, dst_w, 1); } void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { - dst += (8*h - 1)*8*line_w; - _write_lines(src, line_w, dst, w, h, 1, -8*line_w); + dst += (h - 1)*line_w; + _write_lines(src, line_w, dst, w, h, 1, -line_w); } @@ -125,8 +125,8 @@ void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { - dst += 8*h - 1 + 8*dst_w*(8*w - 1); - _write_lines(src, src_w, dst, w, h, -8*dst_w, -1); + dst += h - 1 + dst_w*(w - 1); + _write_lines(src, src_w, dst, w, h, -dst_w, -1); } diff --git a/repos/os/include/blit/internal/sse4.h b/repos/os/include/blit/internal/sse4.h index 6fc7af7399..501481316b 100644 --- a/repos/os/include/blit/internal/sse4.h +++ b/repos/os/include/blit/internal/sse4.h @@ -173,6 +173,8 @@ struct Blit::Sse4::B2f void Blit::Sse4::B2f::r0(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + __m128i const *s = (__m128i const *)src; __m128i *d = (__m128i *)dst; @@ -188,6 +190,8 @@ void Blit::Sse4::B2f::r90(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { -2*int(src_w), 2*int(dst_w) }; Src_ptr4 src_ptr4 ((__m128i *)src + 2*src_w*(8*h - 1), steps.src_y_4); @@ -200,6 +204,8 @@ void Blit::Sse4::B2f::r90(uint32_t *dst, unsigned dst_w, void Blit::Sse4::B2f::r180(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + __m128i *d = (__m128i *)dst; __m128i const *s = (__m128i const *)src + 2*line_w*8*h; @@ -215,6 +221,8 @@ void Blit::Sse4::B2f::r270(uint32_t *dst, unsigned dst_w, uint32_t const *src, 
unsigned src_w, unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { 2*int(src_w), -2*int(dst_w) }; Src_ptr4 src_ptr4 ((__m128i *)src, steps.src_y_4); @@ -236,6 +244,8 @@ struct Blit::Sse4::B2f_flip void Blit::Sse4::B2f_flip::r0(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + __m128i const *s = (__m128i const *)src; __m128i *d = (__m128i *)dst; @@ -251,6 +261,8 @@ void Blit::Sse4::B2f_flip::r90(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { 2*int(src_w), 2*int(dst_w) }; Src_ptr4 src_ptr4 ((__m128i *)src, steps.src_y_4); @@ -263,6 +275,8 @@ void Blit::Sse4::B2f_flip::r90(uint32_t *dst, unsigned dst_w, void Blit::Sse4::B2f_flip::r180(uint32_t *dst, unsigned line_w, uint32_t const *src, unsigned w, unsigned h) { + line_w >>= 3, w >>= 3, h >>= 3; + __m128i const *s = (__m128i const *)src + 2*line_w*8*h; __m128i *d = (__m128i *)dst; @@ -278,6 +292,8 @@ void Blit::Sse4::B2f_flip::r270(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, unsigned w, unsigned h) { + dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3; + Steps const steps { -2*int(src_w), -2*int(dst_w) }; Src_ptr4 src_ptr4 ((__m128i *)src + 2*int(src_w)*(h*8 - 1), steps.src_y_4); diff --git a/repos/os/include/blit/types.h b/repos/os/include/blit/types.h index ea3afe48b4..b4592875d9 100644 --- a/repos/os/include/blit/types.h +++ b/repos/os/include/blit/types.h @@ -74,6 +74,8 @@ namespace Blit { .y = ((r.y2() + 8) & ~0x7) - 1 }); } + static inline bool divisable_by_8x8(Area a) { return ((a.w | a.h) & 0x7) == 0; } + template static inline void _b2f(uint32_t *dst, unsigned dst_w, uint32_t const *src, unsigned src_w, @@ -92,23 +94,17 @@ namespace Blit { Texture const &texture, Rect rect, Rotate rotate, Flip flip) { - /* surface size must be divisible by 8 */ - if 
(!aligned(surface.size().w, 2) || !aligned(surface.size().h, 2)) { - warning("surface size ", surface.size(), " not divisible by 8"); - return; - } - /* check compatibility of surface with texture */ if (transformed(surface.size(), rotate) != texture.size()) { warning("surface ", surface.size(), " mismatches texture ", texture.size()); return; } - /* restrict rect to texture size */ - rect = Rect::intersect(rect, Rect { { }, texture.size() }); + /* snap src coordinates to multiple of 8 px, restrict to texture size */ + Rect const src_rect = Rect::intersect(snapped_to_8x8_grid(rect), + Rect { { }, texture.size() }); /* compute base addresses of affected pixel window */ - Rect const src_rect = snapped_to_8x8_grid(rect); Rect const dst_rect = transformed(src_rect, texture.size(), rotate, flip); uint32_t const * const src = (uint32_t const *)texture.pixel() @@ -119,11 +115,10 @@ namespace Blit { + dst_rect.y1()*surface.size().w + dst_rect.x1(); - /* coordinates converted to 8x8 units */ - unsigned const src_w = texture.size().w >> 3, - dst_w = surface.size().w >> 3, - w = src_rect.area.w >> 3, - h = src_rect.area.h >> 3; + unsigned const src_w = texture.size().w, + dst_w = surface.size().w, + w = src_rect.area.w, + h = src_rect.area.h; if (w && h) { if (flip.enabled) diff --git a/repos/os/include/spec/arm_64/blit/blit.h b/repos/os/include/spec/arm_64/blit/blit.h index d6023995cd..2480c9520f 100644 --- a/repos/os/include/spec/arm_64/blit/blit.h +++ b/repos/os/include/spec/arm_64/blit/blit.h @@ -20,7 +20,16 @@ namespace Blit { - static inline void back2front (auto &&... args) { _b2f(args...); } + static inline void back2front(Surface &surface, + Texture const &texture, + Rect rect, Rotate rotate, Flip flip) + { + if (divisable_by_8x8(texture.size())) + _b2f(surface, texture, rect, rotate, flip); + else + _b2f(surface, texture, rect, rotate, flip); + } + static inline void blend_xrgb_a(auto &&... 
args) { Neon::Blend::xrgb_a(args...); } } diff --git a/repos/os/include/spec/x86_64/blit/blit.h b/repos/os/include/spec/x86_64/blit/blit.h index 67fbb60fc9..2e2a17c43f 100644 --- a/repos/os/include/spec/x86_64/blit/blit.h +++ b/repos/os/include/spec/x86_64/blit/blit.h @@ -20,7 +20,16 @@ namespace Blit { - static inline void back2front (auto &&... args) { _b2f(args...); } + static inline void back2front(Surface &surface, + Texture const &texture, + Rect rect, Rotate rotate, Flip flip) + { + if (divisable_by_8x8(texture.size())) + _b2f(surface, texture, rect, rotate, flip); + else + _b2f(surface, texture, rect, rotate, flip); + } + static inline void blend_xrgb_a(auto &&... args) { Sse4::Blend::xrgb_a(args...); } }