mirror of
https://github.com/genodelabs/genode.git
synced 2025-04-16 07:27:35 +00:00
blit: lift 8x8 restriction from back2front
This patch allows the back2front operation to be used with textures whose size is not a multiple of 8x8 pixels, which makes the utility compatible with screen resolutions like 1366x768. For such textures, the implementation falls back to the non-SIMD variant. Issue #5428, Issue #5501
This commit is contained in:
parent
3ba0e6fda3
commit
3909f9b6e4
@ -186,9 +186,11 @@ struct Blit::Neon::B2f
|
||||
};
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r0(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
void Blit::Neon::B2f::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
uint32x4_t const *s = (uint32x4_t const *)src;
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
|
||||
@ -200,10 +202,12 @@ void Blit::Neon::B2f::r0(uint32_t *dst, unsigned const line_w,
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, unsigned const h)
|
||||
void Blit::Neon::B2f::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { -2*int(src_w), 8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src + 2*src_w*(8*h - 1), steps.src_y);
|
||||
@ -213,9 +217,11 @@ void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w,
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r180(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
void Blit::Neon::B2f::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
uint32x4_t const *s = (uint32x4_t const *)src + 2*line_w*8*h;
|
||||
|
||||
@ -227,10 +233,12 @@ void Blit::Neon::B2f::r180(uint32_t *dst, unsigned const line_w,
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r270(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, const unsigned h)
|
||||
void Blit::Neon::B2f::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { 2*int(src_w), -8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y);
|
||||
@ -249,9 +257,11 @@ struct Blit::Neon::B2f_flip
|
||||
};
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
uint32x4_t const *s = (uint32x4_t const *)src;
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
|
||||
@ -263,10 +273,12 @@ void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned const line_w,
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, unsigned const h)
|
||||
void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { 2*int(src_w), 8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y);
|
||||
@ -276,9 +288,11 @@ void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w,
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
uint32x4_t const *s = (uint32x4_t const *)src + 2*line_w*8*h;
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
|
||||
@ -290,10 +304,12 @@ void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned const line_w,
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, const unsigned h)
|
||||
void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { -2*int(src_w), -8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src + 2*src_w*(8*h - 1), steps.src_y);
|
||||
|
@ -31,9 +31,9 @@ namespace Blit {
|
||||
uint32_t *dst,
|
||||
unsigned w, unsigned h, int dx, int dy)
|
||||
{
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
_write_line(src, dst, 8*w, dx);
|
||||
src += 8*src_w;
|
||||
for (unsigned lines = h; lines; lines--) {
|
||||
_write_line(src, dst, w, dx);
|
||||
src += src_w;
|
||||
dst += dy;
|
||||
}
|
||||
};
|
||||
@ -60,7 +60,7 @@ struct Blit::Slow::B2f
|
||||
void Blit::Slow::B2f::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, line_w, dst, w, h, 1, 8*line_w);
|
||||
_write_lines(src, line_w, dst, w, h, 1, line_w);
|
||||
}
|
||||
|
||||
|
||||
@ -68,15 +68,15 @@ void Blit::Slow::B2f::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, src_w, dst + 8*h - 1, w, h, 8*dst_w, -1);
|
||||
_write_lines(src, src_w, dst + h - 1, w, h, dst_w, -1);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
dst += 8*w - 1 + (8*h - 1)*8*line_w;
|
||||
_write_lines(src, line_w, dst, w, h, -1, -8*line_w);
|
||||
dst += w - 1 + (h - 1)*line_w;
|
||||
_write_lines(src, line_w, dst, w, h, -1, -line_w);
|
||||
}
|
||||
|
||||
|
||||
@ -84,8 +84,8 @@ void Blit::Slow::B2f::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst += 8*dst_w*(8*w - 1);
|
||||
_write_lines(src, src_w, dst, w, h, -8*dst_w, 1);
|
||||
dst += dst_w*(w - 1);
|
||||
_write_lines(src, src_w, dst, w, h, -dst_w, 1);
|
||||
}
|
||||
|
||||
|
||||
@ -101,7 +101,7 @@ struct Blit::Slow::B2f_flip
|
||||
void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, line_w, dst + 8*w - 1, w, h, -1, 8*line_w);
|
||||
_write_lines(src, line_w, dst + w - 1, w, h, -1, line_w);
|
||||
}
|
||||
|
||||
|
||||
@ -109,15 +109,15 @@ void Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, src_w, dst, w, h, 8*dst_w, 1);
|
||||
_write_lines(src, src_w, dst, w, h, dst_w, 1);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
dst += (8*h - 1)*8*line_w;
|
||||
_write_lines(src, line_w, dst, w, h, 1, -8*line_w);
|
||||
dst += (h - 1)*line_w;
|
||||
_write_lines(src, line_w, dst, w, h, 1, -line_w);
|
||||
}
|
||||
|
||||
|
||||
@ -125,8 +125,8 @@ void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst += 8*h - 1 + 8*dst_w*(8*w - 1);
|
||||
_write_lines(src, src_w, dst, w, h, -8*dst_w, -1);
|
||||
dst += h - 1 + dst_w*(w - 1);
|
||||
_write_lines(src, src_w, dst, w, h, -dst_w, -1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -173,6 +173,8 @@ struct Blit::Sse4::B2f
|
||||
void Blit::Sse4::B2f::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
__m128i const *s = (__m128i const *)src;
|
||||
__m128i *d = (__m128i *)dst;
|
||||
|
||||
@ -188,6 +190,8 @@ void Blit::Sse4::B2f::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { -2*int(src_w), 2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src + 2*src_w*(8*h - 1), steps.src_y_4);
|
||||
@ -200,6 +204,8 @@ void Blit::Sse4::B2f::r90(uint32_t *dst, unsigned dst_w,
|
||||
void Blit::Sse4::B2f::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
__m128i *d = (__m128i *)dst;
|
||||
__m128i const *s = (__m128i const *)src + 2*line_w*8*h;
|
||||
|
||||
@ -215,6 +221,8 @@ void Blit::Sse4::B2f::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { 2*int(src_w), -2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src, steps.src_y_4);
|
||||
@ -236,6 +244,8 @@ struct Blit::Sse4::B2f_flip
|
||||
void Blit::Sse4::B2f_flip::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
__m128i const *s = (__m128i const *)src;
|
||||
__m128i *d = (__m128i *)dst;
|
||||
|
||||
@ -251,6 +261,8 @@ void Blit::Sse4::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { 2*int(src_w), 2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src, steps.src_y_4);
|
||||
@ -263,6 +275,8 @@ void Blit::Sse4::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
|
||||
void Blit::Sse4::B2f_flip::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
line_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
__m128i const *s = (__m128i const *)src + 2*line_w*8*h;
|
||||
__m128i *d = (__m128i *)dst;
|
||||
|
||||
@ -278,6 +292,8 @@ void Blit::Sse4::B2f_flip::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst_w >>= 3, src_w >>= 3, w >>= 3, h >>= 3;
|
||||
|
||||
Steps const steps { -2*int(src_w), -2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src + 2*int(src_w)*(h*8 - 1), steps.src_y_4);
|
||||
|
@ -74,6 +74,8 @@ namespace Blit {
|
||||
.y = ((r.y2() + 8) & ~0x7) - 1 });
|
||||
}
|
||||
|
||||
/**
 * Return true if both the width and the height of 'a' are multiples of 8
 */
static inline bool divisable_by_8x8(Area a)
{
	bool const w_aligned = (a.w & 0x7) == 0;
	bool const h_aligned = (a.h & 0x7) == 0;

	return w_aligned && h_aligned;
}
|
||||
|
||||
template <typename B2F>
|
||||
static inline void _b2f(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
@ -92,23 +94,17 @@ namespace Blit {
|
||||
Texture<Pixel_rgb888> const &texture,
|
||||
Rect rect, Rotate rotate, Flip flip)
|
||||
{
|
||||
/* surface size must be divisible by 8 */
|
||||
if (!aligned(surface.size().w, 2) || !aligned(surface.size().h, 2)) {
|
||||
warning("surface size ", surface.size(), " not divisible by 8");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check compatibility of surface with texture */
|
||||
if (transformed(surface.size(), rotate) != texture.size()) {
|
||||
warning("surface ", surface.size(), " mismatches texture ", texture.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/* restrict rect to texture size */
|
||||
rect = Rect::intersect(rect, Rect { { }, texture.size() });
|
||||
/* snap src coordinates to multiple of px, restrict to texture size */
|
||||
Rect const src_rect = Rect::intersect(snapped_to_8x8_grid(rect),
|
||||
Rect { { }, texture.size() });
|
||||
|
||||
/* compute base addresses of affected pixel window */
|
||||
Rect const src_rect = snapped_to_8x8_grid(rect);
|
||||
Rect const dst_rect = transformed(src_rect, texture.size(), rotate, flip);
|
||||
|
||||
uint32_t const * const src = (uint32_t const *)texture.pixel()
|
||||
@ -119,11 +115,10 @@ namespace Blit {
|
||||
+ dst_rect.y1()*surface.size().w
|
||||
+ dst_rect.x1();
|
||||
|
||||
/* coordinates converted to 8x8 units */
|
||||
unsigned const src_w = texture.size().w >> 3,
|
||||
dst_w = surface.size().w >> 3,
|
||||
w = src_rect.area.w >> 3,
|
||||
h = src_rect.area.h >> 3;
|
||||
unsigned const src_w = texture.size().w,
|
||||
dst_w = surface.size().w,
|
||||
w = src_rect.area.w,
|
||||
h = src_rect.area.h;
|
||||
|
||||
if (w && h) {
|
||||
if (flip.enabled)
|
||||
|
@ -20,7 +20,16 @@
|
||||
|
||||
namespace Blit {
|
||||
|
||||
/**
 * Pass all arguments on to the Neon instantiation of '_b2f'
 */
static inline void back2front(auto &&... params)
{
	_b2f<Neon>(params...);
}
|
||||
/**
 * Run the back2front operation for 'rect' of 'texture' onto 'surface'
 *
 * The Neon variant is used only if the texture's width and height are both
 * multiples of 8. Any other texture size is handled by the 'Slow' (non-SIMD)
 * variant.
 */
static inline void back2front(Surface<Pixel_rgb888>       &surface,
                              Texture<Pixel_rgb888> const &texture,
                              Rect rect, Rotate rotate, Flip flip)
{
	/* texture size not a multiple of 8x8, take the generic path */
	if (!divisable_by_8x8(texture.size())) {
		_b2f<Slow>(surface, texture, rect, rotate, flip);
		return;
	}

	_b2f<Neon>(surface, texture, rect, rotate, flip);
}
|
||||
|
||||
/**
 * Pass all arguments on to 'Neon::Blend::xrgb_a'
 */
static inline void blend_xrgb_a(auto &&... params)
{
	Neon::Blend::xrgb_a(params...);
}
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,16 @@
|
||||
|
||||
namespace Blit {
|
||||
|
||||
/**
 * Pass all arguments on to the Sse4 instantiation of '_b2f'
 */
static inline void back2front(auto &&... params)
{
	_b2f<Sse4>(params...);
}
|
||||
/**
 * Run the back2front operation for 'rect' of 'texture' onto 'surface'
 *
 * The Sse4 variant is used only if the texture's width and height are both
 * multiples of 8. Any other texture size is handled by the 'Slow' (non-SIMD)
 * variant.
 */
static inline void back2front(Surface<Pixel_rgb888>       &surface,
                              Texture<Pixel_rgb888> const &texture,
                              Rect rect, Rotate rotate, Flip flip)
{
	/* texture size not a multiple of 8x8, take the generic path */
	if (!divisable_by_8x8(texture.size())) {
		_b2f<Slow>(surface, texture, rect, rotate, flip);
		return;
	}

	_b2f<Sse4>(surface, texture, rect, rotate, flip);
}
|
||||
|
||||
/**
 * Pass all arguments on to 'Sse4::Blend::xrgb_a'
 */
static inline void blend_xrgb_a(auto &&... params)
{
	Sse4::Blend::xrgb_a(params...);
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user