mirror of
https://github.com/genodelabs/genode.git
synced 2025-03-03 13:14:45 +00:00
parent
10a45b78d0
commit
dc9ff4b3e3
@ -1,34 +1,62 @@
|
||||
/*
|
||||
* \brief Interface of 2D-copy library
|
||||
* \brief Blit API
|
||||
* \author Norman Feske
|
||||
* \date 2007-10-10
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2007-2017 Genode Labs GmbH
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__BLIT__BLIT_H_
|
||||
#define _INCLUDE__BLIT__BLIT_H_
|
||||
#ifndef _INCLUDE__BLIT_H_
|
||||
#define _INCLUDE__BLIT_H_
|
||||
|
||||
/**
|
||||
* Blit memory from source buffer to destination buffer
|
||||
*
|
||||
* \param src address of source buffer
|
||||
* \param src_w line length of source buffer in bytes
|
||||
* \param dst address of destination buffer
|
||||
* \param dst_w line length of destination buffer in bytes
|
||||
* \param w number of bytes per line to copy
|
||||
* \param h number of lines to copy
|
||||
*
|
||||
* This function works at a granularity of 16bit.
|
||||
* If the source and destination overlap, the result
|
||||
* of the copy operation is not defined.
|
||||
*/
|
||||
extern "C" void blit(void const *src, unsigned src_w,
|
||||
void *dst, unsigned dst_w, int w, int h);
|
||||
#include <blit/types.h>
|
||||
#include <blit/internal/slow.h>
|
||||
|
||||
#endif /* _INCLUDE__BLIT__BLIT_H_ */
|
||||
namespace Blit {
|
||||
|
||||
/**
|
||||
* Back-to-front copy
|
||||
*
|
||||
* Copy a rectangular part of a texture to a surface while optionally
|
||||
* applying rotation and flipping. The width and height of the texture
|
||||
* must be divisible by 8. Surface and texture must perfectly line up.
|
||||
* E.g., when rotating by 90 degrees, the texture width must equal the
|
||||
* surface height and vice versa. The clipping area of the surface is
|
||||
* ignored.
|
||||
*
|
||||
* The combination of rotate and flip arguments works as follows:
|
||||
*
|
||||
* normal flipped
|
||||
*
|
||||
* rotated 0 0 1 2 3 3 2 1 0
|
||||
* 4 5 6 7 7 6 5 4
|
||||
* 8 9 10 11 11 10 9 8
|
||||
*
|
||||
* rotated 90 8 4 0 0 4 8
|
||||
* 9 5 1 1 5 9
|
||||
* 10 6 2 2 6 10
|
||||
* 11 7 3 3 7 11
|
||||
*
|
||||
* rotated 180 11 10 9 8 8 9 10 11
|
||||
* 7 6 5 4 4 5 6 7
|
||||
* 3 2 1 0 0 1 2 3
|
||||
*
|
||||
* rotated 270 3 7 11 11 7 3
|
||||
* 2 6 10 10 6 2
|
||||
* 1 5 9 9 5 1
|
||||
* 0 4 8 8 4 0
|
||||
*/
|
||||
static inline void back2front(Surface<Pixel_rgb888> &surface,
|
||||
Texture<Pixel_rgb888> const &texture,
|
||||
Rect rect, Rotate rotate, Flip flip)
|
||||
{
|
||||
_b2f<Slow>(surface, texture, rect, rotate, flip);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* _INCLUDE__BLIT_H_ */
|
||||
|
294
repos/os/include/blit/internal/neon.h
Normal file
294
repos/os/include/blit/internal/neon.h
Normal file
@ -0,0 +1,294 @@
|
||||
/*
|
||||
* \brief 2D memory copy using ARM NEON
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__BLIT__INTERNAL__NEON_H_
|
||||
#define _INCLUDE__BLIT__INTERNAL__NEON_H_
|
||||
|
||||
#include <blit/types.h>
|
||||
|
||||
/* compiler intrinsics */
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wnarrowing"
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#pragma GCC diagnostic ignored "-Wfloat-conversion"
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#include <arm_neon.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
|
||||
namespace Blit { struct Neon; }
|
||||
|
||||
|
||||
struct Blit::Neon
|
||||
{
|
||||
static inline uint32x4_t _reversed(uint32x4_t const v)
|
||||
{
|
||||
return vrev64q_u32(vcombine_u32(vget_high_u32(v), vget_low_u32(v)));
|
||||
}
|
||||
|
||||
static inline void _reverse_line(uint32x4_t const *src, uint32x4_t *dst, unsigned len)
|
||||
{
|
||||
src += len; /* move 'src' from end of line towards begin */
|
||||
while (len--)
|
||||
*dst++ = _reversed(*--src);
|
||||
};
|
||||
|
||||
static inline void _copy_line(uint32x4_t const *s, uint32x4_t *d, unsigned len)
|
||||
{
|
||||
while (len--)
|
||||
*d++ = *s++;
|
||||
};
|
||||
|
||||
struct Src_ptr4
|
||||
{
|
||||
uint32x4_t const *p0, *p1, *p2, *p3;
|
||||
|
||||
inline Src_ptr4(uint32x4_t const *p, int const step)
|
||||
:
|
||||
p0(p), p1(p0 + step), p2(p1 + step), p3(p2 + step)
|
||||
{ }
|
||||
|
||||
void incr_4(int const v) { p0 += v, p1 += v, p2 += v, p3 += v; }
|
||||
|
||||
void prefetch() const
|
||||
{
|
||||
__builtin_prefetch(p0); __builtin_prefetch(p1);
|
||||
__builtin_prefetch(p2); __builtin_prefetch(p3);
|
||||
}
|
||||
|
||||
void load(uint32x4x4_t &tile) const { tile = { *p0, *p1, *p2, *p3 }; }
|
||||
};
|
||||
|
||||
struct Dst_ptr4
|
||||
{
|
||||
uint32_t *p0, *p1, *p2, *p3;
|
||||
|
||||
Dst_ptr4(uint32_t *p, int const step)
|
||||
:
|
||||
p0(p), p1(p0 + step), p2(p1 + step), p3(p2 + step)
|
||||
{ }
|
||||
|
||||
void incr(int const v) { p0 += v, p1 += v, p2 += v, p3 += v; }
|
||||
|
||||
void store(uint32x4x4_t const &tile)
|
||||
{
|
||||
vst4q_lane_u32(p0, tile, 0);
|
||||
vst4q_lane_u32(p1, tile, 1);
|
||||
vst4q_lane_u32(p2, tile, 2);
|
||||
vst4q_lane_u32(p3, tile, 3);
|
||||
}
|
||||
};
|
||||
|
||||
struct Steps
|
||||
{
|
||||
int const src_y, dst_y;
|
||||
|
||||
void incr_x_4(Src_ptr4 &p) const { p.incr_4(1); };
|
||||
void incr_x_8(Src_ptr4 &p) const { p.incr_4(2); };
|
||||
void incr_y_4(Src_ptr4 &p) const { p.incr_4(src_y << 2); };
|
||||
void incr_y_8(Src_ptr4 &p) const { p.incr_4(src_y << 3); };
|
||||
|
||||
void incr_x_4(Dst_ptr4 &p) const { p.incr(4); };
|
||||
void incr_x_8(Dst_ptr4 &p) const { p.incr(8); };
|
||||
void incr_y_4(Dst_ptr4 &p) const { p.incr(dst_y << 2); };
|
||||
void incr_y_8(Dst_ptr4 &p) const { p.incr(dst_y << 3); };
|
||||
};
|
||||
|
||||
__attribute__((optimize("-O3")))
|
||||
static inline void _load_prefetch_store(Src_ptr4 &src, Dst_ptr4 &dst, Steps const steps)
|
||||
{
|
||||
uint32x4x4_t tile;
|
||||
src.load(tile);
|
||||
steps.incr_y_4(src);
|
||||
src.prefetch();
|
||||
dst.store(tile);
|
||||
steps.incr_x_4(dst);
|
||||
}
|
||||
|
||||
__attribute__((optimize("-O3")))
|
||||
static inline void _rotate_8x4(Src_ptr4 src, Dst_ptr4 dst, Steps const steps)
|
||||
{
|
||||
for (unsigned i = 0; i < 2; i++)
|
||||
_load_prefetch_store(src, dst, steps);
|
||||
}
|
||||
|
||||
__attribute__((optimize("-O3")))
|
||||
static inline void _rotate_8x4_last(Src_ptr4 src, Dst_ptr4 dst, Steps const steps)
|
||||
{
|
||||
_load_prefetch_store(src, dst, steps);
|
||||
uint32x4x4_t tile;
|
||||
src.load(tile);
|
||||
dst.store(tile);
|
||||
}
|
||||
|
||||
__attribute__((optimize("-O3")))
|
||||
static inline void _rotate_8x8(Src_ptr4 src, Dst_ptr4 dst, Steps const steps)
|
||||
{
|
||||
_rotate_8x4(src, dst, steps);
|
||||
steps.incr_y_4(dst);
|
||||
steps.incr_x_4(src);
|
||||
_rotate_8x4_last(src, dst, steps);
|
||||
}
|
||||
|
||||
__attribute__((optimize("-O3")))
|
||||
static inline void _rotate_8_lines(Src_ptr4 src, Dst_ptr4 dst,
|
||||
Steps const steps, unsigned n)
|
||||
{
|
||||
for (; n; n--) {
|
||||
_rotate_8x8(src, dst, steps);
|
||||
steps.incr_y_8(dst);
|
||||
steps.incr_x_8(src);
|
||||
}
|
||||
};
|
||||
|
||||
static inline void _rotate(Src_ptr4 src, Dst_ptr4 dst,
|
||||
Steps const steps, unsigned w, unsigned h)
|
||||
{
|
||||
for (unsigned i = h; i; i--) {
|
||||
_rotate_8_lines(src, dst, steps, w);
|
||||
steps.incr_y_8(src);
|
||||
steps.incr_x_8(dst);
|
||||
}
|
||||
}
|
||||
|
||||
struct B2f;
|
||||
struct B2f_flip;
|
||||
};
|
||||
|
||||
|
||||
struct Blit::Neon::B2f
|
||||
{
|
||||
static inline void r0 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r90 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
static inline void r180(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r270(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
};
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r0(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
{
|
||||
uint32x4_t const *s = (uint32x4_t const *)src;
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
_copy_line(s, d, 2*w);
|
||||
s += 2*line_w;
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r90(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, unsigned const h)
|
||||
{
|
||||
Steps const steps { -2*int(src_w), 8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src + 2*src_w*(8*h - 1), steps.src_y);
|
||||
Dst_ptr4 dst_ptr4 (dst, steps.dst_y);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r180(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
{
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
uint32x4_t const *s = (uint32x4_t const *)src + 2*line_w*8*h;
|
||||
|
||||
for (unsigned i = h*8; i; i--) {
|
||||
s -= 2*line_w;
|
||||
_reverse_line(s, d, 2*w);
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f::r270(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, const unsigned h)
|
||||
{
|
||||
Steps const steps { 2*int(src_w), -8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y);
|
||||
Dst_ptr4 dst_ptr4 (dst + 8*int(dst_w)*(w*8 - 1), steps.dst_y);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
|
||||
struct Blit::Neon::B2f_flip
|
||||
{
|
||||
static inline void r0 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r90 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
static inline void r180(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r270(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
};
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r0(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
{
|
||||
uint32x4_t const *s = (uint32x4_t const *)src;
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
_reverse_line(s, d, 2*w);
|
||||
s += 2*line_w;
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r90(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, unsigned const h)
|
||||
{
|
||||
Steps const steps { 2*int(src_w), 8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src, steps.src_y);
|
||||
Dst_ptr4 dst_ptr4 (dst, steps.dst_y);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r180(uint32_t *dst, unsigned const line_w,
|
||||
uint32_t const *src, unsigned const w, unsigned const h)
|
||||
{
|
||||
uint32x4_t const *s = (uint32x4_t const *)src + 2*line_w*8*h;
|
||||
uint32x4_t *d = (uint32x4_t *)dst;
|
||||
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
s -= 2*line_w;
|
||||
_copy_line(s, d, 2*w);
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Neon::B2f_flip::r270(uint32_t *dst, unsigned const dst_w,
|
||||
uint32_t const *src, unsigned const src_w,
|
||||
unsigned const w, const unsigned h)
|
||||
{
|
||||
Steps const steps { -2*int(src_w), -8*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((uint32x4_t *)src + 2*src_w*(8*h - 1), steps.src_y);
|
||||
Dst_ptr4 dst_ptr4 (dst + 8*int(dst_w)*(w*8 - 1), steps.dst_y);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
#endif /* _INCLUDE__BLIT__INTERNAL__NEON_H_ */
|
131
repos/os/include/blit/internal/slow.h
Normal file
131
repos/os/include/blit/internal/slow.h
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* \brief Fallback 2D memory copy
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__BLIT__INTERNAL__SLOW_H_
|
||||
#define _INCLUDE__BLIT__INTERNAL__SLOW_H_
|
||||
|
||||
#include <blit/types.h>
|
||||
|
||||
namespace Blit {
|
||||
|
||||
struct Slow;
|
||||
|
||||
static inline void _write_line(uint32_t const *src, uint32_t *dst,
|
||||
unsigned len, int dst_step)
|
||||
{
|
||||
for (; len--; dst += dst_step)
|
||||
*dst = *src++;
|
||||
}
|
||||
|
||||
static inline void _write_lines(uint32_t const *src, unsigned src_w,
|
||||
uint32_t *dst,
|
||||
unsigned w, unsigned h, int dx, int dy)
|
||||
{
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
_write_line(src, dst, 8*w, dx);
|
||||
src += 8*src_w;
|
||||
dst += dy;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
struct Blit::Slow
|
||||
{
|
||||
struct B2f;
|
||||
struct B2f_flip;
|
||||
};
|
||||
|
||||
|
||||
struct Blit::Slow::B2f
|
||||
{
|
||||
static inline void r0 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r90 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
static inline void r180(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r270(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
};
|
||||
|
||||
|
||||
void Blit::Slow::B2f::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, line_w, dst, w, h, 1, 8*line_w);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, src_w, dst + 8*h - 1, w, h, 8*dst_w, -1);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
dst += 8*w - 1 + (8*h - 1)*8*line_w;
|
||||
_write_lines(src, line_w, dst, w, h, -1, -8*line_w);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst += 8*dst_w*(8*w - 1);
|
||||
_write_lines(src, src_w, dst, w, h, -8*dst_w, 1);
|
||||
}
|
||||
|
||||
|
||||
struct Blit::Slow::B2f_flip
|
||||
{
|
||||
static inline void r0 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r90 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
static inline void r180(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r270(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
};
|
||||
|
||||
|
||||
void Blit::Slow::B2f_flip::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, line_w, dst + 8*w - 1, w, h, -1, 8*line_w);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
_write_lines(src, src_w, dst, w, h, 8*dst_w, 1);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f_flip::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
dst += (8*h - 1)*8*line_w;
|
||||
_write_lines(src, line_w, dst, w, h, 1, -8*line_w);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Slow::B2f_flip::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
dst += 8*h - 1 + 8*dst_w*(8*w - 1);
|
||||
_write_lines(src, src_w, dst, w, h, -8*dst_w, -1);
|
||||
}
|
||||
|
||||
#endif /* _INCLUDE__BLIT__INTERNAL__SLOW_H_ */
|
263
repos/os/include/blit/internal/sse4.h
Normal file
263
repos/os/include/blit/internal/sse4.h
Normal file
@ -0,0 +1,263 @@
|
||||
/*
|
||||
* \brief 2D memory copy using SSE4
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-21
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__BLIT__INTERNAL__SSE4_H_
|
||||
#define _INCLUDE__BLIT__INTERNAL__SSE4_H_
|
||||
|
||||
#include <blit/types.h>
|
||||
|
||||
/* compiler intrinsics */
|
||||
#ifndef _MM_MALLOC_H_INCLUDED /* discharge dependency from stdlib.h */
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED_PREVENTED
|
||||
#endif
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#include <immintrin.h>
|
||||
#pragma GCC diagnostic pop
|
||||
#ifdef _MM_MALLOC_H_INCLUDED_PREVENTED
|
||||
#undef _MM_MALLOC_H_INCLUDED
|
||||
#undef _MM_MALLOC_H_INCLUDED_PREVENTED
|
||||
#endif
|
||||
|
||||
|
||||
namespace Blit { struct Sse4; };
|
||||
|
||||
|
||||
struct Blit::Sse4
|
||||
{
|
||||
union Tile_4x4 { __m128i pi[4]; __m128 ps[4]; };
|
||||
|
||||
struct Src_ptr4
|
||||
{
|
||||
__m128i const *p0, *p1, *p2, *p3;
|
||||
|
||||
inline Src_ptr4(__m128i const *p, int const step)
|
||||
:
|
||||
p0(p), p1(p0 + step), p2(p1 + step), p3(p2 + step)
|
||||
{ }
|
||||
|
||||
void incr_4(int v) { p0 += v, p1 += v, p2 += v, p3 += v; }
|
||||
|
||||
void prefetch() const
|
||||
{
|
||||
_mm_prefetch(p0, _MM_HINT_T0); _mm_prefetch(p1, _MM_HINT_T0);
|
||||
_mm_prefetch(p2, _MM_HINT_T0); _mm_prefetch(p3, _MM_HINT_T0);
|
||||
}
|
||||
|
||||
void load(Tile_4x4 &tile) const
|
||||
{
|
||||
tile.pi[0] = _mm_load_si128(p0); tile.pi[1] = _mm_load_si128(p1);
|
||||
tile.pi[2] = _mm_load_si128(p2); tile.pi[3] = _mm_load_si128(p3);
|
||||
}
|
||||
};
|
||||
|
||||
struct Dst_ptr4
|
||||
{
|
||||
__m128i *p0, *p1, *p2, *p3;
|
||||
|
||||
Dst_ptr4(__m128i *p, int const step_4)
|
||||
:
|
||||
p0(p), p1(p0 + step_4), p2(p1 + step_4), p3(p2 + step_4)
|
||||
{ }
|
||||
|
||||
void incr_4(int v) { p0 += v, p1 += v, p2 += v, p3 += v; }
|
||||
|
||||
void store(Tile_4x4 const &tile) const
|
||||
{
|
||||
_mm_stream_si128(p0, tile.pi[0]); _mm_stream_si128(p1, tile.pi[1]);
|
||||
_mm_stream_si128(p2, tile.pi[2]); _mm_stream_si128(p3, tile.pi[3]);
|
||||
}
|
||||
};
|
||||
|
||||
struct Steps { int src_y_4, dst_y_4; };
|
||||
|
||||
static inline void _reverse_line(__m128i const *s, __m128i *d, unsigned len_8)
|
||||
{
|
||||
static constexpr int reversed = (0 << 6) | (1 << 4) | (2 << 2) | 3;
|
||||
|
||||
d += 2*len_8; /* move 'dst' from end towards begin */
|
||||
|
||||
while (len_8--) {
|
||||
__m128i const v0 = _mm_load_si128(s++);
|
||||
__m128i const v1 = _mm_load_si128(s++);
|
||||
_mm_stream_si128(--d, _mm_shuffle_epi32(v0, reversed));
|
||||
_mm_stream_si128(--d, _mm_shuffle_epi32(v1, reversed));
|
||||
}
|
||||
};
|
||||
|
||||
static inline void _copy_line(__m128i const *s, __m128i *d, unsigned len_8)
|
||||
{
|
||||
while (len_8--) {
|
||||
__m128i const v0 = _mm_load_si128(s++);
|
||||
__m128i const v1 = _mm_load_si128(s++);
|
||||
_mm_stream_si128(d++, v0);
|
||||
_mm_stream_si128(d++, v1);
|
||||
}
|
||||
};
|
||||
|
||||
static inline void _rotate_4_lines(Src_ptr4 src, Dst_ptr4 dst,
|
||||
unsigned len_4, auto const dst_4_step)
|
||||
{
|
||||
Tile_4x4 t;
|
||||
while (len_4--) {
|
||||
src.load(t);
|
||||
src.incr_4(1);
|
||||
src.prefetch();
|
||||
_MM_TRANSPOSE4_PS(t.ps[0], t.ps[1], t.ps[2], t.ps[3]);
|
||||
dst.store(t);
|
||||
dst.incr_4(dst_4_step);
|
||||
};
|
||||
};
|
||||
|
||||
static inline void _rotate(Src_ptr4 src, Dst_ptr4 dst,
|
||||
Steps const steps, unsigned w, unsigned h)
|
||||
{
|
||||
for (unsigned i = 2*h; i; i--) {
|
||||
_rotate_4_lines(src, dst, 2*w, 4*steps.dst_y_4);
|
||||
src.incr_4(4*steps.src_y_4);
|
||||
dst.incr_4(1);
|
||||
}
|
||||
}
|
||||
|
||||
struct B2f;
|
||||
struct B2f_flip;
|
||||
};
|
||||
|
||||
|
||||
struct Blit::Sse4::B2f
|
||||
{
|
||||
static inline void r0 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r90 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
static inline void r180(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r270(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
};
|
||||
|
||||
|
||||
void Blit::Sse4::B2f::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
__m128i const *s = (__m128i const *)src;
|
||||
__m128i *d = (__m128i *)dst;
|
||||
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
_copy_line(s, d, w);
|
||||
s += 2*line_w;
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Sse4::B2f::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
Steps const steps { -2*int(src_w), 2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src + 2*src_w*(8*h - 1), steps.src_y_4);
|
||||
Dst_ptr4 dst_ptr4 ((__m128i *)dst, steps.dst_y_4);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Sse4::B2f::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
__m128i *d = (__m128i *)dst;
|
||||
__m128i const *s = (__m128i const *)src + 2*line_w*8*h;
|
||||
|
||||
for (unsigned i = h*8; i; i--) {
|
||||
s -= 2*line_w;
|
||||
_reverse_line(s, d, w);
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Sse4::B2f::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
Steps const steps { 2*int(src_w), -2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src, steps.src_y_4);
|
||||
Dst_ptr4 dst_ptr4 ((__m128i *)dst + 2*int(dst_w)*(8*w - 1), steps.dst_y_4);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
|
||||
struct Blit::Sse4::B2f_flip
|
||||
{
|
||||
static inline void r0 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r90 (uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
static inline void r180(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned);
|
||||
static inline void r270(uint32_t *, unsigned, uint32_t const *, unsigned, unsigned, unsigned);
|
||||
};
|
||||
|
||||
|
||||
void Blit::Sse4::B2f_flip::r0(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
__m128i const *s = (__m128i const *)src;
|
||||
__m128i *d = (__m128i *)dst;
|
||||
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
_reverse_line(s, d, w);
|
||||
s += 2*line_w;
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Sse4::B2f_flip::r90(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
Steps const steps { 2*int(src_w), 2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src, steps.src_y_4);
|
||||
Dst_ptr4 dst_ptr4 ((__m128i *)dst, steps.dst_y_4);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
|
||||
void Blit::Sse4::B2f_flip::r180(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
__m128i const *s = (__m128i const *)src + 2*line_w*8*h;
|
||||
__m128i *d = (__m128i *)dst;
|
||||
|
||||
for (unsigned lines = h*8; lines; lines--) {
|
||||
s -= 2*line_w;
|
||||
_copy_line(s, d, w);
|
||||
d += 2*line_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Blit::Sse4::B2f_flip::r270(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
Steps const steps { -2*int(src_w), -2*int(dst_w) };
|
||||
|
||||
Src_ptr4 src_ptr4 ((__m128i *)src + 2*int(src_w)*(h*8 - 1), steps.src_y_4);
|
||||
Dst_ptr4 dst_ptr4 ((__m128i *)dst + 2*int(dst_w)*(w*8 - 1), steps.dst_y_4);
|
||||
|
||||
_rotate(src_ptr4, dst_ptr4, steps, w, h);
|
||||
}
|
||||
|
||||
#endif /* _INCLUDE__BLIT__INTERNAL__SSE4_H_ */
|
161
repos/os/include/blit/types.h
Normal file
161
repos/os/include/blit/types.h
Normal file
@ -0,0 +1,161 @@
|
||||
/*
|
||||
* \brief Types and utilities used for 2D memory copy
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__BLIT__TYPES_H_
|
||||
#define _INCLUDE__BLIT__TYPES_H_
|
||||
|
||||
/* Genode includes */
|
||||
#include <os/texture.h>
|
||||
#include <os/surface.h>
|
||||
#include <os/pixel_rgb888.h>
|
||||
|
||||
namespace Blit {
|
||||
|
||||
using namespace Genode;
|
||||
|
||||
using Rect = Surface_base::Rect;
|
||||
using Area = Surface_base::Area;
|
||||
using Point = Surface_base::Point;
|
||||
|
||||
enum class Rotate { R0, R90, R180, R270 };
|
||||
|
||||
struct Flip { bool enabled; };
|
||||
|
||||
static bool swap_w_h(Rotate r) { return r == Rotate::R90 || r == Rotate::R270; }
|
||||
|
||||
static Area transformed(Area a, Rotate rotate)
|
||||
{
|
||||
return swap_w_h(rotate) ? Area { a.h, a.w } : a;
|
||||
}
|
||||
|
||||
static Point transformed(Point p, Area area, Rotate rotate, Flip flip)
|
||||
{
|
||||
int const w = area.w, h = area.h;
|
||||
|
||||
switch (rotate) {
|
||||
case Rotate::R0: break;
|
||||
case Rotate::R90: p = { .x = h - p.y - 1, .y = p.x }; break;
|
||||
case Rotate::R180: p = { .x = w - p.x - 1, .y = h - p.y - 1 }; break;
|
||||
case Rotate::R270: p = { .x = p.y, .y = w - p.x - 1 }; break;
|
||||
}
|
||||
|
||||
if (flip.enabled)
|
||||
p = { int(transformed(area, rotate).w) - p.x - 1, p.y };
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static Rect transformed(Rect r, Area area, Rotate rotate, Flip flip)
|
||||
{
|
||||
auto rect_from_points = [&] (Point p1, Point p2)
|
||||
{
|
||||
return Rect::compound(Point { min(p1.x, p2.x), min(p1.y, p2.y) },
|
||||
Point { max(p1.x, p2.x), max(p1.y, p2.y) });
|
||||
};
|
||||
return rect_from_points(transformed(r.p1(), area, rotate, flip),
|
||||
transformed(r.p2(), area, rotate, flip));
|
||||
}
|
||||
|
||||
static Rect snapped_to_8x8_grid(Rect r)
|
||||
{
|
||||
return Rect::compound(Point { .x = r.x1() & ~0x7,
|
||||
.y = r.y1() & ~0x7 },
|
||||
Point { .x = ((r.x2() + 8) & ~0x7) - 1,
|
||||
.y = ((r.y2() + 8) & ~0x7) - 1 });
|
||||
}
|
||||
|
||||
template <typename B2F>
|
||||
static inline void _b2f(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w,
|
||||
unsigned w, unsigned h, Rotate rotate)
|
||||
{
|
||||
switch (rotate) {
|
||||
case Rotate::R0: B2F::r0 (dst, dst_w, src, w, h); break;
|
||||
case Rotate::R90: B2F::r90 (dst, dst_w, src, src_w, w, h); break;
|
||||
case Rotate::R180: B2F::r180(dst, dst_w, src, w, h); break;
|
||||
case Rotate::R270: B2F::r270(dst, dst_w, src, src_w, w, h); break;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename OP>
|
||||
static inline void _b2f(Surface<Pixel_rgb888> &surface,
|
||||
Texture<Pixel_rgb888> const &texture,
|
||||
Rect rect, Rotate rotate, Flip flip)
|
||||
{
|
||||
/* surface size must be divisible by 8 */
|
||||
if (!aligned(surface.size().w, 2) || !aligned(surface.size().h, 2)) {
|
||||
warning("surface size ", surface.size(), " not divisible by 8");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check compatibility of surface with texture */
|
||||
if (transformed(surface.size(), rotate) != texture.size()) {
|
||||
warning("surface ", surface.size(), " mismatches texture ", texture.size());
|
||||
return;
|
||||
}
|
||||
|
||||
/* restrict rect to texture size */
|
||||
rect = Rect::intersect(rect, Rect { { }, texture.size() });
|
||||
|
||||
/* compute base addresses of affected pixel window */
|
||||
Rect const src_rect = snapped_to_8x8_grid(rect);
|
||||
Rect const dst_rect = transformed(src_rect, texture.size(), rotate, flip);
|
||||
|
||||
uint32_t const * const src = (uint32_t const *)texture.pixel()
|
||||
+ src_rect.y1()*texture.size().w
|
||||
+ src_rect.x1();
|
||||
|
||||
uint32_t * const dst = (uint32_t *)surface.addr()
|
||||
+ dst_rect.y1()*surface.size().w
|
||||
+ dst_rect.x1();
|
||||
|
||||
/* coordinates converted to 8x8 units */
|
||||
unsigned const src_w = texture.size().w >> 3,
|
||||
dst_w = surface.size().w >> 3,
|
||||
w = src_rect.area.w >> 3,
|
||||
h = src_rect.area.h >> 3;
|
||||
|
||||
if (w && h) {
|
||||
if (flip.enabled)
|
||||
_b2f<typename OP::B2f_flip>(dst, dst_w, src, src_w, w, h, rotate);
|
||||
else
|
||||
_b2f<typename OP::B2f> (dst, dst_w, src, src_w, w, h, rotate);
|
||||
}
|
||||
|
||||
surface.flush_pixels(dst_rect);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************
|
||||
** Legacy API **
|
||||
****************/
|
||||
|
||||
/**
|
||||
* Blit memory from source buffer to destination buffer
|
||||
*
|
||||
* \param src address of source buffer
|
||||
* \param src_w line length of source buffer in bytes
|
||||
* \param dst address of destination buffer
|
||||
* \param dst_w line length of destination buffer in bytes
|
||||
* \param w number of bytes per line to copy
|
||||
* \param h number of lines to copy
|
||||
*
|
||||
* This function works at a granularity of 16bit.
|
||||
* If the source and destination overlap, the result
|
||||
* of the copy operation is not defined.
|
||||
*/
|
||||
extern "C" void blit(void const *src, unsigned src_w,
|
||||
void *dst, unsigned dst_w, int w, int h);
|
||||
|
||||
#endif /* _INCLUDE__BLIT__TYPES_H_ */
|
25
repos/os/include/spec/arm_64/blit/blit.h
Normal file
25
repos/os/include/spec/arm_64/blit/blit.h
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* \brief Blit API
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__SPEC__ARM_64__BLIT_H_
|
||||
#define _INCLUDE__SPEC__ARM_64__BLIT_H_
|
||||
|
||||
#include <blit/types.h>
|
||||
#include <blit/internal/neon.h>
|
||||
|
||||
namespace Blit {
|
||||
|
||||
static inline void back2front(auto &&... args) { _b2f<Neon>(args...); }
|
||||
}
|
||||
|
||||
#endif /* _INCLUDE__SPEC__ARM_64__BLIT_H_ */
|
25
repos/os/include/spec/x86_64/blit/blit.h
Normal file
25
repos/os/include/spec/x86_64/blit/blit.h
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* \brief Blit API
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE__SPEC__X86_64__BLIT_H_
|
||||
#define _INCLUDE__SPEC__X86_64__BLIT_H_
|
||||
|
||||
#include <blit/types.h>
|
||||
#include <blit/internal/sse4.h>
|
||||
|
||||
namespace Blit {
|
||||
|
||||
static inline void back2front(auto &&... args) { _b2f<Sse4>(args...); }
|
||||
}
|
||||
|
||||
#endif /* _INCLUDE__SPEC__X86_64__BLIT_H_ */
|
@ -1,4 +1,6 @@
|
||||
MIRROR_FROM_REP_DIR := include/blit \
|
||||
include/spec/x86_64/blit \
|
||||
include/spec/arm_64/blit \
|
||||
src/lib/blit \
|
||||
lib/mk/blit.mk \
|
||||
lib/mk/spec/arm_64/blit.mk \
|
||||
|
1
repos/os/recipes/pkg/test-blit/README
Normal file
1
repos/os/recipes/pkg/test-blit/README
Normal file
@ -0,0 +1 @@
|
||||
Scenario for testing 2D blitting operations
|
1
repos/os/recipes/pkg/test-blit/archives
Normal file
1
repos/os/recipes/pkg/test-blit/archives
Normal file
@ -0,0 +1 @@
|
||||
_/src/test-blit
|
13
repos/os/recipes/pkg/test-blit/runtime
Normal file
13
repos/os/recipes/pkg/test-blit/runtime
Normal file
@ -0,0 +1,13 @@
|
||||
<runtime ram="32M" caps="1000" binary="test-blit">
|
||||
|
||||
<fail after_seconds="20"/>
|
||||
<succeed>[init] --- blit test finished ---</succeed>
|
||||
|
||||
<content>
|
||||
<rom label="ld.lib.so"/>
|
||||
<rom label="test-blit"/>
|
||||
</content>
|
||||
|
||||
<config/>
|
||||
|
||||
</runtime>
|
2
repos/os/recipes/src/test-blit/content.mk
Normal file
2
repos/os/recipes/src/test-blit/content.mk
Normal file
@ -0,0 +1,2 @@
|
||||
SRC_DIR = src/test/blit
|
||||
include $(GENODE_DIR)/repos/base/recipes/src/content.inc
|
1
repos/os/recipes/src/test-blit/hash
Normal file
1
repos/os/recipes/src/test-blit/hash
Normal file
@ -0,0 +1 @@
|
||||
2024-12-10 67b1a1ad0dddcdc22dd6e266309f8221ff30173f
|
3
repos/os/recipes/src/test-blit/used_apis
Normal file
3
repos/os/recipes/src/test-blit/used_apis
Normal file
@ -0,0 +1,3 @@
|
||||
base
|
||||
blit
|
||||
os
|
291
repos/os/src/test/blit/main.cc
Normal file
291
repos/os/src/test/blit/main.cc
Normal file
@ -0,0 +1,291 @@
|
||||
/*
|
||||
* \brief Blitting test
|
||||
* \author Norman Feske
|
||||
* \date 2025-01-16
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2025 Genode Labs GmbH
|
||||
*
|
||||
* This file is part of the Genode OS framework, which is distributed
|
||||
* under the terms of the GNU Affero General Public License version 3.
|
||||
*/
|
||||
|
||||
#include <base/component.h>
|
||||
#include <base/log.h>
|
||||
#include <blit/blit.h>
|
||||
#include <blit/internal/slow.h>
|
||||
|
||||
using namespace Blit;
|
||||
|
||||
|
||||
/*******************************
|
||||
** Low-level SIMD operations **
|
||||
*******************************/
|
||||
|
||||
template <unsigned W, unsigned H>
|
||||
struct Image
|
||||
{
|
||||
static constexpr unsigned w = W, h = H;
|
||||
|
||||
uint32_t pixels[W*H];
|
||||
|
||||
void print(Output &out) const
|
||||
{
|
||||
using Genode::print;
|
||||
for (unsigned y = 0; y < H; y++) {
|
||||
for (unsigned x = 0; x < min(25u, W); x++) {
|
||||
uint32_t v = pixels[y*W+x];
|
||||
if (v)
|
||||
print(out, " ", Char('A' + (v&63)), Char(char('A' + ((v>>16)&63))));
|
||||
else
|
||||
print(out, " .");
|
||||
}
|
||||
if (y < H-1) print(out, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
bool operator != (Image const &other)
|
||||
{
|
||||
for (unsigned i = 0; i < W*H; i++)
|
||||
if (other.pixels[i] != pixels[i])
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static Image pattern()
|
||||
{
|
||||
Image image { };
|
||||
for (unsigned y = 0; y < H; y++)
|
||||
for (unsigned x = 0; x < W; x++)
|
||||
image.pixels[y*W + x] = (y << 16) | x;
|
||||
return image;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#define TEST_LANDSCAPE(SIMD, FN, DST_W, DST_H, W, H) \
|
||||
{ \
|
||||
Image<DST_W, DST_H> dst { }, ref { }; \
|
||||
Slow:: FN(ref.pixels, ref.w/8, src.pixels, W, H); \
|
||||
log(#FN, " ref:\n", ref); \
|
||||
SIMD:: FN(dst.pixels, dst.w/8, src.pixels, W, H); \
|
||||
log(#FN, " got:\n", dst); \
|
||||
if (dst != ref) { \
|
||||
error("", #FN, " failed"); \
|
||||
throw 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define TEST_PORTRAIT(SIMD, FN, DST_W, DST_H, W, H) \
|
||||
{ \
|
||||
Image<DST_W, DST_H> dst { }, ref { }; \
|
||||
Slow:: FN(ref.pixels, ref.w/8, src.pixels, src.w/8, W, H); \
|
||||
log(#FN, " ref:\n", ref); \
|
||||
SIMD:: FN(dst.pixels, dst.w/8, src.pixels, src.w/8, W, H); \
|
||||
log(#FN, " got:\n", dst); \
|
||||
if (dst != ref) { \
|
||||
error("", #FN, " failed"); \
|
||||
throw 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
template <typename SIMD>
|
||||
static void test_simd_b2f()
|
||||
{
|
||||
static Image<48,32> const src = Image<48,32>::pattern();
|
||||
|
||||
log("source image:\n", src);
|
||||
|
||||
TEST_LANDSCAPE ( SIMD, B2f ::r0, 48, 32, 2, 4 );
|
||||
TEST_LANDSCAPE ( SIMD, B2f_flip ::r0, 48, 32, 2, 4 );
|
||||
TEST_PORTRAIT ( SIMD, B2f ::r90, 32, 48, 4, 2 );
|
||||
TEST_PORTRAIT ( SIMD, B2f_flip ::r90, 32, 48, 4, 2 );
|
||||
TEST_LANDSCAPE ( SIMD, B2f ::r180, 48, 32, 2, 4 );
|
||||
TEST_LANDSCAPE ( SIMD, B2f_flip::r180, 48, 32, 2, 4 );
|
||||
TEST_PORTRAIT ( SIMD, B2f ::r270, 32, 48, 4, 2 );
|
||||
TEST_PORTRAIT ( SIMD, B2f_flip::r270, 32, 48, 4, 2 );
|
||||
}
|
||||
|
||||
|
||||
/****************************************
|
||||
** Back-to-front argument dispatching **
|
||||
****************************************/
|
||||
|
||||
struct Recorded
|
||||
{
|
||||
struct Args
|
||||
{
|
||||
uint32_t *dst;
|
||||
unsigned dst_w;
|
||||
uint32_t const *src;
|
||||
unsigned src_w;
|
||||
unsigned w, h;
|
||||
|
||||
bool operator != (Args const &other) const
|
||||
{
|
||||
return dst != other.dst
|
||||
|| dst_w != other.dst_w
|
||||
|| src != other.src
|
||||
|| src_w != other.src_w
|
||||
|| w != other.w
|
||||
|| h != other.h;
|
||||
}
|
||||
|
||||
void print(Output &out) const
|
||||
{
|
||||
bool const valid = (*this != Args { });
|
||||
if (!valid) {
|
||||
Genode::print(out, "invalid");
|
||||
return;
|
||||
}
|
||||
|
||||
/* print src and dst pointer values in units of uint32_t words */
|
||||
Genode::print(out, "dst=", Hex(addr_t(dst)/4), " dst_w=", dst_w,
|
||||
" src=", Hex(addr_t(src)/4), " src_w=", src_w, " w=", w, " h=", h);
|
||||
}
|
||||
};
|
||||
|
||||
static Args recorded;
|
||||
|
||||
static void _record(uint32_t *dst, unsigned line_w,
|
||||
uint32_t const *src, unsigned w, unsigned h)
|
||||
{
|
||||
recorded = { dst, line_w, src, line_w, w, h };
|
||||
}
|
||||
|
||||
static void _record(uint32_t *dst, unsigned dst_w,
|
||||
uint32_t const *src, unsigned src_w, unsigned w, unsigned h)
|
||||
{
|
||||
recorded = { dst, dst_w, src, src_w, w, h };
|
||||
}
|
||||
|
||||
struct B2f
|
||||
{
|
||||
static inline void r0 (auto &&... args) { _record(args...); }
|
||||
static inline void r90 (auto &&... args) { _record(args...); }
|
||||
static inline void r180 (auto &&... args) { _record(args...); }
|
||||
static inline void r270 (auto &&... args) { _record(args...); }
|
||||
};
|
||||
|
||||
struct B2f_flip
|
||||
{
|
||||
static inline void r0 (auto &&... args) { _record(args...); }
|
||||
static inline void r90 (auto &&... args) { _record(args...); }
|
||||
static inline void r180(auto &&... args) { _record(args...); }
|
||||
static inline void r270(auto &&... args) { _record(args...); }
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
Recorded::Args Recorded::recorded { };
|
||||
|
||||
namespace Blit {
|
||||
|
||||
static inline const char *name(Rotate r)
|
||||
{
|
||||
switch (r) {
|
||||
case Rotate::R0: return "R0";
|
||||
case Rotate::R90: return "R90";
|
||||
case Rotate::R180: return "R180";
|
||||
case Rotate::R270: return "R270";
|
||||
}
|
||||
return "invalid";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void test_b2f_dispatch()
|
||||
{
|
||||
Texture<Pixel_rgb888> texture_landscape { nullptr, nullptr, { 640, 480 } };
|
||||
Texture<Pixel_rgb888> texture_portrait { nullptr, nullptr, { 480, 640 } };
|
||||
Surface<Pixel_rgb888> surface { nullptr, { 640, 480 } };
|
||||
|
||||
struct Expected : Recorded::Args { };
|
||||
|
||||
auto expected = [&] (addr_t dst, unsigned dst_w, addr_t src, unsigned src_w,
|
||||
unsigned w, unsigned h)
|
||||
{
|
||||
return Expected { (uint32_t *)(4*dst), dst_w,
|
||||
(uint32_t *)(4*src), src_w, w, h };
|
||||
};
|
||||
|
||||
using Rect = Blit::Rect;
|
||||
|
||||
auto test = [&] (Texture<Pixel_rgb888> const &texture,
|
||||
Rect rect, Rotate rotate, Flip flip,
|
||||
Expected const &expected)
|
||||
{
|
||||
Recorded::recorded = { };
|
||||
_b2f<Recorded>(surface, texture, rect, rotate, flip);
|
||||
log("b2f: ", rect, " ", name(rotate), flip.enabled ? " flip" : "",
|
||||
" -> ", Recorded::recorded);
|
||||
if (Recorded::recorded != expected) {
|
||||
error("test_b2f_dispatch failed, expected: ", expected);
|
||||
throw 1;
|
||||
}
|
||||
};
|
||||
|
||||
log("offset calculation of destination window");
|
||||
{
|
||||
unsigned const x = 32, y = 16, w = 64, h = 48;
|
||||
|
||||
addr_t const src_landscape_ptr = y*640 + x,
|
||||
src_portrait_ptr = y*480 + x;
|
||||
|
||||
Rect const rect { { x, y }, { w, h } };
|
||||
|
||||
test(texture_landscape, rect, Rotate::R0, Flip { },
|
||||
expected(y*640 + x, 80, src_landscape_ptr, 80, 8, 6));
|
||||
test(texture_landscape, rect, Rotate::R0, Flip { true },
|
||||
expected(y*640 + 640 - w - x, 80, src_landscape_ptr, 80, 8, 6));
|
||||
test(texture_portrait, rect, Rotate::R90, Flip { },
|
||||
expected(x*640 + 640 - h - y, 80, src_portrait_ptr, 60, 8, 6));
|
||||
test(texture_portrait, rect, Rotate::R90, Flip { true },
|
||||
expected(x*640 + y, 80, src_portrait_ptr, 60, 8, 6));
|
||||
test(texture_landscape, rect, Rotate::R180, Flip { },
|
||||
expected((480 - y - h)*640 + 640 - x - w, 80, src_landscape_ptr, 80, 8, 6));
|
||||
test(texture_landscape, rect, Rotate::R180, Flip { true },
|
||||
expected((480 - y - h)*640 + x, 80, src_landscape_ptr, 80, 8, 6));
|
||||
test(texture_portrait, rect, Rotate::R270, Flip { },
|
||||
expected((480 - x - w)*640 + y, 80, src_portrait_ptr, 60, 8, 6));
|
||||
test(texture_portrait, rect, Rotate::R270, Flip { true },
|
||||
expected((480 - x - w)*640 + 640 - y - h, 80, src_portrait_ptr, 60, 8, 6));
|
||||
}
|
||||
|
||||
log("check for compatibility of surface and texture");
|
||||
test(texture_portrait, { { }, { 16, 16 } }, Rotate::R0, Flip { },
|
||||
expected(0, 0, 0, 0, 0, 0));
|
||||
|
||||
log("clamp rect to texture size");
|
||||
test(texture_landscape, { { -99, -99 }, { 999, 999 } }, Rotate::R0, Flip { },
|
||||
expected(0, 80, 0, 80, 80, 60));
|
||||
|
||||
log("ignore out-of-bounds rect");
|
||||
test(texture_landscape, { { 1000, 0 }, { 16, 16 } }, Rotate::R0, Flip { },
|
||||
expected(0, 0, 0, 0, 0, 0));
|
||||
|
||||
/* snap to grid */
|
||||
log("snap rect argument to 8x8 grid");
|
||||
test(texture_landscape, { { 31, 63 }, { 2, 2 } }, Rotate::R0, Flip { },
|
||||
expected(56*640 + 24, 80, 56*640 + 24, 80, 2, 2));
|
||||
}
|
||||
|
||||
|
||||
void Component::construct(Genode::Env &)
|
||||
{
|
||||
#ifdef _INCLUDE__BLIT__INTERNAL__NEON_H_
|
||||
log("-- ARM Neon --");
|
||||
test_simd_b2f<Neon>();
|
||||
#endif
|
||||
#ifdef _INCLUDE__BLIT__INTERNAL__SSE4_H_
|
||||
log("-- SSE4 --");
|
||||
test_simd_b2f<Sse4>();
|
||||
#endif
|
||||
|
||||
test_b2f_dispatch();
|
||||
|
||||
log("--- blit test finished ---");
|
||||
}
|
3
repos/os/src/test/blit/target.mk
Normal file
3
repos/os/src/test/blit/target.mk
Normal file
@ -0,0 +1,3 @@
|
||||
TARGET = test-blit
|
||||
SRC_CC = main.cc
|
||||
LIBS = base
|
Loading…
x
Reference in New Issue
Block a user