openwrt/target/linux/bcm27xx/patches-6.6/950-0020-vc4-drm-vc4_plane-Keep-fractional-source-coords-insi.patch

From 1e18d70635d275e4c6a9ac63fa79a461ed50eac2 Mon Sep 17 00:00:00 2001
From: Dom Cobley <popcornmix@gmail.com>
Date: Mon, 14 Mar 2022 17:56:10 +0000
Subject: [PATCH 0020/1085] vc4/drm: vc4_plane: Keep fractional source coords
 inside state

Signed-off-by: Dom Cobley <popcornmix@gmail.com>
---
 drivers/gpu/drm/vc4/vc4_drv.h   |  2 +-
 drivers/gpu/drm/vc4/vc4_plane.c | 68 ++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 36 deletions(-)

--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -408,7 +408,7 @@ struct vc4_plane_state {
 
 	/* Clipped coordinates of the plane on the display. */
 	int crtc_x, crtc_y, crtc_w, crtc_h;
-	/* Clipped area being scanned from in the FB. */
+	/* Clipped area being scanned from in the FB in u16.16 format */
 	u32 src_x, src_y;
 
 	u32 src_w[2], src_h[2];
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -251,9 +251,9 @@ static const struct hvs_format *vc4_get_
 
 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
 {
-	if (dst == src)
+	if (dst == src >> 16)
 		return VC4_SCALING_NONE;
-	if (3 * dst >= 2 * src)
+	if (3 * dst >= 2 * (src >> 16))
 		return VC4_SCALING_PPF;
 	else
 		return VC4_SCALING_TPZ;
@@ -462,15 +462,10 @@ static int vc4_plane_setup_clipping_and_
 		vc4_state->offsets[i] = bo->dma_addr + fb->offsets[i];
 	}
 
-	/*
-	 * We don't support subpixel source positioning for scaling,
-	 * but fractional coordinates can be generated by clipping
-	 * so just round for now
-	 */
-	vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);
-	vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);
-	vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;
-	vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;
+	vc4_state->src_x = state->src.x1;
+	vc4_state->src_y = state->src.y1;
+	vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
+	vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
 
 	vc4_state->crtc_x = state->dst.x1;
 	vc4_state->crtc_y = state->dst.y1;
@@ -523,7 +518,7 @@ static void vc4_write_tpz(struct vc4_pla
 {
 	u32 scale, recip;
 
-	scale = (1 << 16) * src / dst;
+	scale = src / dst;
 
 	/* The specs note that while the reciprocal would be defined
 	 * as (1<<32)/scale, ~0 is close enough.
@@ -569,7 +564,7 @@ static u32 vc4_lbm_size(struct drm_plane
 	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
 		pix_per_line = vc4_state->crtc_w;
 	else
-		pix_per_line = vc4_state->src_w[0];
+		pix_per_line = vc4_state->src_w[0] >> 16;
 
 	if (!vc4_state->is_yuv) {
 		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
@@ -660,7 +655,8 @@ static void vc4_plane_calc_load(struct d
 	for (i = 0; i < fb->format->num_planes; i++) {
 		/* Even if the bandwidth/plane required for a single frame is
 		 *
-		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
+		 * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
+		 *  cpp * vrefresh
 		 *
 		 * when downscaling, we have to read more pixels per line in
 		 * the time frame reserved for a single line, so the bandwidth
@@ -669,11 +665,11 @@ static void vc4_plane_calc_load(struct d
 		 * load by this number. We're likely over-estimating the read
 		 * demand, but that's better than under-estimating it.
 		 */
-		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
+		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
 					     vc4_state->crtc_h);
-		vc4_state->membus_load += vc4_state->src_w[i] *
-					  vc4_state->src_h[i] * vscale_factor *
-					  fb->format->cpp[i];
+		vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
+					  (vc4_state->src_h[i] >> 16) *
+					  vscale_factor * fb->format->cpp[i];
 		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
 	}
 
@@ -826,7 +822,8 @@ static int vc4_plane_mode_set(struct drm
 	bool mix_plane_alpha;
 	bool covers_screen;
 	u32 scl0, scl1, pitch0;
-	u32 tiling, src_y;
+	u32 tiling, src_x, src_y;
+	u32 width, height;
 	u32 hvs_format = format->hvs;
 	unsigned int rotation;
 	int ret, i;
@@ -838,6 +835,9 @@ static int vc4_plane_mode_set(struct drm
 	if (ret)
 		return ret;
 
+	width = vc4_state->src_w[0] >> 16;
+	height = vc4_state->src_h[0] >> 16;
+
 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
 	 * the scaler do the same thing.  For YUV, the Y plane needs
@@ -858,9 +858,11 @@ static int vc4_plane_mode_set(struct drm
 					 DRM_MODE_REFLECT_Y);
 
 	/* We must point to the last line when Y reflection is enabled. */
-	src_y = vc4_state->src_y;
+	src_y = vc4_state->src_y >> 16;
 	if (rotation & DRM_MODE_REFLECT_Y)
-		src_y += vc4_state->src_h[0] - 1;
+		src_y += height - 1;
+
+	src_x = vc4_state->src_x >> 16;
 
 	switch (base_format_mod) {
 	case DRM_FORMAT_MOD_LINEAR:
@@ -875,7 +877,7 @@ static int vc4_plane_mode_set(struct drm
 						 (i ? v_subsample : 1) *
 						 fb->pitches[i];
 
-			vc4_state->offsets[i] += vc4_state->src_x /
+			vc4_state->offsets[i] += src_x /
 						 (i ? h_subsample : 1) *
 						 fb->format->cpp[i];
 		}
@@ -898,7 +900,7 @@ static int vc4_plane_mode_set(struct drm
 		 *	pitch * tile_h == tile_size * tiles_per_row
 		 */
 		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
-		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
+		u32 tiles_l = src_x >> tile_w_shift;
 		u32 tiles_r = tiles_w - tiles_l;
 		u32 tiles_t = src_y >> tile_h_shift;
 		/* Intra-tile offsets, which modify the base address (the
@@ -908,7 +910,7 @@ static int vc4_plane_mode_set(struct drm
 		u32 tile_y = (src_y >> 4) & 1;
 		u32 subtile_y = (src_y >> 2) & 3;
 		u32 utile_y = src_y & 3;
-		u32 x_off = vc4_state->src_x & tile_w_mask;
+		u32 x_off = src_x & tile_w_mask;
 		u32 y_off = src_y & tile_h_mask;
 
 		/* When Y reflection is requested we must set the
@@ -1004,7 +1006,7 @@ static int vc4_plane_mode_set(struct drm
 				 * of the 12-pixels in that 128-bit word is the
 				 * first pixel to be used
 				 */
-				u32 remaining_pixels = vc4_state->src_x % 96;
+				u32 remaining_pixels = src_x % 96;
 				u32 aligned = remaining_pixels / 12;
 				u32 last_bits = remaining_pixels % 12;
 
@@ -1026,12 +1028,12 @@ static int vc4_plane_mode_set(struct drm
 					return -EINVAL;
 				}
 				pix_per_tile = tile_w / fb->format->cpp[0];
-				x_off = (vc4_state->src_x % pix_per_tile) /
+				x_off = (src_x % pix_per_tile) /
 					(i ? h_subsample : 1) *
 					fb->format->cpp[i];
 			}
 
-			tile = vc4_state->src_x / pix_per_tile;
+			tile = src_x / pix_per_tile;
 
 			vc4_state->offsets[i] += param * tile_w * tile;
 			vc4_state->offsets[i] += src_y /
@@ -1092,10 +1094,8 @@ static int vc4_plane_mode_set(struct drm
 		vc4_dlist_write(vc4_state,
 				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
 				vc4_hvs4_get_alpha_blend_mode(state) |
-				VC4_SET_FIELD(vc4_state->src_w[0],
-					      SCALER_POS2_WIDTH) |
-				VC4_SET_FIELD(vc4_state->src_h[0],
-					      SCALER_POS2_HEIGHT));
+				VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
+				VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
 
 		/* Position Word 3: Context.  Written by the HVS. */
 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
@@ -1148,10 +1148,8 @@ static int vc4_plane_mode_set(struct drm
 		/* Position Word 2: Source Image Size */
 		vc4_state->pos2_offset = vc4_state->dlist_count;
 		vc4_dlist_write(vc4_state,
-				VC4_SET_FIELD(vc4_state->src_w[0],
-					      SCALER5_POS2_WIDTH) |
-				VC4_SET_FIELD(vc4_state->src_h[0],
-					      SCALER5_POS2_HEIGHT));
+				VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
+				VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
 
 		/* Position Word 3: Context.  Written by the HVS. */
 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
bcm27xx: add 6.6 kernel patches

The patches were generated from the RPi repo with the following command:
git format-patch v6.6.34..rpi-6.1.y

Some patches needed rebasing and, as usual, the applied and reverted, wireless drivers, Github workflows, READMEs and defconfigs patches were removed.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
2024-05-10 11:19:19 +00:00
			`From 1e18d70635d275e4c6a9ac63fa79a461ed50eac2 Mon Sep 17 00:00:00 2001`
			`From: Dom Cobley <popcornmix@gmail.com>`
			`Date: Mon, 14 Mar 2022 17:56:10 +0000`
			`Subject: [PATCH 0020/1085] vc4/drm: vc4_plane: Keep fractional source coords`
			`inside state`

			`Signed-off-by: Dom Cobley <popcornmix@gmail.com>`
			`---`
			`drivers/gpu/drm/vc4/vc4_drv.h \| 2 +-`
			`drivers/gpu/drm/vc4/vc4_plane.c \| 68 ++++++++++++++++-----------------`
			`2 files changed, 34 insertions(+), 36 deletions(-)`

			`--- a/drivers/gpu/drm/vc4/vc4_drv.h`
			`+++ b/drivers/gpu/drm/vc4/vc4_drv.h`
			`@@ -408,7 +408,7 @@ struct vc4_plane_state {`

			`/* Clipped coordinates of the plane on the display. */`
			`int crtc_x, crtc_y, crtc_w, crtc_h;`
			`- /* Clipped area being scanned from in the FB. */`
			`+ /* Clipped area being scanned from in the FB in u16.16 format */`
			`u32 src_x, src_y;`

			`u32 src_w[2], src_h[2];`
			`--- a/drivers/gpu/drm/vc4/vc4_plane.c`
			`+++ b/drivers/gpu/drm/vc4/vc4_plane.c`
			`@@ -251,9 +251,9 @@ static const struct hvs_format *vc4_get_`

			`static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)`
			`{`
			`- if (dst == src)`
			`+ if (dst == src >> 16)`
			`return VC4_SCALING_NONE;`
			`- if (3 * dst >= 2 * src)`
			`+ if (3 * dst >= 2 * (src >> 16))`
			`return VC4_SCALING_PPF;`
			`else`
			`return VC4_SCALING_TPZ;`
			`@@ -462,15 +462,10 @@ static int vc4_plane_setup_clipping_and_`
			`vc4_state->offsets[i] = bo->dma_addr + fb->offsets[i];`
			`}`

			`- /*`
			`- * We don't support subpixel source positioning for scaling,`
			`- * but fractional coordinates can be generated by clipping`
			`- * so just round for now`
			`- */`
			`- vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);`
			`- vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);`
			`- vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;`
			`- vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;`
			`+ vc4_state->src_x = state->src.x1;`
			`+ vc4_state->src_y = state->src.y1;`
			`+ vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;`
			`+ vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;`

			`vc4_state->crtc_x = state->dst.x1;`
			`vc4_state->crtc_y = state->dst.y1;`
			`@@ -523,7 +518,7 @@ static void vc4_write_tpz(struct vc4_pla`
			`{`
			`u32 scale, recip;`

			`- scale = (1 << 16) * src / dst;`
			`+ scale = src / dst;`

			`/* The specs note that while the reciprocal would be defined`
			`* as (1<<32)/scale, ~0 is close enough.`
			`@@ -569,7 +564,7 @@ static u32 vc4_lbm_size(struct drm_plane`
			`if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)`
			`pix_per_line = vc4_state->crtc_w;`
			`else`
			`- pix_per_line = vc4_state->src_w[0];`
			`+ pix_per_line = vc4_state->src_w[0] >> 16;`

			`if (!vc4_state->is_yuv) {`
			`if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)`
			`@@ -660,7 +655,8 @@ static void vc4_plane_calc_load(struct d`
			`for (i = 0; i < fb->format->num_planes; i++) {`
			`/* Even if the bandwidth/plane required for a single frame is`
			`*`
			`- * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh`
			`+ * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *`
			`+ * cpp * vrefresh`
			`*`
			`* when downscaling, we have to read more pixels per line in`
			`* the time frame reserved for a single line, so the bandwidth`
			`@@ -669,11 +665,11 @@ static void vc4_plane_calc_load(struct d`
			`* load by this number. We're likely over-estimating the read`
			`* demand, but that's better than under-estimating it.`
			`*/`
			`- vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],`
			`+ vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,`
			`vc4_state->crtc_h);`
			`- vc4_state->membus_load += vc4_state->src_w[i] *`
			`- vc4_state->src_h[i] * vscale_factor *`
			`- fb->format->cpp[i];`
			`+ vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *`
			`+ (vc4_state->src_h[i] >> 16) *`
			`+ vscale_factor * fb->format->cpp[i];`
			`vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;`
			`}`

			`@@ -826,7 +822,8 @@ static int vc4_plane_mode_set(struct drm`
			`bool mix_plane_alpha;`
			`bool covers_screen;`
			`u32 scl0, scl1, pitch0;`
			`- u32 tiling, src_y;`
			`+ u32 tiling, src_x, src_y;`
			`+ u32 width, height;`
			`u32 hvs_format = format->hvs;`
			`unsigned int rotation;`
			`int ret, i;`
			`@@ -838,6 +835,9 @@ static int vc4_plane_mode_set(struct drm`
			`if (ret)`
			`return ret;`

			`+ width = vc4_state->src_w[0] >> 16;`
			`+ height = vc4_state->src_h[0] >> 16;`
			`+`
			`/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB`
			`* and 4:4:4, scl1 should be set to scl0 so both channels of`
			`* the scaler do the same thing. For YUV, the Y plane needs`
			`@@ -858,9 +858,11 @@ static int vc4_plane_mode_set(struct drm`
			`DRM_MODE_REFLECT_Y);`

			`/* We must point to the last line when Y reflection is enabled. */`
			`- src_y = vc4_state->src_y;`
			`+ src_y = vc4_state->src_y >> 16;`
			`if (rotation & DRM_MODE_REFLECT_Y)`
			`- src_y += vc4_state->src_h[0] - 1;`
			`+ src_y += height - 1;`
			`+`
			`+ src_x = vc4_state->src_x >> 16;`

			`switch (base_format_mod) {`
			`case DRM_FORMAT_MOD_LINEAR:`
			`@@ -875,7 +877,7 @@ static int vc4_plane_mode_set(struct drm`
			`(i ? v_subsample : 1) *`
			`fb->pitches[i];`

			`- vc4_state->offsets[i] += vc4_state->src_x /`
			`+ vc4_state->offsets[i] += src_x /`
			`(i ? h_subsample : 1) *`
			`fb->format->cpp[i];`
			`}`
			`@@ -898,7 +900,7 @@ static int vc4_plane_mode_set(struct drm`
			`* pitch * tile_h == tile_size * tiles_per_row`
			`*/`
			`u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);`
			`- u32 tiles_l = vc4_state->src_x >> tile_w_shift;`
			`+ u32 tiles_l = src_x >> tile_w_shift;`
			`u32 tiles_r = tiles_w - tiles_l;`
			`u32 tiles_t = src_y >> tile_h_shift;`
			`/* Intra-tile offsets, which modify the base address (the`
			`@@ -908,7 +910,7 @@ static int vc4_plane_mode_set(struct drm`
			`u32 tile_y = (src_y >> 4) & 1;`
			`u32 subtile_y = (src_y >> 2) & 3;`
			`u32 utile_y = src_y & 3;`
			`- u32 x_off = vc4_state->src_x & tile_w_mask;`
			`+ u32 x_off = src_x & tile_w_mask;`
			`u32 y_off = src_y & tile_h_mask;`

			`/* When Y reflection is requested we must set the`
			`@@ -1004,7 +1006,7 @@ static int vc4_plane_mode_set(struct drm`
			`* of the 12-pixels in that 128-bit word is the`
			`* first pixel to be used`
			`*/`
			`- u32 remaining_pixels = vc4_state->src_x % 96;`
			`+ u32 remaining_pixels = src_x % 96;`
			`u32 aligned = remaining_pixels / 12;`
			`u32 last_bits = remaining_pixels % 12;`

			`@@ -1026,12 +1028,12 @@ static int vc4_plane_mode_set(struct drm`
			`return -EINVAL;`
			`}`
			`pix_per_tile = tile_w / fb->format->cpp[0];`
			`- x_off = (vc4_state->src_x % pix_per_tile) /`
			`+ x_off = (src_x % pix_per_tile) /`
			`(i ? h_subsample : 1) *`
			`fb->format->cpp[i];`
			`}`

			`- tile = vc4_state->src_x / pix_per_tile;`
			`+ tile = src_x / pix_per_tile;`

			`vc4_state->offsets[i] += param * tile_w * tile;`
			`vc4_state->offsets[i] += src_y /`
			`@@ -1092,10 +1094,8 @@ static int vc4_plane_mode_set(struct drm`
			`vc4_dlist_write(vc4_state,`
			`(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) \|`
			`vc4_hvs4_get_alpha_blend_mode(state) \|`
			`- VC4_SET_FIELD(vc4_state->src_w[0],`
			`- SCALER_POS2_WIDTH) \|`
			`- VC4_SET_FIELD(vc4_state->src_h[0],`
			`- SCALER_POS2_HEIGHT));`
			`+ VC4_SET_FIELD(width, SCALER_POS2_WIDTH) \|`
			`+ VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));`

			`/* Position Word 3: Context. Written by the HVS. */`
			`vc4_dlist_write(vc4_state, 0xc0c0c0c0);`
			`@@ -1148,10 +1148,8 @@ static int vc4_plane_mode_set(struct drm`
			`/* Position Word 2: Source Image Size */`
			`vc4_state->pos2_offset = vc4_state->dlist_count;`
			`vc4_dlist_write(vc4_state,`
			`- VC4_SET_FIELD(vc4_state->src_w[0],`
			`- SCALER5_POS2_WIDTH) \|`
			`- VC4_SET_FIELD(vc4_state->src_h[0],`
			`- SCALER5_POS2_HEIGHT));`
			`+ VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) \|`
			`+ VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));`

			`/* Position Word 3: Context. Written by the HVS. */`
			`vc4_dlist_write(vc4_state, 0xc0c0c0c0);`