mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-10 15:03:07 +00:00
20ea6adbf1
Build system: x86_64 Build-tested: bcm2708, bcm2709, bcm2710, bcm2711 Run-tested: bcm2708/RPiB+, bcm2709/RPi3B, bcm2710/RPi3B, bcm2711/RPi4B Signed-off-by: Marty Jones <mj8263788@gmail.com> Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
335 lines
11 KiB
Diff
335 lines
11 KiB
Diff
From 4ab81f113bdf1ca8c3b0d53c777885aa33ed27f3 Mon Sep 17 00:00:00 2001
|
|
From: John Cox <jc@kynesim.co.uk>
|
|
Date: Thu, 29 Apr 2021 19:17:06 +0100
|
|
Subject: [PATCH] media: rpivid: Make slice ctrl dynamic
|
|
|
|
Allows the user to submit a whole frame's worth of slice headers in
|
|
one lump along with a single bitstream dmabuf for the whole lot.
|
|
This potentially saves a lot of bitstream copying.
|
|
|
|
Signed-off-by: John Cox <jc@kynesim.co.uk>
|
|
---
|
|
drivers/staging/media/rpivid/rpivid.c | 4 +
|
|
drivers/staging/media/rpivid/rpivid_dec.c | 18 ++-
|
|
drivers/staging/media/rpivid/rpivid_h265.c | 151 +++++++++++----------
|
|
3 files changed, 99 insertions(+), 74 deletions(-)
|
|
|
|
--- a/drivers/staging/media/rpivid/rpivid.c
|
|
+++ b/drivers/staging/media/rpivid/rpivid.c
|
|
@@ -63,7 +63,11 @@ static const struct rpivid_control rpivi
|
|
},
|
|
{
|
|
.cfg = {
|
|
+ .name = "Slice param array",
|
|
.id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
|
|
+ .type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS,
|
|
+ .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY,
|
|
+ .dims = { 0x1000 },
|
|
},
|
|
.required = true,
|
|
},
|
|
--- a/drivers/staging/media/rpivid/rpivid_dec.c
|
|
+++ b/drivers/staging/media/rpivid/rpivid_dec.c
|
|
@@ -46,22 +46,34 @@ void rpivid_device_run(void *priv)
|
|
|
|
switch (ctx->src_fmt.pixelformat) {
|
|
case V4L2_PIX_FMT_HEVC_SLICE:
|
|
+ {
|
|
+ const struct v4l2_ctrl *ctrl;
|
|
+
|
|
run.h265.sps =
|
|
rpivid_find_control_data(ctx,
|
|
V4L2_CID_MPEG_VIDEO_HEVC_SPS);
|
|
run.h265.pps =
|
|
rpivid_find_control_data(ctx,
|
|
V4L2_CID_MPEG_VIDEO_HEVC_PPS);
|
|
- run.h265.slice_params =
|
|
- rpivid_find_control_data(ctx,
|
|
- V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
|
|
run.h265.dec =
|
|
rpivid_find_control_data(ctx,
|
|
V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
|
|
+
|
|
+ ctrl = rpivid_find_ctrl(ctx,
|
|
+ V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
|
|
+ if (!ctrl || !ctrl->elems) {
|
|
+ v4l2_err(&dev->v4l2_dev, "%s: Missing slice params\n",
|
|
+ __func__);
|
|
+ goto fail;
|
|
+ }
|
|
+ run.h265.slice_ents = ctrl->elems;
|
|
+ run.h265.slice_params = ctrl->p_cur.p;
|
|
+
|
|
run.h265.scaling_matrix =
|
|
rpivid_find_control_data(ctx,
|
|
V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
|
|
break;
|
|
+ }
|
|
|
|
default:
|
|
break;
|
|
--- a/drivers/staging/media/rpivid/rpivid_h265.c
|
|
+++ b/drivers/staging/media/rpivid/rpivid_h265.c
|
|
@@ -245,7 +245,6 @@ struct rpivid_dec_state {
|
|
|
|
// Slice vars
|
|
unsigned int slice_idx;
|
|
- bool frame_end;
|
|
bool slice_temporal_mvp; /* Slice flag but constant for frame */
|
|
|
|
// Temp vars per run - don't actually need to persist
|
|
@@ -740,7 +739,8 @@ static void new_slice_segment(struct rpi
|
|
V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
|
|
<< 24));
|
|
|
|
- if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
|
|
+ if (!s->start_ts &&
|
|
+ (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
|
|
write_scaling_factors(de);
|
|
|
|
if (!s->dependent_slice_segment_flag) {
|
|
@@ -1111,7 +1111,8 @@ static int wpp_end_previous_slice(struct
|
|
* next chunk code simpler
|
|
*/
|
|
static int wpp_decode_slice(struct rpivid_dec_env *const de,
|
|
- const struct rpivid_dec_state *const s)
|
|
+ const struct rpivid_dec_state *const s,
|
|
+ bool last_slice)
|
|
{
|
|
bool reset_qp_y = true;
|
|
const bool indep = !s->dependent_slice_segment_flag;
|
|
@@ -1150,7 +1151,7 @@ static int wpp_decode_slice(struct rpivi
|
|
0, 0, s->start_ctb_x, s->start_ctb_y,
|
|
s->slice_qp, slice_reg_const(s));
|
|
|
|
- if (s->frame_end) {
|
|
+ if (last_slice) {
|
|
rv = wpp_entry_fill(de, s, s->ctb_height - 1);
|
|
if (rv)
|
|
return rv;
|
|
@@ -1229,7 +1230,8 @@ static int end_previous_slice(struct rpi
|
|
}
|
|
|
|
static int decode_slice(struct rpivid_dec_env *const de,
|
|
- const struct rpivid_dec_state *const s)
|
|
+ const struct rpivid_dec_state *const s,
|
|
+ bool last_slice)
|
|
{
|
|
bool reset_qp_y;
|
|
unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x);
|
|
@@ -1275,7 +1277,7 @@ static int decode_slice(struct rpivid_de
|
|
* now, otherwise this will be done at the start of the next slice
|
|
* when it will be known where this slice finishes
|
|
*/
|
|
- if (s->frame_end) {
|
|
+ if (last_slice) {
|
|
rv = tile_entry_fill(de, s,
|
|
s->tile_width - 1,
|
|
s->tile_height - 1);
|
|
@@ -1670,11 +1672,13 @@ static u32 mk_config2(const struct rpivi
|
|
static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
|
|
{
|
|
struct rpivid_dev *const dev = ctx->dev;
|
|
- const struct v4l2_ctrl_hevc_slice_params *const sh =
|
|
- run->h265.slice_params;
|
|
const struct v4l2_ctrl_hevc_decode_params *const dec =
|
|
run->h265.dec;
|
|
-// const struct v4l2_hevc_pred_weight_table *pred_weight_table;
|
|
+ /* sh0 used where slice header contents should be constant over all
|
|
+ * slices, or first slice of frame
|
|
+ */
|
|
+ const struct v4l2_ctrl_hevc_slice_params *const sh0 =
|
|
+ run->h265.slice_params;
|
|
struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
|
struct rpivid_dec_state *const s = ctx->state;
|
|
struct vb2_queue *vq;
|
|
@@ -1684,20 +1688,18 @@ static void rpivid_h265_setup(struct rpi
|
|
int use_aux;
|
|
int rv;
|
|
bool slice_temporal_mvp;
|
|
+ bool frame_end;
|
|
|
|
xtrace_in(dev, de);
|
|
+ s->sh = NULL; // Avoid use until in the slice loop
|
|
|
|
-// pred_weight_table = &sh->pred_weight_table;
|
|
-
|
|
- s->frame_end =
|
|
+ frame_end =
|
|
((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
|
|
|
|
- slice_temporal_mvp = (sh->flags &
|
|
+ slice_temporal_mvp = (sh0->flags &
|
|
V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
|
|
|
|
if (de && de->state != RPIVID_DECODE_END) {
|
|
- ++s->slice_idx;
|
|
-
|
|
switch (de->state) {
|
|
case RPIVID_DECODE_SLICE_CONTINUE:
|
|
// Expected state
|
|
@@ -1830,7 +1832,7 @@ static void rpivid_h265_setup(struct rpi
|
|
de->rpi_config2 = mk_config2(s);
|
|
de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
|
|
s->sps.pic_width_in_luma_samples;
|
|
- de->rpi_currpoc = sh->slice_pic_order_cnt;
|
|
+ de->rpi_currpoc = sh0->slice_pic_order_cnt;
|
|
|
|
if (s->sps.flags &
|
|
V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
|
|
@@ -1839,17 +1841,17 @@ static void rpivid_h265_setup(struct rpi
|
|
|
|
s->slice_idx = 0;
|
|
|
|
- if (sh->slice_segment_addr != 0) {
|
|
+ if (sh0->slice_segment_addr != 0) {
|
|
v4l2_warn(&dev->v4l2_dev,
|
|
"New frame but segment_addr=%d\n",
|
|
- sh->slice_segment_addr);
|
|
+ sh0->slice_segment_addr);
|
|
goto fail;
|
|
}
|
|
|
|
/* Allocate a bitbuf if we need one - don't need one if single
|
|
* slice as we can use the src buf directly
|
|
*/
|
|
- if (!s->frame_end && !de->bit_copy_gptr->ptr) {
|
|
+ if (!frame_end && !de->bit_copy_gptr->ptr) {
|
|
size_t bits_alloc;
|
|
bits_alloc = rpivid_bit_buf_size(s->sps.pic_width_in_luma_samples,
|
|
s->sps.pic_height_in_luma_samples,
|
|
@@ -1873,21 +1875,7 @@ static void rpivid_h265_setup(struct rpi
|
|
s->src_addr = 0;
|
|
s->src_buf = NULL;
|
|
|
|
- if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
|
|
- v4l2_warn(&dev->v4l2_dev,
|
|
- "Bit size %d > bytesused %d\n",
|
|
- sh->bit_size, run->src->planes[0].bytesused);
|
|
- goto fail;
|
|
- }
|
|
- if (sh->data_bit_offset >= sh->bit_size ||
|
|
- sh->bit_size - sh->data_bit_offset < 8) {
|
|
- v4l2_warn(&dev->v4l2_dev,
|
|
- "Bit size %d < Bit offset %d + 8\n",
|
|
- sh->bit_size, sh->data_bit_offset);
|
|
- goto fail;
|
|
- }
|
|
-
|
|
- if (s->frame_end)
|
|
+ if (frame_end)
|
|
s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf,
|
|
0);
|
|
if (!s->src_addr)
|
|
@@ -1898,44 +1886,65 @@ static void rpivid_h265_setup(struct rpi
|
|
}
|
|
|
|
// Pre calc a few things
|
|
- s->sh = sh;
|
|
s->dec = dec;
|
|
- s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
|
|
- s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
|
|
+ for (i = 0; i != run->h265.slice_ents; ++i) {
|
|
+ const struct v4l2_ctrl_hevc_slice_params *const sh = sh0 + i;
|
|
+ const bool last_slice = frame_end && i + 1 == run->h265.slice_ents;
|
|
+
|
|
+ s->sh = sh;
|
|
+
|
|
+ if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
|
|
+ v4l2_warn(&dev->v4l2_dev,
|
|
+ "Bit size %d > bytesused %d\n",
|
|
+ sh->bit_size, run->src->planes[0].bytesused);
|
|
+ goto fail;
|
|
+ }
|
|
+ if (sh->data_bit_offset >= sh->bit_size ||
|
|
+ sh->bit_size - sh->data_bit_offset < 8) {
|
|
+ v4l2_warn(&dev->v4l2_dev,
|
|
+ "Bit size %d < Bit offset %d + 8\n",
|
|
+ sh->bit_size, sh->data_bit_offset);
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ s->slice_qp = 26 + s->pps.init_qp_minus26 + sh->slice_qp_delta;
|
|
+ s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
|
|
+ 0 :
|
|
+ (5 - sh->five_minus_max_num_merge_cand);
|
|
+ s->dependent_slice_segment_flag =
|
|
+ ((sh->flags &
|
|
+ V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
|
|
+
|
|
+ s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
|
|
+ 0 :
|
|
+ sh->num_ref_idx_l0_active_minus1 + 1;
|
|
+ s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
|
|
0 :
|
|
- (5 - sh->five_minus_max_num_merge_cand);
|
|
- // * SH DSS flag invented by me - but clearly needed
|
|
- s->dependent_slice_segment_flag =
|
|
- ((sh->flags &
|
|
- V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
|
|
-
|
|
- s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
|
|
- 0 :
|
|
- sh->num_ref_idx_l0_active_minus1 + 1;
|
|
- s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
|
|
- 0 :
|
|
- sh->num_ref_idx_l1_active_minus1 + 1;
|
|
-
|
|
- if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
|
|
- populate_scaling_factors(run, de, s);
|
|
-
|
|
- // Calc all the random coord info to avoid repeated conversion in/out
|
|
- s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
|
|
- s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
|
|
- s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
|
|
- // Last CTB of previous slice
|
|
- prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
|
|
- s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
|
|
- s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
|
|
+ sh->num_ref_idx_l1_active_minus1 + 1;
|
|
|
|
- if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
|
|
- rv = wpp_decode_slice(de, s);
|
|
- else
|
|
- rv = decode_slice(de, s);
|
|
- if (rv)
|
|
- goto fail;
|
|
+ if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
|
|
+ populate_scaling_factors(run, de, s);
|
|
+
|
|
+ /* Calc all the random coord info to avoid repeated conversion in/out */
|
|
+ s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
|
|
+ s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
|
|
+ s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
|
|
+ /* Last CTB of previous slice */
|
|
+ prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
|
|
+ s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
|
|
+ s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
|
|
+
|
|
+ if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
|
|
+ rv = wpp_decode_slice(de, s, last_slice);
|
|
+ else
|
|
+ rv = decode_slice(de, s, last_slice);
|
|
+ if (rv)
|
|
+ goto fail;
|
|
+
|
|
+ ++s->slice_idx;
|
|
+ }
|
|
|
|
- if (!s->frame_end) {
|
|
+ if (!frame_end) {
|
|
xtrace_ok(dev, de);
|
|
return;
|
|
}
|
|
@@ -2054,8 +2063,8 @@ static void rpivid_h265_setup(struct rpi
|
|
fail:
|
|
if (de)
|
|
// Actual error reporting happens in Trigger
|
|
- de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
|
|
- RPIVID_DECODE_ERROR_CONTINUE;
|
|
+ de->state = frame_end ? RPIVID_DECODE_ERROR_DONE :
|
|
+ RPIVID_DECODE_ERROR_CONTINUE;
|
|
xtrace_fail(dev, de);
|
|
}
|
|
|