mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-19 11:16:32 +00:00
4f8b350be0
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
803 lines
25 KiB
Diff
803 lines
25 KiB
Diff
From ba1e90b6c3b3bf0e88ab01c824c4f8fde582e878 Mon Sep 17 00:00:00 2001
|
|
From: Eric Anholt <eric@anholt.net>
|
|
Date: Wed, 28 Nov 2018 15:09:25 -0800
|
|
Subject: [PATCH 563/806] drm/v3d: Add support for submitting jobs to the TFU.
|
|
|
|
The TFU can copy from raster, UIF, and SAND input images to UIF output
|
|
images, with optional mipmap generation. This will certainly be
|
|
useful for media EGL image input, but is also useful immediately for
|
|
mipmap generation without bogging the V3D core down.
|
|
|
|
For now we only run the queue 1 job deep, and don't have any hang
|
|
recovery (though I don't think we should need it, with TFU). Queuing
|
|
multiple jobs in the HW will require synchronizing the YUV coefficient
|
|
regs updates since they don't get FIFOed with the job.
|
|
|
|
v2: Change the ioctl to IOW instead of IOWR, always set COEF0, explain
|
|
why TFU is AUTH, clarify the syncing docs, drop the unused TFU
|
|
interrupt regs (you're expected to use the hub's), don't take
|
|
&bo->base for NULL bos.
|
|
v3: Fix a little whitespace alignment (noticed by checkpatch), rebase
|
|
on drm_sched_job_cleanup() changes.
|
|
|
|
Signed-off-by: Eric Anholt <eric@anholt.net>
|
|
Reviewed-by: Dave Emett <david.emett@broadcom.com> (v2)
|
|
Link: https://patchwork.freedesktop.org/patch/264607/
|
|
(cherry picked from commit 1584f16ca96ef124aad79efa3303cff5f3530e2c)
|
|
---
|
|
drivers/gpu/drm/v3d/v3d_drv.c | 15 ++-
|
|
drivers/gpu/drm/v3d/v3d_drv.h | 32 +++++-
|
|
drivers/gpu/drm/v3d/v3d_gem.c | 178 ++++++++++++++++++++++++++++----
|
|
drivers/gpu/drm/v3d/v3d_irq.c | 12 ++-
|
|
drivers/gpu/drm/v3d/v3d_regs.h | 49 +++++++++
|
|
drivers/gpu/drm/v3d/v3d_sched.c | 148 ++++++++++++++++++++++----
|
|
drivers/gpu/drm/v3d/v3d_trace.h | 20 ++++
|
|
include/uapi/drm/v3d_drm.h | 25 +++++
|
|
8 files changed, 426 insertions(+), 53 deletions(-)
|
|
|
|
--- a/drivers/gpu/drm/v3d/v3d_drv.c
|
|
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
|
|
@@ -112,10 +112,15 @@ static int v3d_get_param_ioctl(struct dr
|
|
return 0;
|
|
}
|
|
|
|
- /* Any params that aren't just register reads would go here. */
|
|
|
|
- DRM_DEBUG("Unknown parameter %d\n", args->param);
|
|
- return -EINVAL;
|
|
+ switch (args->param) {
|
|
+ case DRM_V3D_PARAM_SUPPORTS_TFU:
|
|
+ args->value = 1;
|
|
+ return 0;
|
|
+ default:
|
|
+ DRM_DEBUG("Unknown parameter %d\n", args->param);
|
|
+ return -EINVAL;
|
|
+ }
|
|
}
|
|
|
|
static int
|
|
@@ -170,7 +175,8 @@ static const struct file_operations v3d_
|
|
/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
|
|
* protection between clients. Note that render nodes would be be
|
|
* able to submit CLs that could access BOs from clients authenticated
|
|
- * with the master node.
|
|
+ * with the master node. The TFU doesn't use the GMP, so it would
|
|
+ * need to stay DRM_AUTH until we do buffer size/offset validation.
|
|
*/
|
|
static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
|
|
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
|
|
@@ -179,6 +185,7 @@ static const struct drm_ioctl_desc v3d_d
|
|
DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
|
|
DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
|
|
DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
|
|
+ DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
|
|
};
|
|
|
|
static const struct vm_operations_struct v3d_vm_ops = {
|
|
--- a/drivers/gpu/drm/v3d/v3d_drv.h
|
|
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
|
|
@@ -7,19 +7,18 @@
|
|
#include <drm/drm_encoder.h>
|
|
#include <drm/drm_gem.h>
|
|
#include <drm/gpu_scheduler.h>
|
|
+#include "uapi/drm/v3d_drm.h"
|
|
|
|
#define GMP_GRANULARITY (128 * 1024)
|
|
|
|
-/* Enum for each of the V3D queues. We maintain various queue
|
|
- * tracking as an array because at some point we'll want to support
|
|
- * the TFU (texture formatting unit) as another queue.
|
|
- */
|
|
+/* Enum for each of the V3D queues. */
|
|
enum v3d_queue {
|
|
V3D_BIN,
|
|
V3D_RENDER,
|
|
+ V3D_TFU,
|
|
};
|
|
|
|
-#define V3D_MAX_QUEUES (V3D_RENDER + 1)
|
|
+#define V3D_MAX_QUEUES (V3D_TFU + 1)
|
|
|
|
struct v3d_queue_state {
|
|
struct drm_gpu_scheduler sched;
|
|
@@ -68,6 +67,7 @@ struct v3d_dev {
|
|
|
|
struct v3d_exec_info *bin_job;
|
|
struct v3d_exec_info *render_job;
|
|
+ struct v3d_tfu_job *tfu_job;
|
|
|
|
struct v3d_queue_state queue[V3D_MAX_QUEUES];
|
|
|
|
@@ -218,6 +218,25 @@ struct v3d_exec_info {
|
|
u32 qma, qms, qts;
|
|
};
|
|
|
|
+struct v3d_tfu_job {
|
|
+ struct drm_sched_job base;
|
|
+
|
|
+ struct drm_v3d_submit_tfu args;
|
|
+
|
|
+ /* An optional fence userspace can pass in for the job to depend on. */
|
|
+ struct dma_fence *in_fence;
|
|
+
|
|
+ /* v3d fence to be signaled by IRQ handler when the job is complete. */
|
|
+ struct dma_fence *done_fence;
|
|
+
|
|
+ struct v3d_dev *v3d;
|
|
+
|
|
+ struct kref refcount;
|
|
+
|
|
+ /* This is the array of BOs that were looked up at the start of exec. */
|
|
+ struct v3d_bo *bo[4];
|
|
+};
|
|
+
|
|
/**
|
|
* _wait_for - magic (register) wait macro
|
|
*
|
|
@@ -281,9 +300,12 @@ int v3d_gem_init(struct drm_device *dev)
|
|
void v3d_gem_destroy(struct drm_device *dev);
|
|
int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
|
|
struct drm_file *file_priv);
|
|
+int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
|
|
+ struct drm_file *file_priv);
|
|
int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
|
|
struct drm_file *file_priv);
|
|
void v3d_exec_put(struct v3d_exec_info *exec);
|
|
+void v3d_tfu_job_put(struct v3d_tfu_job *exec);
|
|
void v3d_reset(struct v3d_dev *v3d);
|
|
void v3d_invalidate_caches(struct v3d_dev *v3d);
|
|
void v3d_flush_caches(struct v3d_dev *v3d);
|
|
--- a/drivers/gpu/drm/v3d/v3d_gem.c
|
|
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
|
|
@@ -207,26 +207,27 @@ v3d_flush_caches(struct v3d_dev *v3d)
|
|
}
|
|
|
|
static void
|
|
-v3d_attach_object_fences(struct v3d_exec_info *exec)
|
|
+v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
|
|
+ struct dma_fence *fence)
|
|
{
|
|
- struct dma_fence *out_fence = exec->render_done_fence;
|
|
int i;
|
|
|
|
- for (i = 0; i < exec->bo_count; i++) {
|
|
+ for (i = 0; i < bo_count; i++) {
|
|
/* XXX: Use shared fences for read-only objects. */
|
|
- reservation_object_add_excl_fence(exec->bo[i]->resv, out_fence);
|
|
+ reservation_object_add_excl_fence(bos[i]->resv, fence);
|
|
}
|
|
}
|
|
|
|
static void
|
|
v3d_unlock_bo_reservations(struct drm_device *dev,
|
|
- struct v3d_exec_info *exec,
|
|
+ struct v3d_bo **bos,
|
|
+ int bo_count,
|
|
struct ww_acquire_ctx *acquire_ctx)
|
|
{
|
|
int i;
|
|
|
|
- for (i = 0; i < exec->bo_count; i++)
|
|
- ww_mutex_unlock(&exec->bo[i]->resv->lock);
|
|
+ for (i = 0; i < bo_count; i++)
|
|
+ ww_mutex_unlock(&bos[i]->resv->lock);
|
|
|
|
ww_acquire_fini(acquire_ctx);
|
|
}
|
|
@@ -240,7 +241,8 @@ v3d_unlock_bo_reservations(struct drm_de
|
|
*/
|
|
static int
|
|
v3d_lock_bo_reservations(struct drm_device *dev,
|
|
- struct v3d_exec_info *exec,
|
|
+ struct v3d_bo **bos,
|
|
+ int bo_count,
|
|
struct ww_acquire_ctx *acquire_ctx)
|
|
{
|
|
int contended_lock = -1;
|
|
@@ -250,7 +252,7 @@ v3d_lock_bo_reservations(struct drm_devi
|
|
|
|
retry:
|
|
if (contended_lock != -1) {
|
|
- struct v3d_bo *bo = exec->bo[contended_lock];
|
|
+ struct v3d_bo *bo = bos[contended_lock];
|
|
|
|
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
|
|
acquire_ctx);
|
|
@@ -260,20 +262,20 @@ retry:
|
|
}
|
|
}
|
|
|
|
- for (i = 0; i < exec->bo_count; i++) {
|
|
+ for (i = 0; i < bo_count; i++) {
|
|
if (i == contended_lock)
|
|
continue;
|
|
|
|
- ret = ww_mutex_lock_interruptible(&exec->bo[i]->resv->lock,
|
|
+ ret = ww_mutex_lock_interruptible(&bos[i]->resv->lock,
|
|
acquire_ctx);
|
|
if (ret) {
|
|
int j;
|
|
|
|
for (j = 0; j < i; j++)
|
|
- ww_mutex_unlock(&exec->bo[j]->resv->lock);
|
|
+ ww_mutex_unlock(&bos[j]->resv->lock);
|
|
|
|
if (contended_lock != -1 && contended_lock >= i) {
|
|
- struct v3d_bo *bo = exec->bo[contended_lock];
|
|
+ struct v3d_bo *bo = bos[contended_lock];
|
|
|
|
ww_mutex_unlock(&bo->resv->lock);
|
|
}
|
|
@@ -293,10 +295,11 @@ retry:
|
|
/* Reserve space for our shared (read-only) fence references,
|
|
* before we commit the CL to the hardware.
|
|
*/
|
|
- for (i = 0; i < exec->bo_count; i++) {
|
|
- ret = reservation_object_reserve_shared(exec->bo[i]->resv);
|
|
+ for (i = 0; i < bo_count; i++) {
|
|
+ ret = reservation_object_reserve_shared(bos[i]->resv);
|
|
if (ret) {
|
|
- v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
|
|
+ v3d_unlock_bo_reservations(dev, bos, bo_count,
|
|
+ acquire_ctx);
|
|
return ret;
|
|
}
|
|
}
|
|
@@ -419,6 +422,33 @@ void v3d_exec_put(struct v3d_exec_info *
|
|
kref_put(&exec->refcount, v3d_exec_cleanup);
|
|
}
|
|
|
|
+static void
|
|
+v3d_tfu_job_cleanup(struct kref *ref)
|
|
+{
|
|
+ struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
|
|
+ refcount);
|
|
+ struct v3d_dev *v3d = job->v3d;
|
|
+ unsigned int i;
|
|
+
|
|
+ dma_fence_put(job->in_fence);
|
|
+ dma_fence_put(job->done_fence);
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
|
|
+ if (job->bo[i])
|
|
+ drm_gem_object_put_unlocked(&job->bo[i]->base);
|
|
+ }
|
|
+
|
|
+ pm_runtime_mark_last_busy(v3d->dev);
|
|
+ pm_runtime_put_autosuspend(v3d->dev);
|
|
+
|
|
+ kfree(job);
|
|
+}
|
|
+
|
|
+void v3d_tfu_job_put(struct v3d_tfu_job *job)
|
|
+{
|
|
+ kref_put(&job->refcount, v3d_tfu_job_cleanup);
|
|
+}
|
|
+
|
|
int
|
|
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
|
|
struct drm_file *file_priv)
|
|
@@ -536,7 +566,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
|
|
if (ret)
|
|
goto fail;
|
|
|
|
- ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
|
|
+ ret = v3d_lock_bo_reservations(dev, exec->bo, exec->bo_count,
|
|
+ &acquire_ctx);
|
|
if (ret)
|
|
goto fail;
|
|
|
|
@@ -570,9 +601,10 @@ v3d_submit_cl_ioctl(struct drm_device *d
|
|
&v3d_priv->sched_entity[V3D_RENDER]);
|
|
mutex_unlock(&v3d->sched_lock);
|
|
|
|
- v3d_attach_object_fences(exec);
|
|
+ v3d_attach_object_fences(exec->bo, exec->bo_count,
|
|
+ exec->render_done_fence);
|
|
|
|
- v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
|
|
+ v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx);
|
|
|
|
/* Update the return sync object for the */
|
|
sync_out = drm_syncobj_find(file_priv, args->out_sync);
|
|
@@ -588,12 +620,118 @@ v3d_submit_cl_ioctl(struct drm_device *d
|
|
|
|
fail_unreserve:
|
|
mutex_unlock(&v3d->sched_lock);
|
|
- v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
|
|
+ v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx);
|
|
fail:
|
|
v3d_exec_put(exec);
|
|
|
|
return ret;
|
|
}
|
|
+
|
|
+/**
|
|
+ * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
|
|
+ * @dev: DRM device
|
|
+ * @data: ioctl argument
|
|
+ * @file_priv: DRM file for this fd
|
|
+ *
|
|
+ * Userspace provides the register setup for the TFU, which we don't
|
|
+ * need to validate since the TFU is behind the MMU.
|
|
+ */
|
|
+int
|
|
+v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
|
|
+ struct drm_file *file_priv)
|
|
+{
|
|
+ struct v3d_dev *v3d = to_v3d_dev(dev);
|
|
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
|
|
+ struct drm_v3d_submit_tfu *args = data;
|
|
+ struct v3d_tfu_job *job;
|
|
+ struct ww_acquire_ctx acquire_ctx;
|
|
+ struct drm_syncobj *sync_out;
|
|
+ struct dma_fence *sched_done_fence;
|
|
+ int ret = 0;
|
|
+ int bo_count;
|
|
+
|
|
+ job = kcalloc(1, sizeof(*job), GFP_KERNEL);
|
|
+ if (!job)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ ret = pm_runtime_get_sync(v3d->dev);
|
|
+ if (ret < 0) {
|
|
+ kfree(job);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ kref_init(&job->refcount);
|
|
+
|
|
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync,
|
|
+ 0, &job->in_fence);
|
|
+ if (ret == -EINVAL)
|
|
+ goto fail;
|
|
+
|
|
+ job->args = *args;
|
|
+ job->v3d = v3d;
|
|
+
|
|
+ spin_lock(&file_priv->table_lock);
|
|
+ for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
|
|
+ struct drm_gem_object *bo;
|
|
+
|
|
+ if (!args->bo_handles[bo_count])
|
|
+ break;
|
|
+
|
|
+ bo = idr_find(&file_priv->object_idr,
|
|
+ args->bo_handles[bo_count]);
|
|
+ if (!bo) {
|
|
+ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
|
|
+ bo_count, args->bo_handles[bo_count]);
|
|
+ ret = -ENOENT;
|
|
+ spin_unlock(&file_priv->table_lock);
|
|
+ goto fail;
|
|
+ }
|
|
+ drm_gem_object_get(bo);
|
|
+ job->bo[bo_count] = to_v3d_bo(bo);
|
|
+ }
|
|
+ spin_unlock(&file_priv->table_lock);
|
|
+
|
|
+ ret = v3d_lock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
|
|
+ if (ret)
|
|
+ goto fail;
|
|
+
|
|
+ mutex_lock(&v3d->sched_lock);
|
|
+ ret = drm_sched_job_init(&job->base,
|
|
+ &v3d_priv->sched_entity[V3D_TFU],
|
|
+ v3d_priv);
|
|
+ if (ret)
|
|
+ goto fail_unreserve;
|
|
+
|
|
+ sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
|
|
+
|
|
+ kref_get(&job->refcount); /* put by scheduler job completion */
|
|
+ drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
|
|
+ mutex_unlock(&v3d->sched_lock);
|
|
+
|
|
+ v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
|
|
+
|
|
+ v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
|
|
+
|
|
+ /* Update the return sync object */
|
|
+ sync_out = drm_syncobj_find(file_priv, args->out_sync);
|
|
+ if (sync_out) {
|
|
+ drm_syncobj_replace_fence(sync_out, sched_done_fence);
|
|
+ drm_syncobj_put(sync_out);
|
|
+ }
|
|
+ dma_fence_put(sched_done_fence);
|
|
+
|
|
+ v3d_tfu_job_put(job);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail_unreserve:
|
|
+ mutex_unlock(&v3d->sched_lock);
|
|
+ v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
|
|
+fail:
|
|
+ v3d_tfu_job_put(job);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
|
|
int
|
|
v3d_gem_init(struct drm_device *dev)
|
|
--- a/drivers/gpu/drm/v3d/v3d_irq.c
|
|
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
|
|
@@ -4,8 +4,8 @@
|
|
/**
|
|
* DOC: Interrupt management for the V3D engine
|
|
*
|
|
- * When we take a binning or rendering flush done interrupt, we need
|
|
- * to signal the fence for that job so that the scheduler can queue up
|
|
+ * When we take a bin, render, or TFU done interrupt, we need to
|
|
+ * signal the fence for that job so that the scheduler can queue up
|
|
* the next one and unblock any waiters.
|
|
*
|
|
* When we take the binner out of memory interrupt, we need to
|
|
@@ -23,7 +23,8 @@
|
|
|
|
#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
|
|
V3D_HUB_INT_MMU_PTI | \
|
|
- V3D_HUB_INT_MMU_CAP))
|
|
+ V3D_HUB_INT_MMU_CAP | \
|
|
+ V3D_HUB_INT_TFUC))
|
|
|
|
static void
|
|
v3d_overflow_mem_work(struct work_struct *work)
|
|
@@ -117,6 +118,11 @@ v3d_hub_irq(int irq, void *arg)
|
|
/* Acknowledge the interrupts we're handling here. */
|
|
V3D_WRITE(V3D_HUB_INT_CLR, intsts);
|
|
|
|
+ if (intsts & V3D_HUB_INT_TFUC) {
|
|
+ dma_fence_signal(v3d->tfu_job->done_fence);
|
|
+ status = IRQ_HANDLED;
|
|
+ }
|
|
+
|
|
if (intsts & (V3D_HUB_INT_MMU_WRV |
|
|
V3D_HUB_INT_MMU_PTI |
|
|
V3D_HUB_INT_MMU_CAP)) {
|
|
--- a/drivers/gpu/drm/v3d/v3d_regs.h
|
|
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
|
|
@@ -86,6 +86,55 @@
|
|
# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c
|
|
# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
|
|
|
|
+#define V3D_TFU_CS 0x00400
|
|
+/* Stops current job, empties input fifo. */
|
|
+# define V3D_TFU_CS_TFURST BIT(31)
|
|
+# define V3D_TFU_CS_CVTCT_MASK V3D_MASK(23, 16)
|
|
+# define V3D_TFU_CS_CVTCT_SHIFT 16
|
|
+# define V3D_TFU_CS_NFREE_MASK V3D_MASK(13, 8)
|
|
+# define V3D_TFU_CS_NFREE_SHIFT 8
|
|
+# define V3D_TFU_CS_BUSY BIT(0)
|
|
+
|
|
+#define V3D_TFU_SU 0x00404
|
|
+/* Interrupt when FINTTHR input slots are free (0 = disabled) */
|
|
+# define V3D_TFU_SU_FINTTHR_MASK V3D_MASK(13, 8)
|
|
+# define V3D_TFU_SU_FINTTHR_SHIFT 8
|
|
+/* Skips resetting the CRC at the start of CRC generation. */
|
|
+# define V3D_TFU_SU_CRCCHAIN BIT(4)
|
|
+/* Skips writes, computes CRC of the image. miplevels must be 0. */
|
|
+# define V3D_TFU_SU_CRC BIT(3)
|
|
+# define V3D_TFU_SU_THROTTLE_MASK V3D_MASK(1, 0)
|
|
+# define V3D_TFU_SU_THROTTLE_SHIFT 0
|
|
+
|
|
+#define V3D_TFU_ICFG 0x00408
|
|
+/* Interrupt when the conversion is complete. */
|
|
+# define V3D_TFU_ICFG_IOC BIT(0)
|
|
+
|
|
+/* Input Image Address */
|
|
+#define V3D_TFU_IIA 0x0040c
|
|
+/* Input Chroma Address */
|
|
+#define V3D_TFU_ICA 0x00410
|
|
+/* Input Image Stride */
|
|
+#define V3D_TFU_IIS 0x00414
|
|
+/* Input Image U-Plane Address */
|
|
+#define V3D_TFU_IUA 0x00418
|
|
+/* Output Image Address */
|
|
+#define V3D_TFU_IOA 0x0041c
|
|
+/* Image Output Size */
|
|
+#define V3D_TFU_IOS 0x00420
|
|
+/* TFU YUV Coefficient 0 */
|
|
+#define V3D_TFU_COEF0 0x00424
|
|
+/* Use these regs instead of the defaults. */
|
|
+# define V3D_TFU_COEF0_USECOEF BIT(31)
|
|
+/* TFU YUV Coefficient 1 */
|
|
+#define V3D_TFU_COEF1 0x00428
|
|
+/* TFU YUV Coefficient 2 */
|
|
+#define V3D_TFU_COEF2 0x0042c
|
|
+/* TFU YUV Coefficient 3 */
|
|
+#define V3D_TFU_COEF3 0x00430
|
|
+
|
|
+#define V3D_TFU_CRC 0x00434
|
|
+
|
|
/* Per-MMU registers. */
|
|
|
|
#define V3D_MMUC_CONTROL 0x01000
|
|
--- a/drivers/gpu/drm/v3d/v3d_sched.c
|
|
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
|
|
@@ -30,6 +30,12 @@ to_v3d_job(struct drm_sched_job *sched_j
|
|
return container_of(sched_job, struct v3d_job, base);
|
|
}
|
|
|
|
+static struct v3d_tfu_job *
|
|
+to_tfu_job(struct drm_sched_job *sched_job)
|
|
+{
|
|
+ return container_of(sched_job, struct v3d_tfu_job, base);
|
|
+}
|
|
+
|
|
static void
|
|
v3d_job_free(struct drm_sched_job *sched_job)
|
|
{
|
|
@@ -38,6 +44,14 @@ v3d_job_free(struct drm_sched_job *sched
|
|
v3d_exec_put(job->exec);
|
|
}
|
|
|
|
+static void
|
|
+v3d_tfu_job_free(struct drm_sched_job *sched_job)
|
|
+{
|
|
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
|
|
+
|
|
+ v3d_tfu_job_put(job);
|
|
+}
|
|
+
|
|
/**
|
|
* Returns the fences that the bin or render job depends on, one by one.
|
|
* v3d_job_run() won't be called until all of them have been signaled.
|
|
@@ -76,6 +90,27 @@ v3d_job_dependency(struct drm_sched_job
|
|
return fence;
|
|
}
|
|
|
|
+/**
|
|
+ * Returns the fences that the TFU job depends on, one by one.
|
|
+ * v3d_tfu_job_run() won't be called until all of them have been
|
|
+ * signaled.
|
|
+ */
|
|
+static struct dma_fence *
|
|
+v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
|
|
+ struct drm_sched_entity *s_entity)
|
|
+{
|
|
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
|
|
+ struct dma_fence *fence;
|
|
+
|
|
+ fence = job->in_fence;
|
|
+ if (fence) {
|
|
+ job->in_fence = NULL;
|
|
+ return fence;
|
|
+ }
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
|
|
{
|
|
struct v3d_job *job = to_v3d_job(sched_job);
|
|
@@ -147,31 +182,47 @@ static struct dma_fence *v3d_job_run(str
|
|
return fence;
|
|
}
|
|
|
|
-static void
|
|
-v3d_job_timedout(struct drm_sched_job *sched_job)
|
|
+static struct dma_fence *
|
|
+v3d_tfu_job_run(struct drm_sched_job *sched_job)
|
|
{
|
|
- struct v3d_job *job = to_v3d_job(sched_job);
|
|
- struct v3d_exec_info *exec = job->exec;
|
|
- struct v3d_dev *v3d = exec->v3d;
|
|
- enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
|
|
- enum v3d_queue q;
|
|
- u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
|
|
- u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
|
|
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
|
|
+ struct v3d_dev *v3d = job->v3d;
|
|
+ struct drm_device *dev = &v3d->drm;
|
|
+ struct dma_fence *fence;
|
|
|
|
- /* If the current address or return address have changed, then
|
|
- * the GPU has probably made progress and we should delay the
|
|
- * reset. This could fail if the GPU got in an infinite loop
|
|
- * in the CL, but that is pretty unlikely outside of an i-g-t
|
|
- * testcase.
|
|
- */
|
|
- if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
|
|
- job->timedout_ctca = ctca;
|
|
- job->timedout_ctra = ctra;
|
|
+ fence = v3d_fence_create(v3d, V3D_TFU);
|
|
+ if (IS_ERR(fence))
|
|
+ return NULL;
|
|
|
|
- schedule_delayed_work(&job->base.work_tdr,
|
|
- job->base.sched->timeout);
|
|
- return;
|
|
+ v3d->tfu_job = job;
|
|
+ if (job->done_fence)
|
|
+ dma_fence_put(job->done_fence);
|
|
+ job->done_fence = dma_fence_get(fence);
|
|
+
|
|
+ trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
|
|
+
|
|
+ V3D_WRITE(V3D_TFU_IIA, job->args.iia);
|
|
+ V3D_WRITE(V3D_TFU_IIS, job->args.iis);
|
|
+ V3D_WRITE(V3D_TFU_ICA, job->args.ica);
|
|
+ V3D_WRITE(V3D_TFU_IUA, job->args.iua);
|
|
+ V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
|
|
+ V3D_WRITE(V3D_TFU_IOS, job->args.ios);
|
|
+ V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
|
|
+ if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
|
|
+ V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
|
|
+ V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
|
|
+ V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
|
|
}
|
|
+ /* ICFG kicks off the job. */
|
|
+ V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
|
|
+
|
|
+ return fence;
|
|
+}
|
|
+
|
|
+static void
|
|
+v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
|
|
+{
|
|
+ enum v3d_queue q;
|
|
|
|
mutex_lock(&v3d->reset_lock);
|
|
|
|
@@ -196,6 +247,41 @@ v3d_job_timedout(struct drm_sched_job *s
|
|
mutex_unlock(&v3d->reset_lock);
|
|
}
|
|
|
|
+static void
|
|
+v3d_job_timedout(struct drm_sched_job *sched_job)
|
|
+{
|
|
+ struct v3d_job *job = to_v3d_job(sched_job);
|
|
+ struct v3d_exec_info *exec = job->exec;
|
|
+ struct v3d_dev *v3d = exec->v3d;
|
|
+ enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
|
|
+ u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
|
|
+ u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
|
|
+
|
|
+ /* If the current address or return address have changed, then
|
|
+ * the GPU has probably made progress and we should delay the
|
|
+ * reset. This could fail if the GPU got in an infinite loop
|
|
+ * in the CL, but that is pretty unlikely outside of an i-g-t
|
|
+ * testcase.
|
|
+ */
|
|
+ if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
|
|
+ job->timedout_ctca = ctca;
|
|
+ job->timedout_ctra = ctra;
|
|
+ schedule_delayed_work(&job->base.work_tdr,
|
|
+ job->base.sched->timeout);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ v3d_gpu_reset_for_timeout(v3d, sched_job);
|
|
+}
|
|
+
|
|
+static void
|
|
+v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
|
|
+{
|
|
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
|
|
+
|
|
+ v3d_gpu_reset_for_timeout(job->v3d, sched_job);
|
|
+}
|
|
+
|
|
static const struct drm_sched_backend_ops v3d_sched_ops = {
|
|
.dependency = v3d_job_dependency,
|
|
.run_job = v3d_job_run,
|
|
@@ -203,6 +289,13 @@ static const struct drm_sched_backend_op
|
|
.free_job = v3d_job_free
|
|
};
|
|
|
|
+static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
|
|
+ .dependency = v3d_tfu_job_dependency,
|
|
+ .run_job = v3d_tfu_job_run,
|
|
+ .timedout_job = v3d_tfu_job_timedout,
|
|
+ .free_job = v3d_tfu_job_free
|
|
+};
|
|
+
|
|
int
|
|
v3d_sched_init(struct v3d_dev *v3d)
|
|
{
|
|
@@ -232,6 +325,19 @@ v3d_sched_init(struct v3d_dev *v3d)
|
|
drm_sched_fini(&v3d->queue[V3D_BIN].sched);
|
|
return ret;
|
|
}
|
|
+
|
|
+ ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
|
|
+ &v3d_tfu_sched_ops,
|
|
+ hw_jobs_limit, job_hang_limit,
|
|
+ msecs_to_jiffies(hang_limit_ms),
|
|
+ "v3d_tfu");
|
|
+ if (ret) {
|
|
+ dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
|
|
+ ret);
|
|
+ drm_sched_fini(&v3d->queue[V3D_RENDER].sched);
|
|
+ drm_sched_fini(&v3d->queue[V3D_BIN].sched);
|
|
+ return ret;
|
|
+ }
|
|
|
|
return 0;
|
|
}
|
|
--- a/drivers/gpu/drm/v3d/v3d_trace.h
|
|
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
|
|
@@ -42,6 +42,26 @@ TRACE_EVENT(v3d_submit_cl,
|
|
__entry->ctnqea)
|
|
);
|
|
|
|
+TRACE_EVENT(v3d_submit_tfu,
|
|
+ TP_PROTO(struct drm_device *dev,
|
|
+ uint64_t seqno),
|
|
+ TP_ARGS(dev, seqno),
|
|
+
|
|
+ TP_STRUCT__entry(
|
|
+ __field(u32, dev)
|
|
+ __field(u64, seqno)
|
|
+ ),
|
|
+
|
|
+ TP_fast_assign(
|
|
+ __entry->dev = dev->primary->index;
|
|
+ __entry->seqno = seqno;
|
|
+ ),
|
|
+
|
|
+ TP_printk("dev=%u, seqno=%llu",
|
|
+ __entry->dev,
|
|
+ __entry->seqno)
|
|
+);
|
|
+
|
|
TRACE_EVENT(v3d_reset_begin,
|
|
TP_PROTO(struct drm_device *dev),
|
|
TP_ARGS(dev),
|
|
--- a/include/uapi/drm/v3d_drm.h
|
|
+++ b/include/uapi/drm/v3d_drm.h
|
|
@@ -36,6 +36,7 @@ extern "C" {
|
|
#define DRM_V3D_MMAP_BO 0x03
|
|
#define DRM_V3D_GET_PARAM 0x04
|
|
#define DRM_V3D_GET_BO_OFFSET 0x05
|
|
+#define DRM_V3D_SUBMIT_TFU 0x06
|
|
|
|
#define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
|
|
#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
|
|
@@ -43,6 +44,7 @@ extern "C" {
|
|
#define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
|
|
#define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
|
|
#define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
|
|
+#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
|
|
|
|
/**
|
|
* struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
|
|
@@ -169,6 +171,7 @@ enum drm_v3d_param {
|
|
DRM_V3D_PARAM_V3D_CORE0_IDENT0,
|
|
DRM_V3D_PARAM_V3D_CORE0_IDENT1,
|
|
DRM_V3D_PARAM_V3D_CORE0_IDENT2,
|
|
+ DRM_V3D_PARAM_SUPPORTS_TFU,
|
|
};
|
|
|
|
struct drm_v3d_get_param {
|
|
@@ -187,6 +190,28 @@ struct drm_v3d_get_bo_offset {
|
|
__u32 offset;
|
|
};
|
|
|
|
+struct drm_v3d_submit_tfu {
|
|
+ __u32 icfg;
|
|
+ __u32 iia;
|
|
+ __u32 iis;
|
|
+ __u32 ica;
|
|
+ __u32 iua;
|
|
+ __u32 ioa;
|
|
+ __u32 ios;
|
|
+ __u32 coef[4];
|
|
+ /* First handle is the output BO, following are other inputs.
|
|
+ * 0 for unused.
|
|
+ */
|
|
+ __u32 bo_handles[4];
|
|
+ /* Sync object to block on before running the TFU job. Each TFU
|
|
+ * job will execute in the order submitted to its FD. Synchronization
|
|
+ * against rendering jobs requires using sync objects.
|
|
+ */
|
|
+ __u32 in_sync;
|
|
+ /* Sync object to signal when the TFU job is done. */
|
|
+ __u32 out_sync;
|
|
+};
|
|
+
|
|
#if defined(__cplusplus)
|
|
}
|
|
#endif
|