mirror of
https://github.com/openwrt/openwrt.git
synced 2024-12-23 15:32:33 +00:00
494 lines
15 KiB
Diff
494 lines
15 KiB
Diff
|
From f0b0156b38d07a45e1b5309181efc645e6fe8393 Mon Sep 17 00:00:00 2001
|
||
|
From: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
|
||
|
Date: Tue, 7 Feb 2023 13:54:02 +0100
|
||
|
Subject: [PATCH 0464/1085] drm/v3d: New debugfs end-points to query GPU usage
|
||
|
stats.
|
||
|
|
||
|
Two new debugfs interfaces are implemented:
|
||
|
|
||
|
- gpu_usage: exposes the total runtime since boot of each
|
||
|
of the 5 scheduling queues available at V3D (BIN, RENDER,
|
||
|
CSD, TFU, CACHE_CLEAN). So if the interface is queried at
|
||
|
two different points of time the usage percentage of each
|
||
|
of the queues can be calculated.
|
||
|
|
||
|
- gpu_pid_usage: exposes the same information but to the
|
||
|
level of detail of each process using the V3D driver. The
|
||
|
runtime for each process using the driver is stored. So the
|
||
|
percentages of usage by PID can be calculated with
|
||
|
measurements taken at different timestamps.
|
||
|
|
||
|
The storage of gpu_pid_usage stats is only done if
|
||
|
the debugfs interface has been polled during the last 70 seconds.
|
||
|
If a process does not submit a GPU job during the last 70
|
||
|
seconds its stats will also be purged.
|
||
|
|
||
|
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
|
||
|
---
|
||
|
drivers/gpu/drm/v3d/v3d_debugfs.c | 79 +++++++++++++++++
|
||
|
drivers/gpu/drm/v3d/v3d_drv.h | 59 +++++++++++++
|
||
|
drivers/gpu/drm/v3d/v3d_gem.c | 1 +
|
||
|
drivers/gpu/drm/v3d/v3d_irq.c | 5 ++
|
||
|
drivers/gpu/drm/v3d/v3d_sched.c | 139 +++++++++++++++++++++++++++++-
|
||
|
5 files changed, 282 insertions(+), 1 deletion(-)
|
||
|
|
||
|
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
|
||
|
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
|
||
|
@@ -6,6 +6,7 @@
|
||
|
#include <linux/debugfs.h>
|
||
|
#include <linux/seq_file.h>
|
||
|
#include <linux/string_helpers.h>
|
||
|
+#include <linux/sched/clock.h>
|
||
|
|
||
|
#include <drm/drm_debugfs.h>
|
||
|
|
||
|
@@ -202,6 +203,82 @@ static int v3d_debugfs_bo_stats(struct s
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
|
||
|
+{
|
||
|
+ struct drm_debugfs_entry *entry = m->private;
|
||
|
+ struct drm_device *dev = entry->dev;
|
||
|
+ struct v3d_dev *v3d = to_v3d_dev(dev);
|
||
|
+ struct v3d_queue_stats *queue_stats;
|
||
|
+ enum v3d_queue queue;
|
||
|
+ u64 timestamp = local_clock();
|
||
|
+ u64 active_runtime;
|
||
|
+
|
||
|
+ seq_printf(m, "timestamp;%llu;\n", local_clock());
|
||
|
+ seq_printf(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
|
||
|
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
|
||
|
+ if (!v3d->queue[queue].sched.ready)
|
||
|
+ continue;
|
||
|
+
|
||
|
+ queue_stats = &v3d->gpu_queue_stats[queue];
|
||
|
+ mutex_lock(&queue_stats->lock);
|
||
|
+ v3d_sched_stats_update(queue_stats);
|
||
|
+ if (queue_stats->last_pid)
|
||
|
+ active_runtime = timestamp - queue_stats->last_exec_start;
|
||
|
+ else
|
||
|
+ active_runtime = 0;
|
||
|
+
|
||
|
+ seq_printf(m, "%s;%d;%llu;%c;\n",
|
||
|
+ v3d_queue_to_string(queue),
|
||
|
+ queue_stats->jobs_sent,
|
||
|
+ queue_stats->runtime + active_runtime,
|
||
|
+ queue_stats->last_pid?'1':'0');
|
||
|
+ mutex_unlock(&queue_stats->lock);
|
||
|
+ }
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
|
||
|
+{
|
||
|
+ struct drm_debugfs_entry *entry = m->private;
|
||
|
+ struct drm_device *dev = entry->dev;
|
||
|
+ struct v3d_dev *v3d = to_v3d_dev(dev);
|
||
|
+ struct v3d_queue_stats *queue_stats;
|
||
|
+ struct v3d_queue_pid_stats *cur;
|
||
|
+ enum v3d_queue queue;
|
||
|
+ u64 active_runtime;
|
||
|
+ u64 timestamp = local_clock();
|
||
|
+
|
||
|
+ seq_printf(m, "timestamp;%llu;\n", timestamp);
|
||
|
+ seq_printf(m, "\"QUEUE\";\"PID\",\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
|
||
|
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
|
||
|
+
|
||
|
+ if (!v3d->queue[queue].sched.ready)
|
||
|
+ continue;
|
||
|
+
|
||
|
+ queue_stats = &v3d->gpu_queue_stats[queue];
|
||
|
+ mutex_lock(&queue_stats->lock);
|
||
|
+ queue_stats->gpu_pid_stats_timeout = jiffies + V3D_QUEUE_STATS_TIMEOUT;
|
||
|
+ v3d_sched_stats_update(queue_stats);
|
||
|
+ list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
|
||
|
+
|
||
|
+ if (cur->pid == queue_stats->last_pid)
|
||
|
+ active_runtime = timestamp - queue_stats->last_exec_start;
|
||
|
+ else
|
||
|
+ active_runtime = 0;
|
||
|
+
|
||
|
+ seq_printf(m, "%s;%d;%d;%llu;%c;\n",
|
||
|
+ v3d_queue_to_string(queue),
|
||
|
+ cur->pid, cur->jobs_sent,
|
||
|
+ cur->runtime + active_runtime,
|
||
|
+ cur->pid == queue_stats->last_pid ? '1' : '0');
|
||
|
+ }
|
||
|
+ mutex_unlock(&queue_stats->lock);
|
||
|
+ }
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
static int v3d_measure_clock(struct seq_file *m, void *unused)
|
||
|
{
|
||
|
struct drm_debugfs_entry *entry = m->private;
|
||
|
@@ -241,6 +318,8 @@ static const struct drm_debugfs_info v3d
|
||
|
{"v3d_regs", v3d_v3d_debugfs_regs, 0},
|
||
|
{"measure_clock", v3d_measure_clock, 0},
|
||
|
{"bo_stats", v3d_debugfs_bo_stats, 0},
|
||
|
+ {"gpu_usage", v3d_debugfs_gpu_usage, 0},
|
||
|
+ {"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
|
||
|
};
|
||
|
|
||
|
void
|
||
|
--- a/drivers/gpu/drm/v3d/v3d_drv.h
|
||
|
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
|
||
|
@@ -21,6 +21,19 @@ struct reset_control;
|
||
|
|
||
|
#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
|
||
|
|
||
|
+static inline char *
|
||
|
+v3d_queue_to_string(enum v3d_queue queue)
|
||
|
+{
|
||
|
+ switch (queue) {
|
||
|
+ case V3D_BIN: return "v3d_bin";
|
||
|
+ case V3D_RENDER: return "v3d_render";
|
||
|
+ case V3D_TFU: return "v3d_tfu";
|
||
|
+ case V3D_CSD: return "v3d_csd";
|
||
|
+ case V3D_CACHE_CLEAN: return "v3d_cache_clean";
|
||
|
+ }
|
||
|
+ return "UNKNOWN";
|
||
|
+}
|
||
|
+
|
||
|
struct v3d_queue_state {
|
||
|
struct drm_gpu_scheduler sched;
|
||
|
|
||
|
@@ -28,6 +41,44 @@ struct v3d_queue_state {
|
||
|
u64 emit_seqno;
|
||
|
};
|
||
|
|
||
|
+struct v3d_queue_pid_stats {
|
||
|
+ struct list_head list;
|
||
|
+ u64 runtime;
|
||
|
+ /* Time in jiffies to purge the stats of this process. Every time a
|
||
|
+ * process sends a new job to the queue, this timeout is delayed by
|
||
|
+ * V3D_QUEUE_STATS_TIMEOUT while the gpu_pid_stats_timeout of the
|
||
|
+ * queue is not reached.
|
||
|
+ */
|
||
|
+ unsigned long timeout_purge;
|
||
|
+ u32 jobs_sent;
|
||
|
+ pid_t pid;
|
||
|
+};
|
||
|
+
|
||
|
+struct v3d_queue_stats {
|
||
|
+ struct mutex lock;
|
||
|
+ u64 last_exec_start;
|
||
|
+ u64 last_exec_end;
|
||
|
+ u64 runtime;
|
||
|
+ u32 jobs_sent;
|
||
|
+ /* Time in jiffies to stop collecting gpu stats by process. This is
|
||
|
+ * increased by every access to the debugfs interface gpu_pid_usage.
|
||
|
+ * If the debugfs is not used stats are not collected.
|
||
|
+ */
|
||
|
+ unsigned long gpu_pid_stats_timeout;
|
||
|
+ pid_t last_pid;
|
||
|
+ struct list_head pid_stats_list;
|
||
|
+};
|
||
|
+
|
||
|
+/* pid_stats by process (v3d_queue_pid_stats) are recorded if there is an
|
||
|
+ * access to the gpu_pid_usage debugfs interface for the last
|
||
|
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
|
||
|
+ *
|
||
|
+ * The same timeout is used to purge the stats by process for those processes
|
||
|
+ * that have not sent jobs during this period.
|
||
|
+ */
|
||
|
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
|
||
|
+
|
||
|
+
|
||
|
/* Performance monitor object. The perform lifetime is controlled by userspace
|
||
|
* using perfmon related ioctls. A perfmon can be attached to a submit_cl
|
||
|
* request, and when this is the case, HW perf counters will be activated just
|
||
|
@@ -147,6 +198,8 @@ struct v3d_dev {
|
||
|
u32 num_allocated;
|
||
|
u32 pages_allocated;
|
||
|
} bo_stats;
|
||
|
+
|
||
|
+ struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
|
||
|
};
|
||
|
|
||
|
static inline struct v3d_dev *
|
||
|
@@ -244,6 +297,11 @@ struct v3d_job {
|
||
|
*/
|
||
|
struct v3d_perfmon *perfmon;
|
||
|
|
||
|
+ /* PID of the process that submitted the job that could be used to
|
||
|
+ * collect stats by process of gpu usage.
|
||
|
+ */
|
||
|
+ pid_t client_pid;
|
||
|
+
|
||
|
/* Callback for the freeing of the job on refcount going to 0. */
|
||
|
void (*free)(struct kref *ref);
|
||
|
};
|
||
|
@@ -408,6 +466,7 @@ void v3d_mmu_remove_ptes(struct v3d_bo *
|
||
|
/* v3d_sched.c */
|
||
|
int v3d_sched_init(struct v3d_dev *v3d);
|
||
|
void v3d_sched_fini(struct v3d_dev *v3d);
|
||
|
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
|
||
|
|
||
|
/* v3d_perfmon.c */
|
||
|
void v3d_perfmon_get(struct v3d_perfmon *perfmon);
|
||
|
--- a/drivers/gpu/drm/v3d/v3d_gem.c
|
||
|
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
|
||
|
@@ -460,6 +460,7 @@ v3d_job_init(struct v3d_dev *v3d, struct
|
||
|
job = *container;
|
||
|
job->v3d = v3d;
|
||
|
job->free = free;
|
||
|
+ job->client_pid = current->pid;
|
||
|
|
||
|
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
|
||
|
v3d_priv);
|
||
|
--- a/drivers/gpu/drm/v3d/v3d_irq.c
|
||
|
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
|
||
|
@@ -14,6 +14,7 @@
|
||
|
*/
|
||
|
|
||
|
#include <linux/platform_device.h>
|
||
|
+#include <linux/sched/clock.h>
|
||
|
|
||
|
#include "v3d_drv.h"
|
||
|
#include "v3d_regs.h"
|
||
|
@@ -100,6 +101,7 @@ v3d_irq(int irq, void *arg)
|
||
|
if (intsts & V3D_INT_FLDONE) {
|
||
|
struct v3d_fence *fence =
|
||
|
to_v3d_fence(v3d->bin_job->base.irq_fence);
|
||
|
+ v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
|
||
|
|
||
|
trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
|
||
|
dma_fence_signal(&fence->base);
|
||
|
@@ -109,6 +111,7 @@ v3d_irq(int irq, void *arg)
|
||
|
if (intsts & V3D_INT_FRDONE) {
|
||
|
struct v3d_fence *fence =
|
||
|
to_v3d_fence(v3d->render_job->base.irq_fence);
|
||
|
+ v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
|
||
|
|
||
|
trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
|
||
|
dma_fence_signal(&fence->base);
|
||
|
@@ -118,6 +121,7 @@ v3d_irq(int irq, void *arg)
|
||
|
if (intsts & V3D_INT_CSDDONE) {
|
||
|
struct v3d_fence *fence =
|
||
|
to_v3d_fence(v3d->csd_job->base.irq_fence);
|
||
|
+ v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
|
||
|
|
||
|
trace_v3d_csd_irq(&v3d->drm, fence->seqno);
|
||
|
dma_fence_signal(&fence->base);
|
||
|
@@ -154,6 +158,7 @@ v3d_hub_irq(int irq, void *arg)
|
||
|
if (intsts & V3D_HUB_INT_TFUC) {
|
||
|
struct v3d_fence *fence =
|
||
|
to_v3d_fence(v3d->tfu_job->base.irq_fence);
|
||
|
+ v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
|
||
|
|
||
|
trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
|
||
|
dma_fence_signal(&fence->base);
|
||
|
--- a/drivers/gpu/drm/v3d/v3d_sched.c
|
||
|
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
|
||
|
@@ -19,6 +19,7 @@
|
||
|
*/
|
||
|
|
||
|
#include <linux/kthread.h>
|
||
|
+#include <linux/sched/clock.h>
|
||
|
|
||
|
#include "v3d_drv.h"
|
||
|
#include "v3d_regs.h"
|
||
|
@@ -72,6 +73,114 @@ v3d_switch_perfmon(struct v3d_dev *v3d,
|
||
|
v3d_perfmon_start(v3d, job->perfmon);
|
||
|
}
|
||
|
|
||
|
+/*
|
||
|
+ * Updates the scheduling stats of the gpu queues runtime for completed jobs.
|
||
|
+ *
|
||
|
+ * It should be called before any new job submission to the queue or before
|
||
|
+ * accessing the stats from the debugfs interface.
|
||
|
+ *
|
||
|
+ * It is expected that calls to this function are done with queue_stats->lock
|
||
|
+ * locked.
|
||
|
+ */
|
||
|
+void
|
||
|
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
|
||
|
+{
|
||
|
+ struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
|
||
|
+ struct v3d_queue_pid_stats *cur, *tmp;
|
||
|
+ u64 runtime = 0;
|
||
|
+ bool store_pid_stats =
|
||
|
+ time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout);
|
||
|
+
|
||
|
+ /* If debugfs stats gpu_pid_usage has not been polled for a period,
|
||
|
+ * the pid stats collection is stopped and we purge any existing
|
||
|
+ * pid_stats.
|
||
|
+ *
|
||
|
+ * pid_stats are also purged for clients that have reached the
|
||
|
+ * timeout_purge because the process probably does not exist anymore.
|
||
|
+ */
|
||
|
+ list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
|
||
|
+ if (!store_pid_stats || time_is_before_jiffies(cur->timeout_purge)) {
|
||
|
+ list_del(&cur->list);
|
||
|
+ kfree(cur);
|
||
|
+ } else {
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ /* If a job has finished its stats are updated. */
|
||
|
+ if (queue_stats->last_pid && queue_stats->last_exec_end) {
|
||
|
+ runtime = queue_stats->last_exec_end -
|
||
|
+ queue_stats->last_exec_start;
|
||
|
+ queue_stats->runtime += runtime;
|
||
|
+
|
||
|
+ if (store_pid_stats) {
|
||
|
+ struct v3d_queue_pid_stats *pid_stats;
|
||
|
+ /* Last job info is always at the head of the list */
|
||
|
+ pid_stats = list_first_entry_or_null(pid_stats_list,
|
||
|
+ struct v3d_queue_pid_stats, list);
|
||
|
+ if (pid_stats &&
|
||
|
+ pid_stats->pid == queue_stats->last_pid) {
|
||
|
+ pid_stats->runtime += runtime;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ queue_stats->last_pid = 0;
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * Updates the queue usage adding the information of a new job that is
|
||
|
+ * about to be sent to the GPU to be executed.
|
||
|
+ */
|
||
|
+int
|
||
|
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
|
||
|
+ struct drm_sched_job *sched_job)
|
||
|
+{
|
||
|
+
|
||
|
+ struct v3d_queue_pid_stats *pid_stats = NULL;
|
||
|
+ struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
|
||
|
+ struct v3d_queue_pid_stats *cur;
|
||
|
+ struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
|
||
|
+ int ret = 0;
|
||
|
+
|
||
|
+ mutex_lock(&queue_stats->lock);
|
||
|
+
|
||
|
+ /* Completion of previous job requires an update of its runtime stats */
|
||
|
+ v3d_sched_stats_update(queue_stats);
|
||
|
+
|
||
|
+ queue_stats->last_exec_start = local_clock();
|
||
|
+ queue_stats->last_exec_end = 0;
|
||
|
+ queue_stats->jobs_sent++;
|
||
|
+ queue_stats->last_pid = job->client_pid;
|
||
|
+
|
||
|
+ /* gpu usage stats by process are being collected */
|
||
|
+ if (time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout)) {
|
||
|
+ list_for_each_entry(cur, pid_stats_list, list) {
|
||
|
+ if (cur->pid == job->client_pid) {
|
||
|
+ pid_stats = cur;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ /* pid_stats of this client is moved to the head of the list. */
|
||
|
+ if (pid_stats) {
|
||
|
+ list_move(&pid_stats->list, pid_stats_list);
|
||
|
+ } else {
|
||
|
+ pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
|
||
|
+ GFP_KERNEL);
|
||
|
+ if (!pid_stats) {
|
||
|
+ ret = -ENOMEM;
|
||
|
+ goto err_mem;
|
||
|
+ }
|
||
|
+ pid_stats->pid = job->client_pid;
|
||
|
+ list_add(&pid_stats->list, pid_stats_list);
|
||
|
+ }
|
||
|
+ pid_stats->jobs_sent++;
|
||
|
+ pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
|
||
|
+ }
|
||
|
+
|
||
|
+err_mem:
|
||
|
+ mutex_unlock(&queue_stats->lock);
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
|
||
|
{
|
||
|
struct v3d_bin_job *job = to_bin_job(sched_job);
|
||
|
@@ -107,6 +216,7 @@ static struct dma_fence *v3d_bin_job_run
|
||
|
trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
|
||
|
job->start, job->end);
|
||
|
|
||
|
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
|
||
|
v3d_switch_perfmon(v3d, &job->base);
|
||
|
|
||
|
/* Set the current and end address of the control list.
|
||
|
@@ -158,6 +268,7 @@ static struct dma_fence *v3d_render_job_
|
||
|
trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
|
||
|
job->start, job->end);
|
||
|
|
||
|
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
|
||
|
v3d_switch_perfmon(v3d, &job->base);
|
||
|
|
||
|
/* XXX: Set the QCFG */
|
||
|
@@ -190,6 +301,7 @@ v3d_tfu_job_run(struct drm_sched_job *sc
|
||
|
|
||
|
trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
|
||
|
|
||
|
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
|
||
|
V3D_WRITE(V3D_TFU_IIA, job->args.iia);
|
||
|
V3D_WRITE(V3D_TFU_IIS, job->args.iis);
|
||
|
V3D_WRITE(V3D_TFU_ICA, job->args.ica);
|
||
|
@@ -231,6 +343,7 @@ v3d_csd_job_run(struct drm_sched_job *sc
|
||
|
|
||
|
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
|
||
|
|
||
|
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
|
||
|
v3d_switch_perfmon(v3d, &job->base);
|
||
|
|
||
|
for (i = 1; i <= 6; i++)
|
||
|
@@ -247,7 +360,10 @@ v3d_cache_clean_job_run(struct drm_sched
|
||
|
struct v3d_job *job = to_v3d_job(sched_job);
|
||
|
struct v3d_dev *v3d = job->v3d;
|
||
|
|
||
|
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
|
||
|
+ sched_job);
|
||
|
v3d_clean_caches(v3d);
|
||
|
+ v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
@@ -385,8 +501,18 @@ v3d_sched_init(struct v3d_dev *v3d)
|
||
|
int hw_jobs_limit = 1;
|
||
|
int job_hang_limit = 0;
|
||
|
int hang_limit_ms = 500;
|
||
|
+ enum v3d_queue q;
|
||
|
int ret;
|
||
|
|
||
|
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
|
||
|
+ INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
|
||
|
+ /* Setting timeout before current jiffies disables collecting
|
||
|
+ * pid_stats on scheduling init.
|
||
|
+ */
|
||
|
+ v3d->gpu_queue_stats[q].gpu_pid_stats_timeout = jiffies - 1;
|
||
|
+ mutex_init(&v3d->gpu_queue_stats[q].lock);
|
||
|
+ }
|
||
|
+
|
||
|
ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
|
||
|
&v3d_bin_sched_ops,
|
||
|
hw_jobs_limit, job_hang_limit,
|
||
|
@@ -440,9 +566,20 @@ void
|
||
|
v3d_sched_fini(struct v3d_dev *v3d)
|
||
|
{
|
||
|
enum v3d_queue q;
|
||
|
+ struct v3d_queue_stats *queue_stats;
|
||
|
|
||
|
for (q = 0; q < V3D_MAX_QUEUES; q++) {
|
||
|
- if (v3d->queue[q].sched.ready)
|
||
|
+ if (v3d->queue[q].sched.ready) {
|
||
|
+ queue_stats = &v3d->gpu_queue_stats[q];
|
||
|
+ mutex_lock(&queue_stats->lock);
|
||
|
+ /* Setting gpu_pid_stats_timeout to jiffies-1 will
|
||
|
+ * make v3d_sched_stats_update to purge all
|
||
|
+ * allocated pid_stats.
|
||
|
+ */
|
||
|
+ queue_stats->gpu_pid_stats_timeout = jiffies - 1;
|
||
|
+ v3d_sched_stats_update(queue_stats);
|
||
|
+ mutex_unlock(&queue_stats->lock);
|
||
|
drm_sched_fini(&v3d->queue[q].sched);
|
||
|
+ }
|
||
|
}
|
||
|
}
|