From 04b14cd6be101594b46fedf0b5a50734ef55af1e Mon Sep 17 00:00:00 2001 From: Eric Anholt <eric@anholt.net> Date: Fri, 30 Oct 2015 10:09:02 -0700 Subject: [PATCH 113/381] drm/vc4: Add an interface for capturing the GPU state after a hang. This can be parsed with vc4-gpu-tools tools for trying to figure out what was going on. Signed-off-by: Eric Anholt <eric@anholt.net> --- drivers/gpu/drm/vc4/vc4_bo.c | 4 +- drivers/gpu/drm/vc4/vc4_drv.c | 1 + drivers/gpu/drm/vc4/vc4_drv.h | 4 + drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/vc4_drm.h | 45 ++++++++++ 5 files changed, 237 insertions(+), 2 deletions(-) --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -415,8 +415,8 @@ int vc4_mmap(struct file *filp, struct v gem_obj = vma->vm_private_data; bo = to_vc4_bo(gem_obj); - if (bo->validated_shader) { - DRM_ERROR("mmaping of shader BOs not allowed.\n"); + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); return -EINVAL; } --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -81,6 +81,7 @@ static const struct drm_ioctl_desc vc4_d DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), }; static struct drm_driver vc4_drm_driver = { --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -20,6 +20,8 @@ struct vc4_dev { struct drm_fbdev_cma *fbdev; struct rpi_firmware *firmware; + struct vc4_hang_state *hang_state; + /* The kernel-space BO cache. Tracks buffers that have been * unreferenced by all other users (refcounts of 0!) but not * yet freed, so we can do cheap allocations. @@ -366,6 +368,8 @@ int vc4_create_shader_bo_ioctl(struct dr struct drm_file *file_priv); int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); void *vc4_prime_vmap(struct drm_gem_object *obj); --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *d round_jiffies_up(jiffies + msecs_to_jiffies(100))); } +struct vc4_hang_state { + struct drm_vc4_get_hang_state user_state; + + u32 bo_count; + struct drm_gem_object **bo; +}; + +static void +vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) +{ + unsigned int i; + + mutex_lock(&dev->struct_mutex); + for (i = 0; i < state->user_state.bo_count; i++) { + drm_gem_object_unreference(state->bo[i]); + } + mutex_unlock(&dev->struct_mutex); + + kfree(state); +} + +int +vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_get_hang_state *get_state = data; + struct drm_vc4_get_hang_state_bo *bo_state; + struct vc4_hang_state *kernel_state; + struct drm_vc4_get_hang_state *state; + struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long irqflags; + u32 i; + int ret; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + kernel_state = vc4->hang_state; + if (!kernel_state) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return -ENOENT; + } + state = &kernel_state->user_state; + + /* If the user's array isn't big enough, just return the + * required array size. + */ + if (get_state->bo_count < state->bo_count) { + get_state->bo_count = state->bo_count; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return 0; + } + + vc4->hang_state = NULL; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + /* Save the user's BO pointer, so we don't stomp it with the memcpy. */ + state->bo = get_state->bo; + memcpy(get_state, state, sizeof(*state)); + + bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL); + if (!bo_state) { + ret = -ENOMEM; + goto err_free; + } + + for (i = 0; i < state->bo_count; i++) { + struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]); + u32 handle; + ret = drm_gem_handle_create(file_priv, kernel_state->bo[i], + &handle); + + if (ret) { + state->bo_count = i - 1; + goto err; + } + bo_state[i].handle = handle; + bo_state[i].paddr = vc4_bo->base.paddr; + bo_state[i].size = vc4_bo->base.base.size; + } + + ret = copy_to_user((void __user *)(uintptr_t)get_state->bo, + bo_state, + state->bo_count * sizeof(*bo_state)); + kfree(bo_state); + + err_free: + + vc4_free_hang_state(dev, kernel_state); + +err: + return ret; +} + +static void +vc4_save_hang_state(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_get_hang_state *state; + struct vc4_hang_state *kernel_state; + struct vc4_exec_info *exec; + struct vc4_bo *bo; + unsigned long irqflags; + unsigned int i, unref_list_count; + + kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL); + if (!kernel_state) + return; + + state = &kernel_state->user_state; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + exec = vc4_first_job(vc4); + if (!exec) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return; + } + + unref_list_count = 0; + list_for_each_entry(bo, &exec->unref_list, unref_head) + unref_list_count++; + + state->bo_count = exec->bo_count + unref_list_count; + kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo), + GFP_ATOMIC); + if (!kernel_state->bo) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return; + } + + for (i = 0; i < exec->bo_count; i++) { + drm_gem_object_reference(&exec->bo[i].bo->base); + kernel_state->bo[i] = &exec->bo[i].bo->base; + } + + list_for_each_entry(bo, &exec->unref_list, unref_head) { + drm_gem_object_reference(&bo->base.base); + kernel_state->bo[i] = &bo->base.base; + i++; + } + + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + state->ct0ca = V3D_READ(V3D_CTNCA(0)); + state->ct0ea = V3D_READ(V3D_CTNEA(0)); + + state->ct1ca = V3D_READ(V3D_CTNCA(1)); + state->ct1ea = V3D_READ(V3D_CTNEA(1)); + + state->ct0cs = V3D_READ(V3D_CTNCS(0)); + state->ct1cs = V3D_READ(V3D_CTNCS(1)); + + state->ct0ra0 = V3D_READ(V3D_CT00RA0); + state->ct1ra0 = V3D_READ(V3D_CT01RA0); + + state->bpca = V3D_READ(V3D_BPCA); + state->bpcs = V3D_READ(V3D_BPCS); + state->bpoa = V3D_READ(V3D_BPOA); + state->bpos = V3D_READ(V3D_BPOS); + + state->vpmbase = V3D_READ(V3D_VPMBASE); + + state->dbge = V3D_READ(V3D_DBGE); + state->fdbgo = V3D_READ(V3D_FDBGO); + state->fdbgb = V3D_READ(V3D_FDBGB); + state->fdbgr = V3D_READ(V3D_FDBGR); + state->fdbgs = V3D_READ(V3D_FDBGS); + state->errstat = V3D_READ(V3D_ERRSTAT); + + spin_lock_irqsave(&vc4->job_lock, irqflags); + if (vc4->hang_state) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + vc4_free_hang_state(dev, kernel_state); + } else { + vc4->hang_state = kernel_state; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + } +} + static void vc4_reset(struct drm_device *dev) { @@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work) struct vc4_dev *vc4 = container_of(work, struct vc4_dev, hangcheck.reset_work); + vc4_save_hang_state(vc4->dev); + vc4_reset(vc4->dev); } @@ -673,4 +855,7 @@ vc4_gem_destroy(struct drm_device *dev) } vc4_bo_cache_destroy(dev); + + if (vc4->hang_state) + vc4_free_hang_state(dev, vc4->hang_state); } --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -32,6 +32,7 @@ #define DRM_VC4_CREATE_BO 0x03 #define DRM_VC4_MMAP_BO 0x04 #define DRM_VC4_CREATE_SHADER_BO 0x05 +#define DRM_VC4_GET_HANG_STATE 0x06 #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -39,6 +40,7 @@ #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) +#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) struct drm_vc4_submit_rcl_surface { uint32_t hindex; /* Handle index, or ~0 if not present. */ @@ -226,4 +228,47 @@ struct drm_vc4_mmap_bo { uint64_t offset; }; +struct drm_vc4_get_hang_state_bo { + uint32_t handle; + uint32_t paddr; + uint32_t size; + uint32_t pad; +}; + +/** + * struct drm_vc4_hang_state - ioctl argument for collecting state + * from a GPU hang for analysis. +*/ +struct drm_vc4_get_hang_state { + /** Pointer to array of struct drm_vc4_get_hang_state_bo. */ + uint64_t bo; + /** + * On input, the size of the bo array. Output is the number + * of bos to be returned. + */ + uint32_t bo_count; + + uint32_t start_bin, start_render; + + uint32_t ct0ca, ct0ea; + uint32_t ct1ca, ct1ea; + uint32_t ct0cs, ct1cs; + uint32_t ct0ra0, ct1ra0; + + uint32_t bpca, bpcs; + uint32_t bpoa, bpos; + + uint32_t vpmbase; + + uint32_t dbge; + uint32_t fdbgo; + uint32_t fdbgb; + uint32_t fdbgr; + uint32_t fdbgs; + uint32_t errstat; + + /* Pad that we may save more registers into in the future. */ + uint32_t pad[16]; +}; + #endif /* _UAPI_VC4_DRM_H_ */