Skip to content

Commit

Permalink
drm/v3d: Expose performance counters to userspace
Browse files Browse the repository at this point in the history
The V3D engine has several hardware performance counters that can be of
interest for userspace performance analysis tools.

This exposes new ioctls to create and destroy performance monitor
objects, as well as to query the counter values.

Each created performance monitor object has an ID that can be attached
to CL/CSD submissions, so the driver enables the requested counters when
the job is submitted, and updates the performance monitor values when
the job is done.

It is up to the user to ensure all the jobs have been finished before
getting the performance monitor values. It is also up to the user to
properly synchronize BCL jobs when submitting jobs with different
performance monitors attached.

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Emma Anholt <emma@anholt.net>
To: dri-devel@lists.freedesktop.org
Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
Acked-by: Melissa Wen <mwen@igalia.com>
Signed-off-by: Melissa Wen <melissa.srw@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210608111541.461991-1-jasuarez@igalia.com
  • Loading branch information
jasuarez authored and melissawen committed Jul 20, 2021
1 parent 56f0729 commit 26a4dc2
Show file tree
Hide file tree
Showing 8 changed files with 470 additions and 0 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/v3d/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ v3d-y := \
v3d_gem.o \
v3d_irq.o \
v3d_mmu.o \
v3d_perfmon.o \
v3d_trace_points.o \
v3d_sched.o

Expand Down
8 changes: 8 additions & 0 deletions drivers/gpu/drm/v3d/v3d_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
args->value = 1;
return 0;
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
args->value = (v3d->ver >= 40);
return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
Expand Down Expand Up @@ -121,6 +124,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
1, NULL);
}

v3d_perfmon_open_file(v3d_priv);
file->driver_priv = v3d_priv;

return 0;
Expand All @@ -136,6 +140,7 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
}

v3d_perfmon_close_file(v3d_priv);
kfree(v3d_priv);
}

Expand All @@ -156,6 +161,9 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
};

static const struct drm_driver v3d_drm_driver = {
Expand Down
63 changes: 63 additions & 0 deletions drivers/gpu/drm/v3d/v3d_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,40 @@ struct v3d_queue_state {
u64 emit_seqno;
};

/* Performance monitor object. The perfmon lifetime is controlled by userspace
* using perfmon related ioctls. A perfmon can be attached to a submit_cl or
* submit_csd request, and when this is the case, HW perf counters will be
* activated just before the job is submitted to the GPU and disabled when the
* job is done. This way, only events related to a specific job will be counted.
*
* The object ends with a flexible array member (values[]), so it has to be
* allocated with room for the trailing counter storage.
*/
struct v3d_perfmon {
/* Tracks the number of users of the perfmon, when this counter reaches
* zero the perfmon is destroyed. Jobs that carry a perfmon pointer hold
* a reference and drop it with v3d_perfmon_put() when the job is freed.
*/
refcount_t refcnt;

/* Protects perfmon stop, as it can be invoked from multiple places
* (the GPU reset path is one of them).
*/
struct mutex lock;

/* Number of counters activated in this perfmon instance.
* NOTE(review): the original comment said this "should be less than
* DRM_V3D_MAX_PERF_COUNTERS"; since counters[] below holds exactly
* DRM_V3D_MAX_PERF_COUNTERS entries the bound is presumably inclusive
* -- confirm against the check in the create ioctl.
*/
u8 ncounters;

/* Events counted by the HW perf counters. */
u8 counters[DRM_V3D_MAX_PERF_COUNTERS];

/* Storage for counter values. Counters are incremented by the
* HW perf counter values every time the perfmon is attached
* to a GPU job. This way, perfmon users don't have to
* retrieve the results after each job if they want to track
* events covering several submissions. Note that counter
* values can't be reset, but you can fake a reset by
* destroying the perfmon and creating a new one.
*
* Flexible array member: must stay the last field of the struct.
* Presumably sized to ncounters entries at allocation time -- TODO
* confirm against the allocation in v3d_perfmon.c.
*/
u64 values[];
};

struct v3d_dev {
struct drm_device drm;

Expand Down Expand Up @@ -89,6 +123,9 @@ struct v3d_dev {
*/
spinlock_t job_lock;

/* Used to track the active perfmon if any. */
struct v3d_perfmon *active_perfmon;

/* Protects bo_stats */
struct mutex bo_lock;

Expand Down Expand Up @@ -133,6 +170,11 @@ v3d_has_csd(struct v3d_dev *v3d)
/* Per-open-file driver state. An instance is stored in file->driver_priv by
* v3d_open() and released with kfree() in v3d_postclose().
*/
struct v3d_file_priv {
/* Device this file was opened on. */
struct v3d_dev *v3d;

/* Performance monitors belonging to this file. Set up by
* v3d_perfmon_open_file() and torn down by v3d_perfmon_close_file().
* NOTE(review): the idr presumably maps the perfmon IDs handed to
* userspace to struct v3d_perfmon objects, and the lock presumably
* serializes access to that idr -- confirm against v3d_perfmon.c.
*/
struct {
struct idr idr;
struct mutex lock;
} perfmon;

/* One scheduler entity per hardware queue; each one is destroyed with
* drm_sched_entity_destroy() when the file is closed.
*/
struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
};

Expand Down Expand Up @@ -205,6 +247,11 @@ struct v3d_job {
*/
struct dma_fence *done_fence;

/* Pointer to a performance monitor object if the user requested it,
* NULL otherwise.
*/
struct v3d_perfmon *perfmon;

/* Callback for the freeing of the job on refcount going to 0. */
void (*free)(struct kref *ref);
};
Expand Down Expand Up @@ -353,3 +400,19 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo);
/* v3d_sched.c */
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);

/* v3d_perfmon.c */

/* Reference counting helpers for perfmon objects. The submit paths take an
* extra reference for each additional job that shares a perfmon, and
* v3d_job_free() drops the job's reference with v3d_perfmon_put().
* NOTE(review): v3d_perfmon_get() is called in the CL submit path without a
* NULL check on the perfmon pointer, so it presumably tolerates NULL -- confirm.
*/
void v3d_perfmon_get(struct v3d_perfmon *perfmon);
void v3d_perfmon_put(struct v3d_perfmon *perfmon);

/* Start/stop counting for a perfmon on the given device. v3d_reset() stops
* v3d->active_perfmon with capture == false.
* NOTE(review): 'capture' presumably selects whether the HW counter values
* are accumulated into perfmon->values on stop -- confirm.
*/
void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon);
void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
bool capture);

/* Look up the perfmon with the given ID for this file. Callers treat a NULL
* return as "no such perfmon" and fail the submission with -ENOENT.
* NOTE(review): the returned pointer is stored in the job and later released
* with v3d_perfmon_put(), so a reference is presumably taken here -- confirm.
*/
struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id);

/* Per-file perfmon setup and teardown, called from v3d_open() and
* v3d_postclose() respectively.
*/
void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv);
void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv);

/* ioctl handlers for V3D_PERFMON_CREATE, V3D_PERFMON_DESTROY and
* V3D_PERFMON_GET_VALUES; all three are registered with DRM_RENDER_ALLOW.
*/
int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
31 changes: 31 additions & 0 deletions drivers/gpu/drm/v3d/v3d_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ v3d_reset(struct v3d_dev *v3d)
v3d_mmu_set_page_table(v3d);
v3d_irq_reset(v3d);

v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

trace_v3d_reset_end(dev);
}

Expand Down Expand Up @@ -375,6 +377,9 @@ v3d_job_free(struct kref *ref)
pm_runtime_mark_last_busy(job->v3d->drm.dev);
pm_runtime_put_autosuspend(job->v3d->drm.dev);

if (job->perfmon)
v3d_perfmon_put(job->perfmon);

kfree(job);
}

Expand Down Expand Up @@ -539,6 +544,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,

trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

if (args->pad != 0)
return -EINVAL;

if (args->flags != 0 &&
args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
DRM_INFO("invalid flags: %d\n", args->flags);
Expand Down Expand Up @@ -611,8 +619,20 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;

if (args->perfmon_id) {
render->base.perfmon = v3d_perfmon_find(v3d_priv,
args->perfmon_id);

if (!render->base.perfmon) {
ret = -ENOENT;
goto fail;
}
}

mutex_lock(&v3d->sched_lock);
if (bin) {
bin->base.perfmon = render->base.perfmon;
v3d_perfmon_get(bin->base.perfmon);
ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
if (ret)
goto fail_unreserve;
Expand All @@ -633,6 +653,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
if (ret)
goto fail_unreserve;
clean_job->perfmon = render->base.perfmon;
v3d_perfmon_get(clean_job->perfmon);
ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
if (ret)
goto fail_unreserve;
Expand Down Expand Up @@ -827,6 +849,15 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;

if (args->perfmon_id) {
job->base.perfmon = v3d_perfmon_find(v3d_priv,
args->perfmon_id);
if (!job->base.perfmon) {
ret = -ENOENT;
goto fail;
}
}

mutex_lock(&v3d->sched_lock);
ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
if (ret)
Expand Down
Loading

0 comments on commit 26a4dc2

Please sign in to comment.