18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2018 Broadcom
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci/**
78c2ecf20Sopenharmony_ci * DOC: VC4 V3D performance monitor module
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * The V3D block provides 16 hardware counters which can count various events.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include "vc4_drv.h"
138c2ecf20Sopenharmony_ci#include "vc4_regs.h"
148c2ecf20Sopenharmony_ci
/*
 * Lower and upper bounds handed to idr_alloc() when a perfmon ID is
 * assigned in vc4_perfmon_create_ioctl().
 *
 * NOTE(review): idr_alloc() takes its bounds as int, so U32_MAX is converted
 * to int at the call site - confirm the resulting upper limit is the one
 * intended.
 */
#define VC4_PERFMONID_MIN	1
#define VC4_PERFMONID_MAX	U32_MAX
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_civoid vc4_perfmon_get(struct vc4_perfmon *perfmon)
198c2ecf20Sopenharmony_ci{
208c2ecf20Sopenharmony_ci	if (perfmon)
218c2ecf20Sopenharmony_ci		refcount_inc(&perfmon->refcnt);
228c2ecf20Sopenharmony_ci}
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_civoid vc4_perfmon_put(struct vc4_perfmon *perfmon)
258c2ecf20Sopenharmony_ci{
268c2ecf20Sopenharmony_ci	if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
278c2ecf20Sopenharmony_ci		kfree(perfmon);
288c2ecf20Sopenharmony_ci}
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_civoid vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
318c2ecf20Sopenharmony_ci{
328c2ecf20Sopenharmony_ci	unsigned int i;
338c2ecf20Sopenharmony_ci	u32 mask;
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
368c2ecf20Sopenharmony_ci		return;
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci	for (i = 0; i < perfmon->ncounters; i++)
398c2ecf20Sopenharmony_ci		V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	mask = GENMASK(perfmon->ncounters - 1, 0);
428c2ecf20Sopenharmony_ci	V3D_WRITE(V3D_PCTRC, mask);
438c2ecf20Sopenharmony_ci	V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
448c2ecf20Sopenharmony_ci	vc4->active_perfmon = perfmon;
458c2ecf20Sopenharmony_ci}
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_civoid vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
488c2ecf20Sopenharmony_ci		      bool capture)
498c2ecf20Sopenharmony_ci{
508c2ecf20Sopenharmony_ci	unsigned int i;
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!vc4->active_perfmon ||
538c2ecf20Sopenharmony_ci			 perfmon != vc4->active_perfmon))
548c2ecf20Sopenharmony_ci		return;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	if (capture) {
578c2ecf20Sopenharmony_ci		for (i = 0; i < perfmon->ncounters; i++)
588c2ecf20Sopenharmony_ci			perfmon->counters[i] += V3D_READ(V3D_PCTR(i));
598c2ecf20Sopenharmony_ci	}
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	V3D_WRITE(V3D_PCTRE, 0);
628c2ecf20Sopenharmony_ci	vc4->active_perfmon = NULL;
638c2ecf20Sopenharmony_ci}
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_cistruct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
668c2ecf20Sopenharmony_ci{
678c2ecf20Sopenharmony_ci	struct vc4_perfmon *perfmon;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	mutex_lock(&vc4file->perfmon.lock);
708c2ecf20Sopenharmony_ci	perfmon = idr_find(&vc4file->perfmon.idr, id);
718c2ecf20Sopenharmony_ci	vc4_perfmon_get(perfmon);
728c2ecf20Sopenharmony_ci	mutex_unlock(&vc4file->perfmon.lock);
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci	return perfmon;
758c2ecf20Sopenharmony_ci}
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_civoid vc4_perfmon_open_file(struct vc4_file *vc4file)
788c2ecf20Sopenharmony_ci{
798c2ecf20Sopenharmony_ci	mutex_init(&vc4file->perfmon.lock);
808c2ecf20Sopenharmony_ci	idr_init(&vc4file->perfmon.idr);
818c2ecf20Sopenharmony_ci}
828c2ecf20Sopenharmony_ci
/*
 * idr_for_each() callback: drop the reference held on one perfmon entry.
 * @id and @data are unused. Always returns 0.
 *
 * NOTE(review): this does not check whether the entry is the device's
 * active_perfmon before dropping the reference - confirm callers guarantee
 * it has been stopped.
 */
static int vc4_perfmon_idr_del(int id, void *elem, void *data)
{
	vc4_perfmon_put(elem);

	return 0;
}
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_civoid vc4_perfmon_close_file(struct vc4_file *vc4file)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	mutex_lock(&vc4file->perfmon.lock);
958c2ecf20Sopenharmony_ci	idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
968c2ecf20Sopenharmony_ci	idr_destroy(&vc4file->perfmon.idr);
978c2ecf20Sopenharmony_ci	mutex_unlock(&vc4file->perfmon.lock);
988c2ecf20Sopenharmony_ci}
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ciint vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
1018c2ecf20Sopenharmony_ci			     struct drm_file *file_priv)
1028c2ecf20Sopenharmony_ci{
1038c2ecf20Sopenharmony_ci	struct vc4_dev *vc4 = to_vc4_dev(dev);
1048c2ecf20Sopenharmony_ci	struct vc4_file *vc4file = file_priv->driver_priv;
1058c2ecf20Sopenharmony_ci	struct drm_vc4_perfmon_create *req = data;
1068c2ecf20Sopenharmony_ci	struct vc4_perfmon *perfmon;
1078c2ecf20Sopenharmony_ci	unsigned int i;
1088c2ecf20Sopenharmony_ci	int ret;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	if (!vc4->v3d) {
1118c2ecf20Sopenharmony_ci		DRM_DEBUG("Creating perfmon no VC4 V3D probed\n");
1128c2ecf20Sopenharmony_ci		return -ENODEV;
1138c2ecf20Sopenharmony_ci	}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	/* Number of monitored counters cannot exceed HW limits. */
1168c2ecf20Sopenharmony_ci	if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
1178c2ecf20Sopenharmony_ci	    !req->ncounters)
1188c2ecf20Sopenharmony_ci		return -EINVAL;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	/* Make sure all events are valid. */
1218c2ecf20Sopenharmony_ci	for (i = 0; i < req->ncounters; i++) {
1228c2ecf20Sopenharmony_ci		if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
1238c2ecf20Sopenharmony_ci			return -EINVAL;
1248c2ecf20Sopenharmony_ci	}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	perfmon = kzalloc(struct_size(perfmon, counters, req->ncounters),
1278c2ecf20Sopenharmony_ci			  GFP_KERNEL);
1288c2ecf20Sopenharmony_ci	if (!perfmon)
1298c2ecf20Sopenharmony_ci		return -ENOMEM;
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci	for (i = 0; i < req->ncounters; i++)
1328c2ecf20Sopenharmony_ci		perfmon->events[i] = req->events[i];
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci	perfmon->ncounters = req->ncounters;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	refcount_set(&perfmon->refcnt, 1);
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	mutex_lock(&vc4file->perfmon.lock);
1398c2ecf20Sopenharmony_ci	ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
1408c2ecf20Sopenharmony_ci			VC4_PERFMONID_MAX, GFP_KERNEL);
1418c2ecf20Sopenharmony_ci	mutex_unlock(&vc4file->perfmon.lock);
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	if (ret < 0) {
1448c2ecf20Sopenharmony_ci		kfree(perfmon);
1458c2ecf20Sopenharmony_ci		return ret;
1468c2ecf20Sopenharmony_ci	}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	req->id = ret;
1498c2ecf20Sopenharmony_ci	return 0;
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ciint vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
1538c2ecf20Sopenharmony_ci			      struct drm_file *file_priv)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	struct vc4_dev *vc4 = to_vc4_dev(dev);
1568c2ecf20Sopenharmony_ci	struct vc4_file *vc4file = file_priv->driver_priv;
1578c2ecf20Sopenharmony_ci	struct drm_vc4_perfmon_destroy *req = data;
1588c2ecf20Sopenharmony_ci	struct vc4_perfmon *perfmon;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	if (!vc4->v3d) {
1618c2ecf20Sopenharmony_ci		DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n");
1628c2ecf20Sopenharmony_ci		return -ENODEV;
1638c2ecf20Sopenharmony_ci	}
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	mutex_lock(&vc4file->perfmon.lock);
1668c2ecf20Sopenharmony_ci	perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
1678c2ecf20Sopenharmony_ci	mutex_unlock(&vc4file->perfmon.lock);
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci	if (!perfmon)
1708c2ecf20Sopenharmony_ci		return -EINVAL;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	vc4_perfmon_put(perfmon);
1738c2ecf20Sopenharmony_ci	return 0;
1748c2ecf20Sopenharmony_ci}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ciint vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
1778c2ecf20Sopenharmony_ci				 struct drm_file *file_priv)
1788c2ecf20Sopenharmony_ci{
1798c2ecf20Sopenharmony_ci	struct vc4_dev *vc4 = to_vc4_dev(dev);
1808c2ecf20Sopenharmony_ci	struct vc4_file *vc4file = file_priv->driver_priv;
1818c2ecf20Sopenharmony_ci	struct drm_vc4_perfmon_get_values *req = data;
1828c2ecf20Sopenharmony_ci	struct vc4_perfmon *perfmon;
1838c2ecf20Sopenharmony_ci	int ret;
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	if (!vc4->v3d) {
1868c2ecf20Sopenharmony_ci		DRM_DEBUG("Getting perfmon no VC4 V3D probed\n");
1878c2ecf20Sopenharmony_ci		return -ENODEV;
1888c2ecf20Sopenharmony_ci	}
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	mutex_lock(&vc4file->perfmon.lock);
1918c2ecf20Sopenharmony_ci	perfmon = idr_find(&vc4file->perfmon.idr, req->id);
1928c2ecf20Sopenharmony_ci	vc4_perfmon_get(perfmon);
1938c2ecf20Sopenharmony_ci	mutex_unlock(&vc4file->perfmon.lock);
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	if (!perfmon)
1968c2ecf20Sopenharmony_ci		return -EINVAL;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
1998c2ecf20Sopenharmony_ci			 perfmon->ncounters * sizeof(u64)))
2008c2ecf20Sopenharmony_ci		ret = -EFAULT;
2018c2ecf20Sopenharmony_ci	else
2028c2ecf20Sopenharmony_ci		ret = 0;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	vc4_perfmon_put(perfmon);
2058c2ecf20Sopenharmony_ci	return ret;
2068c2ecf20Sopenharmony_ci}
207