1/*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * Expose V3D HW perf counters.
26 *
27 * We also have code to fake support for occlusion queries.
28 * Since we expose support for GL 2.0, we have to expose occlusion queries,
29 * but the spec allows you to expose 0 query counter bits, so we just return 0
30 * as the result of all our queries.
31 */
32#include "vc4_context.h"
33
/**
 * Driver-side object behind the opaque struct pipe_query handles returned
 * by the query-creation hooks in this file.
 */
struct vc4_query {
        /* Number of HW counters in the batch.  Only set for HW perfmon
         * queries; stays 0 for the faked (always-zero) queries.
         */
        unsigned num_queries;

        /* HW perfmon state, or NULL when the query is not backed by HW
         * counters.
         */
        struct vc4_hwperfmon *hwperfmon;
};
39
/**
 * Names of the V3D HW performance counters.
 *
 * The position of a name in this table is the counter index:
 * vc4_get_driver_query_info() reports entry N with query type
 * PIPE_QUERY_DRIVER_SPECIFIC + N, so entries must not be reordered.
 *
 * Both the strings and the pointer table are const so the whole table is
 * read-only.
 *
 * NOTE: "discared" in the PTB entry is a typo for "discarded".  It is kept
 * as is because these names are returned to callers as the query names.
 */
static const char *const v3d_counter_names[] = {
        /* Front-end pipeline (FEP) */
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",

        /* Tile buffer (TLB) */
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",

        /* Primitive tile binner (PTB) */
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",

        /* QPU */
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",

        /* Texture unit (TMU) */
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",

        /* Vertex pipe memory (VPM) */
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",

        /* L2 cache */
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
72
73int vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
74                                    unsigned index,
75                                    struct pipe_driver_query_group_info *info)
76{
77        struct vc4_screen *screen = vc4_screen(pscreen);
78
79        if (!screen->has_perfmon_ioctl)
80                return 0;
81
82        if (!info)
83                return 1;
84
85        if (index > 0)
86                return 0;
87
88        info->name = "V3D counters";
89        info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
90        info->num_queries = ARRAY_SIZE(v3d_counter_names);
91        return 1;
92}
93
94int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
95                              struct pipe_driver_query_info *info)
96{
97        struct vc4_screen *screen = vc4_screen(pscreen);
98
99        if (!screen->has_perfmon_ioctl)
100                return 0;
101
102        if (!info)
103                return ARRAY_SIZE(v3d_counter_names);
104
105        if (index >= ARRAY_SIZE(v3d_counter_names))
106                return 0;
107
108        info->group_id = 0;
109        info->name = v3d_counter_names[index];
110        info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
111        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
112        info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
113        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
114        return 1;
115}
116
117static struct pipe_query *
118vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
119                       unsigned *query_types)
120{
121        struct vc4_query *query = calloc(1, sizeof(*query));
122        struct vc4_hwperfmon *hwperfmon;
123        unsigned i, nhwqueries = 0;
124
125        if (!query)
126                return NULL;
127
128        for (i = 0; i < num_queries; i++) {
129                if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
130                        nhwqueries++;
131        }
132
133        /* We can't mix HW and non-HW queries. */
134        if (nhwqueries && nhwqueries != num_queries)
135                goto err_free_query;
136
137        if (!nhwqueries)
138                return (struct pipe_query *)query;
139
140        hwperfmon = calloc(1, sizeof(*hwperfmon));
141        if (!hwperfmon)
142                goto err_free_query;
143
144        for (i = 0; i < num_queries; i++)
145                hwperfmon->events[i] = query_types[i] -
146                                       PIPE_QUERY_DRIVER_SPECIFIC;
147
148        query->hwperfmon = hwperfmon;
149        query->num_queries = num_queries;
150
151        /* Note that struct pipe_query isn't actually defined anywhere. */
152        return (struct pipe_query *)query;
153
154err_free_query:
155        free(query);
156
157        return NULL;
158}
159
/**
 * Creates a single query by handing it to vc4_create_batch_query() as a
 * batch of one.  The index argument is not used.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        unsigned types[1] = { query_type };

        return vc4_create_batch_query(ctx, 1, types);
}
165
166static void
167vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
168{
169        struct vc4_context *ctx = vc4_context(pctx);
170        struct vc4_query *query = (struct vc4_query *)pquery;
171
172        if (query->hwperfmon && query->hwperfmon->id) {
173                if (query->hwperfmon->id) {
174                        struct drm_vc4_perfmon_destroy req = { };
175
176                        req.id = query->hwperfmon->id;
177                        vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
178                                  &req);
179                }
180
181                free(query->hwperfmon);
182        }
183
184        free(query);
185}
186
187static bool
188vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
189{
190        struct vc4_query *query = (struct vc4_query *)pquery;
191        struct vc4_context *ctx = vc4_context(pctx);
192        struct drm_vc4_perfmon_create req = { };
193        unsigned i;
194        int ret;
195
196        if (!query->hwperfmon)
197                return true;
198
199        /* Only one perfmon can be activated per context. */
200        if (ctx->perfmon)
201                return false;
202
203        /* Reset the counters by destroying the previously allocated perfmon */
204        if (query->hwperfmon->id) {
205                struct drm_vc4_perfmon_destroy destroyreq = { };
206
207                destroyreq.id = query->hwperfmon->id;
208                vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);
209        }
210
211        for (i = 0; i < query->num_queries; i++)
212                req.events[i] = query->hwperfmon->events[i];
213
214        req.ncounters = query->num_queries;
215        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
216        if (ret)
217                return false;
218
219        query->hwperfmon->id = req.id;
220
221        /* Make sure all pendings jobs are flushed before activating the
222         * perfmon.
223         */
224        vc4_flush(pctx);
225        ctx->perfmon = query->hwperfmon;
226        return true;
227}
228
229static bool
230vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
231{
232        struct vc4_query *query = (struct vc4_query *)pquery;
233        struct vc4_context *ctx = vc4_context(pctx);
234
235        if (!query->hwperfmon)
236                return true;
237
238        if (ctx->perfmon != query->hwperfmon)
239                return false;
240
241        /* Make sure all pendings jobs are flushed before deactivating the
242         * perfmon.
243         */
244        vc4_flush(pctx);
245        ctx->perfmon = NULL;
246        return true;
247}
248
249static bool
250vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
251                     bool wait, union pipe_query_result *vresult)
252{
253        struct vc4_context *ctx = vc4_context(pctx);
254        struct vc4_query *query = (struct vc4_query *)pquery;
255        struct drm_vc4_perfmon_get_values req;
256        unsigned i;
257        int ret;
258
259        if (!query->hwperfmon) {
260                vresult->u64 = 0;
261                return true;
262        }
263
264        if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
265                            wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
266                return false;
267
268        req.id = query->hwperfmon->id;
269        req.values_ptr = (uintptr_t)query->hwperfmon->counters;
270        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
271        if (ret)
272                return false;
273
274        for (i = 0; i < query->num_queries; i++)
275                vresult->batch[i].u64 = query->hwperfmon->counters[i];
276
277        return true;
278}
279
/**
 * set_active_query_state hook.  This driver takes no action here; both
 * arguments are ignored.
 */
static void
vc4_set_active_query_state(struct pipe_context *pctx, bool enable)
{
        (void)pctx;
        (void)enable;
}
284
285void
286vc4_query_init(struct pipe_context *pctx)
287{
288        pctx->create_query = vc4_create_query;
289        pctx->create_batch_query = vc4_create_batch_query;
290        pctx->destroy_query = vc4_destroy_query;
291        pctx->begin_query = vc4_begin_query;
292        pctx->end_query = vc4_end_query;
293        pctx->get_query_result = vc4_get_query_result;
294        pctx->set_active_query_state = vc4_set_active_query_state;
295}
296