1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * Expose V3D HW perf counters. 26 * 27 * We also have code to fake support for occlusion queries. 28 * Since we expose support for GL 2.0, we have to expose occlusion queries, 29 * but the spec allows you to expose 0 query counter bits, so we just return 0 30 * as the result of all our queries. 31 */ 32#include "vc4_context.h" 33 34struct vc4_query 35{ 36 unsigned num_queries; 37 struct vc4_hwperfmon *hwperfmon; 38}; 39 40static const char *v3d_counter_names[] = { 41 "FEP-valid-primitives-no-rendered-pixels", 42 "FEP-valid-primitives-rendered-pixels", 43 "FEP-clipped-quads", 44 "FEP-valid-quads", 45 "TLB-quads-not-passing-stencil-test", 46 "TLB-quads-not-passing-z-and-stencil-test", 47 "TLB-quads-passing-z-and-stencil-test", 48 "TLB-quads-with-zero-coverage", 49 "TLB-quads-with-non-zero-coverage", 50 "TLB-quads-written-to-color-buffer", 51 "PTB-primitives-discarded-outside-viewport", 52 "PTB-primitives-need-clipping", 53 "PTB-primitives-discared-reversed", 54 "QPU-total-idle-clk-cycles", 55 "QPU-total-clk-cycles-vertex-coord-shading", 56 "QPU-total-clk-cycles-fragment-shading", 57 "QPU-total-clk-cycles-executing-valid-instr", 58 "QPU-total-clk-cycles-waiting-TMU", 59 "QPU-total-clk-cycles-waiting-scoreboard", 60 "QPU-total-clk-cycles-waiting-varyings", 61 "QPU-total-instr-cache-hit", 62 "QPU-total-instr-cache-miss", 63 "QPU-total-uniform-cache-hit", 64 "QPU-total-uniform-cache-miss", 65 "TMU-total-text-quads-processed", 66 "TMU-total-text-cache-miss", 67 "VPM-total-clk-cycles-VDW-stalled", 68 "VPM-total-clk-cycles-VCD-stalled", 69 "L2C-total-cache-hit", 70 "L2C-total-cache-miss", 71}; 72 73int vc4_get_driver_query_group_info(struct pipe_screen *pscreen, 74 unsigned index, 75 struct pipe_driver_query_group_info *info) 76{ 77 struct vc4_screen *screen = vc4_screen(pscreen); 78 79 if (!screen->has_perfmon_ioctl) 80 return 0; 81 82 if (!info) 83 return 1; 84 85 if (index > 0) 86 return 0; 87 88 info->name = "V3D counters"; 89 info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS; 90 info->num_queries = ARRAY_SIZE(v3d_counter_names); 91 return 1; 92} 93 94int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, 95 struct pipe_driver_query_info *info) 96{ 97 struct vc4_screen *screen = vc4_screen(pscreen); 98 99 if (!screen->has_perfmon_ioctl) 100 return 0; 101 102 if (!info) 103 return ARRAY_SIZE(v3d_counter_names); 104 105 if (index >= ARRAY_SIZE(v3d_counter_names)) 106 return 0; 107 108 info->group_id = 0; 109 info->name = v3d_counter_names[index]; 110 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; 111 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; 112 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 113 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 114 return 1; 115} 116 117static struct pipe_query * 118vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries, 119 unsigned *query_types) 120{ 121 struct vc4_query *query = calloc(1, sizeof(*query)); 122 struct vc4_hwperfmon *hwperfmon; 123 unsigned i, nhwqueries = 0; 124 125 if (!query) 126 return NULL; 127 128 for (i = 0; i < num_queries; i++) { 129 if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC) 130 nhwqueries++; 131 } 132 133 /* We can't mix HW and non-HW queries. */ 134 if (nhwqueries && nhwqueries != num_queries) 135 goto err_free_query; 136 137 if (!nhwqueries) 138 return (struct pipe_query *)query; 139 140 hwperfmon = calloc(1, sizeof(*hwperfmon)); 141 if (!hwperfmon) 142 goto err_free_query; 143 144 for (i = 0; i < num_queries; i++) 145 hwperfmon->events[i] = query_types[i] - 146 PIPE_QUERY_DRIVER_SPECIFIC; 147 148 query->hwperfmon = hwperfmon; 149 query->num_queries = num_queries; 150 151 /* Note that struct pipe_query isn't actually defined anywhere. */ 152 return (struct pipe_query *)query; 153 154err_free_query: 155 free(query); 156 157 return NULL; 158} 159 160static struct pipe_query * 161vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) 162{ 163 return vc4_create_batch_query(ctx, 1, &query_type); 164} 165 166static void 167vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery) 168{ 169 struct vc4_context *ctx = vc4_context(pctx); 170 struct vc4_query *query = (struct vc4_query *)pquery; 171 172 if (query->hwperfmon && query->hwperfmon->id) { 173 if (query->hwperfmon->id) { 174 struct drm_vc4_perfmon_destroy req = { }; 175 176 req.id = query->hwperfmon->id; 177 vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, 178 &req); 179 } 180 181 free(query->hwperfmon); 182 } 183 184 free(query); 185} 186 187static bool 188vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery) 189{ 190 struct vc4_query *query = (struct vc4_query *)pquery; 191 struct vc4_context *ctx = vc4_context(pctx); 192 struct drm_vc4_perfmon_create req = { }; 193 unsigned i; 194 int ret; 195 196 if (!query->hwperfmon) 197 return true; 198 199 /* Only one perfmon can be activated per context. */ 200 if (ctx->perfmon) 201 return false; 202 203 /* Reset the counters by destroying the previously allocated perfmon */ 204 if (query->hwperfmon->id) { 205 struct drm_vc4_perfmon_destroy destroyreq = { }; 206 207 destroyreq.id = query->hwperfmon->id; 208 vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq); 209 } 210 211 for (i = 0; i < query->num_queries; i++) 212 req.events[i] = query->hwperfmon->events[i]; 213 214 req.ncounters = query->num_queries; 215 ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req); 216 if (ret) 217 return false; 218 219 query->hwperfmon->id = req.id; 220 221 /* Make sure all pendings jobs are flushed before activating the 222 * perfmon. 223 */ 224 vc4_flush(pctx); 225 ctx->perfmon = query->hwperfmon; 226 return true; 227} 228 229static bool 230vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery) 231{ 232 struct vc4_query *query = (struct vc4_query *)pquery; 233 struct vc4_context *ctx = vc4_context(pctx); 234 235 if (!query->hwperfmon) 236 return true; 237 238 if (ctx->perfmon != query->hwperfmon) 239 return false; 240 241 /* Make sure all pendings jobs are flushed before deactivating the 242 * perfmon. 243 */ 244 vc4_flush(pctx); 245 ctx->perfmon = NULL; 246 return true; 247} 248 249static bool 250vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery, 251 bool wait, union pipe_query_result *vresult) 252{ 253 struct vc4_context *ctx = vc4_context(pctx); 254 struct vc4_query *query = (struct vc4_query *)pquery; 255 struct drm_vc4_perfmon_get_values req; 256 unsigned i; 257 int ret; 258 259 if (!query->hwperfmon) { 260 vresult->u64 = 0; 261 return true; 262 } 263 264 if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno, 265 wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon")) 266 return false; 267 268 req.id = query->hwperfmon->id; 269 req.values_ptr = (uintptr_t)query->hwperfmon->counters; 270 ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req); 271 if (ret) 272 return false; 273 274 for (i = 0; i < query->num_queries; i++) 275 vresult->batch[i].u64 = query->hwperfmon->counters[i]; 276 277 return true; 278} 279 280static void 281vc4_set_active_query_state(struct pipe_context *pctx, bool enable) 282{ 283} 284 285void 286vc4_query_init(struct pipe_context *pctx) 287{ 288 pctx->create_query = vc4_create_query; 289 pctx->create_batch_query = vc4_create_batch_query; 290 pctx->destroy_query = vc4_destroy_query; 291 pctx->begin_query = vc4_begin_query; 292 pctx->end_query = vc4_end_query; 293 pctx->get_query_result = vc4_get_query_result; 294 pctx->set_active_query_state = vc4_set_active_query_state; 295} 296