1/* 2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Jonathan Marek <jonathan@marek.ca> 25 * Rob Clark <robclark@freedesktop.org> 26 */ 27 28/* NOTE: perfcntrs are 48-bits but we only have 32-bit accumulate (?) 29 * so we work with 32-bits only. we accumulate start/stop separately, 30 * which differs from a5xx but works with only accumulate (no add/neg) 31 */ 32 33#include "freedreno_query_acc.h" 34#include "freedreno_resource.h" 35 36#include "fd2_context.h" 37#include "fd2_query.h" 38 39struct PACKED fd2_query_sample { 40 uint32_t start; 41 uint32_t stop; 42}; 43 44/* offset of a single field of an array of fd2_query_sample: */ 45#define query_sample_idx(aq, idx, field) \ 46 fd_resource((aq)->prsc)->bo, \ 47 (idx * sizeof(struct fd2_query_sample)) + \ 48 offsetof(struct fd2_query_sample, field), \ 49 0, 0 50 51/* offset of a single field of fd2_query_sample: */ 52#define query_sample(aq, field) query_sample_idx(aq, 0, field) 53 54/* 55 * Performance Counter (batch) queries: 56 * 57 * Only one of these is active at a time, per design of the gallium 58 * batch_query API design. On perfcntr query tracks N query_types, 59 * each of which has a 'fd_batch_query_entry' that maps it back to 60 * the associated group and counter. 61 */ 62 63struct fd_batch_query_entry { 64 uint8_t gid; /* group-id */ 65 uint8_t cid; /* countable-id within the group */ 66}; 67 68struct fd_batch_query_data { 69 struct fd_screen *screen; 70 unsigned num_query_entries; 71 struct fd_batch_query_entry query_entries[]; 72}; 73 74static void 75perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt 76{ 77 struct fd_batch_query_data *data = aq->query_data; 78 struct fd_screen *screen = data->screen; 79 struct fd_ringbuffer *ring = batch->draw; 80 81 unsigned counters_per_group[screen->num_perfcntr_groups]; 82 memset(counters_per_group, 0, sizeof(counters_per_group)); 83 84 fd_wfi(batch, ring); 85 86 /* configure performance counters for the requested queries: */ 87 for (unsigned i = 0; i < data->num_query_entries; i++) { 88 struct fd_batch_query_entry *entry = &data->query_entries[i]; 89 const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; 90 unsigned counter_idx = counters_per_group[entry->gid]++; 91 92 assert(counter_idx < g->num_counters); 93 94 OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1); 95 OUT_RING(ring, g->countables[entry->cid].selector); 96 } 97 98 memset(counters_per_group, 0, sizeof(counters_per_group)); 99 100 /* and snapshot the start values */ 101 for (unsigned i = 0; i < data->num_query_entries; i++) { 102 struct fd_batch_query_entry *entry = &data->query_entries[i]; 103 const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; 104 unsigned counter_idx = counters_per_group[entry->gid]++; 105 const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; 106 107 OUT_PKT3(ring, CP_REG_TO_MEM, 2); 108 OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE); 109 OUT_RELOC(ring, query_sample_idx(aq, i, start)); 110 } 111} 112 113static void 114perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt 115{ 116 struct fd_batch_query_data *data = aq->query_data; 117 struct fd_screen *screen = data->screen; 118 struct fd_ringbuffer *ring = batch->draw; 119 120 unsigned counters_per_group[screen->num_perfcntr_groups]; 121 memset(counters_per_group, 0, sizeof(counters_per_group)); 122 123 fd_wfi(batch, ring); 124 125 /* TODO do we need to bother to turn anything off? */ 126 127 /* snapshot the end values: */ 128 for (unsigned i = 0; i < data->num_query_entries; i++) { 129 struct fd_batch_query_entry *entry = &data->query_entries[i]; 130 const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; 131 unsigned counter_idx = counters_per_group[entry->gid]++; 132 const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; 133 134 OUT_PKT3(ring, CP_REG_TO_MEM, 2); 135 OUT_RING(ring, counter->counter_reg_lo | CP_REG_TO_MEM_0_ACCUMULATE); 136 OUT_RELOC(ring, query_sample_idx(aq, i, stop)); 137 } 138} 139 140static void 141perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, 142 union pipe_query_result *result) 143{ 144 struct fd_batch_query_data *data = aq->query_data; 145 struct fd2_query_sample *sp = buf; 146 147 for (unsigned i = 0; i < data->num_query_entries; i++) 148 result->batch[i].u64 = sp[i].stop - sp[i].start; 149} 150 151static const struct fd_acc_sample_provider perfcntr = { 152 .query_type = FD_QUERY_FIRST_PERFCNTR, 153 .always = true, 154 .resume = perfcntr_resume, 155 .pause = perfcntr_pause, 156 .result = perfcntr_accumulate_result, 157}; 158 159static struct pipe_query * 160fd2_create_batch_query(struct pipe_context *pctx, unsigned num_queries, 161 unsigned *query_types) 162{ 163 struct fd_context *ctx = fd_context(pctx); 164 struct fd_screen *screen = ctx->screen; 165 struct fd_query *q; 166 struct fd_acc_query *aq; 167 struct fd_batch_query_data *data; 168 169 data = CALLOC_VARIANT_LENGTH_STRUCT( 170 fd_batch_query_data, num_queries * sizeof(data->query_entries[0])); 171 172 data->screen = screen; 173 data->num_query_entries = num_queries; 174 175 /* validate the requested query_types and ensure we don't try 176 * to request more query_types of a given group than we have 177 * counters: 178 */ 179 unsigned counters_per_group[screen->num_perfcntr_groups]; 180 memset(counters_per_group, 0, sizeof(counters_per_group)); 181 182 for (unsigned i = 0; i < num_queries; i++) { 183 unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; 184 185 /* verify valid query_type, ie. is it actually a perfcntr? */ 186 if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || 187 (idx >= screen->num_perfcntr_queries)) { 188 mesa_loge("invalid batch query query_type: %u", query_types[i]); 189 goto error; 190 } 191 192 struct fd_batch_query_entry *entry = &data->query_entries[i]; 193 struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; 194 195 entry->gid = pq->group_id; 196 197 /* the perfcntr_queries[] table flattens all the countables 198 * for each group in series, ie: 199 * 200 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... 201 * 202 * So to find the countable index just step back through the 203 * table to find the first entry with the same group-id. 204 */ 205 while (pq > screen->perfcntr_queries) { 206 pq--; 207 if (pq->group_id == entry->gid) 208 entry->cid++; 209 } 210 211 if (counters_per_group[entry->gid] >= 212 screen->perfcntr_groups[entry->gid].num_counters) { 213 mesa_loge("too many counters for group %u", entry->gid); 214 goto error; 215 } 216 217 counters_per_group[entry->gid]++; 218 } 219 220 q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); 221 aq = fd_acc_query(q); 222 223 /* sample buffer size is based on # of queries: */ 224 aq->size = num_queries * sizeof(struct fd2_query_sample); 225 aq->query_data = data; 226 227 return (struct pipe_query *)q; 228 229error: 230 free(data); 231 return NULL; 232} 233 234void 235fd2_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis 236{ 237 struct fd_context *ctx = fd_context(pctx); 238 239 ctx->create_query = fd_acc_create_query; 240 ctx->query_update_batch = fd_acc_query_update_batch; 241 242 pctx->create_batch_query = fd2_create_batch_query; 243} 244