/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "perf/intel_perf.h"

/* Count the command buffers that have live u_trace points and, for the ones
 * that can be resubmitted, how many timestamp chunks will have to be copied
 * out before the application reuses them.
 */
static uint32_t
command_buffers_count_utraces(struct anv_device *device,
                              uint32_t cmd_buffer_count,
                              struct anv_cmd_buffer **cmd_buffers,
                              uint32_t *utrace_copies)
{
   if (!u_trace_context_actively_tracing(&device->ds.trace_context))
      return 0;

   uint32_t utraces = 0;
   for (uint32_t i = 0; i < cmd_buffer_count; i++) {
      if (u_trace_has_points(&cmd_buffers[i]->trace)) {
         utraces++;
         if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
            *utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
      }
   }

   return utraces;
}

/* u_trace callback: called once the trace events of a submission have been
 * processed, releases the resources attached to the flush data.
 */
static void
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
                             void *flush_data)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_utrace_flush_copy *flush = flush_data;

   intel_ds_flush_data_fini(&flush->ds);

   if (flush->trace_bo) {
      assert(flush->batch_bo);
      anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
      anv_device_release_bo(device, flush->batch_bo);
      anv_device_release_bo(device, flush->trace_bo);
   }

   vk_sync_destroy(&device->vk, flush->sync);

   vk_free(&device->vk.alloc, flush);
}

/* u_trace callback: copies a range of timestamps using the streamout-based
 * memcpy, emitted into the batch set up in
 * anv_device_utrace_flush_cmd_buffers().
 */
static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
                                      void *cmdstream,
                                      void *ts_from, uint32_t from_offset,
                                      void *ts_to, uint32_t to_offset,
                                      uint32_t count)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_utrace_flush_copy *flush = cmdstream;
   struct anv_address from_addr = (struct anv_address) {
      .bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
   struct anv_address to_addr = (struct anv_address) {
      .bo = ts_to, .offset = to_offset * sizeof(uint64_t) };

   anv_genX(&device->info, emit_so_memcpy)(&flush->memcpy_state,
                                           to_addr, from_addr,
                                           count * sizeof(uint64_t));
}
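
/*
 * The function below prepares the u_trace flush data for a set of command
 * buffers about to be submitted. One-time-submit command buffers can hand
 * their timestamp BOs directly to u_trace; resubmittable ones get their
 * timestamps cloned into a dedicated buffer (through the streamout memcpy
 * callback above) so a later resubmission cannot overwrite results that
 * are still being processed.
 *
 * Hedged usage sketch (the caller-side names below are illustrative
 * assumptions, not part of this file): a queue submission path would call
 * this right before building its execbuf and execute the returned copy
 * batch after the application's command buffers:
 *
 *    struct anv_utrace_flush_copy *utrace_flush = NULL;
 *    result = anv_device_utrace_flush_cmd_buffers(queue,
 *                                                 submit->cmd_buffer_count,
 *                                                 submit->cmd_buffers,
 *                                                 &utrace_flush);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    // ... submit; signal utrace_flush->sync once the GPU work is done ...
 */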

VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                    uint32_t cmd_buffer_count,
                                    struct anv_cmd_buffer **cmd_buffers,
                                    struct anv_utrace_flush_copy **out_flush_data)
{
   struct anv_device *device = queue->device;
   uint32_t utrace_copies = 0;
   uint32_t utraces = command_buffers_count_utraces(device,
                                                    cmd_buffer_count,
                                                    cmd_buffers,
                                                    &utrace_copies);
   if (!utraces) {
      *out_flush_data = NULL;
      return VK_SUCCESS;
   }

   VkResult result;
   struct anv_utrace_flush_copy *flush =
      vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!flush)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);

   result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
                           0, 0, &flush->sync);
   if (result != VK_SUCCESS)
      goto error_sync;

   if (utrace_copies > 0) {
      result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                 utrace_copies * 4096,
                                 &flush->trace_bo);
      if (result != VK_SUCCESS)
         goto error_trace_buf;

      result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                 /* 128 dwords of setup + 64 dwords per copy */
                                 align_u32(512 + 64 * utrace_copies, 4096),
                                 &flush->batch_bo);
      if (result != VK_SUCCESS)
         goto error_batch_buf;

      result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
      if (result != VK_SUCCESS)
         goto error_reloc_list;

      flush->batch.alloc = &device->vk.alloc;
      flush->batch.relocs = &flush->relocs;
      anv_batch_set_storage(&flush->batch,
                            (struct anv_address) { .bo = flush->batch_bo, },
                            flush->batch_bo->map, flush->batch_bo->size);

      /* Emit the copies */
      anv_genX(&device->info, emit_so_memcpy_init)(&flush->memcpy_state,
                                                   device,
                                                   &flush->batch);
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
            u_trace_flush(&cmd_buffers[i]->trace, flush, false);
         } else {
            u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                 u_trace_end_iterator(&cmd_buffers[i]->trace),
                                 &flush->ds.trace,
                                 flush,
                                 anv_device_utrace_emit_copy_ts_buffer);
         }
      }
      anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);

      u_trace_flush(&flush->ds.trace, flush, true);

      if (flush->batch.status != VK_SUCCESS) {
         result = flush->batch.status;
         goto error_batch;
      }
   } else {
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
         u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
      }
   }

   flush->queue = queue;

   *out_flush_data = flush;

   return VK_SUCCESS;

 error_batch:
   anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
 error_reloc_list:
   anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
 error_batch_buf:
   anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
 error_trace_buf:
   vk_sync_destroy(&device->vk, flush->sync);
 error_sync:
   vk_free(&device->vk.alloc, flush);
   return result;
}
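
/*
 * The callbacks below implement u_trace's timestamp-buffer contract:
 * allocate/free timestamp storage, emit the GPU timestamp write for a
 * trace point, and convert raw GPU values to nanoseconds on readback.
 * They are registered with u_trace_context_init() in
 * anv_device_utrace_init() further down.
 *
 * A hedged sketch of the flow, with hypothetical names (u_trace's
 * generated trace_*() helpers live outside this file):
 *
 *    trace_start_foo(&cmd_buffer->trace, ...);
 *       // u_trace reserves a slot in a BO obtained from
 *       // anv_utrace_create_ts_buffer() and calls anv_utrace_record_ts()
 *       // to emit the GPU write for that slot.
 *    ...
 *    u_trace_context_process(&device->ds.trace_context, false);
 *       // once the flush data's sync signals, anv_utrace_read_ts() is
 *       // called per slot and the event is forwarded to the datasource.
 */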

static void *
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);

   struct anv_bo *bo = NULL;
   UNUSED VkResult result =
      anv_bo_pool_alloc(&device->utrace_bo_pool,
                        align_u32(size_b, 4096),
                        &bo);
   assert(result == VK_SUCCESS);

   return bo;
}

static void
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_bo *bo = timestamps;

   anv_bo_pool_free(&device->utrace_bo_pool, bo);
}

static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
                     void *timestamps, unsigned idx,
                     bool end_of_pipe)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(ut, struct anv_cmd_buffer, trace);
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *bo = timestamps;

   device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
                                        (struct anv_address) {
                                           .bo = bo,
                                           .offset = idx * sizeof(uint64_t) },
                                        end_of_pipe);
}

static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
                   void *timestamps, unsigned idx, void *flush_data)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_bo *bo = timestamps;
   struct anv_utrace_flush_copy *flush = flush_data;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      UNUSED VkResult result =
         vk_sync_wait(&device->vk,
                      flush->sync,
                      0,
                      VK_SYNC_WAIT_COMPLETE,
                      os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
      assert(result == VK_SUCCESS);
   }

   uint64_t *ts = bo->map;

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return intel_device_info_timebase_scale(&device->info, ts[idx]);
}

static const char *
queue_family_to_name(const struct anv_queue_family *family)
{
   switch (family->engine_class) {
   case I915_ENGINE_CLASS_RENDER:
      return "render";
   case I915_ENGINE_CLASS_COPY:
      return "copy";
   case I915_ENGINE_CLASS_VIDEO:
      return "video";
   case I915_ENGINE_CLASS_VIDEO_ENHANCE:
      return "video-enh";
   default:
      return "unknown";
   }
}

void
anv_device_utrace_init(struct anv_device *device)
{
   anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
   intel_ds_device_init(&device->ds, &device->info, device->fd,
                        device->physical->local_minor - 128,
                        INTEL_DS_API_VULKAN);
   u_trace_context_init(&device->ds.trace_context,
                        &device->ds,
                        anv_utrace_create_ts_buffer,
                        anv_utrace_destroy_ts_buffer,
                        anv_utrace_record_ts,
                        anv_utrace_read_ts,
                        anv_utrace_delete_flush_data);

   for (uint32_t q = 0; q < device->queue_count; q++) {
      struct anv_queue *queue = &device->queues[q];

      queue->ds =
         intel_ds_device_add_queue(&device->ds, "%s%u",
                                   queue_family_to_name(queue->family),
                                   queue->index_in_family);
   }
}

void
anv_device_utrace_finish(struct anv_device *device)
{
   u_trace_context_process(&device->ds.trace_context, true);
   intel_ds_device_fini(&device->ds);
   anv_bo_pool_finish(&device->utrace_bo_pool);
}
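
/*
 * Translate ANV_PIPE_* dirty bits into the API-agnostic INTEL_DS_* stall
 * flags understood by the common Intel datasource code, so stall/flush
 * reasons can be attached to trace events. The mapping is table-driven and
 * bits without a counterpart are simply dropped.
 *
 * Hedged usage sketch (the caller-side field name is an assumption):
 *
 *    enum intel_ds_stall_flag ds_flags =
 *       anv_pipe_flush_bit_to_ds_stall_flag(cmd_buffer->state.pending_pipe_bits);
 */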

enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
{
   static const struct {
      enum anv_pipe_bits anv;
      enum intel_ds_stall_flag ds;
   } anv_to_ds_flags[] = {
      { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,            .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT,             .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT,             .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,    .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,       .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,    .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT,          .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,     .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_DEPTH_STALL_BIT,                  .ds = INTEL_DS_DEPTH_STALL_BIT, },
      { .anv = ANV_PIPE_CS_STALL_BIT,                     .ds = INTEL_DS_CS_STALL_BIT, },
      { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT,           .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
      { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT,          .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
   };

   enum intel_ds_stall_flag ret = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
      if (anv_to_ds_flags[i].anv & bits)
         ret |= anv_to_ds_flags[i].ds;
   }

   return ret;
}
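
/*
 * Hedged lifecycle note (the callers live in other files; the entrypoint
 * names are assumptions based on the usual anv device paths): utrace setup
 * is expected to happen once per device, after the queues exist, and
 * teardown before they are destroyed:
 *
 *    anv_device_utrace_init(device);    // e.g. at the end of anv_CreateDevice()
 *    ...
 *    anv_device_utrace_finish(device);  // e.g. from anv_DestroyDevice(),
 *                                       // drains pending traces first
 */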