/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "perf/intel_perf.h"
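/*
 * Count the submitted command buffers that carry utrace points. For
 * command buffers that are not ONE_TIME_SUBMIT (and can therefore be
 * resubmitted before their timestamps are read back), also accumulate
 * the number of trace chunks whose timestamps will need to be copied
 * out at submission.
 */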
static uint32_t
command_buffers_count_utraces(struct anv_device *device,
                              uint32_t cmd_buffer_count,
                              struct anv_cmd_buffer **cmd_buffers,
                              uint32_t *utrace_copies)
{
   if (!u_trace_context_actively_tracing(&device->ds.trace_context))
      return 0;

   uint32_t utraces = 0;
   for (uint32_t i = 0; i < cmd_buffer_count; i++) {
      if (u_trace_has_points(&cmd_buffers[i]->trace)) {
         utraces++;
         if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
            *utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
      }
   }

   return utraces;
}

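/*
 * u_trace callback, invoked once the trace data of a submission has been
 * processed: releases the timestamp/batch BOs, the relocation list, the
 * sync object and the flush data itself.
 */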
static void
anv_utrace_delete_flush_data(struct u_trace_context *utctx,
                             void *flush_data)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_utrace_flush_copy *flush = flush_data;

   intel_ds_flush_data_fini(&flush->ds);

   if (flush->trace_bo) {
      assert(flush->batch_bo);
      anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
      anv_device_release_bo(device, flush->batch_bo);
      anv_device_release_bo(device, flush->trace_bo);
   }

   vk_sync_destroy(&device->vk, flush->sync);

   vk_free(&device->vk.alloc, flush);
}

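/*
 * u_trace callback used when cloning traces: emits a streamout-based copy
 * of `count` 64-bit timestamps from a command buffer's timestamp BO into
 * the flush data's trace BO.
 */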
static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
                                      void *cmdstream,
                                      void *ts_from, uint32_t from_offset,
                                      void *ts_to, uint32_t to_offset,
                                      uint32_t count)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_utrace_flush_copy *flush = cmdstream;
   struct anv_address from_addr = (struct anv_address) {
      .bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
   struct anv_address to_addr = (struct anv_address) {
      .bo = ts_to, .offset = to_offset * sizeof(uint64_t) };

   anv_genX(&device->info, emit_so_memcpy)(&flush->memcpy_state,
                                           to_addr, from_addr, count * sizeof(uint64_t));
}

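/*
 * Prepare the utrace flush data for a set of command buffers about to be
 * submitted on a queue. ONE_TIME_SUBMIT command buffers can have their
 * timestamps read back in place; reusable ones get their trace chunks
 * cloned into a freshly built copy batch so their timestamp BOs can be
 * recycled on resubmission. Returns NULL flush data when none of the
 * command buffers carry trace points.
 */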
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                    uint32_t cmd_buffer_count,
                                    struct anv_cmd_buffer **cmd_buffers,
                                    struct anv_utrace_flush_copy **out_flush_data)
{
   struct anv_device *device = queue->device;
   uint32_t utrace_copies = 0;
   uint32_t utraces = command_buffers_count_utraces(device,
                                                    cmd_buffer_count,
                                                    cmd_buffers,
                                                    &utrace_copies);
   if (!utraces) {
      *out_flush_data = NULL;
      return VK_SUCCESS;
   }

   VkResult result;
   struct anv_utrace_flush_copy *flush =
      vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!flush)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);

   result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
                           0, 0, &flush->sync);
   if (result != VK_SUCCESS)
      goto error_sync;

   if (utrace_copies > 0) {
      result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                 utrace_copies * 4096,
                                 &flush->trace_bo);
      if (result != VK_SUCCESS)
         goto error_trace_buf;

      result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                 /* 512 bytes (128 dwords) of setup + 64 bytes per copy */
                                 align_u32(512 + 64 * utrace_copies, 4096),
                                 &flush->batch_bo);
      if (result != VK_SUCCESS)
         goto error_batch_buf;

      result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
      if (result != VK_SUCCESS)
         goto error_reloc_list;

      flush->batch.alloc = &device->vk.alloc;
      flush->batch.relocs = &flush->relocs;
      anv_batch_set_storage(&flush->batch,
                            (struct anv_address) { .bo = flush->batch_bo, },
                            flush->batch_bo->map, flush->batch_bo->size);

      /* Emit the copies */
      anv_genX(&device->info, emit_so_memcpy_init)(&flush->memcpy_state,
                                                   device,
                                                   &flush->batch);
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
            u_trace_flush(&cmd_buffers[i]->trace, flush, false);
         } else {
            u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                 u_trace_end_iterator(&cmd_buffers[i]->trace),
                                 &flush->ds.trace,
                                 flush,
                                 anv_device_utrace_emit_copy_ts_buffer);
         }
      }
      anv_genX(&device->info, emit_so_memcpy_fini)(&flush->memcpy_state);

      u_trace_flush(&flush->ds.trace, flush, true);

      if (flush->batch.status != VK_SUCCESS) {
         result = flush->batch.status;
         goto error_batch;
      }
   } else {
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
         u_trace_flush(&cmd_buffers[i]->trace, flush, i == (cmd_buffer_count - 1));
      }
   }

   flush->queue = queue;

   *out_flush_data = flush;

   return VK_SUCCESS;

 error_batch:
   anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
 error_reloc_list:
   anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
 error_batch_buf:
   anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
 error_trace_buf:
   vk_sync_destroy(&device->vk, flush->sync);
 error_sync:
   vk_free(&device->vk.alloc, flush);
   return result;
}

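/*
 * u_trace callback: allocates a BO from the utrace pool to hold
 * timestamps, rounding the size up to a page.
 */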
static void *
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);

   struct anv_bo *bo = NULL;
   UNUSED VkResult result =
      anv_bo_pool_alloc(&device->utrace_bo_pool,
                        align_u32(size_b, 4096),
                        &bo);
   assert(result == VK_SUCCESS);

   return bo;
}

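/*
 * u_trace callback: returns a timestamp BO to the utrace pool.
 */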
static void
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_bo *bo = timestamps;

   anv_bo_pool_free(&device->utrace_bo_pool, bo);
}

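/*
 * u_trace callback: emits a timestamp write into slot `idx` of the
 * timestamp BO, either at the top or at the end of the pipe.
 */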
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
                     void *timestamps, unsigned idx,
                     bool end_of_pipe)
{
   struct anv_cmd_buffer *cmd_buffer =
      container_of(ut, struct anv_cmd_buffer, trace);
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *bo = timestamps;

   device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
                                        (struct anv_address) {
                                           .bo = bo,
                                           .offset = idx * sizeof(uint64_t) },
                                        end_of_pipe);
}

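/*
 * u_trace callback: reads back one timestamp, stalling on the
 * submission's sync object for the first entry only, and converts it
 * from GPU ticks to nanoseconds.
 */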
static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
                   void *timestamps, unsigned idx, void *flush_data)
{
   struct anv_device *device =
      container_of(utctx, struct anv_device, ds.trace_context);
   struct anv_bo *bo = timestamps;
   struct anv_utrace_flush_copy *flush = flush_data;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      UNUSED VkResult result =
         vk_sync_wait(&device->vk,
                      flush->sync,
                      0,
                      VK_SYNC_WAIT_COMPLETE,
                      os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
      assert(result == VK_SUCCESS);
   }

   uint64_t *ts = bo->map;

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return intel_device_info_timebase_scale(&device->info, ts[idx]);
}

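/*
 * Map an i915 engine class to the short name used when registering the
 * queue with the tracing infrastructure.
 */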
static const char *
queue_family_to_name(const struct anv_queue_family *family)
{
   switch (family->engine_class) {
   case I915_ENGINE_CLASS_RENDER:
      return "render";
   case I915_ENGINE_CLASS_COPY:
      return "copy";
   case I915_ENGINE_CLASS_VIDEO:
      return "video";
   case I915_ENGINE_CLASS_VIDEO_ENHANCE:
      return "video-enh";
   default:
      return "unknown";
   }
}

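/*
 * Set up the device's utrace state: the BO pool backing timestamp/copy
 * buffers, the intel_ds device, the u_trace context wired to the
 * callbacks above, and one named tracing queue per hardware queue.
 */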
void
anv_device_utrace_init(struct anv_device *device)
{
   anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
   intel_ds_device_init(&device->ds, &device->info, device->fd,
                        device->physical->local_minor - 128,
                        INTEL_DS_API_VULKAN);
   u_trace_context_init(&device->ds.trace_context,
                        &device->ds,
                        anv_utrace_create_ts_buffer,
                        anv_utrace_destroy_ts_buffer,
                        anv_utrace_record_ts,
                        anv_utrace_read_ts,
                        anv_utrace_delete_flush_data);

   for (uint32_t q = 0; q < device->queue_count; q++) {
      struct anv_queue *queue = &device->queues[q];

      queue->ds =
         intel_ds_device_add_queue(&device->ds, "%s%u",
                                   queue_family_to_name(queue->family),
                                   queue->index_in_family);
   }
}

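/*
 * Tear down the device's utrace state, processing any pending trace
 * events first.
 */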
void
anv_device_utrace_finish(struct anv_device *device)
{
   u_trace_context_process(&device->ds.trace_context, true);
   intel_ds_device_fini(&device->ds);
   anv_bo_pool_finish(&device->utrace_bo_pool);
}

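/*
 * Translate ANV_PIPE_* flush/stall/invalidate bits into the equivalent
 * INTEL_DS_* stall flags reported in the trace.
 */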
enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
{
   static const struct {
      enum anv_pipe_bits anv;
      enum intel_ds_stall_flag ds;
   } anv_to_ds_flags[] = {
      { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,            .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT,             .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT,             .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,    .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
      { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,       .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,    .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT,          .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,     .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
      { .anv = ANV_PIPE_DEPTH_STALL_BIT,                  .ds = INTEL_DS_DEPTH_STALL_BIT, },
      { .anv = ANV_PIPE_CS_STALL_BIT,                     .ds = INTEL_DS_CS_STALL_BIT, },
      { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT,           .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
      { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT,          .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
   };

   enum intel_ds_stall_flag ret = 0;
   for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
      if (anv_to_ds_flags[i].anv & bits)
         ret |= anv_to_ds_flags[i].ds;
   }

   return ret;
}