xref: /third_party/mesa3d/src/amd/common/ac_sqtt.h (revision bf215546)
1/*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 * Copyright 2020 Valve Corporation
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#ifndef AC_SQTT_H
27#define AC_SQTT_H
28
29#include <stdint.h>
30#include <stdbool.h>
31
32#include <assert.h>
33#include "ac_rgp.h"
34
/* Incomplete types: this header only refers to them through pointers. */
struct radeon_info;
struct radeon_cmdbuf;
37
38struct ac_thread_trace_data {
39   struct radeon_cmdbuf *start_cs[2];
40   struct radeon_cmdbuf *stop_cs[2];
41   /* struct radeon_winsys_bo or struct pb_buffer */
42   void *bo;
43   void *ptr;
44   uint32_t buffer_size;
45   int start_frame;
46   char *trigger_file;
47
48   struct rgp_code_object rgp_code_object;
49   struct rgp_loader_events rgp_loader_events;
50   struct rgp_pso_correlation rgp_pso_correlation;
51
52   struct rgp_queue_info rgp_queue_info;
53   struct rgp_queue_event rgp_queue_event;
54
55   struct rgp_clock_calibration rgp_clock_calibration;
56};
57
/*
 * Shift amount: (1 << SQTT_BUFFER_ALIGN_SHIFT) == 4096.
 * NOTE(review): presumably the byte alignment of SQTT buffers; the users of
 * this macro are outside this header.
 */
#define SQTT_BUFFER_ALIGN_SHIFT 12
59
/**
 * Three 32-bit words of trace status.  The third word has two member names
 * that share storage through an anonymous union.
 */
struct ac_thread_trace_info {
   uint32_t cur_offset;
   uint32_t trace_status;

   /* Same storage, two names. */
   union {
      uint32_t gfx9_write_counter;
      uint32_t gfx10_dropped_cntr;
   };
};
68
69struct ac_thread_trace_se {
70   struct ac_thread_trace_info info;
71   void *data_ptr;
72   uint32_t shader_engine;
73   uint32_t compute_unit;
74};
75
76struct ac_thread_trace {
77   struct ac_thread_trace_data *data;
78   uint32_t num_traces;
79   struct ac_thread_trace_se traces[4];
80};
81
/*
 * Thread-trace buffer helpers.  Only the declarations are visible in this
 * header; the one-line notes below are inferred from names and signatures.
 * NOTE(review): confirm them against the definitions.
 */

/* 64-bit offset associated with index `se` (presumably of that entry's
 * ac_thread_trace_info record). */
uint64_t ac_thread_trace_get_info_offset(unsigned se);

/* 64-bit offset associated with index `se` (presumably of that entry's trace
 * data). */
uint64_t ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
                                         const struct ac_thread_trace_data *data,
                                         unsigned se);

/* Like ac_thread_trace_get_info_offset(), but also takes an address `va`
 * (presumably the base the result is relative to). */
uint64_t ac_thread_trace_get_info_va(uint64_t va, unsigned se);

/* Like ac_thread_trace_get_data_offset(), but also takes an address `va`
 * (presumably the base the result is relative to). */
uint64_t ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
                                     const struct ac_thread_trace_data *data,
                                     uint64_t va, unsigned se);

/* Boolean query on an info record; judging by the name, it reports whether
 * the trace completed. */
bool ac_is_thread_trace_complete(struct radeon_info *rad_info,
                                 const struct ac_thread_trace_data *data,
                                 const struct ac_thread_trace_info *info);

/* 32-bit size derived from an info record; units are not visible here. */
uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info,
                                     const struct ac_thread_trace_info *info);
103
/**
 * Identifiers for RGP SQ thread-tracing markers (Table 1).
 *
 * All sixteen values 0x0..0xF are listed, so every value that fits in the
 * 4-bit `identifier` field of the marker structs has a name.
 */
enum rgp_sqtt_marker_identifier
{
   RGP_SQTT_MARKER_IDENTIFIER_EVENT             = 0x0,
   RGP_SQTT_MARKER_IDENTIFIER_CB_START          = 0x1,
   RGP_SQTT_MARKER_IDENTIFIER_CB_END            = 0x2,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START     = 0x3,
   RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END       = 0x4,
   RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT        = 0x5,
   RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API       = 0x6,
   RGP_SQTT_MARKER_IDENTIFIER_SYNC              = 0x7,
   RGP_SQTT_MARKER_IDENTIFIER_PRESENT           = 0x8,
   RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9,
   RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS       = 0xA,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED2         = 0xB,
   RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE     = 0xC,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED4         = 0xD,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED5         = 0xE,
   RGP_SQTT_MARKER_IDENTIFIER_RESERVED6         = 0xF
};
126
/**
 * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2)
 *
 * Four 32-bit words.  Each word is an anonymous union, so it can be accessed
 * through its named member(s) or as the raw dwordNN value.
 */
struct rgp_sqtt_marker_cb_start {
   /* Word 1: packed header, 4 + 3 + 20 + 5 = 32 bits. */
   union {
      struct {
         uint32_t identifier :  4;
         uint32_t ext_dwords :  3;
         uint32_t cb_id      : 20;
         uint32_t queue      :  5;
      };
      uint32_t dword01;
   };

   /* Word 2. */
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };

   /* Word 3. */
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };

   /* Word 4. */
   union {
      uint32_t queue_flags;
      uint32_t dword04;
   };
};

/* Compile-time size check: four words, 16 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16,
              "rgp_sqtt_marker_cb_start doesn't match RGP spec");
156
/**
 * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3)
 *
 * Three 32-bit words; each one is an anonymous union that exposes both the
 * named member(s) and the raw dwordNN value.
 */
struct rgp_sqtt_marker_cb_end {
   /* Word 1: packed header, 4 + 3 + 20 + 5 = 32 bits. */
   union {
      struct {
         uint32_t identifier :  4;
         uint32_t ext_dwords :  3;
         uint32_t cb_id      : 20;
         uint32_t reserved   :  5;
      };
      uint32_t dword01;
   };

   /* Word 2. */
   union {
      uint32_t device_id_low;
      uint32_t dword02;
   };

   /* Word 3. */
   union {
      uint32_t device_id_high;
      uint32_t dword03;
   };
};

/* Compile-time size check: three words, 12 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12,
              "rgp_sqtt_marker_cb_end doesn't match RGP spec");
183
/**
 * API types used in RGP SQ thread-tracing markers for the "General API"
 * packet.
 *
 * Values 0..45 are contiguous.  ApiInvalid is a sentinel outside that range.
 */
enum rgp_sqtt_marker_general_api_type
{
   /* Binding */
   ApiCmdBindPipeline                = 0,
   ApiCmdBindDescriptorSets          = 1,
   ApiCmdBindIndexBuffer             = 2,
   ApiCmdBindVertexBuffers           = 3,

   /* Draws and dispatches */
   ApiCmdDraw                        = 4,
   ApiCmdDrawIndexed                 = 5,
   ApiCmdDrawIndirect                = 6,
   ApiCmdDrawIndexedIndirect         = 7,
   ApiCmdDrawIndirectCountAMD        = 8,
   ApiCmdDrawIndexedIndirectCountAMD = 9,
   ApiCmdDispatch                    = 10,
   ApiCmdDispatchIndirect            = 11,

   /* Copies, updates, clears and resolves */
   ApiCmdCopyBuffer                  = 12,
   ApiCmdCopyImage                   = 13,
   ApiCmdBlitImage                   = 14,
   ApiCmdCopyBufferToImage           = 15,
   ApiCmdCopyImageToBuffer           = 16,
   ApiCmdUpdateBuffer                = 17,
   ApiCmdFillBuffer                  = 18,
   ApiCmdClearColorImage             = 19,
   ApiCmdClearDepthStencilImage      = 20,
   ApiCmdClearAttachments            = 21,
   ApiCmdResolveImage                = 22,

   /* Synchronization */
   ApiCmdWaitEvents                  = 23,
   ApiCmdPipelineBarrier             = 24,

   /* Queries */
   ApiCmdBeginQuery                  = 25,
   ApiCmdEndQuery                    = 26,
   ApiCmdResetQueryPool              = 27,
   ApiCmdWriteTimestamp              = 28,
   ApiCmdCopyQueryPoolResults        = 29,

   ApiCmdPushConstants               = 30,

   /* Render passes and secondary command buffers */
   ApiCmdBeginRenderPass             = 31,
   ApiCmdNextSubpass                 = 32,
   ApiCmdEndRenderPass               = 33,
   ApiCmdExecuteCommands             = 34,

   /* Dynamic state */
   ApiCmdSetViewport                 = 35,
   ApiCmdSetScissor                  = 36,
   ApiCmdSetLineWidth                = 37,
   ApiCmdSetDepthBias                = 38,
   ApiCmdSetBlendConstants           = 39,
   ApiCmdSetDepthBounds              = 40,
   ApiCmdSetStencilCompareMask       = 41,
   ApiCmdSetStencilWriteMask         = 42,
   ApiCmdSetStencilReference         = 43,

   /* Indirect-count draws without the AMD suffix */
   ApiCmdDrawIndirectCount           = 44,
   ApiCmdDrawIndexedIndirectCount    = 45,

   ApiInvalid                        = 0xffffffff
};
238
/**
 * RGP SQ thread-tracing marker for a "General API" instrumentation packet.
 *
 * A single 32-bit word, readable either as packed fields or as dword01.
 */
struct rgp_sqtt_marker_general_api {
   union {
      /* 4 + 3 + 20 + 1 + 4 = 32 bits. */
      struct {
         uint32_t identifier :  4;
         uint32_t ext_dwords :  3;
         uint32_t api_type   : 20;
         uint32_t is_end     :  1;
         uint32_t reserved   :  4;
      };
      uint32_t dword01;
   };
};

/* Compile-time size check: one word, 4 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4,
              "rgp_sqtt_marker_general_api doesn't match RGP spec");
257
/**
 * API types used in RGP SQ thread-tracing markers (Table 16).
 *
 * Values 0..28 are contiguous.  EventInvalid is a sentinel outside that
 * range.
 */
enum rgp_sqtt_marker_event_type
{
   /* Draws and dispatches */
   EventCmdDraw                        = 0,
   EventCmdDrawIndexed                 = 1,
   EventCmdDrawIndirect                = 2,
   EventCmdDrawIndexedIndirect         = 3,
   EventCmdDrawIndirectCountAMD        = 4,
   EventCmdDrawIndexedIndirectCountAMD = 5,
   EventCmdDispatch                    = 6,
   EventCmdDispatchIndirect            = 7,

   /* Copies, updates, clears and resolves */
   EventCmdCopyBuffer                  = 8,
   EventCmdCopyImage                   = 9,
   EventCmdBlitImage                   = 10,
   EventCmdCopyBufferToImage           = 11,
   EventCmdCopyImageToBuffer           = 12,
   EventCmdUpdateBuffer                = 13,
   EventCmdFillBuffer                  = 14,
   EventCmdClearColorImage             = 15,
   EventCmdClearDepthStencilImage      = 16,
   EventCmdClearAttachments            = 17,
   EventCmdResolveImage                = 18,

   /* Synchronization and queries */
   EventCmdWaitEvents                  = 19,
   EventCmdPipelineBarrier             = 20,
   EventCmdResetQueryPool              = 21,
   EventCmdCopyQueryPoolResults        = 22,

   /* Render-pass events */
   EventRenderPassColorClear           = 23,
   EventRenderPassDepthStencilClear    = 24,
   EventRenderPassResolve              = 25,

   EventInternalUnknown                = 26,

   /* Indirect-count draws without the AMD suffix */
   EventCmdDrawIndirectCount           = 27,
   EventCmdDrawIndexedIndirectCount    = 28,

   EventInvalid                        = 0xffffffff
};
294
/**
 * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4)
 *
 * Three 32-bit words; each one is an anonymous union that exposes both the
 * named member(s) and the raw dwordNN value.
 */
struct rgp_sqtt_marker_event {
   /* Word 1: 4 + 3 + 24 + 1 = 32 bits. */
   union {
      struct {
         uint32_t identifier      :  4;
         uint32_t ext_dwords      :  3;
         uint32_t api_type        : 24;
         uint32_t has_thread_dims :  1;
      };
      uint32_t dword01;
   };

   /* Word 2: 20 + 4 + 4 + 4 = 32 bits. */
   union {
      struct {
         uint32_t cb_id                   : 20;
         uint32_t vertex_offset_reg_idx   :  4;
         uint32_t instance_offset_reg_idx :  4;
         uint32_t draw_index_reg_idx      :  4;
      };
      uint32_t dword02;
   };

   /* Word 3. */
   union {
      uint32_t cmd_id;
      uint32_t dword03;
   };
};

/* Compile-time size check: three words, 12 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_event) == 12,
              "rgp_sqtt_marker_event doesn't match RGP spec");
325
326/**
327 * Per-dispatch specific marker where workgroup dims are included.
328 */
329struct rgp_sqtt_marker_event_with_dims {
330   struct rgp_sqtt_marker_event event;
331   uint32_t thread_x;
332   uint32_t thread_y;
333   uint32_t thread_z;
334};
335
336static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24,
337              "rgp_sqtt_marker_event_with_dims doesn't match RGP spec");
338
/**
 * "Barrier Start" RGP SQTT instrumentation marker (Table 5)
 *
 * Two 32-bit words; each one is an anonymous union that exposes both the
 * packed fields and the raw dwordNN value.
 */
struct rgp_sqtt_marker_barrier_start {
   /* Word 1: 4 + 3 + 20 + 5 = 32 bits. */
   union {
      struct {
         uint32_t identifier :  4;
         uint32_t ext_dwords :  3;
         uint32_t cb_id      : 20;
         uint32_t reserved   :  5;
      };
      uint32_t dword01;
   };

   /* Word 2: 31 + 1 = 32 bits. */
   union {
      struct {
         uint32_t driver_reason : 31;
         uint32_t internal      :  1;
      };
      uint32_t dword02;
   };
};

/* Compile-time size check: two words, 8 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8,
              "rgp_sqtt_marker_barrier_start doesn't match RGP spec");
363
/**
 * "Barrier End" RGP SQTT instrumentation marker (Table 6)
 *
 * Two 32-bit words; each one is an anonymous union that exposes both the
 * packed fields and the raw dwordNN value.
 */
struct rgp_sqtt_marker_barrier_end {
   /* Word 1: 4 + 3 + 20 header bits, then five 1-bit flags = 32 bits. */
   union {
      struct {
         uint32_t identifier       :  4;
         uint32_t ext_dwords       :  3;
         uint32_t cb_id            : 20;
         uint32_t wait_on_eop_ts   :  1;
         uint32_t vs_partial_flush :  1;
         uint32_t ps_partial_flush :  1;
         uint32_t cs_partial_flush :  1;
         uint32_t pfp_sync_me      :  1;
      };
      uint32_t dword01;
   };

   /* Word 2: ten 1-bit flags, a 16-bit count, one more flag and 5 reserved
    * bits = 32 bits. */
   union {
      struct {
         uint32_t sync_cp_dma            :  1;
         uint32_t inval_tcp              :  1;
         uint32_t inval_sqI              :  1;
         uint32_t inval_sqK              :  1;
         uint32_t flush_tcc              :  1;
         uint32_t inval_tcc              :  1;
         uint32_t flush_cb               :  1;
         uint32_t inval_cb               :  1;
         uint32_t flush_db               :  1;
         uint32_t inval_db               :  1;
         uint32_t num_layout_transitions : 16;
         uint32_t inval_gl1              :  1;
         uint32_t reserved               :  5;
      };
      uint32_t dword02;
   };
};

/* Compile-time size check: two words, 8 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8,
              "rgp_sqtt_marker_barrier_end doesn't match RGP spec");
403
/**
 * "Layout Transition" RGP SQTT instrumentation marker (Table 7)
 *
 * Two 32-bit words; each one is an anonymous union that exposes both the
 * packed fields and the raw dwordNN value.
 */
struct rgp_sqtt_marker_layout_transition {
   /* Word 1: 4 + 3 header bits, eight 1-bit flags, 17 reserved = 32 bits. */
   union {
      struct {
         uint32_t identifier                :  4;
         uint32_t ext_dwords                :  3;
         uint32_t depth_stencil_expand      :  1;
         uint32_t htile_hiz_range_expand    :  1;
         uint32_t depth_stencil_resummarize :  1;
         uint32_t dcc_decompress            :  1;
         uint32_t fmask_decompress          :  1;
         uint32_t fast_clear_eliminate      :  1;
         uint32_t fmask_color_expand        :  1;
         uint32_t init_mask_ram             :  1;
         uint32_t reserved1                 : 17;
      };
      uint32_t dword01;
   };

   /* Word 2: entirely reserved. */
   union {
      struct {
         uint32_t reserved2 : 32;
      };
      uint32_t dword02;
   };
};

/* Compile-time size check: two words, 8 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8,
              "rgp_sqtt_marker_layout_transition doesn't match RGP spec");
434
435
/**
 * "User Event" RGP SQTT instrumentation marker (Table 8)
 *
 * A single 32-bit word, readable either as packed fields
 * (4 + 8 + 8 + 12 = 32 bits) or as dword01.
 */
struct rgp_sqtt_marker_user_event {
   union {
      struct {
         uint32_t identifier : 4;
         uint32_t reserved0 : 8;
         uint32_t data_type : 8;
         uint32_t reserved1 : 12;
      };
      uint32_t dword01;
   };
};

/**
 * User event marker followed by one extra 32-bit `length` word.
 * NOTE(review): what `length` measures is not visible in this header.
 */
struct rgp_sqtt_marker_user_event_with_length {
   struct rgp_sqtt_marker_user_event user_event;
   uint32_t length;
};

static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4,
              "rgp_sqtt_marker_user_event doesn't match RGP spec");

/* Size check for the extended form too, like every other marker struct in
 * this header: 4-byte marker + 4-byte length. */
static_assert(sizeof(struct rgp_sqtt_marker_user_event_with_length) == 8,
              "rgp_sqtt_marker_user_event_with_length doesn't match RGP spec");
457
/**
 * User event kinds.  Values are spelled out explicitly, as in the other
 * enums of this header.
 */
enum rgp_sqtt_marker_user_event_type
{
   UserEventTrigger    = 0,
   UserEventPop        = 1,
   UserEventPush       = 2,
   UserEventObjectName = 3
};
465
/**
 * "Pipeline bind" RGP SQTT instrumentation marker (Table 12)
 *
 * Three 32-bit words: a packed header followed by a two-word hash.
 */
struct rgp_sqtt_marker_pipeline_bind {
   /* Word 1: 4 + 3 + 1 + 20 + 4 = 32 bits. */
   union {
      struct {
         uint32_t identifier :  4;
         uint32_t ext_dwords :  3;
         uint32_t bind_point :  1;
         uint32_t cb_id      : 20;
         uint32_t reserved   :  4;
      };
      uint32_t dword01;
   };

   /* Words 2-3: api_pso_hash[0] shares storage with dword02 and
    * api_pso_hash[1] with dword03. */
   union {
      uint32_t api_pso_hash[2];
      struct {
         uint32_t dword02;
         uint32_t dword03;
      };
   };
};

/* Compile-time size check: three words, 12 bytes. */
static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
              "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");
491
492
/*
 * Only the declarations are visible in this header; the notes below are
 * inferred from names and signatures.
 * NOTE(review): confirm them (including what the bool result means) against
 * the definitions.
 */

/* Takes the state object and a 64-bit pipeline hash; presumably records a
 * PSO correlation entry for that hash. */
bool
ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data,
                            uint64_t pipeline_hash);

/* Takes the state object, a 64-bit pipeline hash and a 64-bit base address;
 * presumably records a code-object loader event for them. */
bool
ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data,
                                     uint64_t pipeline_hash,
                                     uint64_t base_address);

/* Boolean query that only reads `info`. */
bool
ac_check_profile_state(const struct radeon_info *info);
501
502#endif
503