1/* 2 * Copyright 2020 Advanced Micro Devices, Inc. 3 * Copyright 2020 Valve Corporation 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * on the rights to use, copy, modify, merge, publish, distribute, sub 10 * license, and/or sell copies of the Software, and to permit persons to whom 11 * the Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 * USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include "ac_sqtt.h" 27 28#include "ac_gpu_info.h" 29#include "util/u_math.h" 30#include "util/os_time.h" 31 32uint64_t 33ac_thread_trace_get_info_offset(unsigned se) 34{ 35 return sizeof(struct ac_thread_trace_info) * se; 36} 37 38uint64_t 39ac_thread_trace_get_data_offset(const struct radeon_info *rad_info, 40 const struct ac_thread_trace_data *data, unsigned se) 41{ 42 unsigned max_se = rad_info->max_se; 43 uint64_t data_offset; 44 45 data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se, 46 1 << SQTT_BUFFER_ALIGN_SHIFT); 47 data_offset += data->buffer_size * se; 48 49 return data_offset; 50} 51 52uint64_t 53ac_thread_trace_get_info_va(uint64_t va, unsigned se) 54{ 55 return va + ac_thread_trace_get_info_offset(se); 56} 57 58uint64_t 59ac_thread_trace_get_data_va(const struct radeon_info *rad_info, 60 const struct ac_thread_trace_data *data, uint64_t va, unsigned se) 61{ 62 return va + ac_thread_trace_get_data_offset(rad_info, data, se); 63} 64 65bool 66ac_is_thread_trace_complete(struct radeon_info *rad_info, 67 const struct ac_thread_trace_data *data, 68 const struct ac_thread_trace_info *info) 69{ 70 if (rad_info->gfx_level >= GFX10) { 71 /* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the number of 72 * dropped bytes per SE via THREAD_TRACE_DROPPED_CNTR. Though, this 73 * doesn't seem reliable because it might still report non-zero even if 74 * the SQTT buffer isn't full. 75 * 76 * The solution here is to compare the number of bytes written by the hw 77 * (in units of 32 bytes) to the SQTT buffer size. If it's equal, that 78 * means that the buffer is full and should be resized. 79 */ 80 return !(info->cur_offset * 32 == data->buffer_size - 32); 81 } 82 83 /* Otherwise, compare the current thread trace offset with the number 84 * of written bytes. 85 */ 86 return info->cur_offset == info->gfx9_write_counter; 87} 88 89uint32_t 90ac_get_expected_buffer_size(struct radeon_info *rad_info, 91 const struct ac_thread_trace_info *info) 92{ 93 if (rad_info->gfx_level >= GFX10) { 94 uint32_t dropped_cntr_per_se = info->gfx10_dropped_cntr / rad_info->max_se; 95 return ((info->cur_offset * 32) + dropped_cntr_per_se) / 1024; 96 } 97 98 return (info->gfx9_write_counter * 32) / 1024; 99} 100 101bool 102ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data, 103 uint64_t pipeline_hash) 104{ 105 struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation; 106 struct rgp_pso_correlation_record *record; 107 108 record = malloc(sizeof(struct rgp_pso_correlation_record)); 109 if (!record) 110 return false; 111 112 record->api_pso_hash = pipeline_hash; 113 record->pipeline_hash[0] = pipeline_hash; 114 record->pipeline_hash[1] = pipeline_hash; 115 memset(record->api_level_obj_name, 0, sizeof(record->api_level_obj_name)); 116 117 simple_mtx_lock(&pso_correlation->lock); 118 list_addtail(&record->list, &pso_correlation->record); 119 pso_correlation->record_count++; 120 simple_mtx_unlock(&pso_correlation->lock); 121 122 return true; 123} 124 125bool 126ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data, 127 uint64_t pipeline_hash, 128 uint64_t base_address) 129{ 130 struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events; 131 struct rgp_loader_events_record *record; 132 133 record = malloc(sizeof(struct rgp_loader_events_record)); 134 if (!record) 135 return false; 136 137 record->loader_event_type = RGP_LOAD_TO_GPU_MEMORY; 138 record->reserved = 0; 139 record->base_address = base_address & 0xffffffffffff; 140 record->code_object_hash[0] = pipeline_hash; 141 record->code_object_hash[1] = pipeline_hash; 142 record->time_stamp = os_time_get_nano(); 143 144 simple_mtx_lock(&loader_events->lock); 145 list_addtail(&record->list, &loader_events->record); 146 loader_events->record_count++; 147 simple_mtx_unlock(&loader_events->lock); 148 149 return true; 150} 151 152/* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/5260 153 * On some HW SQTT can hang if we're not in one of the profiling pstates. */ 154bool 155ac_check_profile_state(const struct radeon_info *info) 156{ 157 char path[128]; 158 char data[128]; 159 int n; 160 161 snprintf(path, sizeof(path), 162 "/sys/bus/pci/devices/%04x:%02x:%02x.%x/power_dpm_force_performance_level", 163 info->pci_domain, info->pci_bus, info->pci_dev, info->pci_func); 164 165 FILE *f = fopen(path, "r"); 166 if (!f) 167 return false; /* Unknown but optimistic. */ 168 n = fread(data, 1, sizeof(data) - 1, f); 169 fclose(f); 170 data[n] = 0; 171 return strstr(data, "profile") == NULL; 172} 173