1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2020 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * Copyright 2020 Valve Corporation 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 9bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 10bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 11bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 15bf215546Sopenharmony_ci * Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 24bf215546Sopenharmony_ci */ 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "ac_sqtt.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "ac_gpu_info.h" 29bf215546Sopenharmony_ci#include "util/u_math.h" 30bf215546Sopenharmony_ci#include "util/os_time.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ciuint64_t 33bf215546Sopenharmony_ciac_thread_trace_get_info_offset(unsigned se) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci return sizeof(struct ac_thread_trace_info) * se; 36bf215546Sopenharmony_ci} 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ciuint64_t 39bf215546Sopenharmony_ciac_thread_trace_get_data_offset(const struct radeon_info *rad_info, 40bf215546Sopenharmony_ci const struct ac_thread_trace_data *data, unsigned se) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci unsigned max_se = rad_info->max_se; 43bf215546Sopenharmony_ci uint64_t data_offset; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se, 46bf215546Sopenharmony_ci 1 << SQTT_BUFFER_ALIGN_SHIFT); 47bf215546Sopenharmony_ci data_offset += data->buffer_size * se; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci return data_offset; 50bf215546Sopenharmony_ci} 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ciuint64_t 53bf215546Sopenharmony_ciac_thread_trace_get_info_va(uint64_t va, unsigned se) 54bf215546Sopenharmony_ci{ 55bf215546Sopenharmony_ci return va + ac_thread_trace_get_info_offset(se); 56bf215546Sopenharmony_ci} 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ciuint64_t 59bf215546Sopenharmony_ciac_thread_trace_get_data_va(const struct radeon_info *rad_info, 60bf215546Sopenharmony_ci const struct ac_thread_trace_data *data, uint64_t va, unsigned se) 61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci return va + ac_thread_trace_get_data_offset(rad_info, data, se); 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cibool 66bf215546Sopenharmony_ciac_is_thread_trace_complete(struct radeon_info *rad_info, 67bf215546Sopenharmony_ci const struct ac_thread_trace_data *data, 68bf215546Sopenharmony_ci const struct ac_thread_trace_info *info) 69bf215546Sopenharmony_ci{ 70bf215546Sopenharmony_ci if (rad_info->gfx_level >= GFX10) { 71bf215546Sopenharmony_ci /* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the number of 72bf215546Sopenharmony_ci * dropped bytes per SE via THREAD_TRACE_DROPPED_CNTR. Though, this 73bf215546Sopenharmony_ci * doesn't seem reliable because it might still report non-zero even if 74bf215546Sopenharmony_ci * the SQTT buffer isn't full. 75bf215546Sopenharmony_ci * 76bf215546Sopenharmony_ci * The solution here is to compare the number of bytes written by the hw 77bf215546Sopenharmony_ci * (in units of 32 bytes) to the SQTT buffer size. If it's equal, that 78bf215546Sopenharmony_ci * means that the buffer is full and should be resized. 79bf215546Sopenharmony_ci */ 80bf215546Sopenharmony_ci return !(info->cur_offset * 32 == data->buffer_size - 32); 81bf215546Sopenharmony_ci } 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* Otherwise, compare the current thread trace offset with the number 84bf215546Sopenharmony_ci * of written bytes. 85bf215546Sopenharmony_ci */ 86bf215546Sopenharmony_ci return info->cur_offset == info->gfx9_write_counter; 87bf215546Sopenharmony_ci} 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ciuint32_t 90bf215546Sopenharmony_ciac_get_expected_buffer_size(struct radeon_info *rad_info, 91bf215546Sopenharmony_ci const struct ac_thread_trace_info *info) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci if (rad_info->gfx_level >= GFX10) { 94bf215546Sopenharmony_ci uint32_t dropped_cntr_per_se = info->gfx10_dropped_cntr / rad_info->max_se; 95bf215546Sopenharmony_ci return ((info->cur_offset * 32) + dropped_cntr_per_se) / 1024; 96bf215546Sopenharmony_ci } 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci return (info->gfx9_write_counter * 32) / 1024; 99bf215546Sopenharmony_ci} 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_cibool 102bf215546Sopenharmony_ciac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data, 103bf215546Sopenharmony_ci uint64_t pipeline_hash) 104bf215546Sopenharmony_ci{ 105bf215546Sopenharmony_ci struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation; 106bf215546Sopenharmony_ci struct rgp_pso_correlation_record *record; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci record = malloc(sizeof(struct rgp_pso_correlation_record)); 109bf215546Sopenharmony_ci if (!record) 110bf215546Sopenharmony_ci return false; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci record->api_pso_hash = pipeline_hash; 113bf215546Sopenharmony_ci record->pipeline_hash[0] = pipeline_hash; 114bf215546Sopenharmony_ci record->pipeline_hash[1] = pipeline_hash; 115bf215546Sopenharmony_ci memset(record->api_level_obj_name, 0, sizeof(record->api_level_obj_name)); 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci simple_mtx_lock(&pso_correlation->lock); 118bf215546Sopenharmony_ci list_addtail(&record->list, &pso_correlation->record); 119bf215546Sopenharmony_ci pso_correlation->record_count++; 120bf215546Sopenharmony_ci simple_mtx_unlock(&pso_correlation->lock); 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci return true; 123bf215546Sopenharmony_ci} 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cibool 126bf215546Sopenharmony_ciac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data, 127bf215546Sopenharmony_ci uint64_t pipeline_hash, 128bf215546Sopenharmony_ci uint64_t base_address) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events; 131bf215546Sopenharmony_ci struct rgp_loader_events_record *record; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci record = malloc(sizeof(struct rgp_loader_events_record)); 134bf215546Sopenharmony_ci if (!record) 135bf215546Sopenharmony_ci return false; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci record->loader_event_type = RGP_LOAD_TO_GPU_MEMORY; 138bf215546Sopenharmony_ci record->reserved = 0; 139bf215546Sopenharmony_ci record->base_address = base_address & 0xffffffffffff; 140bf215546Sopenharmony_ci record->code_object_hash[0] = pipeline_hash; 141bf215546Sopenharmony_ci record->code_object_hash[1] = pipeline_hash; 142bf215546Sopenharmony_ci record->time_stamp = os_time_get_nano(); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci simple_mtx_lock(&loader_events->lock); 145bf215546Sopenharmony_ci list_addtail(&record->list, &loader_events->record); 146bf215546Sopenharmony_ci loader_events->record_count++; 147bf215546Sopenharmony_ci simple_mtx_unlock(&loader_events->lock); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci return true; 150bf215546Sopenharmony_ci} 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci/* See https://gitlab.freedesktop.org/mesa/mesa/-/issues/5260 153bf215546Sopenharmony_ci * On some HW SQTT can hang if we're not in one of the profiling pstates. */ 154bf215546Sopenharmony_cibool 155bf215546Sopenharmony_ciac_check_profile_state(const struct radeon_info *info) 156bf215546Sopenharmony_ci{ 157bf215546Sopenharmony_ci char path[128]; 158bf215546Sopenharmony_ci char data[128]; 159bf215546Sopenharmony_ci int n; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci snprintf(path, sizeof(path), 162bf215546Sopenharmony_ci "/sys/bus/pci/devices/%04x:%02x:%02x.%x/power_dpm_force_performance_level", 163bf215546Sopenharmony_ci info->pci_domain, info->pci_bus, info->pci_dev, info->pci_func); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci FILE *f = fopen(path, "r"); 166bf215546Sopenharmony_ci if (!f) 167bf215546Sopenharmony_ci return false; /* Unknown but optimistic. */ 168bf215546Sopenharmony_ci n = fread(data, 1, sizeof(data) - 1, f); 169bf215546Sopenharmony_ci fclose(f); 170bf215546Sopenharmony_ci data[n] = 0; 171bf215546Sopenharmony_ci return strstr(data, "profile") == NULL; 172bf215546Sopenharmony_ci} 173