1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2014 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "ac_binary.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "ac_gpu_info.h" 27bf215546Sopenharmony_ci#include "util/u_math.h" 28bf215546Sopenharmony_ci#include "util/u_memory.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#ifndef _WIN32 31bf215546Sopenharmony_ci#include <gelf.h> 32bf215546Sopenharmony_ci#include <libelf.h> 33bf215546Sopenharmony_ci#endif 34bf215546Sopenharmony_ci#include <sid.h> 35bf215546Sopenharmony_ci#include <stdio.h> 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#define SPILLED_SGPRS 0x4 38bf215546Sopenharmony_ci#define SPILLED_VGPRS 0x8 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci/* Parse configuration data in .AMDGPU.config section format. */ 41bf215546Sopenharmony_civoid ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size, 42bf215546Sopenharmony_ci const struct radeon_info *info, struct ac_shader_config *conf) 43bf215546Sopenharmony_ci{ 44bf215546Sopenharmony_ci for (size_t i = 0; i < nbytes; i += 8) { 45bf215546Sopenharmony_ci unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i)); 46bf215546Sopenharmony_ci unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4)); 47bf215546Sopenharmony_ci switch (reg) { 48bf215546Sopenharmony_ci case R_00B028_SPI_SHADER_PGM_RSRC1_PS: 49bf215546Sopenharmony_ci case R_00B128_SPI_SHADER_PGM_RSRC1_VS: 50bf215546Sopenharmony_ci case R_00B228_SPI_SHADER_PGM_RSRC1_GS: 51bf215546Sopenharmony_ci case R_00B848_COMPUTE_PGM_RSRC1: 52bf215546Sopenharmony_ci case R_00B428_SPI_SHADER_PGM_RSRC1_HS: 53bf215546Sopenharmony_ci if (wave_size == 32 || info->wave64_vgpr_alloc_granularity == 8) 54bf215546Sopenharmony_ci conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8); 55bf215546Sopenharmony_ci else 56bf215546Sopenharmony_ci conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); 59bf215546Sopenharmony_ci /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */ 60bf215546Sopenharmony_ci conf->float_mode = G_00B028_FLOAT_MODE(value); 61bf215546Sopenharmony_ci conf->rsrc1 = value; 62bf215546Sopenharmony_ci break; 63bf215546Sopenharmony_ci case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: 64bf215546Sopenharmony_ci conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); 65bf215546Sopenharmony_ci /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */ 66bf215546Sopenharmony_ci conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value); 67bf215546Sopenharmony_ci conf->rsrc2 = value; 68bf215546Sopenharmony_ci break; 69bf215546Sopenharmony_ci case R_00B12C_SPI_SHADER_PGM_RSRC2_VS: 70bf215546Sopenharmony_ci conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value); 71bf215546Sopenharmony_ci conf->rsrc2 = value; 72bf215546Sopenharmony_ci break; 73bf215546Sopenharmony_ci case R_00B22C_SPI_SHADER_PGM_RSRC2_GS: 74bf215546Sopenharmony_ci conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value); 75bf215546Sopenharmony_ci conf->rsrc2 = value; 76bf215546Sopenharmony_ci break; 77bf215546Sopenharmony_ci case R_00B42C_SPI_SHADER_PGM_RSRC2_HS: 78bf215546Sopenharmony_ci conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value); 79bf215546Sopenharmony_ci conf->rsrc2 = value; 80bf215546Sopenharmony_ci break; 81bf215546Sopenharmony_ci case R_00B84C_COMPUTE_PGM_RSRC2: 82bf215546Sopenharmony_ci conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value)); 83bf215546Sopenharmony_ci conf->rsrc2 = value; 84bf215546Sopenharmony_ci break; 85bf215546Sopenharmony_ci case R_00B8A0_COMPUTE_PGM_RSRC3: 86bf215546Sopenharmony_ci conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value); 87bf215546Sopenharmony_ci conf->rsrc3 = value; 88bf215546Sopenharmony_ci break; 89bf215546Sopenharmony_ci case R_0286CC_SPI_PS_INPUT_ENA: 90bf215546Sopenharmony_ci conf->spi_ps_input_ena = value; 91bf215546Sopenharmony_ci break; 92bf215546Sopenharmony_ci case R_0286D0_SPI_PS_INPUT_ADDR: 93bf215546Sopenharmony_ci conf->spi_ps_input_addr = value; 94bf215546Sopenharmony_ci break; 95bf215546Sopenharmony_ci case R_0286E8_SPI_TMPRING_SIZE: 96bf215546Sopenharmony_ci case R_00B860_COMPUTE_TMPRING_SIZE: 97bf215546Sopenharmony_ci if (info->gfx_level >= GFX11) 98bf215546Sopenharmony_ci conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 256; 99bf215546Sopenharmony_ci else 100bf215546Sopenharmony_ci conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(value) * 1024; 101bf215546Sopenharmony_ci break; 102bf215546Sopenharmony_ci case SPILLED_SGPRS: 103bf215546Sopenharmony_ci conf->spilled_sgprs = value; 104bf215546Sopenharmony_ci break; 105bf215546Sopenharmony_ci case SPILLED_VGPRS: 106bf215546Sopenharmony_ci conf->spilled_vgprs = value; 107bf215546Sopenharmony_ci break; 108bf215546Sopenharmony_ci default: { 109bf215546Sopenharmony_ci static bool printed; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci if (!printed) { 112bf215546Sopenharmony_ci fprintf(stderr, 113bf215546Sopenharmony_ci "Warning: LLVM emitted unknown " 114bf215546Sopenharmony_ci "config register: 0x%x\n", 115bf215546Sopenharmony_ci reg); 116bf215546Sopenharmony_ci printed = true; 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci } break; 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci if (!conf->spi_ps_input_addr) 123bf215546Sopenharmony_ci conf->spi_ps_input_addr = conf->spi_ps_input_ena; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci /* GFX 10.3 internally: 126bf215546Sopenharmony_ci * - aligns VGPRS to 16 for Wave32 and 8 for Wave64 127bf215546Sopenharmony_ci * - aligns LDS to 1024 128bf215546Sopenharmony_ci * 129bf215546Sopenharmony_ci * For shader-db stats, set num_vgprs that the hw actually uses. 130bf215546Sopenharmony_ci */ 131bf215546Sopenharmony_ci if (info->gfx_level == GFX10_3) { 132bf215546Sopenharmony_ci conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8); 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci /* Enable 64-bit and 16-bit denormals, because there is no performance 136bf215546Sopenharmony_ci * cost. 137bf215546Sopenharmony_ci * 138bf215546Sopenharmony_ci * Don't enable denormals for 32-bit floats, because: 139bf215546Sopenharmony_ci * - denormals disable output modifiers 140bf215546Sopenharmony_ci * - denormals break v_mad_f32 141bf215546Sopenharmony_ci * - GFX6 & GFX7 would be very slow 142bf215546Sopenharmony_ci */ 143bf215546Sopenharmony_ci conf->float_mode &= ~V_00B028_FP_32_DENORMS; 144bf215546Sopenharmony_ci conf->float_mode |= V_00B028_FP_16_64_DENORMS; 145bf215546Sopenharmony_ci} 146