1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * based on si_state.c 6bf215546Sopenharmony_ci * Copyright © 2015 Advanced Micro Devices, Inc. 7bf215546Sopenharmony_ci * 8bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 9bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 10bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 11bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 13bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 16bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 17bf215546Sopenharmony_ci * Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25bf215546Sopenharmony_ci * IN THE SOFTWARE. 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* command buffer handling for AMD GCN */ 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "radv_cs.h" 31bf215546Sopenharmony_ci#include "radv_private.h" 32bf215546Sopenharmony_ci#include "radv_shader.h" 33bf215546Sopenharmony_ci#include "sid.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistatic void 36bf215546Sopenharmony_cisi_write_harvested_raster_configs(struct radv_physical_device *physical_device, 37bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, unsigned raster_config, 38bf215546Sopenharmony_ci unsigned raster_config_1) 39bf215546Sopenharmony_ci{ 40bf215546Sopenharmony_ci unsigned num_se = MAX2(physical_device->rad_info.max_se, 1); 41bf215546Sopenharmony_ci unsigned raster_config_se[4]; 42bf215546Sopenharmony_ci unsigned se; 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1, 45bf215546Sopenharmony_ci raster_config_se); 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci for (se = 0; se < num_se; se++) { 48bf215546Sopenharmony_ci /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ 49bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX7) 50bf215546Sopenharmony_ci radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, 51bf215546Sopenharmony_ci S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) | 52bf215546Sopenharmony_ci S_00802C_INSTANCE_BROADCAST_WRITES(1)); 53bf215546Sopenharmony_ci else 54bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, 55bf215546Sopenharmony_ci S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | 56bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 57bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); 58bf215546Sopenharmony_ci } 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ 61bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX7) 62bf215546Sopenharmony_ci radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, 63bf215546Sopenharmony_ci S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) | 64bf215546Sopenharmony_ci S_00802C_INSTANCE_BROADCAST_WRITES(1)); 65bf215546Sopenharmony_ci else 66bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, 67bf215546Sopenharmony_ci S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 68bf215546Sopenharmony_ci S_030800_INSTANCE_BROADCAST_WRITES(1)); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX7) 71bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 72bf215546Sopenharmony_ci} 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_civoid 75bf215546Sopenharmony_cisi_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) 76bf215546Sopenharmony_ci{ 77bf215546Sopenharmony_ci const struct radeon_info *info = &device->physical_device->rad_info; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); 80bf215546Sopenharmony_ci radeon_emit(cs, 0); 81bf215546Sopenharmony_ci radeon_emit(cs, 0); 82bf215546Sopenharmony_ci radeon_emit(cs, 0); 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, 85bf215546Sopenharmony_ci S_00B834_DATA(device->physical_device->rad_info.address32_hi >> 8)); 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2); 88bf215546Sopenharmony_ci /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1, 89bf215546Sopenharmony_ci * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */ 90bf215546Sopenharmony_ci radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en)); 91bf215546Sopenharmony_ci radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en)); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX7) { 94bf215546Sopenharmony_ci /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ 95bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); 96bf215546Sopenharmony_ci radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en)); 97bf215546Sopenharmony_ci radeon_emit(cs, S_00B858_SH0_CU_EN(info->spi_cu_en) | S_00B858_SH1_CU_EN(info->spi_cu_en)); 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci if (device->border_color_data.bo) { 100bf215546Sopenharmony_ci uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2); 103bf215546Sopenharmony_ci radeon_emit(cs, bc_va >> 8); 104bf215546Sopenharmony_ci radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); 105bf215546Sopenharmony_ci } 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9 && 109bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level < GFX11) { 110bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, 111bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level >= GFX10 ? 0x20 : 0); 112bf215546Sopenharmony_ci } 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX10) { 115bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 5); 116bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */ 117bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */ 118bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B898_COMPUTE_USER_ACCUM_2 */ 119bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B89C_COMPUTE_USER_ACCUM_3 */ 120bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B8A0_COMPUTE_PGM_RSRC3 */ 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID 124bf215546Sopenharmony_ci * and is now per pipe, so it should be handled in the 125bf215546Sopenharmony_ci * kernel if we want to use something other than the default value, 126bf215546Sopenharmony_ci * which is now 0x22f. 127bf215546Sopenharmony_ci */ 128bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level <= GFX6) { 129bf215546Sopenharmony_ci /* XXX: This should be: 130bf215546Sopenharmony_ci * (number of compute units) * 4 * (waves per simd) - 1 */ 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID, 0x190 /* Default value */); 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci if (device->border_color_data.bo) { 135bf215546Sopenharmony_ci uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo); 136bf215546Sopenharmony_ci radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (device->tma_bo) { 141bf215546Sopenharmony_ci uint64_t tba_va, tma_va; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci assert(device->physical_device->rad_info.gfx_level == GFX8); 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci tba_va = radv_trap_handler_shader_get_va(device->trap_handler_shader); 146bf215546Sopenharmony_ci tma_va = radv_buffer_get_va(device->tma_bo); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4); 149bf215546Sopenharmony_ci radeon_emit(cs, tba_va >> 8); 150bf215546Sopenharmony_ci radeon_emit(cs, tba_va >> 40); 151bf215546Sopenharmony_ci radeon_emit(cs, tma_va >> 8); 152bf215546Sopenharmony_ci radeon_emit(cs, tma_va >> 40); 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX11) { 156bf215546Sopenharmony_ci uint32_t spi_cu_en = device->physical_device->rad_info.spi_cu_en; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4); 159bf215546Sopenharmony_ci radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE4 */ 160bf215546Sopenharmony_ci radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE5 */ 161bf215546Sopenharmony_ci radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE6 */ 162bf215546Sopenharmony_ci radeon_emit(cs, S_00B8AC_SA0_CU_EN(spi_cu_en) | S_00B8AC_SA1_CU_EN(spi_cu_en)); /* SE7 */ 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, 64); 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci/* 12.4 fixed-point */ 169bf215546Sopenharmony_cistatic unsigned 170bf215546Sopenharmony_ciradv_pack_float_12p4(float x) 171bf215546Sopenharmony_ci{ 172bf215546Sopenharmony_ci return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16; 173bf215546Sopenharmony_ci} 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cistatic void 176bf215546Sopenharmony_cisi_set_raster_config(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs) 177bf215546Sopenharmony_ci{ 178bf215546Sopenharmony_ci unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16); 179bf215546Sopenharmony_ci unsigned rb_mask = physical_device->rad_info.enabled_rb_mask; 180bf215546Sopenharmony_ci unsigned raster_config, raster_config_1; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci ac_get_raster_config(&physical_device->rad_info, &raster_config, &raster_config_1, NULL); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci /* Always use the default config when all backends are enabled 185bf215546Sopenharmony_ci * (or when we failed to determine the enabled backends). 186bf215546Sopenharmony_ci */ 187bf215546Sopenharmony_ci if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 188bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config); 189bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX7) 190bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 191bf215546Sopenharmony_ci } else { 192bf215546Sopenharmony_ci si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1); 193bf215546Sopenharmony_ci } 194bf215546Sopenharmony_ci} 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_civoid 197bf215546Sopenharmony_cisi_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) 198bf215546Sopenharmony_ci{ 199bf215546Sopenharmony_ci struct radv_physical_device *physical_device = device->physical_device; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci bool has_clear_state = physical_device->rad_info.has_clear_state; 202bf215546Sopenharmony_ci int i; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 205bf215546Sopenharmony_ci radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1)); 206bf215546Sopenharmony_ci radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1)); 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci if (has_clear_state) { 209bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0)); 210bf215546Sopenharmony_ci radeon_emit(cs, 0); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level <= GFX8) 214bf215546Sopenharmony_ci si_set_raster_config(physical_device, cs); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 217bf215546Sopenharmony_ci if (!has_clear_state) 218bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci /* FIXME calculate these values somehow ??? */ 221bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level <= GFX8) { 222bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 223bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40); 224bf215546Sopenharmony_ci } 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci if (!has_clear_state) { 227bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX11) { 228bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); 229bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level <= GFX9) 235bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 236bf215546Sopenharmony_ci if (!has_clear_state && physical_device->rad_info.gfx_level < GFX11) 237bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); 238bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX7) 239bf215546Sopenharmony_ci radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, 240bf215546Sopenharmony_ci S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci if (!has_clear_state) 243bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci /* CLEAR_STATE doesn't clear these correctly on certain generations. 246bf215546Sopenharmony_ci * I don't know why. Deduced by trial and error. 247bf215546Sopenharmony_ci */ 248bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level <= GFX7 || !has_clear_state) { 249bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 250bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, 251bf215546Sopenharmony_ci S_028204_WINDOW_OFFSET_DISABLE(1)); 252bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, 253bf215546Sopenharmony_ci S_028240_WINDOW_OFFSET_DISABLE(1)); 254bf215546Sopenharmony_ci radeon_set_context_reg( 255bf215546Sopenharmony_ci cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, 256bf215546Sopenharmony_ci S_028244_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028244_BR_Y(MAX_FRAMEBUFFER_HEIGHT)); 257bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 258bf215546Sopenharmony_ci radeon_set_context_reg( 259bf215546Sopenharmony_ci cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, 260bf215546Sopenharmony_ci S_028034_BR_X(MAX_FRAMEBUFFER_WIDTH) | S_028034_BR_Y(MAX_FRAMEBUFFER_HEIGHT)); 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci if (!has_clear_state) { 264bf215546Sopenharmony_ci for (i = 0; i < 16; i++) { 265bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i * 8, 0); 266bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i * 8, fui(1.0)); 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (!has_clear_state) { 271bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 272bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); 273bf215546Sopenharmony_ci /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */ 274bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 275bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); 276bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 277bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 278bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 279bf215546Sopenharmony_ci } 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, 282bf215546Sopenharmony_ci S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | 283bf215546Sopenharmony_ci S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10) { 286bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); 287bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0); 288bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0); 289bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030928_GE_INDX_OFFSET, 0); 290bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_03097C_GE_STEREO_CNTL, 0); 291bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX11) { 294bf215546Sopenharmony_ci radeon_set_context_reg( 295bf215546Sopenharmony_ci cs, R_028038_DB_DFSM_CONTROL, 296bf215546Sopenharmony_ci S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } else if (physical_device->rad_info.gfx_level == GFX9) { 299bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0); 300bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0); 301bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0); 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, 304bf215546Sopenharmony_ci S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | 305bf215546Sopenharmony_ci S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); 306bf215546Sopenharmony_ci } else { 307bf215546Sopenharmony_ci /* These registers, when written, also overwrite the 308bf215546Sopenharmony_ci * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting 309bf215546Sopenharmony_ci * them. It would be an issue if there was another UMD 310bf215546Sopenharmony_ci * changing them. 311bf215546Sopenharmony_ci */ 312bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0); 313bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0); 314bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX10) { 318bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, 319bf215546Sopenharmony_ci S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 320bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, 321bf215546Sopenharmony_ci S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 322bf215546Sopenharmony_ci } else if (device->physical_device->rad_info.gfx_level == GFX9) { 323bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS, 324bf215546Sopenharmony_ci S_00B414_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 325bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES, 326bf215546Sopenharmony_ci S_00B214_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 327bf215546Sopenharmony_ci } else { 328bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, 329bf215546Sopenharmony_ci S_00B524_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 330bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, 331bf215546Sopenharmony_ci S_00B324_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level < GFX11) 335bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, 336bf215546Sopenharmony_ci S_00B124_MEM_BASE(device->physical_device->rad_info.address32_hi >> 8)); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci unsigned cu_mask_ps = 0xffffffff; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci /* It's wasteful to enable all CUs for PS if shader arrays have a 341bf215546Sopenharmony_ci * different number of CUs. The reason is that the hardware sends the 342bf215546Sopenharmony_ci * same number of PS waves to each shader array, so the slowest shader 343bf215546Sopenharmony_ci * array limits the performance. Disable the extra CUs for PS in 344bf215546Sopenharmony_ci * other shader arrays to save power and thus increase clocks for busy 345bf215546Sopenharmony_ci * CUs. In the future, we might disable or enable this tweak only for 346bf215546Sopenharmony_ci * certain apps. 347bf215546Sopenharmony_ci */ 348bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10_3) 349bf215546Sopenharmony_ci cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX7) { 352bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10 && 353bf215546Sopenharmony_ci physical_device->rad_info.gfx_level < GFX11) { 354bf215546Sopenharmony_ci /* Logical CUs 16 - 31 */ 355bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff), 356bf215546Sopenharmony_ci C_00B404_CU_EN, 16, &physical_device->rad_info, 357bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 358bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff), 359bf215546Sopenharmony_ci C_00B104_CU_EN, 16, &physical_device->rad_info, 360bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 361bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16), 362bf215546Sopenharmony_ci C_00B004_CU_EN, 16, &physical_device->rad_info, 363bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10) { 367bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 368bf215546Sopenharmony_ci S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), 369bf215546Sopenharmony_ci C_00B41C_CU_EN, 0, &physical_device->rad_info, 370bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 371bf215546Sopenharmony_ci } else if (physical_device->rad_info.gfx_level == GFX9) { 372bf215546Sopenharmony_ci radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3, 373bf215546Sopenharmony_ci S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); 374bf215546Sopenharmony_ci } else { 375bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 376bf215546Sopenharmony_ci S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); 377bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); 378bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 379bf215546Sopenharmony_ci S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F)); 380bf215546Sopenharmony_ci /* If this is 0, Bonaire can hang even if GS isn't being used. 381bf215546Sopenharmony_ci * Other chips are unaffected. These are suboptimal values, 382bf215546Sopenharmony_ci * but we don't use on-chip GS. 383bf215546Sopenharmony_ci */ 384bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, 385bf215546Sopenharmony_ci S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10) { 389bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 390bf215546Sopenharmony_ci S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F) | 391bf215546Sopenharmony_ci S_00B01C_LDS_GROUP_SIZE(physical_device->rad_info.gfx_level >= GFX11), 392bf215546Sopenharmony_ci C_00B01C_CU_EN, 0, &physical_device->rad_info, 393bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 394bf215546Sopenharmony_ci } else { 395bf215546Sopenharmony_ci radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3, 396bf215546Sopenharmony_ci S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F)); 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci } 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10) { 401bf215546Sopenharmony_ci /* Break up a pixel wave if it contains deallocs for more than 402bf215546Sopenharmony_ci * half the parameter cache. 403bf215546Sopenharmony_ci * 404bf215546Sopenharmony_ci * To avoid a deadlock where pixel waves aren't launched 405bf215546Sopenharmony_ci * because they're waiting for more pixels while the frontend 406bf215546Sopenharmony_ci * is stuck waiting for PC space, the maximum allowed value is 407bf215546Sopenharmony_ci * the size of the PC minus the largest possible allocation for 408bf215546Sopenharmony_ci * a single primitive shader subgroup. 409bf215546Sopenharmony_ci */ 410bf215546Sopenharmony_ci uint32_t max_deallocs_in_wave = physical_device->rad_info.gfx_level >= GFX11 ? 16 : 512; 411bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, 412bf215546Sopenharmony_ci S_028C50_MAX_DEALLOCS_IN_WAVE(max_deallocs_in_wave)); 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX11) 415bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci /* Vulkan doesn't support user edge flags and it also doesn't 418bf215546Sopenharmony_ci * need to prevent drawing lines on internal edges of 419bf215546Sopenharmony_ci * decomposed primitives (such as quads) with polygon mode = lines. 420bf215546Sopenharmony_ci */ 421bf215546Sopenharmony_ci unsigned vertex_reuse_depth = physical_device->rad_info.gfx_level >= GFX10_3 ? 30 : 0; 422bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028838_PA_CL_NGG_CNTL, 423bf215546Sopenharmony_ci S_028838_INDEX_BUF_EDGE_FLAG_ENA(0) | 424bf215546Sopenharmony_ci S_028838_VERTEX_REUSE_DEPTH(vertex_reuse_depth)); 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */ 427bf215546Sopenharmony_ci unsigned meta_write_policy, meta_read_policy; 428bf215546Sopenharmony_ci unsigned no_alloc = device->physical_device->rad_info.gfx_level >= GFX11 429bf215546Sopenharmony_ci ? V_02807C_CACHE_NOA_GFX11 430bf215546Sopenharmony_ci : V_02807C_CACHE_NOA_GFX10; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci /* TODO: investigate whether LRU improves performance on other chips too */ 433bf215546Sopenharmony_ci if (physical_device->rad_info.max_render_backends <= 4) { 434bf215546Sopenharmony_ci meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ 435bf215546Sopenharmony_ci meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ 436bf215546Sopenharmony_ci } else { 437bf215546Sopenharmony_ci meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */ 438bf215546Sopenharmony_ci meta_read_policy = no_alloc; /* don't cache reads */ 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci radeon_set_context_reg( 442bf215546Sopenharmony_ci cs, R_02807C_DB_RMI_L2_CACHE_CONTROL, 443bf215546Sopenharmony_ci S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | 444bf215546Sopenharmony_ci S_02807C_HTILE_WR_POLICY(meta_write_policy) | 445bf215546Sopenharmony_ci S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_Z_RD_POLICY(no_alloc) | 446bf215546Sopenharmony_ci S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy)); 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci uint32_t gl2_cc; 449bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX11) { 450bf215546Sopenharmony_ci gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) | 451bf215546Sopenharmony_ci S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) | 452bf215546Sopenharmony_ci S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11); 453bf215546Sopenharmony_ci } else { 454bf215546Sopenharmony_ci gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) | 455bf215546Sopenharmony_ci S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | 456bf215546Sopenharmony_ci S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) | 457bf215546Sopenharmony_ci S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) | 458bf215546Sopenharmony_ci S_028410_CMASK_RD_POLICY(meta_read_policy) | 459bf215546Sopenharmony_ci S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_GFX10) | 460bf215546Sopenharmony_ci S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX10); 461bf215546Sopenharmony_ci } 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL, 464bf215546Sopenharmony_ci gl2_cc | S_028410_DCC_RD_POLICY(meta_read_policy)); 465bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 4); 468bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0 */ 469bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1 */ 470bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2 */ 471bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3 */ 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX11) { 474bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 4); 475bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0 */ 476bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1 */ 477bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2 */ 478bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3 */ 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 4); 482bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0 */ 483bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1 */ 484bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2 */ 485bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3 */ 486bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 4); 487bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0 */ 488bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1 */ 489bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2 */ 490bf215546Sopenharmony_ci radeon_emit(cs, 0); /* R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3 */ 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, 493bf215546Sopenharmony_ci S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level < GFX11) 496bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX10_3) { 499bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); 500bf215546Sopenharmony_ci /* This allows sample shading. */ 501bf215546Sopenharmony_ci radeon_set_context_reg( 502bf215546Sopenharmony_ci cs, R_028848_PA_CL_VRS_CNTL, 503bf215546Sopenharmony_ci S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci } 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX9) { 508bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, 509bf215546Sopenharmony_ci S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | 510bf215546Sopenharmony_ci S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT_GFX9(24) | 511bf215546Sopenharmony_ci S_028B50_TRAP_SPLIT(6)); 512bf215546Sopenharmony_ci } else if (physical_device->rad_info.gfx_level >= GFX8) { 513bf215546Sopenharmony_ci uint32_t vgt_tess_distribution; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | 516bf215546Sopenharmony_ci S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT_GFX81(16); 517bf215546Sopenharmony_ci 518bf215546Sopenharmony_ci if (physical_device->rad_info.family == CHIP_FIJI || 519bf215546Sopenharmony_ci physical_device->rad_info.family >= CHIP_POLARIS10) 520bf215546Sopenharmony_ci vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 523bf215546Sopenharmony_ci } else if (!has_clear_state) { 524bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 525bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 526bf215546Sopenharmony_ci } 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci if (device->border_color_data.bo) { 529bf215546Sopenharmony_ci uint64_t border_color_va = radv_buffer_get_va(device->border_color_data.bo); 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 532bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX7) { 533bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI, 534bf215546Sopenharmony_ci S_028084_ADDRESS(border_color_va >> 40)); 535bf215546Sopenharmony_ci } 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX9) { 539bf215546Sopenharmony_ci radeon_set_context_reg( 540bf215546Sopenharmony_ci cs, R_028C48_PA_SC_BINNER_CNTL_1, 541bf215546Sopenharmony_ci S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) | 542bf215546Sopenharmony_ci S_028C48_MAX_PRIM_PER_BATCH(1023)); 543bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 544bf215546Sopenharmony_ci S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 545bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0); 546bf215546Sopenharmony_ci } 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci unsigned tmp = (unsigned)(1.0 * 8.0); 549bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A00_PA_SU_POINT_SIZE, 550bf215546Sopenharmony_ci S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 551bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028A04_PA_SU_POINT_MINMAX, 552bf215546Sopenharmony_ci S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) | 553bf215546Sopenharmony_ci S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2))); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci if (!has_clear_state) { 556bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL, S_028004_ZPASS_INCREMENT_DISABLE(1)); 557bf215546Sopenharmony_ci } 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci /* Enable the Polaris small primitive filter control. 560bf215546Sopenharmony_ci * XXX: There is possibly an issue when MSAA is off (see RadeonSI 561bf215546Sopenharmony_ci * has_msaa_sample_loc_bug). But this doesn't seem to regress anything, 562bf215546Sopenharmony_ci * and AMDVLK doesn't have a workaround as well. 563bf215546Sopenharmony_ci */ 564bf215546Sopenharmony_ci if (physical_device->rad_info.family >= CHIP_POLARIS10) { 565bf215546Sopenharmony_ci unsigned small_prim_filter_cntl = 566bf215546Sopenharmony_ci S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 567bf215546Sopenharmony_ci /* Workaround for a hw line bug. */ 568bf215546Sopenharmony_ci S_028830_LINE_FILTER_DISABLE(physical_device->rad_info.family <= CHIP_POLARIS12); 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); 571bf215546Sopenharmony_ci } 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci radeon_set_context_reg( 574bf215546Sopenharmony_ci cs, R_0286D4_SPI_INTERP_CONTROL_0, 575bf215546Sopenharmony_ci S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) | 576bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 577bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 578bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 579bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 580bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */ 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, 583bf215546Sopenharmony_ci S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | 584bf215546Sopenharmony_ci S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, 587bf215546Sopenharmony_ci S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | 588bf215546Sopenharmony_ci S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) | 589bf215546Sopenharmony_ci S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | 590bf215546Sopenharmony_ci S_028818_VPORT_Z_OFFSET_ENA(1)); 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci if (device->tma_bo) { 593bf215546Sopenharmony_ci uint64_t tba_va, tma_va; 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci assert(device->physical_device->rad_info.gfx_level == GFX8); 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci tba_va = radv_trap_handler_shader_get_va(device->trap_handler_shader); 598bf215546Sopenharmony_ci tma_va = radv_buffer_get_va(device->tma_bo); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS, 601bf215546Sopenharmony_ci R_00B200_SPI_SHADER_TBA_LO_GS, R_00B300_SPI_SHADER_TBA_LO_ES, 602bf215546Sopenharmony_ci R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS}; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(regs); ++i) { 605bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, regs[i], 4); 606bf215546Sopenharmony_ci radeon_emit(cs, tba_va >> 8); 607bf215546Sopenharmony_ci radeon_emit(cs, tba_va >> 40); 608bf215546Sopenharmony_ci radeon_emit(cs, tma_va >> 8); 609bf215546Sopenharmony_ci radeon_emit(cs, tma_va >> 40); 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci } 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci /* The DX10 diamond test is unnecessary with Vulkan and it decreases line rasterization 614bf215546Sopenharmony_ci * performance. 615bf215546Sopenharmony_ci */ 616bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BDC_PA_SC_LINE_CNTL, 0); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci if (physical_device->rad_info.gfx_level >= GFX11) { 619bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C54_PA_SC_BINNER_CNTL_2, 0); 620bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028620_PA_RATE_CNTL, 621bf215546Sopenharmony_ci S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1)); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123); 624bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D); 625bf215546Sopenharmony_ci } 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci si_emit_compute(device, cs); 628bf215546Sopenharmony_ci} 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_civoid 631bf215546Sopenharmony_cicik_create_gfx_config(struct radv_device *device) 632bf215546Sopenharmony_ci{ 633bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX); 634bf215546Sopenharmony_ci if (!cs) 635bf215546Sopenharmony_ci return; 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci si_emit_graphics(device, cs); 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci while (cs->cdw & 7) { 640bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_ib_pad_with_type2) 641bf215546Sopenharmony_ci radeon_emit(cs, PKT2_NOP_PAD); 642bf215546Sopenharmony_ci else 643bf215546Sopenharmony_ci radeon_emit(cs, PKT3_NOP_PAD); 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci VkResult result = 647bf215546Sopenharmony_ci device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws), 648bf215546Sopenharmony_ci RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | 649bf215546Sopenharmony_ci RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC, 650bf215546Sopenharmony_ci RADV_BO_PRIORITY_CS, 0, &device->gfx_init); 651bf215546Sopenharmony_ci if (result != VK_SUCCESS) 652bf215546Sopenharmony_ci goto fail; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci void *map = device->ws->buffer_map(device->gfx_init); 655bf215546Sopenharmony_ci if (!map) { 656bf215546Sopenharmony_ci device->ws->buffer_destroy(device->ws, device->gfx_init); 657bf215546Sopenharmony_ci device->gfx_init = NULL; 658bf215546Sopenharmony_ci goto fail; 659bf215546Sopenharmony_ci } 660bf215546Sopenharmony_ci memcpy(map, cs->buf, cs->cdw * 4); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci device->ws->buffer_unmap(device->gfx_init); 663bf215546Sopenharmony_ci device->gfx_init_size_dw = cs->cdw; 664bf215546Sopenharmony_cifail: 665bf215546Sopenharmony_ci device->ws->cs_destroy(cs); 666bf215546Sopenharmony_ci} 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_civoid 669bf215546Sopenharmony_ciradv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]) 670bf215546Sopenharmony_ci{ 671bf215546Sopenharmony_ci float x = viewport->x; 672bf215546Sopenharmony_ci float y = viewport->y; 673bf215546Sopenharmony_ci float half_width = 0.5f * viewport->width; 674bf215546Sopenharmony_ci float half_height = 0.5f * viewport->height; 675bf215546Sopenharmony_ci double n = viewport->minDepth; 676bf215546Sopenharmony_ci double f = viewport->maxDepth; 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci scale[0] = half_width; 679bf215546Sopenharmony_ci translate[0] = half_width + x; 680bf215546Sopenharmony_ci scale[1] = half_height; 681bf215546Sopenharmony_ci translate[1] = half_height + y; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci scale[2] = (f - n); 684bf215546Sopenharmony_ci translate[2] = n; 685bf215546Sopenharmony_ci} 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_cistatic VkRect2D 688bf215546Sopenharmony_cisi_scissor_from_viewport(const VkViewport *viewport) 689bf215546Sopenharmony_ci{ 690bf215546Sopenharmony_ci float scale[3], translate[3]; 691bf215546Sopenharmony_ci VkRect2D rect; 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci radv_get_viewport_xform(viewport, scale, translate); 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci rect.offset.x = translate[0] - fabsf(scale[0]); 696bf215546Sopenharmony_ci rect.offset.y = translate[1] - fabsf(scale[1]); 697bf215546Sopenharmony_ci rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x; 698bf215546Sopenharmony_ci rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y; 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci return rect; 701bf215546Sopenharmony_ci} 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_cistatic VkRect2D 704bf215546Sopenharmony_cisi_intersect_scissor(const VkRect2D *a, const VkRect2D *b) 705bf215546Sopenharmony_ci{ 706bf215546Sopenharmony_ci VkRect2D ret; 707bf215546Sopenharmony_ci ret.offset.x = MAX2(a->offset.x, b->offset.x); 708bf215546Sopenharmony_ci ret.offset.y = MAX2(a->offset.y, b->offset.y); 709bf215546Sopenharmony_ci ret.extent.width = 710bf215546Sopenharmony_ci MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x; 711bf215546Sopenharmony_ci ret.extent.height = 712bf215546Sopenharmony_ci MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y; 713bf215546Sopenharmony_ci return ret; 714bf215546Sopenharmony_ci} 715bf215546Sopenharmony_ci 716bf215546Sopenharmony_civoid 717bf215546Sopenharmony_cisi_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors, 718bf215546Sopenharmony_ci const VkViewport *viewports, unsigned rast_prim, float line_width) 719bf215546Sopenharmony_ci{ 720bf215546Sopenharmony_ci int i; 721bf215546Sopenharmony_ci float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY; 722bf215546Sopenharmony_ci float discard_x = 1.0f, discard_y = 1.0f; 723bf215546Sopenharmony_ci const float max_range = 32767.0f; 724bf215546Sopenharmony_ci if (!count) 725bf215546Sopenharmony_ci return; 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2); 728bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 729bf215546Sopenharmony_ci VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i); 730bf215546Sopenharmony_ci VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor); 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci radv_get_viewport_xform(viewports + i, scale, translate); 733bf215546Sopenharmony_ci scale[0] = fabsf(scale[0]); 734bf215546Sopenharmony_ci scale[1] = fabsf(scale[1]); 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci if (scale[0] < 0.5) 737bf215546Sopenharmony_ci scale[0] = 0.5; 738bf215546Sopenharmony_ci if (scale[1] < 0.5) 739bf215546Sopenharmony_ci scale[1] = 0.5; 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]); 742bf215546Sopenharmony_ci guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]); 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci radeon_emit(cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) | 745bf215546Sopenharmony_ci S_028250_WINDOW_OFFSET_DISABLE(1)); 746bf215546Sopenharmony_ci radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) | 747bf215546Sopenharmony_ci S_028254_BR_Y(scissor.offset.y + scissor.extent.height)); 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci if (radv_rast_prim_is_points_or_lines(rast_prim)) { 750bf215546Sopenharmony_ci /* When rendering wide points or lines, we need to be more conservative about when to 751bf215546Sopenharmony_ci * discard them entirely. */ 752bf215546Sopenharmony_ci float pixels; 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci if (rast_prim == V_028A6C_POINTLIST) { 755bf215546Sopenharmony_ci pixels = 8191.875f; 756bf215546Sopenharmony_ci } else { 757bf215546Sopenharmony_ci pixels = line_width; 758bf215546Sopenharmony_ci } 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci /* Add half the point size / line width. */ 761bf215546Sopenharmony_ci discard_x += pixels / (2.0 * scale[0]); 762bf215546Sopenharmony_ci discard_y += pixels / (2.0 * scale[1]); 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci /* Discard primitives that would lie entirely outside the clip region. */ 765bf215546Sopenharmony_ci discard_x = MIN2(discard_x, guardband_x); 766bf215546Sopenharmony_ci discard_y = MIN2(discard_y, guardband_y); 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci } 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4); 771bf215546Sopenharmony_ci radeon_emit(cs, fui(guardband_y)); 772bf215546Sopenharmony_ci radeon_emit(cs, fui(discard_y)); 773bf215546Sopenharmony_ci radeon_emit(cs, fui(guardband_x)); 774bf215546Sopenharmony_ci radeon_emit(cs, fui(discard_x)); 775bf215546Sopenharmony_ci} 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_cistatic inline unsigned 778bf215546Sopenharmony_ciradv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num) 779bf215546Sopenharmony_ci{ 780bf215546Sopenharmony_ci if (num == 0) 781bf215546Sopenharmony_ci return 0; 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci if (info->incr == 0) 784bf215546Sopenharmony_ci return 0; 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci if (num < info->min) 787bf215546Sopenharmony_ci return 0; 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci return 1 + ((num - info->min) / info->incr); 790bf215546Sopenharmony_ci} 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_cistatic const struct radv_prim_vertex_count prim_size_table[] = { 793bf215546Sopenharmony_ci [V_008958_DI_PT_NONE] = {0, 0}, [V_008958_DI_PT_POINTLIST] = {1, 1}, 794bf215546Sopenharmony_ci [V_008958_DI_PT_LINELIST] = {2, 2}, [V_008958_DI_PT_LINESTRIP] = {2, 1}, 795bf215546Sopenharmony_ci [V_008958_DI_PT_TRILIST] = {3, 3}, [V_008958_DI_PT_TRIFAN] = {3, 1}, 796bf215546Sopenharmony_ci [V_008958_DI_PT_TRISTRIP] = {3, 1}, [V_008958_DI_PT_LINELIST_ADJ] = {4, 4}, 797bf215546Sopenharmony_ci [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1}, [V_008958_DI_PT_TRILIST_ADJ] = {6, 6}, 798bf215546Sopenharmony_ci [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2}, [V_008958_DI_PT_RECTLIST] = {3, 3}, 799bf215546Sopenharmony_ci [V_008958_DI_PT_LINELOOP] = {2, 1}, [V_008958_DI_PT_POLYGON] = {3, 1}, 800bf215546Sopenharmony_ci [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0}, 801bf215546Sopenharmony_ci}; 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ciuint32_t 804bf215546Sopenharmony_cisi_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, 805bf215546Sopenharmony_ci bool indirect_draw, bool count_from_stream_output, 806bf215546Sopenharmony_ci uint32_t draw_vertex_count, unsigned topology, bool prim_restart_enable) 807bf215546Sopenharmony_ci{ 808bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; 809bf215546Sopenharmony_ci enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family; 810bf215546Sopenharmony_ci struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; 811bf215546Sopenharmony_ci const unsigned max_primgroup_in_wave = 2; 812bf215546Sopenharmony_ci /* SWITCH_ON_EOP(0) is always preferable. */ 813bf215546Sopenharmony_ci bool wd_switch_on_eop = false; 814bf215546Sopenharmony_ci bool ia_switch_on_eop = false; 815bf215546Sopenharmony_ci bool ia_switch_on_eoi = false; 816bf215546Sopenharmony_ci bool partial_vs_wave = false; 817bf215546Sopenharmony_ci bool partial_es_wave = cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.partial_es_wave; 818bf215546Sopenharmony_ci bool multi_instances_smaller_than_primgroup; 819bf215546Sopenharmony_ci struct radv_prim_vertex_count prim_vertex_count = prim_size_table[topology]; 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci if (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TESS_CTRL)) { 822bf215546Sopenharmony_ci if (topology == V_008958_DI_PT_PATCH) { 823bf215546Sopenharmony_ci prim_vertex_count.min = cmd_buffer->state.graphics_pipeline->tess_patch_control_points; 824bf215546Sopenharmony_ci prim_vertex_count.incr = 1; 825bf215546Sopenharmony_ci } 826bf215546Sopenharmony_ci } 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci multi_instances_smaller_than_primgroup = indirect_draw; 829bf215546Sopenharmony_ci if (!multi_instances_smaller_than_primgroup && instanced_draw) { 830bf215546Sopenharmony_ci uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count); 831bf215546Sopenharmony_ci if (num_prims < cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.primgroup_size) 832bf215546Sopenharmony_ci multi_instances_smaller_than_primgroup = true; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci ia_switch_on_eoi = cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.ia_switch_on_eoi; 836bf215546Sopenharmony_ci partial_vs_wave = cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.partial_vs_wave; 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci if (gfx_level >= GFX7) { 839bf215546Sopenharmony_ci /* WD_SWITCH_ON_EOP has no effect on GPUs with less than 840bf215546Sopenharmony_ci * 4 shader engines. Set 1 to pass the assertion below. 841bf215546Sopenharmony_ci * The other cases are hardware requirements. */ 842bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.max_se < 4 || 843bf215546Sopenharmony_ci topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP || 844bf215546Sopenharmony_ci topology == V_008958_DI_PT_TRIFAN || topology == V_008958_DI_PT_TRISTRIP_ADJ || 845bf215546Sopenharmony_ci (prim_restart_enable && 846bf215546Sopenharmony_ci (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10 || 847bf215546Sopenharmony_ci (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP)))) 848bf215546Sopenharmony_ci wd_switch_on_eop = true; 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. 851bf215546Sopenharmony_ci * We don't know that for indirect drawing, so treat it as 852bf215546Sopenharmony_ci * always problematic. */ 853bf215546Sopenharmony_ci if (family == CHIP_HAWAII && (instanced_draw || indirect_draw)) 854bf215546Sopenharmony_ci wd_switch_on_eop = true; 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci /* Performance recommendation for 4 SE Gfx7-8 parts if 857bf215546Sopenharmony_ci * instances are smaller than a primgroup. 858bf215546Sopenharmony_ci * Assume indirect draws always use small instances. 859bf215546Sopenharmony_ci * This is needed for good VS wave utilization. 860bf215546Sopenharmony_ci */ 861bf215546Sopenharmony_ci if (gfx_level <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup) 862bf215546Sopenharmony_ci wd_switch_on_eop = true; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci /* Hardware requirement when drawing primitives from a stream 865bf215546Sopenharmony_ci * output buffer. 866bf215546Sopenharmony_ci */ 867bf215546Sopenharmony_ci if (count_from_stream_output) 868bf215546Sopenharmony_ci wd_switch_on_eop = true; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci /* Required on GFX7 and later. */ 871bf215546Sopenharmony_ci if (info->max_se > 2 && !wd_switch_on_eop) 872bf215546Sopenharmony_ci ia_switch_on_eoi = true; 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci /* Required by Hawaii and, for some special cases, by GFX8. */ 875bf215546Sopenharmony_ci if (ia_switch_on_eoi && 876bf215546Sopenharmony_ci (family == CHIP_HAWAII || 877bf215546Sopenharmony_ci (gfx_level == GFX8 && 878bf215546Sopenharmony_ci /* max primgroup in wave is always 2 - leave this for documentation */ 879bf215546Sopenharmony_ci (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_GEOMETRY) || max_primgroup_in_wave != 2)))) 880bf215546Sopenharmony_ci partial_vs_wave = true; 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci /* Instancing bug on Bonaire. */ 883bf215546Sopenharmony_ci if (family == CHIP_BONAIRE && ia_switch_on_eoi && (instanced_draw || indirect_draw)) 884bf215546Sopenharmony_ci partial_vs_wave = true; 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci /* If the WD switch is false, the IA switch must be false too. */ 887bf215546Sopenharmony_ci assert(wd_switch_on_eop || !ia_switch_on_eop); 888bf215546Sopenharmony_ci } 889bf215546Sopenharmony_ci /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ 890bf215546Sopenharmony_ci if (gfx_level <= GFX8 && ia_switch_on_eoi) 891bf215546Sopenharmony_ci partial_es_wave = true; 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci if (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_GEOMETRY)) { 894bf215546Sopenharmony_ci /* GS hw bug with single-primitive instances and SWITCH_ON_EOI. 895bf215546Sopenharmony_ci * The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan 896bf215546Sopenharmony_ci * only applies it to Hawaii. Do what amdgpu-pro Vulkan does. 897bf215546Sopenharmony_ci */ 898bf215546Sopenharmony_ci if (family == CHIP_HAWAII && ia_switch_on_eoi) { 899bf215546Sopenharmony_ci bool set_vgt_flush = indirect_draw; 900bf215546Sopenharmony_ci if (!set_vgt_flush && instanced_draw) { 901bf215546Sopenharmony_ci uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count); 902bf215546Sopenharmony_ci if (num_prims <= 1) 903bf215546Sopenharmony_ci set_vgt_flush = true; 904bf215546Sopenharmony_ci } 905bf215546Sopenharmony_ci if (set_vgt_flush) 906bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH; 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci } 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_ci /* Workaround for a VGT hang when strip primitive types are used with 911bf215546Sopenharmony_ci * primitive restart. 912bf215546Sopenharmony_ci */ 913bf215546Sopenharmony_ci if (prim_restart_enable && 914bf215546Sopenharmony_ci (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP || 915bf215546Sopenharmony_ci topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) { 916bf215546Sopenharmony_ci partial_vs_wave = true; 917bf215546Sopenharmony_ci } 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci return cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.base | 920bf215546Sopenharmony_ci S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | 921bf215546Sopenharmony_ci S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | 922bf215546Sopenharmony_ci S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | 923bf215546Sopenharmony_ci S_028AA8_WD_SWITCH_ON_EOP(gfx_level >= GFX7 ? wd_switch_on_eop : 0); 924bf215546Sopenharmony_ci} 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_civoid 927bf215546Sopenharmony_cisi_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec, 928bf215546Sopenharmony_ci unsigned event, unsigned event_flags, unsigned dst_sel, 929bf215546Sopenharmony_ci unsigned data_sel, uint64_t va, uint32_t new_fence, 930bf215546Sopenharmony_ci uint64_t gfx9_eop_bug_va) 931bf215546Sopenharmony_ci{ 932bf215546Sopenharmony_ci unsigned op = EVENT_TYPE(event) | 933bf215546Sopenharmony_ci EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | 934bf215546Sopenharmony_ci event_flags; 935bf215546Sopenharmony_ci unsigned is_gfx8_mec = is_mec && gfx_level < GFX9; 936bf215546Sopenharmony_ci unsigned sel = EOP_DST_SEL(dst_sel) | EOP_DATA_SEL(data_sel); 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci /* Wait for write confirmation before writing data, but don't send 939bf215546Sopenharmony_ci * an interrupt. */ 940bf215546Sopenharmony_ci if (data_sel != EOP_DATA_SEL_DISCARD) 941bf215546Sopenharmony_ci sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM); 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci if (gfx_level >= GFX9 || is_gfx8_mec) { 944bf215546Sopenharmony_ci /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion 945bf215546Sopenharmony_ci * counters) must immediately precede every timestamp event to 946bf215546Sopenharmony_ci * prevent a GPU hang on GFX9. 947bf215546Sopenharmony_ci */ 948bf215546Sopenharmony_ci if (gfx_level == GFX9 && !is_mec) { 949bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); 950bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); 951bf215546Sopenharmony_ci radeon_emit(cs, gfx9_eop_bug_va); 952bf215546Sopenharmony_ci radeon_emit(cs, gfx9_eop_bug_va >> 32); 953bf215546Sopenharmony_ci } 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false)); 956bf215546Sopenharmony_ci radeon_emit(cs, op); 957bf215546Sopenharmony_ci radeon_emit(cs, sel); 958bf215546Sopenharmony_ci radeon_emit(cs, va); /* address lo */ 959bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); /* address hi */ 960bf215546Sopenharmony_ci radeon_emit(cs, new_fence); /* immediate data lo */ 961bf215546Sopenharmony_ci radeon_emit(cs, 0); /* immediate data hi */ 962bf215546Sopenharmony_ci if (!is_gfx8_mec) 963bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 964bf215546Sopenharmony_ci } else { 965bf215546Sopenharmony_ci /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS. 966bf215546Sopenharmony_ci * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on 967bf215546Sopenharmony_ci * the graphics queue, and with RELEASE_MEM on the compute 968bf215546Sopenharmony_ci * queue. 969bf215546Sopenharmony_ci */ 970bf215546Sopenharmony_ci if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) { 971bf215546Sopenharmony_ci assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM && 972bf215546Sopenharmony_ci data_sel == EOP_DATA_SEL_VALUE_32BIT); 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci if (is_mec) { 975bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false)); 976bf215546Sopenharmony_ci radeon_emit(cs, op); 977bf215546Sopenharmony_ci radeon_emit(cs, sel); 978bf215546Sopenharmony_ci radeon_emit(cs, va); /* address lo */ 979bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); /* address hi */ 980bf215546Sopenharmony_ci radeon_emit(cs, new_fence); /* immediate data lo */ 981bf215546Sopenharmony_ci radeon_emit(cs, 0); /* immediate data hi */ 982bf215546Sopenharmony_ci } else { 983bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, false)); 984bf215546Sopenharmony_ci radeon_emit(cs, op); 985bf215546Sopenharmony_ci radeon_emit(cs, va); 986bf215546Sopenharmony_ci radeon_emit(cs, ((va >> 32) & 0xffff) | EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT)); 987bf215546Sopenharmony_ci radeon_emit(cs, new_fence); 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci } else { 990bf215546Sopenharmony_ci if (gfx_level == GFX7 || gfx_level == GFX8) { 991bf215546Sopenharmony_ci /* Two EOP events are required to make all 992bf215546Sopenharmony_ci * engines go idle (and optional cache flushes 993bf215546Sopenharmony_ci * executed) before the timestamp is written. 994bf215546Sopenharmony_ci */ 995bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false)); 996bf215546Sopenharmony_ci radeon_emit(cs, op); 997bf215546Sopenharmony_ci radeon_emit(cs, va); 998bf215546Sopenharmony_ci radeon_emit(cs, ((va >> 32) & 0xffff) | sel); 999bf215546Sopenharmony_ci radeon_emit(cs, 0); /* immediate data */ 1000bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 1001bf215546Sopenharmony_ci } 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false)); 1004bf215546Sopenharmony_ci radeon_emit(cs, op); 1005bf215546Sopenharmony_ci radeon_emit(cs, va); 1006bf215546Sopenharmony_ci radeon_emit(cs, ((va >> 32) & 0xffff) | sel); 1007bf215546Sopenharmony_ci radeon_emit(cs, new_fence); /* immediate data */ 1008bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 1009bf215546Sopenharmony_ci } 1010bf215546Sopenharmony_ci } 1011bf215546Sopenharmony_ci} 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_civoid 1014bf215546Sopenharmony_ciradv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask) 1015bf215546Sopenharmony_ci{ 1016bf215546Sopenharmony_ci assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || 1017bf215546Sopenharmony_ci op == WAIT_REG_MEM_GREATER_OR_EQUAL); 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); 1020bf215546Sopenharmony_ci radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1)); 1021bf215546Sopenharmony_ci radeon_emit(cs, va); 1022bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 1023bf215546Sopenharmony_ci radeon_emit(cs, ref); /* reference value */ 1024bf215546Sopenharmony_ci radeon_emit(cs, mask); /* mask */ 1025bf215546Sopenharmony_ci radeon_emit(cs, 4); /* poll interval */ 1026bf215546Sopenharmony_ci} 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_cistatic void 1029bf215546Sopenharmony_cisi_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl) 1030bf215546Sopenharmony_ci{ 1031bf215546Sopenharmony_ci if (is_mec || is_gfx9) { 1032bf215546Sopenharmony_ci uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff; 1033bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, false) | PKT3_SHADER_TYPE_S(is_mec)); 1034bf215546Sopenharmony_ci radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ 1035bf215546Sopenharmony_ci radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ 1036bf215546Sopenharmony_ci radeon_emit(cs, hi_val); /* CP_COHER_SIZE_HI */ 1037bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE */ 1038bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ 1039bf215546Sopenharmony_ci radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ 1040bf215546Sopenharmony_ci } else { 1041bf215546Sopenharmony_ci /* ACQUIRE_MEM is only required on a compute ring. */ 1042bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, false)); 1043bf215546Sopenharmony_ci radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ 1044bf215546Sopenharmony_ci radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ 1045bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE */ 1046bf215546Sopenharmony_ci radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ 1047bf215546Sopenharmony_ci } 1048bf215546Sopenharmony_ci} 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_cistatic void 1051bf215546Sopenharmony_cigfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, 1052bf215546Sopenharmony_ci uint32_t *flush_cnt, uint64_t flush_va, bool is_mec, 1053bf215546Sopenharmony_ci enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits, 1054bf215546Sopenharmony_ci uint64_t gfx9_eop_bug_va) 1055bf215546Sopenharmony_ci{ 1056bf215546Sopenharmony_ci uint32_t gcr_cntl = 0; 1057bf215546Sopenharmony_ci unsigned cb_db_event = 0; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci /* We don't need these. */ 1060bf215546Sopenharmony_ci assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC))); 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) { 1063bf215546Sopenharmony_ci gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL); 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE; 1066bf215546Sopenharmony_ci } 1067bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) { 1068bf215546Sopenharmony_ci /* TODO: When writing to the SMEM L1 cache, we need to set SEQ 1069bf215546Sopenharmony_ci * to FORWARD when both L1 and L2 are written out (WB or INV). 1070bf215546Sopenharmony_ci */ 1071bf215546Sopenharmony_ci gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0; 1074bf215546Sopenharmony_ci } 1075bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { 1076bf215546Sopenharmony_ci gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1; 1079bf215546Sopenharmony_ci } 1080bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_L2) { 1081bf215546Sopenharmony_ci /* Writeback and invalidate everything in L2. */ 1082bf215546Sopenharmony_ci gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) | S_586_GLM_INV(1) | S_586_GLM_WB(1); 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2; 1085bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_WB_L2) { 1086bf215546Sopenharmony_ci /* Writeback but do not invalidate. 1087bf215546Sopenharmony_ci * GLM doesn't support WB alone. If WB is set, INV must be set too. 1088bf215546Sopenharmony_ci */ 1089bf215546Sopenharmony_ci gcr_cntl |= S_586_GL2_WB(1) | S_586_GLM_WB(1) | S_586_GLM_INV(1); 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2; 1092bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) { 1093bf215546Sopenharmony_ci gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1); 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) { 1097bf215546Sopenharmony_ci /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_CB_META */ 1098bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { 1099bf215546Sopenharmony_ci /* Flush CMASK/FMASK/DCC. Will wait for idle later. */ 1100bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1101bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB; 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_DB_META ? */ 1107bf215546Sopenharmony_ci if (gfx_level < GFX11 && (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB)) { 1108bf215546Sopenharmony_ci /* Flush HTILE. Will wait for idle later. */ 1109bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1110bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB; 1113bf215546Sopenharmony_ci } 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci /* First flush CB/DB, then L1/L2. */ 1116bf215546Sopenharmony_ci gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD); 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ci if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) == 1119bf215546Sopenharmony_ci (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) { 1120bf215546Sopenharmony_ci cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; 1121bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { 1122bf215546Sopenharmony_ci cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS; 1123bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { 1124bf215546Sopenharmony_ci if (gfx_level == GFX11) { 1125bf215546Sopenharmony_ci cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; 1126bf215546Sopenharmony_ci } else { 1127bf215546Sopenharmony_ci cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS; 1128bf215546Sopenharmony_ci } 1129bf215546Sopenharmony_ci } else { 1130bf215546Sopenharmony_ci assert(0); 1131bf215546Sopenharmony_ci } 1132bf215546Sopenharmony_ci } else { 1133bf215546Sopenharmony_ci /* Wait for graphics shaders to go idle if requested. */ 1134bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) { 1135bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1136bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH; 1139bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) { 1140bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1141bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH; 1144bf215546Sopenharmony_ci } 1145bf215546Sopenharmony_ci } 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) { 1148bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1149bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); 1150bf215546Sopenharmony_ci 1151bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH; 1152bf215546Sopenharmony_ci } 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ci if (cb_db_event) { 1155bf215546Sopenharmony_ci /* CB/DB flush and invalidate (or possibly just a wait for a 1156bf215546Sopenharmony_ci * meta flush) via RELEASE_MEM. 1157bf215546Sopenharmony_ci * 1158bf215546Sopenharmony_ci * Combine this with other cache flushes when possible; this 1159bf215546Sopenharmony_ci * requires affected shaders to be idle, so do it after the 1160bf215546Sopenharmony_ci * CS_PARTIAL_FLUSH before (VS/PS partial flushes are always 1161bf215546Sopenharmony_ci * implied). 1162bf215546Sopenharmony_ci */ 1163bf215546Sopenharmony_ci /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */ 1164bf215546Sopenharmony_ci unsigned glm_wb = G_586_GLM_WB(gcr_cntl); 1165bf215546Sopenharmony_ci unsigned glm_inv = G_586_GLM_INV(gcr_cntl); 1166bf215546Sopenharmony_ci unsigned glv_inv = G_586_GLV_INV(gcr_cntl); 1167bf215546Sopenharmony_ci unsigned gl1_inv = G_586_GL1_INV(gcr_cntl); 1168bf215546Sopenharmony_ci assert(G_586_GL2_US(gcr_cntl) == 0); 1169bf215546Sopenharmony_ci assert(G_586_GL2_RANGE(gcr_cntl) == 0); 1170bf215546Sopenharmony_ci assert(G_586_GL2_DISCARD(gcr_cntl) == 0); 1171bf215546Sopenharmony_ci unsigned gl2_inv = G_586_GL2_INV(gcr_cntl); 1172bf215546Sopenharmony_ci unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); 1173bf215546Sopenharmony_ci unsigned gcr_seq = G_586_SEQ(gcr_cntl); 1174bf215546Sopenharmony_ci 1175bf215546Sopenharmony_ci gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & 1176bf215546Sopenharmony_ci C_586_GL2_WB; /* keep SEQ */ 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci assert(flush_cnt); 1179bf215546Sopenharmony_ci (*flush_cnt)++; 1180bf215546Sopenharmony_ci 1181bf215546Sopenharmony_ci si_cs_emit_write_event_eop( 1182bf215546Sopenharmony_ci cs, gfx_level, false, cb_db_event, 1183bf215546Sopenharmony_ci S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | 1184bf215546Sopenharmony_ci S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | 1185bf215546Sopenharmony_ci S_490_SEQ(gcr_seq), 1186bf215546Sopenharmony_ci EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va); 1187bf215546Sopenharmony_ci 1188bf215546Sopenharmony_ci radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff); 1189bf215546Sopenharmony_ci } 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci /* VGT state sync */ 1192bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) { 1193bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1194bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 1195bf215546Sopenharmony_ci } 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_ci /* Ignore fields that only modify the behavior of other fields. */ 1198bf215546Sopenharmony_ci if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) { 1199bf215546Sopenharmony_ci /* Flush caches and wait for the caches to assert idle. 1200bf215546Sopenharmony_ci * The cache flush is executed in the ME, but the PFP waits 1201bf215546Sopenharmony_ci * for completion. 1202bf215546Sopenharmony_ci */ 1203bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); 1204bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_CNTL */ 1205bf215546Sopenharmony_ci radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ 1206bf215546Sopenharmony_ci radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */ 1207bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE */ 1208bf215546Sopenharmony_ci radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ 1209bf215546Sopenharmony_ci radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ 1210bf215546Sopenharmony_ci radeon_emit(cs, gcr_cntl); /* GCR_CNTL */ 1211bf215546Sopenharmony_ci } else if ((cb_db_event || 1212bf215546Sopenharmony_ci (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 1213bf215546Sopenharmony_ci RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) && 1214bf215546Sopenharmony_ci !is_mec) { 1215bf215546Sopenharmony_ci /* We need to ensure that PFP waits as well. */ 1216bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 1217bf215546Sopenharmony_ci radeon_emit(cs, 0); 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME; 1220bf215546Sopenharmony_ci } 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) { 1223bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1224bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); 1225bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) { 1226bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1227bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); 1228bf215546Sopenharmony_ci } 1229bf215546Sopenharmony_ci} 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_civoid 1232bf215546Sopenharmony_cisi_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, uint32_t *flush_cnt, 1233bf215546Sopenharmony_ci uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits, 1234bf215546Sopenharmony_ci enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va) 1235bf215546Sopenharmony_ci{ 1236bf215546Sopenharmony_ci unsigned cp_coher_cntl = 0; 1237bf215546Sopenharmony_ci uint32_t flush_cb_db = 1238bf215546Sopenharmony_ci flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB); 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_ci if (gfx_level >= GFX10) { 1241bf215546Sopenharmony_ci /* GFX10 cache flush handling is quite different. */ 1242bf215546Sopenharmony_ci gfx10_cs_emit_cache_flush(cs, gfx_level, flush_cnt, flush_va, is_mec, flush_bits, 1243bf215546Sopenharmony_ci sqtt_flush_bits, gfx9_eop_bug_va); 1244bf215546Sopenharmony_ci return; 1245bf215546Sopenharmony_ci } 1246bf215546Sopenharmony_ci 1247bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) { 1248bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); 1249bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE; 1250bf215546Sopenharmony_ci } 1251bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) { 1252bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); 1253bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0; 1254bf215546Sopenharmony_ci } 1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_ci if (gfx_level <= GFX8) { 1257bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { 1258bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | 1259bf215546Sopenharmony_ci S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) | 1260bf215546Sopenharmony_ci S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | 1261bf215546Sopenharmony_ci S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | 1262bf215546Sopenharmony_ci S_0085F0_CB7_DEST_BASE_ENA(1); 1263bf215546Sopenharmony_ci 1264bf215546Sopenharmony_ci /* Necessary for DCC */ 1265bf215546Sopenharmony_ci if (gfx_level >= GFX8) { 1266bf215546Sopenharmony_ci si_cs_emit_write_event_eop(cs, gfx_level, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, 1267bf215546Sopenharmony_ci EOP_DST_SEL_MEM, EOP_DATA_SEL_DISCARD, 0, 0, 1268bf215546Sopenharmony_ci gfx9_eop_bug_va); 1269bf215546Sopenharmony_ci } 1270bf215546Sopenharmony_ci 1271bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB; 1272bf215546Sopenharmony_ci } 1273bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { 1274bf215546Sopenharmony_ci cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB; 1277bf215546Sopenharmony_ci } 1278bf215546Sopenharmony_ci } 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) { 1281bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1282bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB; 1285bf215546Sopenharmony_ci } 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) { 1288bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1289bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB; 1292bf215546Sopenharmony_ci } 1293bf215546Sopenharmony_ci 1294bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) { 1295bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1296bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 1297bf215546Sopenharmony_ci 1298bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH; 1299bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) { 1300bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1301bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH; 1304bf215546Sopenharmony_ci } 1305bf215546Sopenharmony_ci 1306bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) { 1307bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1308bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 1309bf215546Sopenharmony_ci 1310bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH; 1311bf215546Sopenharmony_ci } 1312bf215546Sopenharmony_ci 1313bf215546Sopenharmony_ci if (gfx_level == GFX9 && flush_cb_db) { 1314bf215546Sopenharmony_ci unsigned cb_db_event, tc_flags; 1315bf215546Sopenharmony_ci 1316bf215546Sopenharmony_ci /* Set the CB/DB flush event. */ 1317bf215546Sopenharmony_ci cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT; 1318bf215546Sopenharmony_ci 1319bf215546Sopenharmony_ci /* These are the only allowed combinations. If you need to 1320bf215546Sopenharmony_ci * do multiple operations at once, do them separately. 1321bf215546Sopenharmony_ci * All operations that invalidate L2 also seem to invalidate 1322bf215546Sopenharmony_ci * metadata. Volatile (VOL) and WC flushes are not listed here. 1323bf215546Sopenharmony_ci * 1324bf215546Sopenharmony_ci * TC | TC_WB = writeback & invalidate L2 & L1 1325bf215546Sopenharmony_ci * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC 1326bf215546Sopenharmony_ci * TC_WB | TC_NC = writeback L2 for MTYPE == NC 1327bf215546Sopenharmony_ci * TC | TC_NC = invalidate L2 for MTYPE == NC 1328bf215546Sopenharmony_ci * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.) 1329bf215546Sopenharmony_ci * TCL1 = invalidate L1 1330bf215546Sopenharmony_ci */ 1331bf215546Sopenharmony_ci tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA; 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci *sqtt_flush_bits |= 1334bf215546Sopenharmony_ci RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB; 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci /* Ideally flush TC together with CB/DB. */ 1337bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_L2) { 1338bf215546Sopenharmony_ci /* Writeback and invalidate everything in L2 & L1. */ 1339bf215546Sopenharmony_ci tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA; 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci /* Clear the flags. */ 1342bf215546Sopenharmony_ci flush_bits &= ~(RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_INV_VCACHE); 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2; 1345bf215546Sopenharmony_ci } 1346bf215546Sopenharmony_ci 1347bf215546Sopenharmony_ci assert(flush_cnt); 1348bf215546Sopenharmony_ci (*flush_cnt)++; 1349bf215546Sopenharmony_ci 1350bf215546Sopenharmony_ci si_cs_emit_write_event_eop(cs, gfx_level, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM, 1351bf215546Sopenharmony_ci EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va); 1352bf215546Sopenharmony_ci radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff); 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci /* VGT state sync */ 1356bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) { 1357bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1358bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 1359bf215546Sopenharmony_ci } 1360bf215546Sopenharmony_ci 1361bf215546Sopenharmony_ci /* VGT streamout state sync */ 1362bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_VGT_STREAMOUT_SYNC) { 1363bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1364bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); 1365bf215546Sopenharmony_ci } 1366bf215546Sopenharmony_ci 1367bf215546Sopenharmony_ci /* Make sure ME is idle (it executes most packets) before continuing. 1368bf215546Sopenharmony_ci * This prevents read-after-write hazards between PFP and ME. 1369bf215546Sopenharmony_ci */ 1370bf215546Sopenharmony_ci if ((cp_coher_cntl || (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | 1371bf215546Sopenharmony_ci RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2))) && 1372bf215546Sopenharmony_ci !is_mec) { 1373bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 1374bf215546Sopenharmony_ci radeon_emit(cs, 0); 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME; 1377bf215546Sopenharmony_ci } 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci if ((flush_bits & RADV_CMD_FLAG_INV_L2) || 1380bf215546Sopenharmony_ci (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) { 1381bf215546Sopenharmony_ci si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, 1382bf215546Sopenharmony_ci cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | 1383bf215546Sopenharmony_ci S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8)); 1384bf215546Sopenharmony_ci cp_coher_cntl = 0; 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0; 1387bf215546Sopenharmony_ci } else { 1388bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_WB_L2) { 1389bf215546Sopenharmony_ci /* WB = write-back 1390bf215546Sopenharmony_ci * NC = apply to non-coherent MTYPEs 1391bf215546Sopenharmony_ci * (i.e. MTYPE <= 1, which is what we use everywhere) 1392bf215546Sopenharmony_ci * 1393bf215546Sopenharmony_ci * WB doesn't work without NC. 1394bf215546Sopenharmony_ci */ 1395bf215546Sopenharmony_ci si_emit_acquire_mem( 1396bf215546Sopenharmony_ci cs, is_mec, gfx_level == GFX9, 1397bf215546Sopenharmony_ci cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); 1398bf215546Sopenharmony_ci cp_coher_cntl = 0; 1399bf215546Sopenharmony_ci 1400bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0; 1401bf215546Sopenharmony_ci } 1402bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { 1403bf215546Sopenharmony_ci si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, 1404bf215546Sopenharmony_ci cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); 1405bf215546Sopenharmony_ci cp_coher_cntl = 0; 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0; 1408bf215546Sopenharmony_ci } 1409bf215546Sopenharmony_ci } 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle. 1412bf215546Sopenharmony_ci * Therefore, it should be last. Done in PFP. 1413bf215546Sopenharmony_ci */ 1414bf215546Sopenharmony_ci if (cp_coher_cntl) 1415bf215546Sopenharmony_ci si_emit_acquire_mem(cs, is_mec, gfx_level == GFX9, cp_coher_cntl); 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) { 1418bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1419bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); 1420bf215546Sopenharmony_ci } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) { 1421bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1422bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); 1423bf215546Sopenharmony_ci } 1424bf215546Sopenharmony_ci} 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_civoid 1427bf215546Sopenharmony_cisi_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) 1428bf215546Sopenharmony_ci{ 1429bf215546Sopenharmony_ci bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE; 1430bf215546Sopenharmony_ci 1431bf215546Sopenharmony_ci if (is_compute) 1432bf215546Sopenharmony_ci cmd_buffer->state.flush_bits &= 1433bf215546Sopenharmony_ci ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | 1434bf215546Sopenharmony_ci RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | 1435bf215546Sopenharmony_ci RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 1436bf215546Sopenharmony_ci RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH | 1437bf215546Sopenharmony_ci RADV_CMD_FLAG_START_PIPELINE_STATS | RADV_CMD_FLAG_STOP_PIPELINE_STATS); 1438bf215546Sopenharmony_ci 1439bf215546Sopenharmony_ci if (!cmd_buffer->state.flush_bits) { 1440bf215546Sopenharmony_ci radv_describe_barrier_end_delayed(cmd_buffer); 1441bf215546Sopenharmony_ci return; 1442bf215546Sopenharmony_ci } 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); 1445bf215546Sopenharmony_ci 1446bf215546Sopenharmony_ci si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 1447bf215546Sopenharmony_ci &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, 1448bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, 1449bf215546Sopenharmony_ci &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 1452bf215546Sopenharmony_ci radv_cmd_buffer_trace_emit(cmd_buffer); 1453bf215546Sopenharmony_ci 1454bf215546Sopenharmony_ci if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_L2) 1455bf215546Sopenharmony_ci cmd_buffer->state.rb_noncoherent_dirty = false; 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci /* Clear the caches that have been flushed to avoid syncing too much 1458bf215546Sopenharmony_ci * when there is some pending active queries. 1459bf215546Sopenharmony_ci */ 1460bf215546Sopenharmony_ci cmd_buffer->active_query_flush_bits &= ~cmd_buffer->state.flush_bits; 1461bf215546Sopenharmony_ci 1462bf215546Sopenharmony_ci cmd_buffer->state.flush_bits = 0; 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci /* If the driver used a compute shader for resetting a query pool, it 1465bf215546Sopenharmony_ci * should be finished at this point. 1466bf215546Sopenharmony_ci */ 1467bf215546Sopenharmony_ci cmd_buffer->pending_reset_query = false; 1468bf215546Sopenharmony_ci 1469bf215546Sopenharmony_ci radv_describe_barrier_end_delayed(cmd_buffer); 1470bf215546Sopenharmony_ci} 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci/* sets the CP predication state using a boolean stored at va */ 1473bf215546Sopenharmony_civoid 1474bf215546Sopenharmony_cisi_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, 1475bf215546Sopenharmony_ci unsigned pred_op, uint64_t va) 1476bf215546Sopenharmony_ci{ 1477bf215546Sopenharmony_ci uint32_t op = 0; 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_ci if (va) { 1480bf215546Sopenharmony_ci assert(pred_op == PREDICATION_OP_BOOL32 || pred_op == PREDICATION_OP_BOOL64); 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci op = PRED_OP(pred_op); 1483bf215546Sopenharmony_ci 1484bf215546Sopenharmony_ci /* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is 1485bf215546Sopenharmony_ci * zero, all rendering commands are discarded. Otherwise, they 1486bf215546Sopenharmony_ci * are discarded if the value is non zero. 1487bf215546Sopenharmony_ci */ 1488bf215546Sopenharmony_ci op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE; 1489bf215546Sopenharmony_ci } 1490bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { 1491bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); 1492bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, op); 1493bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 1494bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va >> 32); 1495bf215546Sopenharmony_ci } else { 1496bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); 1497bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 1498bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF)); 1499bf215546Sopenharmony_ci } 1500bf215546Sopenharmony_ci} 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci/* Set this if you want the 3D engine to wait until CP DMA is done. 1503bf215546Sopenharmony_ci * It should be set on the last CP DMA packet. */ 1504bf215546Sopenharmony_ci#define CP_DMA_SYNC (1 << 0) 1505bf215546Sopenharmony_ci 1506bf215546Sopenharmony_ci/* Set this if the source data was used as a destination in a previous CP DMA 1507bf215546Sopenharmony_ci * packet. It's for preventing a read-after-write (RAW) hazard between two 1508bf215546Sopenharmony_ci * CP DMA packets. */ 1509bf215546Sopenharmony_ci#define CP_DMA_RAW_WAIT (1 << 1) 1510bf215546Sopenharmony_ci#define CP_DMA_USE_L2 (1 << 2) 1511bf215546Sopenharmony_ci#define CP_DMA_CLEAR (1 << 3) 1512bf215546Sopenharmony_ci 1513bf215546Sopenharmony_ci/* Alignment for optimal performance. */ 1514bf215546Sopenharmony_ci#define SI_CPDMA_ALIGNMENT 32 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci/* The max number of bytes that can be copied per packet. */ 1517bf215546Sopenharmony_cistatic inline unsigned 1518bf215546Sopenharmony_cicp_dma_max_byte_count(enum amd_gfx_level gfx_level) 1519bf215546Sopenharmony_ci{ 1520bf215546Sopenharmony_ci unsigned max = gfx_level >= GFX11 ? 32767 : 1521bf215546Sopenharmony_ci gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u); 1522bf215546Sopenharmony_ci 1523bf215546Sopenharmony_ci /* make it aligned for optimal performance */ 1524bf215546Sopenharmony_ci return max & ~(SI_CPDMA_ALIGNMENT - 1); 1525bf215546Sopenharmony_ci} 1526bf215546Sopenharmony_ci 1527bf215546Sopenharmony_ci/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear 1528bf215546Sopenharmony_ci * a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit 1529bf215546Sopenharmony_ci * clear value. 1530bf215546Sopenharmony_ci */ 1531bf215546Sopenharmony_cistatic void 1532bf215546Sopenharmony_cisi_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, 1533bf215546Sopenharmony_ci uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags) 1534bf215546Sopenharmony_ci{ 1535bf215546Sopenharmony_ci uint32_t header = 0, command = 0; 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci assert(size <= cp_dma_max_byte_count(device->physical_device->rad_info.gfx_level)); 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_ci radeon_check_space(device->ws, cs, 9); 1540bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9) 1541bf215546Sopenharmony_ci command |= S_415_BYTE_COUNT_GFX9(size); 1542bf215546Sopenharmony_ci else 1543bf215546Sopenharmony_ci command |= S_415_BYTE_COUNT_GFX6(size); 1544bf215546Sopenharmony_ci 1545bf215546Sopenharmony_ci /* Sync flags. */ 1546bf215546Sopenharmony_ci if (flags & CP_DMA_SYNC) 1547bf215546Sopenharmony_ci header |= S_411_CP_SYNC(1); 1548bf215546Sopenharmony_ci else { 1549bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9) 1550bf215546Sopenharmony_ci command |= S_415_DISABLE_WR_CONFIRM_GFX9(1); 1551bf215546Sopenharmony_ci else 1552bf215546Sopenharmony_ci command |= S_415_DISABLE_WR_CONFIRM_GFX6(1); 1553bf215546Sopenharmony_ci } 1554bf215546Sopenharmony_ci 1555bf215546Sopenharmony_ci if (flags & CP_DMA_RAW_WAIT) 1556bf215546Sopenharmony_ci command |= S_415_RAW_WAIT(1); 1557bf215546Sopenharmony_ci 1558bf215546Sopenharmony_ci /* Src and dst flags. */ 1559bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && 1560bf215546Sopenharmony_ci src_va == dst_va) 1561bf215546Sopenharmony_ci header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */ 1562bf215546Sopenharmony_ci else if (flags & CP_DMA_USE_L2) 1563bf215546Sopenharmony_ci header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); 1564bf215546Sopenharmony_ci 1565bf215546Sopenharmony_ci if (flags & CP_DMA_CLEAR) 1566bf215546Sopenharmony_ci header |= S_411_SRC_SEL(V_411_DATA); 1567bf215546Sopenharmony_ci else if (flags & CP_DMA_USE_L2) 1568bf215546Sopenharmony_ci header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); 1569bf215546Sopenharmony_ci 1570bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX7) { 1571bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating)); 1572bf215546Sopenharmony_ci radeon_emit(cs, header); 1573bf215546Sopenharmony_ci radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ 1574bf215546Sopenharmony_ci radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */ 1575bf215546Sopenharmony_ci radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ 1576bf215546Sopenharmony_ci radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */ 1577bf215546Sopenharmony_ci radeon_emit(cs, command); 1578bf215546Sopenharmony_ci } else { 1579bf215546Sopenharmony_ci assert(!(flags & CP_DMA_USE_L2)); 1580bf215546Sopenharmony_ci header |= S_411_SRC_ADDR_HI(src_va >> 32); 1581bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, predicating)); 1582bf215546Sopenharmony_ci radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ 1583bf215546Sopenharmony_ci radeon_emit(cs, header); /* SRC_ADDR_HI [15:0] + flags. */ 1584bf215546Sopenharmony_ci radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ 1585bf215546Sopenharmony_ci radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */ 1586bf215546Sopenharmony_ci radeon_emit(cs, command); 1587bf215546Sopenharmony_ci } 1588bf215546Sopenharmony_ci} 1589bf215546Sopenharmony_ci 1590bf215546Sopenharmony_cistatic void 1591bf215546Sopenharmony_cisi_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size, 1592bf215546Sopenharmony_ci unsigned flags) 1593bf215546Sopenharmony_ci{ 1594bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 1595bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 1596bf215546Sopenharmony_ci bool predicating = cmd_buffer->state.predicating; 1597bf215546Sopenharmony_ci 1598bf215546Sopenharmony_ci si_cs_emit_cp_dma(device, cs, predicating, dst_va, src_va, size, flags); 1599bf215546Sopenharmony_ci 1600bf215546Sopenharmony_ci /* CP DMA is executed in ME, but index buffers are read by PFP. 1601bf215546Sopenharmony_ci * This ensures that ME (CP DMA) is idle before PFP starts fetching 1602bf215546Sopenharmony_ci * indices. If we wanted to execute CP DMA in PFP, this packet 1603bf215546Sopenharmony_ci * should precede it. 1604bf215546Sopenharmony_ci */ 1605bf215546Sopenharmony_ci if (flags & CP_DMA_SYNC) { 1606bf215546Sopenharmony_ci if (cmd_buffer->qf == RADV_QUEUE_GENERAL) { 1607bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); 1608bf215546Sopenharmony_ci radeon_emit(cs, 0); 1609bf215546Sopenharmony_ci } 1610bf215546Sopenharmony_ci 1611bf215546Sopenharmony_ci /* CP will see the sync flag and wait for all DMAs to complete. */ 1612bf215546Sopenharmony_ci cmd_buffer->state.dma_is_busy = false; 1613bf215546Sopenharmony_ci } 1614bf215546Sopenharmony_ci 1615bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 1616bf215546Sopenharmony_ci radv_cmd_buffer_trace_emit(cmd_buffer); 1617bf215546Sopenharmony_ci} 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_civoid 1620bf215546Sopenharmony_cisi_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, 1621bf215546Sopenharmony_ci unsigned size, bool predicating) 1622bf215546Sopenharmony_ci{ 1623bf215546Sopenharmony_ci struct radeon_winsys *ws = device->ws; 1624bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; 1625bf215546Sopenharmony_ci uint32_t header = 0, command = 0; 1626bf215546Sopenharmony_ci 1627bf215546Sopenharmony_ci if (gfx_level >= GFX11) 1628bf215546Sopenharmony_ci size = MIN2(size, 32768 - SI_CPDMA_ALIGNMENT); 1629bf215546Sopenharmony_ci 1630bf215546Sopenharmony_ci assert(size <= cp_dma_max_byte_count(gfx_level)); 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci radeon_check_space(ws, cs, 9); 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1); 1635bf215546Sopenharmony_ci uint64_t aligned_size = 1636bf215546Sopenharmony_ci ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va; 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci if (gfx_level >= GFX9) { 1639bf215546Sopenharmony_ci command |= S_415_BYTE_COUNT_GFX9(aligned_size) | 1640bf215546Sopenharmony_ci S_415_DISABLE_WR_CONFIRM_GFX9(1); 1641bf215546Sopenharmony_ci header |= S_411_DST_SEL(V_411_NOWHERE); 1642bf215546Sopenharmony_ci } else { 1643bf215546Sopenharmony_ci command |= S_415_BYTE_COUNT_GFX6(aligned_size) | 1644bf215546Sopenharmony_ci S_415_DISABLE_WR_CONFIRM_GFX6(1); 1645bf215546Sopenharmony_ci header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); 1646bf215546Sopenharmony_ci } 1647bf215546Sopenharmony_ci 1648bf215546Sopenharmony_ci header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating)); 1651bf215546Sopenharmony_ci radeon_emit(cs, header); 1652bf215546Sopenharmony_ci radeon_emit(cs, aligned_va); /* SRC_ADDR_LO [31:0] */ 1653bf215546Sopenharmony_ci radeon_emit(cs, aligned_va >> 32); /* SRC_ADDR_HI [31:0] */ 1654bf215546Sopenharmony_ci radeon_emit(cs, aligned_va); /* DST_ADDR_LO [31:0] */ 1655bf215546Sopenharmony_ci radeon_emit(cs, aligned_va >> 32); /* DST_ADDR_HI [31:0] */ 1656bf215546Sopenharmony_ci radeon_emit(cs, command); 1657bf215546Sopenharmony_ci} 1658bf215546Sopenharmony_ci 1659bf215546Sopenharmony_civoid 1660bf215546Sopenharmony_cisi_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size) 1661bf215546Sopenharmony_ci{ 1662bf215546Sopenharmony_ci si_cs_cp_dma_prefetch(cmd_buffer->device, cmd_buffer->cs, va, size, 1663bf215546Sopenharmony_ci cmd_buffer->state.predicating); 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 1666bf215546Sopenharmony_ci radv_cmd_buffer_trace_emit(cmd_buffer); 1667bf215546Sopenharmony_ci} 1668bf215546Sopenharmony_ci 1669bf215546Sopenharmony_cistatic void 1670bf215546Sopenharmony_cisi_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size, 1671bf215546Sopenharmony_ci unsigned *flags) 1672bf215546Sopenharmony_ci{ 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_ci /* Flush the caches for the first copy only. 1675bf215546Sopenharmony_ci * Also wait for the previous CP DMA operations. 1676bf215546Sopenharmony_ci */ 1677bf215546Sopenharmony_ci if (cmd_buffer->state.flush_bits) { 1678bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 1679bf215546Sopenharmony_ci *flags |= CP_DMA_RAW_WAIT; 1680bf215546Sopenharmony_ci } 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_ci /* Do the synchronization after the last dma, so that all data 1683bf215546Sopenharmony_ci * is written to memory. 1684bf215546Sopenharmony_ci */ 1685bf215546Sopenharmony_ci if (byte_count == remaining_size) 1686bf215546Sopenharmony_ci *flags |= CP_DMA_SYNC; 1687bf215546Sopenharmony_ci} 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_cistatic void 1690bf215546Sopenharmony_cisi_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size) 1691bf215546Sopenharmony_ci{ 1692bf215546Sopenharmony_ci uint64_t va; 1693bf215546Sopenharmony_ci uint32_t offset; 1694bf215546Sopenharmony_ci unsigned dma_flags = 0; 1695bf215546Sopenharmony_ci unsigned buf_size = SI_CPDMA_ALIGNMENT * 2; 1696bf215546Sopenharmony_ci void *ptr; 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci assert(size < SI_CPDMA_ALIGNMENT); 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_ci radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr); 1701bf215546Sopenharmony_ci 1702bf215546Sopenharmony_ci va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 1703bf215546Sopenharmony_ci va += offset; 1704bf215546Sopenharmony_ci 1705bf215546Sopenharmony_ci si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags); 1706bf215546Sopenharmony_ci 1707bf215546Sopenharmony_ci si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags); 1708bf215546Sopenharmony_ci} 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_civoid 1711bf215546Sopenharmony_cisi_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, 1712bf215546Sopenharmony_ci uint64_t size) 1713bf215546Sopenharmony_ci{ 1714bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; 1715bf215546Sopenharmony_ci uint64_t main_src_va, main_dest_va; 1716bf215546Sopenharmony_ci uint64_t skipped_size = 0, realign_size = 0; 1717bf215546Sopenharmony_ci 1718bf215546Sopenharmony_ci /* Assume that we are not going to sync after the last DMA operation. */ 1719bf215546Sopenharmony_ci cmd_buffer->state.dma_is_busy = true; 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO || 1722bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) { 1723bf215546Sopenharmony_ci /* If the size is not aligned, we must add a dummy copy at the end 1724bf215546Sopenharmony_ci * just to align the internal counter. Otherwise, the DMA engine 1725bf215546Sopenharmony_ci * would slow down by an order of magnitude for following copies. 1726bf215546Sopenharmony_ci */ 1727bf215546Sopenharmony_ci if (size % SI_CPDMA_ALIGNMENT) 1728bf215546Sopenharmony_ci realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT); 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci /* If the copy begins unaligned, we must start copying from the next 1731bf215546Sopenharmony_ci * aligned block and the skipped part should be copied after everything 1732bf215546Sopenharmony_ci * else has been copied. Only the src alignment matters, not dst. 1733bf215546Sopenharmony_ci */ 1734bf215546Sopenharmony_ci if (src_va % SI_CPDMA_ALIGNMENT) { 1735bf215546Sopenharmony_ci skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT); 1736bf215546Sopenharmony_ci /* The main part will be skipped if the size is too small. */ 1737bf215546Sopenharmony_ci skipped_size = MIN2(skipped_size, size); 1738bf215546Sopenharmony_ci size -= skipped_size; 1739bf215546Sopenharmony_ci } 1740bf215546Sopenharmony_ci } 1741bf215546Sopenharmony_ci main_src_va = src_va + skipped_size; 1742bf215546Sopenharmony_ci main_dest_va = dest_va + skipped_size; 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci while (size) { 1745bf215546Sopenharmony_ci unsigned dma_flags = 0; 1746bf215546Sopenharmony_ci unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level)); 1747bf215546Sopenharmony_ci 1748bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { 1749bf215546Sopenharmony_ci /* DMA operations via L2 are coherent and faster. 1750bf215546Sopenharmony_ci * TODO: GFX7-GFX8 should also support this but it 1751bf215546Sopenharmony_ci * requires tests/benchmarks. 1752bf215546Sopenharmony_ci * 1753bf215546Sopenharmony_ci * Also enable on GFX9 so we can use L2 at rest on GFX9+. On Raven 1754bf215546Sopenharmony_ci * this didn't seem to be worse. 1755bf215546Sopenharmony_ci * 1756bf215546Sopenharmony_ci * Note that we only use CP DMA for sizes < RADV_BUFFER_OPS_CS_THRESHOLD, 1757bf215546Sopenharmony_ci * which is 4k at the moment, so this is really unlikely to cause 1758bf215546Sopenharmony_ci * significant thrashing. 1759bf215546Sopenharmony_ci */ 1760bf215546Sopenharmony_ci dma_flags |= CP_DMA_USE_L2; 1761bf215546Sopenharmony_ci } 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci si_cp_dma_prepare(cmd_buffer, byte_count, size + skipped_size + realign_size, &dma_flags); 1764bf215546Sopenharmony_ci 1765bf215546Sopenharmony_ci dma_flags &= ~CP_DMA_SYNC; 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va, byte_count, dma_flags); 1768bf215546Sopenharmony_ci 1769bf215546Sopenharmony_ci size -= byte_count; 1770bf215546Sopenharmony_ci main_src_va += byte_count; 1771bf215546Sopenharmony_ci main_dest_va += byte_count; 1772bf215546Sopenharmony_ci } 1773bf215546Sopenharmony_ci 1774bf215546Sopenharmony_ci if (skipped_size) { 1775bf215546Sopenharmony_ci unsigned dma_flags = 0; 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci si_cp_dma_prepare(cmd_buffer, skipped_size, size + skipped_size + realign_size, &dma_flags); 1778bf215546Sopenharmony_ci 1779bf215546Sopenharmony_ci si_emit_cp_dma(cmd_buffer, dest_va, src_va, skipped_size, dma_flags); 1780bf215546Sopenharmony_ci } 1781bf215546Sopenharmony_ci if (realign_size) 1782bf215546Sopenharmony_ci si_cp_dma_realign_engine(cmd_buffer, realign_size); 1783bf215546Sopenharmony_ci} 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_civoid 1786bf215546Sopenharmony_cisi_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, 1787bf215546Sopenharmony_ci unsigned value) 1788bf215546Sopenharmony_ci{ 1789bf215546Sopenharmony_ci if (!size) 1790bf215546Sopenharmony_ci return; 1791bf215546Sopenharmony_ci 1792bf215546Sopenharmony_ci assert(va % 4 == 0 && size % 4 == 0); 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; 1795bf215546Sopenharmony_ci 1796bf215546Sopenharmony_ci /* Assume that we are not going to sync after the last DMA operation. */ 1797bf215546Sopenharmony_ci cmd_buffer->state.dma_is_busy = true; 1798bf215546Sopenharmony_ci 1799bf215546Sopenharmony_ci while (size) { 1800bf215546Sopenharmony_ci unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level)); 1801bf215546Sopenharmony_ci unsigned dma_flags = CP_DMA_CLEAR; 1802bf215546Sopenharmony_ci 1803bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { 1804bf215546Sopenharmony_ci /* DMA operations via L2 are coherent and faster. 1805bf215546Sopenharmony_ci * TODO: GFX7-GFX8 should also support this but it 1806bf215546Sopenharmony_ci * requires tests/benchmarks. 1807bf215546Sopenharmony_ci * 1808bf215546Sopenharmony_ci * Also enable on GFX9 so we can use L2 at rest on GFX9+. 1809bf215546Sopenharmony_ci */ 1810bf215546Sopenharmony_ci dma_flags |= CP_DMA_USE_L2; 1811bf215546Sopenharmony_ci } 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags); 1814bf215546Sopenharmony_ci 1815bf215546Sopenharmony_ci /* Emit the clear packet. */ 1816bf215546Sopenharmony_ci si_emit_cp_dma(cmd_buffer, va, value, byte_count, dma_flags); 1817bf215546Sopenharmony_ci 1818bf215546Sopenharmony_ci size -= byte_count; 1819bf215546Sopenharmony_ci va += byte_count; 1820bf215546Sopenharmony_ci } 1821bf215546Sopenharmony_ci} 1822bf215546Sopenharmony_ci 1823bf215546Sopenharmony_civoid 1824bf215546Sopenharmony_cisi_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) 1825bf215546Sopenharmony_ci{ 1826bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX7) 1827bf215546Sopenharmony_ci return; 1828bf215546Sopenharmony_ci 1829bf215546Sopenharmony_ci if (!cmd_buffer->state.dma_is_busy) 1830bf215546Sopenharmony_ci return; 1831bf215546Sopenharmony_ci 1832bf215546Sopenharmony_ci /* Issue a dummy DMA that copies zero bytes. 1833bf215546Sopenharmony_ci * 1834bf215546Sopenharmony_ci * The DMA engine will see that there's no work to do and skip this 1835bf215546Sopenharmony_ci * DMA request, however, the CP will see the sync flag and still wait 1836bf215546Sopenharmony_ci * for all DMAs to complete. 1837bf215546Sopenharmony_ci */ 1838bf215546Sopenharmony_ci si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC); 1839bf215546Sopenharmony_ci 1840bf215546Sopenharmony_ci cmd_buffer->state.dma_is_busy = false; 1841bf215546Sopenharmony_ci} 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_ci/* For MSAA sample positions. */ 1844bf215546Sopenharmony_ci#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ 1845bf215546Sopenharmony_ci ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \ 1846bf215546Sopenharmony_ci (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \ 1847bf215546Sopenharmony_ci (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28)) 1848bf215546Sopenharmony_ci 1849bf215546Sopenharmony_ci/* For obtaining location coordinates from registers */ 1850bf215546Sopenharmony_ci#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0))) 1851bf215546Sopenharmony_ci#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf) 1852bf215546Sopenharmony_ci#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2) 1853bf215546Sopenharmony_ci#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1) 1854bf215546Sopenharmony_ci 1855bf215546Sopenharmony_ci/* 1x MSAA */ 1856bf215546Sopenharmony_cistatic const uint32_t sample_locs_1x = FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0); 1857bf215546Sopenharmony_cistatic const unsigned max_dist_1x = 0; 1858bf215546Sopenharmony_cistatic const uint64_t centroid_priority_1x = 0x0000000000000000ull; 1859bf215546Sopenharmony_ci 1860bf215546Sopenharmony_ci/* 2xMSAA */ 1861bf215546Sopenharmony_cistatic const uint32_t sample_locs_2x = FILL_SREG(4, 4, -4, -4, 0, 0, 0, 0); 1862bf215546Sopenharmony_cistatic const unsigned max_dist_2x = 4; 1863bf215546Sopenharmony_cistatic const uint64_t centroid_priority_2x = 0x1010101010101010ull; 1864bf215546Sopenharmony_ci 1865bf215546Sopenharmony_ci/* 4xMSAA */ 1866bf215546Sopenharmony_cistatic const uint32_t sample_locs_4x = FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6); 1867bf215546Sopenharmony_cistatic const unsigned max_dist_4x = 6; 1868bf215546Sopenharmony_cistatic const uint64_t centroid_priority_4x = 0x3210321032103210ull; 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci/* 8xMSAA */ 1871bf215546Sopenharmony_cistatic const uint32_t sample_locs_8x[] = { 1872bf215546Sopenharmony_ci FILL_SREG(1, -3, -1, 3, 5, 1, -3, -5), 1873bf215546Sopenharmony_ci FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7), 1874bf215546Sopenharmony_ci /* The following are unused by hardware, but we emit them to IBs 1875bf215546Sopenharmony_ci * instead of multiple SET_CONTEXT_REG packets. */ 1876bf215546Sopenharmony_ci 0, 1877bf215546Sopenharmony_ci 0, 1878bf215546Sopenharmony_ci}; 1879bf215546Sopenharmony_cistatic const unsigned max_dist_8x = 7; 1880bf215546Sopenharmony_cistatic const uint64_t centroid_priority_8x = 0x7654321076543210ull; 1881bf215546Sopenharmony_ci 1882bf215546Sopenharmony_ciunsigned 1883bf215546Sopenharmony_ciradv_get_default_max_sample_dist(int log_samples) 1884bf215546Sopenharmony_ci{ 1885bf215546Sopenharmony_ci unsigned max_dist[] = { 1886bf215546Sopenharmony_ci max_dist_1x, 1887bf215546Sopenharmony_ci max_dist_2x, 1888bf215546Sopenharmony_ci max_dist_4x, 1889bf215546Sopenharmony_ci max_dist_8x, 1890bf215546Sopenharmony_ci }; 1891bf215546Sopenharmony_ci return max_dist[log_samples]; 1892bf215546Sopenharmony_ci} 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_civoid 1895bf215546Sopenharmony_ciradv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples) 1896bf215546Sopenharmony_ci{ 1897bf215546Sopenharmony_ci switch (nr_samples) { 1898bf215546Sopenharmony_ci default: 1899bf215546Sopenharmony_ci case 1: 1900bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 1901bf215546Sopenharmony_ci radeon_emit(cs, (uint32_t)centroid_priority_1x); 1902bf215546Sopenharmony_ci radeon_emit(cs, centroid_priority_1x >> 32); 1903bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x); 1904bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x); 1905bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x); 1906bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x); 1907bf215546Sopenharmony_ci break; 1908bf215546Sopenharmony_ci case 2: 1909bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 1910bf215546Sopenharmony_ci radeon_emit(cs, (uint32_t)centroid_priority_2x); 1911bf215546Sopenharmony_ci radeon_emit(cs, centroid_priority_2x >> 32); 1912bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x); 1913bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x); 1914bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x); 1915bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x); 1916bf215546Sopenharmony_ci break; 1917bf215546Sopenharmony_ci case 4: 1918bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 1919bf215546Sopenharmony_ci radeon_emit(cs, (uint32_t)centroid_priority_4x); 1920bf215546Sopenharmony_ci radeon_emit(cs, centroid_priority_4x >> 32); 1921bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x); 1922bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x); 1923bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x); 1924bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x); 1925bf215546Sopenharmony_ci break; 1926bf215546Sopenharmony_ci case 8: 1927bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 1928bf215546Sopenharmony_ci radeon_emit(cs, (uint32_t)centroid_priority_8x); 1929bf215546Sopenharmony_ci radeon_emit(cs, centroid_priority_8x >> 32); 1930bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14); 1931bf215546Sopenharmony_ci radeon_emit_array(cs, sample_locs_8x, 4); 1932bf215546Sopenharmony_ci radeon_emit_array(cs, sample_locs_8x, 4); 1933bf215546Sopenharmony_ci radeon_emit_array(cs, sample_locs_8x, 4); 1934bf215546Sopenharmony_ci radeon_emit_array(cs, sample_locs_8x, 2); 1935bf215546Sopenharmony_ci break; 1936bf215546Sopenharmony_ci } 1937bf215546Sopenharmony_ci} 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_cistatic void 1940bf215546Sopenharmony_ciradv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index, 1941bf215546Sopenharmony_ci float *out_value) 1942bf215546Sopenharmony_ci{ 1943bf215546Sopenharmony_ci const uint32_t *sample_locs; 1944bf215546Sopenharmony_ci 1945bf215546Sopenharmony_ci switch (sample_count) { 1946bf215546Sopenharmony_ci case 1: 1947bf215546Sopenharmony_ci default: 1948bf215546Sopenharmony_ci sample_locs = &sample_locs_1x; 1949bf215546Sopenharmony_ci break; 1950bf215546Sopenharmony_ci case 2: 1951bf215546Sopenharmony_ci sample_locs = &sample_locs_2x; 1952bf215546Sopenharmony_ci break; 1953bf215546Sopenharmony_ci case 4: 1954bf215546Sopenharmony_ci sample_locs = &sample_locs_4x; 1955bf215546Sopenharmony_ci break; 1956bf215546Sopenharmony_ci case 8: 1957bf215546Sopenharmony_ci sample_locs = sample_locs_8x; 1958bf215546Sopenharmony_ci break; 1959bf215546Sopenharmony_ci } 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f; 1962bf215546Sopenharmony_ci out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f; 1963bf215546Sopenharmony_ci} 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_civoid 1966bf215546Sopenharmony_ciradv_device_init_msaa(struct radv_device *device) 1967bf215546Sopenharmony_ci{ 1968bf215546Sopenharmony_ci int i; 1969bf215546Sopenharmony_ci 1970bf215546Sopenharmony_ci radv_get_sample_position(device, 1, 0, device->sample_locations_1x[0]); 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci for (i = 0; i < 2; i++) 1973bf215546Sopenharmony_ci radv_get_sample_position(device, 2, i, device->sample_locations_2x[i]); 1974bf215546Sopenharmony_ci for (i = 0; i < 4; i++) 1975bf215546Sopenharmony_ci radv_get_sample_position(device, 4, i, device->sample_locations_4x[i]); 1976bf215546Sopenharmony_ci for (i = 0; i < 8; i++) 1977bf215546Sopenharmony_ci radv_get_sample_position(device, 8, i, device->sample_locations_8x[i]); 1978bf215546Sopenharmony_ci} 1979