/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * This file contains helpers for writing commands to command streams.
 */

#ifndef SI_BUILD_PM4_H
#define SI_BUILD_PM4_H

#include "si_pipe.h"
#include "sid.h"

#if 0
#include "ac_shadowed_regs.h"
#define SI_CHECK_SHADOWED_REGS(reg_offset, count) ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count)
#else
#define SI_CHECK_SHADOWED_REGS(reg_offset, count)
#endif

#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
                         unsigned __cs_num = __cs->current.cdw; \
                         UNUSED unsigned __cs_num_initial = __cs_num; \
                         uint32_t *__cs_buf = __cs->current.buf

#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_num = __cs->current.cdw; \
   __cs_num_initial = __cs_num; \
   __cs_buf = __cs->current.buf; \
} while (0)

#define radeon_end() do { \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.cdw <= __cs->current.max_dw); \
   __cs = NULL; \
} while (0)

#define radeon_emit(value)       __cs_buf[__cs_num++] = (value)
#define radeon_packets_added()   (__cs_num != __cs_num_initial)

#define radeon_end_update_context_roll(sctx) do { \
   radeon_end(); \
   if (radeon_packets_added()) \
      (sctx)->context_roll = true; \
} while (0)

#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(__cs_buf + __cs_num, (values), __n * 4); \
   __cs_num += __n; \
} while (0)
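
/* Usage sketch (illustrative only, not part of the driver): radeon_begin
 * declares local cursor variables, radeon_emit writes dwords through them,
 * and radeon_end stores the final dword count back into the command buffer,
 * so all three must appear in the same function scope. Assuming "cs" is a
 * struct radeon_cmdbuf* such as the context's graphics command stream:
 *
 *    radeon_begin(cs);
 *    radeon_emit(PKT3(PKT3_NOP, 0, 0));   // header: one payload dword follows
 *    radeon_emit(0);                      // payload
 *    radeon_end();
 *
 * radeon_begin_again() may only be used after radeon_end(), which is what its
 * assert(__cs == NULL) checks.
 */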

#define radeon_set_config_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) < SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
} while (0)

#define radeon_set_config_reg(reg, value) do { \
   radeon_set_config_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)

#define radeon_set_context_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
   radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
} while (0)

#define radeon_set_context_reg(reg, value) do { \
   radeon_set_context_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)

#define radeon_set_context_reg_seq_array(reg, num, values) do { \
   radeon_set_context_reg_seq(reg, num); \
   radeon_emit_array(values, num); \
} while (0)

#define radeon_set_context_reg_idx(reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
   radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2 | ((idx) << 28)); \
   radeon_emit(value); \
} while (0)

#define radeon_set_sh_reg_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
   radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
   radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
} while (0)

#define radeon_set_sh_reg_idx3_seq(reg, num) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
   radeon_emit(PKT3(PKT3_SET_SH_REG_INDEX, num, 0)); \
   radeon_emit((((reg) - SI_SH_REG_OFFSET) >> 2) | (3 << 28)); \
} while (0)

#define radeon_set_sh_reg(reg, value) do { \
   radeon_set_sh_reg_seq(reg, 1); \
   radeon_emit(value); \
} while (0)

#define radeon_set_sh_reg_idx3(reg, value) do { \
   radeon_set_sh_reg_idx3_seq(reg, 1); \
   radeon_emit(value); \
} while (0)

#define radeon_set_uconfig_reg_seq(reg, num, perfctr) do { \
   SI_CHECK_SHADOWED_REGS(reg, num); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
   radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
} while (0)

#define radeon_set_uconfig_reg(reg, value) do { \
   radeon_set_uconfig_reg_seq(reg, 1, false); \
   radeon_emit(value); \
} while (0)

#define radeon_set_uconfig_reg_perfctr(reg, value) do { \
   radeon_set_uconfig_reg_seq(reg, 1, true); \
   radeon_emit(value); \
} while (0)

#define radeon_set_uconfig_reg_idx(screen, gfx_level, reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   assert((idx) != 0); \
   unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \
   if ((gfx_level) < GFX9 || \
       ((gfx_level) == GFX9 && (screen)->info.me_fw_version < 26)) \
      __opcode = PKT3_SET_UCONFIG_REG; \
   radeon_emit(PKT3(__opcode, 1, 0)); \
   radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2 | ((idx) << 28)); \
   radeon_emit(value); \
} while (0)

/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
   unsigned __value = val; \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       sctx->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_context_reg(offset, __value); \
      sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
      sctx->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
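
/* Usage sketch (illustrative only, not part of the driver): "offset" is the
 * MMIO register offset from sid.h, while "reg" is the driver's tracked-register
 * index used to look up the last written value; the SI_TRACKED_* name below is
 * an assumed example of such an index. The packet is skipped entirely when the
 * register already holds the value, which is why callers that roll the context
 * pair this with radeon_end_update_context_roll():
 *
 *    radeon_begin(cs);
 *    radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
 *                               SI_TRACKED_PA_CL_CLIP_CNTL, clip_cntl);
 *    radeon_end_update_context_roll(sctx);
 */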

/**
 * Set 2 consecutive registers if any register value is different.
 * @param offset  starting register offset
 * @param val1    value written to the first register
 * @param val2    value written to the second register
 */
#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
   unsigned __value1 = (val1), __value2 = (val2); \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
       sctx->tracked_regs.reg_value[reg] != __value1 || \
       sctx->tracked_regs.reg_value[(reg) + 1] != __value2) { \
      radeon_set_context_reg_seq(offset, 2); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      sctx->tracked_regs.reg_value[reg] = __value1; \
      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
      sctx->tracked_regs.reg_saved |= 0x3ull << (reg); \
   } \
} while (0)

/**
 * Set 3 consecutive registers if any register value is different.
 */
#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
       sctx->tracked_regs.reg_value[reg] != __value1 || \
       sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       sctx->tracked_regs.reg_value[(reg) + 2] != __value3) { \
      radeon_set_context_reg_seq(offset, 3); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      sctx->tracked_regs.reg_value[reg] = __value1; \
      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
      sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \
      sctx->tracked_regs.reg_saved |= 0x7ull << (reg); \
   } \
} while (0)

/**
 * Set 4 consecutive registers if any register value is different.
 */
#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
       sctx->tracked_regs.reg_value[reg] != __value1 || \
       sctx->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       sctx->tracked_regs.reg_value[(reg) + 2] != __value3 || \
       sctx->tracked_regs.reg_value[(reg) + 3] != __value4) { \
      radeon_set_context_reg_seq(offset, 4); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      radeon_emit(__value4); \
      sctx->tracked_regs.reg_value[reg] = __value1; \
      sctx->tracked_regs.reg_value[(reg) + 1] = __value2; \
      sctx->tracked_regs.reg_value[(reg) + 2] = __value3; \
      sctx->tracked_regs.reg_value[(reg) + 3] = __value4; \
      sctx->tracked_regs.reg_saved |= 0xfull << (reg); \
   } \
} while (0)
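
/* Usage sketch (illustrative only, not part of the driver; the register and
 * SI_TRACKED_* names are assumed examples): a consecutive pair such as the
 * generic scissor TL/BR registers goes out as one SET_CONTEXT_REG packet with
 * two data dwords, and is skipped when both tracked values already match:
 *
 *    radeon_opt_set_context_reg2(sctx, R_028240_PA_SC_GENERIC_SCISSOR_TL,
 *                                SI_TRACKED_PA_SC_GENERIC_SCISSOR_TL,
 *                                tl, br);
 */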

/**
 * Set consecutive registers if any register value is different.
 */
#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) do { \
   if (memcmp(value, saved_val, sizeof(uint32_t) * (num))) { \
      radeon_set_context_reg_seq(offset, num); \
      radeon_emit_array(value, num); \
      memcpy(saved_val, value, sizeof(uint32_t) * (num)); \
   } \
} while (0)

#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
   unsigned __value = val; \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       sctx->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_sh_reg(offset, __value); \
      sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
      sctx->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)

#define radeon_opt_set_sh_reg_idx3(sctx, offset, reg, val) do { \
   unsigned __value = val; \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       sctx->tracked_regs.reg_value[reg] != __value) { \
      if (sctx->gfx_level >= GFX10) \
         radeon_set_sh_reg_idx3(offset, __value); \
      else \
         radeon_set_sh_reg(offset, __value); \
      sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
      sctx->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)

#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
   unsigned __value = val; \
   if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       sctx->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_uconfig_reg(offset, __value); \
      sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
      sctx->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)

#define radeon_set_privileged_config_reg(reg, value) do { \
   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
   radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
               COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
   radeon_emit(value); \
   radeon_emit(0); /* unused */ \
   radeon_emit((reg) >> 2); \
   radeon_emit(0); /* unused */ \
} while (0)

#define radeon_emit_32bit_pointer(sscreen, va) do { \
   radeon_emit(va); \
   assert((va) == 0 || ((va) >> 32) == sscreen->info.address32_hi); \
} while (0)

#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
   unsigned sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
   radeon_set_sh_reg_seq(sh_offset, 1); \
   radeon_emit_32bit_pointer(sctx->screen, (desc)->gpu_address); \
} while (0)
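
/* Usage sketch (illustrative only, not part of the driver; "descs" stands for
 * a descriptor set providing the shader_userdata_offset/gpu_address fields
 * used above, and sh_base would come from si_get_user_data_base() below):
 *
 *    radeon_begin(cs);
 *    radeon_emit_one_32bit_pointer(sctx, descs, sh_base);
 *    radeon_end();
 *
 * Only the low 32 bits of the address are written; the assert in
 * radeon_emit_32bit_pointer checks that the high bits match the 32-bit
 * address range the descriptors live in (sscreen->info.address32_hi).
 */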

/* Wrappers that are only used when they are passed as function pointers. */
static inline void radeon_set_sh_reg_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
                                          uint32_t value)
{
   radeon_begin(cs);
   radeon_set_sh_reg(reg_offset, value);
   radeon_end();
}

static inline void radeon_set_sh_reg_idx3_func(struct radeon_cmdbuf *cs, unsigned reg_offset,
                                               uint32_t value)
{
   radeon_begin(cs);
   radeon_set_sh_reg_idx3(reg_offset, value);
   radeon_end();
}

/* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
                      enum si_has_gs has_gs, enum si_has_ngg ngg,
                      enum pipe_shader_type shader)
{
   switch (shader) {
   case PIPE_SHADER_VERTEX:
      /* VS can be bound as VS, ES, or LS. */
      if (has_tess) {
         if (gfx_level >= GFX10) {
            return R_00B430_SPI_SHADER_USER_DATA_HS_0;
         } else if (gfx_level == GFX9) {
            return R_00B430_SPI_SHADER_USER_DATA_LS_0;
         } else {
            return R_00B530_SPI_SHADER_USER_DATA_LS_0;
         }
      } else if (gfx_level >= GFX10) {
         if (ngg || has_gs) {
            return R_00B230_SPI_SHADER_USER_DATA_GS_0;
         } else {
            return R_00B130_SPI_SHADER_USER_DATA_VS_0;
         }
      } else if (has_gs) {
         return R_00B330_SPI_SHADER_USER_DATA_ES_0;
      } else {
         return R_00B130_SPI_SHADER_USER_DATA_VS_0;
      }

   case PIPE_SHADER_TESS_CTRL:
      if (gfx_level == GFX9) {
         return R_00B430_SPI_SHADER_USER_DATA_LS_0;
      } else {
         return R_00B430_SPI_SHADER_USER_DATA_HS_0;
      }

   case PIPE_SHADER_TESS_EVAL:
      /* TES can be bound as ES, VS, or not bound. */
      if (has_tess) {
         if (gfx_level >= GFX10) {
            if (ngg || has_gs) {
               return R_00B230_SPI_SHADER_USER_DATA_GS_0;
            } else {
               return R_00B130_SPI_SHADER_USER_DATA_VS_0;
            }
         } else if (has_gs) {
            return R_00B330_SPI_SHADER_USER_DATA_ES_0;
         } else {
            return R_00B130_SPI_SHADER_USER_DATA_VS_0;
         }
      } else {
         return 0;
      }

   case PIPE_SHADER_GEOMETRY:
      if (gfx_level == GFX9) {
         return R_00B330_SPI_SHADER_USER_DATA_ES_0;
      } else {
         return R_00B230_SPI_SHADER_USER_DATA_GS_0;
      }

   default:
      assert(0);
      return 0;
   }
}

#endif