1/* 2 * Copyright 2018 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#define AC_SURFACE_INCLUDE_NIR 26#include "ac_surface.h" 27#include "si_pipe.h" 28 29static void *create_shader_state(struct si_context *sctx, nir_shader *nir) 30{ 31 sctx->b.screen->finalize_nir(sctx->b.screen, (void*)nir); 32 33 struct pipe_shader_state state = {0}; 34 state.type = PIPE_SHADER_IR_NIR; 35 state.ir.nir = nir; 36 37 switch (nir->info.stage) { 38 case MESA_SHADER_VERTEX: 39 return sctx->b.create_vs_state(&sctx->b, &state); 40 case MESA_SHADER_TESS_CTRL: 41 return sctx->b.create_tcs_state(&sctx->b, &state); 42 case MESA_SHADER_TESS_EVAL: 43 return sctx->b.create_tes_state(&sctx->b, &state); 44 case MESA_SHADER_FRAGMENT: 45 return sctx->b.create_fs_state(&sctx->b, &state); 46 case MESA_SHADER_COMPUTE: { 47 struct pipe_compute_state cs_state = {0}; 48 cs_state.ir_type = PIPE_SHADER_IR_NIR; 49 cs_state.prog = nir; 50 return sctx->b.create_compute_state(&sctx->b, &cs_state); 51 } 52 default: 53 unreachable("invalid shader stage"); 54 return NULL; 55 } 56} 57 58static nir_ssa_def *get_global_ids(nir_builder *b, unsigned num_components) 59{ 60 unsigned mask = BITFIELD_MASK(num_components); 61 62 nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask); 63 nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask); 64 nir_ssa_def *block_size = nir_channels(b, nir_load_workgroup_size(b), mask); 65 return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids); 66} 67 68static void unpack_2x16(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_ssa_def **y) 69{ 70 *x = nir_iand(b, src, nir_imm_int(b, 0xffff)); 71 *y = nir_ushr(b, src, nir_imm_int(b, 16)); 72} 73 74static nir_ssa_def * 75deref_ssa(nir_builder *b, nir_variable *var) 76{ 77 return &nir_build_deref_var(b, var)->dest.ssa; 78} 79 80/* Create a NIR compute shader implementing copy_image. 81 * 82 * This shader can handle 1D and 2D, linear and non-linear images. 83 * It expects the source and destination (x,y,z) coords as user_data_amd, 84 * packed into 3 SGPRs as 2x16bits per component. 85 */ 86void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array) 87{ 88 const nir_shader_compiler_options *options = 89 sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); 90 91 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_cs"); 92 b.shader->info.num_images = 2; 93 94 /* The workgroup size is either 8x8 for normal (non-linear) 2D images, 95 * or 64x1 for 1D and linear-2D images. 96 */ 97 b.shader->info.workgroup_size_variable = true; 98 99 b.shader->info.cs.user_data_components_amd = 3; 100 nir_ssa_def *ids = get_global_ids(&b, 3); 101 102 nir_ssa_def *coord_src = NULL, *coord_dst = NULL; 103 unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst); 104 105 coord_src = nir_iadd(&b, coord_src, ids); 106 coord_dst = nir_iadd(&b, coord_dst, ids); 107 108 static unsigned swizzle_xz[] = {0, 2, 0, 0}; 109 110 if (src_is_1d_array) 111 coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4); 112 if (dst_is_1d_array) 113 coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4); 114 115 const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D 116 : GLSL_SAMPLER_DIM_2D, 117 /*is_array*/ true, GLSL_TYPE_FLOAT); 118 const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D 119 : GLSL_SAMPLER_DIM_2D, 120 /*is_array*/ true, GLSL_TYPE_FLOAT); 121 122 nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src"); 123 img_src->data.binding = 0; 124 125 nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst"); 126 img_dst->data.binding = 1; 127 128 nir_ssa_def *undef32 = nir_ssa_undef(&b, 1, 32); 129 nir_ssa_def *zero = nir_imm_int(&b, 0); 130 131 nir_ssa_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32, 132 deref_ssa(&b, img_src), coord_src, undef32, zero); 133 134 nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, undef32, data, zero); 135 136 return create_shader_state(sctx, b.shader); 137} 138 139void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf) 140{ 141 const nir_shader_compiler_options *options = 142 sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); 143 144 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "dcc_retile"); 145 b.shader->info.workgroup_size[0] = 8; 146 b.shader->info.workgroup_size[1] = 8; 147 b.shader->info.workgroup_size[2] = 1; 148 b.shader->info.cs.user_data_components_amd = 3; 149 b.shader->info.num_ssbos = 1; 150 151 /* Get user data SGPRs. */ 152 nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b); 153 154 /* Relative offset from the displayable DCC to the non-displayable DCC in the same buffer. */ 155 nir_ssa_def *src_dcc_offset = nir_channel(&b, user_sgprs, 0); 156 157 nir_ssa_def *src_dcc_pitch, *dst_dcc_pitch, *src_dcc_height, *dst_dcc_height; 158 unpack_2x16(&b, nir_channel(&b, user_sgprs, 1), &src_dcc_pitch, &src_dcc_height); 159 unpack_2x16(&b, nir_channel(&b, user_sgprs, 2), &dst_dcc_pitch, &dst_dcc_height); 160 161 /* Get the 2D coordinates. */ 162 nir_ssa_def *coord = get_global_ids(&b, 2); 163 nir_ssa_def *zero = nir_imm_int(&b, 0); 164 165 /* Multiply the coordinates by the DCC block size (they are DCC block coordinates). */ 166 coord = nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, 167 surf->u.gfx9.color.dcc_block_height)); 168 169 nir_ssa_def *src_offset = 170 ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, surf->bpe, &surf->u.gfx9.color.dcc_equation, 171 src_dcc_pitch, src_dcc_height, zero, /* DCC slice size */ 172 nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */ 173 zero, zero, zero); /* z, sample, pipe_xor */ 174 src_offset = nir_iadd(&b, src_offset, src_dcc_offset); 175 nir_ssa_def *value = nir_load_ssbo(&b, 1, 8, zero, src_offset, .align_mul=1); 176 177 nir_ssa_def *dst_offset = 178 ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, 179 dst_dcc_pitch, dst_dcc_height, zero, /* DCC slice size */ 180 nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */ 181 zero, zero, zero); /* z, sample, pipe_xor */ 182 nir_store_ssbo(&b, value, zero, dst_offset, .write_mask=0x1, .align_mul=1); 183 184 return create_shader_state(sctx, b.shader); 185} 186 187void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex) 188{ 189 const nir_shader_compiler_options *options = 190 sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); 191 192 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_dcc_msaa"); 193 b.shader->info.workgroup_size[0] = 8; 194 b.shader->info.workgroup_size[1] = 8; 195 b.shader->info.workgroup_size[2] = 1; 196 b.shader->info.cs.user_data_components_amd = 2; 197 b.shader->info.num_ssbos = 1; 198 199 /* Get user data SGPRs. */ 200 nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b); 201 nir_ssa_def *dcc_pitch, *dcc_height, *clear_value, *pipe_xor; 202 unpack_2x16(&b, nir_channel(&b, user_sgprs, 0), &dcc_pitch, &dcc_height); 203 unpack_2x16(&b, nir_channel(&b, user_sgprs, 1), &clear_value, &pipe_xor); 204 clear_value = nir_u2u16(&b, clear_value); 205 206 /* Get the 2D coordinates. */ 207 nir_ssa_def *coord = get_global_ids(&b, 3); 208 nir_ssa_def *zero = nir_imm_int(&b, 0); 209 210 /* Multiply the coordinates by the DCC block size (they are DCC block coordinates). */ 211 coord = nir_imul(&b, coord, 212 nir_channels(&b, nir_imm_ivec4(&b, tex->surface.u.gfx9.color.dcc_block_width, 213 tex->surface.u.gfx9.color.dcc_block_height, 214 tex->surface.u.gfx9.color.dcc_block_depth, 0), 0x7)); 215 216 nir_ssa_def *offset = 217 ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, tex->surface.bpe, 218 &tex->surface.u.gfx9.color.dcc_equation, 219 dcc_pitch, dcc_height, zero, /* DCC slice size */ 220 nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */ 221 tex->buffer.b.b.array_size > 1 ? nir_channel(&b, coord, 2) : zero, /* z */ 222 zero, pipe_xor); /* sample, pipe_xor */ 223 224 /* The trick here is that DCC elements for an even and the next odd sample are next to each other 225 * in memory, so we only need to compute the address for sample 0 and the next DCC byte is always 226 * sample 1. That's why the clear value has 2 bytes - we're clearing 2 samples at the same time. 227 */ 228 nir_store_ssbo(&b, clear_value, zero, offset, .write_mask=0x1, .align_mul=2); 229 230 return create_shader_state(sctx, b.shader); 231} 232 233/* Create a compute shader implementing clear_buffer or copy_buffer. */ 234void *si_create_clear_buffer_rmw_cs(struct si_context *sctx) 235{ 236 const nir_shader_compiler_options *options = 237 sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); 238 239 nir_builder b = 240 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_buffer_rmw_cs"); 241 b.shader->info.workgroup_size[0] = 64; 242 b.shader->info.workgroup_size[1] = 1; 243 b.shader->info.workgroup_size[2] = 1; 244 b.shader->info.cs.user_data_components_amd = 2; 245 b.shader->info.num_ssbos = 1; 246 247 /* address = blockID * 64 + threadID; */ 248 nir_ssa_def *address = get_global_ids(&b, 1); 249 250 /* address = address * 16; (byte offset, loading one vec4 per thread) */ 251 address = nir_ishl(&b, address, nir_imm_int(&b, 4)); 252 253 nir_ssa_def *zero = nir_imm_int(&b, 0); 254 nir_ssa_def *data = nir_load_ssbo(&b, 4, 32, zero, address, .align_mul = 4); 255 256 /* Get user data SGPRs. */ 257 nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b); 258 259 /* data &= inverted_writemask; */ 260 data = nir_iand(&b, data, nir_channel(&b, user_sgprs, 1)); 261 /* data |= clear_value_masked; */ 262 data = nir_ior(&b, data, nir_channel(&b, user_sgprs, 0)); 263 264 nir_store_ssbo(&b, data, zero, address, 265 .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_STREAM_CACHE_POLICY : 0, 266 .align_mul = 4); 267 268 return create_shader_state(sctx, b.shader); 269} 270 271/* This is used when TCS is NULL in the VS->TCS->TES chain. In this case, 272 * VS passes its outputs to TES directly, so the fixed-function shader only 273 * has to write TESSOUTER and TESSINNER. 274 */ 275void *si_create_passthrough_tcs(struct si_context *sctx) 276{ 277 const nir_shader_compiler_options *options = 278 sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, 279 PIPE_SHADER_TESS_CTRL); 280 281 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_TESS_CTRL, options, 282 "tcs passthrough"); 283 284 unsigned num_inputs = 0; 285 unsigned num_outputs = 0; 286 287 nir_variable *in_inner = 288 nir_variable_create(b.shader, nir_var_system_value, glsl_vec_type(2), 289 "tess inner default"); 290 in_inner->data.location = SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT; 291 292 nir_variable *out_inner = 293 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec_type(2), 294 "tess inner"); 295 out_inner->data.location = VARYING_SLOT_TESS_LEVEL_INNER; 296 out_inner->data.driver_location = num_outputs++; 297 298 nir_ssa_def *inner = nir_load_var(&b, in_inner); 299 nir_store_var(&b, out_inner, inner, 0x3); 300 301 nir_variable *in_outer = 302 nir_variable_create(b.shader, nir_var_system_value, glsl_vec4_type(), 303 "tess outer default"); 304 in_outer->data.location = SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT; 305 306 nir_variable *out_outer = 307 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), 308 "tess outer"); 309 out_outer->data.location = VARYING_SLOT_TESS_LEVEL_OUTER; 310 out_outer->data.driver_location = num_outputs++; 311 312 nir_ssa_def *outer = nir_load_var(&b, in_outer); 313 nir_store_var(&b, out_outer, outer, 0xf); 314 315 nir_ssa_def *id = nir_load_invocation_id(&b); 316 struct si_shader_info *info = &sctx->shader.vs.cso->info; 317 for (unsigned i = 0; i < info->num_outputs; i++) { 318 const struct glsl_type *type; 319 unsigned semantic = info->output_semantic[i]; 320 if (semantic < VARYING_SLOT_VAR31 && semantic != VARYING_SLOT_EDGE) 321 type = glsl_array_type(glsl_vec4_type(), 0, 0); 322 else if (semantic >= VARYING_SLOT_VAR0_16BIT) 323 type = glsl_array_type(glsl_vector_type(GLSL_TYPE_FLOAT16, 4), 0, 0); 324 else 325 continue; 326 327 char name[10]; 328 snprintf(name, sizeof(name), "in_%u", i); 329 nir_variable *in = nir_variable_create(b.shader, nir_var_shader_in, type, name); 330 in->data.location = semantic; 331 in->data.driver_location = num_inputs++; 332 333 snprintf(name, sizeof(name), "out_%u", i); 334 nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out, type, name); 335 out->data.location = semantic; 336 out->data.driver_location = num_outputs++; 337 338 /* no need to use copy_var to save a lower pass */ 339 nir_ssa_def *value = nir_load_array_var(&b, in, id); 340 nir_store_array_var(&b, out, id, value, 0xf); 341 } 342 343 b.shader->num_inputs = num_inputs; 344 b.shader->num_outputs = num_outputs; 345 346 b.shader->info.tess.tcs_vertices_out = sctx->patch_vertices; 347 348 return create_shader_state(sctx, b.shader); 349} 350