/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_shader.c which is:
 * Copyright © 2019 Google LLC
 *
 * Also derived from anv_pipeline.c which is
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_private.h"

#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_lower_blend.h"
#include "nir_conversion_builder.h"
#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "vk_shader_module.h"

#include "pan_shader.h"
#include "util/pan_lower_framebuffer.h"

#include "vk_util.h"

static void
panvk_init_sysvals(struct panfrost_sysvals *sysvals,
                   gl_shader_stage stage)
{
   memset(sysvals, 0, sizeof(*sysvals));

#define SYSVAL_SLOT(name) \
   (assert(offsetof(struct panvk_sysvals, name) % 16 == 0), \
    offsetof(struct panvk_sysvals, name) / 16)

#define INIT_SYSVAL(name, SYSVAL) \
   sysvals->sysvals[SYSVAL_SLOT(name)] = PAN_SYSVAL_##SYSVAL

   if (gl_shader_stage_is_compute(stage)) {
      INIT_SYSVAL(num_work_groups, NUM_WORK_GROUPS);
      INIT_SYSVAL(local_group_size, LOCAL_GROUP_SIZE);
   } else {
      INIT_SYSVAL(viewport_scale, VIEWPORT_SCALE);
      INIT_SYSVAL(viewport_offset, VIEWPORT_OFFSET);
      INIT_SYSVAL(vertex_instance_offsets, VERTEX_INSTANCE_OFFSETS);
      INIT_SYSVAL(blend_constants, BLEND_CONSTANTS);
   }
   sysvals->sysval_count = SYSVAL_SLOT(dyn_ssbos);

#undef SYSVAL_SLOT
#undef INIT_SYSVAL
}
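
/* Note on the helpers above: SYSVAL_SLOT() turns the byte offset of a
 * struct panvk_sysvals field into a vec4 slot index in the sysval UBO
 * (offset / 16), so a field at byte offset 32, for instance, lands in
 * slot 2, and the assert guarantees each field starts on a vec4 boundary.
 * sysval_count is set to the slot of dyn_ssbos, i.e. the number of fixed
 * sysval slots that precede the dynamic-SSBO area of the struct.
 */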

static bool
panvk_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
      return false;

   const nir_const_value *constants = data;

   b->cursor = nir_after_instr(instr);
   nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant);
   nir_instr_remove(instr);
   return true;
}

static void
panvk_lower_blend(struct panfrost_device *pdev,
                  nir_shader *nir,
                  struct panfrost_compile_inputs *inputs,
                  struct pan_blend_state *blend_state,
                  bool static_blend_constants)
{
   nir_lower_blend_options options = {
      .logicop_enable = blend_state->logicop_enable,
      .logicop_func = blend_state->logicop_func,
   };

   bool lower_blend = false;

   for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
      struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];

      if (!panvk_per_arch(blend_needs_lowering)(pdev, blend_state, rt))
         continue;

      enum pipe_format fmt = rt_state->format;

      options.format[rt] = fmt;
      options.rt[rt].colormask = rt_state->equation.color_mask;

      if (!rt_state->equation.blend_enable) {
         static const nir_lower_blend_channel replace = {
            .func = BLEND_FUNC_ADD,
            .src_factor = BLEND_FACTOR_ZERO,
            .invert_src_factor = true,
            .dst_factor = BLEND_FACTOR_ZERO,
            .invert_dst_factor = false,
         };

         options.rt[rt].rgb = replace;
         options.rt[rt].alpha = replace;
      } else {
         options.rt[rt].rgb.func = rt_state->equation.rgb_func;
         options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
         options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
         options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
         options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
         options.rt[rt].alpha.func = rt_state->equation.alpha_func;
         options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
         options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
         options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
         options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
      }

      /* Update the equation to force a color replacement */
      rt_state->equation.color_mask = 0xf;
      rt_state->equation.rgb_func = BLEND_FUNC_ADD;
      rt_state->equation.rgb_src_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.rgb_invert_src_factor = true;
      rt_state->equation.rgb_dst_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.rgb_invert_dst_factor = false;
      rt_state->equation.alpha_func = BLEND_FUNC_ADD;
      rt_state->equation.alpha_src_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.alpha_invert_src_factor = true;
      rt_state->equation.alpha_dst_factor = BLEND_FACTOR_ZERO;
      rt_state->equation.alpha_invert_dst_factor = false;
      lower_blend = true;

      inputs->bifrost.static_rt_conv = true;
      inputs->bifrost.rt_conv[rt] =
         GENX(pan_blend_get_internal_desc)(pdev, fmt, rt, 32, false) >> 32;
   }

   if (lower_blend) {
      NIR_PASS_V(nir, nir_lower_blend, &options);

      if (static_blend_constants) {
         const nir_const_value constants[4] = {
            { .f32 = CLAMP(blend_state->constants[0], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[1], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[2], 0.0f, 1.0f) },
            { .f32 = CLAMP(blend_state->constants[3], 0.0f, 1.0f) },
         };
         NIR_PASS_V(nir, nir_shader_instructions_pass,
                    panvk_inline_blend_constants,
                    nir_metadata_block_index |
                    nir_metadata_dominance,
                    (void *)constants);
      }
   }
}
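
/* In short: for every render target that panvk_per_arch(blend_needs_lowering)
 * flags, panvk_lower_blend() folds blending into the fragment shader via
 * nir_lower_blend, records a static render-target conversion descriptor for
 * the backend, and rewrites the RT equation to a plain replace so the
 * fixed-function path just stores whatever the shader computed.  With static
 * blend constants the pipeline's constants are clamped to [0, 1] and inlined
 * as immediates; otherwise load_blend_const_color_rgba is left in place to be
 * resolved at draw time, presumably via the blend_constants sysval set up in
 * panvk_init_sysvals().
 */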

static bool
panvk_lower_load_push_constant(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_push_constant)
      return false;

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *ubo_load =
      nir_load_ubo(b, nir_dest_num_components(intr->dest),
                   nir_dest_bit_size(intr->dest),
                   nir_imm_int(b, PANVK_PUSH_CONST_UBO_INDEX),
                   intr->src[0].ssa,
                   .align_mul = nir_dest_bit_size(intr->dest) / 8,
                   .align_offset = 0,
                   .range_base = nir_intrinsic_base(intr),
                   .range = nir_intrinsic_range(intr));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, ubo_load);
   nir_instr_remove(instr);
   return true;
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
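
/* For instance, shared_type_info() above reports a vec3 of 32-bit floats as
 * size 12 with alignment 16: three-component vectors take the alignment of a
 * four-component vector, matching std430-style layout rules.
 */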

struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
                              gl_shader_stage stage,
                              const VkPipelineShaderStageCreateInfo *stage_info,
                              const struct panvk_pipeline_layout *layout,
                              unsigned sysval_ubo,
                              struct pan_blend_state *blend_state,
                              bool static_blend_constants,
                              const VkAllocationCallbacks *alloc)
{
   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct panfrost_device *pdev = &dev->physical_device->pdev;
   struct panvk_shader *shader;

   shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   util_dynarray_init(&shader->binary, NULL);

   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .caps = {
         .variable_pointers = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = dev->vk.enabled_features.robustBufferAccess ?
                          nir_address_format_64bit_bounded_global :
                          nir_address_format_64bit_global_32bit_offset,
   };

   nir_shader *nir;
   VkResult result = vk_shader_module_to_nir(&dev->vk, module, stage,
                                             stage_info->pName,
                                             stage_info->pSpecializationInfo,
                                             &spirv_options,
                                             GENX(pan_shader_get_compiler_options)(),
                                             NULL, &nir);
   if (result != VK_SUCCESS) {
      vk_free2(&dev->vk.alloc, alloc, shader);
      return NULL;
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, true);

   struct panfrost_sysvals fixed_sysvals;
   panvk_init_sysvals(&fixed_sysvals, stage);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .no_ubo_to_push = true,
      .no_idvs = true, /* TODO */
      .fixed_sysval_ubo = sysval_ubo,
      .fixed_sysval_layout = &fixed_sysvals,
   };

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_shader_in | nir_var_shader_out,
              UINT32_MAX);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
   NIR_PASS_V(nir, nir_opt_trivial_continues);

   /* Do texture lowering here. Yes, it's a duplication of the texture
    * lowering in bifrost_compile. However, we need to lower texture stuff
    * now, before we call panvk_per_arch(nir_lower_descriptors)() because some
    * of the texture lowering generates nir_texop_txs which we handle as part
    * of descriptor lowering.
    *
    * TODO: We really should be doing this in common code, not duplicated in
    * panvk. In order to do that, we need to rework the panfrost compile
    * flow to look more like the Intel flow:
    *
    * 1. Compile SPIR-V to NIR and maybe do a tiny bit of lowering that needs
    *    to be done really early.
    *
    * 2. bi_preprocess_nir: Does common lowering and runs the optimization
    *    loop. Nothing here should be API-specific.
    *
    * 3. Do additional lowering in panvk
    *
    * 4. bi_postprocess_nir: Does final lowering and runs the optimization
    *    loop again. This can happen as part of the final compile.
    *
    * This would give us a better place to do panvk-specific lowering.
    */
   nir_lower_tex_options lower_tex_options = {
      .lower_txs_lod = true,
      .lower_txp = ~0,
      .lower_tg4_broadcom_swizzle = true,
      .lower_txd = true,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);

   NIR_PASS_V(nir, panvk_per_arch(nir_lower_descriptors),
              dev, layout, &shader->has_img_access);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              spirv_options.ssbo_addr_format);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   if (gl_shader_stage_uses_workgroup(stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared,
                    shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   NIR_PASS_V(nir, nir_shader_instructions_pass,
              panvk_lower_load_push_constant,
              nir_metadata_block_index |
              nir_metadata_dominance,
              (void *)layout);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   /* We have to run nir_lower_blend() after we've gotten rid of copies (it
    * requires load/store) and before we assign output locations.
    */
   if (stage == MESA_SHADER_FRAGMENT) {
      /* This is required for nir_lower_blend */
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
      panvk_lower_blend(pdev, nir, &inputs, blend_state, static_blend_constants);
   }

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);

   /* Needed to turn shader_temp into function_temp since the backend only
    * handles the latter for now.
    */
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);

   /* System values shouldn't have changed */
   assert(memcmp(&shader->info.sysvals, &fixed_sysvals,
                 sizeof(fixed_sysvals)) == 0);

   /* Patch the descriptor count */
   shader->info.ubo_count = PANVK_NUM_BUILTIN_UBOS +
                            layout->num_ubos + layout->num_dyn_ubos;
   shader->info.sampler_count = layout->num_samplers;
   shader->info.texture_count = layout->num_textures;
   if (shader->has_img_access)
      shader->info.attribute_count += layout->num_imgs;

   shader->sysval_ubo = sysval_ubo;
   shader->local_size.x = nir->info.workgroup_size[0];
   shader->local_size.y = nir->info.workgroup_size[1];
   shader->local_size.z = nir->info.workgroup_size[2];

   ralloc_free(nir);

   return shader;
}