/*
 * Copyright (C) 2019-2021 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Implements the fragment pipeline (blending and writeout) in software, to be
 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
 * shader variant on typical GPUs. This pass is useful if hardware lacks
 * fixed-function blending in part or in full.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "nir_lower_blend.h"

/* Given processed factors, combine them per a blend function */

static nir_ssa_def *
nir_blend_func(
   nir_builder *b,
   enum blend_func func,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   switch (func) {
   case BLEND_FUNC_ADD:
      return nir_fadd(b, src, dst);
   case BLEND_FUNC_SUBTRACT:
      return nir_fsub(b, src, dst);
   case BLEND_FUNC_REVERSE_SUBTRACT:
      return nir_fsub(b, dst, src);
   case BLEND_FUNC_MIN:
      return nir_fmin(b, src, dst);
   case BLEND_FUNC_MAX:
      return nir_fmax(b, src, dst);
   }

   unreachable("Invalid blend function");
}

/* Does this blend function multiply by a blend factor? */

static bool
nir_blend_factored(enum blend_func func)
{
   switch (func) {
   case BLEND_FUNC_ADD:
   case BLEND_FUNC_SUBTRACT:
   case BLEND_FUNC_REVERSE_SUBTRACT:
      return true;
   default:
      return false;
   }
}
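
/* For the factored functions above, the full per-channel blend equation built
 * by nir_blend below is
 *
 *    out = (src * src_factor) FUNC (dst * dst_factor)
 *
 * while MIN and MAX skip the factors and operate on src and dst directly, per
 * section 17.3.6 ("Blending") of the OpenGL 4.5 specification.
 */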
/* Compute a src_alpha_saturate factor */
static nir_ssa_def *
nir_alpha_saturate(
   nir_builder *b,
   nir_ssa_def *src, nir_ssa_def *dst,
   unsigned chan)
{
   nir_ssa_def *Asrc = nir_channel(b, src, 3);
   nir_ssa_def *Adst = nir_channel(b, dst, 3);
   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
   nir_ssa_def *Adsti = nir_fsub(b, one, Adst);

   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
}

/* Returns a scalar single factor, unmultiplied */

static nir_ssa_def *
nir_blend_factor_value(
   nir_builder *b,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
   unsigned chan,
   enum blend_factor factor)
{
   switch (factor) {
   case BLEND_FACTOR_ZERO:
      return nir_imm_floatN_t(b, 0.0, src->bit_size);
   case BLEND_FACTOR_SRC_COLOR:
      return nir_channel(b, src, chan);
   case BLEND_FACTOR_SRC1_COLOR:
      return nir_channel(b, src1, chan);
   case BLEND_FACTOR_DST_COLOR:
      return nir_channel(b, dst, chan);
   case BLEND_FACTOR_SRC_ALPHA:
      return nir_channel(b, src, 3);
   case BLEND_FACTOR_SRC1_ALPHA:
      return nir_channel(b, src1, 3);
   case BLEND_FACTOR_DST_ALPHA:
      return nir_channel(b, dst, 3);
   case BLEND_FACTOR_CONSTANT_COLOR:
      return nir_channel(b, bconst, chan);
   case BLEND_FACTOR_CONSTANT_ALPHA:
      return nir_channel(b, bconst, 3);
   case BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return nir_alpha_saturate(b, src, dst, chan);
   }

   unreachable("Invalid blend factor");
}

static nir_ssa_def *
nir_blend_factor(
   nir_builder *b,
   nir_ssa_def *raw_scalar,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
   unsigned chan,
   enum blend_factor factor,
   bool inverted)
{
   nir_ssa_def *f =
      nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);

   if (inverted)
      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);

   return nir_fmul(b, raw_scalar, f);
}
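
/* Note that the blend factor enumeration handled above has no explicit ONE or
 * ONE_MINUS_* entries: a factor of ONE can be expressed as an inverted
 * BLEND_FACTOR_ZERO, and ONE_MINUS_X as X with the corresponding invert flag
 * set, since the inversion above computes (1.0 - f).
 */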
/* Given a colormask, "blend" with the destination */

static nir_ssa_def *
nir_color_mask(
   nir_builder *b,
   unsigned mask,
   nir_ssa_def *src,
   nir_ssa_def *dst)
{
   return nir_vec4(b,
         nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
         nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
         nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
         nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
}

static nir_ssa_def *
nir_logicop_func(
   nir_builder *b,
   unsigned func,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   switch (func) {
   case PIPE_LOGICOP_CLEAR:
      return nir_imm_ivec4(b, 0, 0, 0, 0);
   case PIPE_LOGICOP_NOR:
      return nir_inot(b, nir_ior(b, src, dst));
   case PIPE_LOGICOP_AND_INVERTED:
      return nir_iand(b, nir_inot(b, src), dst);
   case PIPE_LOGICOP_COPY_INVERTED:
      return nir_inot(b, src);
   case PIPE_LOGICOP_AND_REVERSE:
      return nir_iand(b, src, nir_inot(b, dst));
   case PIPE_LOGICOP_INVERT:
      return nir_inot(b, dst);
   case PIPE_LOGICOP_XOR:
      return nir_ixor(b, src, dst);
   case PIPE_LOGICOP_NAND:
      return nir_inot(b, nir_iand(b, src, dst));
   case PIPE_LOGICOP_AND:
      return nir_iand(b, src, dst);
   case PIPE_LOGICOP_EQUIV:
      return nir_inot(b, nir_ixor(b, src, dst));
   case PIPE_LOGICOP_NOOP:
      return dst;
   case PIPE_LOGICOP_OR_INVERTED:
      return nir_ior(b, nir_inot(b, src), dst);
   case PIPE_LOGICOP_COPY:
      return src;
   case PIPE_LOGICOP_OR_REVERSE:
      return nir_ior(b, src, nir_inot(b, dst));
   case PIPE_LOGICOP_OR:
      return nir_ior(b, src, dst);
   case PIPE_LOGICOP_SET:
      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
   }

   unreachable("Invalid logicop function");
}

static nir_ssa_def *
nir_blend_logicop(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   unsigned bit_size = src->bit_size;

   enum pipe_format format = options->format[rt];
   const struct util_format_description *format_desc =
      util_format_description(format);

   if (bit_size != 32) {
      src = nir_f2f32(b, src);
      dst = nir_f2f32(b, dst);
   }

   assert(src->num_components <= 4);
   assert(dst->num_components <= 4);

   unsigned bits[4];
   for (int i = 0; i < 4; ++i)
      bits[i] = format_desc->channel[i].size;

   /* Logic ops apply to the integer encoding of the pixel, so convert the
    * float inputs to the format's normalized integer representation first */
   if (util_format_is_unorm(format)) {
      src = nir_format_float_to_unorm(b, src, bits);
      dst = nir_format_float_to_unorm(b, dst, bits);
   } else if (util_format_is_snorm(format)) {
      src = nir_format_float_to_snorm(b, src, bits);
      dst = nir_format_float_to_snorm(b, dst, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   nir_ssa_def *out = nir_logicop_func(b, options->logicop_func, src, dst);

   /* Mask off any bits beyond each channel's width */
   if (bits[0] < 32) {
      nir_const_value mask[4];
      for (int i = 0; i < 4; ++i)
         mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);

      out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
   }

   /* Convert the normalized result back to float for the store */
   if (util_format_is_unorm(format)) {
      out = nir_format_unorm_to_float(b, out, bits);
   } else if (util_format_is_snorm(format)) {
      out = nir_format_snorm_to_float(b, out, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   if (bit_size == 16)
      out = nir_f2f16(b, out);

   return out;
}

static nir_ssa_def *
nir_fsat_signed(nir_builder *b, nir_ssa_def *x)
{
   return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
                     nir_imm_floatN_t(b, +1.0, x->bit_size));
}

/* Given a blend state, the source color, and the destination color,
 * return the blended color
 */

static nir_ssa_def *
nir_blend(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
{
   /* Grab the blend constant ahead of time */
   nir_ssa_def *bconst;
   if (options->scalar_blend_const) {
      bconst = nir_vec4(b,
            nir_load_blend_const_color_r_float(b),
            nir_load_blend_const_color_g_float(b),
            nir_load_blend_const_color_b_float(b),
            nir_load_blend_const_color_a_float(b));
   } else {
      bconst = nir_load_blend_const_color_rgba(b);
   }

   if (src->bit_size == 16)
      bconst = nir_f2f16(b, bconst);

   /* Fixed-point framebuffers require their inputs clamped. */
   enum pipe_format format = options->format[rt];

   /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
    *
    *    If the color buffer is fixed-point, the components of the source and
    *    destination values and blend factors are each clamped to [0, 1] or
    *    [-1, 1] respectively for an unsigned normalized or signed normalized
    *    color buffer prior to evaluating the blend equation. If the color
    *    buffer is floating-point, no clamping occurs.
    */
   if (util_format_is_unorm(format))
      src = nir_fsat(b, src);
   else if (util_format_is_snorm(format))
      src = nir_fsat_signed(b, src);

   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
   const struct util_format_description *desc =
      util_format_description(format);

   if (desc->nr_channels < 4) {
      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

      dst = nir_vec4(b, nir_channel(b, dst, 0),
            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
   }

   /* We blend per channel and recombine later */
   nir_ssa_def *channels[4];

   for (unsigned c = 0; c < 4; ++c) {
      /* Decide properties based on channel */
      nir_lower_blend_channel chan =
         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;

      nir_ssa_def *psrc = nir_channel(b, src, c);
      nir_ssa_def *pdst = nir_channel(b, dst, c);

      if (nir_blend_factored(chan.func)) {
         psrc = nir_blend_factor(
               b, psrc,
               src, src1, dst, bconst, c,
               chan.src_factor, chan.invert_src_factor);

         pdst = nir_blend_factor(
               b, pdst,
               src, src1, dst, bconst, c,
               chan.dst_factor, chan.invert_dst_factor);
      }

      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
   }

   return nir_vec(b, channels, 4);
}

static int
color_index_for_var(const nir_variable *var)
{
   if (var->data.location != FRAG_RESULT_COLOR &&
       var->data.location < FRAG_RESULT_DATA0)
      return -1;

   return (var->data.location == FRAG_RESULT_COLOR) ? 0 :
          (var->data.location - FRAG_RESULT_DATA0);
}

static bool
nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
                      const nir_lower_blend_options *options)
{
   assert(store->intrinsic == nir_intrinsic_store_deref);

   nir_variable *var = nir_intrinsic_get_var(store, 0);
   int rt = color_index_for_var(var);

   /* No blend lowering requested on this RT */
   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
      return false;

   b->cursor = nir_before_instr(&store->instr);

   /* Grab the input color. We always want 4 channels during blend. Dead
    * code will clean up any channels we don't need.
    */
   assert(store->src[1].is_ssa);
   nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4);

   /* Grab the previous fragment color */
   var->data.fb_fetch_output = true;
   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
   b->shader->info.fs.uses_fbfetch_output = true;
   nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4);

   /* Blend the two colors per the passed options */
   nir_ssa_def *blended = src;

   if (options->logicop_enable) {
      blended = nir_blend_logicop(b, options, rt, src, dst);
   } else if (!util_format_is_pure_integer(options->format[rt])) {
      assert(!util_format_is_scaled(options->format[rt]));
      blended = nir_blend(b, options, rt, src, options->src1, dst);
   }

   /* Apply a colormask */
   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);

   const unsigned num_components = glsl_get_vector_elements(var->type);

   /* Shave off any components we don't want to store */
   blended = nir_trim_vector(b, blended, num_components);

   /* Grow or shrink the store destination as needed */
   assert(nir_intrinsic_write_mask(store) ==
          nir_component_mask(store->num_components));
   store->num_components = num_components;
   store->dest.ssa.num_components = num_components;
   nir_intrinsic_set_write_mask(store, nir_component_mask(num_components));

   /* Write out the final color instead of the input */
   nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended);
   return true;
}

static bool
nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
{
   const nir_lower_blend_options *options = data;

   switch (instr->type) {
   case nir_instr_type_deref: {
      /* Fix up output deref types, as needed */
      nir_deref_instr *deref = nir_instr_as_deref(instr);
      if (!nir_deref_mode_is(deref, nir_var_shader_out))
         return false;

      /* Indirects must be already lowered and output variables split */
      assert(deref->deref_type == nir_deref_type_var);

      if (deref->type == deref->var->type)
         return false;

      deref->type = deref->var->type;
      return true;
   }

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (intrin->intrinsic != nir_intrinsic_load_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         return false;

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is(deref, nir_var_shader_out))
         return false;

      assert(glsl_type_is_vector_or_scalar(deref->type));

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         /* We need to fix up the load if num_components changed */
         const unsigned num_components = glsl_get_vector_elements(deref->type);
         if (intrin->num_components == num_components)
            return false;

         b->cursor = nir_after_instr(&intrin->instr);

         /* Resize the load to match the re-typed variable, then swizzle its
          * result back to the size the rest of the shader expects.
          */
         assert(intrin->dest.is_ssa);
         const unsigned old_num_components = intrin->num_components;
         intrin->num_components = num_components;
         intrin->dest.ssa.num_components = num_components;

         nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa,
                                              old_num_components);
         nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val,
                                        val->parent_instr);
         return true;
      } else {
         return nir_lower_blend_store(b, intrin, options);
      }
   }

   default:
      return false;
   }
}
/** Lower blending to framebuffer fetch and some math
 *
 * This pass requires that indirects are lowered and output variables split
 * so that we have a single output variable for each RT. We could go to the
 * effort of handling arrays (possibly of arrays) but, given that we need
 * indirects lowered anyway (we need constant indices to look up blend
 * functions and formats), we may as well require variables to be split.
 * This can be done by calling nir_lower_io_arrays_to_elements_no_indirect().
 */
void
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   /* Re-type any blended output variables to have the same number of
    * components as the image format. The GL 4.6 Spec says:
    *
    *    "If a fragment shader writes to none of gl_FragColor, gl_FragData,
    *     nor any user-defined output variables, the values of the fragment
    *     colors following shader execution are undefined, and may differ for
    *     each fragment color. If some, but not all elements of gl_FragData or
    *     of these user-defined output variables are written, the values of
    *     fragment colors corresponding to unwritten elements or variables are
    *     similarly undefined."
    *
    * Note the phrase "following shader execution". Those color values are
    * then supposed to go into blending which may, depending on the blend
    * mode, apply constraints that result in well-defined rendering. It's
    * fine if we have to pad out a value with undef but we then need to blend
    * that garbage value to ensure correct results.
    *
    * This may also, depending on output format, be a small optimization
    * allowing NIR to dead-code unused calculations.
    */
   nir_foreach_shader_out_variable(var, shader) {
      int rt = color_index_for_var(var);

      /* No blend lowering requested on this RT */
      if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
         continue;

      const unsigned num_format_components =
         util_format_get_nr_components(options->format[rt]);

      /* Indirects must be already lowered and output variables split */
      assert(glsl_type_is_vector_or_scalar(var->type));
      var->type = glsl_replace_vector_type(var->type, num_format_components);
   }

   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                (void *)options);
}
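
/* Illustrative usage sketch (not part of the pass): the option values below
 * are hypothetical stand-ins for whatever a driver derives from its blend
 * state; only the field names come from nir_lower_blend_options as used
 * above, and helper signatures should be checked against the tree. Classic
 * source-alpha blending on RT 0 might look like:
 *
 *    nir_lower_blend_options opts = { 0 };
 *    opts.format[0] = PIPE_FORMAT_R8G8B8A8_UNORM;
 *    opts.rt[0].colormask = 0xf;
 *    opts.rt[0].rgb.func = BLEND_FUNC_ADD;
 *    opts.rt[0].rgb.src_factor = BLEND_FACTOR_SRC_ALPHA;
 *    opts.rt[0].rgb.dst_factor = BLEND_FACTOR_SRC_ALPHA;
 *    opts.rt[0].rgb.invert_dst_factor = true;   (i.e. ONE_MINUS_SRC_ALPHA)
 *    opts.rt[0].alpha = opts.rt[0].rgb;
 *
 *    nir_lower_io_arrays_to_elements_no_indirect(shader, true);
 *    nir_lower_blend(shader, &opts);
 *
 * Leaving format[i] as PIPE_FORMAT_NONE skips lowering for that render
 * target, as the checks above show.
 */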