1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2019-2021 Collabora, Ltd. 3bf215546Sopenharmony_ci * Copyright (C) 2019 Alyssa Rosenzweig 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci/** 26bf215546Sopenharmony_ci * @file 27bf215546Sopenharmony_ci * 28bf215546Sopenharmony_ci * Implements the fragment pipeline (blending and writeout) in software, to be 29bf215546Sopenharmony_ci * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment 30bf215546Sopenharmony_ci * shader variant on typical GPUs. This pass is useful if hardware lacks 31bf215546Sopenharmony_ci * fixed-function blending in part or in full. 32bf215546Sopenharmony_ci */ 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 35bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 36bf215546Sopenharmony_ci#include "compiler/nir/nir_format_convert.h" 37bf215546Sopenharmony_ci#include "nir_lower_blend.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci/* Given processed factors, combine them per a blend function */ 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistatic nir_ssa_def * 42bf215546Sopenharmony_cinir_blend_func( 43bf215546Sopenharmony_ci nir_builder *b, 44bf215546Sopenharmony_ci enum blend_func func, 45bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *dst) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci switch (func) { 48bf215546Sopenharmony_ci case BLEND_FUNC_ADD: 49bf215546Sopenharmony_ci return nir_fadd(b, src, dst); 50bf215546Sopenharmony_ci case BLEND_FUNC_SUBTRACT: 51bf215546Sopenharmony_ci return nir_fsub(b, src, dst); 52bf215546Sopenharmony_ci case BLEND_FUNC_REVERSE_SUBTRACT: 53bf215546Sopenharmony_ci return nir_fsub(b, dst, src); 54bf215546Sopenharmony_ci case BLEND_FUNC_MIN: 55bf215546Sopenharmony_ci return nir_fmin(b, src, dst); 56bf215546Sopenharmony_ci case BLEND_FUNC_MAX: 57bf215546Sopenharmony_ci return nir_fmax(b, src, dst); 58bf215546Sopenharmony_ci } 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci unreachable("Invalid blend function"); 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci/* Does this blend function multiply by a blend factor? */ 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_cistatic bool 66bf215546Sopenharmony_cinir_blend_factored(enum blend_func func) 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci switch (func) { 69bf215546Sopenharmony_ci case BLEND_FUNC_ADD: 70bf215546Sopenharmony_ci case BLEND_FUNC_SUBTRACT: 71bf215546Sopenharmony_ci case BLEND_FUNC_REVERSE_SUBTRACT: 72bf215546Sopenharmony_ci return true; 73bf215546Sopenharmony_ci default: 74bf215546Sopenharmony_ci return false; 75bf215546Sopenharmony_ci } 76bf215546Sopenharmony_ci} 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci/* Compute a src_alpha_saturate factor */ 79bf215546Sopenharmony_cistatic nir_ssa_def * 80bf215546Sopenharmony_cinir_alpha_saturate( 81bf215546Sopenharmony_ci nir_builder *b, 82bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *dst, 83bf215546Sopenharmony_ci unsigned chan) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci nir_ssa_def *Asrc = nir_channel(b, src, 3); 86bf215546Sopenharmony_ci nir_ssa_def *Adst = nir_channel(b, dst, 3); 87bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size); 88bf215546Sopenharmony_ci nir_ssa_def *Adsti = nir_fsub(b, one, Adst); 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one; 91bf215546Sopenharmony_ci} 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci/* Returns a scalar single factor, unmultiplied */ 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_cistatic nir_ssa_def * 96bf215546Sopenharmony_cinir_blend_factor_value( 97bf215546Sopenharmony_ci nir_builder *b, 98bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst, 99bf215546Sopenharmony_ci unsigned chan, 100bf215546Sopenharmony_ci enum blend_factor factor) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci switch (factor) { 103bf215546Sopenharmony_ci case BLEND_FACTOR_ZERO: 104bf215546Sopenharmony_ci return nir_imm_floatN_t(b, 0.0, src->bit_size); 105bf215546Sopenharmony_ci case BLEND_FACTOR_SRC_COLOR: 106bf215546Sopenharmony_ci return nir_channel(b, src, chan); 107bf215546Sopenharmony_ci case BLEND_FACTOR_SRC1_COLOR: 108bf215546Sopenharmony_ci return nir_channel(b, src1, chan); 109bf215546Sopenharmony_ci case BLEND_FACTOR_DST_COLOR: 110bf215546Sopenharmony_ci return nir_channel(b, dst, chan); 111bf215546Sopenharmony_ci case BLEND_FACTOR_SRC_ALPHA: 112bf215546Sopenharmony_ci return nir_channel(b, src, 3); 113bf215546Sopenharmony_ci case BLEND_FACTOR_SRC1_ALPHA: 114bf215546Sopenharmony_ci return nir_channel(b, src1, 3); 115bf215546Sopenharmony_ci case BLEND_FACTOR_DST_ALPHA: 116bf215546Sopenharmony_ci return nir_channel(b, dst, 3); 117bf215546Sopenharmony_ci case BLEND_FACTOR_CONSTANT_COLOR: 118bf215546Sopenharmony_ci return nir_channel(b, bconst, chan); 119bf215546Sopenharmony_ci case BLEND_FACTOR_CONSTANT_ALPHA: 120bf215546Sopenharmony_ci return nir_channel(b, bconst, 3); 121bf215546Sopenharmony_ci case BLEND_FACTOR_SRC_ALPHA_SATURATE: 122bf215546Sopenharmony_ci return nir_alpha_saturate(b, src, dst, chan); 123bf215546Sopenharmony_ci } 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci unreachable("Invalid blend factor"); 126bf215546Sopenharmony_ci} 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_cistatic nir_ssa_def * 129bf215546Sopenharmony_cinir_blend_factor( 130bf215546Sopenharmony_ci nir_builder *b, 131bf215546Sopenharmony_ci nir_ssa_def *raw_scalar, 132bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst, 133bf215546Sopenharmony_ci unsigned chan, 134bf215546Sopenharmony_ci enum blend_factor factor, 135bf215546Sopenharmony_ci bool inverted) 136bf215546Sopenharmony_ci{ 137bf215546Sopenharmony_ci nir_ssa_def *f = 138bf215546Sopenharmony_ci nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (inverted) 141bf215546Sopenharmony_ci f = nir_fadd_imm(b, nir_fneg(b, f), 1.0); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci return nir_fmul(b, raw_scalar, f); 144bf215546Sopenharmony_ci} 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci/* Given a colormask, "blend" with the destination */ 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_cistatic nir_ssa_def * 149bf215546Sopenharmony_cinir_color_mask( 150bf215546Sopenharmony_ci nir_builder *b, 151bf215546Sopenharmony_ci unsigned mask, 152bf215546Sopenharmony_ci nir_ssa_def *src, 153bf215546Sopenharmony_ci nir_ssa_def *dst) 154bf215546Sopenharmony_ci{ 155bf215546Sopenharmony_ci return nir_vec4(b, 156bf215546Sopenharmony_ci nir_channel(b, (mask & (1 << 0)) ? src : dst, 0), 157bf215546Sopenharmony_ci nir_channel(b, (mask & (1 << 1)) ? src : dst, 1), 158bf215546Sopenharmony_ci nir_channel(b, (mask & (1 << 2)) ? src : dst, 2), 159bf215546Sopenharmony_ci nir_channel(b, (mask & (1 << 3)) ? src : dst, 3)); 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic nir_ssa_def * 163bf215546Sopenharmony_cinir_logicop_func( 164bf215546Sopenharmony_ci nir_builder *b, 165bf215546Sopenharmony_ci unsigned func, 166bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *dst) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci switch (func) { 169bf215546Sopenharmony_ci case PIPE_LOGICOP_CLEAR: 170bf215546Sopenharmony_ci return nir_imm_ivec4(b, 0, 0, 0, 0); 171bf215546Sopenharmony_ci case PIPE_LOGICOP_NOR: 172bf215546Sopenharmony_ci return nir_inot(b, nir_ior(b, src, dst)); 173bf215546Sopenharmony_ci case PIPE_LOGICOP_AND_INVERTED: 174bf215546Sopenharmony_ci return nir_iand(b, nir_inot(b, src), dst); 175bf215546Sopenharmony_ci case PIPE_LOGICOP_COPY_INVERTED: 176bf215546Sopenharmony_ci return nir_inot(b, src); 177bf215546Sopenharmony_ci case PIPE_LOGICOP_AND_REVERSE: 178bf215546Sopenharmony_ci return nir_iand(b, src, nir_inot(b, dst)); 179bf215546Sopenharmony_ci case PIPE_LOGICOP_INVERT: 180bf215546Sopenharmony_ci return nir_inot(b, dst); 181bf215546Sopenharmony_ci case PIPE_LOGICOP_XOR: 182bf215546Sopenharmony_ci return nir_ixor(b, src, dst); 183bf215546Sopenharmony_ci case PIPE_LOGICOP_NAND: 184bf215546Sopenharmony_ci return nir_inot(b, nir_iand(b, src, dst)); 185bf215546Sopenharmony_ci case PIPE_LOGICOP_AND: 186bf215546Sopenharmony_ci return nir_iand(b, src, dst); 187bf215546Sopenharmony_ci case PIPE_LOGICOP_EQUIV: 188bf215546Sopenharmony_ci return nir_inot(b, nir_ixor(b, src, dst)); 189bf215546Sopenharmony_ci case PIPE_LOGICOP_NOOP: 190bf215546Sopenharmony_ci return dst; 191bf215546Sopenharmony_ci case PIPE_LOGICOP_OR_INVERTED: 192bf215546Sopenharmony_ci return nir_ior(b, nir_inot(b, src), dst); 193bf215546Sopenharmony_ci case PIPE_LOGICOP_COPY: 194bf215546Sopenharmony_ci return src; 195bf215546Sopenharmony_ci case PIPE_LOGICOP_OR_REVERSE: 196bf215546Sopenharmony_ci return nir_ior(b, src, nir_inot(b, dst)); 197bf215546Sopenharmony_ci case PIPE_LOGICOP_OR: 198bf215546Sopenharmony_ci return nir_ior(b, src, dst); 199bf215546Sopenharmony_ci case PIPE_LOGICOP_SET: 200bf215546Sopenharmony_ci return nir_imm_ivec4(b, ~0, ~0, ~0, ~0); 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci unreachable("Invalid logciop function"); 204bf215546Sopenharmony_ci} 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_cistatic nir_ssa_def * 207bf215546Sopenharmony_cinir_blend_logicop( 208bf215546Sopenharmony_ci nir_builder *b, 209bf215546Sopenharmony_ci const nir_lower_blend_options *options, 210bf215546Sopenharmony_ci unsigned rt, 211bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *dst) 212bf215546Sopenharmony_ci{ 213bf215546Sopenharmony_ci unsigned bit_size = src->bit_size; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci enum pipe_format format = options->format[rt]; 216bf215546Sopenharmony_ci const struct util_format_description *format_desc = 217bf215546Sopenharmony_ci util_format_description(format); 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci if (bit_size != 32) { 220bf215546Sopenharmony_ci src = nir_f2f32(b, src); 221bf215546Sopenharmony_ci dst = nir_f2f32(b, dst); 222bf215546Sopenharmony_ci } 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci assert(src->num_components <= 4); 225bf215546Sopenharmony_ci assert(dst->num_components <= 4); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci unsigned bits[4]; 228bf215546Sopenharmony_ci for (int i = 0; i < 4; ++i) 229bf215546Sopenharmony_ci bits[i] = format_desc->channel[i].size; 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci if (util_format_is_unorm(format)) { 232bf215546Sopenharmony_ci src = nir_format_float_to_unorm(b, src, bits); 233bf215546Sopenharmony_ci dst = nir_format_float_to_unorm(b, dst, bits); 234bf215546Sopenharmony_ci } else if (util_format_is_snorm(format)) { 235bf215546Sopenharmony_ci src = nir_format_float_to_snorm(b, src, bits); 236bf215546Sopenharmony_ci dst = nir_format_float_to_snorm(b, dst, bits); 237bf215546Sopenharmony_ci } else { 238bf215546Sopenharmony_ci assert(util_format_is_pure_integer(format)); 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci nir_ssa_def *out = nir_logicop_func(b, options->logicop_func, src, dst); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci if (bits[0] < 32) { 244bf215546Sopenharmony_ci nir_const_value mask[4]; 245bf215546Sopenharmony_ci for (int i = 0; i < 4; ++i) 246bf215546Sopenharmony_ci mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask)); 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci if (util_format_is_unorm(format)) { 252bf215546Sopenharmony_ci out = nir_format_unorm_to_float(b, out, bits); 253bf215546Sopenharmony_ci } else if (util_format_is_snorm(format)) { 254bf215546Sopenharmony_ci out = nir_format_snorm_to_float(b, out, bits); 255bf215546Sopenharmony_ci } else { 256bf215546Sopenharmony_ci assert(util_format_is_pure_integer(format)); 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci if (bit_size == 16) 260bf215546Sopenharmony_ci out = nir_f2f16(b, out); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci return out; 263bf215546Sopenharmony_ci} 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_cistatic nir_ssa_def * 266bf215546Sopenharmony_cinir_fsat_signed(nir_builder *b, nir_ssa_def *x) 267bf215546Sopenharmony_ci{ 268bf215546Sopenharmony_ci return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size), 269bf215546Sopenharmony_ci nir_imm_floatN_t(b, +1.0, x->bit_size)); 270bf215546Sopenharmony_ci} 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci/* Given a blend state, the source color, and the destination color, 273bf215546Sopenharmony_ci * return the blended color 274bf215546Sopenharmony_ci */ 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_cistatic nir_ssa_def * 277bf215546Sopenharmony_cinir_blend( 278bf215546Sopenharmony_ci nir_builder *b, 279bf215546Sopenharmony_ci const nir_lower_blend_options *options, 280bf215546Sopenharmony_ci unsigned rt, 281bf215546Sopenharmony_ci nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst) 282bf215546Sopenharmony_ci{ 283bf215546Sopenharmony_ci /* Grab the blend constant ahead of time */ 284bf215546Sopenharmony_ci nir_ssa_def *bconst; 285bf215546Sopenharmony_ci if (options->scalar_blend_const) { 286bf215546Sopenharmony_ci bconst = nir_vec4(b, 287bf215546Sopenharmony_ci nir_load_blend_const_color_r_float(b), 288bf215546Sopenharmony_ci nir_load_blend_const_color_g_float(b), 289bf215546Sopenharmony_ci nir_load_blend_const_color_b_float(b), 290bf215546Sopenharmony_ci nir_load_blend_const_color_a_float(b)); 291bf215546Sopenharmony_ci } else { 292bf215546Sopenharmony_ci bconst = nir_load_blend_const_color_rgba(b); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci if (src->bit_size == 16) 296bf215546Sopenharmony_ci bconst = nir_f2f16(b, bconst); 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci /* Fixed-point framebuffers require their inputs clamped. */ 299bf215546Sopenharmony_ci enum pipe_format format = options->format[rt]; 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec: 302bf215546Sopenharmony_ci * 303bf215546Sopenharmony_ci * If the color buffer is fixed-point, the components of the source and 304bf215546Sopenharmony_ci * destination values and blend factors are each clamped to [0, 1] or 305bf215546Sopenharmony_ci * [-1, 1] respectively for an unsigned normalized or signed normalized 306bf215546Sopenharmony_ci * color buffer prior to evaluating the blend equation. If the color 307bf215546Sopenharmony_ci * buffer is floating-point, no clamping occurs. 308bf215546Sopenharmony_ci */ 309bf215546Sopenharmony_ci if (util_format_is_unorm(format)) 310bf215546Sopenharmony_ci src = nir_fsat(b, src); 311bf215546Sopenharmony_ci else if (util_format_is_snorm(format)) 312bf215546Sopenharmony_ci src = nir_fsat_signed(b, src); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci /* DST_ALPHA reads back 1.0 if there is no alpha channel */ 315bf215546Sopenharmony_ci const struct util_format_description *desc = 316bf215546Sopenharmony_ci util_format_description(format); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci if (desc->nr_channels < 4) { 319bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size); 320bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci dst = nir_vec4(b, nir_channel(b, dst, 0), 323bf215546Sopenharmony_ci desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero, 324bf215546Sopenharmony_ci desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero, 325bf215546Sopenharmony_ci desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one); 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* We blend per channel and recombine later */ 329bf215546Sopenharmony_ci nir_ssa_def *channels[4]; 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; ++c) { 332bf215546Sopenharmony_ci /* Decide properties based on channel */ 333bf215546Sopenharmony_ci nir_lower_blend_channel chan = 334bf215546Sopenharmony_ci (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci nir_ssa_def *psrc = nir_channel(b, src, c); 337bf215546Sopenharmony_ci nir_ssa_def *pdst = nir_channel(b, dst, c); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci if (nir_blend_factored(chan.func)) { 340bf215546Sopenharmony_ci psrc = nir_blend_factor( 341bf215546Sopenharmony_ci b, psrc, 342bf215546Sopenharmony_ci src, src1, dst, bconst, c, 343bf215546Sopenharmony_ci chan.src_factor, chan.invert_src_factor); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci pdst = nir_blend_factor( 346bf215546Sopenharmony_ci b, pdst, 347bf215546Sopenharmony_ci src, src1, dst, bconst, c, 348bf215546Sopenharmony_ci chan.dst_factor, chan.invert_dst_factor); 349bf215546Sopenharmony_ci } 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci channels[c] = nir_blend_func(b, chan.func, psrc, pdst); 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci return nir_vec(b, channels, 4); 355bf215546Sopenharmony_ci} 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_cistatic int 358bf215546Sopenharmony_cicolor_index_for_var(const nir_variable *var) 359bf215546Sopenharmony_ci{ 360bf215546Sopenharmony_ci if (var->data.location != FRAG_RESULT_COLOR && 361bf215546Sopenharmony_ci var->data.location < FRAG_RESULT_DATA0) 362bf215546Sopenharmony_ci return -1; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci return (var->data.location == FRAG_RESULT_COLOR) ? 0 : 365bf215546Sopenharmony_ci (var->data.location - FRAG_RESULT_DATA0); 366bf215546Sopenharmony_ci} 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_cistatic bool 369bf215546Sopenharmony_cinir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store, 370bf215546Sopenharmony_ci const nir_lower_blend_options *options) 371bf215546Sopenharmony_ci{ 372bf215546Sopenharmony_ci assert(store->intrinsic == nir_intrinsic_store_deref); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci nir_variable *var = nir_intrinsic_get_var(store, 0); 375bf215546Sopenharmony_ci int rt = color_index_for_var(var); 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci /* No blend lowering requested on this RT */ 378bf215546Sopenharmony_ci if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE) 379bf215546Sopenharmony_ci return false; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci b->cursor = nir_before_instr(&store->instr); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci /* Grab the input color. We always want 4 channels during blend. Dead 384bf215546Sopenharmony_ci * code will clean up any channels we don't need. 385bf215546Sopenharmony_ci */ 386bf215546Sopenharmony_ci assert(store->src[1].is_ssa); 387bf215546Sopenharmony_ci nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4); 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci /* Grab the previous fragment color */ 390bf215546Sopenharmony_ci var->data.fb_fetch_output = true; 391bf215546Sopenharmony_ci b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location); 392bf215546Sopenharmony_ci b->shader->info.fs.uses_fbfetch_output = true; 393bf215546Sopenharmony_ci nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4); 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci /* Blend the two colors per the passed options */ 396bf215546Sopenharmony_ci nir_ssa_def *blended = src; 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci if (options->logicop_enable) { 399bf215546Sopenharmony_ci blended = nir_blend_logicop(b, options, rt, src, dst); 400bf215546Sopenharmony_ci } else if (!util_format_is_pure_integer(options->format[rt])) { 401bf215546Sopenharmony_ci assert(!util_format_is_scaled(options->format[rt])); 402bf215546Sopenharmony_ci blended = nir_blend(b, options, rt, src, options->src1, dst); 403bf215546Sopenharmony_ci } 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci /* Apply a colormask */ 406bf215546Sopenharmony_ci blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst); 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci const unsigned num_components = glsl_get_vector_elements(var->type); 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci /* Shave off any components we don't want to store */ 411bf215546Sopenharmony_ci blended = nir_trim_vector(b, blended, num_components); 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci /* Grow or shrink the store destination as needed */ 414bf215546Sopenharmony_ci assert(nir_intrinsic_write_mask(store) == 415bf215546Sopenharmony_ci nir_component_mask(store->num_components)); 416bf215546Sopenharmony_ci store->num_components = num_components; 417bf215546Sopenharmony_ci store->dest.ssa.num_components = num_components; 418bf215546Sopenharmony_ci nir_intrinsic_set_write_mask(store, nir_component_mask(num_components)); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci /* Write out the final color instead of the input */ 421bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended); 422bf215546Sopenharmony_ci return true; 423bf215546Sopenharmony_ci} 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_cistatic bool 426bf215546Sopenharmony_cinir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) 427bf215546Sopenharmony_ci{ 428bf215546Sopenharmony_ci const nir_lower_blend_options *options = data; 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci switch (instr->type) { 431bf215546Sopenharmony_ci case nir_instr_type_deref: { 432bf215546Sopenharmony_ci /* Fix up output deref types, as needed */ 433bf215546Sopenharmony_ci nir_deref_instr *deref = nir_instr_as_deref(instr); 434bf215546Sopenharmony_ci if (!nir_deref_mode_is(deref, nir_var_shader_out)) 435bf215546Sopenharmony_ci return false; 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci /* Indirects must be already lowered and output variables split */ 438bf215546Sopenharmony_ci assert(deref->deref_type == nir_deref_type_var); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci if (deref->type == deref->var->type) 441bf215546Sopenharmony_ci return false; 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci deref->type = deref->var->type; 444bf215546Sopenharmony_ci return true; 445bf215546Sopenharmony_ci } 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 448bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 449bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_load_deref && 450bf215546Sopenharmony_ci intrin->intrinsic != nir_intrinsic_store_deref) 451bf215546Sopenharmony_ci return false; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 454bf215546Sopenharmony_ci if (!nir_deref_mode_is(deref, nir_var_shader_out)) 455bf215546Sopenharmony_ci return false; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(deref->type)); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_deref) { 460bf215546Sopenharmony_ci /* We need to fix up framebuffer if num_components changed */ 461bf215546Sopenharmony_ci const unsigned num_components = glsl_get_vector_elements(deref->type); 462bf215546Sopenharmony_ci if (intrin->num_components == num_components) 463bf215546Sopenharmony_ci return false; 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci b->cursor = nir_after_instr(&intrin->instr); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci assert(intrin->dest.is_ssa); 468bf215546Sopenharmony_ci nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa, 469bf215546Sopenharmony_ci num_components); 470bf215546Sopenharmony_ci intrin->num_components = num_components, 471bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val, 472bf215546Sopenharmony_ci val->parent_instr); 473bf215546Sopenharmony_ci return true; 474bf215546Sopenharmony_ci } else { 475bf215546Sopenharmony_ci return nir_lower_blend_store(b, intrin, options); 476bf215546Sopenharmony_ci } 477bf215546Sopenharmony_ci } 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci default: 480bf215546Sopenharmony_ci return false; 481bf215546Sopenharmony_ci } 482bf215546Sopenharmony_ci} 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci/** Lower blending to framebuffer fetch and some math 485bf215546Sopenharmony_ci * 486bf215546Sopenharmony_ci * This pass requires that indirects are lowered and output variables split 487bf215546Sopenharmony_ci * so that we have a single output variable for each RT. We could go to the 488bf215546Sopenharmony_ci * effort of handling arrays (possibly of arrays) but, given that we need 489bf215546Sopenharmony_ci * indirects lowered anyway (we need constant indices to look up blend 490bf215546Sopenharmony_ci * functions and formats), we may as well require variables to be split. 491bf215546Sopenharmony_ci * This can be done by calling nir_lower_io_arrays_to_elements_no_indirect(). 492bf215546Sopenharmony_ci */ 493bf215546Sopenharmony_civoid 494bf215546Sopenharmony_cinir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options) 495bf215546Sopenharmony_ci{ 496bf215546Sopenharmony_ci assert(shader->info.stage == MESA_SHADER_FRAGMENT); 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci /* Re-type any blended output variables to have the same number of 499bf215546Sopenharmony_ci * components as the image format. The GL 4.6 Spec says: 500bf215546Sopenharmony_ci * 501bf215546Sopenharmony_ci * "If a fragment shader writes to none of gl_FragColor, gl_FragData, 502bf215546Sopenharmony_ci * nor any user-defined output variables, the values of the fragment 503bf215546Sopenharmony_ci * colors following shader execution are undefined, and may differ for 504bf215546Sopenharmony_ci * each fragment color. If some, but not all elements of gl_FragData or 505bf215546Sopenharmony_ci * of theser-defined output variables are written, the values of 506bf215546Sopenharmony_ci * fragment colors corresponding to unwritten elements orariables are 507bf215546Sopenharmony_ci * similarly undefined." 508bf215546Sopenharmony_ci * 509bf215546Sopenharmony_ci * Note the phrase "following shader execution". Those color values are 510bf215546Sopenharmony_ci * then supposed to go into blending which may, depending on the blend 511bf215546Sopenharmony_ci * mode, apply constraints that result in well-defined rendering. It's 512bf215546Sopenharmony_ci * fine if we have to pad out a value with undef but we then need to blend 513bf215546Sopenharmony_ci * that garbage value to ensure correct results. 514bf215546Sopenharmony_ci * 515bf215546Sopenharmony_ci * This may also, depending on output format, be a small optimization 516bf215546Sopenharmony_ci * allowing NIR to dead-code unused calculations. 517bf215546Sopenharmony_ci */ 518bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, shader) { 519bf215546Sopenharmony_ci int rt = color_index_for_var(var); 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci /* No blend lowering requested on this RT */ 522bf215546Sopenharmony_ci if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE) 523bf215546Sopenharmony_ci continue; 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci const unsigned num_format_components = 526bf215546Sopenharmony_ci util_format_get_nr_components(options->format[rt]); 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci /* Indirects must be already lowered and output variables split */ 529bf215546Sopenharmony_ci assert(glsl_type_is_vector_or_scalar(var->type)); 530bf215546Sopenharmony_ci var->type = glsl_replace_vector_type(var->type, num_format_components); 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci nir_shader_instructions_pass(shader, nir_lower_blend_instr, 534bf215546Sopenharmony_ci nir_metadata_block_index | 535bf215546Sopenharmony_ci nir_metadata_dominance, 536bf215546Sopenharmony_ci (void *)options); 537bf215546Sopenharmony_ci} 538