/*
 * Copyright (C) 2019-2021 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Implements the fragment pipeline (blending and writeout) in software, to be
 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
 * shader variant on typical GPUs. This pass is useful if hardware lacks
 * fixed-function blending in part or in full.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "nir_lower_blend.h"

/* Given processed factors, combine them per a blend function */

static nir_ssa_def *
nir_blend_func(
   nir_builder *b,
   enum blend_func func,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   switch (func) {
   case BLEND_FUNC_ADD:
      return nir_fadd(b, src, dst);
   case BLEND_FUNC_SUBTRACT:
      return nir_fsub(b, src, dst);
   case BLEND_FUNC_REVERSE_SUBTRACT:
      return nir_fsub(b, dst, src);
   case BLEND_FUNC_MIN:
      return nir_fmin(b, src, dst);
   case BLEND_FUNC_MAX:
      return nir_fmax(b, src, dst);
   }

   unreachable("Invalid blend function");
}

/* Does this blend function multiply by a blend factor? */

static bool
nir_blend_factored(enum blend_func func)
{
   switch (func) {
   case BLEND_FUNC_ADD:
   case BLEND_FUNC_SUBTRACT:
   case BLEND_FUNC_REVERSE_SUBTRACT:
      return true;
   default:
      return false;
   }
}

/* Compute a src_alpha_saturate factor */

static nir_ssa_def *
nir_alpha_saturate(
   nir_builder *b,
   nir_ssa_def *src, nir_ssa_def *dst,
   unsigned chan)
{
   nir_ssa_def *Asrc = nir_channel(b, src, 3);
   nir_ssa_def *Adst = nir_channel(b, dst, 3);
   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
   nir_ssa_def *Adsti = nir_fsub(b, one, Adst);

   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
}
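/* A worked example of the helpers above (illustrative only, not an extra
 * code path): with func = BLEND_FUNC_ADD, src_factor = BLEND_FACTOR_SRC_ALPHA,
 * and dst_factor = BLEND_FACTOR_SRC_ALPHA with invert_dst_factor set, the
 * lowered code computes, per RGB channel c:
 *
 *    out.c = fadd(fmul(src.c, src.a), fmul(dst.c, 1.0 - src.a))
 *
 * i.e. the classic "source over" equation out = As*src + (1 - As)*dst.
 */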
/* Returns a single scalar factor, unmultiplied */

static nir_ssa_def *
nir_blend_factor_value(
   nir_builder *b,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
   unsigned chan,
   enum blend_factor factor)
{
   switch (factor) {
   case BLEND_FACTOR_ZERO:
      return nir_imm_floatN_t(b, 0.0, src->bit_size);
   case BLEND_FACTOR_SRC_COLOR:
      return nir_channel(b, src, chan);
   case BLEND_FACTOR_SRC1_COLOR:
      return nir_channel(b, src1, chan);
   case BLEND_FACTOR_DST_COLOR:
      return nir_channel(b, dst, chan);
   case BLEND_FACTOR_SRC_ALPHA:
      return nir_channel(b, src, 3);
   case BLEND_FACTOR_SRC1_ALPHA:
      return nir_channel(b, src1, 3);
   case BLEND_FACTOR_DST_ALPHA:
      return nir_channel(b, dst, 3);
   case BLEND_FACTOR_CONSTANT_COLOR:
      return nir_channel(b, bconst, chan);
   case BLEND_FACTOR_CONSTANT_ALPHA:
      return nir_channel(b, bconst, 3);
   case BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return nir_alpha_saturate(b, src, dst, chan);
   }

   unreachable("Invalid blend factor");
}

static nir_ssa_def *
nir_blend_factor(
   nir_builder *b,
   nir_ssa_def *raw_scalar,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
   unsigned chan,
   enum blend_factor factor,
   bool inverted)
{
   nir_ssa_def *f =
      nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);

   if (inverted)
      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);

   return nir_fmul(b, raw_scalar, f);
}

/* Given a colormask, "blend" with the destination */

static nir_ssa_def *
nir_color_mask(
   nir_builder *b,
   unsigned mask,
   nir_ssa_def *src,
   nir_ssa_def *dst)
{
   return nir_vec4(b,
         nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
         nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
         nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
         nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
}

static nir_ssa_def *
nir_logicop_func(
   nir_builder *b,
   unsigned func,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   switch (func) {
   case PIPE_LOGICOP_CLEAR:
      return nir_imm_ivec4(b, 0, 0, 0, 0);
   case PIPE_LOGICOP_NOR:
      return nir_inot(b, nir_ior(b, src, dst));
   case PIPE_LOGICOP_AND_INVERTED:
      return nir_iand(b, nir_inot(b, src), dst);
   case PIPE_LOGICOP_COPY_INVERTED:
      return nir_inot(b, src);
   case PIPE_LOGICOP_AND_REVERSE:
      return nir_iand(b, src, nir_inot(b, dst));
   case PIPE_LOGICOP_INVERT:
      return nir_inot(b, dst);
   case PIPE_LOGICOP_XOR:
      return nir_ixor(b, src, dst);
   case PIPE_LOGICOP_NAND:
      return nir_inot(b, nir_iand(b, src, dst));
   case PIPE_LOGICOP_AND:
      return nir_iand(b, src, dst);
   case PIPE_LOGICOP_EQUIV:
      return nir_inot(b, nir_ixor(b, src, dst));
   case PIPE_LOGICOP_NOOP:
      return dst;
   case PIPE_LOGICOP_OR_INVERTED:
      return nir_ior(b, nir_inot(b, src), dst);
   case PIPE_LOGICOP_COPY:
      return src;
   case PIPE_LOGICOP_OR_REVERSE:
      return nir_ior(b, src, nir_inot(b, dst));
   case PIPE_LOGICOP_OR:
      return nir_ior(b, src, dst);
   case PIPE_LOGICOP_SET:
      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
   }

   unreachable("Invalid logicop function");
}

static nir_ssa_def *
nir_blend_logicop(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_ssa_def *src, nir_ssa_def *dst)
{
   unsigned bit_size = src->bit_size;

   enum pipe_format format = options->format[rt];
   const struct util_format_description *format_desc =
      util_format_description(format);

   if (bit_size != 32) {
      src = nir_f2f32(b, src);
      dst = nir_f2f32(b, dst);
   }

   assert(src->num_components <= 4);
   assert(dst->num_components <= 4);

   unsigned bits[4];
   for (int i = 0; i < 4; ++i)
      bits[i] = format_desc->channel[i].size;

   if (util_format_is_unorm(format)) {
      src = nir_format_float_to_unorm(b, src, bits);
      dst = nir_format_float_to_unorm(b, dst, bits);
   } else if (util_format_is_snorm(format)) {
      src = nir_format_float_to_snorm(b, src, bits);
      dst = nir_format_float_to_snorm(b, dst, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   nir_ssa_def *out = nir_logicop_func(b, options->logicop_func, src, dst);

   if (bits[0] < 32) {
      nir_const_value mask[4];
      for (int i = 0; i < 4; ++i)
         mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);

      out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
   }

   if (util_format_is_unorm(format)) {
      out = nir_format_unorm_to_float(b, out, bits);
   } else if (util_format_is_snorm(format)) {
      out = nir_format_snorm_to_float(b, out, bits);
   } else {
      assert(util_format_is_pure_integer(format));
   }

   if (bit_size == 16)
      out = nir_f2f16(b, out);

   return out;
}
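/* A numeric sketch of the logicop path above (assumed example values, not
 * driver code): for PIPE_FORMAT_R8G8B8A8_UNORM with PIPE_LOGICOP_XOR, each
 * channel is quantized to 8-bit unorm, combined bitwise, masked back to the
 * channel width, and converted back to float:
 *
 *    s      = float_to_unorm8(0.5)    = 0x80
 *    d      = float_to_unorm8(1.0)    = 0xff
 *    out    = (s ^ d) & 0xff          = 0x7f
 *    result = unorm8_to_float(out)   ~= 0.498
 */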
static nir_ssa_def *
nir_fsat_signed(nir_builder *b, nir_ssa_def *x)
{
   return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
                           nir_imm_floatN_t(b, +1.0, x->bit_size));
}

/* Given a blend state, the source color, and the destination color,
 * return the blended color
 */

static nir_ssa_def *
nir_blend(
   nir_builder *b,
   const nir_lower_blend_options *options,
   unsigned rt,
   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
{
   /* Grab the blend constant ahead of time */
   nir_ssa_def *bconst;
   if (options->scalar_blend_const) {
      bconst = nir_vec4(b,
                        nir_load_blend_const_color_r_float(b),
                        nir_load_blend_const_color_g_float(b),
                        nir_load_blend_const_color_b_float(b),
                        nir_load_blend_const_color_a_float(b));
   } else {
      bconst = nir_load_blend_const_color_rgba(b);
   }

   if (src->bit_size == 16)
      bconst = nir_f2f16(b, bconst);

   /* Fixed-point framebuffers require their inputs clamped. */
   enum pipe_format format = options->format[rt];

   /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
    *
    *    If the color buffer is fixed-point, the components of the source
    *    and destination values and blend factors are each clamped to [0, 1]
    *    or [-1, 1] respectively for an unsigned normalized or signed
    *    normalized color buffer prior to evaluating the blend equation. If
    *    the color buffer is floating-point, no clamping occurs.
    */
   if (util_format_is_unorm(format))
      src = nir_fsat(b, src);
   else if (util_format_is_snorm(format))
      src = nir_fsat_signed(b, src);

   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
   const struct util_format_description *desc =
      util_format_description(format);

   if (desc->nr_channels < 4) {
      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);

      dst = nir_vec4(b,
            nir_channel(b, dst, 0),
            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
   }

   /* We blend per channel and recombine later */
   nir_ssa_def *channels[4];

   for (unsigned c = 0; c < 4; ++c) {
      /* Decide properties based on channel */
      nir_lower_blend_channel chan =
         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;

      nir_ssa_def *psrc = nir_channel(b, src, c);
      nir_ssa_def *pdst = nir_channel(b, dst, c);

      if (nir_blend_factored(chan.func)) {
         psrc = nir_blend_factor(
               b, psrc,
               src, src1, dst, bconst, c,
               chan.src_factor, chan.invert_src_factor);

         pdst = nir_blend_factor(
               b, pdst,
               src, src1, dst, bconst, c,
               chan.dst_factor, chan.invert_dst_factor);
      }

      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
   }

   return nir_vec(b, channels, 4);
}
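/* Note that nir_blend_factored() deliberately excludes BLEND_FUNC_MIN and
 * BLEND_FUNC_MAX in the loop above: per the GL and Vulkan specifications,
 * MIN/MAX ignore the blend factors entirely, so those channels reduce to
 * e.g.
 *
 *    channels[c] = nir_fmin(b, psrc, pdst);
 *
 * regardless of the configured src_factor/dst_factor.
 */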
static int
color_index_for_var(const nir_variable *var)
{
   if (var->data.location != FRAG_RESULT_COLOR &&
       var->data.location < FRAG_RESULT_DATA0)
      return -1;

   return (var->data.location == FRAG_RESULT_COLOR) ? 0 :
          (var->data.location - FRAG_RESULT_DATA0);
}

static bool
nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
                      const nir_lower_blend_options *options)
{
   assert(store->intrinsic == nir_intrinsic_store_deref);

   nir_variable *var = nir_intrinsic_get_var(store, 0);
   int rt = color_index_for_var(var);

   /* No blend lowering requested on this RT */
   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
      return false;

   b->cursor = nir_before_instr(&store->instr);

   /* Grab the input color.  We always want 4 channels during blend.  Dead
    * code will clean up any channels we don't need.
    */
   assert(store->src[1].is_ssa);
   nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4);

   /* Grab the previous fragment color */
   var->data.fb_fetch_output = true;
   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
   b->shader->info.fs.uses_fbfetch_output = true;
   nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4);

   /* Blend the two colors per the passed options */
   nir_ssa_def *blended = src;

   if (options->logicop_enable) {
      blended = nir_blend_logicop(b, options, rt, src, dst);
   } else if (!util_format_is_pure_integer(options->format[rt])) {
      assert(!util_format_is_scaled(options->format[rt]));
      blended = nir_blend(b, options, rt, src, options->src1, dst);
   }

   /* Apply a colormask */
   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);

   const unsigned num_components = glsl_get_vector_elements(var->type);

   /* Shave off any components we don't want to store */
   blended = nir_trim_vector(b, blended, num_components);

   /* Grow or shrink the store as needed.  A store_deref has no destination,
    * so only the source component count and the write mask change.
    */
   assert(nir_intrinsic_write_mask(store) ==
          nir_component_mask(store->num_components));
   store->num_components = num_components;
   nir_intrinsic_set_write_mask(store, nir_component_mask(num_components));

   /* Write out the final color instead of the input */
   nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended);
   return true;
}

static bool
nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
{
   const nir_lower_blend_options *options = data;

   switch (instr->type) {
   case nir_instr_type_deref: {
      /* Fix up output deref types, as needed */
      nir_deref_instr *deref = nir_instr_as_deref(instr);
      if (!nir_deref_mode_is(deref, nir_var_shader_out))
         return false;

      /* Indirects must be already lowered and output variables split */
      assert(deref->deref_type == nir_deref_type_var);
      if (deref->type == deref->var->type)
         return false;

      deref->type = deref->var->type;
      return true;
   }

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (intrin->intrinsic != nir_intrinsic_load_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         return false;

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is(deref, nir_var_shader_out))
         return false;

      assert(glsl_type_is_vector_or_scalar(deref->type));

      if (intrin->intrinsic == nir_intrinsic_load_deref) {
         /* We need to fix up the framebuffer fetch if num_components
          * changed
          */
         const unsigned num_components =
            glsl_get_vector_elements(deref->type);
         if (intrin->num_components == num_components)
            return false;

         b->cursor = nir_after_instr(&intrin->instr);

         /* Retype the load to the new component count, then resize its
          * result back to the count that existing users expect.
          */
         assert(intrin->dest.is_ssa);
         const unsigned old_components = intrin->num_components;
         intrin->num_components = num_components;
         intrin->dest.ssa.num_components = num_components;

         nir_ssa_def *val =
            nir_resize_vector(b, &intrin->dest.ssa, old_components);
         nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val,
                                        val->parent_instr);
         return true;
      } else {
         return nir_lower_blend_store(b, intrin, options);
      }
   }

   default:
      return false;
   }
}
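/* Conceptually, for a shader writing render target 0, the store rewrite
 * above turns (GLSL-flavoured pseudocode, for illustration only)
 *
 *    out_color = f(...);
 *
 * into the equivalent of
 *
 *    vec4 dst     = out_color;               // framebuffer fetch
 *    vec4 blended = blend(f(...), dst);      // or logicop(...)
 *    out_color    = colormask(blended, dst);
 */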
/** Lower blending to framebuffer fetch and some math
 *
 * This pass requires that indirects are lowered and output variables split
 * so that we have a single output variable for each RT. We could go to the
 * effort of handling arrays (possibly of arrays) but, given that we need
 * indirects lowered anyway (we need constant indices to look up blend
 * functions and formats), we may as well require variables to be split.
 * This can be done by calling nir_lower_io_arrays_to_elements_no_indirects().
 */
void
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   /* Re-type any blended output variables to have the same number of
    * components as the image format.  The GL 4.6 Spec says:
    *
    *    "If a fragment shader writes to none of gl_FragColor, gl_FragData,
    *    nor any user-defined output variables, the values of the fragment
    *    colors following shader execution are undefined, and may differ
    *    for each fragment color.  If some, but not all elements of
    *    gl_FragData or of these user-defined output variables are written,
    *    the values of fragment colors corresponding to unwritten elements
    *    or variables are similarly undefined."
    *
    * Note the phrase "following shader execution".  Those color values are
    * then supposed to go into blending which may, depending on the blend
    * mode, apply constraints that result in well-defined rendering.  It's
    * fine if we have to pad out a value with undef but we then need to
    * blend that garbage value to ensure correct results.
    *
    * This may also, depending on output format, be a small optimization,
    * allowing NIR to dead-code unused calculations.
    */
   nir_foreach_shader_out_variable(var, shader) {
      int rt = color_index_for_var(var);

      /* No blend lowering requested on this RT */
      if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
         continue;

      const unsigned num_format_components =
         util_format_get_nr_components(options->format[rt]);

      /* Indirects must be already lowered and output variables split */
      assert(glsl_type_is_vector_or_scalar(var->type));
      var->type = glsl_replace_vector_type(var->type, num_format_components);
   }

   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance, (void *)options);
}
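/* Example driver-side setup (a sketch under assumed driver state, not taken
 * from any particular driver): configure classic "source over" blending on
 * RT 0 of an RGBA8 framebuffer, then run the pass.
 *
 *    nir_lower_blend_options opts = { 0 };
 *    opts.format[0] = PIPE_FORMAT_R8G8B8A8_UNORM;
 *    opts.rt[0].colormask = 0xf;
 *    opts.rt[0].rgb.func = BLEND_FUNC_ADD;
 *    opts.rt[0].rgb.src_factor = BLEND_FACTOR_SRC_ALPHA;
 *    opts.rt[0].rgb.dst_factor = BLEND_FACTOR_SRC_ALPHA;
 *    opts.rt[0].rgb.invert_dst_factor = true;
 *    opts.rt[0].alpha = opts.rt[0].rgb;
 *
 *    nir_lower_io_arrays_to_elements_no_indirects(shader, true);
 *    nir_lower_blend(shader, &opts);
 */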