1/*
2 * Copyright (C) 2019-2021 Collabora, Ltd.
3 * Copyright (C) 2019 Alyssa Rosenzweig
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25/**
26 * @file
27 *
28 * Implements the fragment pipeline (blending and writeout) in software, to be
29 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
30 * shader variant on typical GPUs. This pass is useful if hardware lacks
31 * fixed-function blending in part or in full.
32 */
33
34#include "compiler/nir/nir.h"
35#include "compiler/nir/nir_builder.h"
36#include "compiler/nir/nir_format_convert.h"
37#include "nir_lower_blend.h"
38
39/* Given processed factors, combine them per a blend function */
40
41static nir_ssa_def *
42nir_blend_func(
43   nir_builder *b,
44   enum blend_func func,
45   nir_ssa_def *src, nir_ssa_def *dst)
46{
47   switch (func) {
48   case BLEND_FUNC_ADD:
49      return nir_fadd(b, src, dst);
50   case BLEND_FUNC_SUBTRACT:
51      return nir_fsub(b, src, dst);
52   case BLEND_FUNC_REVERSE_SUBTRACT:
53      return nir_fsub(b, dst, src);
54   case BLEND_FUNC_MIN:
55      return nir_fmin(b, src, dst);
56   case BLEND_FUNC_MAX:
57      return nir_fmax(b, src, dst);
58   }
59
60   unreachable("Invalid blend function");
61}
62
63/* Does this blend function multiply by a blend factor? */
64
65static bool
66nir_blend_factored(enum blend_func func)
67{
68   switch (func) {
69   case BLEND_FUNC_ADD:
70   case BLEND_FUNC_SUBTRACT:
71   case BLEND_FUNC_REVERSE_SUBTRACT:
72      return true;
73   default:
74      return false;
75   }
76}
77
78/* Compute a src_alpha_saturate factor */
79static nir_ssa_def *
80nir_alpha_saturate(
81   nir_builder *b,
82   nir_ssa_def *src, nir_ssa_def *dst,
83   unsigned chan)
84{
85   nir_ssa_def *Asrc = nir_channel(b, src, 3);
86   nir_ssa_def *Adst = nir_channel(b, dst, 3);
87   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
88   nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89
90   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91}
92
93/* Returns a scalar single factor, unmultiplied */
94
95static nir_ssa_def *
96nir_blend_factor_value(
97   nir_builder *b,
98   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
99   unsigned chan,
100   enum blend_factor factor)
101{
102   switch (factor) {
103   case BLEND_FACTOR_ZERO:
104      return nir_imm_floatN_t(b, 0.0, src->bit_size);
105   case BLEND_FACTOR_SRC_COLOR:
106      return nir_channel(b, src, chan);
107   case BLEND_FACTOR_SRC1_COLOR:
108      return nir_channel(b, src1, chan);
109   case BLEND_FACTOR_DST_COLOR:
110      return nir_channel(b, dst, chan);
111   case BLEND_FACTOR_SRC_ALPHA:
112      return nir_channel(b, src, 3);
113   case BLEND_FACTOR_SRC1_ALPHA:
114      return nir_channel(b, src1, 3);
115   case BLEND_FACTOR_DST_ALPHA:
116      return nir_channel(b, dst, 3);
117   case BLEND_FACTOR_CONSTANT_COLOR:
118      return nir_channel(b, bconst, chan);
119   case BLEND_FACTOR_CONSTANT_ALPHA:
120      return nir_channel(b, bconst, 3);
121   case BLEND_FACTOR_SRC_ALPHA_SATURATE:
122      return nir_alpha_saturate(b, src, dst, chan);
123   }
124
125   unreachable("Invalid blend factor");
126}
127
128static nir_ssa_def *
129nir_blend_factor(
130   nir_builder *b,
131   nir_ssa_def *raw_scalar,
132   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
133   unsigned chan,
134   enum blend_factor factor,
135   bool inverted)
136{
137   nir_ssa_def *f =
138      nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);
139
140   if (inverted)
141      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
142
143   return nir_fmul(b, raw_scalar, f);
144}
145
146/* Given a colormask, "blend" with the destination */
147
148static nir_ssa_def *
149nir_color_mask(
150   nir_builder *b,
151   unsigned mask,
152   nir_ssa_def *src,
153   nir_ssa_def *dst)
154{
155   return nir_vec4(b,
156         nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
157         nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
158         nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
159         nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
160}
161
162static nir_ssa_def *
163nir_logicop_func(
164   nir_builder *b,
165   unsigned func,
166   nir_ssa_def *src, nir_ssa_def *dst)
167{
168   switch (func) {
169   case PIPE_LOGICOP_CLEAR:
170      return nir_imm_ivec4(b, 0, 0, 0, 0);
171   case PIPE_LOGICOP_NOR:
172      return nir_inot(b, nir_ior(b, src, dst));
173   case PIPE_LOGICOP_AND_INVERTED:
174      return nir_iand(b, nir_inot(b, src), dst);
175   case PIPE_LOGICOP_COPY_INVERTED:
176      return nir_inot(b, src);
177   case PIPE_LOGICOP_AND_REVERSE:
178      return nir_iand(b, src, nir_inot(b, dst));
179   case PIPE_LOGICOP_INVERT:
180      return nir_inot(b, dst);
181   case PIPE_LOGICOP_XOR:
182      return nir_ixor(b, src, dst);
183   case PIPE_LOGICOP_NAND:
184      return nir_inot(b, nir_iand(b, src, dst));
185   case PIPE_LOGICOP_AND:
186      return nir_iand(b, src, dst);
187   case PIPE_LOGICOP_EQUIV:
188      return nir_inot(b, nir_ixor(b, src, dst));
189   case PIPE_LOGICOP_NOOP:
190      return dst;
191   case PIPE_LOGICOP_OR_INVERTED:
192      return nir_ior(b, nir_inot(b, src), dst);
193   case PIPE_LOGICOP_COPY:
194      return src;
195   case PIPE_LOGICOP_OR_REVERSE:
196      return nir_ior(b, src, nir_inot(b, dst));
197   case PIPE_LOGICOP_OR:
198      return nir_ior(b, src, dst);
199   case PIPE_LOGICOP_SET:
200      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
201   }
202
203   unreachable("Invalid logciop function");
204}
205
206static nir_ssa_def *
207nir_blend_logicop(
208   nir_builder *b,
209   const nir_lower_blend_options *options,
210   unsigned rt,
211   nir_ssa_def *src, nir_ssa_def *dst)
212{
213   unsigned bit_size = src->bit_size;
214
215   enum pipe_format format = options->format[rt];
216   const struct util_format_description *format_desc =
217      util_format_description(format);
218
219   if (bit_size != 32) {
220      src = nir_f2f32(b, src);
221      dst = nir_f2f32(b, dst);
222   }
223
224   assert(src->num_components <= 4);
225   assert(dst->num_components <= 4);
226
227   unsigned bits[4];
228   for (int i = 0; i < 4; ++i)
229       bits[i] = format_desc->channel[i].size;
230
231   if (util_format_is_unorm(format)) {
232      src = nir_format_float_to_unorm(b, src, bits);
233      dst = nir_format_float_to_unorm(b, dst, bits);
234   } else if (util_format_is_snorm(format)) {
235      src = nir_format_float_to_snorm(b, src, bits);
236      dst = nir_format_float_to_snorm(b, dst, bits);
237   } else {
238      assert(util_format_is_pure_integer(format));
239   }
240
241   nir_ssa_def *out = nir_logicop_func(b, options->logicop_func, src, dst);
242
243   if (bits[0] < 32) {
244       nir_const_value mask[4];
245       for (int i = 0; i < 4; ++i)
246           mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
247
248       out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
249   }
250
251   if (util_format_is_unorm(format)) {
252      out = nir_format_unorm_to_float(b, out, bits);
253   } else if (util_format_is_snorm(format)) {
254      out = nir_format_snorm_to_float(b, out, bits);
255   } else {
256      assert(util_format_is_pure_integer(format));
257   }
258
259   if (bit_size == 16)
260      out = nir_f2f16(b, out);
261
262   return out;
263}
264
265static nir_ssa_def *
266nir_fsat_signed(nir_builder *b, nir_ssa_def *x)
267{
268   return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
269                           nir_imm_floatN_t(b, +1.0, x->bit_size));
270}
271
272/* Given a blend state, the source color, and the destination color,
273 * return the blended color
274 */
275
276static nir_ssa_def *
277nir_blend(
278   nir_builder *b,
279   const nir_lower_blend_options *options,
280   unsigned rt,
281   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
282{
283   /* Grab the blend constant ahead of time */
284   nir_ssa_def *bconst;
285   if (options->scalar_blend_const) {
286      bconst = nir_vec4(b,
287                        nir_load_blend_const_color_r_float(b),
288                        nir_load_blend_const_color_g_float(b),
289                        nir_load_blend_const_color_b_float(b),
290                        nir_load_blend_const_color_a_float(b));
291   } else {
292      bconst = nir_load_blend_const_color_rgba(b);
293   }
294
295   if (src->bit_size == 16)
296      bconst = nir_f2f16(b, bconst);
297
298   /* Fixed-point framebuffers require their inputs clamped. */
299   enum pipe_format format = options->format[rt];
300
301   /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
302    *
303    *     If the color buffer is fixed-point, the components of the source and
304    *     destination values and blend factors are each clamped to [0, 1] or
305    *     [-1, 1] respectively for an unsigned normalized or signed normalized
306    *     color buffer prior to evaluating the blend equation. If the color
307    *     buffer is floating-point, no clamping occurs.
308    */
309   if (util_format_is_unorm(format))
310      src = nir_fsat(b, src);
311   else if (util_format_is_snorm(format))
312      src = nir_fsat_signed(b, src);
313
314   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
315   const struct util_format_description *desc =
316      util_format_description(format);
317
318   if (desc->nr_channels < 4) {
319      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
320      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
321
322      dst = nir_vec4(b, nir_channel(b, dst, 0),
323            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
324            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
325            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
326   }
327
328   /* We blend per channel and recombine later */
329   nir_ssa_def *channels[4];
330
331   for (unsigned c = 0; c < 4; ++c) {
332      /* Decide properties based on channel */
333      nir_lower_blend_channel chan =
334         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;
335
336      nir_ssa_def *psrc = nir_channel(b, src, c);
337      nir_ssa_def *pdst = nir_channel(b, dst, c);
338
339      if (nir_blend_factored(chan.func)) {
340         psrc = nir_blend_factor(
341                   b, psrc,
342                   src, src1, dst, bconst, c,
343                   chan.src_factor, chan.invert_src_factor);
344
345         pdst = nir_blend_factor(
346                   b, pdst,
347                   src, src1, dst, bconst, c,
348                   chan.dst_factor, chan.invert_dst_factor);
349      }
350
351      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
352   }
353
354   return nir_vec(b, channels, 4);
355}
356
357static int
358color_index_for_var(const nir_variable *var)
359{
360   if (var->data.location != FRAG_RESULT_COLOR &&
361       var->data.location < FRAG_RESULT_DATA0)
362      return -1;
363
364   return (var->data.location == FRAG_RESULT_COLOR) ? 0 :
365          (var->data.location - FRAG_RESULT_DATA0);
366}
367
368static bool
369nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
370                      const nir_lower_blend_options *options)
371{
372   assert(store->intrinsic == nir_intrinsic_store_deref);
373
374   nir_variable *var = nir_intrinsic_get_var(store, 0);
375   int rt = color_index_for_var(var);
376
377   /* No blend lowering requested on this RT */
378   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
379      return false;
380
381   b->cursor = nir_before_instr(&store->instr);
382
383   /* Grab the input color.  We always want 4 channels during blend.  Dead
384    * code will clean up any channels we don't need.
385    */
386   assert(store->src[1].is_ssa);
387   nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4);
388
389   /* Grab the previous fragment color */
390   var->data.fb_fetch_output = true;
391   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
392   b->shader->info.fs.uses_fbfetch_output = true;
393   nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4);
394
395   /* Blend the two colors per the passed options */
396   nir_ssa_def *blended = src;
397
398   if (options->logicop_enable) {
399      blended = nir_blend_logicop(b, options, rt, src, dst);
400   } else if (!util_format_is_pure_integer(options->format[rt])) {
401      assert(!util_format_is_scaled(options->format[rt]));
402      blended = nir_blend(b, options, rt, src, options->src1, dst);
403   }
404
405   /* Apply a colormask */
406   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
407
408   const unsigned num_components = glsl_get_vector_elements(var->type);
409
410   /* Shave off any components we don't want to store */
411   blended = nir_trim_vector(b, blended, num_components);
412
413   /* Grow or shrink the store destination as needed */
414   assert(nir_intrinsic_write_mask(store) ==
415          nir_component_mask(store->num_components));
416   store->num_components = num_components;
417   store->dest.ssa.num_components = num_components;
418   nir_intrinsic_set_write_mask(store, nir_component_mask(num_components));
419
420   /* Write out the final color instead of the input */
421   nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended);
422   return true;
423}
424
425static bool
426nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
427{
428   const nir_lower_blend_options *options = data;
429
430   switch (instr->type) {
431   case nir_instr_type_deref: {
432      /* Fix up output deref types, as needed */
433      nir_deref_instr *deref = nir_instr_as_deref(instr);
434      if (!nir_deref_mode_is(deref, nir_var_shader_out))
435         return false;
436
437      /* Indirects must be already lowered and output variables split */
438      assert(deref->deref_type == nir_deref_type_var);
439
440      if (deref->type == deref->var->type)
441         return false;
442
443      deref->type = deref->var->type;
444      return true;
445   }
446
447   case nir_instr_type_intrinsic: {
448      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
449      if (intrin->intrinsic != nir_intrinsic_load_deref &&
450          intrin->intrinsic != nir_intrinsic_store_deref)
451         return false;
452
453      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
454      if (!nir_deref_mode_is(deref, nir_var_shader_out))
455         return false;
456
457      assert(glsl_type_is_vector_or_scalar(deref->type));
458
459      if (intrin->intrinsic == nir_intrinsic_load_deref) {
460         /* We need to fix up framebuffer if num_components changed */
461         const unsigned num_components = glsl_get_vector_elements(deref->type);
462         if (intrin->num_components == num_components)
463            return false;
464
465         b->cursor = nir_after_instr(&intrin->instr);
466
467         assert(intrin->dest.is_ssa);
468         nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa,
469                                              num_components);
470         intrin->num_components = num_components,
471         nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val,
472                                        val->parent_instr);
473         return true;
474      } else {
475         return nir_lower_blend_store(b, intrin, options);
476      }
477   }
478
479   default:
480      return false;
481   }
482}
483
484/** Lower blending to framebuffer fetch and some math
485 *
486 * This pass requires that indirects are lowered and output variables split
487 * so that we have a single output variable for each RT.  We could go to the
488 * effort of handling arrays (possibly of arrays) but, given that we need
489 * indirects lowered anyway (we need constant indices to look up blend
490 * functions and formats), we may as well require variables to be split.
491 * This can be done by calling nir_lower_io_arrays_to_elements_no_indirect().
492 */
493void
494nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
495{
496   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
497
498   /* Re-type any blended output variables to have the same number of
499    * components as the image format.  The GL 4.6 Spec says:
500    *
501    *    "If a fragment shader writes to none of gl_FragColor, gl_FragData,
502    *    nor any user-defined output variables, the values of the fragment
503    *    colors following shader execution are undefined, and may differ for
504    *    each fragment color.  If some, but not all elements of gl_FragData or
505    *    of theser-defined output variables are written, the values of
506    *    fragment colors corresponding to unwritten elements orariables are
507    *    similarly undefined."
508    *
509    * Note the phrase "following shader execution".  Those color values are
510    * then supposed to go into blending which may, depending on the blend
511    * mode, apply constraints that result in well-defined rendering.  It's
512    * fine if we have to pad out a value with undef but we then need to blend
513    * that garbage value to ensure correct results.
514    *
515    * This may also, depending on output format, be a small optimization
516    * allowing NIR to dead-code unused calculations.
517    */
518   nir_foreach_shader_out_variable(var, shader) {
519      int rt = color_index_for_var(var);
520
521      /* No blend lowering requested on this RT */
522      if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
523         continue;
524
525      const unsigned num_format_components =
526         util_format_get_nr_components(options->format[rt]);
527
528      /* Indirects must be already lowered and output variables split */
529      assert(glsl_type_is_vector_or_scalar(var->type));
530      var->type = glsl_replace_vector_type(var->type, num_format_components);
531   }
532
533   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
534                                nir_metadata_block_index |
535                                nir_metadata_dominance,
536                                (void *)options);
537}
538