1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2019-2021 Collabora, Ltd.
3bf215546Sopenharmony_ci * Copyright (C) 2019 Alyssa Rosenzweig
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22bf215546Sopenharmony_ci * IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci/**
26bf215546Sopenharmony_ci * @file
27bf215546Sopenharmony_ci *
28bf215546Sopenharmony_ci * Implements the fragment pipeline (blending and writeout) in software, to be
29bf215546Sopenharmony_ci * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
30bf215546Sopenharmony_ci * shader variant on typical GPUs. This pass is useful if hardware lacks
31bf215546Sopenharmony_ci * fixed-function blending in part or in full.
32bf215546Sopenharmony_ci */
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci#include "compiler/nir/nir.h"
35bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
36bf215546Sopenharmony_ci#include "compiler/nir/nir_format_convert.h"
37bf215546Sopenharmony_ci#include "nir_lower_blend.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci/* Given processed factors, combine them per a blend function */
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_cistatic nir_ssa_def *
42bf215546Sopenharmony_cinir_blend_func(
43bf215546Sopenharmony_ci   nir_builder *b,
44bf215546Sopenharmony_ci   enum blend_func func,
45bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *dst)
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   switch (func) {
48bf215546Sopenharmony_ci   case BLEND_FUNC_ADD:
49bf215546Sopenharmony_ci      return nir_fadd(b, src, dst);
50bf215546Sopenharmony_ci   case BLEND_FUNC_SUBTRACT:
51bf215546Sopenharmony_ci      return nir_fsub(b, src, dst);
52bf215546Sopenharmony_ci   case BLEND_FUNC_REVERSE_SUBTRACT:
53bf215546Sopenharmony_ci      return nir_fsub(b, dst, src);
54bf215546Sopenharmony_ci   case BLEND_FUNC_MIN:
55bf215546Sopenharmony_ci      return nir_fmin(b, src, dst);
56bf215546Sopenharmony_ci   case BLEND_FUNC_MAX:
57bf215546Sopenharmony_ci      return nir_fmax(b, src, dst);
58bf215546Sopenharmony_ci   }
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   unreachable("Invalid blend function");
61bf215546Sopenharmony_ci}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci/* Does this blend function multiply by a blend factor? */
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic bool
66bf215546Sopenharmony_cinir_blend_factored(enum blend_func func)
67bf215546Sopenharmony_ci{
68bf215546Sopenharmony_ci   switch (func) {
69bf215546Sopenharmony_ci   case BLEND_FUNC_ADD:
70bf215546Sopenharmony_ci   case BLEND_FUNC_SUBTRACT:
71bf215546Sopenharmony_ci   case BLEND_FUNC_REVERSE_SUBTRACT:
72bf215546Sopenharmony_ci      return true;
73bf215546Sopenharmony_ci   default:
74bf215546Sopenharmony_ci      return false;
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci}
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci/* Compute a src_alpha_saturate factor */
79bf215546Sopenharmony_cistatic nir_ssa_def *
80bf215546Sopenharmony_cinir_alpha_saturate(
81bf215546Sopenharmony_ci   nir_builder *b,
82bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *dst,
83bf215546Sopenharmony_ci   unsigned chan)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   nir_ssa_def *Asrc = nir_channel(b, src, 3);
86bf215546Sopenharmony_ci   nir_ssa_def *Adst = nir_channel(b, dst, 3);
87bf215546Sopenharmony_ci   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
88bf215546Sopenharmony_ci   nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci/* Returns a scalar single factor, unmultiplied */
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_cistatic nir_ssa_def *
96bf215546Sopenharmony_cinir_blend_factor_value(
97bf215546Sopenharmony_ci   nir_builder *b,
98bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
99bf215546Sopenharmony_ci   unsigned chan,
100bf215546Sopenharmony_ci   enum blend_factor factor)
101bf215546Sopenharmony_ci{
102bf215546Sopenharmony_ci   switch (factor) {
103bf215546Sopenharmony_ci   case BLEND_FACTOR_ZERO:
104bf215546Sopenharmony_ci      return nir_imm_floatN_t(b, 0.0, src->bit_size);
105bf215546Sopenharmony_ci   case BLEND_FACTOR_SRC_COLOR:
106bf215546Sopenharmony_ci      return nir_channel(b, src, chan);
107bf215546Sopenharmony_ci   case BLEND_FACTOR_SRC1_COLOR:
108bf215546Sopenharmony_ci      return nir_channel(b, src1, chan);
109bf215546Sopenharmony_ci   case BLEND_FACTOR_DST_COLOR:
110bf215546Sopenharmony_ci      return nir_channel(b, dst, chan);
111bf215546Sopenharmony_ci   case BLEND_FACTOR_SRC_ALPHA:
112bf215546Sopenharmony_ci      return nir_channel(b, src, 3);
113bf215546Sopenharmony_ci   case BLEND_FACTOR_SRC1_ALPHA:
114bf215546Sopenharmony_ci      return nir_channel(b, src1, 3);
115bf215546Sopenharmony_ci   case BLEND_FACTOR_DST_ALPHA:
116bf215546Sopenharmony_ci      return nir_channel(b, dst, 3);
117bf215546Sopenharmony_ci   case BLEND_FACTOR_CONSTANT_COLOR:
118bf215546Sopenharmony_ci      return nir_channel(b, bconst, chan);
119bf215546Sopenharmony_ci   case BLEND_FACTOR_CONSTANT_ALPHA:
120bf215546Sopenharmony_ci      return nir_channel(b, bconst, 3);
121bf215546Sopenharmony_ci   case BLEND_FACTOR_SRC_ALPHA_SATURATE:
122bf215546Sopenharmony_ci      return nir_alpha_saturate(b, src, dst, chan);
123bf215546Sopenharmony_ci   }
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci   unreachable("Invalid blend factor");
126bf215546Sopenharmony_ci}
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_cistatic nir_ssa_def *
129bf215546Sopenharmony_cinir_blend_factor(
130bf215546Sopenharmony_ci   nir_builder *b,
131bf215546Sopenharmony_ci   nir_ssa_def *raw_scalar,
132bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
133bf215546Sopenharmony_ci   unsigned chan,
134bf215546Sopenharmony_ci   enum blend_factor factor,
135bf215546Sopenharmony_ci   bool inverted)
136bf215546Sopenharmony_ci{
137bf215546Sopenharmony_ci   nir_ssa_def *f =
138bf215546Sopenharmony_ci      nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   if (inverted)
141bf215546Sopenharmony_ci      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   return nir_fmul(b, raw_scalar, f);
144bf215546Sopenharmony_ci}
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci/* Given a colormask, "blend" with the destination */
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_cistatic nir_ssa_def *
149bf215546Sopenharmony_cinir_color_mask(
150bf215546Sopenharmony_ci   nir_builder *b,
151bf215546Sopenharmony_ci   unsigned mask,
152bf215546Sopenharmony_ci   nir_ssa_def *src,
153bf215546Sopenharmony_ci   nir_ssa_def *dst)
154bf215546Sopenharmony_ci{
155bf215546Sopenharmony_ci   return nir_vec4(b,
156bf215546Sopenharmony_ci         nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
157bf215546Sopenharmony_ci         nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
158bf215546Sopenharmony_ci         nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
159bf215546Sopenharmony_ci         nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cistatic nir_ssa_def *
163bf215546Sopenharmony_cinir_logicop_func(
164bf215546Sopenharmony_ci   nir_builder *b,
165bf215546Sopenharmony_ci   unsigned func,
166bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *dst)
167bf215546Sopenharmony_ci{
168bf215546Sopenharmony_ci   switch (func) {
169bf215546Sopenharmony_ci   case PIPE_LOGICOP_CLEAR:
170bf215546Sopenharmony_ci      return nir_imm_ivec4(b, 0, 0, 0, 0);
171bf215546Sopenharmony_ci   case PIPE_LOGICOP_NOR:
172bf215546Sopenharmony_ci      return nir_inot(b, nir_ior(b, src, dst));
173bf215546Sopenharmony_ci   case PIPE_LOGICOP_AND_INVERTED:
174bf215546Sopenharmony_ci      return nir_iand(b, nir_inot(b, src), dst);
175bf215546Sopenharmony_ci   case PIPE_LOGICOP_COPY_INVERTED:
176bf215546Sopenharmony_ci      return nir_inot(b, src);
177bf215546Sopenharmony_ci   case PIPE_LOGICOP_AND_REVERSE:
178bf215546Sopenharmony_ci      return nir_iand(b, src, nir_inot(b, dst));
179bf215546Sopenharmony_ci   case PIPE_LOGICOP_INVERT:
180bf215546Sopenharmony_ci      return nir_inot(b, dst);
181bf215546Sopenharmony_ci   case PIPE_LOGICOP_XOR:
182bf215546Sopenharmony_ci      return nir_ixor(b, src, dst);
183bf215546Sopenharmony_ci   case PIPE_LOGICOP_NAND:
184bf215546Sopenharmony_ci      return nir_inot(b, nir_iand(b, src, dst));
185bf215546Sopenharmony_ci   case PIPE_LOGICOP_AND:
186bf215546Sopenharmony_ci      return nir_iand(b, src, dst);
187bf215546Sopenharmony_ci   case PIPE_LOGICOP_EQUIV:
188bf215546Sopenharmony_ci      return nir_inot(b, nir_ixor(b, src, dst));
189bf215546Sopenharmony_ci   case PIPE_LOGICOP_NOOP:
190bf215546Sopenharmony_ci      return dst;
191bf215546Sopenharmony_ci   case PIPE_LOGICOP_OR_INVERTED:
192bf215546Sopenharmony_ci      return nir_ior(b, nir_inot(b, src), dst);
193bf215546Sopenharmony_ci   case PIPE_LOGICOP_COPY:
194bf215546Sopenharmony_ci      return src;
195bf215546Sopenharmony_ci   case PIPE_LOGICOP_OR_REVERSE:
196bf215546Sopenharmony_ci      return nir_ior(b, src, nir_inot(b, dst));
197bf215546Sopenharmony_ci   case PIPE_LOGICOP_OR:
198bf215546Sopenharmony_ci      return nir_ior(b, src, dst);
199bf215546Sopenharmony_ci   case PIPE_LOGICOP_SET:
200bf215546Sopenharmony_ci      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
201bf215546Sopenharmony_ci   }
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   unreachable("Invalid logciop function");
204bf215546Sopenharmony_ci}
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_cistatic nir_ssa_def *
207bf215546Sopenharmony_cinir_blend_logicop(
208bf215546Sopenharmony_ci   nir_builder *b,
209bf215546Sopenharmony_ci   const nir_lower_blend_options *options,
210bf215546Sopenharmony_ci   unsigned rt,
211bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *dst)
212bf215546Sopenharmony_ci{
213bf215546Sopenharmony_ci   unsigned bit_size = src->bit_size;
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   enum pipe_format format = options->format[rt];
216bf215546Sopenharmony_ci   const struct util_format_description *format_desc =
217bf215546Sopenharmony_ci      util_format_description(format);
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   if (bit_size != 32) {
220bf215546Sopenharmony_ci      src = nir_f2f32(b, src);
221bf215546Sopenharmony_ci      dst = nir_f2f32(b, dst);
222bf215546Sopenharmony_ci   }
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   assert(src->num_components <= 4);
225bf215546Sopenharmony_ci   assert(dst->num_components <= 4);
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   unsigned bits[4];
228bf215546Sopenharmony_ci   for (int i = 0; i < 4; ++i)
229bf215546Sopenharmony_ci       bits[i] = format_desc->channel[i].size;
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   if (util_format_is_unorm(format)) {
232bf215546Sopenharmony_ci      src = nir_format_float_to_unorm(b, src, bits);
233bf215546Sopenharmony_ci      dst = nir_format_float_to_unorm(b, dst, bits);
234bf215546Sopenharmony_ci   } else if (util_format_is_snorm(format)) {
235bf215546Sopenharmony_ci      src = nir_format_float_to_snorm(b, src, bits);
236bf215546Sopenharmony_ci      dst = nir_format_float_to_snorm(b, dst, bits);
237bf215546Sopenharmony_ci   } else {
238bf215546Sopenharmony_ci      assert(util_format_is_pure_integer(format));
239bf215546Sopenharmony_ci   }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci   nir_ssa_def *out = nir_logicop_func(b, options->logicop_func, src, dst);
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   if (bits[0] < 32) {
244bf215546Sopenharmony_ci       nir_const_value mask[4];
245bf215546Sopenharmony_ci       for (int i = 0; i < 4; ++i)
246bf215546Sopenharmony_ci           mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci       out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
249bf215546Sopenharmony_ci   }
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   if (util_format_is_unorm(format)) {
252bf215546Sopenharmony_ci      out = nir_format_unorm_to_float(b, out, bits);
253bf215546Sopenharmony_ci   } else if (util_format_is_snorm(format)) {
254bf215546Sopenharmony_ci      out = nir_format_snorm_to_float(b, out, bits);
255bf215546Sopenharmony_ci   } else {
256bf215546Sopenharmony_ci      assert(util_format_is_pure_integer(format));
257bf215546Sopenharmony_ci   }
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   if (bit_size == 16)
260bf215546Sopenharmony_ci      out = nir_f2f16(b, out);
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   return out;
263bf215546Sopenharmony_ci}
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_cistatic nir_ssa_def *
266bf215546Sopenharmony_cinir_fsat_signed(nir_builder *b, nir_ssa_def *x)
267bf215546Sopenharmony_ci{
268bf215546Sopenharmony_ci   return nir_fclamp(b, x, nir_imm_floatN_t(b, -1.0, x->bit_size),
269bf215546Sopenharmony_ci                           nir_imm_floatN_t(b, +1.0, x->bit_size));
270bf215546Sopenharmony_ci}
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci/* Given a blend state, the source color, and the destination color,
273bf215546Sopenharmony_ci * return the blended color
274bf215546Sopenharmony_ci */
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_cistatic nir_ssa_def *
277bf215546Sopenharmony_cinir_blend(
278bf215546Sopenharmony_ci   nir_builder *b,
279bf215546Sopenharmony_ci   const nir_lower_blend_options *options,
280bf215546Sopenharmony_ci   unsigned rt,
281bf215546Sopenharmony_ci   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
282bf215546Sopenharmony_ci{
283bf215546Sopenharmony_ci   /* Grab the blend constant ahead of time */
284bf215546Sopenharmony_ci   nir_ssa_def *bconst;
285bf215546Sopenharmony_ci   if (options->scalar_blend_const) {
286bf215546Sopenharmony_ci      bconst = nir_vec4(b,
287bf215546Sopenharmony_ci                        nir_load_blend_const_color_r_float(b),
288bf215546Sopenharmony_ci                        nir_load_blend_const_color_g_float(b),
289bf215546Sopenharmony_ci                        nir_load_blend_const_color_b_float(b),
290bf215546Sopenharmony_ci                        nir_load_blend_const_color_a_float(b));
291bf215546Sopenharmony_ci   } else {
292bf215546Sopenharmony_ci      bconst = nir_load_blend_const_color_rgba(b);
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   if (src->bit_size == 16)
296bf215546Sopenharmony_ci      bconst = nir_f2f16(b, bconst);
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci   /* Fixed-point framebuffers require their inputs clamped. */
299bf215546Sopenharmony_ci   enum pipe_format format = options->format[rt];
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci   /* From section 17.3.6 "Blending" of the OpenGL 4.5 spec:
302bf215546Sopenharmony_ci    *
303bf215546Sopenharmony_ci    *     If the color buffer is fixed-point, the components of the source and
304bf215546Sopenharmony_ci    *     destination values and blend factors are each clamped to [0, 1] or
305bf215546Sopenharmony_ci    *     [-1, 1] respectively for an unsigned normalized or signed normalized
306bf215546Sopenharmony_ci    *     color buffer prior to evaluating the blend equation. If the color
307bf215546Sopenharmony_ci    *     buffer is floating-point, no clamping occurs.
308bf215546Sopenharmony_ci    */
309bf215546Sopenharmony_ci   if (util_format_is_unorm(format))
310bf215546Sopenharmony_ci      src = nir_fsat(b, src);
311bf215546Sopenharmony_ci   else if (util_format_is_snorm(format))
312bf215546Sopenharmony_ci      src = nir_fsat_signed(b, src);
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
315bf215546Sopenharmony_ci   const struct util_format_description *desc =
316bf215546Sopenharmony_ci      util_format_description(format);
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   if (desc->nr_channels < 4) {
319bf215546Sopenharmony_ci      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
320bf215546Sopenharmony_ci      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci      dst = nir_vec4(b, nir_channel(b, dst, 0),
323bf215546Sopenharmony_ci            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
324bf215546Sopenharmony_ci            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
325bf215546Sopenharmony_ci            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
326bf215546Sopenharmony_ci   }
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   /* We blend per channel and recombine later */
329bf215546Sopenharmony_ci   nir_ssa_def *channels[4];
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci   for (unsigned c = 0; c < 4; ++c) {
332bf215546Sopenharmony_ci      /* Decide properties based on channel */
333bf215546Sopenharmony_ci      nir_lower_blend_channel chan =
334bf215546Sopenharmony_ci         (c < 3) ? options->rt[rt].rgb : options->rt[rt].alpha;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci      nir_ssa_def *psrc = nir_channel(b, src, c);
337bf215546Sopenharmony_ci      nir_ssa_def *pdst = nir_channel(b, dst, c);
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci      if (nir_blend_factored(chan.func)) {
340bf215546Sopenharmony_ci         psrc = nir_blend_factor(
341bf215546Sopenharmony_ci                   b, psrc,
342bf215546Sopenharmony_ci                   src, src1, dst, bconst, c,
343bf215546Sopenharmony_ci                   chan.src_factor, chan.invert_src_factor);
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci         pdst = nir_blend_factor(
346bf215546Sopenharmony_ci                   b, pdst,
347bf215546Sopenharmony_ci                   src, src1, dst, bconst, c,
348bf215546Sopenharmony_ci                   chan.dst_factor, chan.invert_dst_factor);
349bf215546Sopenharmony_ci      }
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
352bf215546Sopenharmony_ci   }
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci   return nir_vec(b, channels, 4);
355bf215546Sopenharmony_ci}
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_cistatic int
358bf215546Sopenharmony_cicolor_index_for_var(const nir_variable *var)
359bf215546Sopenharmony_ci{
360bf215546Sopenharmony_ci   if (var->data.location != FRAG_RESULT_COLOR &&
361bf215546Sopenharmony_ci       var->data.location < FRAG_RESULT_DATA0)
362bf215546Sopenharmony_ci      return -1;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   return (var->data.location == FRAG_RESULT_COLOR) ? 0 :
365bf215546Sopenharmony_ci          (var->data.location - FRAG_RESULT_DATA0);
366bf215546Sopenharmony_ci}
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_cistatic bool
369bf215546Sopenharmony_cinir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
370bf215546Sopenharmony_ci                      const nir_lower_blend_options *options)
371bf215546Sopenharmony_ci{
372bf215546Sopenharmony_ci   assert(store->intrinsic == nir_intrinsic_store_deref);
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci   nir_variable *var = nir_intrinsic_get_var(store, 0);
375bf215546Sopenharmony_ci   int rt = color_index_for_var(var);
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci   /* No blend lowering requested on this RT */
378bf215546Sopenharmony_ci   if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
379bf215546Sopenharmony_ci      return false;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&store->instr);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   /* Grab the input color.  We always want 4 channels during blend.  Dead
384bf215546Sopenharmony_ci    * code will clean up any channels we don't need.
385bf215546Sopenharmony_ci    */
386bf215546Sopenharmony_ci   assert(store->src[1].is_ssa);
387bf215546Sopenharmony_ci   nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4);
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci   /* Grab the previous fragment color */
390bf215546Sopenharmony_ci   var->data.fb_fetch_output = true;
391bf215546Sopenharmony_ci   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
392bf215546Sopenharmony_ci   b->shader->info.fs.uses_fbfetch_output = true;
393bf215546Sopenharmony_ci   nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4);
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_ci   /* Blend the two colors per the passed options */
396bf215546Sopenharmony_ci   nir_ssa_def *blended = src;
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci   if (options->logicop_enable) {
399bf215546Sopenharmony_ci      blended = nir_blend_logicop(b, options, rt, src, dst);
400bf215546Sopenharmony_ci   } else if (!util_format_is_pure_integer(options->format[rt])) {
401bf215546Sopenharmony_ci      assert(!util_format_is_scaled(options->format[rt]));
402bf215546Sopenharmony_ci      blended = nir_blend(b, options, rt, src, options->src1, dst);
403bf215546Sopenharmony_ci   }
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   /* Apply a colormask */
406bf215546Sopenharmony_ci   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   const unsigned num_components = glsl_get_vector_elements(var->type);
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci   /* Shave off any components we don't want to store */
411bf215546Sopenharmony_ci   blended = nir_trim_vector(b, blended, num_components);
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   /* Grow or shrink the store destination as needed */
414bf215546Sopenharmony_ci   assert(nir_intrinsic_write_mask(store) ==
415bf215546Sopenharmony_ci          nir_component_mask(store->num_components));
416bf215546Sopenharmony_ci   store->num_components = num_components;
417bf215546Sopenharmony_ci   store->dest.ssa.num_components = num_components;
418bf215546Sopenharmony_ci   nir_intrinsic_set_write_mask(store, nir_component_mask(num_components));
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   /* Write out the final color instead of the input */
421bf215546Sopenharmony_ci   nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended);
422bf215546Sopenharmony_ci   return true;
423bf215546Sopenharmony_ci}
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_cistatic bool
426bf215546Sopenharmony_cinir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
427bf215546Sopenharmony_ci{
428bf215546Sopenharmony_ci   const nir_lower_blend_options *options = data;
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_ci   switch (instr->type) {
431bf215546Sopenharmony_ci   case nir_instr_type_deref: {
432bf215546Sopenharmony_ci      /* Fix up output deref types, as needed */
433bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_instr_as_deref(instr);
434bf215546Sopenharmony_ci      if (!nir_deref_mode_is(deref, nir_var_shader_out))
435bf215546Sopenharmony_ci         return false;
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci      /* Indirects must be already lowered and output variables split */
438bf215546Sopenharmony_ci      assert(deref->deref_type == nir_deref_type_var);
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci      if (deref->type == deref->var->type)
441bf215546Sopenharmony_ci         return false;
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci      deref->type = deref->var->type;
444bf215546Sopenharmony_ci      return true;
445bf215546Sopenharmony_ci   }
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci   case nir_instr_type_intrinsic: {
448bf215546Sopenharmony_ci      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
449bf215546Sopenharmony_ci      if (intrin->intrinsic != nir_intrinsic_load_deref &&
450bf215546Sopenharmony_ci          intrin->intrinsic != nir_intrinsic_store_deref)
451bf215546Sopenharmony_ci         return false;
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
454bf215546Sopenharmony_ci      if (!nir_deref_mode_is(deref, nir_var_shader_out))
455bf215546Sopenharmony_ci         return false;
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci      assert(glsl_type_is_vector_or_scalar(deref->type));
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci      if (intrin->intrinsic == nir_intrinsic_load_deref) {
460bf215546Sopenharmony_ci         /* We need to fix up framebuffer if num_components changed */
461bf215546Sopenharmony_ci         const unsigned num_components = glsl_get_vector_elements(deref->type);
462bf215546Sopenharmony_ci         if (intrin->num_components == num_components)
463bf215546Sopenharmony_ci            return false;
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci         b->cursor = nir_after_instr(&intrin->instr);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci         assert(intrin->dest.is_ssa);
468bf215546Sopenharmony_ci         nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa,
469bf215546Sopenharmony_ci                                              num_components);
470bf215546Sopenharmony_ci         intrin->num_components = num_components,
471bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val,
472bf215546Sopenharmony_ci                                        val->parent_instr);
473bf215546Sopenharmony_ci         return true;
474bf215546Sopenharmony_ci      } else {
475bf215546Sopenharmony_ci         return nir_lower_blend_store(b, intrin, options);
476bf215546Sopenharmony_ci      }
477bf215546Sopenharmony_ci   }
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci   default:
480bf215546Sopenharmony_ci      return false;
481bf215546Sopenharmony_ci   }
482bf215546Sopenharmony_ci}
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci/** Lower blending to framebuffer fetch and some math
485bf215546Sopenharmony_ci *
486bf215546Sopenharmony_ci * This pass requires that indirects are lowered and output variables split
487bf215546Sopenharmony_ci * so that we have a single output variable for each RT.  We could go to the
488bf215546Sopenharmony_ci * effort of handling arrays (possibly of arrays) but, given that we need
489bf215546Sopenharmony_ci * indirects lowered anyway (we need constant indices to look up blend
490bf215546Sopenharmony_ci * functions and formats), we may as well require variables to be split.
491bf215546Sopenharmony_ci * This can be done by calling nir_lower_io_arrays_to_elements_no_indirect().
492bf215546Sopenharmony_ci */
493bf215546Sopenharmony_civoid
494bf215546Sopenharmony_cinir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
495bf215546Sopenharmony_ci{
496bf215546Sopenharmony_ci   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   /* Re-type any blended output variables to have the same number of
499bf215546Sopenharmony_ci    * components as the image format.  The GL 4.6 Spec says:
500bf215546Sopenharmony_ci    *
501bf215546Sopenharmony_ci    *    "If a fragment shader writes to none of gl_FragColor, gl_FragData,
502bf215546Sopenharmony_ci    *    nor any user-defined output variables, the values of the fragment
503bf215546Sopenharmony_ci    *    colors following shader execution are undefined, and may differ for
504bf215546Sopenharmony_ci    *    each fragment color.  If some, but not all elements of gl_FragData or
505bf215546Sopenharmony_ci    *    of theser-defined output variables are written, the values of
506bf215546Sopenharmony_ci    *    fragment colors corresponding to unwritten elements orariables are
507bf215546Sopenharmony_ci    *    similarly undefined."
508bf215546Sopenharmony_ci    *
509bf215546Sopenharmony_ci    * Note the phrase "following shader execution".  Those color values are
510bf215546Sopenharmony_ci    * then supposed to go into blending which may, depending on the blend
511bf215546Sopenharmony_ci    * mode, apply constraints that result in well-defined rendering.  It's
512bf215546Sopenharmony_ci    * fine if we have to pad out a value with undef but we then need to blend
513bf215546Sopenharmony_ci    * that garbage value to ensure correct results.
514bf215546Sopenharmony_ci    *
515bf215546Sopenharmony_ci    * This may also, depending on output format, be a small optimization
516bf215546Sopenharmony_ci    * allowing NIR to dead-code unused calculations.
517bf215546Sopenharmony_ci    */
518bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, shader) {
519bf215546Sopenharmony_ci      int rt = color_index_for_var(var);
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci      /* No blend lowering requested on this RT */
522bf215546Sopenharmony_ci      if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
523bf215546Sopenharmony_ci         continue;
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci      const unsigned num_format_components =
526bf215546Sopenharmony_ci         util_format_get_nr_components(options->format[rt]);
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci      /* Indirects must be already lowered and output variables split */
529bf215546Sopenharmony_ci      assert(glsl_type_is_vector_or_scalar(var->type));
530bf215546Sopenharmony_ci      var->type = glsl_replace_vector_type(var->type, num_format_components);
531bf215546Sopenharmony_ci   }
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
534bf215546Sopenharmony_ci                                nir_metadata_block_index |
535bf215546Sopenharmony_ci                                nir_metadata_dominance,
536bf215546Sopenharmony_ci                                (void *)options);
537bf215546Sopenharmony_ci}
538