1/*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2019-2021 Collabora, Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "pan_blend.h"
26
27#ifdef PAN_ARCH
28#include "pan_shader.h"
29#endif
30
31#include "pan_texture.h"
32#include "panfrost/util/pan_lower_framebuffer.h"
33#include "util/format/u_format.h"
34#include "compiler/nir/nir.h"
35#include "compiler/nir/nir_builder.h"
36#include "compiler/nir/nir_conversion_builder.h"
37#include "compiler/nir/nir_lower_blend.h"
38
39#ifndef PAN_ARCH
40
41/* Fixed function blending */
42
43static bool
44factor_is_supported(enum blend_factor factor)
45{
46        return factor != BLEND_FACTOR_SRC_ALPHA_SATURATE &&
47               factor != BLEND_FACTOR_SRC1_COLOR &&
48               factor != BLEND_FACTOR_SRC1_ALPHA;
49}
50
51/* OpenGL allows encoding (src*dest + dest*src) which is incompatiblle with
52 * Midgard style blending since there are two multiplies. However, it may be
53 * factored as 2*src*dest = dest*(2*src), which can be encoded on Bifrost as 0
54 * + dest * (2*src) wih the new source_2 value of C. Detect this case. */
55
56static bool
57is_2srcdest(enum blend_func blend_func,
58            enum blend_factor src_factor,
59            bool invert_src,
60            enum blend_factor dest_factor,
61            bool invert_dest,
62            bool is_alpha)
63{
64        return (blend_func == BLEND_FUNC_ADD) &&
65               ((src_factor == BLEND_FACTOR_DST_COLOR) ||
66                ((src_factor == BLEND_FACTOR_DST_ALPHA) && is_alpha)) &&
67               ((dest_factor == BLEND_FACTOR_SRC_COLOR) ||
68                ((dest_factor == BLEND_FACTOR_SRC_ALPHA) && is_alpha)) &&
69               !invert_src && !invert_dest;
70}
71
72static bool
73can_fixed_function_equation(enum blend_func blend_func,
74                            enum blend_factor src_factor,
75                            bool invert_src,
76                            enum blend_factor dest_factor,
77                            bool invert_dest,
78                            bool is_alpha,
79                            bool supports_2src)
80{
81        if (is_2srcdest(blend_func, src_factor, invert_src,
82                       dest_factor, invert_dest, is_alpha)) {
83
84                return supports_2src;
85        }
86
87        if (blend_func != BLEND_FUNC_ADD &&
88            blend_func != BLEND_FUNC_SUBTRACT &&
89            blend_func != BLEND_FUNC_REVERSE_SUBTRACT)
90                return false;
91
92        if (!factor_is_supported(src_factor) ||
93            !factor_is_supported(dest_factor))
94                return false;
95
96        if (src_factor != dest_factor &&
97            src_factor != BLEND_FACTOR_ZERO &&
98            dest_factor != BLEND_FACTOR_ZERO)
99                return false;
100
101        return true;
102}
103
104static unsigned
105blend_factor_constant_mask(enum blend_factor factor)
106{
107        if (factor == BLEND_FACTOR_CONSTANT_COLOR)
108                return 0b0111; /* RGB */
109        else if (factor == BLEND_FACTOR_CONSTANT_ALPHA)
110                return 0b1000; /* A */
111        else
112                return 0b0000; /* - */
113}
114
115unsigned
116pan_blend_constant_mask(const struct pan_blend_equation eq)
117{
118        return blend_factor_constant_mask(eq.rgb_src_factor) |
119               blend_factor_constant_mask(eq.rgb_dst_factor) |
120               blend_factor_constant_mask(eq.alpha_src_factor) |
121               blend_factor_constant_mask(eq.alpha_dst_factor);
122}
123
124/* Only "homogenous" (scalar or vector with all components equal) constants are
125 * valid for fixed-function, so check for this condition */
126
127bool
128pan_blend_is_homogenous_constant(unsigned mask, const float *constants)
129{
130        float constant = pan_blend_get_constant(mask, constants);
131
132        u_foreach_bit(i, mask) {
133                if (constants[i] != constant)
134                        return false;
135        }
136
137        return true;
138}
139
140/* Determines if an equation can run in fixed function */
141
142bool
143pan_blend_can_fixed_function(const struct pan_blend_equation equation,
144                             bool supports_2src)
145{
146        return !equation.blend_enable ||
147               (can_fixed_function_equation(equation.rgb_func,
148                                            equation.rgb_src_factor,
149                                            equation.rgb_invert_src_factor,
150                                            equation.rgb_dst_factor,
151                                            equation.rgb_invert_dst_factor,
152                                            false, supports_2src) &&
153                can_fixed_function_equation(equation.alpha_func,
154                                            equation.alpha_src_factor,
155                                            equation.alpha_invert_src_factor,
156                                            equation.alpha_dst_factor,
157                                            equation.alpha_invert_dst_factor,
158                                            true, supports_2src));
159}
160
161static enum mali_blend_operand_c
162to_c_factor(enum blend_factor factor)
163{
164        switch (factor) {
165        case BLEND_FACTOR_ZERO:
166                return MALI_BLEND_OPERAND_C_ZERO;
167
168        case BLEND_FACTOR_SRC_ALPHA:
169                return MALI_BLEND_OPERAND_C_SRC_ALPHA;
170
171        case BLEND_FACTOR_DST_ALPHA:
172                return MALI_BLEND_OPERAND_C_DEST_ALPHA;
173
174        case BLEND_FACTOR_SRC_COLOR:
175                return MALI_BLEND_OPERAND_C_SRC;
176
177        case BLEND_FACTOR_DST_COLOR:
178                return MALI_BLEND_OPERAND_C_DEST;
179
180        case BLEND_FACTOR_CONSTANT_COLOR:
181        case BLEND_FACTOR_CONSTANT_ALPHA:
182                return MALI_BLEND_OPERAND_C_CONSTANT;
183
184        default:
185                unreachable("Unsupported blend factor");
186        }
187}
188
189static void
190to_panfrost_function(enum blend_func blend_func,
191                     enum blend_factor src_factor,
192                     bool invert_src,
193                     enum blend_factor dest_factor,
194                     bool invert_dest,
195                     bool is_alpha,
196                     struct MALI_BLEND_FUNCTION *function)
197{
198        assert(can_fixed_function_equation(blend_func, src_factor, invert_src,
199                                           dest_factor, invert_dest, is_alpha, true));
200
201        if (src_factor == BLEND_FACTOR_ZERO && !invert_src) {
202                function->a = MALI_BLEND_OPERAND_A_ZERO;
203                function->b = MALI_BLEND_OPERAND_B_DEST;
204                if (blend_func == BLEND_FUNC_SUBTRACT)
205                        function->negate_b = true;
206                function->invert_c = invert_dest;
207                function->c = to_c_factor(dest_factor);
208        } else if (src_factor == BLEND_FACTOR_ZERO && invert_src) {
209                function->a = MALI_BLEND_OPERAND_A_SRC;
210                function->b = MALI_BLEND_OPERAND_B_DEST;
211                if (blend_func == BLEND_FUNC_SUBTRACT)
212                        function->negate_b = true;
213                else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT)
214                        function->negate_a = true;
215                function->invert_c = invert_dest;
216                function->c = to_c_factor(dest_factor);
217        } else if (dest_factor == BLEND_FACTOR_ZERO && !invert_dest) {
218                function->a = MALI_BLEND_OPERAND_A_ZERO;
219                function->b = MALI_BLEND_OPERAND_B_SRC;
220                if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT)
221                        function->negate_b = true;
222                function->invert_c = invert_src;
223                function->c = to_c_factor(src_factor);
224        } else if (dest_factor == BLEND_FACTOR_ZERO && invert_dest) {
225                function->a = MALI_BLEND_OPERAND_A_DEST;
226                function->b = MALI_BLEND_OPERAND_B_SRC;
227                if (blend_func == BLEND_FUNC_SUBTRACT)
228                        function->negate_a = true;
229                else if (blend_func == BLEND_FUNC_REVERSE_SUBTRACT)
230                        function->negate_b = true;
231                function->invert_c = invert_src;
232                function->c = to_c_factor(src_factor);
233        } else if (src_factor == dest_factor && invert_src == invert_dest) {
234                function->a = MALI_BLEND_OPERAND_A_ZERO;
235                function->invert_c = invert_src;
236                function->c = to_c_factor(src_factor);
237
238                switch (blend_func) {
239                case BLEND_FUNC_ADD:
240                        function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
241                        break;
242                case BLEND_FUNC_REVERSE_SUBTRACT:
243                        function->negate_b = true;
244                        FALLTHROUGH;
245                case BLEND_FUNC_SUBTRACT:
246                        function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST;
247                        break;
248                default:
249                        unreachable("Invalid blend function");
250                }
251        } else if (is_2srcdest(blend_func, src_factor, invert_src, dest_factor,
252                                invert_dest, is_alpha)) {
253                /* src*dest + dest*src = 2*src*dest = 0 + dest*(2*src) */
254                function->a = MALI_BLEND_OPERAND_A_ZERO;
255                function->b = MALI_BLEND_OPERAND_B_DEST;
256                function->c = MALI_BLEND_OPERAND_C_SRC_X_2;
257        } else {
258                assert(src_factor == dest_factor && invert_src != invert_dest);
259
260                function->a = MALI_BLEND_OPERAND_A_DEST;
261                function->invert_c = invert_src;
262                function->c = to_c_factor(src_factor);
263
264                switch (blend_func) {
265                case BLEND_FUNC_ADD:
266                        function->b = MALI_BLEND_OPERAND_B_SRC_MINUS_DEST;
267                        break;
268                case BLEND_FUNC_REVERSE_SUBTRACT:
269                        function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
270                        function->negate_b = true;
271                        break;
272                case BLEND_FUNC_SUBTRACT:
273                        function->b = MALI_BLEND_OPERAND_B_SRC_PLUS_DEST;
274                        function->negate_a = true;
275                        break;
276                default:
277                        unreachable("Invalid blend function\n");
278                }
279        }
280}
281
282bool
283pan_blend_is_opaque(const struct pan_blend_equation equation)
284{
285        /* If a channel is masked out, we can't use opaque mode even if
286         * blending is disabled, since we need a tilebuffer read in there */
287        if (equation.color_mask != 0xF)
288                return false;
289
290        /* With nothing masked out, disabled bledning is opaque */
291        if (!equation.blend_enable)
292                return true;
293
294        /* Also detect open-coded opaque blending */
295        return equation.rgb_src_factor == BLEND_FACTOR_ZERO &&
296               equation.rgb_invert_src_factor &&
297               equation.rgb_dst_factor == BLEND_FACTOR_ZERO &&
298               !equation.rgb_invert_dst_factor &&
299               (equation.rgb_func == BLEND_FUNC_ADD ||
300                equation.rgb_func == BLEND_FUNC_SUBTRACT) &&
301               equation.alpha_src_factor == BLEND_FACTOR_ZERO &&
302               equation.alpha_invert_src_factor &&
303               equation.alpha_dst_factor == BLEND_FACTOR_ZERO &&
304               !equation.alpha_invert_dst_factor &&
305               (equation.alpha_func == BLEND_FUNC_ADD ||
306                equation.alpha_func == BLEND_FUNC_SUBTRACT);
307}
308
309/* Check if (factor, invert) represents a constant value of val, assuming
310 * src_alpha is the given constant.
311 */
312
313static inline bool
314is_factor_01(unsigned factor, bool invert, unsigned val, unsigned srca)
315{
316        assert(val == 0 || val == 1);
317        assert(srca == 0 || srca == 1);
318
319        return ((invert ^ !val) && factor == BLEND_FACTOR_ZERO) ||
320               ((invert ^ srca ^ !val) && factor == BLEND_FACTOR_SRC_ALPHA);
321}
322
323/* Returns if src alpha = 0 implies the blended colour equals the destination
324 * colour. Suppose source alpha = 0 and consider cases.
325 *
326 * Additive blending: Equivalent to D = S * f_s + D * f_d for all D and all S
327 * with S_a = 0, for each component. For the alpha component (if it unmasked),
328 * we have S_a = 0 so this reduces to D = D * f_d <===> f_d = 1. For RGB
329 * components (if unmasked), we need f_s = 0 and f_d = 1.
330 *
331 * Subtractive blending: Fails in general (D = S * f_S - D * f_D). We
332 * would need f_S = 0 and f_D = -1, which is not valid in the APIs.
333 *
334 * Reverse subtractive blending (D = D * f_D - S * f_S), we need f_D = 1
335 * and f_S = 0 up to masking. This is the same as additive blending.
336 *
337 * Min/max: Fails in general on the RGB components.
338 */
339
340bool
341pan_blend_alpha_zero_nop(const struct pan_blend_equation eq)
342{
343        if (eq.rgb_func != BLEND_FUNC_ADD &&
344            eq.rgb_func != BLEND_FUNC_REVERSE_SUBTRACT)
345                return false;
346
347        if (eq.color_mask & 0x8) {
348                if (!is_factor_01(eq.alpha_dst_factor, eq.alpha_invert_dst_factor, 1, 0))
349                        return false;
350        }
351
352        if (eq.color_mask & 0x7) {
353                if (!is_factor_01(eq.rgb_dst_factor, eq.rgb_invert_dst_factor, 1, 0))
354                        return false;
355
356                if (!is_factor_01(eq.rgb_src_factor, eq.rgb_invert_src_factor, 0, 0))
357                        return false;
358        }
359
360        return true;
361}
362
363/* Returns if src alpha = 1 implies the blended colour equals the source
364 * colour. Suppose source alpha = 1 and consider cases.
365 *
366 * Additive blending: S = S * f_s + D * f_d. We need f_s = 1 and f_d = 0.
367 *
368 * Subtractive blending: S = S * f_s - D * f_d. Same as additive blending.
369 *
370 * Reverse subtractive blending: S = D * f_d - S * f_s. Fails in general since
371 * it would require f_s = -1, which is not valid in the APIs.
372 *
373 * Min/max: Fails in general on the RGB components.
374 *
375 * Note if any component is masked, we can't use a store.
376 */
377
378bool
379pan_blend_alpha_one_store(const struct pan_blend_equation eq)
380{
381        if (eq.rgb_func != BLEND_FUNC_ADD &&
382            eq.rgb_func != BLEND_FUNC_SUBTRACT)
383                return false;
384
385        if (eq.color_mask != 0xf)
386                return false;
387
388        return is_factor_01(eq.rgb_src_factor, eq.rgb_invert_src_factor, 1, 1) &&
389               is_factor_01(eq.alpha_src_factor, eq.alpha_invert_src_factor, 1, 1) &&
390               is_factor_01(eq.rgb_dst_factor, eq.rgb_invert_dst_factor, 0, 1) &&
391               is_factor_01(eq.alpha_dst_factor, eq.alpha_invert_dst_factor, 0, 1);
392}
393
394static bool
395is_dest_factor(enum blend_factor factor, bool alpha)
396{
397      return factor == BLEND_FACTOR_DST_ALPHA ||
398             factor == BLEND_FACTOR_DST_COLOR ||
399             (factor == BLEND_FACTOR_SRC_ALPHA_SATURATE && !alpha);
400}
401
402/* Determines if a blend equation reads back the destination. This can occur by
403 * explicitly referencing the destination in the blend equation, or by using a
404 * partial writemask. */
405
406bool
407pan_blend_reads_dest(const struct pan_blend_equation equation)
408{
409        return (equation.color_mask && equation.color_mask != 0xF) ||
410                is_dest_factor(equation.rgb_src_factor, false) ||
411                is_dest_factor(equation.alpha_src_factor, true) ||
412                equation.rgb_dst_factor != BLEND_FACTOR_ZERO ||
413                equation.rgb_invert_dst_factor ||
414                equation.alpha_dst_factor != BLEND_FACTOR_ZERO ||
415                equation.alpha_invert_dst_factor;
416}
417
418/* Create the descriptor for a fixed blend mode given the corresponding API
419 * state. Assumes the equation can be represented as fixed-function. */
420
421void
422pan_blend_to_fixed_function_equation(const struct pan_blend_equation equation,
423                                     struct MALI_BLEND_EQUATION *out)
424{
425        /* If no blending is enabled, default back on `replace` mode */
426        if (!equation.blend_enable) {
427                out->color_mask = equation.color_mask;
428                out->rgb.a = MALI_BLEND_OPERAND_A_SRC;
429                out->rgb.b = MALI_BLEND_OPERAND_B_SRC;
430                out->rgb.c = MALI_BLEND_OPERAND_C_ZERO;
431                out->alpha.a = MALI_BLEND_OPERAND_A_SRC;
432                out->alpha.b = MALI_BLEND_OPERAND_B_SRC;
433                out->alpha.c = MALI_BLEND_OPERAND_C_ZERO;
434                return;
435        }
436
437        /* Compile the fixed-function blend */
438        to_panfrost_function(equation.rgb_func,
439                             equation.rgb_src_factor,
440                             equation.rgb_invert_src_factor,
441                             equation.rgb_dst_factor,
442                             equation.rgb_invert_dst_factor,
443                             false, &out->rgb);
444
445        to_panfrost_function(equation.alpha_func,
446                             equation.alpha_src_factor,
447                             equation.alpha_invert_src_factor,
448                             equation.alpha_dst_factor,
449                             equation.alpha_invert_dst_factor,
450                             true, &out->alpha);
451        out->color_mask = equation.color_mask;
452}
453
454uint32_t
455pan_pack_blend(const struct pan_blend_equation equation)
456{
457        STATIC_ASSERT(sizeof(uint32_t) == MALI_BLEND_EQUATION_LENGTH);
458
459        uint32_t out = 0;
460
461        pan_pack(&out, BLEND_EQUATION, cfg) {
462                pan_blend_to_fixed_function_equation(equation, &cfg);
463        }
464
465        return out;
466}
467
468static uint32_t pan_blend_shader_key_hash(const void *key)
469{
470        return _mesa_hash_data(key, sizeof(struct pan_blend_shader_key));
471}
472
473static bool pan_blend_shader_key_equal(const void *a, const void *b)
474{
475        return !memcmp(a, b, sizeof(struct pan_blend_shader_key));
476}
477
478void
479pan_blend_shaders_init(struct panfrost_device *dev)
480{
481        dev->blend_shaders.shaders =
482                _mesa_hash_table_create(NULL, pan_blend_shader_key_hash,
483                                        pan_blend_shader_key_equal);
484        pthread_mutex_init(&dev->blend_shaders.lock, NULL);
485}
486
487void
488pan_blend_shaders_cleanup(struct panfrost_device *dev)
489{
490        _mesa_hash_table_destroy(dev->blend_shaders.shaders, NULL);
491}
492
493#else /* ifndef PAN_ARCH */
494
495static const char *
496logicop_str(enum pipe_logicop logicop)
497{
498        switch (logicop) {
499        case PIPE_LOGICOP_CLEAR: return "clear";
500        case PIPE_LOGICOP_NOR: return "nor";
501        case PIPE_LOGICOP_AND_INVERTED: return "and-inverted";
502        case PIPE_LOGICOP_COPY_INVERTED: return "copy-inverted";
503        case PIPE_LOGICOP_AND_REVERSE: return "and-reverse";
504        case PIPE_LOGICOP_INVERT: return "invert";
505        case PIPE_LOGICOP_XOR: return "xor";
506        case PIPE_LOGICOP_NAND: return "nand";
507        case PIPE_LOGICOP_AND: return "and";
508        case PIPE_LOGICOP_EQUIV: return "equiv";
509        case PIPE_LOGICOP_NOOP: return "noop";
510        case PIPE_LOGICOP_OR_INVERTED: return "or-inverted";
511        case PIPE_LOGICOP_COPY: return "copy";
512        case PIPE_LOGICOP_OR_REVERSE: return "or-reverse";
513        case PIPE_LOGICOP_OR: return "or";
514        case PIPE_LOGICOP_SET: return "set";
515        default: unreachable("Invalid logicop\n");
516        }
517}
518
519static void
520get_equation_str(const struct pan_blend_rt_state *rt_state,
521                 char *str, unsigned len)
522{
523        const char *funcs[] = {
524                "add", "sub", "reverse_sub", "min", "max",
525        };
526        const char *factors[] = {
527                "zero", "src_color", "src1_color", "dst_color",
528                "src_alpha", "src1_alpha", "dst_alpha",
529                "const_color", "const_alpha", "src_alpha_sat",
530        };
531        int ret;
532
533        if (!rt_state->equation.blend_enable) {
534		ret = snprintf(str, len, "replace");
535                assert(ret > 0);
536                return;
537        }
538
539        if (rt_state->equation.color_mask & 7) {
540                assert(rt_state->equation.rgb_func < ARRAY_SIZE(funcs));
541                assert(rt_state->equation.rgb_src_factor < ARRAY_SIZE(factors));
542                assert(rt_state->equation.rgb_dst_factor < ARRAY_SIZE(factors));
543                ret = snprintf(str, len, "%s%s%s(func=%s,src_factor=%s%s,dst_factor=%s%s)%s",
544                               (rt_state->equation.color_mask & 1) ? "R" : "",
545                               (rt_state->equation.color_mask & 2) ? "G" : "",
546                               (rt_state->equation.color_mask & 4) ? "B" : "",
547                               funcs[rt_state->equation.rgb_func],
548                               rt_state->equation.rgb_invert_src_factor ? "-" : "",
549                               factors[rt_state->equation.rgb_src_factor],
550                               rt_state->equation.rgb_invert_dst_factor ? "-" : "",
551                               factors[rt_state->equation.rgb_dst_factor],
552                               rt_state->equation.color_mask & 8 ? ";" : "");
553                assert(ret > 0);
554                str += ret;
555                len -= ret;
556         }
557
558        if (rt_state->equation.color_mask & 8) {
559                assert(rt_state->equation.alpha_func < ARRAY_SIZE(funcs));
560                assert(rt_state->equation.alpha_src_factor < ARRAY_SIZE(factors));
561                assert(rt_state->equation.alpha_dst_factor < ARRAY_SIZE(factors));
562                ret = snprintf(str, len, "A(func=%s,src_factor=%s%s,dst_factor=%s%s)",
563                               funcs[rt_state->equation.alpha_func],
564                               rt_state->equation.alpha_invert_src_factor ? "-" : "",
565                               factors[rt_state->equation.alpha_src_factor],
566                               rt_state->equation.alpha_invert_dst_factor ? "-" : "",
567                               factors[rt_state->equation.alpha_dst_factor]);
568                assert(ret > 0);
569                str += ret;
570                len -= ret;
571         }
572}
573
574static bool
575pan_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data)
576{
577        if (instr->type != nir_instr_type_intrinsic)
578                return false;
579
580        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
581        if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
582                return false;
583
584        float *floats = data;
585        const nir_const_value constants[4] = {
586                { .f32 = floats[0] },
587                { .f32 = floats[1] },
588                { .f32 = floats[2] },
589                { .f32 = floats[3] }
590        };
591
592        b->cursor = nir_after_instr(instr);
593        nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants);
594        nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant);
595        nir_instr_remove(instr);
596        return true;
597}
598
599nir_shader *
600GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
601                              const struct pan_blend_state *state,
602                              nir_alu_type src0_type,
603                              nir_alu_type src1_type,
604                              unsigned rt)
605{
606        const struct pan_blend_rt_state *rt_state = &state->rts[rt];
607        char equation_str[128] = { 0 };
608
609        get_equation_str(rt_state, equation_str, sizeof(equation_str));
610
611        nir_builder b =
612                nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
613                                               GENX(pan_shader_get_compiler_options)(),
614                                               "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)",
615                                               rt, util_format_name(rt_state->format),
616                                               rt_state->nr_samples,
617                                               state->logicop_enable ? "logicop" : "equation",
618                                               state->logicop_enable ?
619                                               logicop_str(state->logicop_func) : equation_str);
620
621        const struct util_format_description *format_desc =
622                util_format_description(rt_state->format);
623        nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc);
624        enum glsl_base_type glsl_type = nir_get_glsl_base_type_for_nir_type(nir_type);
625
626        nir_lower_blend_options options = {
627                .logicop_enable = state->logicop_enable,
628                .logicop_func = state->logicop_func,
629                .rt[0].colormask = rt_state->equation.color_mask,
630                .format[0] = rt_state->format
631        };
632
633        if (!rt_state->equation.blend_enable) {
634                static const nir_lower_blend_channel replace = {
635                        .func = BLEND_FUNC_ADD,
636                        .src_factor = BLEND_FACTOR_ZERO,
637                        .invert_src_factor = true,
638                        .dst_factor = BLEND_FACTOR_ZERO,
639                        .invert_dst_factor = false,
640                };
641
642                options.rt[0].rgb = replace;
643                options.rt[0].alpha = replace;
644        } else {
645                options.rt[0].rgb.func = rt_state->equation.rgb_func;
646                options.rt[0].rgb.src_factor = rt_state->equation.rgb_src_factor;
647                options.rt[0].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
648                options.rt[0].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
649                options.rt[0].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
650                options.rt[0].alpha.func = rt_state->equation.alpha_func;
651                options.rt[0].alpha.src_factor = rt_state->equation.alpha_src_factor;
652                options.rt[0].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
653                options.rt[0].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
654                options.rt[0].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
655        }
656
657        nir_alu_type src_types[] = { src0_type ?: nir_type_float32, src1_type ?: nir_type_float32 };
658
659        /* HACK: workaround buggy TGSI shaders (u_blitter) */
660        for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) {
661                src_types[i] = nir_alu_type_get_base_type(nir_type) |
662                        nir_alu_type_get_type_size(src_types[i]);
663        }
664
665	nir_variable *c_src =
666                nir_variable_create(b.shader, nir_var_shader_in,
667                                    glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4),
668                                    "gl_Color");
669        c_src->data.location = VARYING_SLOT_COL0;
670        nir_variable *c_src1 =
671                nir_variable_create(b.shader, nir_var_shader_in,
672                                    glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4),
673                                    "gl_Color1");
674        c_src1->data.location = VARYING_SLOT_VAR0;
675        c_src1->data.driver_location = 1;
676        nir_variable *c_out =
677                nir_variable_create(b.shader, nir_var_shader_out,
678                                    glsl_vector_type(glsl_type, 4),
679                                    "gl_FragColor");
680        c_out->data.location = FRAG_RESULT_DATA0;
681
682        nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)};
683
684        /* Saturate integer conversions */
685        for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
686                nir_alu_type T = nir_alu_type_get_base_type(nir_type);
687                s_src[i] = nir_convert_with_rounding(&b, s_src[i],
688                                src_types[i], nir_type,
689                                nir_rounding_mode_undef,
690                                T != nir_type_float);
691        }
692
693        /* Build a trivial blend shader */
694        nir_store_var(&b, c_out, s_src[0], 0xFF);
695
696        options.src1 = s_src[1];
697
698        NIR_PASS_V(b.shader, nir_lower_blend, &options);
699        nir_shader_instructions_pass(b.shader, pan_inline_blend_constants,
700                        nir_metadata_block_index | nir_metadata_dominance,
701                        (void *) state->constants);
702
703        return b.shader;
704}
705
706#if PAN_ARCH >= 6
707uint64_t
708GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
709                                  enum pipe_format fmt, unsigned rt,
710                                  unsigned force_size, bool dithered)
711{
712        const struct util_format_description *desc = util_format_description(fmt);
713        uint64_t res;
714
715        pan_pack(&res, INTERNAL_BLEND, cfg) {
716                cfg.mode = MALI_BLEND_MODE_OPAQUE;
717                cfg.fixed_function.num_comps = desc->nr_channels;
718                cfg.fixed_function.rt = rt;
719
720                nir_alu_type T = pan_unpacked_type_for_format(desc);
721
722                if (force_size)
723                        T = nir_alu_type_get_base_type(T) | force_size;
724
725                switch (T) {
726                case nir_type_float16:
727                        cfg.fixed_function.conversion.register_format =
728                                MALI_REGISTER_FILE_FORMAT_F16;
729                        break;
730                case nir_type_float32:
731                        cfg.fixed_function.conversion.register_format =
732                                MALI_REGISTER_FILE_FORMAT_F32;
733                        break;
734                case nir_type_int8:
735                case nir_type_int16:
736                        cfg.fixed_function.conversion.register_format =
737                                MALI_REGISTER_FILE_FORMAT_I16;
738                        break;
739                case nir_type_int32:
740                        cfg.fixed_function.conversion.register_format =
741                                MALI_REGISTER_FILE_FORMAT_I32;
742                        break;
743                case nir_type_uint8:
744                case nir_type_uint16:
745                        cfg.fixed_function.conversion.register_format =
746                                MALI_REGISTER_FILE_FORMAT_U16;
747                        break;
748                case nir_type_uint32:
749                        cfg.fixed_function.conversion.register_format =
750                                MALI_REGISTER_FILE_FORMAT_U32;
751                        break;
752                default:
753                        unreachable("Invalid format");
754                }
755
756                cfg.fixed_function.conversion.memory_format =
757                         panfrost_format_to_bifrost_blend(dev, fmt, dithered);
758        }
759
760        return res;
761}
762#endif
763
764struct pan_blend_shader_variant *
765GENX(pan_blend_get_shader_locked)(const struct panfrost_device *dev,
766                                  const struct pan_blend_state *state,
767                                  nir_alu_type src0_type,
768                                  nir_alu_type src1_type,
769                                  unsigned rt)
770{
771        struct pan_blend_shader_key key = {
772                .format = state->rts[rt].format,
773                .src0_type = src0_type,
774                .src1_type = src1_type,
775                .rt = rt,
776                .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0,
777                .logicop_enable = state->logicop_enable,
778                .logicop_func = state->logicop_func,
779                .nr_samples = state->rts[rt].nr_samples,
780                .equation = state->rts[rt].equation,
781        };
782
783        struct hash_entry *he = _mesa_hash_table_search(dev->blend_shaders.shaders, &key);
784        struct pan_blend_shader *shader = he ? he->data : NULL;
785
786        if (!shader) {
787                shader = rzalloc(dev->blend_shaders.shaders, struct pan_blend_shader);
788                shader->key = key;
789                list_inithead(&shader->variants);
790                _mesa_hash_table_insert(dev->blend_shaders.shaders, &shader->key, shader);
791        }
792
793        list_for_each_entry(struct pan_blend_shader_variant, iter,
794                            &shader->variants, node) {
795                if (!key.has_constants ||
796                    !memcmp(iter->constants, state->constants, sizeof(iter->constants))) {
797                        return iter;
798                }
799        }
800
801        struct pan_blend_shader_variant *variant = NULL;
802
803        if (shader->nvariants < PAN_BLEND_SHADER_MAX_VARIANTS) {
804                variant = rzalloc(shader, struct pan_blend_shader_variant);
805                util_dynarray_init(&variant->binary, variant);
806                list_add(&variant->node, &shader->variants);
807                shader->nvariants++;
808        } else {
809                variant = list_last_entry(&shader->variants, struct pan_blend_shader_variant, node);
810                list_del(&variant->node);
811                list_add(&variant->node, &shader->variants);
812                util_dynarray_clear(&variant->binary);
813        }
814
815        memcpy(variant->constants, state->constants, sizeof(variant->constants));
816
817        nir_shader *nir =
818                GENX(pan_blend_create_shader)(dev, state, src0_type, src1_type, rt);
819
820        /* Compile the NIR shader */
821        struct panfrost_compile_inputs inputs = {
822                .gpu_id = dev->gpu_id,
823                .is_blend = true,
824                .blend.rt = shader->key.rt,
825                .blend.nr_samples = key.nr_samples,
826                .fixed_sysval_ubo = -1,
827                .rt_formats = { key.format },
828        };
829
830#if PAN_ARCH >= 6
831        inputs.blend.bifrost_blend_desc =
832                GENX(pan_blend_get_internal_desc)(dev, key.format, key.rt, 0, false);
833#endif
834
835        struct pan_shader_info info;
836
837        GENX(pan_shader_compile)(nir, &inputs, &variant->binary, &info);
838
839        /* Blend shaders can't have sysvals */
840        assert(info.sysvals.sysval_count == 0);
841
842        variant->work_reg_count = info.work_reg_count;
843
844#if PAN_ARCH <= 5
845        variant->first_tag = info.midgard.first_tag;
846#endif
847
848        ralloc_free(nir);
849
850        return variant;
851}
852#endif /* ifndef PAN_ARCH */
853