xref: /third_party/mesa3d/src/panfrost/bifrost/bir.c (revision bf215546)
1/*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26
27#include "compiler.h"
28
29bool
30bi_has_arg(const bi_instr *ins, bi_index arg)
31{
32        if (!ins)
33                return false;
34
35        bi_foreach_src(ins, s) {
36                if (bi_is_equiv(ins->src[s], arg))
37                        return true;
38        }
39
40        return false;
41}
42
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
 * 16-bit. Note auto reads to 32-bit registers even if the memory format is
 * 16-bit, so it is considered 32-bit (not 16-bit) here */
46
47bool
48bi_is_regfmt_16(enum bi_register_format fmt)
49{
50        switch  (fmt) {
51        case BI_REGISTER_FORMAT_F16:
52        case BI_REGISTER_FORMAT_S16:
53        case BI_REGISTER_FORMAT_U16:
54                return true;
55        case BI_REGISTER_FORMAT_F32:
56        case BI_REGISTER_FORMAT_S32:
57        case BI_REGISTER_FORMAT_U32:
58        case BI_REGISTER_FORMAT_AUTO:
59                return false;
60        default:
61                unreachable("Invalid register format");
62        }
63}
64
65static unsigned
66bi_count_staging_registers(const bi_instr *ins)
67{
68        enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
69        unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
70
71        switch (count) {
72        case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
73                return count;
74        case BI_SR_COUNT_FORMAT:
75                return bi_is_regfmt_16(ins->register_format) ?
76                        DIV_ROUND_UP(vecsize, 2) : vecsize;
77        case BI_SR_COUNT_VECSIZE:
78                return vecsize;
79        case BI_SR_COUNT_SR_COUNT:
80                return ins->sr_count;
81        }
82
83        unreachable("Invalid sr_count");
84}
85
86unsigned
87bi_count_read_registers(const bi_instr *ins, unsigned s)
88{
89        /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
90        if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
91                return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
92        else if (s == 0 && bi_opcode_props[ins->op].sr_read)
93                return bi_count_staging_registers(ins);
94        else if (s == 4 && ins->op == BI_OPCODE_BLEND)
95                return ins->sr_count_2; /* Dual source blending */
96        else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
97                return ins->nr_dests;
98        else
99                return 1;
100}
101
102unsigned
103bi_count_write_registers(const bi_instr *ins, unsigned d)
104{
105        if (d == 0 && bi_opcode_props[ins->op].sr_write) {
106                switch (ins->op) {
107                case BI_OPCODE_TEXC:
108                        if (ins->sr_count_2)
109                                return ins->sr_count;
110                        else
111                                return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
112
113                case BI_OPCODE_TEX_SINGLE:
114                case BI_OPCODE_TEX_FETCH:
115                case BI_OPCODE_TEX_GATHER: {
116                        unsigned chans = util_bitcount(ins->write_mask);
117
118                        return bi_is_regfmt_16(ins->register_format) ?
119                                DIV_ROUND_UP(chans, 2) : chans;
120                }
121
122                case BI_OPCODE_ACMPXCHG_I32:
123                        /* Reads 2 but writes 1 */
124                        return 1;
125
126                case BI_OPCODE_ATOM1_RETURN_I32:
127                        /* Allow omitting the destination for plain ATOM1 */
128                        return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
129                default:
130                        return bi_count_staging_registers(ins);
131                }
132        } else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
133                return 2;
134        } else if (ins->op == BI_OPCODE_TEXC && d == 1) {
135                return ins->sr_count_2;
136        } else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
137                return ins->nr_srcs;
138        }
139
140        return 1;
141}
142
143unsigned
144bi_writemask(const bi_instr *ins, unsigned d)
145{
146        unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
147        unsigned shift = ins->dest[d].offset;
148        return (mask << shift);
149}
150
151bi_clause *
152bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
153{
154        if (!block && !clause)
155                return NULL;
156
157        /* Try the first clause in this block if we're starting from scratch */
158        if (!clause && !list_is_empty(&block->clauses))
159                return list_first_entry(&block->clauses, bi_clause, link);
160
161        /* Try the next clause in this block */
162        if (clause && clause->link.next != &block->clauses)
163                return list_first_entry(&(clause->link), bi_clause, link);
164
165        /* Try the next block, or the one after that if it's empty, etc .*/
166        bi_block *next_block = bi_next_block(block);
167
168        bi_foreach_block_from(ctx, next_block, block) {
169                if (!list_is_empty(&block->clauses))
170                        return list_first_entry(&block->clauses, bi_clause, link);
171        }
172
173        return NULL;
174}
175
/* Does an instruction have a side effect not captured by its register
 * destination? Applies to certain message-passing instructions, +DISCARD, and
 * branching only. Used in dead code elimination. Branches are characterized by
 * `last`, which applies to them and to some atomics, +BARRIER, and +BLEND;
 * this implies no loss of generality */
181
182bool
183bi_side_effects(const bi_instr *I)
184{
185        if (bi_opcode_props[I->op].last)
186                return true;
187
188        switch (I->op) {
189        case BI_OPCODE_DISCARD_F32:
190        case BI_OPCODE_DISCARD_B32:
191                return true;
192        default:
193                break;
194        }
195
196        switch (bi_opcode_props[I->op].message) {
197        case BIFROST_MESSAGE_NONE:
198        case BIFROST_MESSAGE_VARYING:
199        case BIFROST_MESSAGE_ATTRIBUTE:
200        case BIFROST_MESSAGE_TEX:
201        case BIFROST_MESSAGE_VARTEX:
202        case BIFROST_MESSAGE_LOAD:
203        case BIFROST_MESSAGE_64BIT:
204                return false;
205
206        case BIFROST_MESSAGE_STORE:
207        case BIFROST_MESSAGE_ATOMIC:
208        case BIFROST_MESSAGE_BARRIER:
209        case BIFROST_MESSAGE_BLEND:
210        case BIFROST_MESSAGE_Z_STENCIL:
211        case BIFROST_MESSAGE_ATEST:
212        case BIFROST_MESSAGE_JOB:
213                return true;
214
215        case BIFROST_MESSAGE_TILE:
216                return (I->op != BI_OPCODE_LD_TILE);
217        }
218
219        unreachable("Invalid message type");
220}
221
222/* Branch reconvergence is required when the execution mask may change
223 * between adjacent instructions (clauses). This occurs for conditional
224 * branches and for the last instruction (clause) in a block whose
225 * fallthrough successor has multiple predecessors.
226 */
227
228bool
229bi_reconverge_branches(bi_block *block)
230{
231        /* Last block of a program */
232        if (!block->successors[0]) {
233                assert(!block->successors[1]);
234                return true;
235        }
236
237        /* Multiple successors? We're branching */
238        if (block->successors[1])
239                return true;
240
241        /* Must have at least one successor */
242        struct bi_block *succ = block->successors[0];
243
244        /* Reconverge if the successor has multiple predecessors */
245        return bi_num_predecessors(succ) > 1;
246}
247
248/*
249 * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
250 * replaced by CSEL as follows:
251 *
252 *      MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
253 *      MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
254 *      MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
255 *
256 * MUX.bit cannot be transformed like this.
257 *
258 * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
259 * So we must check the swizzles too.
260 */
261bool
262bi_can_replace_with_csel(bi_instr *I)
263{
264        return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
265                (I->mux != BI_MUX_BIT) &&
266                (I->src[0].swizzle == BI_SWIZZLE_H01) &&
267                (I->src[1].swizzle == BI_SWIZZLE_H01) &&
268                (I->src[2].swizzle == BI_SWIZZLE_H01);
269}
270
271static enum bi_opcode
272bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
273{
274        switch (mux) {
275        case BI_MUX_INT_ZERO:
276                if (must_sign)
277                        return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
278                else
279                        return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
280        case BI_MUX_NEG:
281                return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
282        case BI_MUX_FP_ZERO:
283                return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
284        default:
285             unreachable("No CSEL for MUX.bit");
286        }
287}
288
289void
290bi_replace_mux_with_csel(bi_instr *I, bool must_sign)
291{
292        assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
293        I->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
294        I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
295        I->mux = 0;
296
297        bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2];
298
299        I->src[0] = cond;
300        I->src[1] = bi_zero();
301        I->src[2] = vTrue;
302        I->src[3] = vFalse;
303}
304