1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3bf215546Sopenharmony_ci * Copyright (C) 2019-2020 Collabora, Ltd.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22bf215546Sopenharmony_ci * SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "compiler.h"
26bf215546Sopenharmony_ci#include "midgard_ops.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_civoid mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsigned new)
29bf215546Sopenharmony_ci{
30bf215546Sopenharmony_ci        mir_foreach_src(ins, i) {
31bf215546Sopenharmony_ci                if (ins->src[i] == old)
32bf215546Sopenharmony_ci                        ins->src[i] = new;
33bf215546Sopenharmony_ci        }
34bf215546Sopenharmony_ci}
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_civoid mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsigned new)
37bf215546Sopenharmony_ci{
38bf215546Sopenharmony_ci        if (ins->dest == old)
39bf215546Sopenharmony_ci                ins->dest = new;
40bf215546Sopenharmony_ci}
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_cistatic void
43bf215546Sopenharmony_cimir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned *swizzle)
44bf215546Sopenharmony_ci{
45bf215546Sopenharmony_ci        for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) {
46bf215546Sopenharmony_ci                if (ins->src[i] != old) continue;
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci                ins->src[i] = new;
49bf215546Sopenharmony_ci                mir_compose_swizzle(ins->swizzle[i], swizzle, ins->swizzle[i]);
50bf215546Sopenharmony_ci        }
51bf215546Sopenharmony_ci}
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_civoid
54bf215546Sopenharmony_cimir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new)
55bf215546Sopenharmony_ci{
56bf215546Sopenharmony_ci        mir_foreach_instr_global(ctx, ins) {
57bf215546Sopenharmony_ci                mir_rewrite_index_src_single(ins, old, new);
58bf215546Sopenharmony_ci        }
59bf215546Sopenharmony_ci}
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_civoid
62bf215546Sopenharmony_cimir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle)
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci        mir_foreach_instr_global(ctx, ins) {
65bf215546Sopenharmony_ci                mir_rewrite_index_src_single_swizzle(ins, old, new, swizzle);
66bf215546Sopenharmony_ci        }
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_civoid
70bf215546Sopenharmony_cimir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new)
71bf215546Sopenharmony_ci{
72bf215546Sopenharmony_ci        mir_foreach_instr_global(ctx, ins) {
73bf215546Sopenharmony_ci                mir_rewrite_index_dst_single(ins, old, new);
74bf215546Sopenharmony_ci        }
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci        /* Implicitly written before the shader */
77bf215546Sopenharmony_ci        if (ctx->blend_input == old)
78bf215546Sopenharmony_ci                ctx->blend_input = new;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci        if (ctx->blend_src1 == old)
81bf215546Sopenharmony_ci                ctx->blend_src1 = new;
82bf215546Sopenharmony_ci}
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_civoid
85bf215546Sopenharmony_cimir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new)
86bf215546Sopenharmony_ci{
87bf215546Sopenharmony_ci        mir_rewrite_index_src(ctx, old, new);
88bf215546Sopenharmony_ci        mir_rewrite_index_dst(ctx, old, new);
89bf215546Sopenharmony_ci}
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ciunsigned
92bf215546Sopenharmony_cimir_use_count(compiler_context *ctx, unsigned value)
93bf215546Sopenharmony_ci{
94bf215546Sopenharmony_ci        unsigned used_count = 0;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci        mir_foreach_instr_global(ctx, ins) {
97bf215546Sopenharmony_ci                if (mir_has_arg(ins, value))
98bf215546Sopenharmony_ci                        ++used_count;
99bf215546Sopenharmony_ci        }
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci        if (ctx->blend_input == value)
102bf215546Sopenharmony_ci                ++used_count;
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci        if (ctx->blend_src1 == value)
105bf215546Sopenharmony_ci                ++used_count;
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci        return used_count;
108bf215546Sopenharmony_ci}
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci/* Checks if a value is used only once (or totally dead), which is an important
111bf215546Sopenharmony_ci * heuristic to figure out if certain optimizations are Worth It (TM) */
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_cibool
114bf215546Sopenharmony_cimir_single_use(compiler_context *ctx, unsigned value)
115bf215546Sopenharmony_ci{
116bf215546Sopenharmony_ci        /* We can replicate constants in places so who cares */
117bf215546Sopenharmony_ci        if (value == SSA_FIXED_REGISTER(REGISTER_CONSTANT))
118bf215546Sopenharmony_ci                return true;
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci        return mir_use_count(ctx, value) <= 1;
121bf215546Sopenharmony_ci}
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_cibool
124bf215546Sopenharmony_cimir_nontrivial_mod(midgard_instruction *ins, unsigned i, bool check_swizzle)
125bf215546Sopenharmony_ci{
126bf215546Sopenharmony_ci        bool is_int = midgard_is_integer_op(ins->op);
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci        if (is_int) {
129bf215546Sopenharmony_ci                if (ins->src_shift[i]) return true;
130bf215546Sopenharmony_ci        } else {
131bf215546Sopenharmony_ci                if (ins->src_neg[i]) return true;
132bf215546Sopenharmony_ci                if (ins->src_abs[i]) return true;
133bf215546Sopenharmony_ci        }
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci        if (ins->dest_type != ins->src_types[i]) return true;
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci        if (check_swizzle) {
138bf215546Sopenharmony_ci                for (unsigned c = 0; c < 16; ++c) {
139bf215546Sopenharmony_ci                        if (!(ins->mask & (1 << c))) continue;
140bf215546Sopenharmony_ci                        if (ins->swizzle[i][c] != c) return true;
141bf215546Sopenharmony_ci                }
142bf215546Sopenharmony_ci        }
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci        return false;
145bf215546Sopenharmony_ci}
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_cibool
148bf215546Sopenharmony_cimir_nontrivial_outmod(midgard_instruction *ins)
149bf215546Sopenharmony_ci{
150bf215546Sopenharmony_ci        bool is_int = midgard_is_integer_op(ins->op);
151bf215546Sopenharmony_ci        unsigned mod = ins->outmod;
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci        if (ins->dest_type != ins->src_types[1])
154bf215546Sopenharmony_ci                return true;
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci        if (is_int)
157bf215546Sopenharmony_ci                return mod != midgard_outmod_keeplo;
158bf215546Sopenharmony_ci        else
159bf215546Sopenharmony_ci                return mod != midgard_outmod_none;
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
/* Number of components of the given bit size in a 128-bit vector. The
 * division is done with a shift:
 *
 *   128 / sz = exp2(log2(128 / sz))
 *            = exp2(log2(128) - log2(sz))
 *            = exp2(7 - log2(sz))
 *            = 1 << (7 - log2(sz))
 */

static unsigned
mir_components_for_bits(unsigned bits)
{
        unsigned log2_bits = util_logbase2(bits);

        return 1 << (7 - log2_bits);
}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ciunsigned
175bf215546Sopenharmony_cimir_components_for_type(nir_alu_type T)
176bf215546Sopenharmony_ci{
177bf215546Sopenharmony_ci        unsigned sz = nir_alu_type_get_type_size(T);
178bf215546Sopenharmony_ci        return mir_components_for_bits(sz);
179bf215546Sopenharmony_ci}
180bf215546Sopenharmony_ci
/* Converts a per-byte mask over a 16-byte vector into a per-component mask
 * for components that are `bits` wide. Bit d of the result is taken from the
 * first byte of component d.
 *
 * Every byte belonging to one component is expected to agree (all set or all
 * clear); this is asserted for each component. `bits` must be at least 8,
 * since a zero byte stride would never advance the loop. */

uint16_t
mir_from_bytemask(uint16_t bytemask, unsigned bits)
{
        unsigned value = 0;
        unsigned count = bits / 8;

        /* Sub-byte sizes would give a zero stride and loop forever */
        assert(count > 0);

        for (unsigned c = 0, d = 0; c < 16; c += count, ++d) {
                bool a = (bytemask & (1 << c)) != 0;

                /* The bytes of this component are [c, c + count), clamped
                 * to the 16 bytes of the vector */
                unsigned end = c + count;

                if (end > 16)
                        end = 16;

                for (unsigned q = c; q < end; ++q)
                        assert(((bytemask & (1 << q)) != 0) == a);

                value |= (a << d);
        }

        return value;
}
198bf215546Sopenharmony_ci
/* Widens a bytemask so that it covers whole components of the given bit
 * size: for each component, if any of its bytes is set in the mask, all of
 * its bytes are set in the result. */

uint16_t
mir_round_bytemask_up(uint16_t mask, unsigned bits)
{
        unsigned bytes_per_channel = bits / 8;
        unsigned channel_bytes = mask_of(bytes_per_channel);
        unsigned nr_channels = mir_components_for_bits(bits);

        for (unsigned ch = 0, shift = 0; ch < nr_channels; ++ch, shift += bytes_per_channel) {
                /* All the bytes belonging to this channel */
                unsigned window = channel_bytes << shift;

                if ((mask & window) != 0)
                        mask |= window;
        }

        return mask;
}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci/* Grabs the per-byte mask of an instruction (as opposed to per-component) */
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ciuint16_t
222bf215546Sopenharmony_cimir_bytemask(midgard_instruction *ins)
223bf215546Sopenharmony_ci{
224bf215546Sopenharmony_ci        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
225bf215546Sopenharmony_ci        return pan_to_bytemask(type_size, ins->mask);
226bf215546Sopenharmony_ci}
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_civoid
229bf215546Sopenharmony_cimir_set_bytemask(midgard_instruction *ins, uint16_t bytemask)
230bf215546Sopenharmony_ci{
231bf215546Sopenharmony_ci        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
232bf215546Sopenharmony_ci        ins->mask = mir_from_bytemask(bytemask, type_size);
233bf215546Sopenharmony_ci}
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci/* Checks if we should use an upper destination override, rather than the lower
236bf215546Sopenharmony_ci * one in the IR. Returns zero if no, returns the bytes to shift otherwise */
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_cisigned
239bf215546Sopenharmony_cimir_upper_override(midgard_instruction *ins, unsigned inst_size)
240bf215546Sopenharmony_ci{
241bf215546Sopenharmony_ci        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci        /* 8bit imovs are promoted to 16bit ones with .sext on the source and
244bf215546Sopenharmony_ci         * .keeplo on the destination to accomodate with non-identity swizzles.
245bf215546Sopenharmony_ci         */
246bf215546Sopenharmony_ci        if (ins->op == midgard_alu_op_imov && type_size == 8)
247bf215546Sopenharmony_ci                return 0;
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci        /* If the sizes are the same, there's nothing to override */
250bf215546Sopenharmony_ci        if (type_size == inst_size)
251bf215546Sopenharmony_ci                return -1;
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci        /* There are 16 bytes per vector, so there are (16/bytes)
254bf215546Sopenharmony_ci         * components per vector. So the magic half is half of
255bf215546Sopenharmony_ci         * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits
256bf215546Sopenharmony_ci         * */
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci        unsigned threshold = mir_components_for_bits(type_size) >> 1;
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci        /* How many components did we shift over? */
261bf215546Sopenharmony_ci        unsigned zeroes = __builtin_ctz(ins->mask);
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci        /* Did we hit the threshold? */
264bf215546Sopenharmony_ci        return (zeroes >= threshold) ? threshold : 0;
265bf215546Sopenharmony_ci}
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci/* Creates a mask of the components of a node read by an instruction, by
268bf215546Sopenharmony_ci * analyzing the swizzle with respect to the instruction's mask. E.g.:
269bf215546Sopenharmony_ci *
270bf215546Sopenharmony_ci *  fadd r0.xz, r1.yyyy, r2.zwyx
271bf215546Sopenharmony_ci *
272bf215546Sopenharmony_ci * will return a mask of Z/Y for r2
273bf215546Sopenharmony_ci */
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_cistatic uint16_t
276bf215546Sopenharmony_cimir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, unsigned bits)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci        unsigned cmask = 0;
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) {
281bf215546Sopenharmony_ci                if (!(inmask & (1 << c))) continue;
282bf215546Sopenharmony_ci                cmask |= (1 << swizzle[c]);
283bf215546Sopenharmony_ci        }
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci        return pan_to_bytemask(bits, cmask);
286bf215546Sopenharmony_ci}
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ciuint16_t
289bf215546Sopenharmony_cimir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i)
290bf215546Sopenharmony_ci{
291bf215546Sopenharmony_ci        /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
292bf215546Sopenharmony_ci        if (ins->compact_branch && ins->branch.conditional && (i == 0))
293bf215546Sopenharmony_ci                return 0xF;
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci        /* ALU ops act componentwise so we need to pay attention to
296bf215546Sopenharmony_ci         * their mask. Texture/ldst does not so we don't clamp source
297bf215546Sopenharmony_ci         * readmasks based on the writemask */
298bf215546Sopenharmony_ci        unsigned qmask = ~0;
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci        /* Handle dot products and things */
301bf215546Sopenharmony_ci        if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
302bf215546Sopenharmony_ci                unsigned props = alu_opcode_props[ins->op].props;
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci                unsigned channel_override = GET_CHANNEL_COUNT(props);
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci                if (channel_override)
307bf215546Sopenharmony_ci                        qmask = mask_of(channel_override);
308bf215546Sopenharmony_ci                else
309bf215546Sopenharmony_ci                        qmask = ins->mask;
310bf215546Sopenharmony_ci        }
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci        return mir_bytemask_of_read_components_single(ins->swizzle[i], qmask,
313bf215546Sopenharmony_ci                nir_alu_type_get_type_size(ins->src_types[i]));
314bf215546Sopenharmony_ci}
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ciuint16_t
317bf215546Sopenharmony_cimir_bytemask_of_read_components(midgard_instruction *ins, unsigned node)
318bf215546Sopenharmony_ci{
319bf215546Sopenharmony_ci        uint16_t mask = 0;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci        if (node == ~0)
322bf215546Sopenharmony_ci                return 0;
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci        mir_foreach_src(ins, i) {
325bf215546Sopenharmony_ci                if (ins->src[i] != node) continue;
326bf215546Sopenharmony_ci                mask |= mir_bytemask_of_read_components_index(ins, i);
327bf215546Sopenharmony_ci        }
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci        return mask;
330bf215546Sopenharmony_ci}
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci/* Register allocation occurs after instruction scheduling, which is fine until
333bf215546Sopenharmony_ci * we start needing to spill registers and therefore insert instructions into
334bf215546Sopenharmony_ci * an already-scheduled program. We don't have to be terribly efficient about
335bf215546Sopenharmony_ci * this, since spilling is already slow. So just semantically we need to insert
336bf215546Sopenharmony_ci * the instruction into a new bundle before/after the bundle of the instruction
337bf215546Sopenharmony_ci * in question */
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_cistatic midgard_bundle
340bf215546Sopenharmony_cimir_bundle_for_op(compiler_context *ctx, midgard_instruction ins)
341bf215546Sopenharmony_ci{
342bf215546Sopenharmony_ci        midgard_instruction *u = mir_upload_ins(ctx, ins);
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci        midgard_bundle bundle = {
345bf215546Sopenharmony_ci                .tag = ins.type,
346bf215546Sopenharmony_ci                .instruction_count = 1,
347bf215546Sopenharmony_ci                .instructions = { u },
348bf215546Sopenharmony_ci        };
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci        if (bundle.tag == TAG_ALU_4) {
351bf215546Sopenharmony_ci                assert(OP_IS_MOVE(u->op));
352bf215546Sopenharmony_ci                u->unit = UNIT_VMUL;
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci                size_t bytes_emitted = sizeof(uint32_t) + sizeof(midgard_reg_info) + sizeof(midgard_vector_alu);
355bf215546Sopenharmony_ci                bundle.padding = ~(bytes_emitted - 1) & 0xF;
356bf215546Sopenharmony_ci                bundle.control = ins.type | u->unit;
357bf215546Sopenharmony_ci        }
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci        return bundle;
360bf215546Sopenharmony_ci}
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_cistatic unsigned
363bf215546Sopenharmony_cimir_bundle_idx_for_ins(midgard_instruction *tag, midgard_block *block)
364bf215546Sopenharmony_ci{
365bf215546Sopenharmony_ci        midgard_bundle *bundles =
366bf215546Sopenharmony_ci                (midgard_bundle *) block->bundles.data;
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci        size_t count = (block->bundles.size / sizeof(midgard_bundle));
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci        for (unsigned i = 0; i < count; ++i) {
371bf215546Sopenharmony_ci                for (unsigned j = 0; j < bundles[i].instruction_count; ++j) {
372bf215546Sopenharmony_ci                        if (bundles[i].instructions[j] == tag)
373bf215546Sopenharmony_ci                                return i;
374bf215546Sopenharmony_ci                }
375bf215546Sopenharmony_ci        }
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci        mir_print_instruction(tag);
378bf215546Sopenharmony_ci        unreachable("Instruction not scheduled in block");
379bf215546Sopenharmony_ci}
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_cimidgard_instruction *
382bf215546Sopenharmony_cimir_insert_instruction_before_scheduled(
383bf215546Sopenharmony_ci        compiler_context *ctx,
384bf215546Sopenharmony_ci        midgard_block *block,
385bf215546Sopenharmony_ci        midgard_instruction *tag,
386bf215546Sopenharmony_ci        midgard_instruction ins)
387bf215546Sopenharmony_ci{
388bf215546Sopenharmony_ci        unsigned before = mir_bundle_idx_for_ins(tag, block);
389bf215546Sopenharmony_ci        size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle);
390bf215546Sopenharmony_ci        UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1);
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci        midgard_bundle *bundles = (midgard_bundle *) block->bundles.data;
393bf215546Sopenharmony_ci        memmove(bundles + before + 1, bundles + before, (count - before) * sizeof(midgard_bundle));
394bf215546Sopenharmony_ci        midgard_bundle *before_bundle = bundles + before + 1;
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci        midgard_bundle new = mir_bundle_for_op(ctx, ins);
397bf215546Sopenharmony_ci        memcpy(bundles + before, &new, sizeof(new));
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci        list_addtail(&new.instructions[0]->link, &before_bundle->instructions[0]->link);
400bf215546Sopenharmony_ci        block->quadword_count += midgard_tag_props[new.tag].size;
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci        return new.instructions[0];
403bf215546Sopenharmony_ci}
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_cimidgard_instruction *
406bf215546Sopenharmony_cimir_insert_instruction_after_scheduled(
407bf215546Sopenharmony_ci        compiler_context *ctx,
408bf215546Sopenharmony_ci        midgard_block *block,
409bf215546Sopenharmony_ci        midgard_instruction *tag,
410bf215546Sopenharmony_ci        midgard_instruction ins)
411bf215546Sopenharmony_ci{
412bf215546Sopenharmony_ci        /* We need to grow the bundles array to add our new bundle */
413bf215546Sopenharmony_ci        size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle);
414bf215546Sopenharmony_ci        UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1);
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci        /* Find the bundle that we want to insert after */
417bf215546Sopenharmony_ci        unsigned after = mir_bundle_idx_for_ins(tag, block);
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci        /* All the bundles after that one, we move ahead by one */
420bf215546Sopenharmony_ci        midgard_bundle *bundles = (midgard_bundle *) block->bundles.data;
421bf215546Sopenharmony_ci        memmove(bundles + after + 2, bundles + after + 1, (count - after - 1) * sizeof(midgard_bundle));
422bf215546Sopenharmony_ci        midgard_bundle *after_bundle = bundles + after;
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci        midgard_bundle new = mir_bundle_for_op(ctx, ins);
425bf215546Sopenharmony_ci        memcpy(bundles + after + 1, &new, sizeof(new));
426bf215546Sopenharmony_ci        list_add(&new.instructions[0]->link, &after_bundle->instructions[after_bundle->instruction_count - 1]->link);
427bf215546Sopenharmony_ci        block->quadword_count += midgard_tag_props[new.tag].size;
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci        return new.instructions[0];
430bf215546Sopenharmony_ci}
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci/* Flip the first-two arguments of a (binary) op. Currently ALU
433bf215546Sopenharmony_ci * only, no known uses for ldst/tex */
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_civoid
436bf215546Sopenharmony_cimir_flip(midgard_instruction *ins)
437bf215546Sopenharmony_ci{
438bf215546Sopenharmony_ci        unsigned temp = ins->src[0];
439bf215546Sopenharmony_ci        ins->src[0] = ins->src[1];
440bf215546Sopenharmony_ci        ins->src[1] = temp;
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci        assert(ins->type == TAG_ALU_4);
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci        temp = ins->src_types[0];
445bf215546Sopenharmony_ci        ins->src_types[0] = ins->src_types[1];
446bf215546Sopenharmony_ci        ins->src_types[1] = temp;
447bf215546Sopenharmony_ci
448bf215546Sopenharmony_ci        temp = ins->src_abs[0];
449bf215546Sopenharmony_ci        ins->src_abs[0] = ins->src_abs[1];
450bf215546Sopenharmony_ci        ins->src_abs[1] = temp;
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci        temp = ins->src_neg[0];
453bf215546Sopenharmony_ci        ins->src_neg[0] = ins->src_neg[1];
454bf215546Sopenharmony_ci        ins->src_neg[1] = temp;
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci        temp = ins->src_invert[0];
457bf215546Sopenharmony_ci        ins->src_invert[0] = ins->src_invert[1];
458bf215546Sopenharmony_ci        ins->src_invert[1] = temp;
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci        unsigned temp_swizzle[16];
461bf215546Sopenharmony_ci        memcpy(temp_swizzle, ins->swizzle[0], sizeof(ins->swizzle[0]));
462bf215546Sopenharmony_ci        memcpy(ins->swizzle[0], ins->swizzle[1], sizeof(ins->swizzle[0]));
463bf215546Sopenharmony_ci        memcpy(ins->swizzle[1], temp_swizzle, sizeof(ins->swizzle[0]));
464bf215546Sopenharmony_ci}
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci/* Before squashing, calculate ctx->temp_count just by observing the MIR */
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_civoid
469bf215546Sopenharmony_cimir_compute_temp_count(compiler_context *ctx)
470bf215546Sopenharmony_ci{
471bf215546Sopenharmony_ci        if (ctx->temp_count)
472bf215546Sopenharmony_ci                return;
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci        unsigned max_dest = 0;
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci        mir_foreach_instr_global(ctx, ins) {
477bf215546Sopenharmony_ci                if (ins->dest < SSA_FIXED_MINIMUM)
478bf215546Sopenharmony_ci                        max_dest = MAX2(max_dest, ins->dest + 1);
479bf215546Sopenharmony_ci        }
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci        ctx->temp_count = max_dest;
482bf215546Sopenharmony_ci}
483