1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2012 Intel Corporation
3bf215546Sopenharmony_ci * Copyright © 2016 Broadcom
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22bf215546Sopenharmony_ci * IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#define MAX_INSTRUCTION (1 << 30)
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/ralloc.h"
28bf215546Sopenharmony_ci#include "util/register_allocate.h"
29bf215546Sopenharmony_ci#include "v3d_compiler.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci/* Keeps track of conditional / partial writes in a block */
32bf215546Sopenharmony_cistruct partial_update_state {
33bf215546Sopenharmony_ci        /* Instruction doing a conditional or partial write */
34bf215546Sopenharmony_ci        struct qinst *inst;
35bf215546Sopenharmony_ci        /* Instruction that set the flags for the conditional write */
36bf215546Sopenharmony_ci        struct qinst *flags_inst;
37bf215546Sopenharmony_ci};
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_cistatic int
40bf215546Sopenharmony_civir_reg_to_var(struct qreg reg)
41bf215546Sopenharmony_ci{
42bf215546Sopenharmony_ci        if (reg.file == QFILE_TEMP)
43bf215546Sopenharmony_ci                return reg.index;
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci        return -1;
46bf215546Sopenharmony_ci}
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_cistatic void
49bf215546Sopenharmony_civir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
50bf215546Sopenharmony_ci              struct partial_update_state *partial_update_ht, struct qinst *inst,
51bf215546Sopenharmony_ci              struct qreg src, struct qinst *flags_inst)
52bf215546Sopenharmony_ci{
53bf215546Sopenharmony_ci        int var = vir_reg_to_var(src);
54bf215546Sopenharmony_ci        if (var == -1)
55bf215546Sopenharmony_ci                return;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci        c->temp_start[var] = MIN2(c->temp_start[var], ip);
58bf215546Sopenharmony_ci        c->temp_end[var] = MAX2(c->temp_end[var], ip);
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci        /* The use[] bitset marks when the block makes
61bf215546Sopenharmony_ci         * use of a variable without having completely
62bf215546Sopenharmony_ci         * defined that variable within the block.
63bf215546Sopenharmony_ci         */
64bf215546Sopenharmony_ci        if (!BITSET_TEST(block->def, var)) {
65bf215546Sopenharmony_ci                /* If this use of var is conditional and the condition
66bf215546Sopenharmony_ci                 * and flags match those of a previous instruction
67bf215546Sopenharmony_ci                 * in the same block partially defining var then we
68bf215546Sopenharmony_ci                 * consider var completely defined within the block.
69bf215546Sopenharmony_ci                 */
70bf215546Sopenharmony_ci                if (BITSET_TEST(block->defout, var)) {
71bf215546Sopenharmony_ci                        struct partial_update_state *state =
72bf215546Sopenharmony_ci                                &partial_update_ht[var];
73bf215546Sopenharmony_ci                        if (state->inst) {
74bf215546Sopenharmony_ci                                if (vir_get_cond(inst) == vir_get_cond(state->inst) &&
75bf215546Sopenharmony_ci                                    flags_inst == state->flags_inst) {
76bf215546Sopenharmony_ci                                        return;
77bf215546Sopenharmony_ci                                }
78bf215546Sopenharmony_ci                        }
79bf215546Sopenharmony_ci                }
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci                BITSET_SET(block->use, var);
82bf215546Sopenharmony_ci        }
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci/* The def[] bitset marks when an initialization in a
86bf215546Sopenharmony_ci * block completely screens off previous updates of
87bf215546Sopenharmony_ci * that variable.
88bf215546Sopenharmony_ci */
89bf215546Sopenharmony_cistatic void
90bf215546Sopenharmony_civir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
91bf215546Sopenharmony_ci              struct partial_update_state *partial_update, struct qinst *inst,
92bf215546Sopenharmony_ci              struct qinst *flags_inst)
93bf215546Sopenharmony_ci{
94bf215546Sopenharmony_ci        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
95bf215546Sopenharmony_ci                return;
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci        int var = vir_reg_to_var(inst->dst);
98bf215546Sopenharmony_ci        if (var == -1)
99bf215546Sopenharmony_ci                return;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci        c->temp_start[var] = MIN2(c->temp_start[var], ip);
102bf215546Sopenharmony_ci        c->temp_end[var] = MAX2(c->temp_end[var], ip);
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci        /* Mark the block as having a (partial) def of the var. */
105bf215546Sopenharmony_ci        BITSET_SET(block->defout, var);
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci        /* If we've already tracked this as a def that screens off previous
108bf215546Sopenharmony_ci         * uses, or already used it within the block, there's nothing to do.
109bf215546Sopenharmony_ci         */
110bf215546Sopenharmony_ci        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
111bf215546Sopenharmony_ci                return;
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci        /* Easy, common case: unconditional full register update.*/
114bf215546Sopenharmony_ci        if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
115bf215546Sopenharmony_ci             inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
116bf215546Sopenharmony_ci            inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
117bf215546Sopenharmony_ci            inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
118bf215546Sopenharmony_ci                BITSET_SET(block->def, var);
119bf215546Sopenharmony_ci                return;
120bf215546Sopenharmony_ci        }
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci        /* Keep track of conditional writes.
123bf215546Sopenharmony_ci         *
124bf215546Sopenharmony_ci         * Notice that the dst's live range for a conditional or partial writes
125bf215546Sopenharmony_ci         * will get extended up the control flow to the top of the program until
126bf215546Sopenharmony_ci         * we find a full write, making register allocation more difficult, so
127bf215546Sopenharmony_ci         * we should try our best to keep track of these and figure out if a
128bf215546Sopenharmony_ci         * combination of them actually writes the entire register so we can
129bf215546Sopenharmony_ci         * stop that process early and reduce liveness.
130bf215546Sopenharmony_ci         *
131bf215546Sopenharmony_ci         * FIXME: Track partial updates via pack/unpack.
132bf215546Sopenharmony_ci         */
133bf215546Sopenharmony_ci        struct partial_update_state *state = &partial_update[var];
134bf215546Sopenharmony_ci        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
135bf215546Sopenharmony_ci            inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
136bf215546Sopenharmony_ci                state->inst = inst;
137bf215546Sopenharmony_ci                state->flags_inst = flags_inst;
138bf215546Sopenharmony_ci        }
139bf215546Sopenharmony_ci}
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci/* Sets up the def/use arrays for when variables are used-before-defined or
142bf215546Sopenharmony_ci * defined-before-used in the block.
143bf215546Sopenharmony_ci *
144bf215546Sopenharmony_ci * Also initializes the temp_start/temp_end to cover just the instruction IPs
145bf215546Sopenharmony_ci * where the variable is used, which will be extended later in
146bf215546Sopenharmony_ci * vir_compute_start_end().
147bf215546Sopenharmony_ci */
148bf215546Sopenharmony_cistatic void
149bf215546Sopenharmony_civir_setup_def_use(struct v3d_compile *c)
150bf215546Sopenharmony_ci{
151bf215546Sopenharmony_ci        struct partial_update_state *partial_update =
152bf215546Sopenharmony_ci                rzalloc_array(c, struct partial_update_state, c->num_temps);
153bf215546Sopenharmony_ci        int ip = 0;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci        vir_for_each_block(block, c) {
156bf215546Sopenharmony_ci                block->start_ip = ip;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci                memset(partial_update, 0,
159bf215546Sopenharmony_ci                       sizeof(struct partial_update_state) * c->num_temps);
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci                struct qinst *flags_inst = NULL;
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci                vir_for_each_inst(inst, block) {
164bf215546Sopenharmony_ci                        for (int i = 0; i < vir_get_nsrc(inst); i++) {
165bf215546Sopenharmony_ci                                vir_setup_use(c, block, ip, partial_update,
166bf215546Sopenharmony_ci                                              inst, inst->src[i], flags_inst);
167bf215546Sopenharmony_ci                        }
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci                        vir_setup_def(c, block, ip, partial_update,
170bf215546Sopenharmony_ci                                      inst, flags_inst);
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci                        if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
173bf215546Sopenharmony_ci                            inst->qpu.flags.mpf != V3D_QPU_PF_NONE) {
174bf215546Sopenharmony_ci                               flags_inst = inst;
175bf215546Sopenharmony_ci                        }
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci                        if (inst->qpu.flags.auf != V3D_QPU_UF_NONE ||
178bf215546Sopenharmony_ci                            inst->qpu.flags.muf != V3D_QPU_UF_NONE) {
179bf215546Sopenharmony_ci                                flags_inst = NULL;
180bf215546Sopenharmony_ci                        }
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci                        /* Payload registers: r0/1/2 contain W, centroid W,
183bf215546Sopenharmony_ci                         * and Z at program start.  Register allocation will
184bf215546Sopenharmony_ci                         * force their nodes to R0/1/2.
185bf215546Sopenharmony_ci                         */
186bf215546Sopenharmony_ci                        if (inst->src[0].file == QFILE_REG) {
187bf215546Sopenharmony_ci                                switch (inst->src[0].index) {
188bf215546Sopenharmony_ci                                case 0:
189bf215546Sopenharmony_ci                                case 1:
190bf215546Sopenharmony_ci                                case 2:
191bf215546Sopenharmony_ci                                        c->temp_start[inst->dst.index] = 0;
192bf215546Sopenharmony_ci                                        break;
193bf215546Sopenharmony_ci                                }
194bf215546Sopenharmony_ci                        }
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci                        ip++;
197bf215546Sopenharmony_ci                }
198bf215546Sopenharmony_ci                block->end_ip = ip;
199bf215546Sopenharmony_ci        }
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci        ralloc_free(partial_update);
202bf215546Sopenharmony_ci}
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_cistatic bool
205bf215546Sopenharmony_civir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
206bf215546Sopenharmony_ci{
207bf215546Sopenharmony_ci        bool cont = false;
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci        vir_for_each_block_rev(block, c) {
210bf215546Sopenharmony_ci                /* Update live_out: Any successor using the variable
211bf215546Sopenharmony_ci                 * on entrance needs us to have the variable live on
212bf215546Sopenharmony_ci                 * exit.
213bf215546Sopenharmony_ci                 */
214bf215546Sopenharmony_ci                vir_for_each_successor(succ, block) {
215bf215546Sopenharmony_ci                        for (int i = 0; i < bitset_words; i++) {
216bf215546Sopenharmony_ci                                BITSET_WORD new_live_out = (succ->live_in[i] &
217bf215546Sopenharmony_ci                                                            ~block->live_out[i]);
218bf215546Sopenharmony_ci                                if (new_live_out) {
219bf215546Sopenharmony_ci                                        block->live_out[i] |= new_live_out;
220bf215546Sopenharmony_ci                                        cont = true;
221bf215546Sopenharmony_ci                                }
222bf215546Sopenharmony_ci                        }
223bf215546Sopenharmony_ci                }
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci                /* Update live_in */
226bf215546Sopenharmony_ci                for (int i = 0; i < bitset_words; i++) {
227bf215546Sopenharmony_ci                        BITSET_WORD new_live_in = (block->use[i] |
228bf215546Sopenharmony_ci                                                   (block->live_out[i] &
229bf215546Sopenharmony_ci                                                    ~block->def[i]));
230bf215546Sopenharmony_ci                        if (new_live_in & ~block->live_in[i]) {
231bf215546Sopenharmony_ci                                block->live_in[i] |= new_live_in;
232bf215546Sopenharmony_ci                                cont = true;
233bf215546Sopenharmony_ci                        }
234bf215546Sopenharmony_ci                }
235bf215546Sopenharmony_ci        }
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci        return cont;
238bf215546Sopenharmony_ci}
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_cistatic bool
241bf215546Sopenharmony_civir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
242bf215546Sopenharmony_ci{
243bf215546Sopenharmony_ci        bool cont = false;
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci        vir_for_each_block_rev(block, c) {
246bf215546Sopenharmony_ci                /* Propagate defin/defout down the successors to produce the
247bf215546Sopenharmony_ci                 * union of blocks with a reachable (partial) definition of
248bf215546Sopenharmony_ci                 * the var.
249bf215546Sopenharmony_ci                 *
250bf215546Sopenharmony_ci                 * This keeps a conditional first write to a reg from
251bf215546Sopenharmony_ci                 * extending its lifetime back to the start of the program.
252bf215546Sopenharmony_ci                 */
253bf215546Sopenharmony_ci                vir_for_each_successor(succ, block) {
254bf215546Sopenharmony_ci                        for (int i = 0; i < bitset_words; i++) {
255bf215546Sopenharmony_ci                                BITSET_WORD new_def = (block->defout[i] &
256bf215546Sopenharmony_ci                                                       ~succ->defin[i]);
257bf215546Sopenharmony_ci                                succ->defin[i] |= new_def;
258bf215546Sopenharmony_ci                                succ->defout[i] |= new_def;
259bf215546Sopenharmony_ci                                cont |= new_def;
260bf215546Sopenharmony_ci                        }
261bf215546Sopenharmony_ci                }
262bf215546Sopenharmony_ci        }
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci        return cont;
265bf215546Sopenharmony_ci}
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci/**
268bf215546Sopenharmony_ci * Extend the start/end ranges for each variable to account for the
269bf215546Sopenharmony_ci * new information calculated from control flow.
270bf215546Sopenharmony_ci */
271bf215546Sopenharmony_cistatic void
272bf215546Sopenharmony_civir_compute_start_end(struct v3d_compile *c, int num_vars)
273bf215546Sopenharmony_ci{
274bf215546Sopenharmony_ci        vir_for_each_block(block, c) {
275bf215546Sopenharmony_ci                for (int i = 0; i < num_vars; i++) {
276bf215546Sopenharmony_ci                        if (BITSET_TEST(block->live_in, i) &&
277bf215546Sopenharmony_ci                            BITSET_TEST(block->defin, i)) {
278bf215546Sopenharmony_ci                                c->temp_start[i] = MIN2(c->temp_start[i],
279bf215546Sopenharmony_ci                                                        block->start_ip);
280bf215546Sopenharmony_ci                                c->temp_end[i] = MAX2(c->temp_end[i],
281bf215546Sopenharmony_ci                                                      block->start_ip);
282bf215546Sopenharmony_ci                        }
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci                        if (BITSET_TEST(block->live_out, i) &&
285bf215546Sopenharmony_ci                            BITSET_TEST(block->defout, i)) {
286bf215546Sopenharmony_ci                                c->temp_start[i] = MIN2(c->temp_start[i],
287bf215546Sopenharmony_ci                                                        block->end_ip);
288bf215546Sopenharmony_ci                                c->temp_end[i] = MAX2(c->temp_end[i],
289bf215546Sopenharmony_ci                                                      block->end_ip);
290bf215546Sopenharmony_ci                        }
291bf215546Sopenharmony_ci                }
292bf215546Sopenharmony_ci        }
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_civoid
296bf215546Sopenharmony_civir_calculate_live_intervals(struct v3d_compile *c)
297bf215546Sopenharmony_ci{
298bf215546Sopenharmony_ci        int bitset_words = BITSET_WORDS(c->num_temps);
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci        /* We may be called more than once if we've rearranged the program to
301bf215546Sopenharmony_ci         * try to get register allocation to succeed.
302bf215546Sopenharmony_ci         */
303bf215546Sopenharmony_ci        if (c->temp_start) {
304bf215546Sopenharmony_ci                ralloc_free(c->temp_start);
305bf215546Sopenharmony_ci                ralloc_free(c->temp_end);
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci                vir_for_each_block(block, c) {
308bf215546Sopenharmony_ci                        ralloc_free(block->def);
309bf215546Sopenharmony_ci                        ralloc_free(block->use);
310bf215546Sopenharmony_ci                        ralloc_free(block->live_in);
311bf215546Sopenharmony_ci                        ralloc_free(block->live_out);
312bf215546Sopenharmony_ci                }
313bf215546Sopenharmony_ci        }
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci        c->temp_start = rzalloc_array(c, int, c->num_temps);
316bf215546Sopenharmony_ci        c->temp_end = rzalloc_array(c, int, c->num_temps);
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci        for (int i = 0; i < c->num_temps; i++) {
319bf215546Sopenharmony_ci                c->temp_start[i] = MAX_INSTRUCTION;
320bf215546Sopenharmony_ci                c->temp_end[i] = -1;
321bf215546Sopenharmony_ci        }
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci        vir_for_each_block(block, c) {
324bf215546Sopenharmony_ci                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
325bf215546Sopenharmony_ci                block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
326bf215546Sopenharmony_ci                block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
327bf215546Sopenharmony_ci                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
328bf215546Sopenharmony_ci                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
329bf215546Sopenharmony_ci                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
330bf215546Sopenharmony_ci        }
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci        vir_setup_def_use(c);
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci        while (vir_live_variables_dataflow(c, bitset_words))
335bf215546Sopenharmony_ci                ;
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci        while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
338bf215546Sopenharmony_ci                ;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci        vir_compute_start_end(c, c->num_temps);
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci        c->live_intervals_valid = true;
343bf215546Sopenharmony_ci}
344