1/*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#define MAX_INSTRUCTION (1 << 30)
26
27#include "util/ralloc.h"
28#include "util/register_allocate.h"
29#include "v3d_compiler.h"
30
31/* Keeps track of conditional / partial writes in a block */
32struct partial_update_state {
33        /* Instruction doing a conditional or partial write */
34        struct qinst *inst;
35        /* Instruction that set the flags for the conditional write */
36        struct qinst *flags_inst;
37};
38
39static int
40vir_reg_to_var(struct qreg reg)
41{
42        if (reg.file == QFILE_TEMP)
43                return reg.index;
44
45        return -1;
46}
47
48static void
49vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
50              struct partial_update_state *partial_update_ht, struct qinst *inst,
51              struct qreg src, struct qinst *flags_inst)
52{
53        int var = vir_reg_to_var(src);
54        if (var == -1)
55                return;
56
57        c->temp_start[var] = MIN2(c->temp_start[var], ip);
58        c->temp_end[var] = MAX2(c->temp_end[var], ip);
59
60        /* The use[] bitset marks when the block makes
61         * use of a variable without having completely
62         * defined that variable within the block.
63         */
64        if (!BITSET_TEST(block->def, var)) {
65                /* If this use of var is conditional and the condition
66                 * and flags match those of a previous instruction
67                 * in the same block partially defining var then we
68                 * consider var completely defined within the block.
69                 */
70                if (BITSET_TEST(block->defout, var)) {
71                        struct partial_update_state *state =
72                                &partial_update_ht[var];
73                        if (state->inst) {
74                                if (vir_get_cond(inst) == vir_get_cond(state->inst) &&
75                                    flags_inst == state->flags_inst) {
76                                        return;
77                                }
78                        }
79                }
80
81                BITSET_SET(block->use, var);
82        }
83}
84
85/* The def[] bitset marks when an initialization in a
86 * block completely screens off previous updates of
87 * that variable.
88 */
89static void
90vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
91              struct partial_update_state *partial_update, struct qinst *inst,
92              struct qinst *flags_inst)
93{
94        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
95                return;
96
97        int var = vir_reg_to_var(inst->dst);
98        if (var == -1)
99                return;
100
101        c->temp_start[var] = MIN2(c->temp_start[var], ip);
102        c->temp_end[var] = MAX2(c->temp_end[var], ip);
103
104        /* Mark the block as having a (partial) def of the var. */
105        BITSET_SET(block->defout, var);
106
107        /* If we've already tracked this as a def that screens off previous
108         * uses, or already used it within the block, there's nothing to do.
109         */
110        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
111                return;
112
113        /* Easy, common case: unconditional full register update.*/
114        if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
115             inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
116            inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
117            inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
118                BITSET_SET(block->def, var);
119                return;
120        }
121
122        /* Keep track of conditional writes.
123         *
124         * Notice that the dst's live range for a conditional or partial writes
125         * will get extended up the control flow to the top of the program until
126         * we find a full write, making register allocation more difficult, so
127         * we should try our best to keep track of these and figure out if a
128         * combination of them actually writes the entire register so we can
129         * stop that process early and reduce liveness.
130         *
131         * FIXME: Track partial updates via pack/unpack.
132         */
133        struct partial_update_state *state = &partial_update[var];
134        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
135            inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
136                state->inst = inst;
137                state->flags_inst = flags_inst;
138        }
139}
140
141/* Sets up the def/use arrays for when variables are used-before-defined or
142 * defined-before-used in the block.
143 *
144 * Also initializes the temp_start/temp_end to cover just the instruction IPs
145 * where the variable is used, which will be extended later in
146 * vir_compute_start_end().
147 */
148static void
149vir_setup_def_use(struct v3d_compile *c)
150{
151        struct partial_update_state *partial_update =
152                rzalloc_array(c, struct partial_update_state, c->num_temps);
153        int ip = 0;
154
155        vir_for_each_block(block, c) {
156                block->start_ip = ip;
157
158                memset(partial_update, 0,
159                       sizeof(struct partial_update_state) * c->num_temps);
160
161                struct qinst *flags_inst = NULL;
162
163                vir_for_each_inst(inst, block) {
164                        for (int i = 0; i < vir_get_nsrc(inst); i++) {
165                                vir_setup_use(c, block, ip, partial_update,
166                                              inst, inst->src[i], flags_inst);
167                        }
168
169                        vir_setup_def(c, block, ip, partial_update,
170                                      inst, flags_inst);
171
172                        if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
173                            inst->qpu.flags.mpf != V3D_QPU_PF_NONE) {
174                               flags_inst = inst;
175                        }
176
177                        if (inst->qpu.flags.auf != V3D_QPU_UF_NONE ||
178                            inst->qpu.flags.muf != V3D_QPU_UF_NONE) {
179                                flags_inst = NULL;
180                        }
181
182                        /* Payload registers: r0/1/2 contain W, centroid W,
183                         * and Z at program start.  Register allocation will
184                         * force their nodes to R0/1/2.
185                         */
186                        if (inst->src[0].file == QFILE_REG) {
187                                switch (inst->src[0].index) {
188                                case 0:
189                                case 1:
190                                case 2:
191                                        c->temp_start[inst->dst.index] = 0;
192                                        break;
193                                }
194                        }
195
196                        ip++;
197                }
198                block->end_ip = ip;
199        }
200
201        ralloc_free(partial_update);
202}
203
204static bool
205vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
206{
207        bool cont = false;
208
209        vir_for_each_block_rev(block, c) {
210                /* Update live_out: Any successor using the variable
211                 * on entrance needs us to have the variable live on
212                 * exit.
213                 */
214                vir_for_each_successor(succ, block) {
215                        for (int i = 0; i < bitset_words; i++) {
216                                BITSET_WORD new_live_out = (succ->live_in[i] &
217                                                            ~block->live_out[i]);
218                                if (new_live_out) {
219                                        block->live_out[i] |= new_live_out;
220                                        cont = true;
221                                }
222                        }
223                }
224
225                /* Update live_in */
226                for (int i = 0; i < bitset_words; i++) {
227                        BITSET_WORD new_live_in = (block->use[i] |
228                                                   (block->live_out[i] &
229                                                    ~block->def[i]));
230                        if (new_live_in & ~block->live_in[i]) {
231                                block->live_in[i] |= new_live_in;
232                                cont = true;
233                        }
234                }
235        }
236
237        return cont;
238}
239
240static bool
241vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
242{
243        bool cont = false;
244
245        vir_for_each_block_rev(block, c) {
246                /* Propagate defin/defout down the successors to produce the
247                 * union of blocks with a reachable (partial) definition of
248                 * the var.
249                 *
250                 * This keeps a conditional first write to a reg from
251                 * extending its lifetime back to the start of the program.
252                 */
253                vir_for_each_successor(succ, block) {
254                        for (int i = 0; i < bitset_words; i++) {
255                                BITSET_WORD new_def = (block->defout[i] &
256                                                       ~succ->defin[i]);
257                                succ->defin[i] |= new_def;
258                                succ->defout[i] |= new_def;
259                                cont |= new_def;
260                        }
261                }
262        }
263
264        return cont;
265}
266
267/**
268 * Extend the start/end ranges for each variable to account for the
269 * new information calculated from control flow.
270 */
271static void
272vir_compute_start_end(struct v3d_compile *c, int num_vars)
273{
274        vir_for_each_block(block, c) {
275                for (int i = 0; i < num_vars; i++) {
276                        if (BITSET_TEST(block->live_in, i) &&
277                            BITSET_TEST(block->defin, i)) {
278                                c->temp_start[i] = MIN2(c->temp_start[i],
279                                                        block->start_ip);
280                                c->temp_end[i] = MAX2(c->temp_end[i],
281                                                      block->start_ip);
282                        }
283
284                        if (BITSET_TEST(block->live_out, i) &&
285                            BITSET_TEST(block->defout, i)) {
286                                c->temp_start[i] = MIN2(c->temp_start[i],
287                                                        block->end_ip);
288                                c->temp_end[i] = MAX2(c->temp_end[i],
289                                                      block->end_ip);
290                        }
291                }
292        }
293}
294
295void
296vir_calculate_live_intervals(struct v3d_compile *c)
297{
298        int bitset_words = BITSET_WORDS(c->num_temps);
299
300        /* We may be called more than once if we've rearranged the program to
301         * try to get register allocation to succeed.
302         */
303        if (c->temp_start) {
304                ralloc_free(c->temp_start);
305                ralloc_free(c->temp_end);
306
307                vir_for_each_block(block, c) {
308                        ralloc_free(block->def);
309                        ralloc_free(block->use);
310                        ralloc_free(block->live_in);
311                        ralloc_free(block->live_out);
312                }
313        }
314
315        c->temp_start = rzalloc_array(c, int, c->num_temps);
316        c->temp_end = rzalloc_array(c, int, c->num_temps);
317
318        for (int i = 0; i < c->num_temps; i++) {
319                c->temp_start[i] = MAX_INSTRUCTION;
320                c->temp_end[i] = -1;
321        }
322
323        vir_for_each_block(block, c) {
324                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
325                block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
326                block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
327                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
328                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
329                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
330        }
331
332        vir_setup_def_use(c);
333
334        while (vir_live_variables_dataflow(c, bitset_words))
335                ;
336
337        while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
338                ;
339
340        vir_compute_start_end(c, c->num_temps);
341
342        c->live_intervals_valid = true;
343}
344