1/*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "compiler/v3d_compiler.h"
25#include "qpu/qpu_instr.h"
26#include "qpu/qpu_disasm.h"
27
28static inline struct qpu_reg
29qpu_reg(int index)
30{
31        struct qpu_reg reg = {
32                .magic = false,
33                .index = index,
34        };
35        return reg;
36}
37
38static inline struct qpu_reg
39qpu_magic(enum v3d_qpu_waddr waddr)
40{
41        struct qpu_reg reg = {
42                .magic = true,
43                .index = waddr,
44        };
45        return reg;
46}
47
48struct v3d_qpu_instr
49v3d_qpu_nop(void)
50{
51        struct v3d_qpu_instr instr = {
52                .type = V3D_QPU_INSTR_TYPE_ALU,
53                .alu = {
54                        .add = {
55                                .op = V3D_QPU_A_NOP,
56                                .waddr = V3D_QPU_WADDR_NOP,
57                                .magic_write = true,
58                        },
59                        .mul = {
60                                .op = V3D_QPU_M_NOP,
61                                .waddr = V3D_QPU_WADDR_NOP,
62                                .magic_write = true,
63                        },
64                }
65        };
66
67        return instr;
68}
69
70static struct qinst *
71vir_nop(void)
72{
73        struct qreg undef = vir_nop_reg();
74        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
75
76        return qinst;
77}
78
79static struct qinst *
80new_qpu_nop_before(struct qinst *inst)
81{
82        struct qinst *q = vir_nop();
83
84        list_addtail(&q->link, &inst->link);
85
86        return q;
87}
88
89/**
90 * Allocates the src register (accumulator or register file) into the RADDR
91 * fields of the instruction.
92 */
93static void
94set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
95{
96        if (src.smimm) {
97                assert(instr->sig.small_imm);
98                *mux = V3D_QPU_MUX_B;
99                return;
100        }
101
102        if (src.magic) {
103                assert(src.index >= V3D_QPU_WADDR_R0 &&
104                       src.index <= V3D_QPU_WADDR_R5);
105                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
106                return;
107        }
108
109        if (instr->alu.add.a != V3D_QPU_MUX_A &&
110            instr->alu.add.b != V3D_QPU_MUX_A &&
111            instr->alu.mul.a != V3D_QPU_MUX_A &&
112            instr->alu.mul.b != V3D_QPU_MUX_A) {
113                instr->raddr_a = src.index;
114                *mux = V3D_QPU_MUX_A;
115        } else {
116                if (instr->raddr_a == src.index) {
117                        *mux = V3D_QPU_MUX_A;
118                } else {
119                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
120                                 instr->alu.add.b == V3D_QPU_MUX_B &&
121                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
122                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
123                               src.index == instr->raddr_b);
124
125                        instr->raddr_b = src.index;
126                        *mux = V3D_QPU_MUX_B;
127                }
128        }
129}
130
131static bool
132is_no_op_mov(struct qinst *qinst)
133{
134        static const struct v3d_qpu_sig no_sig = {0};
135
136        /* Make sure it's just a lone MOV. */
137        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
138            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
139            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
140            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
141                return false;
142        }
143
144        /* Check if it's a MOV from a register to itself. */
145        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
146        if (qinst->qpu.alu.mul.magic_write) {
147                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
148                        return false;
149
150                if (qinst->qpu.alu.mul.a !=
151                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
152                        return false;
153                }
154        } else {
155                int raddr;
156
157                switch (qinst->qpu.alu.mul.a) {
158                case V3D_QPU_MUX_A:
159                        raddr = qinst->qpu.raddr_a;
160                        break;
161                case V3D_QPU_MUX_B:
162                        raddr = qinst->qpu.raddr_b;
163                        break;
164                default:
165                        return false;
166                }
167                if (raddr != waddr)
168                        return false;
169        }
170
171        /* No packing or flags updates, or we need to execute the
172         * instruction.
173         */
174        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
175            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
176            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
177            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
178            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
179                return false;
180        }
181
182        return true;
183}
184
/**
 * Fills in the QPU-level fields (qinst->qpu) of every VIR instruction in a
 * block.
 *
 * For each instruction this:
 *  - bumps c->num_uniforms when vir_has_uniform() reports a uniform,
 *  - converts each source and the destination from its qreg file into a
 *    struct qpu_reg (QFILE_TEMP entries are looked up in temp_registers[]
 *    by the temp's index),
 *  - writes those registers into the signal address, add ALU or mul ALU
 *    fields depending on what the instruction already carries,
 *  - removes mul-side instructions that is_no_op_mov() identifies.
 *
 * \param c               compile context (num_uniforms is updated)
 * \param block           block whose instructions are processed
 * \param temp_registers  table indexed by QFILE_TEMP index
 */
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        /* Index of the most recent QFILE_VPM source seen in this block; only
         * used by the ordering assert below.
         */
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                /* Resolve every source operand to a QPU register. */
                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                                /* QFILE_NULL is an undef, so we can load
                                 * anything. Using reg 0
                                 */
                                src[i] = qpu_reg(0);
                                break;
                        case QFILE_LOAD_IMM:
                                assert(!"not reached");
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                /* Only smimm is set here; set_src() tests it
                                 * first and returns without reading .magic
                                 * or .index.
                                 */
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                /* VPM source indices must not decrease
                                 * within the block.  The (void) cast
                                 * presumably keeps the variable "used" when
                                 * asserts are compiled out.
                                 */
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                /* Insert a NOP carrying the ldvpm signal
                                 * ahead of this instruction and read the
                                 * operand from magic R3 instead.
                                 */
                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_magic(V3D_QPU_WADDR_R3);
                                break;
                        }
                }

                /* Resolve the destination to a QPU register. */
                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
                                /* Uniform-load signals ride on an otherwise
                                 * empty instruction.
                                 */
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                /* Any destination other than magic R5 needs
                                 * the "rf" signal variant, with the target
                                 * recorded in sig_addr/sig_magic; asserted
                                 * to require ver >= 40.
                                 */
                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        assert(c->devinfo->ver >= 40);

                                        if (qinst->qpu.sig.ldunif) {
                                           qinst->qpu.sig.ldunif = false;
                                           qinst->qpu.sig.ldunifrf = true;
                                        } else {
                                           qinst->qpu.sig.ldunifa = false;
                                           qinst->qpu.sig.ldunifarf = true;
                                        }
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                       &qinst->qpu.sig)) {
                                /* Other address-writing signals: the
                                 * destination goes in the signal address
                                 * fields and both ALUs must be idle.
                                 */
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                /* Add-ALU instruction: route sources in
                                 * operand order, then set the add write
                                 * address.
                                 */
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                /* Everything else is handled on the mul
                                 * ALU.
                                 */
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                /* With source and destination now known, a
                                 * MOV onto itself can be dropped.
                                 */
                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}
335
336static bool
337reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
338{
339        struct v3d_qpu_instr qpu;
340        ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
341        assert(ok);
342
343        if (qpu.sig.ldunif ||
344            qpu.sig.ldunifrf ||
345            qpu.sig.ldtlbu ||
346            qpu.sig.wrtmuc) {
347                return true;
348        }
349
350        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
351                return true;
352
353        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
354                if (qpu.alu.add.magic_write &&
355                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
356                        return true;
357                }
358
359                if (qpu.alu.mul.magic_write &&
360                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
361                        return true;
362                }
363        }
364
365        return false;
366}
367
368static void
369v3d_dump_qpu(struct v3d_compile *c)
370{
371        fprintf(stderr, "%s prog %d/%d QPU:\n",
372                vir_get_stage_name(c),
373                c->program_id, c->variant_id);
374
375        int next_uniform = 0;
376        for (int i = 0; i < c->qpu_inst_count; i++) {
377                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
378                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
379
380                /* We can only do this on 4.x, because we're not tracking TMU
381                 * implicit uniforms here on 3.x.
382                 */
383                if (c->devinfo->ver >= 40 &&
384                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
385                        fprintf(stderr, " (");
386                        vir_dump_uniform(c->uniform_contents[next_uniform],
387                                         c->uniform_data[next_uniform]);
388                        fprintf(stderr, ")");
389                        next_uniform++;
390                }
391                fprintf(stderr, "\n");
392                ralloc_free((void *)str);
393        }
394
395        /* Make sure our dumping lined up. */
396        if (c->devinfo->ver >= 40)
397                assert(next_uniform == c->num_uniforms);
398
399        fprintf(stderr, "\n");
400}
401
402void
403v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
404{
405        /* Reset the uniform count to how many will be actually loaded by the
406         * generated QPU code.
407         */
408        c->num_uniforms = 0;
409
410        vir_for_each_block(block, c)
411                v3d_generate_code_block(c, block, temp_registers);
412
413        v3d_qpu_schedule_instructions(c);
414
415        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
416        int i = 0;
417        vir_for_each_inst_inorder(inst, c) {
418                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
419                                             &c->qpu_insts[i++]);
420                if (!ok) {
421                        fprintf(stderr, "Failed to pack instruction %d:\n", i);
422                        vir_dump_inst(c, inst);
423                        fprintf(stderr, "\n");
424                        c->compilation_result = V3D_COMPILATION_FAILED;
425                        return;
426                }
427
428                if (v3d_qpu_is_nop(&inst->qpu))
429                        c->nop_count++;
430        }
431        assert(i == c->qpu_inst_count);
432
433        if (V3D_DEBUG & (V3D_DEBUG_QPU |
434                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
435                v3d_dump_qpu(c);
436        }
437
438        qpu_validate(c);
439
440        free(temp_registers);
441}
442