/*
 * Copyright © 2021 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3d_opt_constant_alu.c
 *
 * Identifies sequences of ALU instructions that operate on constant operands
 * and reduces them to a uniform load.
 *
 * This is useful, for example, to optimize the result of removing leading
 * ldunifa instructions in the DCE pass, which can leave a series of constant
 * additions that increment the unifa address by 4 for each leading ldunif
 * removed.
It helps turn this: 34 * 35 * nop t1; ldunif (0x00000004 / 0.000000) 36 * nop t2; ldunif (0x00000004 / 0.000000) 37 * add t3, t1, t2 38 * 39 * into: 40 * 41 * nop t1; ldunif (0x00000004 / 0.000000) 42 * nop t2; ldunif (0x00000004 / 0.000000) 43 * nop t4; ldunif (0x00000008 / 0.000000) 44 * mov t3, t4 45 * 46 * For best results we want to run copy propagation in between this and 47 * the combine constants pass: every time we manage to convert an alu to 48 * a uniform load, we move the uniform to the original alu destination. By 49 * running copy propagation immediately after we can reuse the uniform as 50 * source in more follow-up alu instructions, making them constant and allowing 51 * this pass to continue making progress. However, if we run the small 52 * immediates optimization before that, that pass can convert some of the movs 53 * to use small immediates instead of the uniforms and prevent us from making 54 * the best of this pass, as small immediates don't get copy propagated. 55 */ 56 57#include "v3d_compiler.h" 58 59#include "util/half_float.h" 60#include "util/u_math.h" 61 62static bool 63opt_constant_add(struct v3d_compile *c, struct qinst *inst, union fi *values) 64{ 65 /* FIXME: handle more add operations */ 66 struct qreg unif = { }; 67 switch (inst->qpu.alu.add.op) { 68 case V3D_QPU_A_ADD: 69 c->cursor = vir_after_inst(inst); 70 unif = vir_uniform_ui(c, values[0].ui + values[1].ui); 71 break; 72 73 case V3D_QPU_A_VFPACK: { 74 assert(inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE); 75 76 const uint32_t packed = 77 (((uint32_t)_mesa_float_to_half(values[1].f)) << 16) | 78 _mesa_float_to_half(values[0].f); 79 80 c->cursor = vir_after_inst(inst); 81 unif = vir_uniform_ui(c, packed); 82 break; 83 } 84 85 default: 86 return false; 87 } 88 89 /* Remove the original ALU instruction and replace it with a uniform 90 * load. If the original instruction loaded an implicit uniform we 91 * need to replicate that in the new instruction. 
92 */ 93 struct qreg dst = inst->dst; 94 struct qinst *mov = vir_MOV_dest(c, dst, unif); 95 mov->uniform = inst->uniform; 96 vir_remove_instruction(c, inst); 97 if (dst.file == QFILE_TEMP) 98 c->defs[dst.index] = mov; 99 return true; 100} 101 102static bool 103try_opt_constant_alu(struct v3d_compile *c, struct qinst *inst) 104{ 105 if(inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU) 106 return false; 107 108 /* If the instruction does anything other than writing the result 109 * directly to the destination, skip. 110 */ 111 if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE || 112 inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) { 113 return false; 114 } 115 116 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 117 inst->qpu.flags.mc != V3D_QPU_COND_NONE) { 118 return false; 119 } 120 121 assert(vir_get_nsrc(inst) <= 2); 122 union fi values[2]; 123 for (int i = 0; i < vir_get_nsrc(inst); i++) { 124 if (inst->src[i].file == QFILE_SMALL_IMM && 125 v3d_qpu_small_imm_unpack(c->devinfo, 126 inst->qpu.raddr_b, 127 &values[i].ui)) { 128 continue; 129 } 130 131 if (inst->src[i].file == QFILE_TEMP) { 132 struct qinst *def = c->defs[inst->src[i].index]; 133 if (!def) 134 return false; 135 136 if ((def->qpu.sig.ldunif || def->qpu.sig.ldunifrf) && 137 c->uniform_contents[def->uniform] == QUNIFORM_CONSTANT) { 138 values[i].ui = c->uniform_data[def->uniform]; 139 continue; 140 } 141 } 142 143 return false; 144 } 145 146 /* FIXME: handle mul operations */ 147 if (vir_is_add(inst)) 148 return opt_constant_add(c, inst, values); 149 150 return false; 151} 152 153bool 154vir_opt_constant_alu(struct v3d_compile *c) 155{ 156 bool progress = false; 157 vir_for_each_block(block, c) { 158 c->cur_block = block; 159 vir_for_each_inst_safe(inst, block) { 160 progress = try_opt_constant_alu(c, inst) || progress; 161 } 162 } 163 164 return progress; 165} 166