1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2019 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/* Midgard has some accelerated support for perspective projection on the 25bf215546Sopenharmony_ci * load/store pipes. So the first perspective projection pass looks for 26bf215546Sopenharmony_ci * lowered/open-coded perspective projection of the form "fmul (A.xyz, 27bf215546Sopenharmony_ci * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrite with a native 28bf215546Sopenharmony_ci * perspective division opcode (on the load/store pipe). Caveats apply: the 29bf215546Sopenharmony_ci * frcp should be used only once to make this optimization worthwhile. And the 30bf215546Sopenharmony_ci * source of the frcp ought to be a varying to make it worthwhile... 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * The second pass in this file is a step #2 of sorts: fusing that load/store 33bf215546Sopenharmony_ci * projection into a varying load instruction (they can be done together 34bf215546Sopenharmony_ci * implicitly). This depends on the combination pass. Again caveat: the vary 35bf215546Sopenharmony_ci * should only be used once to make this worthwhile. 36bf215546Sopenharmony_ci */ 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#include "compiler.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic bool 41bf215546Sopenharmony_ciis_swizzle_0(unsigned *swizzle) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) 44bf215546Sopenharmony_ci if (swizzle[c]) 45bf215546Sopenharmony_ci return false; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci return true; 48bf215546Sopenharmony_ci} 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cibool 51bf215546Sopenharmony_cimidgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) 52bf215546Sopenharmony_ci{ 53bf215546Sopenharmony_ci bool progress = false; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci mir_foreach_instr_in_block_safe(block, ins) { 56bf215546Sopenharmony_ci /* First search for fmul */ 57bf215546Sopenharmony_ci if (ins->type != TAG_ALU_4) continue; 58bf215546Sopenharmony_ci if (ins->op != midgard_alu_op_fmul) continue; 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci /* TODO: Flip */ 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci /* Check the swizzles */ 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue; 65bf215546Sopenharmony_ci if (!is_swizzle_0(ins->swizzle[1])) continue; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci /* Awesome, we're the right form. Now check where src2 is from */ 68bf215546Sopenharmony_ci unsigned frcp = ins->src[1]; 69bf215546Sopenharmony_ci unsigned to = ins->dest; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci if (frcp & PAN_IS_REG) continue; 72bf215546Sopenharmony_ci if (to & PAN_IS_REG) continue; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci bool frcp_found = false; 75bf215546Sopenharmony_ci unsigned frcp_component = 0; 76bf215546Sopenharmony_ci unsigned frcp_from = 0; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci mir_foreach_instr_in_block_safe(block, sub) { 79bf215546Sopenharmony_ci if (sub->dest != frcp) continue; 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci frcp_component = sub->swizzle[0][0]; 82bf215546Sopenharmony_ci frcp_from = sub->src[0]; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci frcp_found = 85bf215546Sopenharmony_ci (sub->type == TAG_ALU_4) && 86bf215546Sopenharmony_ci (sub->op == midgard_alu_op_frcp); 87bf215546Sopenharmony_ci break; 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci if (!frcp_found) continue; 91bf215546Sopenharmony_ci if (frcp_from != ins->src[0]) continue; 92bf215546Sopenharmony_ci if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue; 93bf215546Sopenharmony_ci if (!mir_single_use(ctx, frcp)) continue; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci /* Heuristic: check if the frcp is from a single-use varying */ 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci bool ok = false; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci /* One for frcp and one for fmul */ 100bf215546Sopenharmony_ci if (mir_use_count(ctx, frcp_from) > 2) continue; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci mir_foreach_instr_in_block_safe(block, v) { 103bf215546Sopenharmony_ci if (v->dest != frcp_from) continue; 104bf215546Sopenharmony_ci if (v->type != TAG_LOAD_STORE_4) break; 105bf215546Sopenharmony_ci if (!OP_IS_LOAD_VARY_F(v->op)) break; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci ok = true; 108bf215546Sopenharmony_ci break; 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci if (!ok) 112bf215546Sopenharmony_ci continue; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci /* Nice, we got the form spot on. Let's convert! */ 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci midgard_instruction accel = { 117bf215546Sopenharmony_ci .type = TAG_LOAD_STORE_4, 118bf215546Sopenharmony_ci .mask = ins->mask, 119bf215546Sopenharmony_ci .dest = to, 120bf215546Sopenharmony_ci .dest_type = nir_type_float32, 121bf215546Sopenharmony_ci .src = { frcp_from, ~0, ~0, ~0 }, 122bf215546Sopenharmony_ci .src_types = { nir_type_float32 }, 123bf215546Sopenharmony_ci .swizzle = SWIZZLE_IDENTITY_4, 124bf215546Sopenharmony_ci .op = frcp_component == COMPONENT_W ? 125bf215546Sopenharmony_ci midgard_op_ldst_perspective_div_w : 126bf215546Sopenharmony_ci midgard_op_ldst_perspective_div_z, 127bf215546Sopenharmony_ci .load_store = { 128bf215546Sopenharmony_ci .bitsize_toggle = true, 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci }; 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci mir_insert_instruction_before(ctx, ins, accel); 133bf215546Sopenharmony_ci mir_remove_instruction(ins); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci progress |= true; 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci return progress; 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_cibool 142bf215546Sopenharmony_cimidgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) 143bf215546Sopenharmony_ci{ 144bf215546Sopenharmony_ci bool progress = false; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci mir_foreach_instr_in_block_safe(block, ins) { 147bf215546Sopenharmony_ci /* Search for a projection */ 148bf215546Sopenharmony_ci if (ins->type != TAG_LOAD_STORE_4) continue; 149bf215546Sopenharmony_ci if (!OP_IS_PROJECTION(ins->op)) continue; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci unsigned vary = ins->src[0]; 152bf215546Sopenharmony_ci unsigned to = ins->dest; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci if (vary & PAN_IS_REG) continue; 155bf215546Sopenharmony_ci if (to & PAN_IS_REG) continue; 156bf215546Sopenharmony_ci if (!mir_single_use(ctx, vary)) continue; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci /* Check for a varying source. If we find it, we rewrite */ 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci bool rewritten = false; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci mir_foreach_instr_in_block_safe(block, v) { 163bf215546Sopenharmony_ci if (v->dest != vary) continue; 164bf215546Sopenharmony_ci if (v->type != TAG_LOAD_STORE_4) break; 165bf215546Sopenharmony_ci if (!OP_IS_LOAD_VARY_F(v->op)) break; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* We found it, so rewrite it to project. Grab the 168bf215546Sopenharmony_ci * modifier */ 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci midgard_varying_params p = 171bf215546Sopenharmony_ci midgard_unpack_varying_params(v->load_store); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci if (p.modifier != midgard_varying_mod_none) 174bf215546Sopenharmony_ci break; 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci bool projects_w = 177bf215546Sopenharmony_ci ins->op == midgard_op_ldst_perspective_div_w; 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci p.modifier = projects_w ? 180bf215546Sopenharmony_ci midgard_varying_mod_perspective_w : 181bf215546Sopenharmony_ci midgard_varying_mod_perspective_z; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci midgard_pack_varying_params(&v->load_store, p); 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci /* Use the new destination */ 186bf215546Sopenharmony_ci v->dest = to; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci rewritten = true; 189bf215546Sopenharmony_ci break; 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (rewritten) 193bf215546Sopenharmony_ci mir_remove_instruction(ins); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci progress |= rewritten; 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci return progress; 199bf215546Sopenharmony_ci} 200