/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23bf215546Sopenharmony_ci
/* Midgard has some accelerated support for perspective projection on the
 * load/store pipes. So the first perspective projection pass looks for
 * lowered/open-coded perspective projection of the form "fmul (A.xyz,
 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrites it with a native
 * perspective division opcode (on the load/store pipe). Caveats apply: the
 * frcp should be used only once to make this optimization worthwhile. And the
 * source of the frcp ought to be a varying to make it worthwhile...
 *
 * The second pass in this file is a step #2 of sorts: fusing that load/store
 * projection into a varying load instruction (they can be done together
 * implicitly). This depends on the combination pass. Again caveat: the varying
 * should only be used once to make this worthwhile.
 */
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#include "compiler.h"
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic bool
41bf215546Sopenharmony_ciis_swizzle_0(unsigned *swizzle)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c)
44bf215546Sopenharmony_ci                if (swizzle[c])
45bf215546Sopenharmony_ci                        return false;
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci        return true;
48bf215546Sopenharmony_ci}
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cibool
51bf215546Sopenharmony_cimidgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
52bf215546Sopenharmony_ci{
53bf215546Sopenharmony_ci        bool progress = false;
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci        mir_foreach_instr_in_block_safe(block, ins) {
56bf215546Sopenharmony_ci                /* First search for fmul */
57bf215546Sopenharmony_ci                if (ins->type != TAG_ALU_4) continue;
58bf215546Sopenharmony_ci                if (ins->op != midgard_alu_op_fmul) continue;
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci                /* TODO: Flip */
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci                /* Check the swizzles */
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci                if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue;
65bf215546Sopenharmony_ci                if (!is_swizzle_0(ins->swizzle[1])) continue;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci                /* Awesome, we're the right form. Now check where src2 is from */
68bf215546Sopenharmony_ci                unsigned frcp = ins->src[1];
69bf215546Sopenharmony_ci                unsigned to = ins->dest;
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci                if (frcp & PAN_IS_REG) continue;
72bf215546Sopenharmony_ci                if (to & PAN_IS_REG) continue;
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci                bool frcp_found = false;
75bf215546Sopenharmony_ci                unsigned frcp_component = 0;
76bf215546Sopenharmony_ci                unsigned frcp_from = 0;
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci                mir_foreach_instr_in_block_safe(block, sub) {
79bf215546Sopenharmony_ci                        if (sub->dest != frcp) continue;
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci                        frcp_component = sub->swizzle[0][0];
82bf215546Sopenharmony_ci                        frcp_from = sub->src[0];
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci                        frcp_found =
85bf215546Sopenharmony_ci                                (sub->type == TAG_ALU_4) &&
86bf215546Sopenharmony_ci                                (sub->op == midgard_alu_op_frcp);
87bf215546Sopenharmony_ci                        break;
88bf215546Sopenharmony_ci                }
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci                if (!frcp_found) continue;
91bf215546Sopenharmony_ci                if (frcp_from != ins->src[0]) continue;
92bf215546Sopenharmony_ci                if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue;
93bf215546Sopenharmony_ci                if (!mir_single_use(ctx, frcp)) continue;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci                /* Heuristic: check if the frcp is from a single-use varying */
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci                bool ok = false;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci                /* One for frcp and one for fmul */
100bf215546Sopenharmony_ci                if (mir_use_count(ctx, frcp_from) > 2) continue;
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci                mir_foreach_instr_in_block_safe(block, v) {
103bf215546Sopenharmony_ci                        if (v->dest != frcp_from) continue;
104bf215546Sopenharmony_ci                        if (v->type != TAG_LOAD_STORE_4) break;
105bf215546Sopenharmony_ci                        if (!OP_IS_LOAD_VARY_F(v->op)) break;
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci                        ok = true;
108bf215546Sopenharmony_ci                        break;
109bf215546Sopenharmony_ci                }
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci                if (!ok)
112bf215546Sopenharmony_ci                        continue;
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci                /* Nice, we got the form spot on. Let's convert! */
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci                midgard_instruction accel = {
117bf215546Sopenharmony_ci                        .type = TAG_LOAD_STORE_4,
118bf215546Sopenharmony_ci                        .mask = ins->mask,
119bf215546Sopenharmony_ci                        .dest = to,
120bf215546Sopenharmony_ci                        .dest_type = nir_type_float32,
121bf215546Sopenharmony_ci                        .src = { frcp_from, ~0, ~0, ~0 },
122bf215546Sopenharmony_ci                        .src_types = { nir_type_float32 },
123bf215546Sopenharmony_ci                        .swizzle = SWIZZLE_IDENTITY_4,
124bf215546Sopenharmony_ci                        .op = frcp_component == COMPONENT_W ?
125bf215546Sopenharmony_ci                                midgard_op_ldst_perspective_div_w :
126bf215546Sopenharmony_ci                                midgard_op_ldst_perspective_div_z,
127bf215546Sopenharmony_ci                        .load_store = {
128bf215546Sopenharmony_ci                                .bitsize_toggle = true,
129bf215546Sopenharmony_ci                        }
130bf215546Sopenharmony_ci                };
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci                mir_insert_instruction_before(ctx, ins, accel);
133bf215546Sopenharmony_ci                mir_remove_instruction(ins);
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci                progress |= true;
136bf215546Sopenharmony_ci        }
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci        return progress;
139bf215546Sopenharmony_ci}
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_cibool
142bf215546Sopenharmony_cimidgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
143bf215546Sopenharmony_ci{
144bf215546Sopenharmony_ci        bool progress = false;
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci        mir_foreach_instr_in_block_safe(block, ins) {
147bf215546Sopenharmony_ci                /* Search for a projection */
148bf215546Sopenharmony_ci                if (ins->type != TAG_LOAD_STORE_4) continue;
149bf215546Sopenharmony_ci                if (!OP_IS_PROJECTION(ins->op)) continue;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci                unsigned vary = ins->src[0];
152bf215546Sopenharmony_ci                unsigned to = ins->dest;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci                if (vary & PAN_IS_REG) continue;
155bf215546Sopenharmony_ci                if (to & PAN_IS_REG) continue;
156bf215546Sopenharmony_ci                if (!mir_single_use(ctx, vary)) continue;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci                /* Check for a varying source. If we find it, we rewrite */
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci                bool rewritten = false;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci                mir_foreach_instr_in_block_safe(block, v) {
163bf215546Sopenharmony_ci                        if (v->dest != vary) continue;
164bf215546Sopenharmony_ci                        if (v->type != TAG_LOAD_STORE_4) break;
165bf215546Sopenharmony_ci                        if (!OP_IS_LOAD_VARY_F(v->op)) break;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci                        /* We found it, so rewrite it to project. Grab the
168bf215546Sopenharmony_ci                         * modifier */
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci                        midgard_varying_params p =
171bf215546Sopenharmony_ci                                midgard_unpack_varying_params(v->load_store);
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci                        if (p.modifier != midgard_varying_mod_none)
174bf215546Sopenharmony_ci                                break;
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci                        bool projects_w =
177bf215546Sopenharmony_ci                                ins->op == midgard_op_ldst_perspective_div_w;
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci                        p.modifier = projects_w ?
180bf215546Sopenharmony_ci                                midgard_varying_mod_perspective_w :
181bf215546Sopenharmony_ci                                midgard_varying_mod_perspective_z;
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci                        midgard_pack_varying_params(&v->load_store, p);
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci                        /* Use the new destination */
186bf215546Sopenharmony_ci                        v->dest = to;
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci                        rewritten = true;
189bf215546Sopenharmony_ci                        break;
190bf215546Sopenharmony_ci                }
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci                if (rewritten)
193bf215546Sopenharmony_ci                        mir_remove_instruction(ins);
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci                progress |= rewritten;
196bf215546Sopenharmony_ci        }
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci        return progress;
199bf215546Sopenharmony_ci}
200