1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2022 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "va_compiler.h"
25bf215546Sopenharmony_ci#include "valhall_enums.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/*
 * Valhall sources may be marked as the last use of a register, according
29bf215546Sopenharmony_ci * to the following rules:
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * 1. The last use of a register should be marked allowing the hardware
32bf215546Sopenharmony_ci *    to elide register writes.
33bf215546Sopenharmony_ci * 2. Staging sources may be read at any time before the asynchronous
34bf215546Sopenharmony_ci *    instruction completes. If a register is used as both a staging source and
35bf215546Sopenharmony_ci *    a regular source, the regular source cannot be marked until the program
36bf215546Sopenharmony_ci *    waits for the asynchronous instruction.
37bf215546Sopenharmony_ci * 3. Marking a register pair marks both registers in the pair.
38bf215546Sopenharmony_ci *
39bf215546Sopenharmony_ci * Last use information follows immediately from (post-RA) liveness analysis:
40bf215546Sopenharmony_ci * a register is dead immediately after its last use.
41bf215546Sopenharmony_ci *
42bf215546Sopenharmony_ci * Staging information follows from scoreboard analysis: do not mark registers
43bf215546Sopenharmony_ci * that are read by a pending asynchronous instruction. Note that the Valhall
44bf215546Sopenharmony_ci * scoreboard analysis does not track reads, so we handle that with our own
45bf215546Sopenharmony_ci * (simplified) scoreboard analysis.
46bf215546Sopenharmony_ci *
47bf215546Sopenharmony_ci * Register pairs are marked conservatively: if either register in a pair cannot
48bf215546Sopenharmony_ci * be marked, do not mark either register.
49bf215546Sopenharmony_ci */
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_cistatic uint64_t
52bf215546Sopenharmony_cibi_staging_read_mask(const bi_instr *I)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   uint64_t mask = 0;
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   bi_foreach_src(I, s) {
57bf215546Sopenharmony_ci      if (bi_is_staging_src(I, s) && !bi_is_null(I->src[s])) {
58bf215546Sopenharmony_ci         assert(I->src[s].type == BI_INDEX_REGISTER);
59bf215546Sopenharmony_ci         unsigned reg = I->src[s].value;
60bf215546Sopenharmony_ci         unsigned count = bi_count_read_registers(I, s);
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci         mask |= (BITFIELD64_MASK(count) << reg);
63bf215546Sopenharmony_ci      }
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   return mask;
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_cistatic bool
70bf215546Sopenharmony_cibi_writes_reg(const bi_instr *I, unsigned reg)
71bf215546Sopenharmony_ci{
72bf215546Sopenharmony_ci   bi_foreach_dest(I, d) {
73bf215546Sopenharmony_ci      if (bi_is_null(I->dest[d]))
74bf215546Sopenharmony_ci         continue;
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci      assert(I->dest[d].type == BI_INDEX_REGISTER);
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci      unsigned count = bi_count_write_registers(I, d);
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci      if (reg >= I->dest[d].value && (reg - I->dest[d].value) < count)
81bf215546Sopenharmony_ci         return true;
82bf215546Sopenharmony_ci   }
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   return false;
85bf215546Sopenharmony_ci}
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_cistatic unsigned
88bf215546Sopenharmony_ciwaits_on_slot(enum va_flow flow, unsigned slot)
89bf215546Sopenharmony_ci{
90bf215546Sopenharmony_ci   return (flow == VA_FLOW_WAIT) || (flow == VA_FLOW_WAIT0126) ||
91bf215546Sopenharmony_ci          (va_flow_is_wait_or_none(flow) && (flow & BITFIELD_BIT(slot)));
92bf215546Sopenharmony_ci}
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_cistatic void
95bf215546Sopenharmony_ciscoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I)
96bf215546Sopenharmony_ci{
97bf215546Sopenharmony_ci   /* Mark read staging registers */
98bf215546Sopenharmony_ci   st->read[I->slot] |= bi_staging_read_mask(I);
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   /* Unmark registers after they are waited on */
101bf215546Sopenharmony_ci   for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) {
102bf215546Sopenharmony_ci      if (waits_on_slot(I->flow, i))
103bf215546Sopenharmony_ci            st->read[i] = 0;
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_cistatic void
108bf215546Sopenharmony_civa_analyze_scoreboard_reads(bi_context *ctx)
109bf215546Sopenharmony_ci{
110bf215546Sopenharmony_ci   u_worklist worklist;
111bf215546Sopenharmony_ci   bi_worklist_init(ctx, &worklist);
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   bi_foreach_block(ctx, block) {
114bf215546Sopenharmony_ci      bi_worklist_push_tail(&worklist, block);
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci      /* Reset analysis from previous pass */
117bf215546Sopenharmony_ci      block->scoreboard_in = (struct bi_scoreboard_state){ 0 };
118bf215546Sopenharmony_ci      block->scoreboard_out = (struct bi_scoreboard_state){ 0 };
119bf215546Sopenharmony_ci   }
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   /* Perform forward data flow analysis to calculate dependencies */
122bf215546Sopenharmony_ci   while (!u_worklist_is_empty(&worklist)) {
123bf215546Sopenharmony_ci      /* Pop from the front for forward analysis */
124bf215546Sopenharmony_ci      bi_block *blk = bi_worklist_pop_head(&worklist);
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci      bi_foreach_predecessor(blk, pred) {
127bf215546Sopenharmony_ci         for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i)
128bf215546Sopenharmony_ci            blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
129bf215546Sopenharmony_ci      }
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci      struct bi_scoreboard_state state = blk->scoreboard_in;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci      bi_foreach_instr_in_block(blk, I)
134bf215546Sopenharmony_ci         scoreboard_update(&state, I);
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci      /* If there was progress, reprocess successors */
137bf215546Sopenharmony_ci      if (memcmp(&state, &blk->scoreboard_out, sizeof(state)) != 0) {
138bf215546Sopenharmony_ci         bi_foreach_successor(blk, succ)
139bf215546Sopenharmony_ci            bi_worklist_push_tail(&worklist, succ);
140bf215546Sopenharmony_ci      }
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci      blk->scoreboard_out = state;
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   u_worklist_fini(&worklist);
146bf215546Sopenharmony_ci}
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_civoid
149bf215546Sopenharmony_civa_mark_last(bi_context *ctx)
150bf215546Sopenharmony_ci{
151bf215546Sopenharmony_ci   /* Analyze the shader globally */
152bf215546Sopenharmony_ci   bi_postra_liveness(ctx);
153bf215546Sopenharmony_ci   va_analyze_scoreboard_reads(ctx);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   bi_foreach_block(ctx, block) {
156bf215546Sopenharmony_ci      uint64_t live = block->reg_live_out;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci      /* Mark all last uses */
159bf215546Sopenharmony_ci      bi_foreach_instr_in_block_rev(block, I) {
160bf215546Sopenharmony_ci         bi_foreach_src(I, s) {
161bf215546Sopenharmony_ci            if (I->src[s].type != BI_INDEX_REGISTER)
162bf215546Sopenharmony_ci               continue;
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci            unsigned nr = bi_count_read_registers(I, s);
165bf215546Sopenharmony_ci            uint64_t mask = BITFIELD64_MASK(nr) << I->src[s].value;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci            /* If the register dead after this instruction, it's the last use */
168bf215546Sopenharmony_ci            I->src[s].discard = (live & mask) == 0;
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci            /* If the register is overwritten this cycle, it is implicitly
171bf215546Sopenharmony_ci             * discarded, but that won't show up in the liveness analysis.
172bf215546Sopenharmony_ci             */
173bf215546Sopenharmony_ci            I->src[s].discard |= bi_writes_reg(I, I->src[s].value);
174bf215546Sopenharmony_ci         }
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci         live = bi_postra_liveness_ins(live, I);
177bf215546Sopenharmony_ci      }
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci      struct bi_scoreboard_state st = block->scoreboard_in;
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci      bi_foreach_instr_in_block(block, I) {
182bf215546Sopenharmony_ci         /* Unmark registers read by a pending async instruction */
183bf215546Sopenharmony_ci         bi_foreach_src(I, s) {
184bf215546Sopenharmony_ci            if (!I->src[s].discard)
185bf215546Sopenharmony_ci               continue;
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci            assert(I->src[s].type == BI_INDEX_REGISTER);
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci            uint64_t pending_regs = st.read[0] | st.read[1] | st.read[2];
190bf215546Sopenharmony_ci            bool pending = (pending_regs & BITFIELD64_BIT(I->src[s].value));
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci            if (bi_is_staging_src(I, s) || pending)
193bf215546Sopenharmony_ci               I->src[s].discard = false;
194bf215546Sopenharmony_ci         }
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci         /* Unmark register pairs where one half must be preserved */
197bf215546Sopenharmony_ci         bi_foreach_src(I, s) {
198bf215546Sopenharmony_ci            /* Only look for "real" architectural registers */
199bf215546Sopenharmony_ci            if (s >= 3)
200bf215546Sopenharmony_ci               break;
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci            if (va_src_info(I->op, s).size == VA_SIZE_64) {
203bf215546Sopenharmony_ci               bool both_discard = I->src[s].discard && I->src[s + 1].discard;
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci               I->src[s + 0].discard = both_discard;
206bf215546Sopenharmony_ci               I->src[s + 1].discard = both_discard;
207bf215546Sopenharmony_ci            }
208bf215546Sopenharmony_ci         }
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci         scoreboard_update(&st, I);
211bf215546Sopenharmony_ci      }
212bf215546Sopenharmony_ci   }
213bf215546Sopenharmony_ci}
214