1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "ir3_ra.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci/* The spilling pass leaves out a few details required to successfully operate
27bf215546Sopenharmony_ci * ldp/stp:
28bf215546Sopenharmony_ci *
29bf215546Sopenharmony_ci * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
30bf215546Sopenharmony_ci *    that and just spills/restores entire values, including arrays and values
31bf215546Sopenharmony_ci *    created for texture setup which can be more than 4 components.
32bf215546Sopenharmony_ci * 2. The immediate offset only has 13 bits and is signed, so if we spill a lot
33bf215546Sopenharmony_ci *    or have very large arrays before spilling then we could run out.
34bf215546Sopenharmony_ci * 3. The spiller doesn't add barrier dependencies needed for post-RA
35bf215546Sopenharmony_ci *    scheduling.
36bf215546Sopenharmony_ci *
37bf215546Sopenharmony_ci * The first one, in particular, is much easier to handle after RA because
38bf215546Sopenharmony_ci * arrays and normal values can be treated the same way. Therefore this pass
39bf215546Sopenharmony_ci * runs after RA, and handles all three issues. This keeps the complexity out of
40bf215546Sopenharmony_ci * the spiller.
41bf215546Sopenharmony_ci */
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_cistatic unsigned
44bf215546Sopenharmony_cicomponent_bytes(struct ir3_register *src)
45bf215546Sopenharmony_ci{
46bf215546Sopenharmony_ci   return (src->flags & IR3_REG_HALF) ? 2 : 4;
47bf215546Sopenharmony_ci}
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci/* Note: this won't work if the base register is anything other than 0!
50bf215546Sopenharmony_ci * Dynamic bases, which we'll need for "real" function call support, will
51bf215546Sopenharmony_ci * probably be a lot harder to handle and may require reserving another
52bf215546Sopenharmony_ci * register.
53bf215546Sopenharmony_ci */
54bf215546Sopenharmony_cistatic void
55bf215546Sopenharmony_ciset_base_reg(struct ir3_instruction *mem, unsigned val)
56bf215546Sopenharmony_ci{
57bf215546Sopenharmony_ci   struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1);
58bf215546Sopenharmony_ci   ir3_dst_create(mov, mem->srcs[0]->num, mem->srcs[0]->flags);
59bf215546Sopenharmony_ci   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = val;
60bf215546Sopenharmony_ci   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   ir3_instr_move_before(mov, mem);
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic void
66bf215546Sopenharmony_cireset_base_reg(struct ir3_instruction *mem)
67bf215546Sopenharmony_ci{
68bf215546Sopenharmony_ci   /* If the base register is killed, then we don't need to clobber it and it
69bf215546Sopenharmony_ci    * may be reused as a destination so we can't always clobber it after the
70bf215546Sopenharmony_ci    * instruction anyway.
71bf215546Sopenharmony_ci    */
72bf215546Sopenharmony_ci   struct ir3_register *base = mem->srcs[0];
73bf215546Sopenharmony_ci   if (base->flags & IR3_REG_KILL)
74bf215546Sopenharmony_ci      return;
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci   struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1);
77bf215546Sopenharmony_ci   ir3_dst_create(mov, base->num, base->flags);
78bf215546Sopenharmony_ci   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = 0;
79bf215546Sopenharmony_ci   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci   ir3_instr_move_after(mov, mem);
82bf215546Sopenharmony_ci}
83bf215546Sopenharmony_ci
/* The immediate offset field is 13 bits wide and signed, so bit 12 is the
 * sign bit: an offset of 1 << 12 would be sign-extended into a negative
 * value and cannot be encoded directly. Only offsets strictly below
 * 1 << 12 (0x1000) can be used without any adjustment.
 */
#define MAX_CAT6_SIZE 0x1000u
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_cistatic void
91bf215546Sopenharmony_cihandle_oob_offset_spill(struct ir3_instruction *spill)
92bf215546Sopenharmony_ci{
93bf215546Sopenharmony_ci   unsigned components = spill->srcs[2]->uim_val;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   if (spill->cat6.dst_offset + components * component_bytes(spill->srcs[1]) < MAX_CAT6_SIZE)
96bf215546Sopenharmony_ci      return;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   set_base_reg(spill, spill->cat6.dst_offset);
99bf215546Sopenharmony_ci   reset_base_reg(spill);
100bf215546Sopenharmony_ci   spill->cat6.dst_offset = 0;
101bf215546Sopenharmony_ci}
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_cistatic void
104bf215546Sopenharmony_cihandle_oob_offset_reload(struct ir3_instruction *reload)
105bf215546Sopenharmony_ci{
106bf215546Sopenharmony_ci   unsigned components = reload->srcs[2]->uim_val;
107bf215546Sopenharmony_ci   unsigned offset = reload->srcs[1]->uim_val;
108bf215546Sopenharmony_ci   if (offset + components * component_bytes(reload->dsts[0]) < MAX_CAT6_SIZE)
109bf215546Sopenharmony_ci      return;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   set_base_reg(reload, offset);
112bf215546Sopenharmony_ci   reset_base_reg(reload);
113bf215546Sopenharmony_ci   reload->srcs[1]->uim_val = 0;
114bf215546Sopenharmony_ci}
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_cistatic void
117bf215546Sopenharmony_cisplit_spill(struct ir3_instruction *spill)
118bf215546Sopenharmony_ci{
119bf215546Sopenharmony_ci   unsigned orig_components = spill->srcs[2]->uim_val;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   /* We don't handle splitting dependencies. */
122bf215546Sopenharmony_ci   assert(spill->deps_count == 0);
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   if (orig_components <= 4) {
125bf215546Sopenharmony_ci      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
126bf215546Sopenharmony_ci         spill->srcs[1]->wrmask = MASK(orig_components);
127bf215546Sopenharmony_ci         spill->srcs[1]->num = spill->srcs[1]->array.base;
128bf215546Sopenharmony_ci         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
129bf215546Sopenharmony_ci      }
130bf215546Sopenharmony_ci      return;
131bf215546Sopenharmony_ci   }
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   for (unsigned comp = 0; comp < orig_components; comp += 4) {
134bf215546Sopenharmony_ci      unsigned components = MIN2(orig_components - comp, 4);
135bf215546Sopenharmony_ci      struct ir3_instruction *clone = ir3_instr_clone(spill);
136bf215546Sopenharmony_ci      ir3_instr_move_before(clone, spill);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci      clone->srcs[1]->wrmask = MASK(components);
139bf215546Sopenharmony_ci      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
140bf215546Sopenharmony_ci         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
141bf215546Sopenharmony_ci         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
142bf215546Sopenharmony_ci      }
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci      clone->srcs[2]->uim_val = components;
145bf215546Sopenharmony_ci      clone->cat6.dst_offset += comp * component_bytes(spill->srcs[1]);
146bf215546Sopenharmony_ci   }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   list_delinit(&spill->node);
149bf215546Sopenharmony_ci}
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_cistatic void
152bf215546Sopenharmony_cisplit_reload(struct ir3_instruction *reload)
153bf215546Sopenharmony_ci{
154bf215546Sopenharmony_ci   unsigned orig_components = reload->srcs[2]->uim_val;
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   assert(reload->deps_count == 0);
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   if (orig_components <= 4) {
159bf215546Sopenharmony_ci      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
160bf215546Sopenharmony_ci         reload->dsts[0]->wrmask = MASK(orig_components);
161bf215546Sopenharmony_ci         reload->dsts[0]->num = reload->dsts[0]->array.base;
162bf215546Sopenharmony_ci         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
163bf215546Sopenharmony_ci      }
164bf215546Sopenharmony_ci      return;
165bf215546Sopenharmony_ci   }
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   for (unsigned comp = 0; comp < orig_components; comp += 4) {
168bf215546Sopenharmony_ci      unsigned components = MIN2(orig_components - comp, 4);
169bf215546Sopenharmony_ci      struct ir3_instruction *clone = ir3_instr_clone(reload);
170bf215546Sopenharmony_ci      ir3_instr_move_before(clone, reload);
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci      clone->dsts[0]->wrmask = MASK(components);
173bf215546Sopenharmony_ci      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
174bf215546Sopenharmony_ci         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
175bf215546Sopenharmony_ci         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
176bf215546Sopenharmony_ci      }
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci      clone->srcs[2]->uim_val = components;
179bf215546Sopenharmony_ci      clone->srcs[1]->uim_val += comp * component_bytes(reload->dsts[0]);
180bf215546Sopenharmony_ci   }
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   list_delinit(&reload->node);
183bf215546Sopenharmony_ci}
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_cistatic void
186bf215546Sopenharmony_ciadd_spill_reload_deps(struct ir3_block *block)
187bf215546Sopenharmony_ci{
188bf215546Sopenharmony_ci   struct ir3_instruction *last_spill = NULL;
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   foreach_instr (instr, &block->instr_list) {
191bf215546Sopenharmony_ci      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
192bf215546Sopenharmony_ci          last_spill) {
193bf215546Sopenharmony_ci         ir3_instr_add_dep(instr, last_spill);
194bf215546Sopenharmony_ci      }
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci      if (instr->opc == OPC_SPILL_MACRO)
197bf215546Sopenharmony_ci         last_spill = instr;
198bf215546Sopenharmony_ci   }
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   last_spill = NULL;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   foreach_instr_rev (instr, &block->instr_list) {
204bf215546Sopenharmony_ci      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
205bf215546Sopenharmony_ci          last_spill) {
206bf215546Sopenharmony_ci         ir3_instr_add_dep(last_spill, instr);
207bf215546Sopenharmony_ci      }
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci      if (instr->opc == OPC_SPILL_MACRO)
210bf215546Sopenharmony_ci         last_spill = instr;
211bf215546Sopenharmony_ci   }
212bf215546Sopenharmony_ci}
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_cibool
215bf215546Sopenharmony_ciir3_lower_spill(struct ir3 *ir)
216bf215546Sopenharmony_ci{
217bf215546Sopenharmony_ci   foreach_block (block, &ir->block_list) {
218bf215546Sopenharmony_ci      foreach_instr_safe (instr, &block->instr_list) {
219bf215546Sopenharmony_ci         if (instr->opc == OPC_SPILL_MACRO) {
220bf215546Sopenharmony_ci            handle_oob_offset_spill(instr);
221bf215546Sopenharmony_ci            split_spill(instr);
222bf215546Sopenharmony_ci         } else if (instr->opc == OPC_RELOAD_MACRO) {
223bf215546Sopenharmony_ci            handle_oob_offset_reload(instr);
224bf215546Sopenharmony_ci            split_reload(instr);
225bf215546Sopenharmony_ci         }
226bf215546Sopenharmony_ci      }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci      add_spill_reload_deps(block);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci      foreach_instr (instr, &block->instr_list) {
231bf215546Sopenharmony_ci         if (instr->opc == OPC_SPILL_MACRO)
232bf215546Sopenharmony_ci            instr->opc = OPC_STP;
233bf215546Sopenharmony_ci         else if (instr->opc == OPC_RELOAD_MACRO)
234bf215546Sopenharmony_ci            instr->opc = OPC_LDP;
235bf215546Sopenharmony_ci      }
236bf215546Sopenharmony_ci   }
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   return true;
239bf215546Sopenharmony_ci}
240