1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Jonathan Marek <jonathan@marek.ca>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "ir2_private.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci/* if an instruction has side effects, we should never kill it */
30bf215546Sopenharmony_cistatic bool
31bf215546Sopenharmony_cihas_side_effects(struct ir2_instr *instr)
32bf215546Sopenharmony_ci{
33bf215546Sopenharmony_ci   if (instr->type == IR2_CF)
34bf215546Sopenharmony_ci      return true;
35bf215546Sopenharmony_ci   else if (instr->type == IR2_FETCH)
36bf215546Sopenharmony_ci      return false;
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci   switch (instr->alu.scalar_opc) {
39bf215546Sopenharmony_ci   case PRED_SETEs ... KILLONEs:
40bf215546Sopenharmony_ci      return true;
41bf215546Sopenharmony_ci   default:
42bf215546Sopenharmony_ci      break;
43bf215546Sopenharmony_ci   }
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci   switch (instr->alu.vector_opc) {
46bf215546Sopenharmony_ci   case PRED_SETE_PUSHv ... KILLNEv:
47bf215546Sopenharmony_ci      return true;
48bf215546Sopenharmony_ci   default:
49bf215546Sopenharmony_ci      break;
50bf215546Sopenharmony_ci   }
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci   return instr->alu.export >= 0;
53bf215546Sopenharmony_ci}
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci/* mark an instruction as required, and all its sources recursively */
56bf215546Sopenharmony_cistatic void
57bf215546Sopenharmony_ciset_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
58bf215546Sopenharmony_ci{
59bf215546Sopenharmony_ci   struct ir2_reg *reg;
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   /* don't repeat work already done */
62bf215546Sopenharmony_ci   if (instr->need_emit)
63bf215546Sopenharmony_ci      return;
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   instr->need_emit = true;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   ir2_foreach_src (src, instr) {
68bf215546Sopenharmony_ci      switch (src->type) {
69bf215546Sopenharmony_ci      case IR2_SRC_SSA:
70bf215546Sopenharmony_ci         set_need_emit(ctx, &ctx->instr[src->num]);
71bf215546Sopenharmony_ci         break;
72bf215546Sopenharmony_ci      case IR2_SRC_REG:
73bf215546Sopenharmony_ci         /* slow ..  */
74bf215546Sopenharmony_ci         reg = get_reg_src(ctx, src);
75bf215546Sopenharmony_ci         ir2_foreach_instr (instr, ctx) {
76bf215546Sopenharmony_ci            if (!instr->is_ssa && instr->reg == reg)
77bf215546Sopenharmony_ci               set_need_emit(ctx, instr);
78bf215546Sopenharmony_ci         }
79bf215546Sopenharmony_ci         break;
80bf215546Sopenharmony_ci      default:
81bf215546Sopenharmony_ci         break;
82bf215546Sopenharmony_ci      }
83bf215546Sopenharmony_ci   }
84bf215546Sopenharmony_ci}
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci/* get current bit mask of allocated components for a register */
87bf215546Sopenharmony_cistatic unsigned
88bf215546Sopenharmony_cireg_mask(struct ir2_context *ctx, unsigned idx)
89bf215546Sopenharmony_ci{
90bf215546Sopenharmony_ci   return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_cistatic void
94bf215546Sopenharmony_cireg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   idx = idx * 4 + c;
97bf215546Sopenharmony_ci   ctx->reg_state[idx / 32] |= 1 << idx % 32;
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic void
101bf215546Sopenharmony_cireg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
102bf215546Sopenharmony_ci{
103bf215546Sopenharmony_ci   idx = idx * 4 + c;
104bf215546Sopenharmony_ci   ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_civoid
108bf215546Sopenharmony_cira_count_refs(struct ir2_context *ctx)
109bf215546Sopenharmony_ci{
110bf215546Sopenharmony_ci   struct ir2_reg *reg;
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   /* mark instructions as needed
113bf215546Sopenharmony_ci    * need to do this because "substitutions" pass makes many movs not needed
114bf215546Sopenharmony_ci    */
115bf215546Sopenharmony_ci   ir2_foreach_instr (instr, ctx) {
116bf215546Sopenharmony_ci      if (has_side_effects(instr))
117bf215546Sopenharmony_ci         set_need_emit(ctx, instr);
118bf215546Sopenharmony_ci   }
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   /* compute ref_counts */
121bf215546Sopenharmony_ci   ir2_foreach_instr (instr, ctx) {
122bf215546Sopenharmony_ci      /* kill non-needed so they can be skipped */
123bf215546Sopenharmony_ci      if (!instr->need_emit) {
124bf215546Sopenharmony_ci         instr->type = IR2_NONE;
125bf215546Sopenharmony_ci         continue;
126bf215546Sopenharmony_ci      }
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci      ir2_foreach_src (src, instr) {
129bf215546Sopenharmony_ci         if (src->type == IR2_SRC_CONST)
130bf215546Sopenharmony_ci            continue;
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci         reg = get_reg_src(ctx, src);
133bf215546Sopenharmony_ci         for (int i = 0; i < src_ncomp(instr); i++)
134bf215546Sopenharmony_ci            reg->comp[swiz_get(src->swizzle, i)].ref_count++;
135bf215546Sopenharmony_ci      }
136bf215546Sopenharmony_ci   }
137bf215546Sopenharmony_ci}
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_civoid
140bf215546Sopenharmony_cira_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
141bf215546Sopenharmony_ci       uint8_t export_writemask)
142bf215546Sopenharmony_ci{
143bf215546Sopenharmony_ci   /* for export, don't allocate anything but set component layout */
144bf215546Sopenharmony_ci   if (export) {
145bf215546Sopenharmony_ci      for (int i = 0; i < 4; i++)
146bf215546Sopenharmony_ci         reg->comp[i].c = i;
147bf215546Sopenharmony_ci      return;
148bf215546Sopenharmony_ci   }
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   unsigned idx = force_idx;
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   /* TODO: allocate into the same register if theres room
153bf215546Sopenharmony_ci    * note: the blob doesn't do it, so verify that it is indeed better
154bf215546Sopenharmony_ci    * also, doing it would conflict with scalar mov insertion
155bf215546Sopenharmony_ci    */
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   /* check if already allocated */
158bf215546Sopenharmony_ci   for (int i = 0; i < reg->ncomp; i++) {
159bf215546Sopenharmony_ci      if (reg->comp[i].alloc)
160bf215546Sopenharmony_ci         return;
161bf215546Sopenharmony_ci   }
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   if (force_idx < 0) {
164bf215546Sopenharmony_ci      for (idx = 0; idx < 64; idx++) {
165bf215546Sopenharmony_ci         if (reg_mask(ctx, idx) == 0)
166bf215546Sopenharmony_ci            break;
167bf215546Sopenharmony_ci      }
168bf215546Sopenharmony_ci   }
169bf215546Sopenharmony_ci   assert(idx != 64); /* TODO ran out of register space.. */
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   /* update max_reg value */
172bf215546Sopenharmony_ci   ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   unsigned mask = reg_mask(ctx, idx);
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci   for (int i = 0; i < reg->ncomp; i++) {
177bf215546Sopenharmony_ci      /* don't allocate never used values */
178bf215546Sopenharmony_ci      if (reg->comp[i].ref_count == 0) {
179bf215546Sopenharmony_ci         reg->comp[i].c = 7;
180bf215546Sopenharmony_ci         continue;
181bf215546Sopenharmony_ci      }
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci      /* TODO */
184bf215546Sopenharmony_ci      unsigned c = 1 ? i : (ffs(~mask) - 1);
185bf215546Sopenharmony_ci      mask |= 1 << c;
186bf215546Sopenharmony_ci      reg->comp[i].c = c;
187bf215546Sopenharmony_ci      reg_setmask(ctx, idx, c);
188bf215546Sopenharmony_ci      reg->comp[i].alloc = true;
189bf215546Sopenharmony_ci   }
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   reg->idx = idx;
192bf215546Sopenharmony_ci   ctx->live_regs[reg->idx] = reg;
193bf215546Sopenharmony_ci}
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci/* reduce srcs ref_count and free if needed */
196bf215546Sopenharmony_civoid
197bf215546Sopenharmony_cira_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
198bf215546Sopenharmony_ci{
199bf215546Sopenharmony_ci   struct ir2_reg *reg;
200bf215546Sopenharmony_ci   struct ir2_reg_component *comp;
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   ir2_foreach_src (src, instr) {
203bf215546Sopenharmony_ci      if (src->type == IR2_SRC_CONST)
204bf215546Sopenharmony_ci         continue;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci      reg = get_reg_src(ctx, src);
207bf215546Sopenharmony_ci      /* XXX use before write case */
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci      for (int i = 0; i < src_ncomp(instr); i++) {
210bf215546Sopenharmony_ci         comp = &reg->comp[swiz_get(src->swizzle, i)];
211bf215546Sopenharmony_ci         if (!--comp->ref_count && reg->block_idx_free < 0) {
212bf215546Sopenharmony_ci            reg_freemask(ctx, reg->idx, comp->c);
213bf215546Sopenharmony_ci            comp->alloc = false;
214bf215546Sopenharmony_ci         }
215bf215546Sopenharmony_ci      }
216bf215546Sopenharmony_ci   }
217bf215546Sopenharmony_ci}
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci/* free any regs left for a block */
220bf215546Sopenharmony_civoid
221bf215546Sopenharmony_cira_block_free(struct ir2_context *ctx, unsigned block)
222bf215546Sopenharmony_ci{
223bf215546Sopenharmony_ci   ir2_foreach_live_reg (reg, ctx) {
224bf215546Sopenharmony_ci      if (reg->block_idx_free != block)
225bf215546Sopenharmony_ci         continue;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci      for (int i = 0; i < reg->ncomp; i++) {
228bf215546Sopenharmony_ci         if (!reg->comp[i].alloc) /* XXX should never be true? */
229bf215546Sopenharmony_ci            continue;
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci         reg_freemask(ctx, reg->idx, reg->comp[i].c);
232bf215546Sopenharmony_ci         reg->comp[i].alloc = false;
233bf215546Sopenharmony_ci      }
234bf215546Sopenharmony_ci      ctx->live_regs[reg->idx] = NULL;
235bf215546Sopenharmony_ci   }
236bf215546Sopenharmony_ci}
237