/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
26
27#include "ir2_private.h"
28
29/* if an instruction has side effects, we should never kill it */
30static bool
31has_side_effects(struct ir2_instr *instr)
32{
33   if (instr->type == IR2_CF)
34      return true;
35   else if (instr->type == IR2_FETCH)
36      return false;
37
38   switch (instr->alu.scalar_opc) {
39   case PRED_SETEs ... KILLONEs:
40      return true;
41   default:
42      break;
43   }
44
45   switch (instr->alu.vector_opc) {
46   case PRED_SETE_PUSHv ... KILLNEv:
47      return true;
48   default:
49      break;
50   }
51
52   return instr->alu.export >= 0;
53}
54
55/* mark an instruction as required, and all its sources recursively */
56static void
57set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
58{
59   struct ir2_reg *reg;
60
61   /* don't repeat work already done */
62   if (instr->need_emit)
63      return;
64
65   instr->need_emit = true;
66
67   ir2_foreach_src (src, instr) {
68      switch (src->type) {
69      case IR2_SRC_SSA:
70         set_need_emit(ctx, &ctx->instr[src->num]);
71         break;
72      case IR2_SRC_REG:
73         /* slow ..  */
74         reg = get_reg_src(ctx, src);
75         ir2_foreach_instr (instr, ctx) {
76            if (!instr->is_ssa && instr->reg == reg)
77               set_need_emit(ctx, instr);
78         }
79         break;
80      default:
81         break;
82      }
83   }
84}
85
86/* get current bit mask of allocated components for a register */
87static unsigned
88reg_mask(struct ir2_context *ctx, unsigned idx)
89{
90   return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;
91}
92
93static void
94reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
95{
96   idx = idx * 4 + c;
97   ctx->reg_state[idx / 32] |= 1 << idx % 32;
98}
99
100static void
101reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
102{
103   idx = idx * 4 + c;
104   ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
105}
106
107void
108ra_count_refs(struct ir2_context *ctx)
109{
110   struct ir2_reg *reg;
111
112   /* mark instructions as needed
113    * need to do this because "substitutions" pass makes many movs not needed
114    */
115   ir2_foreach_instr (instr, ctx) {
116      if (has_side_effects(instr))
117         set_need_emit(ctx, instr);
118   }
119
120   /* compute ref_counts */
121   ir2_foreach_instr (instr, ctx) {
122      /* kill non-needed so they can be skipped */
123      if (!instr->need_emit) {
124         instr->type = IR2_NONE;
125         continue;
126      }
127
128      ir2_foreach_src (src, instr) {
129         if (src->type == IR2_SRC_CONST)
130            continue;
131
132         reg = get_reg_src(ctx, src);
133         for (int i = 0; i < src_ncomp(instr); i++)
134            reg->comp[swiz_get(src->swizzle, i)].ref_count++;
135      }
136   }
137}
138
139void
140ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
141       uint8_t export_writemask)
142{
143   /* for export, don't allocate anything but set component layout */
144   if (export) {
145      for (int i = 0; i < 4; i++)
146         reg->comp[i].c = i;
147      return;
148   }
149
150   unsigned idx = force_idx;
151
152   /* TODO: allocate into the same register if theres room
153    * note: the blob doesn't do it, so verify that it is indeed better
154    * also, doing it would conflict with scalar mov insertion
155    */
156
157   /* check if already allocated */
158   for (int i = 0; i < reg->ncomp; i++) {
159      if (reg->comp[i].alloc)
160         return;
161   }
162
163   if (force_idx < 0) {
164      for (idx = 0; idx < 64; idx++) {
165         if (reg_mask(ctx, idx) == 0)
166            break;
167      }
168   }
169   assert(idx != 64); /* TODO ran out of register space.. */
170
171   /* update max_reg value */
172   ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);
173
174   unsigned mask = reg_mask(ctx, idx);
175
176   for (int i = 0; i < reg->ncomp; i++) {
177      /* don't allocate never used values */
178      if (reg->comp[i].ref_count == 0) {
179         reg->comp[i].c = 7;
180         continue;
181      }
182
183      /* TODO */
184      unsigned c = 1 ? i : (ffs(~mask) - 1);
185      mask |= 1 << c;
186      reg->comp[i].c = c;
187      reg_setmask(ctx, idx, c);
188      reg->comp[i].alloc = true;
189   }
190
191   reg->idx = idx;
192   ctx->live_regs[reg->idx] = reg;
193}
194
195/* reduce srcs ref_count and free if needed */
196void
197ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
198{
199   struct ir2_reg *reg;
200   struct ir2_reg_component *comp;
201
202   ir2_foreach_src (src, instr) {
203      if (src->type == IR2_SRC_CONST)
204         continue;
205
206      reg = get_reg_src(ctx, src);
207      /* XXX use before write case */
208
209      for (int i = 0; i < src_ncomp(instr); i++) {
210         comp = &reg->comp[swiz_get(src->swizzle, i)];
211         if (!--comp->ref_count && reg->block_idx_free < 0) {
212            reg_freemask(ctx, reg->idx, comp->c);
213            comp->alloc = false;
214         }
215      }
216   }
217}
218
219/* free any regs left for a block */
220void
221ra_block_free(struct ir2_context *ctx, unsigned block)
222{
223   ir2_foreach_live_reg (reg, ctx) {
224      if (reg->block_idx_free != block)
225         continue;
226
227      for (int i = 0; i < reg->ncomp; i++) {
228         if (!reg->comp[i].alloc) /* XXX should never be true? */
229            continue;
230
231         reg_freemask(ctx, reg->idx, reg->comp[i].c);
232         reg->comp[i].alloc = false;
233      }
234      ctx->live_regs[reg->idx] = NULL;
235   }
236}
237