/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ir2/instr-a2xx.h"
#include "fd2_program.h"
#include "ir2.h"

/* Kind of operand a struct ir2_src refers to.  The kind selects how the
 * `num` field of the source is interpreted.  There are four values, so the
 * kind fits the 2-bit `type` bit-field of struct ir2_src.
 */
enum ir2_src_type {
   IR2_SRC_SSA,   /* num = index of instruction */
   IR2_SRC_REG,   /* num = index in ctx->reg array */
   IR2_SRC_INPUT, /* num = index in ctx->input array */
   IR2_SRC_CONST, /* num = constant index (C0, C1, etc) */
};

44struct ir2_src {
45   /* num can mean different things
46    *   ssa: index of instruction
47    *   reg: index in ctx->reg array
48    *   input: index in ctx->input array
49    *   const: constant index (C0, C1, etc)
50    */
51   uint16_t num;
52   uint8_t swizzle;
53   enum ir2_src_type type : 2;
54   uint8_t abs : 1;
55   uint8_t negate : 1;
56   uint8_t : 4;
57};
58
/* Per-component state of a register (struct ir2_reg holds four of these). */
struct ir2_reg_component {
   uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;    /* is it currently allocated */
   uint8_t ref_count; /* for ra */
};

65struct ir2_reg {
66   uint8_t idx; /* assigned hardware register */
67   uint8_t ncomp;
68
69   uint8_t loop_depth;
70   bool initialized;
71   /* block_idx to free on (-1 = free on ref_count==0) */
72   int block_idx_free;
73   struct ir2_reg_component comp[4];
74};
75
76struct ir2_instr {
77   unsigned idx;
78
79   unsigned block_idx;
80
81   enum {
82      IR2_NONE,
83      IR2_FETCH,
84      IR2_ALU,
85      IR2_CF,
86   } type : 2;
87
88   /* instruction needs to be emitted (for scheduling) */
89   bool need_emit : 1;
90
91   /* predicate value - (usually) same for entire block */
92   uint8_t pred : 2;
93
94   /* src */
95   uint8_t src_count;
96   struct ir2_src src[4];
97
98   /* dst */
99   bool is_ssa;
100   union {
101      struct ir2_reg ssa;
102      struct ir2_reg *reg;
103   };
104
105   /* type-specific */
106   union {
107      struct {
108         instr_fetch_opc_t opc : 5;
109         union {
110            struct {
111               uint8_t const_idx;
112               uint8_t const_idx_sel;
113            } vtx;
114            struct {
115               bool is_cube : 1;
116               bool is_rect : 1;
117               uint8_t samp_id;
118            } tex;
119         };
120      } fetch;
121      struct {
122         /* store possible opcs, then we can choose vector/scalar instr */
123         instr_scalar_opc_t scalar_opc : 6;
124         instr_vector_opc_t vector_opc : 5;
125         /* same as nir */
126         uint8_t write_mask : 4;
127         bool saturate : 1;
128
129         /* export idx (-1 no export) */
130         int8_t export;
131
132         /* for scalarized 2 src instruction */
133         uint8_t src1_swizzle;
134      } alu;
135      struct {
136         /* jmp dst block_idx */
137         uint8_t block_idx;
138      } cf;
139   };
140};
141
/* One scheduled slot (entry of ctx->instr_sched). */
struct ir2_sched_instr {
   /* 8 x 32 = 256 bits, the same size as ir2_context::reg_state */
   uint32_t reg_state[8];
   /* NOTE(review): presumably the vector-slot instruction and the co-issued
    * scalar-slot instruction (`_s`) - confirm against the scheduler
    */
   struct ir2_instr *instr, *instr_s;
};

147struct ir2_context {
148   struct fd2_shader_stateobj *so;
149
150   unsigned block_idx, pred_idx;
151   uint8_t pred;
152   bool block_has_jump[64];
153
154   unsigned loop_last_block[64];
155   unsigned loop_depth;
156
157   nir_shader *nir;
158
159   /* ssa index of position output */
160   struct ir2_src position;
161
162   /* to translate SSA ids to instruction ids */
163   int16_t ssa_map[1024];
164
165   struct ir2_shader_info *info;
166   struct ir2_frag_linkage *f;
167
168   int prev_export;
169
170   /* RA state */
171   struct ir2_reg *live_regs[64];
172   uint32_t reg_state[256 / 32]; /* 64*4 bits */
173
174   /* inputs */
175   struct ir2_reg input[16 + 1]; /* 16 + param */
176
177   /* non-ssa regs */
178   struct ir2_reg reg[64];
179   unsigned reg_count;
180
181   struct ir2_instr instr[0x300];
182   unsigned instr_count;
183
184   struct ir2_sched_instr instr_sched[0x180];
185   unsigned instr_sched_count;
186};
187
188void assemble(struct ir2_context *ctx, bool binning);
189
190void ir2_nir_compile(struct ir2_context *ctx, bool binning);
191bool ir2_nir_lower_scalar(nir_shader *shader);
192
193void ra_count_refs(struct ir2_context *ctx);
194void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
195            bool export, uint8_t export_writemask);
196void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
197void ra_block_free(struct ir2_context *ctx, unsigned block);
198
199void cp_src(struct ir2_context *ctx);
200void cp_export(struct ir2_context *ctx);
201
/* utils */

/* Pre-built swizzle values.
 *
 * A swizzle holds one 2-bit field per destination channel i (bits 2*i and
 * 2*i+1).  The field is relative to the channel: it stores (c - i) & 3,
 * where c is the selected source component, so an all-zero swizzle is the
 * identity (see swiz_set()/swiz_get()).  Channels that a name does not
 * mention keep a zero field, i.e. they pass through unchanged.
 */
enum {
   IR2_SWIZZLE_Y = 1 << 0,
   IR2_SWIZZLE_Z = 2 << 0,
   IR2_SWIZZLE_W = 3 << 0,

   IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,

   IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,

   IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
   IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
   IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};

/* Report a compile failure: print the printf-style message to stdout, then
 * trip assert(0).
 *
 * `ctx` is accepted but not used by the expansion.  When NDEBUG is defined
 * the assert compiles out, so only the message is printed and execution
 * continues after the macro.
 */
#define compile_error(ctx, args...)                                            \
   ({                                                                          \
      printf(args);                                                            \
      assert(0);                                                               \
   })

228static inline struct ir2_src
229ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
230{
231   return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
232}
233
/* Declared here because ir2_assemble uses it.
 * NOTE(review): presumably returns a source operand that reads as zero; the
 * definition is not visible in this header - confirm.
 */
struct ir2_src ir2_zero(struct ir2_context *ctx);

/* Iterate `it` over the first (ctx)->instr_count entries of (ctx)->instr,
 * skipping entries whose type is IR2_NONE.
 *
 * The end of the range is re-evaluated on every iteration, so entries that
 * the loop body appends (by growing instr_count) are visited too.
 */
#define ir2_foreach_instr(it, ctx)                                             \
   for (struct ir2_instr *it = (ctx)->instr; ({                                \
           while (it != &(ctx)->instr[(ctx)->instr_count] &&                   \
                  it->type == IR2_NONE)                                        \
              it++;                                                            \
           it != &(ctx)->instr[(ctx)->instr_count];                            \
        });                                                                    \
        it++)

/* Iterate `it` over every non-NULL entry of (ctx)->live_regs[0..63].
 *
 * The cursor `__ptr` walks the slot array; `it` is reloaded from the current
 * slot before each iteration.  The step advances the slot cursor, so an
 * entry that the loop body leaves in place is visited exactly once (stepping
 * `it` instead would revisit the same slot forever).  The body may clear the
 * slot it is visiting.
 */
#define ir2_foreach_live_reg(it, ctx)                                          \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({                     \
           while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL)            \
              __ptr++;                                                         \
           __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL;              \
        });                                                                    \
        __ptr++)

/* Iterate `it` over the first avail_count entries of the array `avail`;
 * both names are taken from the scope in which the macro is used.
 *
 * The bounds test runs before the slot is loaded, so avail[avail_count]
 * (one past the valid entries) is never read.
 */
#define ir2_foreach_avail(it)                                                  \
   for (struct ir2_instr **__instrp = avail, *it;                              \
        __instrp != &avail[avail_count] && ((it = *__instrp), true);           \
        __instrp++)

/* Iterate `it` over the first src_count entries of (instr)->src.  The
 * `instr` argument is parenthesized so that any pointer expression (for
 * example `p + 1`) can be passed.
 */
#define ir2_foreach_src(it, instr)                                             \
   for (struct ir2_src *it = (instr)->src;                                     \
        it != &(instr)->src[(instr)->src_count]; it++)

/* mask for register allocation
 * 64 registers with 4 components each = 256 bits
 *
 * The mask is a plain array of 32-bit words (cf. ir2_context::reg_state);
 * bit `num` lives in word num / 32 at bit position num % 32.
 */

/* Return whether bit `num` of `mask` is set.  The shifted constant is
 * unsigned so that testing bit 31 does not shift into the sign bit of an int.
 */
static inline bool
mask_isset(const uint32_t *mask, unsigned num)
{
   return (mask[num / 32] & (1u << (num % 32))) != 0;
}

/* Set bit `num` of `mask` (word num / 32, bit num % 32).  The shifted
 * constant is unsigned so that setting bit 31 is well defined.
 */
static inline void
mask_set(uint32_t *mask, unsigned num)
{
   mask[num / 32] |= 1u << (num % 32);
}

/* Clear bit `num` of `mask` (word num / 32, bit num % 32).  The shifted
 * constant is unsigned so that clearing bit 31 is well defined.
 */
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
   mask[num / 32] &= ~(1u << (num % 32));
}

/* Return the 4 mask bits that belong to register `num`: each 32-bit word
 * holds eight consecutive 4-bit groups, so the group is nibble num % 8 of
 * word num / 8.
 */
static inline unsigned
mask_reg(const uint32_t *mask, unsigned num)
{
   const unsigned shift = (num % 8) * 4;

   return (mask[num / 8] >> shift) & 0xf;
}

293static inline bool
294is_export(struct ir2_instr *instr)
295{
296   return instr->type == IR2_ALU && instr->alu.export >= 0;
297}
298
299static inline instr_alloc_type_t
300export_buf(unsigned num)
301{
302   return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
303}
304
/* component c for channel i
 *
 * Returns the swizzle bits that make channel `i` select component `c`.  The
 * 2-bit field stores the offset (c - i) modulo 4 at bit position 2 * i; all
 * other bits of the result are zero.
 */
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
   const unsigned rel = (c - i) & 3;

   return rel << (i * 2);
}

/* get swizzle in channel i
 *
 * Returns the component (0..3) that channel `i` of `swiz` selects: the
 * stored 2-bit offset plus the channel index, modulo 4.
 */
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
   const unsigned rel = swiz >> (i * 2);

   return (rel + i) & 3;
}

/* Compose two swizzles: channel i of the result selects the component that
 * swiz0 selects for channel swiz_get(swiz1, i).
 */
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned merged = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned via = swiz_get(swiz1, chan);

      merged |= swiz_set(swiz_get(swiz0, via), chan);
   }
   return merged;
}

/* In-place variant of swiz_merge(): replaces *swiz0 with the composition of
 * *swiz0 and swiz1.  The composed value only uses the low 8 bits, so it fits
 * the uint8_t destination.
 */
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   *swiz0 = swiz_merge(*swiz0, swiz1);
}

337static inline struct ir2_reg *
338get_reg(struct ir2_instr *instr)
339{
340   return instr->is_ssa ? &instr->ssa : instr->reg;
341}
342
343static inline struct ir2_reg *
344get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
345{
346   switch (src->type) {
347   case IR2_SRC_INPUT:
348      return &ctx->input[src->num];
349   case IR2_SRC_SSA:
350      return &ctx->instr[src->num].ssa;
351   case IR2_SRC_REG:
352      return &ctx->reg[src->num];
353   default:
354      return NULL;
355   }
356}
357
358/* gets a ncomp value for the dst */
359static inline unsigned
360dst_ncomp(struct ir2_instr *instr)
361{
362   if (instr->is_ssa)
363      return instr->ssa.ncomp;
364
365   if (instr->type == IR2_FETCH)
366      return instr->reg->ncomp;
367
368   assert(instr->type == IR2_ALU);
369
370   unsigned ncomp = 0;
371   for (int i = 0; i < instr->reg->ncomp; i++)
372      ncomp += !!(instr->alu.write_mask & 1 << i);
373   return ncomp;
374}
375
376/* gets a ncomp value for the src registers */
377static inline unsigned
378src_ncomp(struct ir2_instr *instr)
379{
380   if (instr->type == IR2_FETCH) {
381      switch (instr->fetch.opc) {
382      case VTX_FETCH:
383         return 1;
384      case TEX_FETCH:
385         return instr->fetch.tex.is_cube ? 3 : 2;
386      case TEX_SET_TEX_LOD:
387         return 1;
388      default:
389         assert(0);
390      }
391   }
392
393   switch (instr->alu.scalar_opc) {
394   case PRED_SETEs ... KILLONEs:
395      return 1;
396   default:
397      break;
398   }
399
400   switch (instr->alu.vector_opc) {
401   case DOT2ADDv:
402      return 2;
403   case DOT3v:
404      return 3;
405   case DOT4v:
406   case CUBEv:
407   case PRED_SETE_PUSHv:
408      return 4;
409   default:
410      return dst_ncomp(instr);
411   }
412}
413