1/* -*- c++ -*- */
2/*
3 * Copyright © 2011-2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#ifndef BRW_IR_VEC4_H
26#define BRW_IR_VEC4_H
27
28#include "brw_shader.h"
29
30namespace brw {
31
32class dst_reg;
33
34class src_reg : public backend_reg
35{
36public:
37   DECLARE_RALLOC_CXX_OPERATORS(src_reg)
38
39   void init();
40
41   src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
42   src_reg();
43   src_reg(struct ::brw_reg reg);
44
45   bool equals(const src_reg &r) const;
46   bool negative_equals(const src_reg &r) const;
47
48   src_reg(class vec4_visitor *v, const struct glsl_type *type);
49   src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
50
51   explicit src_reg(const dst_reg &reg);
52
53   src_reg *reladdr;
54};
55
56static inline src_reg
57retype(src_reg reg, enum brw_reg_type type)
58{
59   reg.type = type;
60   return reg;
61}
62
63namespace detail {
64
65static inline void
66add_byte_offset(backend_reg *reg, unsigned bytes)
67{
68   switch (reg->file) {
69      case BAD_FILE:
70         break;
71      case VGRF:
72      case ATTR:
73      case UNIFORM:
74         reg->offset += bytes;
75         assert(reg->offset % 16 == 0);
76         break;
77      case MRF: {
78         const unsigned suboffset = reg->offset + bytes;
79         reg->nr += suboffset / REG_SIZE;
80         reg->offset = suboffset % REG_SIZE;
81         assert(reg->offset % 16 == 0);
82         break;
83      }
84      case ARF:
85      case FIXED_GRF: {
86         const unsigned suboffset = reg->subnr + bytes;
87         reg->nr += suboffset / REG_SIZE;
88         reg->subnr = suboffset % REG_SIZE;
89         assert(reg->subnr % 16 == 0);
90         break;
91      }
92      default:
93         assert(bytes == 0);
94   }
95}
96
97} /* namespace detail */
98
99static inline src_reg
100byte_offset(src_reg reg, unsigned bytes)
101{
102   detail::add_byte_offset(&reg, bytes);
103   return reg;
104}
105
106static inline src_reg
107offset(src_reg reg, unsigned width, unsigned delta)
108{
109   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
110   const unsigned num_components = MAX2(width / 4 * stride, 4);
111   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
112}
113
114static inline src_reg
115horiz_offset(src_reg reg, unsigned delta)
116{
117   return byte_offset(reg, delta * type_sz(reg.type));
118}
119
120/**
121 * Reswizzle a given source register.
122 * \sa brw_swizzle().
123 */
124static inline src_reg
125swizzle(src_reg reg, unsigned swizzle)
126{
127   if (reg.file == IMM)
128      reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
129   else
130      reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
131
132   return reg;
133}
134
135static inline src_reg
136negate(src_reg reg)
137{
138   assert(reg.file != IMM);
139   reg.negate = !reg.negate;
140   return reg;
141}
142
143static inline bool
144is_uniform(const src_reg &reg)
145{
146   return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
147          (!reg.reladdr || is_uniform(*reg.reladdr));
148}
149
150class dst_reg : public backend_reg
151{
152public:
153   DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
154
155   void init();
156
157   dst_reg();
158   dst_reg(enum brw_reg_file file, int nr);
159   dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
160           unsigned writemask);
161   dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
162           unsigned writemask);
163   dst_reg(struct ::brw_reg reg);
164   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
165
166   explicit dst_reg(const src_reg &reg);
167
168   bool equals(const dst_reg &r) const;
169
170   src_reg *reladdr;
171};
172
173static inline dst_reg
174retype(dst_reg reg, enum brw_reg_type type)
175{
176   reg.type = type;
177   return reg;
178}
179
180static inline dst_reg
181byte_offset(dst_reg reg, unsigned bytes)
182{
183   detail::add_byte_offset(&reg, bytes);
184   return reg;
185}
186
187static inline dst_reg
188offset(dst_reg reg, unsigned width, unsigned delta)
189{
190   const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
191   const unsigned num_components = MAX2(width / 4 * stride, 4);
192   return byte_offset(reg, num_components * type_sz(reg.type) * delta);
193}
194
195static inline dst_reg
196horiz_offset(const dst_reg &reg, unsigned delta)
197{
198   if (is_uniform(src_reg(reg)))
199      return reg;
200   else
201      return byte_offset(reg, delta * type_sz(reg.type));
202}
203
204static inline dst_reg
205writemask(dst_reg reg, unsigned mask)
206{
207   assert(reg.file != IMM);
208   assert((reg.writemask & mask) != 0);
209   reg.writemask &= mask;
210   return reg;
211}
212
213/**
214 * Return an integer identifying the discrete address space a register is
215 * contained in.  A register is by definition fully contained in the single
216 * reg_space it belongs to, so two registers with different reg_space ids are
217 * guaranteed not to overlap.  Most register files are a single reg_space of
218 * its own, only the VGRF file is composed of multiple discrete address
219 * spaces, one for each VGRF allocation.
220 */
221static inline uint32_t
222reg_space(const backend_reg &r)
223{
224   return r.file << 16 | (r.file == VGRF ? r.nr : 0);
225}
226
227/**
228 * Return the base offset in bytes of a register relative to the start of its
229 * reg_space().
230 */
231static inline unsigned
232reg_offset(const backend_reg &r)
233{
234   return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
235          (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
236          (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
237}
238
239/**
240 * Return whether the register region starting at \p r and spanning \p dr
241 * bytes could potentially overlap the register region starting at \p s and
242 * spanning \p ds bytes.
243 */
244static inline bool
245regions_overlap(const backend_reg &r, unsigned dr,
246                const backend_reg &s, unsigned ds)
247{
248   if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
249      /* COMPR4 regions are translated by the hardware during decompression
250       * into two separate half-regions 4 MRFs apart from each other.
251       */
252      backend_reg t0 = r;
253      t0.nr &= ~BRW_MRF_COMPR4;
254      backend_reg t1 = t0;
255      t1.offset += 4 * REG_SIZE;
256      return regions_overlap(t0, dr / 2, s, ds) ||
257             regions_overlap(t1, dr / 2, s, ds);
258
259   } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
260      return regions_overlap(s, ds, r, dr);
261
262   } else {
263      return reg_space(r) == reg_space(s) &&
264             !(reg_offset(r) + dr <= reg_offset(s) ||
265               reg_offset(s) + ds <= reg_offset(r));
266   }
267}
268
269class vec4_instruction : public backend_instruction {
270public:
271   DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
272
273   vec4_instruction(enum opcode opcode,
274                    const dst_reg &dst = dst_reg(),
275                    const src_reg &src0 = src_reg(),
276                    const src_reg &src1 = src_reg(),
277                    const src_reg &src2 = src_reg());
278
279   dst_reg dst;
280   src_reg src[3];
281
282   enum brw_urb_write_flags urb_write_flags;
283
284   unsigned sol_binding; /**< gfx6: SOL binding table index */
285   bool sol_final_write; /**< gfx6: send commit message */
286   unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */
287
288   bool is_send_from_grf() const;
289   unsigned size_read(unsigned arg) const;
290   bool can_reswizzle(const struct intel_device_info *devinfo,
291                      int dst_writemask,
292                      int swizzle, int swizzle_mask);
293   void reswizzle(int dst_writemask, int swizzle);
294   bool can_do_source_mods(const struct intel_device_info *devinfo);
295   bool can_do_cmod();
296   bool can_do_writemask(const struct intel_device_info *devinfo);
297   bool can_change_types() const;
298   bool has_source_and_destination_hazard() const;
299   unsigned implied_mrf_writes() const;
300
301   bool is_align1_partial_write()
302   {
303      return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
304             opcode == VEC4_OPCODE_SET_HIGH_32BIT;
305   }
306
307   bool reads_flag() const
308   {
309      return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
310   }
311
312   bool reads_flag(unsigned c)
313   {
314      if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
315         return true;
316
317      switch (predicate) {
318      case BRW_PREDICATE_NONE:
319         return false;
320      case BRW_PREDICATE_ALIGN16_REPLICATE_X:
321         return c == 0;
322      case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
323         return c == 1;
324      case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
325         return c == 2;
326      case BRW_PREDICATE_ALIGN16_REPLICATE_W:
327         return c == 3;
328      default:
329         return true;
330      }
331   }
332
333   bool writes_flag(const intel_device_info *devinfo) const
334   {
335      return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
336                                  opcode != BRW_OPCODE_CSEL &&
337                                  opcode != BRW_OPCODE_IF &&
338                                  opcode != BRW_OPCODE_WHILE));
339   }
340
341   bool reads_g0_implicitly() const
342   {
343      switch (opcode) {
344      case SHADER_OPCODE_TEX:
345      case SHADER_OPCODE_TXL:
346      case SHADER_OPCODE_TXD:
347      case SHADER_OPCODE_TXF:
348      case SHADER_OPCODE_TXF_CMS_W:
349      case SHADER_OPCODE_TXF_CMS:
350      case SHADER_OPCODE_TXF_MCS:
351      case SHADER_OPCODE_TXS:
352      case SHADER_OPCODE_TG4:
353      case SHADER_OPCODE_TG4_OFFSET:
354      case SHADER_OPCODE_SAMPLEINFO:
355      case VS_OPCODE_PULL_CONSTANT_LOAD:
356      case GS_OPCODE_SET_PRIMITIVE_ID:
357      case GS_OPCODE_GET_INSTANCE_ID:
358      case SHADER_OPCODE_GFX4_SCRATCH_READ:
359      case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
360         return true;
361      default:
362         return false;
363      }
364   }
365};
366
367/**
368 * Make the execution of \p inst dependent on the evaluation of a possibly
369 * inverted predicate.
370 */
371inline vec4_instruction *
372set_predicate_inv(enum brw_predicate pred, bool inverse,
373                  vec4_instruction *inst)
374{
375   inst->predicate = pred;
376   inst->predicate_inverse = inverse;
377   return inst;
378}
379
380/**
381 * Make the execution of \p inst dependent on the evaluation of a predicate.
382 */
383inline vec4_instruction *
384set_predicate(enum brw_predicate pred, vec4_instruction *inst)
385{
386   return set_predicate_inv(pred, false, inst);
387}
388
389/**
390 * Write the result of evaluating the condition given by \p mod to a flag
391 * register.
392 */
393inline vec4_instruction *
394set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
395{
396   inst->conditional_mod = mod;
397   return inst;
398}
399
400/**
401 * Clamp the result of \p inst to the saturation range of its destination
402 * datatype.
403 */
404inline vec4_instruction *
405set_saturate(bool saturate, vec4_instruction *inst)
406{
407   inst->saturate = saturate;
408   return inst;
409}
410
411/**
412 * Return the number of dataflow registers written by the instruction (either
413 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
414 * register_size)'.  The somewhat arbitrary register size unit is 16B for the
415 * UNIFORM and IMM files and 32B for all other files.
416 */
417inline unsigned
418regs_written(const vec4_instruction *inst)
419{
420   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
421   return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
422                       REG_SIZE);
423}
424
425/**
426 * Return the number of dataflow registers read by the instruction (either
427 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
428 * register_size)'.  The somewhat arbitrary register size unit is 16B for the
429 * UNIFORM and IMM files and 32B for all other files.
430 */
431inline unsigned
432regs_read(const vec4_instruction *inst, unsigned i)
433{
434   const unsigned reg_size =
435      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
436   return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
437                       reg_size);
438}
439
440static inline enum brw_reg_type
441get_exec_type(const vec4_instruction *inst)
442{
443   enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
444
445   for (int i = 0; i < 3; i++) {
446      if (inst->src[i].file != BAD_FILE) {
447         const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
448         if (type_sz(t) > type_sz(exec_type))
449            exec_type = t;
450         else if (type_sz(t) == type_sz(exec_type) &&
451                  brw_reg_type_is_floating_point(t))
452            exec_type = t;
453      }
454   }
455
456   if (exec_type == BRW_REGISTER_TYPE_B)
457      exec_type = inst->dst.type;
458
459   /* TODO: We need to handle half-float conversions. */
460   assert(exec_type != BRW_REGISTER_TYPE_HF ||
461          inst->dst.type == BRW_REGISTER_TYPE_HF);
462   assert(exec_type != BRW_REGISTER_TYPE_B);
463
464   return exec_type;
465}
466
467static inline unsigned
468get_exec_type_size(const vec4_instruction *inst)
469{
470   return type_sz(get_exec_type(inst));
471}
472
473} /* namespace brw */
474
475#endif
476