1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32/** @file brw_reg.h
33 *
34 * This file defines struct brw_reg, which is our representation for EU
35 * registers.  They're not a hardware specific format, just an abstraction
36 * that intends to capture the full flexibility of the hardware registers.
37 *
38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
39 * the abstract brw_reg type into the actual hardware instruction encoding.
40 */
41
42#ifndef BRW_REG_H
43#define BRW_REG_H
44
45#include <stdbool.h>
46#include "util/compiler.h"
47#include "main/macros.h"
48#include "program/prog_instruction.h"
49#include "brw_eu_defines.h"
50#include "brw_reg_type.h"
51
52#ifdef __cplusplus
53extern "C" {
54#endif
55
56struct intel_device_info;
57
/**
 * Number of general purpose registers (VS, WM, etc).
 *
 * brw_reg() asserts that a GRF register number is below this limit.
 */
#define BRW_MAX_GRF 128

/**
 * First GRF used for the MRF hack.
 *
 * On gfx7, MRFs are no longer used, and contiguous GRFs are used instead.  We
 * haven't converted our compiler to be aware of this, so it asks for MRFs and
 * brw_eu_emit.c quietly converts them to be accesses of the top GRFs.  The
 * register allocators have to be careful of this to avoid corrupting the "MRF"s
 * with actual GRF allocations.
 */
#define GFX7_MRF_HACK_START 112
71
/**
 * Number of message register file registers.
 *
 * Evaluates to 24 when \p gen equals 6 and to 16 for any other value.  The
 * argument is parenthesized so that arbitrary expressions may be passed.
 */
#define BRW_MAX_MRF(gen) ((gen) == 6 ? 24 : 16)
74
/* A swizzle packs four 2-bit channel selectors; selector 0 lives in the
 * lowest two bits and selector 3 in bits 6..7.
 */
#define BRW_SWIZZLE4(a, b, c, d) \
   (((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6))

/* Extract the 2-bit selector number idx from a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx) * 2)) & 0x3)

/* Identity swizzle (two names for the same value). */
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0, 1, 2, 3)
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0, 1, 2, 3)

/* Single channel replicated to all four selectors. */
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0, 0, 0, 0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1, 1, 1, 1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2, 2, 2, 2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3, 3, 3, 3)

/* Remaining named channel combinations. */
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0, 1, 0, 1)
#define BRW_SWIZZLE_YXYX      BRW_SWIZZLE4(1, 0, 1, 0)
#define BRW_SWIZZLE_XZXZ      BRW_SWIZZLE4(0, 2, 0, 2)
#define BRW_SWIZZLE_YZXW      BRW_SWIZZLE4(1, 2, 0, 3)
#define BRW_SWIZZLE_YWYW      BRW_SWIZZLE4(1, 3, 1, 3)
#define BRW_SWIZZLE_ZXYW      BRW_SWIZZLE4(2, 0, 1, 3)
#define BRW_SWIZZLE_ZWZW      BRW_SWIZZLE4(2, 3, 2, 3)
#define BRW_SWIZZLE_WZWZ      BRW_SWIZZLE4(3, 2, 3, 2)
#define BRW_SWIZZLE_WZYX      BRW_SWIZZLE4(3, 2, 1, 0)
#define BRW_SWIZZLE_XXZZ      BRW_SWIZZLE4(0, 0, 2, 2)
#define BRW_SWIZZLE_YYWW      BRW_SWIZZLE4(1, 1, 3, 3)
#define BRW_SWIZZLE_YXWZ      BRW_SWIZZLE4(1, 0, 3, 2)

/* The identity swizzle shifted down/up by comp selector positions. */
#define BRW_SWZ_COMP_INPUT(comp)  (BRW_SWIZZLE_XYZW >> ((comp) * 2))
#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp) * 2))
99
100static inline bool
101brw_is_single_value_swizzle(unsigned swiz)
102{
103   return (swiz == BRW_SWIZZLE_XXXX ||
104           swiz == BRW_SWIZZLE_YYYY ||
105           swiz == BRW_SWIZZLE_ZZZZ ||
106           swiz == BRW_SWIZZLE_WWWW);
107}
108
/**
 * Compose two swizzles: the result is what you get by applying \p swz0 to
 * the output of \p swz1.  Argument order follows function composition.
 */
static inline unsigned
brw_compose_swizzle(unsigned swz0, unsigned swz1)
{
   unsigned composed = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      /* swz0 picks which selector of swz1 feeds this channel. */
      const unsigned picked = BRW_GET_SWZ(swz0, chan);

      composed |= BRW_GET_SWZ(swz1, picked) << (chan * 2);
   }

   return composed;
}
123
/**
 * Shuffle the bits of \p mask through swizzle \p swz (AKA image): output bit
 * i is set when the mask bit named by selector i of \p swz is set.
 */
static inline unsigned
brw_apply_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned image = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned source_bit = 1u << BRW_GET_SWZ(swz, chan);

      if ((mask & source_bit) != 0)
         image |= 1u << chan;
   }

   return image;
}
140
/**
 * Shuffle the bits of \p mask through the inverse of swizzle \p swz (AKA
 * preimage): for every set bit i of \p mask, the bit named by selector i of
 * \p swz is set in the result.  Useful to find out which components are read
 * from a swizzled source given the instruction writemask.
 */
static inline unsigned
brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask)
{
   unsigned preimage = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      if ((mask & (1u << chan)) == 0)
         continue;

      preimage |= 1u << BRW_GET_SWZ(swz, chan);
   }

   return preimage;
}
158
/**
 * Build an identity swizzle restricted to the channels enabled in \p mask.
 * Provided \p mask is non-zero, the result only references enabled channels.
 * For any instruction OP and any mask:
 *
 *    brw_OP(p, brw_writemask(dst, mask),
 *           brw_swizzle(src, brw_swizzle_for_mask(mask)));
 *
 * is equivalent to the same instruction without swizzle:
 *
 *    brw_OP(p, brw_writemask(dst, mask), src);
 */
static inline unsigned
brw_swizzle_for_mask(unsigned mask)
{
   /* Disabled channels repeat the most recent enabled channel; channels
    * below the first enabled one use the lowest set bit of the mask.
    */
   unsigned selector = 0;
   if (mask)
      selector = ffs(mask) - 1;

   unsigned packed = 0;
   for (unsigned chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
         selector = chan;

      packed |= selector << (chan * 2);
   }

   return packed;
}
183
/**
 * Build an identity swizzle covering the first \p n components of a vector.
 * When only a subset of channels of a vec4 are used we don't want to
 * reference the other channels, as that will tell optimization passes that
 * those other channels are used.
 */
static inline unsigned
brw_swizzle_for_size(unsigned n)
{
   /* Mask with the low n bits set. */
   const unsigned low_channels = (1 << n) - 1;

   return brw_swizzle_for_mask(low_channels);
}
195
/**
 * Converse of brw_swizzle_for_mask(): return the mask of components that
 * swizzle \p swz accesses.
 */
static inline unsigned
brw_mask_for_swizzle(unsigned swz)
{
   /* Preimage of an all-ones mask: every selector contributes its bit. */
   const unsigned every_channel = ~0u;

   return brw_apply_inv_swizzle_to_mask(swz, every_channel);
}
205
/* Swizzle helper for immediate values; only declared in this header.
 * brw_swizzle() calls it when the register file is BRW_IMMEDIATE_VALUE.
 */
uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz);

/* Size of one register in bytes (8 * 4 = 32).  byte_offset() uses it to
 * convert between a byte offset and a (nr, subnr) pair.
 */
#define REG_SIZE (8*4)
209
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 *
 * The struct consists of two unions.  The first overlays the type, file and
 * modifier bitfields (32 bits in total) with `bits`.  The second overlays the
 * register number and region bitfields with the immediate-value members.
 * brw_regs_equal() compares `bits` and `u64` to decide equality.
 */
struct brw_reg {
   union {
      struct {
         enum brw_reg_type type:4;
         enum brw_reg_file file:3;      /* :2 hardware format */
         unsigned negate:1;             /* source only */
         unsigned abs:1;                /* source only */
         unsigned address_mode:1;       /* relative addressing, hopefully! */
         unsigned pad0:17;              /* unused; brw_reg() zeroes it */
         unsigned subnr:5;              /* :1 in align16 */
      };
      uint32_t bits;                    /* all of the fields above as one word */
   };

   union {
      struct {
         unsigned nr;             /* register number */
         unsigned swizzle:8;      /* src only, align16 only */
         unsigned writemask:4;    /* dest only, align16 only */
         int  indirect_offset:10; /* relative addressing offset */
         unsigned vstride:4;      /* source only */
         unsigned width:3;        /* src only, align1 only */
         unsigned hstride:2;      /* align1 only */
         unsigned pad1:1;         /* unused; brw_reg() zeroes it */
      };

      /* Immediate payloads, written by the brw_imm_*() constructors. */
      double df;
      uint64_t u64;
      int64_t d64;
      float f;
      int   d;
      unsigned ud;
   };
};
250
251static inline bool
252brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b)
253{
254   return a->bits == b->bits && a->u64 == b->u64;
255}
256
257static inline bool
258brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b)
259{
260   if (a->file == IMM) {
261      if (a->bits != b->bits)
262         return false;
263
264      switch ((enum brw_reg_type) a->type) {
265      case BRW_REGISTER_TYPE_UQ:
266      case BRW_REGISTER_TYPE_Q:
267         return a->d64 == -b->d64;
268      case BRW_REGISTER_TYPE_DF:
269         return a->df == -b->df;
270      case BRW_REGISTER_TYPE_UD:
271      case BRW_REGISTER_TYPE_D:
272         return a->d == -b->d;
273      case BRW_REGISTER_TYPE_F:
274         return a->f == -b->f;
275      case BRW_REGISTER_TYPE_VF:
276         /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation
277          * of -0).  There are occasions where 0 or -0 is used and the exact
278          * bit pattern is desired.  At the very least, changing this to allow
279          * 0 as a negation of 0 causes some fp64 tests to fail on IVB.
280          */
281         return a->ud == (b->ud ^ 0x80808080);
282      case BRW_REGISTER_TYPE_UW:
283      case BRW_REGISTER_TYPE_W:
284      case BRW_REGISTER_TYPE_UV:
285      case BRW_REGISTER_TYPE_V:
286      case BRW_REGISTER_TYPE_HF:
287         /* FINISHME: Implement support for these types once there is
288          * something in the compiler that can generate them.  Until then,
289          * they cannot be tested.
290          */
291         return false;
292      case BRW_REGISTER_TYPE_UB:
293      case BRW_REGISTER_TYPE_B:
294      case BRW_REGISTER_TYPE_NF:
295      default:
296         unreachable("not reached");
297      }
298   } else {
299      struct brw_reg tmp = *a;
300
301      tmp.negate = !tmp.negate;
302
303      return brw_regs_equal(&tmp, b);
304   }
305}
306
/* Packed description of an indirect access; the three bitfields add up to
 * 32 bits.  NOTE(review): no user of this struct is visible in this part of
 * the file, so the field meanings below are inferred from their names.
 */
struct brw_indirect {
   unsigned addr_subnr:4;  /* presumably the address sub-register number */
   int addr_offset:10;     /* signed offset */
   unsigned pad:18;        /* padding */
};
312
313
314static inline unsigned
315type_sz(unsigned type)
316{
317   switch(type) {
318   case BRW_REGISTER_TYPE_UQ:
319   case BRW_REGISTER_TYPE_Q:
320   case BRW_REGISTER_TYPE_DF:
321   case BRW_REGISTER_TYPE_NF:
322      return 8;
323   case BRW_REGISTER_TYPE_UD:
324   case BRW_REGISTER_TYPE_D:
325   case BRW_REGISTER_TYPE_F:
326   case BRW_REGISTER_TYPE_VF:
327      return 4;
328   case BRW_REGISTER_TYPE_UW:
329   case BRW_REGISTER_TYPE_W:
330   case BRW_REGISTER_TYPE_HF:
331   /* [U]V components are 4-bit, but HW unpacks them to 16-bit (2 bytes) */
332   case BRW_REGISTER_TYPE_UV:
333   case BRW_REGISTER_TYPE_V:
334      return 2;
335   case BRW_REGISTER_TYPE_UB:
336   case BRW_REGISTER_TYPE_B:
337      return 1;
338   default:
339      unreachable("not reached");
340   }
341}
342
343static inline enum brw_reg_type
344get_exec_type(const enum brw_reg_type type)
345{
346   switch (type) {
347   case BRW_REGISTER_TYPE_B:
348   case BRW_REGISTER_TYPE_V:
349      return BRW_REGISTER_TYPE_W;
350   case BRW_REGISTER_TYPE_UB:
351   case BRW_REGISTER_TYPE_UV:
352      return BRW_REGISTER_TYPE_UW;
353   case BRW_REGISTER_TYPE_VF:
354      return BRW_REGISTER_TYPE_F;
355   default:
356      return type;
357   }
358}
359
360/**
361 * Return an integer type of the requested size and signedness.
362 */
363static inline enum brw_reg_type
364brw_int_type(unsigned sz, bool is_signed)
365{
366   switch (sz) {
367   case 1:
368      return (is_signed ? BRW_REGISTER_TYPE_B : BRW_REGISTER_TYPE_UB);
369   case 2:
370      return (is_signed ? BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
371   case 4:
372      return (is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
373   case 8:
374      return (is_signed ? BRW_REGISTER_TYPE_Q : BRW_REGISTER_TYPE_UQ);
375   default:
376      unreachable("Not reached.");
377   }
378}
379
380/**
381 * Construct a brw_reg.
382 * \param file      one of the BRW_x_REGISTER_FILE values
383 * \param nr        register number/index
384 * \param subnr     register sub number
385 * \param negate    register negate modifier
386 * \param abs       register abs modifier
387 * \param type      one of BRW_REGISTER_TYPE_x
388 * \param vstride   one of BRW_VERTICAL_STRIDE_x
389 * \param width     one of BRW_WIDTH_x
390 * \param hstride   one of BRW_HORIZONTAL_STRIDE_x
391 * \param swizzle   one of BRW_SWIZZLE_x
392 * \param writemask WRITEMASK_X/Y/Z/W bitfield
393 */
394static inline struct brw_reg
395brw_reg(enum brw_reg_file file,
396        unsigned nr,
397        unsigned subnr,
398        unsigned negate,
399        unsigned abs,
400        enum brw_reg_type type,
401        unsigned vstride,
402        unsigned width,
403        unsigned hstride,
404        unsigned swizzle,
405        unsigned writemask)
406{
407   struct brw_reg reg;
408   if (file == BRW_GENERAL_REGISTER_FILE)
409      assert(nr < BRW_MAX_GRF);
410   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
411      assert(nr <= BRW_ARF_TIMESTAMP);
412   /* Asserting on the MRF register number requires to know the hardware gen
413    * (gfx6 has 24 MRF registers), which we don't know here, so we assert
414    * for that in the generators and in brw_eu_emit.c
415    */
416
417   reg.type = type;
418   reg.file = file;
419   reg.negate = negate;
420   reg.abs = abs;
421   reg.address_mode = BRW_ADDRESS_DIRECT;
422   reg.pad0 = 0;
423   reg.subnr = subnr * type_sz(type);
424   reg.nr = nr;
425
426   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
427    * set swizzle and writemask to W, as the lower bits of subnr will
428    * be lost when converted to align16.  This is probably too much to
429    * keep track of as you'd want it adjusted by suboffset(), etc.
430    * Perhaps fix up when converting to align16?
431    */
432   reg.swizzle = swizzle;
433   reg.writemask = writemask;
434   reg.indirect_offset = 0;
435   reg.vstride = vstride;
436   reg.width = width;
437   reg.hstride = hstride;
438   reg.pad1 = 0;
439   return reg;
440}
441
442/** Construct float[16] register */
443static inline struct brw_reg
444brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
445{
446   return brw_reg(file,
447                  nr,
448                  subnr,
449                  0,
450                  0,
451                  BRW_REGISTER_TYPE_F,
452                  BRW_VERTICAL_STRIDE_16,
453                  BRW_WIDTH_16,
454                  BRW_HORIZONTAL_STRIDE_1,
455                  BRW_SWIZZLE_XYZW,
456                  WRITEMASK_XYZW);
457}
458
459/** Construct float[8] register */
460static inline struct brw_reg
461brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
462{
463   return brw_reg(file,
464                  nr,
465                  subnr,
466                  0,
467                  0,
468                  BRW_REGISTER_TYPE_F,
469                  BRW_VERTICAL_STRIDE_8,
470                  BRW_WIDTH_8,
471                  BRW_HORIZONTAL_STRIDE_1,
472                  BRW_SWIZZLE_XYZW,
473                  WRITEMASK_XYZW);
474}
475
476/** Construct float[4] register */
477static inline struct brw_reg
478brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
479{
480   return brw_reg(file,
481                  nr,
482                  subnr,
483                  0,
484                  0,
485                  BRW_REGISTER_TYPE_F,
486                  BRW_VERTICAL_STRIDE_4,
487                  BRW_WIDTH_4,
488                  BRW_HORIZONTAL_STRIDE_1,
489                  BRW_SWIZZLE_XYZW,
490                  WRITEMASK_XYZW);
491}
492
493/** Construct float[2] register */
494static inline struct brw_reg
495brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
496{
497   return brw_reg(file,
498                  nr,
499                  subnr,
500                  0,
501                  0,
502                  BRW_REGISTER_TYPE_F,
503                  BRW_VERTICAL_STRIDE_2,
504                  BRW_WIDTH_2,
505                  BRW_HORIZONTAL_STRIDE_1,
506                  BRW_SWIZZLE_XYXY,
507                  WRITEMASK_XY);
508}
509
510/** Construct float[1] register */
511static inline struct brw_reg
512brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
513{
514   return brw_reg(file,
515                  nr,
516                  subnr,
517                  0,
518                  0,
519                  BRW_REGISTER_TYPE_F,
520                  BRW_VERTICAL_STRIDE_0,
521                  BRW_WIDTH_1,
522                  BRW_HORIZONTAL_STRIDE_0,
523                  BRW_SWIZZLE_XXXX,
524                  WRITEMASK_X);
525}
526
527static inline struct brw_reg
528brw_vecn_reg(unsigned width, enum brw_reg_file file,
529             unsigned nr, unsigned subnr)
530{
531   switch (width) {
532   case 1:
533      return brw_vec1_reg(file, nr, subnr);
534   case 2:
535      return brw_vec2_reg(file, nr, subnr);
536   case 4:
537      return brw_vec4_reg(file, nr, subnr);
538   case 8:
539      return brw_vec8_reg(file, nr, subnr);
540   case 16:
541      return brw_vec16_reg(file, nr, subnr);
542   default:
543      unreachable("Invalid register width");
544   }
545}
546
547static inline struct brw_reg
548retype(struct brw_reg reg, enum brw_reg_type type)
549{
550   reg.type = type;
551   return reg;
552}
553
554static inline struct brw_reg
555firsthalf(struct brw_reg reg)
556{
557   return reg;
558}
559
560static inline struct brw_reg
561sechalf(struct brw_reg reg)
562{
563   if (reg.vstride)
564      reg.nr++;
565   return reg;
566}
567
568static inline struct brw_reg
569offset(struct brw_reg reg, unsigned delta)
570{
571   reg.nr += delta;
572   return reg;
573}
574
575
576static inline struct brw_reg
577byte_offset(struct brw_reg reg, unsigned bytes)
578{
579   unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
580   reg.nr = newoffset / REG_SIZE;
581   reg.subnr = newoffset % REG_SIZE;
582   return reg;
583}
584
585static inline struct brw_reg
586suboffset(struct brw_reg reg, unsigned delta)
587{
588   return byte_offset(reg, delta * type_sz(reg.type));
589}
590
591/** Construct unsigned word[16] register */
592static inline struct brw_reg
593brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
594{
595   return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
596}
597
598/** Construct unsigned word[8] register */
599static inline struct brw_reg
600brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
601{
602   return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
603}
604
605/** Construct unsigned word[1] register */
606static inline struct brw_reg
607brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
608{
609   return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
610}
611
612static inline struct brw_reg
613brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
614{
615   return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD);
616}
617
618static inline struct brw_reg
619brw_imm_reg(enum brw_reg_type type)
620{
621   return brw_reg(BRW_IMMEDIATE_VALUE,
622                  0,
623                  0,
624                  0,
625                  0,
626                  type,
627                  BRW_VERTICAL_STRIDE_0,
628                  BRW_WIDTH_1,
629                  BRW_HORIZONTAL_STRIDE_0,
630                  0,
631                  0);
632}
633
634/** Construct float immediate register */
635static inline struct brw_reg
636brw_imm_df(double df)
637{
638   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_DF);
639   imm.df = df;
640   return imm;
641}
642
643static inline struct brw_reg
644brw_imm_u64(uint64_t u64)
645{
646   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
647   imm.u64 = u64;
648   return imm;
649}
650
651static inline struct brw_reg
652brw_imm_f(float f)
653{
654   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
655   imm.f = f;
656   return imm;
657}
658
659/** Construct int64_t immediate register */
660static inline struct brw_reg
661brw_imm_q(int64_t q)
662{
663   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q);
664   imm.d64 = q;
665   return imm;
666}
667
668/** Construct int64_t immediate register */
669static inline struct brw_reg
670brw_imm_uq(uint64_t uq)
671{
672   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ);
673   imm.u64 = uq;
674   return imm;
675}
676
677/** Construct integer immediate register */
678static inline struct brw_reg
679brw_imm_d(int d)
680{
681   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
682   imm.d = d;
683   return imm;
684}
685
686/** Construct uint immediate register */
687static inline struct brw_reg
688brw_imm_ud(unsigned ud)
689{
690   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
691   imm.ud = ud;
692   return imm;
693}
694
695/** Construct ushort immediate register */
696static inline struct brw_reg
697brw_imm_uw(uint16_t uw)
698{
699   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
700   imm.ud = uw | (uw << 16);
701   return imm;
702}
703
704/** Construct short immediate register */
705static inline struct brw_reg
706brw_imm_w(int16_t w)
707{
708   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
709   imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16;
710   return imm;
711}
712
713/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
714 * numbers alias with _V and _VF below:
715 */
716
717/** Construct vector of eight signed half-byte values */
718static inline struct brw_reg
719brw_imm_v(unsigned v)
720{
721   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
722   imm.ud = v;
723   return imm;
724}
725
726/** Construct vector of eight unsigned half-byte values */
727static inline struct brw_reg
728brw_imm_uv(unsigned uv)
729{
730   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV);
731   imm.ud = uv;
732   return imm;
733}
734
735/** Construct vector of four 8-bit float values */
736static inline struct brw_reg
737brw_imm_vf(unsigned v)
738{
739   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
740   imm.ud = v;
741   return imm;
742}
743
744static inline struct brw_reg
745brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
746{
747   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
748   imm.vstride = BRW_VERTICAL_STRIDE_0;
749   imm.width = BRW_WIDTH_4;
750   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
751   imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
752   return imm;
753}
754
755
756static inline struct brw_reg
757brw_address(struct brw_reg reg)
758{
759   return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
760}
761
762/** Construct float[1] general-purpose register */
763static inline struct brw_reg
764brw_vec1_grf(unsigned nr, unsigned subnr)
765{
766   return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
767}
768
769/** Construct float[2] general-purpose register */
770static inline struct brw_reg
771brw_vec2_grf(unsigned nr, unsigned subnr)
772{
773   return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
774}
775
776/** Construct float[4] general-purpose register */
777static inline struct brw_reg
778brw_vec4_grf(unsigned nr, unsigned subnr)
779{
780   return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
781}
782
783/** Construct float[8] general-purpose register */
784static inline struct brw_reg
785brw_vec8_grf(unsigned nr, unsigned subnr)
786{
787   return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
788}
789
790/** Construct float[16] general-purpose register */
791static inline struct brw_reg
792brw_vec16_grf(unsigned nr, unsigned subnr)
793{
794   return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
795}
796
797static inline struct brw_reg
798brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr)
799{
800   return brw_vecn_reg(width, BRW_GENERAL_REGISTER_FILE, nr, subnr);
801}
802
803
804static inline struct brw_reg
805brw_uw8_grf(unsigned nr, unsigned subnr)
806{
807   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
808}
809
810static inline struct brw_reg
811brw_uw16_grf(unsigned nr, unsigned subnr)
812{
813   return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
814}
815
816
817/** Construct null register (usually used for setting condition codes) */
818static inline struct brw_reg
819brw_null_reg(void)
820{
821   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
822}
823
824static inline struct brw_reg
825brw_null_vec(unsigned width)
826{
827   return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
828}
829
830static inline struct brw_reg
831brw_address_reg(unsigned subnr)
832{
833   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
834}
835
836static inline struct brw_reg
837brw_tdr_reg(void)
838{
839   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0);
840}
841
842/* If/else instructions break in align16 mode if writemask & swizzle
843 * aren't xyzw.  This goes against the convention for other scalar
844 * regs:
845 */
846static inline struct brw_reg
847brw_ip_reg(void)
848{
849   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
850                  BRW_ARF_IP,
851                  0,
852                  0,
853                  0,
854                  BRW_REGISTER_TYPE_UD,
855                  BRW_VERTICAL_STRIDE_4, /* ? */
856                  BRW_WIDTH_1,
857                  BRW_HORIZONTAL_STRIDE_0,
858                  BRW_SWIZZLE_XYZW, /* NOTE! */
859                  WRITEMASK_XYZW); /* NOTE! */
860}
861
862static inline struct brw_reg
863brw_notification_reg(void)
864{
865   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
866                  BRW_ARF_NOTIFICATION_COUNT,
867                  0,
868                  0,
869                  0,
870                  BRW_REGISTER_TYPE_UD,
871                  BRW_VERTICAL_STRIDE_0,
872                  BRW_WIDTH_1,
873                  BRW_HORIZONTAL_STRIDE_0,
874                  BRW_SWIZZLE_XXXX,
875                  WRITEMASK_X);
876}
877
878static inline struct brw_reg
879brw_cr0_reg(unsigned subnr)
880{
881   return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_CONTROL, subnr);
882}
883
884static inline struct brw_reg
885brw_sr0_reg(unsigned subnr)
886{
887   return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr);
888}
889
890static inline struct brw_reg
891brw_acc_reg(unsigned width)
892{
893   return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
894                       BRW_ARF_ACCUMULATOR, 0);
895}
896
897static inline struct brw_reg
898brw_flag_reg(int reg, int subreg)
899{
900   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
901                      BRW_ARF_FLAG + reg, subreg);
902}
903
904static inline struct brw_reg
905brw_flag_subreg(unsigned subreg)
906{
907   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
908                      BRW_ARF_FLAG + subreg / 2, subreg % 2);
909}
910
911/**
912 * Return the mask register present in Gfx4-5, or the related register present
913 * in Gfx7.5 and later hardware referred to as "channel enable" register in
914 * the documentation.
915 */
916static inline struct brw_reg
917brw_mask_reg(unsigned subnr)
918{
919   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
920}
921
922static inline struct brw_reg
923brw_vmask_reg()
924{
925   return brw_sr0_reg(3);
926}
927
928static inline struct brw_reg
929brw_dmask_reg()
930{
931   return brw_sr0_reg(2);
932}
933
934static inline struct brw_reg
935brw_mask_stack_reg(unsigned subnr)
936{
937   return suboffset(retype(brw_vec16_reg(BRW_ARCHITECTURE_REGISTER_FILE,
938                                         BRW_ARF_MASK_STACK, 0),
939                           BRW_REGISTER_TYPE_UB), subnr);
940}
941
942static inline struct brw_reg
943brw_mask_stack_depth_reg(unsigned subnr)
944{
945   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
946                      BRW_ARF_MASK_STACK_DEPTH, subnr);
947}
948
949static inline struct brw_reg
950brw_message_reg(unsigned nr)
951{
952   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
953}
954
955static inline struct brw_reg
956brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr)
957{
958   return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr),
959                 BRW_REGISTER_TYPE_UD);
960}
961
/* Convert a power-of-two value to its encoded form: 1, 2, 4, 8, 16 and 32
 * map to 1 through 6; zero and every other value map to 0.
 *
 * This is almost always called with a numeric constant argument, and the
 * loop has a fixed trip count, so it stays easy to evaluate at compile time.
 */
static inline unsigned cvt(unsigned val)
{
   for (unsigned code = 1; code <= 6; code++) {
      if (val == (1u << (code - 1)))
         return code;
   }

   return 0;
}
978
979static inline struct brw_reg
980stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
981{
982   reg.vstride = cvt(vstride);
983   reg.width = cvt(width) - 1;
984   reg.hstride = cvt(hstride);
985   return reg;
986}
987
988/**
989 * Multiply the vertical and horizontal stride of a register by the given
990 * factor \a s.
991 */
992static inline struct brw_reg
993spread(struct brw_reg reg, unsigned s)
994{
995   if (s) {
996      assert(util_is_power_of_two_nonzero(s));
997
998      if (reg.hstride)
999         reg.hstride += cvt(s) - 1;
1000
1001      if (reg.vstride)
1002         reg.vstride += cvt(s) - 1;
1003
1004      return reg;
1005   } else {
1006      return stride(reg, 0, 1, 0);
1007   }
1008}
1009
1010/**
1011 * Reinterpret each channel of register \p reg as a vector of values of the
1012 * given smaller type and take the i-th subcomponent from each.
1013 */
1014static inline struct brw_reg
1015subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i)
1016{
1017   unsigned scale = type_sz(reg.type) / type_sz(type);
1018   assert(scale >= 1 && i < scale);
1019
1020   if (reg.file == IMM) {
1021      unsigned bit_size = type_sz(type) * 8;
1022      reg.u64 >>= i * bit_size;
1023      reg.u64 &= BITFIELD64_MASK(bit_size);
1024      if (bit_size <= 16)
1025         reg.u64 |= reg.u64 << 16;
1026      return retype(reg, type);
1027   }
1028
1029   return suboffset(retype(spread(reg, scale), type), i);
1030}
1031
1032static inline struct brw_reg
1033vec16(struct brw_reg reg)
1034{
1035   return stride(reg, 16,16,1);
1036}
1037
1038static inline struct brw_reg
1039vec8(struct brw_reg reg)
1040{
1041   return stride(reg, 8,8,1);
1042}
1043
1044static inline struct brw_reg
1045vec4(struct brw_reg reg)
1046{
1047   return stride(reg, 4,4,1);
1048}
1049
1050static inline struct brw_reg
1051vec2(struct brw_reg reg)
1052{
1053   return stride(reg, 2,2,1);
1054}
1055
1056static inline struct brw_reg
1057vec1(struct brw_reg reg)
1058{
1059   return stride(reg, 0,1,0);
1060}
1061
1062
1063static inline struct brw_reg
1064get_element(struct brw_reg reg, unsigned elt)
1065{
1066   return vec1(suboffset(reg, elt));
1067}
1068
1069static inline struct brw_reg
1070get_element_ud(struct brw_reg reg, unsigned elt)
1071{
1072   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
1073}
1074
1075static inline struct brw_reg
1076get_element_d(struct brw_reg reg, unsigned elt)
1077{
1078   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
1079}
1080
1081static inline struct brw_reg
1082brw_swizzle(struct brw_reg reg, unsigned swz)
1083{
1084   if (reg.file == BRW_IMMEDIATE_VALUE)
1085      reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz);
1086   else
1087      reg.swizzle = brw_compose_swizzle(swz, reg.swizzle);
1088
1089   return reg;
1090}
1091
1092static inline struct brw_reg
1093brw_writemask(struct brw_reg reg, unsigned mask)
1094{
1095   assert(reg.file != BRW_IMMEDIATE_VALUE);
1096   reg.writemask &= mask;
1097   return reg;
1098}
1099
1100static inline struct brw_reg
1101brw_set_writemask(struct brw_reg reg, unsigned mask)
1102{
1103   assert(reg.file != BRW_IMMEDIATE_VALUE);
1104   reg.writemask = mask;
1105   return reg;
1106}
1107
/**
 * Build a mask with the \p n lowest bits set.
 *
 * \param n  Number of low bits to enable.  Must be smaller than the bit width
 *           of unsigned, since a larger shift count is undefined.
 * \return   (2^n) - 1 as an unsigned value.
 */
static inline unsigned
brw_writemask_for_size(unsigned n)
{
   /* Shift an unsigned one: "1 << n" is a signed int shift, which overflows
    * (undefined behaviour) once n reaches the sign bit.
    */
   return (1u << n) - 1u;
}
1113
/**
 * Build a mask of \p n consecutive bits whose lowest set bit is bit number
 * \p first_component.  The selected range must fit in four components.
 */
static inline unsigned
brw_writemask_for_component_packing(unsigned n, unsigned first_component)
{
   assert(first_component + n <= 4);

   /* n low bits, then moved up to start at first_component. */
   const int low_bits = (1 << n) - 1;
   return low_bits << first_component;
}
1120
1121static inline struct brw_reg
1122negate(struct brw_reg reg)
1123{
1124   reg.negate ^= 1;
1125   return reg;
1126}
1127
1128static inline struct brw_reg
1129brw_abs(struct brw_reg reg)
1130{
1131   reg.abs = 1;
1132   reg.negate = 0;
1133   return reg;
1134}
1135
1136/************************************************************************/
1137
1138static inline struct brw_reg
1139brw_vec4_indirect(unsigned subnr, int offset)
1140{
1141   struct brw_reg reg =  brw_vec4_grf(0, 0);
1142   reg.subnr = subnr;
1143   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1144   reg.indirect_offset = offset;
1145   return reg;
1146}
1147
1148static inline struct brw_reg
1149brw_vec1_indirect(unsigned subnr, int offset)
1150{
1151   struct brw_reg reg =  brw_vec1_grf(0, 0);
1152   reg.subnr = subnr;
1153   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1154   reg.indirect_offset = offset;
1155   return reg;
1156}
1157
1158static inline struct brw_reg
1159brw_VxH_indirect(unsigned subnr, int offset)
1160{
1161   struct brw_reg reg = brw_vec1_grf(0, 0);
1162   reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
1163   reg.subnr = subnr;
1164   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1165   reg.indirect_offset = offset;
1166   return reg;
1167}
1168
1169static inline struct brw_reg
1170deref_4f(struct brw_indirect ptr, int offset)
1171{
1172   return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1173}
1174
1175static inline struct brw_reg
1176deref_1f(struct brw_indirect ptr, int offset)
1177{
1178   return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
1179}
1180
1181static inline struct brw_reg
1182deref_4b(struct brw_indirect ptr, int offset)
1183{
1184   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1185}
1186
1187static inline struct brw_reg
1188deref_1uw(struct brw_indirect ptr, int offset)
1189{
1190   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1191}
1192
1193static inline struct brw_reg
1194deref_1d(struct brw_indirect ptr, int offset)
1195{
1196   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1197}
1198
1199static inline struct brw_reg
1200deref_1ud(struct brw_indirect ptr, int offset)
1201{
1202   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1203}
1204
1205static inline struct brw_reg
1206get_addr_reg(struct brw_indirect ptr)
1207{
1208   return brw_address_reg(ptr.addr_subnr);
1209}
1210
1211static inline struct brw_indirect
1212brw_indirect_offset(struct brw_indirect ptr, int offset)
1213{
1214   ptr.addr_offset += offset;
1215   return ptr;
1216}
1217
1218static inline struct brw_indirect
1219brw_indirect(unsigned addr_subnr, int offset)
1220{
1221   struct brw_indirect ptr;
1222   ptr.addr_subnr = addr_subnr;
1223   ptr.addr_offset = offset;
1224   ptr.pad = 0;
1225   return ptr;
1226}
1227
1228static inline bool
1229region_matches(struct brw_reg reg, enum brw_vertical_stride v,
1230               enum brw_width w, enum brw_horizontal_stride h)
1231{
1232   return reg.vstride == v &&
1233          reg.width == w &&
1234          reg.hstride == h;
1235}
1236
/**
 * True when the region of \p reg is exactly (BRW_VERTICAL_STRIDE_0,
 * BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0), as checked by region_matches().
 */
#define has_scalar_region(reg)                         \
   region_matches((reg),                               \
                  BRW_VERTICAL_STRIDE_0,               \
                  BRW_WIDTH_1,                         \
                  BRW_HORIZONTAL_STRIDE_0)
1240
1241/**
1242 * Return the size in bytes per data element of register \p reg on the
1243 * corresponding register file.
1244 */
1245static inline unsigned
1246element_sz(struct brw_reg reg)
1247{
1248   if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) {
1249      return type_sz(reg.type);
1250
1251   } else if (reg.width == BRW_WIDTH_1 &&
1252              reg.hstride == BRW_HORIZONTAL_STRIDE_0) {
1253      assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
1254      return type_sz(reg.type) << (reg.vstride - 1);
1255
1256   } else {
1257      assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0);
1258      assert(reg.vstride == reg.hstride + reg.width);
1259      return type_sz(reg.type) << (reg.hstride - 1);
1260   }
1261}
1262
/* brw_packed_float.c */
/* NOTE(review): presumably converts \p f to the 8-bit packed form accepted by
 * brw_vf_to_float(); what the int return means for inputs that cannot be
 * represented is not visible from this header -- confirm in
 * brw_packed_float.c.
 */
int brw_float_to_vf(float f);
/* NOTE(review): presumably the inverse of brw_float_to_vf(), expanding the
 * 8-bit value \p vf back to a float -- confirm in brw_packed_float.c.
 */
float brw_vf_to_float(unsigned char vf);
1266
1267#ifdef __cplusplus
1268}
1269#endif
1270
1271#endif
1272