1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32
33#include "brw_eu_defines.h"
34#include "brw_eu.h"
35
36#include "util/ralloc.h"
37
38/**
39 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
40 * registers, implicitly moving the operand to a message register.
41 *
42 * On Sandybridge, this is no longer the case.  This function performs the
43 * explicit move; it should be called before emitting a SEND instruction.
44 */
45void
46gfx6_resolve_implied_move(struct brw_codegen *p,
47			  struct brw_reg *src,
48			  unsigned msg_reg_nr)
49{
50   const struct intel_device_info *devinfo = p->devinfo;
51   if (devinfo->ver < 6)
52      return;
53
54   if (src->file == BRW_MESSAGE_REGISTER_FILE)
55      return;
56
57   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
58      assert(devinfo->ver < 12);
59      brw_push_insn_state(p);
60      brw_set_default_exec_size(p, BRW_EXECUTE_8);
61      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
62      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
63      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
64	      retype(*src, BRW_REGISTER_TYPE_UD));
65      brw_pop_insn_state(p);
66   }
67   *src = brw_message_reg(msg_reg_nr);
68}
69
70static void
71gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
72{
73   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
74    * "The send with EOT should use register space R112-R127 for <src>. This is
75    *  to enable loading of a new thread into the same slot while the message
76    *  with EOT for current thread is pending dispatch."
77    *
78    * Since we're pretending to have 16 MRFs anyway, we may as well use the
79    * registers required for messages with EOT.
80    */
81   const struct intel_device_info *devinfo = p->devinfo;
82   if (devinfo->ver >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
83      reg->file = BRW_GENERAL_REGISTER_FILE;
84      reg->nr += GFX7_MRF_HACK_START;
85   }
86}
87
/**
 * Encode destination operand \p dest into instruction \p inst.
 *
 * Picks the encoding path based on opcode and generation: Gfx12+ SEND(C),
 * pre-Gfx12 split sends (SENDS/SENDSC), or the general case with
 * direct/indirect addressing and align1/align16 access modes.  When
 * p->automatic_exec_sizes is set, the instruction's exec size may be
 * reduced to match a narrow destination.
 */
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* Sanity-check that the register number is in range for its file. */
   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
      assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
   else if (dest.file == BRW_GENERAL_REGISTER_FILE)
      assert(dest.nr < 128);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == BRW_ARF_NULL &&
       type_sz(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   /* Rewrite MRF destinations as the Gfx7+ GRF range that stands in for
    * them (no-op on earlier generations or non-MRF files).
    */
   gfx7_convert_mrf_to_grf(p, &dest);

   /* Gfx12+ SEND(C): only the register file and number are encoded. */
   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

   /* Pre-Gfx12 split sends use a dedicated destination encoding. */
   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(devinfo->ver < 12);
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      /* General case: full file/type/region encoding. */
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
            /* A scalar (stride-0) destination is encoded as stride 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == BRW_GENERAL_REGISTER_FILE ||
                dest.file == BRW_MESSAGE_REGISTER_FILE) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            /* A scalar (stride-0) destination is encoded as stride 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }

   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
    * small registers, it can be useful for us to automatically reduce it to
    * match the register size.
    */
   if (p->automatic_exec_sizes) {
      /*
       * In platforms that support fp64 we can emit instructions with a width
       * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. In
       * these cases we need to make sure that these instructions have their
       * exec sizes set properly when they are emitted and we can't rely on
       * this code to fix it.
       */
      bool fix_exec_size;
      if (devinfo->ver >= 6)
         fix_exec_size = dest.width < BRW_EXECUTE_4;
      else
         fix_exec_size = dest.width < BRW_EXECUTE_8;

      if (fix_exec_size)
         brw_inst_set_exec_size(devinfo, inst, dest.width);
   }
}
207
/**
 * Encode source operand 0 (\p reg) into instruction \p inst.
 *
 * Like brw_set_dest(), dispatches on opcode/generation: Gfx12+ SEND(C),
 * pre-Gfx12 split sends, immediates, and the general direct/indirect
 * align1/align16 register cases.
 */
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* Sanity-check that the register number is in range for its file. */
   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
      assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
   else if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   /* Rewrite MRFs as the Gfx7+ GRF range that stands in for them. */
   gfx7_convert_mrf_to_grf(p, &reg);

   if (devinfo->ver >= 6 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   /* Gfx12+ SEND(C): only the register file and number are encoded. */
   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(reg.file != BRW_IMMEDIATE_VALUE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);

   /* Pre-Gfx12 split sends use a dedicated src0 encoding. */
   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* 64-bit immediates (DF/Q/UQ, and DIM's payload) use the wide
          * immediate field; everything else is encoded as 32 bits.
          */
         if (reg.type == BRW_REGISTER_TYPE_DF ||
             brw_inst_opcode(p->isa, inst) == BRW_OPCODE_DIM)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_REGISTER_TYPE_UQ ||
                  reg.type == BRW_REGISTER_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         /* For a 32-bit immediate in src0, also mirror the type into the
          * src1 slot (pre-Gfx12 encoding requirement visible here).
          */
         if (devinfo->ver < 12 && type_sz(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst,
                                       BRW_ARCHITECTURE_REGISTER_FILE);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
                brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A SIMD1 access collapses to a scalar <0;1,0> region. */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->verx10 == 70 &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
342
343
/**
 * Encode source operand 1 (\p reg) into instruction \p inst.
 *
 * Split sends (and Gfx12+ SEND(C)) encode src1 with a dedicated reg
 * file/number pair; all other instructions use the general register or
 * 32-bit immediate encodings.
 */
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *    operands only."
       */
      assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
             reg.nr != BRW_ARF_ACCUMULATOR);

      /* Rewrite MRFs as the Gfx7+ GRF range; MRFs themselves are not a
       * legal src1 file.
       */
      gfx7_convert_mrf_to_grf(p, &reg);
      assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(type_sz(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A SIMD1 access collapses to a scalar <0;1,0> region. */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->verx10 == 70 &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
449
450/**
451 * Specify the descriptor and extended descriptor immediate for a SEND(C)
452 * message instruction.
453 */
454void
455brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
456                unsigned desc, unsigned ex_desc)
457{
458   const struct intel_device_info *devinfo = p->devinfo;
459   assert(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
460          brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
461   if (devinfo->ver < 12)
462      brw_inst_set_src1_file_type(devinfo, inst,
463                                  BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
464   brw_inst_set_send_desc(devinfo, inst, desc);
465   if (devinfo->ver >= 9)
466      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
467}
468
469static void brw_set_math_message( struct brw_codegen *p,
470				  brw_inst *inst,
471				  unsigned function,
472				  unsigned integer_type,
473				  bool low_precision,
474				  unsigned dataType )
475{
476   const struct intel_device_info *devinfo = p->devinfo;
477   unsigned msg_length;
478   unsigned response_length;
479
480   /* Infer message length from the function */
481   switch (function) {
482   case BRW_MATH_FUNCTION_POW:
483   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
484   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
485   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
486      msg_length = 2;
487      break;
488   default:
489      msg_length = 1;
490      break;
491   }
492
493   /* Infer response length from the function */
494   switch (function) {
495   case BRW_MATH_FUNCTION_SINCOS:
496   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
497      response_length = 2;
498      break;
499   default:
500      response_length = 1;
501      break;
502   }
503
504   brw_set_desc(p, inst, brw_message_desc(
505                   devinfo, msg_length, response_length, false));
506
507   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH);
508   brw_inst_set_math_msg_function(devinfo, inst, function);
509   brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
510   brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
511   brw_inst_set_math_msg_saturate(devinfo, inst, brw_inst_saturate(devinfo, inst));
512   brw_inst_set_math_msg_data_type(devinfo, inst, dataType);
513   brw_inst_set_saturate(devinfo, inst, 0);
514}
515
516
/**
 * Configure \p insn as an FF_SYNC URB message.
 *
 * The message length is always 1 (header only); \p allocate requests a
 * URB handle allocation and \p end_of_thread sets the EOT bit.
 */
static void brw_set_ff_sync_message(struct brw_codegen *p,
				    brw_inst *insn,
				    bool allocate,
				    unsigned response_length,
				    bool end_of_thread)
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* msg_length = 1, header_present = true */
   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, end_of_thread);
   brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
   brw_inst_set_urb_allocate(devinfo, insn, allocate);
   /* The following fields are not used by FF_SYNC: */
   brw_inst_set_urb_global_offset(devinfo, insn, 0);
   brw_inst_set_urb_swizzle_control(devinfo, insn, 0);
   brw_inst_set_urb_used(devinfo, insn, 0);
   brw_inst_set_urb_complete(devinfo, insn, 0);
}
538
/**
 * Configure \p insn as a URB write message.
 *
 * \p flags selects OWORD vs HWORD writes, EOT, and the per-generation
 * allocate/used/complete/per-slot-offset bits; \p offset is the global
 * URB offset and \p swizzle_control the swizzle mode.  Several flag bits
 * are only valid on certain generations (see the asserts).
 */
static void brw_set_urb_message( struct brw_codegen *p,
				 brw_inst *insn,
                                 enum brw_urb_write_flags flags,
				 unsigned msg_length,
				 unsigned response_length,
				 unsigned offset,
				 unsigned swizzle_control )
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* Flag/feature combinations that the target generation can't encode. */
   assert(devinfo->ver < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
   assert(devinfo->ver < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
   assert(devinfo->ver >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, msg_length, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT));

   if (flags & BRW_URB_WRITE_OWORD) {
      assert(msg_length == 2); /* header + one OWORD of data */
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD);
   } else {
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD);
   }

   brw_inst_set_urb_global_offset(devinfo, insn, offset);
   brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control);

   /* The "complete" bit only exists before Gfx8. */
   if (devinfo->ver < 8) {
      brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE));
   }

   /* allocate/used are pre-Gfx7 fields; per-slot offset replaces them. */
   if (devinfo->ver < 7) {
      brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE));
      brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED));
   } else {
      brw_inst_set_urb_per_slot_offset(devinfo, insn,
         !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
   }
}
581
/**
 * Configure \p inst as a Gfx7+ data-port scratch block read/write message.
 *
 * \p write selects read vs write, \p dword selects DWord vs OWord access,
 * and \p num_regs (1/2/4, plus 8 on Gfx8+) gives the block size.  Note the
 * block-size field encoding differs by generation: log2(num_regs) on
 * Gfx8+, num_regs - 1 before that.
 */
static void
gfx7_set_dp_scratch_message(struct brw_codegen *p,
                            brw_inst *inst,
                            bool write,
                            bool dword,
                            bool invalidate_after_read,
                            unsigned num_regs,
                            unsigned addr_offset,
                            unsigned mlen,
                            unsigned rlen,
                            bool header_present)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
          (devinfo->ver >= 8 && num_regs == 8));
   /* Gfx8+ encodes the block size as log2; earlier parts as count - 1. */
   const unsigned block_size = (devinfo->ver >= 8 ? util_logbase2(num_regs) :
                                num_regs - 1);

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, mlen, rlen, header_present));

   brw_inst_set_sfid(devinfo, inst, GFX7_SFID_DATAPORT_DATA_CACHE);
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
   brw_inst_set_scratch_read_write(devinfo, inst, write);
   brw_inst_set_scratch_type(devinfo, inst, dword);
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
   brw_inst_set_scratch_block_size(devinfo, inst, block_size);
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
611
/**
 * Apply the default instruction state \p state (exec size, masking,
 * predication, flag register, etc.) to a freshly created \p insn.
 */
static void
brw_inst_set_state(const struct brw_isa_info *isa,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_compression(devinfo, insn, state->compressed);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   /* Software scoreboard (SWSB) dependency info exists on Gfx12+ only. */
   if (devinfo->ver >= 12)
      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb));
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   /* Align16 three-source instructions keep the flag register in a
    * different field than everything else.  state->flag_subreg packs
    * reg * 2 + subreg, hence the % 2 and / 2 below.
    */
   if (is_3src(isa, brw_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->ver >= 7)
         brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->ver >= 7)
         brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver >= 6)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
644
645static brw_inst *
646brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned align)
647{
648   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
649   assert(util_is_power_of_two_or_zero(align));
650   const unsigned align_insn = MAX2(align / sizeof(brw_inst), 1);
651   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
652   const unsigned new_nr_insn = start_insn + nr_insn;
653
654   if (p->store_size < new_nr_insn) {
655      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
656      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
657   }
658
659   /* Memset any padding due to alignment to 0.  We don't want to be hashing
660    * or caching a bunch of random bits we got from a memory allocation.
661    */
662   if (p->nr_insn < start_insn) {
663      memset(&p->store[p->nr_insn], 0,
664             (start_insn - p->nr_insn) * sizeof(brw_inst));
665   }
666
667   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
668   p->nr_insn = new_nr_insn;
669   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);
670
671   return &p->store[start_insn];
672}
673
/**
 * Pad the instruction store so the next emitted instruction starts at a
 * multiple of \p align bytes (appends zero instructions, padding only).
 */
void
brw_realign(struct brw_codegen *p, unsigned align)
{
   brw_append_insns(p, 0, align);
}
679
680int
681brw_append_data(struct brw_codegen *p, void *data,
682                unsigned size, unsigned align)
683{
684   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
685   void *dst = brw_append_insns(p, nr_insn, align);
686   memcpy(dst, data, size);
687
688   /* If it's not a whole number of instructions, memset the end */
689   if (size < nr_insn * sizeof(brw_inst))
690      memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);
691
692   return dst - (void *)p->store;
693}
694
/* Historical short alias used by the ALU emitters below. */
#define next_insn brw_next_insn
/**
 * Append one zeroed instruction with the given \p opcode, apply the
 * current default instruction state, and return a pointer to it.
 */
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));

   /* Start from all-zero bits so unset fields are deterministic. */
   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_inst_set_state(p->isa, insn, p->current);

   return insn;
}
709
710void
711brw_add_reloc(struct brw_codegen *p, uint32_t id,
712              enum brw_shader_reloc_type type,
713              uint32_t offset, uint32_t delta)
714{
715   if (p->num_relocs + 1 > p->reloc_array_size) {
716      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
717      p->relocs = reralloc(p->mem_ctx, p->relocs,
718                           struct brw_shader_reloc, p->reloc_array_size);
719   }
720
721   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
722      .id = id,
723      .type = type,
724      .offset = offset,
725      .delta = delta,
726   };
727}
728
/* Emit a one-source ALU instruction with the given opcode. */
static brw_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}
738
/* Emit a two-source ALU instruction with the given opcode. */
static brw_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4);
   assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4);

   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}
753
/* Return reg's subregister number in the units 3-src instructions use. */
static int
get_3src_subreg_nr(struct brw_reg reg)
{
   /* Normally, SubRegNum is in bytes (0..31).  However, 3-src instructions
    * use 32-bit units (components 0..7).  Since they only support F/D/UD
    * types, this doesn't lose any flexibility, but uses fewer bits.
    */
   return reg.subnr / 4;
}
763
/* Translate a generic vertical stride into the Align1 3-src encoding.
 * The encodable set differs by generation (see asserts below).
 */
static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      /* A vstride of 1 is only encodable on Gfx12+. */
      assert(devinfo->ver >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      /* ...while a vstride of 2 is only encodable before Gfx12. */
      assert(devinfo->ver < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      /* Strides 8 and 16 share the widest encoding. */
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}
786
787
/* Translate a generic horizontal stride into the Align1 3-src source
 * encoding.  Only strides 0/1/2/4 are representable.
 */
static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}
804
/**
 * Emit a three-source ALU instruction.  Handles both the Align1 and the
 * Align16 encodings, which place the operand fields in entirely different
 * instruction bits; the branch taken depends on the current default
 * access mode.
 */
static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   gfx7_convert_mrf_to_grf(p, &dest);

   assert(dest.nr < 128);

   /* At most one of src0/src2 may be an immediate on Gfx10+. */
   if (devinfo->ver >= 10)
      assert(!(src0.file == BRW_IMMEDIATE_VALUE &&
               src2.file == BRW_IMMEDIATE_VALUE));

   /* 3-src instructions only support direct addressing; src1 can never
    * be an immediate.
    */
   assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
   assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
   assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      /* Align1 encoding. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);

      if (devinfo->ver >= 12) {
         /* Gfx12+ encodes the register file directly. */
         brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
         brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      } else {
         /* Before Gfx12, the only ARF destination is the accumulator. */
         if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
            brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
                                              BRW_ALIGN1_3SRC_ACCUMULATOR);
            brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
         } else {
            brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
                                              BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
            brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
         }
      }
      brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8);

      brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      /* Execution type (float vs. integer pipe) follows the dest type. */
      if (brw_reg_type_is_floating_point(dest.type)) {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      /* src0: either a 16-bit immediate or a register region. */
      if (src0.file == BRW_IMMEDIATE_VALUE) {
         brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
      } else {
         brw_inst_set_3src_a1_src0_vstride(
            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
         brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src0.hstride));
         brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
         if (src0.type == BRW_REGISTER_TYPE_NF) {
            brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
         } else {
            brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
         }
         brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
         brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      }
      /* src1: always a register region (never an immediate, see assert
       * above); an ARF source is encoded as the accumulator.
       */
      brw_inst_set_3src_a1_src1_vstride(
         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
      brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                        to_3src_align1_hstride(src1.hstride));

      brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr);
      if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      }
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      /* src2: either a 16-bit immediate or a register region. */
      if (src2.file == BRW_IMMEDIATE_VALUE) {
         brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
      } else {
         brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src2.hstride));
         /* no vstride on src2 */
         brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
         brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
         brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
         brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      }

      assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
             src0.file == BRW_IMMEDIATE_VALUE ||
             (src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
              src0.type == BRW_REGISTER_TYPE_NF));
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
             src1.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
             src2.file == BRW_IMMEDIATE_VALUE);

      /* Source register file fields; the encoding changed on Gfx12. */
      if (devinfo->ver >= 12) {
         if (src0.file == BRW_IMMEDIATE_VALUE) {
            brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
         } else {
            brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
         }

         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);

         if (src2.file == BRW_IMMEDIATE_VALUE) {
            brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
         } else {
            brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
         }
      } else {
         brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
                                            src0.file == BRW_GENERAL_REGISTER_FILE ?
                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                            BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
                                            src1.file == BRW_GENERAL_REGISTER_FILE ?
                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                            BRW_ALIGN1_3SRC_ACCUMULATOR);
         brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
                                            src2.file == BRW_GENERAL_REGISTER_FILE ?
                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                            BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
      }

   } else {
      /* Align16 encoding: all operands must be GRF register regions. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE);
      assert(dest.type == BRW_REGISTER_TYPE_F  ||
             dest.type == BRW_REGISTER_TYPE_DF ||
             dest.type == BRW_REGISTER_TYPE_D  ||
             dest.type == BRW_REGISTER_TYPE_UD ||
             (dest.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 8));
      if (devinfo->ver == 6) {
         brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
                                            dest.file == BRW_MESSAGE_REGISTER_FILE);
      }
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      /* rep_ctrl replicates a scalar source across all channels. */
      assert(src0.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
      brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                          src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
      brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                          src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                          src2.vstride == BRW_VERTICAL_STRIDE_0);

      if (devinfo->ver >= 7) {
         /* Set both the source and destination types based on dest.type,
          * ignoring the source register types.  The MAD and LRP emitters ensure
          * that all four types are float.  The BFE and BFI2 emitters, however,
          * may send us mixed D and UD types and want us to ignore that and use
          * the destination type.
          */
         brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
         brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

         /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
          *
          *    "Three source instructions can use operands with mixed-mode
          *     precision. When SrcType field is set to :f or :hf it defines
          *     precision for source 0 only, and fields Src1Type and Src2Type
          *     define precision for other source operands:
          *
          *     0b = :f. Single precision Float (32-bit).
          *     1b = :hf. Half precision Float (16-bit)."
          */
         if (src1.type == BRW_REGISTER_TYPE_HF)
            brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);

         if (src2.type == BRW_REGISTER_TYPE_HF)
            brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
      }
   }

   return inst;
}
1016
1017
1018/***********************************************************************
1019 * Convenience routines.
1020 */
1021#define ALU1(OP)					\
1022brw_inst *brw_##OP(struct brw_codegen *p,		\
1023	      struct brw_reg dest,			\
1024	      struct brw_reg src0)   			\
1025{							\
1026   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
1027}
1028
1029#define ALU2(OP)					\
1030brw_inst *brw_##OP(struct brw_codegen *p,		\
1031	      struct brw_reg dest,			\
1032	      struct brw_reg src0,			\
1033	      struct brw_reg src1)   			\
1034{							\
1035   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
1036}
1037
1038#define ALU3(OP)					\
1039brw_inst *brw_##OP(struct brw_codegen *p,		\
1040	      struct brw_reg dest,			\
1041	      struct brw_reg src0,			\
1042	      struct brw_reg src1,			\
1043	      struct brw_reg src2)   			\
1044{                                                       \
1045   if (p->current->access_mode == BRW_ALIGN_16) {       \
1046      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
1047         src0.swizzle = BRW_SWIZZLE_XXXX;               \
1048      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
1049         src1.swizzle = BRW_SWIZZLE_XXXX;               \
1050      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
1051         src2.swizzle = BRW_SWIZZLE_XXXX;               \
1052   }                                                    \
1053   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
1054}
1055
1056#define ALU3F(OP)                                               \
1057brw_inst *brw_##OP(struct brw_codegen *p,         \
1058                                 struct brw_reg dest,           \
1059                                 struct brw_reg src0,           \
1060                                 struct brw_reg src1,           \
1061                                 struct brw_reg src2)           \
1062{                                                               \
1063   assert(dest.type == BRW_REGISTER_TYPE_F ||                   \
1064          dest.type == BRW_REGISTER_TYPE_DF);                   \
1065   if (dest.type == BRW_REGISTER_TYPE_F) {                      \
1066      assert(src0.type == BRW_REGISTER_TYPE_F);                 \
1067      assert(src1.type == BRW_REGISTER_TYPE_F);                 \
1068      assert(src2.type == BRW_REGISTER_TYPE_F);                 \
1069   } else if (dest.type == BRW_REGISTER_TYPE_DF) {              \
1070      assert(src0.type == BRW_REGISTER_TYPE_DF);                \
1071      assert(src1.type == BRW_REGISTER_TYPE_DF);                \
1072      assert(src2.type == BRW_REGISTER_TYPE_DF);                \
1073   }                                                            \
1074                                                                \
1075   if (p->current->access_mode == BRW_ALIGN_16) {               \
1076      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
1077         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
1078      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
1079         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
1080      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
1081         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
1082   }                                                            \
1083   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
1084}
1085
1086ALU2(SEL)
1087ALU1(NOT)
1088ALU2(AND)
1089ALU2(OR)
1090ALU2(XOR)
1091ALU2(SHR)
1092ALU2(SHL)
1093ALU1(DIM)
1094ALU2(ASR)
1095ALU2(ROL)
1096ALU2(ROR)
1097ALU3(CSEL)
1098ALU1(FRC)
1099ALU1(RNDD)
1100ALU1(RNDE)
1101ALU1(RNDU)
1102ALU1(RNDZ)
1103ALU2(MAC)
1104ALU2(MACH)
1105ALU1(LZD)
1106ALU2(DP4)
1107ALU2(DPH)
1108ALU2(DP3)
1109ALU2(DP2)
1110ALU3(DP4A)
1111ALU3(MAD)
1112ALU3F(LRP)
1113ALU1(BFREV)
1114ALU3(BFE)
1115ALU2(BFI1)
1116ALU3(BFI2)
1117ALU1(FBH)
1118ALU1(FBL)
1119ALU1(CBIT)
1120ALU2(ADDC)
1121ALU2(SUBB)
1122ALU3(ADD3)
1123
1124brw_inst *
1125brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
1126{
1127   const struct intel_device_info *devinfo = p->devinfo;
1128
1129   /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
1130    * To avoid the problems that causes, we use an <X,2,0> source region to
1131    * read each element twice.
1132    */
1133   if (devinfo->verx10 == 70 &&
1134       brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
1135       dest.type == BRW_REGISTER_TYPE_DF &&
1136       (src0.type == BRW_REGISTER_TYPE_F ||
1137        src0.type == BRW_REGISTER_TYPE_D ||
1138        src0.type == BRW_REGISTER_TYPE_UD) &&
1139       !has_scalar_region(src0)) {
1140      assert(src0.vstride == src0.width + src0.hstride);
1141      src0.vstride = src0.hstride;
1142      src0.width = BRW_WIDTH_2;
1143      src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1144   }
1145
1146   return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
1147}
1148
1149brw_inst *
1150brw_ADD(struct brw_codegen *p, struct brw_reg dest,
1151        struct brw_reg src0, struct brw_reg src1)
1152{
1153   /* 6.2.2: add */
1154   if (src0.type == BRW_REGISTER_TYPE_F ||
1155       (src0.file == BRW_IMMEDIATE_VALUE &&
1156	src0.type == BRW_REGISTER_TYPE_VF)) {
1157      assert(src1.type != BRW_REGISTER_TYPE_UD);
1158      assert(src1.type != BRW_REGISTER_TYPE_D);
1159   }
1160
1161   if (src1.type == BRW_REGISTER_TYPE_F ||
1162       (src1.file == BRW_IMMEDIATE_VALUE &&
1163	src1.type == BRW_REGISTER_TYPE_VF)) {
1164      assert(src0.type != BRW_REGISTER_TYPE_UD);
1165      assert(src0.type != BRW_REGISTER_TYPE_D);
1166   }
1167
1168   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1169}
1170
1171brw_inst *
1172brw_AVG(struct brw_codegen *p, struct brw_reg dest,
1173        struct brw_reg src0, struct brw_reg src1)
1174{
1175   assert(dest.type == src0.type);
1176   assert(src0.type == src1.type);
1177   switch (src0.type) {
1178   case BRW_REGISTER_TYPE_B:
1179   case BRW_REGISTER_TYPE_UB:
1180   case BRW_REGISTER_TYPE_W:
1181   case BRW_REGISTER_TYPE_UW:
1182   case BRW_REGISTER_TYPE_D:
1183   case BRW_REGISTER_TYPE_UD:
1184      break;
1185   default:
1186      unreachable("Bad type for brw_AVG");
1187   }
1188
1189   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1190}
1191
1192brw_inst *
1193brw_MUL(struct brw_codegen *p, struct brw_reg dest,
1194        struct brw_reg src0, struct brw_reg src1)
1195{
1196   /* 6.32.38: mul */
1197   if (src0.type == BRW_REGISTER_TYPE_D ||
1198       src0.type == BRW_REGISTER_TYPE_UD ||
1199       src1.type == BRW_REGISTER_TYPE_D ||
1200       src1.type == BRW_REGISTER_TYPE_UD) {
1201      assert(dest.type != BRW_REGISTER_TYPE_F);
1202   }
1203
1204   if (src0.type == BRW_REGISTER_TYPE_F ||
1205       (src0.file == BRW_IMMEDIATE_VALUE &&
1206	src0.type == BRW_REGISTER_TYPE_VF)) {
1207      assert(src1.type != BRW_REGISTER_TYPE_UD);
1208      assert(src1.type != BRW_REGISTER_TYPE_D);
1209   }
1210
1211   if (src1.type == BRW_REGISTER_TYPE_F ||
1212       (src1.file == BRW_IMMEDIATE_VALUE &&
1213	src1.type == BRW_REGISTER_TYPE_VF)) {
1214      assert(src0.type != BRW_REGISTER_TYPE_UD);
1215      assert(src0.type != BRW_REGISTER_TYPE_D);
1216   }
1217
1218   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1219	  src0.nr != BRW_ARF_ACCUMULATOR);
1220   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1221	  src1.nr != BRW_ARF_ACCUMULATOR);
1222
1223   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1224}
1225
1226brw_inst *
1227brw_LINE(struct brw_codegen *p, struct brw_reg dest,
1228         struct brw_reg src0, struct brw_reg src1)
1229{
1230   src0.vstride = BRW_VERTICAL_STRIDE_0;
1231   src0.width = BRW_WIDTH_1;
1232   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1233   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
1234}
1235
1236brw_inst *
1237brw_PLN(struct brw_codegen *p, struct brw_reg dest,
1238        struct brw_reg src0, struct brw_reg src1)
1239{
1240   src0.vstride = BRW_VERTICAL_STRIDE_0;
1241   src0.width = BRW_WIDTH_1;
1242   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1243   src1.vstride = BRW_VERTICAL_STRIDE_8;
1244   src1.width = BRW_WIDTH_8;
1245   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1246   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
1247}
1248
/**
 * Emit a float-to-half conversion: the F32TO16 opcode on Gfx7, or a
 * converting MOV to an HF destination on Gfx8+.  When a UD destination
 * requires it, a second MOV zero-fills the high 16 bits of each dword.
 */
brw_inst *
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
   /* The F32TO16 instruction doesn't support 32-bit destination types in
    * Align1 mode, and neither does the Gfx8 implementation in terms of a
    * converting MOV.  Gfx7 does zero out the high 16 bits in Align16 mode as
    * an undocumented feature.
    */
   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
                                 (!align16 || devinfo->ver >= 8));
   brw_inst *inst;

   if (align16) {
      assert(dst.type == BRW_REGISTER_TYPE_UD);
   } else {
      if (devinfo->ver <= 7) {
         assert(dst.type == BRW_REGISTER_TYPE_W ||
                dst.type == BRW_REGISTER_TYPE_UW);
      } else {
         assert(dst.type == BRW_REGISTER_TYPE_HF);
      }
   }

   brw_push_insn_state(p);

   if (needs_zero_fill) {
      /* View the UD destination as words with a stride of 2, so the
       * conversion writes the low word of each dword and the zero-fill
       * MOV below (at suboffset 1) writes the high word.
       */
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
   }

   if (devinfo->ver >= 8) {
      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
   } else {
      assert(devinfo->ver == 7);
      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
   }

   if (needs_zero_fill) {
      /* Pair the two writes with no_dd_clear/no_dd_check (pre-Gfx12) since
       * they touch disjoint halves of the same dwords.
       */
      if (devinfo->ver < 12)
         brw_inst_set_no_dd_clear(devinfo, inst, true);
      brw_set_default_swsb(p, tgl_swsb_null());
      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
      if (devinfo->ver < 12)
         brw_inst_set_no_dd_check(devinfo, inst, true);
   }

   brw_pop_insn_state(p);
   return inst;
}
1300
1301brw_inst *
1302brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
1303{
1304   const struct intel_device_info *devinfo = p->devinfo;
1305   bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
1306
1307   if (align16) {
1308      assert(src.type == BRW_REGISTER_TYPE_UD);
1309   } else {
1310      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1311       *
1312       *   Because this instruction does not have a 16-bit floating-point
1313       *   type, the source data type must be Word (W). The destination type
1314       *   must be F (Float).
1315       */
1316      if (src.type == BRW_REGISTER_TYPE_UD)
1317         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
1318
1319      assert(src.type == BRW_REGISTER_TYPE_W ||
1320             src.type == BRW_REGISTER_TYPE_UW ||
1321             src.type == BRW_REGISTER_TYPE_HF);
1322   }
1323
1324   if (devinfo->ver >= 8) {
1325      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
1326   } else {
1327      assert(devinfo->ver == 7);
1328      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
1329   }
1330}
1331
1332
1333void brw_NOP(struct brw_codegen *p)
1334{
1335   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1336   memset(insn, 0, sizeof(*insn));
1337   brw_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
1338}
1339
1340void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
1341{
1342   brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
1343   brw_inst_set_cond_modifier(p->devinfo, insn, func);
1344}
1345
1346/***********************************************************************
1347 * Comparisons, if/else/endif
1348 */
1349
1350brw_inst *
1351brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1352         unsigned predicate_control)
1353{
1354   const struct intel_device_info *devinfo = p->devinfo;
1355   struct brw_reg ip = brw_ip_reg();
1356   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1357
1358   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
1359   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1360   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1361   brw_inst_set_pred_control(devinfo, inst, predicate_control);
1362
1363   return inst;
1364}
1365
/* Push an IF/ELSE instruction onto the if-stack.  Entries are stored as
 * offsets into p->store rather than pointers, since the store may be
 * reallocated as instructions are appended.
 */
static void
push_if_stack(struct brw_codegen *p, brw_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   /* Grow after the write: the invariant is that the array always has room
    * for at least one entry beyond the current depth.
    */
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1378
1379static brw_inst *
1380pop_if_stack(struct brw_codegen *p)
1381{
1382   p->if_stack_depth--;
1383   return &p->store[p->if_stack[p->if_stack_depth]];
1384}
1385
/* Push a loop-head instruction onto the loop stack (stored as an offset
 * into p->store) and reset the nested-IF counter for the new loop level.
 */
static void
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
{
   /* Grow first: if_depth_in_loop is written one entry past the current
    * depth below, so room for depth + 1 entries is needed.
    */
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
			       p->loop_stack_array_size);
      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
				     p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
   p->if_depth_in_loop[p->loop_stack_depth] = 0;
}
1401
1402static brw_inst *
1403get_inner_do_insn(struct brw_codegen *p)
1404{
1405   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1406}
1407
1408/* EU takes the value from the flag register and pushes it onto some
1409 * sort of a stack (presumably merging with any flag value already on
1410 * the stack).  Within an if block, the flags at the top of the stack
1411 * control execution on each channel of the unit, eg. on each of the
1412 * 16 pixel values in our wm programs.
1413 *
1414 * When the matching 'else' instruction is reached (presumably by
1415 * countdown of the instruction count patched in by our ELSE/ENDIF
1416 * functions), the relevant flags are inverted.
1417 *
1418 * When the matching 'endif' instruction is reached, the flags are
1419 * popped off.  If the stack is now empty, normal execution resumes.
1420 */
/**
 * Emit an IF instruction and push it on the if-stack.  The operand layout
 * differs per generation; jump targets are left zero here and patched
 * later (see patch_IF_ELSE / convert_IF_ELSE_to_ADD).
 */
brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (devinfo->ver < 6) {
      /* Pre-Gfx6: IF operates on the IP register. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->ver == 6) {
      /* Gfx6: a jump count in the destination, null sources. */
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gfx6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else if (devinfo->ver == 7) {
      /* Gfx7: JIP/UIP fields, null destination and src0. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      /* Gfx8+: JIP/UIP; the immediate src0 is gone on Gfx12+. */
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      if (devinfo->ver < 12)
         brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_exec_size(devinfo, insn, execute_size);
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   if (!p->single_program_flow && devinfo->ver < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
1465
1466/* This function is only used for gfx6-style IF instructions with an
1467 * embedded comparison (conditional modifier).  It is not used on gfx7.
1468 */
/**
 * Emit a Gfx6 IF with an embedded comparison of src0 and src1 using the
 * given conditional modifier, and push it on the if-stack.  The jump
 * count is left zero and patched later.
 */
brw_inst *
gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   brw_inst_set_gfx6_jump_count(devinfo, insn, 0);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* The embedded comparison replaces predication on this form of IF. */
   assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE);
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
   brw_inst_set_cond_modifier(devinfo, insn, conditional);

   push_if_stack(p, insn);
   return insn;
}
1491
1492/**
1493 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1494 */
1495static void
1496convert_IF_ELSE_to_ADD(struct brw_codegen *p,
1497                       brw_inst *if_inst, brw_inst *else_inst)
1498{
1499   const struct intel_device_info *devinfo = p->devinfo;
1500
1501   /* The next instruction (where the ENDIF would be, if it existed) */
1502   brw_inst *next_inst = &p->store[p->nr_insn];
1503
1504   assert(p->single_program_flow);
1505   assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
1506   assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);
1507   assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1);
1508
1509   /* Convert IF to an ADD instruction that moves the instruction pointer
1510    * to the first instruction of the ELSE block.  If there is no ELSE
1511    * block, point to where ENDIF would be.  Reverse the predicate.
1512    *
1513    * There's no need to execute an ENDIF since we don't need to do any
1514    * stack operations, and if we're currently executing, we just want to
1515    * continue normally.
1516    */
1517   brw_inst_set_opcode(p->isa, if_inst, BRW_OPCODE_ADD);
1518   brw_inst_set_pred_inv(devinfo, if_inst, true);
1519
1520   if (else_inst != NULL) {
1521      /* Convert ELSE to an ADD instruction that points where the ENDIF
1522       * would be.
1523       */
1524      brw_inst_set_opcode(p->isa, else_inst, BRW_OPCODE_ADD);
1525
1526      brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16);
1527      brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16);
1528   } else {
1529      brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16);
1530   }
1531}
1532
1533/**
1534 * Patch IF and ELSE instructions with appropriate jump targets.
1535 */
1536static void
1537patch_IF_ELSE(struct brw_codegen *p,
1538              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
1539{
1540   const struct intel_device_info *devinfo = p->devinfo;
1541
1542   /* We shouldn't be patching IF and ELSE instructions in single program flow
1543    * mode when gen < 6, because in single program flow mode on those
1544    * platforms, we convert flow control instructions to conditional ADDs that
1545    * operate on IP (see brw_ENDIF).
1546    *
1547    * However, on Gfx6, writing to IP doesn't work in single program flow mode
1548    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1549    * not be updated by non-flow control instructions.").  And on later
1550    * platforms, there is no significant benefit to converting control flow
1551    * instructions to conditional ADDs.  So we do patch IF and ELSE
1552    * instructions in single program flow mode on those platforms.
1553    */
1554   if (devinfo->ver < 6)
1555      assert(!p->single_program_flow);
1556
1557   assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
1558   assert(endif_inst != NULL);
1559   assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);
1560
1561   unsigned br = brw_jump_scale(devinfo);
1562
1563   assert(brw_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
1564   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));
1565
1566   if (else_inst == NULL) {
1567      /* Patch IF -> ENDIF */
1568      if (devinfo->ver < 6) {
1569	 /* Turn it into an IFF, which means no mask stack operations for
1570	  * all-false and jumping past the ENDIF.
1571	  */
1572         brw_inst_set_opcode(p->isa, if_inst, BRW_OPCODE_IFF);
1573         brw_inst_set_gfx4_jump_count(devinfo, if_inst,
1574                                      br * (endif_inst - if_inst + 1));
1575         brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0);
1576      } else if (devinfo->ver == 6) {
1577	 /* As of gfx6, there is no IFF and IF must point to the ENDIF. */
1578         brw_inst_set_gfx6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
1579      } else {
1580         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1581         brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
1582      }
1583   } else {
1584      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));
1585
1586      /* Patch IF -> ELSE */
1587      if (devinfo->ver < 6) {
1588         brw_inst_set_gfx4_jump_count(devinfo, if_inst,
1589                                      br * (else_inst - if_inst));
1590         brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0);
1591      } else if (devinfo->ver == 6) {
1592         brw_inst_set_gfx6_jump_count(devinfo, if_inst,
1593                                      br * (else_inst - if_inst + 1));
1594      }
1595
1596      /* Patch ELSE -> ENDIF */
1597      if (devinfo->ver < 6) {
1598	 /* BRW_OPCODE_ELSE pre-gfx6 should point just past the
1599	  * matching ENDIF.
1600	  */
1601         brw_inst_set_gfx4_jump_count(devinfo, else_inst,
1602                                      br * (endif_inst - else_inst + 1));
1603         brw_inst_set_gfx4_pop_count(devinfo, else_inst, 1);
1604      } else if (devinfo->ver == 6) {
1605	 /* BRW_OPCODE_ELSE on gfx6 should point to the matching ENDIF. */
1606         brw_inst_set_gfx6_jump_count(devinfo, else_inst,
1607                                      br * (endif_inst - else_inst));
1608      } else {
1609	 /* The IF instruction's JIP should point just past the ELSE */
1610         brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
1611	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1612         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1613         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
1614         if (devinfo->ver >= 8) {
1615            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1616             * should point to ENDIF.
1617             */
1618            brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
1619         }
1620      }
1621   }
1622}
1623
/**
 * Emit an ELSE instruction.
 *
 * All jump fields are left zeroed here; they are filled in later by
 * patch_IF_ELSE() when the matching brw_ENDIF() is emitted.  The
 * instruction is pushed onto the if-stack so ENDIF can find it again.
 */
void
brw_ELSE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (devinfo->ver < 6) {
      /* Pre-gfx6: ELSE is an IP-relative jump; dst and src0 are the IP reg. */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->ver == 6) {
      /* Gfx6 encodes the jump count via an immediate word destination. */
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gfx6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->ver == 7) {
      /* Gfx7 uses dedicated JIP/UIP fields; operands are null/immediate. */
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      if (devinfo->ver < 12)
         brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   /* Pre-gfx6, flow-control instructions cause an implied thread switch. */
   if (!p->single_program_flow && devinfo->ver < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
}
1662
/**
 * Emit an ENDIF and patch the matching IF (and optional ELSE) instruction.
 *
 * Pops the IF/ELSE pair off the if-stack.  In single-program-flow mode on
 * Gfx4/5 no ENDIF is emitted at all; the IF/ELSE are instead converted to
 * IP-relative ADDs (see convert_IF_ELSE_to_ADD).
 */
void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gfx6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gfx6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gfx4 and
    * Gfx5.
    */
   if (devinfo->ver < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation operand encodings for the ENDIF instruction itself. */
   if (devinfo->ver < 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->ver == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->ver == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
   } else {
      brw_set_src0(p, insn, brw_imm_d(0));
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   /* Pre-gfx6, flow-control instructions cause an implied thread switch. */
   if (devinfo->ver < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   /* Also pop item off the stack in the endif instruction: */
   if (devinfo->ver < 6) {
      brw_inst_set_gfx4_jump_count(devinfo, insn, 0);
      brw_inst_set_gfx4_pop_count(devinfo, insn, 1);
   } else if (devinfo->ver == 6) {
      brw_inst_set_gfx6_jump_count(devinfo, insn, 2);
   } else {
      brw_inst_set_jip(devinfo, insn, 2);
   }
   /* The ENDIF location is now known: fill in the IF/ELSE jump targets. */
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1743
1744brw_inst *
1745brw_BREAK(struct brw_codegen *p)
1746{
1747   const struct intel_device_info *devinfo = p->devinfo;
1748   brw_inst *insn;
1749
1750   insn = next_insn(p, BRW_OPCODE_BREAK);
1751   if (devinfo->ver >= 8) {
1752      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1753      brw_set_src0(p, insn, brw_imm_d(0x0));
1754   } else if (devinfo->ver >= 6) {
1755      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1756      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1757      brw_set_src1(p, insn, brw_imm_d(0x0));
1758   } else {
1759      brw_set_dest(p, insn, brw_ip_reg());
1760      brw_set_src0(p, insn, brw_ip_reg());
1761      brw_set_src1(p, insn, brw_imm_d(0x0));
1762      brw_inst_set_gfx4_pop_count(devinfo, insn,
1763                                  p->if_depth_in_loop[p->loop_stack_depth]);
1764   }
1765   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1766   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1767
1768   return insn;
1769}
1770
1771brw_inst *
1772brw_CONT(struct brw_codegen *p)
1773{
1774   const struct intel_device_info *devinfo = p->devinfo;
1775   brw_inst *insn;
1776
1777   insn = next_insn(p, BRW_OPCODE_CONTINUE);
1778   brw_set_dest(p, insn, brw_ip_reg());
1779   if (devinfo->ver >= 8) {
1780      brw_set_src0(p, insn, brw_imm_d(0x0));
1781   } else {
1782      brw_set_src0(p, insn, brw_ip_reg());
1783      brw_set_src1(p, insn, brw_imm_d(0x0));
1784   }
1785
1786   if (devinfo->ver < 6) {
1787      brw_inst_set_gfx4_pop_count(devinfo, insn,
1788                                  p->if_depth_in_loop[p->loop_stack_depth]);
1789   }
1790   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1791   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1792   return insn;
1793}
1794
1795brw_inst *
1796brw_HALT(struct brw_codegen *p)
1797{
1798   const struct intel_device_info *devinfo = p->devinfo;
1799   brw_inst *insn;
1800
1801   insn = next_insn(p, BRW_OPCODE_HALT);
1802   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1803   if (devinfo->ver < 6) {
1804      /* From the Gfx4 PRM:
1805       *
1806       *    "IP register must be put (for example, by the assembler) at <dst>
1807       *    and <src0> locations.
1808       */
1809      brw_set_dest(p, insn, brw_ip_reg());
1810      brw_set_src0(p, insn, brw_ip_reg());
1811      brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
1812   } else if (devinfo->ver < 8) {
1813      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1814      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1815   } else if (devinfo->ver < 12) {
1816      brw_set_src0(p, insn, brw_imm_d(0x0));
1817   }
1818
1819   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1820   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1821   return insn;
1822}
1823
1824/* DO/WHILE loop:
1825 *
1826 * The DO/WHILE is just an unterminated loop -- break or continue are
1827 * used for control within the loop.  We have a few ways they can be
1828 * done.
1829 *
1830 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1831 * jip and no DO instruction.
1832 *
1833 * For non-uniform control flow pre-gfx6, there's a DO instruction to
1834 * push the mask, and a WHILE to jump back, and BREAK to get out and
1835 * pop the mask.
1836 *
1837 * For gfx6, there's no more mask stack, so no need for DO.  WHILE
1838 * just points back to the first instruction of the loop.
1839 */
1840brw_inst *
1841brw_DO(struct brw_codegen *p, unsigned execute_size)
1842{
1843   const struct intel_device_info *devinfo = p->devinfo;
1844
1845   if (devinfo->ver >= 6 || p->single_program_flow) {
1846      push_loop_stack(p, &p->store[p->nr_insn]);
1847      return &p->store[p->nr_insn];
1848   } else {
1849      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);
1850
1851      push_loop_stack(p, insn);
1852
1853      /* Override the defaults for this instruction:
1854       */
1855      brw_set_dest(p, insn, brw_null_reg());
1856      brw_set_src0(p, insn, brw_null_reg());
1857      brw_set_src1(p, insn, brw_null_reg());
1858
1859      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1860      brw_inst_set_exec_size(devinfo, insn, execute_size);
1861      brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE);
1862
1863      return insn;
1864   }
1865}
1866
1867/**
1868 * For pre-gfx6, we patch BREAK/CONT instructions to point at the WHILE
1869 * instruction here.
1870 *
1871 * For gfx6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1872 * nesting, since it can always just point to the end of the block/current loop.
1873 */
1874static void
1875brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
1876{
1877   const struct intel_device_info *devinfo = p->devinfo;
1878   brw_inst *do_inst = get_inner_do_insn(p);
1879   brw_inst *inst;
1880   unsigned br = brw_jump_scale(devinfo);
1881
1882   assert(devinfo->ver < 6);
1883
1884   for (inst = while_inst - 1; inst != do_inst; inst--) {
1885      /* If the jump count is != 0, that means that this instruction has already
1886       * been patched because it's part of a loop inside of the one we're
1887       * patching.
1888       */
1889      if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_BREAK &&
1890          brw_inst_gfx4_jump_count(devinfo, inst) == 0) {
1891         brw_inst_set_gfx4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
1892      } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_CONTINUE &&
1893                 brw_inst_gfx4_jump_count(devinfo, inst) == 0) {
1894         brw_inst_set_gfx4_jump_count(devinfo, inst, br * (while_inst - inst));
1895      }
1896   }
1897}
1898
/**
 * Emit the WHILE instruction closing the innermost DO/WHILE loop, and pop
 * one level off the loop stack.
 *
 * On Gfx6+ the jump field points back at the loop head recorded by brw_DO().
 * Pre-gfx6, single-program-flow loops become a plain IP-relative ADD;
 * otherwise a real WHILE is emitted and any unpatched BREAK/CONTINUE in the
 * loop body are fixed up via brw_patch_break_cont().
 */
brw_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   if (devinfo->ver >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      if (devinfo->ver >= 8) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         if (devinfo->ver < 12)
            brw_set_src0(p, insn, brw_imm_d(0));
         /* Negative JIP: backwards jump to the top of the loop. */
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else if (devinfo->ver == 7) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, brw_imm_w(0));
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else {
         brw_set_dest(p, insn, brw_imm_w(0));
         brw_inst_set_gfx6_jump_count(devinfo, insn, br * (do_insn - insn));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   } else {
      if (p->single_program_flow) {
         /* SPF mode: a scalar ADD on IP jumping back to the loop head. */
	 insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(brw_inst_opcode(p->isa, do_insn) == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

         /* WHILE inherits its execution width from the matching DO. */
         brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
         brw_inst_set_gfx4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
         brw_inst_set_gfx4_pop_count(devinfo, insn, 0);

	 brw_patch_break_cont(p, insn);
      }
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
1961
1962/* FORWARD JUMPS:
1963 */
1964void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
1965{
1966   const struct intel_device_info *devinfo = p->devinfo;
1967   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
1968   unsigned jmpi = 1;
1969
1970   if (devinfo->ver >= 5)
1971      jmpi = 2;
1972
1973   assert(brw_inst_opcode(p->isa, jmp_insn) == BRW_OPCODE_JMPI);
1974   assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE);
1975
1976   brw_inst_set_gfx4_jump_count(devinfo, jmp_insn,
1977                                jmpi * (p->nr_insn - jmp_insn_idx - 1));
1978}
1979
1980/* To integrate with the above, it makes sense that the comparison
1981 * instruction should populate the flag register.  It might be simpler
1982 * just to use the flag reg for most WM tasks?
1983 */
1984void brw_CMP(struct brw_codegen *p,
1985	     struct brw_reg dest,
1986	     unsigned conditional,
1987	     struct brw_reg src0,
1988	     struct brw_reg src1)
1989{
1990   const struct intel_device_info *devinfo = p->devinfo;
1991   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1992
1993   brw_inst_set_cond_modifier(devinfo, insn, conditional);
1994   brw_set_dest(p, insn, dest);
1995   brw_set_src0(p, insn, src0);
1996   brw_set_src1(p, insn, src1);
1997
1998   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1999    * page says:
2000    *    "Any CMP instruction with a null destination must use a {switch}."
2001    *
2002    * It also applies to other Gfx7 platforms (IVB, BYT) even though it isn't
2003    * mentioned on their work-arounds pages.
2004    */
2005   if (devinfo->ver == 7) {
2006      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
2007          dest.nr == BRW_ARF_NULL) {
2008         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
2009      }
2010   }
2011}
2012
2013void brw_CMPN(struct brw_codegen *p,
2014              struct brw_reg dest,
2015              unsigned conditional,
2016              struct brw_reg src0,
2017              struct brw_reg src1)
2018{
2019   const struct intel_device_info *devinfo = p->devinfo;
2020   brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
2021
2022   brw_inst_set_cond_modifier(devinfo, insn, conditional);
2023   brw_set_dest(p, insn, dest);
2024   brw_set_src0(p, insn, src0);
2025   brw_set_src1(p, insn, src1);
2026
2027   /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
2028    * says:
2029    *
2030    *    If the destination is the null register, the {Switch} instruction
2031    *    option must be used.
2032    *
2033    * Page 77 of the Haswell PRM Volume 2b contains the same text.
2034    */
2035   if (devinfo->ver == 7) {
2036      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
2037          dest.nr == BRW_ARF_NULL) {
2038         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
2039      }
2040   }
2041}
2042
2043/***********************************************************************
2044 * Helpers for the various SEND message types:
2045 */
2046
2047/** Extended math function, float[8].
2048 */
2049void gfx4_math(struct brw_codegen *p,
2050	       struct brw_reg dest,
2051	       unsigned function,
2052	       unsigned msg_reg_nr,
2053	       struct brw_reg src,
2054	       unsigned precision )
2055{
2056   const struct intel_device_info *devinfo = p->devinfo;
2057   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2058   unsigned data_type;
2059   if (has_scalar_region(src)) {
2060      data_type = BRW_MATH_DATA_SCALAR;
2061   } else {
2062      data_type = BRW_MATH_DATA_VECTOR;
2063   }
2064
2065   assert(devinfo->ver < 6);
2066
2067   /* Example code doesn't set predicate_control for send
2068    * instructions.
2069    */
2070   brw_inst_set_pred_control(devinfo, insn, 0);
2071   brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2072
2073   brw_set_dest(p, insn, dest);
2074   brw_set_src0(p, insn, src);
2075   brw_set_math_message(p,
2076                        insn,
2077                        function,
2078                        src.type == BRW_REGISTER_TYPE_D,
2079                        precision,
2080                        data_type);
2081}
2082
2083void gfx6_math(struct brw_codegen *p,
2084	       struct brw_reg dest,
2085	       unsigned function,
2086	       struct brw_reg src0,
2087	       struct brw_reg src1)
2088{
2089   const struct intel_device_info *devinfo = p->devinfo;
2090   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
2091
2092   assert(devinfo->ver >= 6);
2093
2094   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
2095          (devinfo->ver >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
2096
2097   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
2098   if (devinfo->ver == 6) {
2099      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
2100      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
2101   }
2102
2103   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
2104       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
2105       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
2106      assert(src0.type != BRW_REGISTER_TYPE_F);
2107      assert(src1.type != BRW_REGISTER_TYPE_F);
2108      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
2109             (devinfo->ver >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
2110      /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
2111       *     INT DIV function does not support source modifiers.
2112       */
2113      assert(!src0.negate);
2114      assert(!src0.abs);
2115      assert(!src1.negate);
2116      assert(!src1.abs);
2117   } else {
2118      assert(src0.type == BRW_REGISTER_TYPE_F ||
2119             (src0.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
2120      assert(src1.type == BRW_REGISTER_TYPE_F ||
2121             (src1.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
2122   }
2123
2124   /* Source modifiers are ignored for extended math instructions on Gfx6. */
2125   if (devinfo->ver == 6) {
2126      assert(!src0.negate);
2127      assert(!src0.abs);
2128      assert(!src1.negate);
2129      assert(!src1.abs);
2130   }
2131
2132   brw_inst_set_math_function(devinfo, insn, function);
2133
2134   brw_set_dest(p, insn, dest);
2135   brw_set_src0(p, insn, src0);
2136   brw_set_src1(p, insn, src1);
2137}
2138
2139/**
2140 * Return the right surface index to access the thread scratch space using
2141 * stateless dataport messages.
2142 */
2143unsigned
2144brw_scratch_surface_idx(const struct brw_codegen *p)
2145{
2146   /* The scratch space is thread-local so IA coherency is unnecessary. */
2147   if (p->devinfo->ver >= 8)
2148      return GFX8_BTI_STATELESS_NON_COHERENT;
2149   else
2150      return BRW_BTI_STATELESS;
2151}
2152
2153/**
2154 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
2155 * using a constant offset per channel.
2156 *
2157 * The offset must be aligned to oword size (16 bytes).  Used for
2158 * register spilling.
2159 */
2160void brw_oword_block_write_scratch(struct brw_codegen *p,
2161				   struct brw_reg mrf,
2162				   int num_regs,
2163				   unsigned offset)
2164{
2165   const struct intel_device_info *devinfo = p->devinfo;
2166   const unsigned target_cache =
2167      (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
2168       devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
2169       BRW_SFID_DATAPORT_WRITE);
2170   const struct tgl_swsb swsb = brw_get_default_swsb(p);
2171   uint32_t msg_type;
2172
2173   if (devinfo->ver >= 6)
2174      offset /= 16;
2175
2176   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2177
2178   const unsigned mlen = 1 + num_regs;
2179
2180   /* Set up the message header.  This is g0, with g0.2 filled with
2181    * the offset.  We don't want to leave our offset around in g0 or
2182    * it'll screw up texture samples, so set it up inside the message
2183    * reg.
2184    */
2185   {
2186      brw_push_insn_state(p);
2187      brw_set_default_exec_size(p, BRW_EXECUTE_8);
2188      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2189      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2190      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
2191
2192      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2193
2194      /* set message header global offset field (reg 0, element 2) */
2195      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2196      brw_set_default_swsb(p, tgl_swsb_null());
2197      brw_MOV(p,
2198	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2199				  mrf.nr,
2200				  2), BRW_REGISTER_TYPE_UD),
2201	      brw_imm_ud(offset));
2202
2203      brw_pop_insn_state(p);
2204      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
2205   }
2206
2207   {
2208      struct brw_reg dest;
2209      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2210      int send_commit_msg;
2211      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
2212					 BRW_REGISTER_TYPE_UW);
2213
2214      brw_inst_set_sfid(devinfo, insn, target_cache);
2215      brw_inst_set_compression(devinfo, insn, false);
2216
2217      if (brw_inst_exec_size(devinfo, insn) >= 16)
2218	 src_header = vec16(src_header);
2219
2220      assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
2221      if (devinfo->ver < 6)
2222         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2223
2224      /* Until gfx6, writes followed by reads from the same location
2225       * are not guaranteed to be ordered unless write_commit is set.
2226       * If set, then a no-op write is issued to the destination
2227       * register to set a dependency, and a read from the destination
2228       * can be used to ensure the ordering.
2229       *
2230       * For gfx6, only writes between different threads need ordering
2231       * protection.  Our use of DP writes is all about register
2232       * spilling within a thread.
2233       */
2234      if (devinfo->ver >= 6) {
2235	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2236	 send_commit_msg = 0;
2237      } else {
2238	 dest = src_header;
2239	 send_commit_msg = 1;
2240      }
2241
2242      brw_set_dest(p, insn, dest);
2243      if (devinfo->ver >= 6) {
2244	 brw_set_src0(p, insn, mrf);
2245      } else {
2246	 brw_set_src0(p, insn, brw_null_reg());
2247      }
2248
2249      if (devinfo->ver >= 6)
2250	 msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2251      else
2252	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2253
2254      brw_set_desc(p, insn,
2255                   brw_message_desc(devinfo, mlen, send_commit_msg, true) |
2256                   brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
2257                                     BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
2258                                     msg_type, send_commit_msg));
2259   }
2260}
2261
2262
2263/**
2264 * Read a block of owords (half a GRF each) from the scratch buffer
2265 * using a constant index per channel.
2266 *
2267 * Offset must be aligned to oword size (16 bytes).  Used for register
2268 * spilling.
2269 */
2270void
2271brw_oword_block_read_scratch(struct brw_codegen *p,
2272			     struct brw_reg dest,
2273			     struct brw_reg mrf,
2274			     int num_regs,
2275			     unsigned offset)
2276{
2277   const struct intel_device_info *devinfo = p->devinfo;
2278   const struct tgl_swsb swsb = brw_get_default_swsb(p);
2279
2280   if (devinfo->ver >= 6)
2281      offset /= 16;
2282
2283   if (p->devinfo->ver >= 7) {
2284      /* On gen 7 and above, we no longer have message registers and we can
2285       * send from any register we want.  By using the destination register
2286       * for the message, we guarantee that the implied message write won't
2287       * accidentally overwrite anything.  This has been a problem because
2288       * the MRF registers and source for the final FB write are both fixed
2289       * and may overlap.
2290       */
2291      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
2292   } else {
2293      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2294   }
2295   dest = retype(dest, BRW_REGISTER_TYPE_UW);
2296
2297   const unsigned rlen = num_regs;
2298   const unsigned target_cache =
2299      (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
2300       devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
2301       BRW_SFID_DATAPORT_READ);
2302
2303   {
2304      brw_push_insn_state(p);
2305      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
2306      brw_set_default_exec_size(p, BRW_EXECUTE_8);
2307      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2308      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2309
2310      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2311
2312      /* set message header global offset field (reg 0, element 2) */
2313      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2314      brw_set_default_swsb(p, tgl_swsb_null());
2315      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
2316
2317      brw_pop_insn_state(p);
2318      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
2319   }
2320
2321   {
2322      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2323
2324      brw_inst_set_sfid(devinfo, insn, target_cache);
2325      assert(brw_inst_pred_control(devinfo, insn) == 0);
2326      brw_inst_set_compression(devinfo, insn, false);
2327
2328      brw_set_dest(p, insn, dest);	/* UW? */
2329      if (devinfo->ver >= 6) {
2330	 brw_set_src0(p, insn, mrf);
2331      } else {
2332	 brw_set_src0(p, insn, brw_null_reg());
2333         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2334      }
2335
2336      brw_set_desc(p, insn,
2337                   brw_message_desc(devinfo, 1, rlen, true) |
2338                   brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
2339                                    BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
2340                                    BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2341                                    BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
2342   }
2343}
2344
/**
 * Read \p num_regs full registers from the thread's scratch space using the
 * Gfx7+ dataport scratch-block-read message.
 *
 * \param dest     first register to receive the data (rlen == num_regs)
 * \param num_regs number of whole registers (HWords) to read
 * \param offset   byte offset into scratch; must be a multiple of REG_SIZE
 *                 and fit in the 12-bit HWord offset field
 */
void
gfx7_block_read_scratch(struct brw_codegen *p,
                        struct brw_reg dest,
                        int num_regs,
                        unsigned offset)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
   assert(brw_inst_pred_control(p->devinfo, insn) == BRW_PREDICATE_NONE);

   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));

   /* The HW requires that the header is present; this is to get the g0.5
    * scratch offset.
    */
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));

   /* According to the docs, offset is "A 12-bit HWord offset into the memory
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
    * is 32 bytes, which happens to be the size of a register.
    */
   offset /= REG_SIZE;
   assert(offset < (1 << 12));

   gfx7_set_dp_scratch_message(p, insn,
                               false, /* scratch read */
                               false, /* OWords */
                               false, /* invalidate after read */
                               num_regs,
                               offset,
                               1,        /* mlen: just g0 */
                               num_regs, /* rlen */
                               true);    /* header present */
}
2378
2379/**
2380 * Read float[4] vectors from the data port constant cache.
2381 * Location (in buffer) should be a multiple of 16.
2382 * Used for fetching shader constants.
2383 */
void brw_oword_block_read(struct brw_codegen *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* Gfx6+ reads through the constant cache; before that, the generic
    * data-port read SFID is used.
    */
   const unsigned target_cache =
      (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_CONSTANT_CACHE :
       BRW_SFID_DATAPORT_READ);
   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
   /* Remember the caller's software scoreboard state so the header setup
    * MOVs and the SEND get correct dependency annotations.
    */
   const struct tgl_swsb swsb = brw_get_default_swsb(p);

   /* On newer hardware, offset is in units of owords. */
   if (devinfo->ver >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);

   /* Build the message header: start from g0, then patch in the offset. */
   brw_push_insn_state(p);
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_set_default_exec_size(p, BRW_EXECUTE_1);
   brw_set_default_swsb(p, tgl_swsb_null());
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));
   brw_pop_insn_state(p);

   /* The SEND consumes the header written above. */
   brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));

   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);

   brw_inst_set_sfid(devinfo, insn, target_cache);

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (devinfo->ver >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      /* Pre-Gfx6, the payload comes implicitly from the base MRF. */
      brw_set_src0(p, insn, brw_null_reg());
      brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
   }

   brw_set_desc(p, insn,
                brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
                brw_dp_read_desc(devinfo, bind_table_index,
                                 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
                                 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                                 BRW_DATAPORT_READ_TARGET_DATA_CACHE));

   brw_pop_insn_state(p);
}
2449
2450brw_inst *
2451brw_fb_WRITE(struct brw_codegen *p,
2452             struct brw_reg payload,
2453             struct brw_reg implied_header,
2454             unsigned msg_control,
2455             unsigned binding_table_index,
2456             unsigned msg_length,
2457             unsigned response_length,
2458             bool eot,
2459             bool last_render_target,
2460             bool header_present)
2461{
2462   const struct intel_device_info *devinfo = p->devinfo;
2463   const unsigned target_cache =
2464      (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
2465       BRW_SFID_DATAPORT_WRITE);
2466   brw_inst *insn;
2467   struct brw_reg dest, src0;
2468
2469   if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16)
2470      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2471   else
2472      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2473
2474   if (devinfo->ver >= 6) {
2475      insn = next_insn(p, BRW_OPCODE_SENDC);
2476   } else {
2477      insn = next_insn(p, BRW_OPCODE_SEND);
2478   }
2479   brw_inst_set_sfid(devinfo, insn, target_cache);
2480   brw_inst_set_compression(devinfo, insn, false);
2481
2482   if (devinfo->ver >= 6) {
2483      /* headerless version, just submit color payload */
2484      src0 = payload;
2485   } else {
2486      assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
2487      brw_inst_set_base_mrf(devinfo, insn, payload.nr);
2488      src0 = implied_header;
2489   }
2490
2491   brw_set_dest(p, insn, dest);
2492   brw_set_src0(p, insn, src0);
2493   brw_set_desc(p, insn,
2494                brw_message_desc(devinfo, msg_length, response_length,
2495                                 header_present) |
2496                brw_fb_write_desc(devinfo, binding_table_index, msg_control,
2497                                  last_render_target,
2498                                  false /* coarse_write */));
2499   brw_inst_set_eot(devinfo, insn, eot);
2500
2501   return insn;
2502}
2503
/**
 * Emit a Gfx9+ render target read (framebuffer fetch) message through the
 * render cache data port.  Returns the emitted instruction.
 */
brw_inst *
gfx9_fb_READ(struct brw_codegen *p,
             struct brw_reg dst,
             struct brw_reg payload,
             unsigned binding_table_index,
             unsigned msg_length,
             unsigned response_length,
             bool per_sample)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* RT reads only exist on Gfx9 and later. */
   assert(devinfo->ver >= 9);
   brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);

   brw_inst_set_sfid(devinfo, insn, GFX6_SFID_DATAPORT_RENDER_CACHE);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, payload);
   brw_set_desc(
      p, insn,
      brw_message_desc(devinfo, msg_length, response_length, true) |
      brw_fb_read_desc(devinfo, binding_table_index, 0 /* msg_control */,
                       1 << brw_get_default_exec_size(p), per_sample));
   /* Select which group of 16 channels this message applies to. */
   brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);

   return insn;
}
2529
2530/**
2531 * Texture sample instruction.
2532 * Note: the msg_type plus msg_length values determine exactly what kind
2533 * of sampling operation is performed.  See volume 4, page 161 of docs.
2534 */
void brw_SAMPLE(struct brw_codegen *p,
		struct brw_reg dest,
		unsigned msg_reg_nr,
		struct brw_reg src0,
		unsigned binding_table_index,
		unsigned sampler,
		unsigned msg_type,
		unsigned response_length,
		unsigned msg_length,
		unsigned header_present,
		unsigned simd_mode,
		unsigned return_format)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* msg_reg_nr == -1 (all-ones, since the parameter is unsigned) means no
    * implied move to an MRF is wanted.
    */
   if (msg_reg_nr != -1)
      gfx6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send). The hardware behavior is undefined if this instruction is
    *     set as compressed. However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   brw_inst_set_compression(devinfo, insn, false);

   if (devinfo->ver < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_desc(p, insn,
                brw_message_desc(devinfo, msg_length, response_length,
                                 header_present) |
                brw_sampler_desc(devinfo, binding_table_index, sampler,
                                 msg_type, simd_mode, return_format));
}
2583
2584/* Adjust the message header's sampler state pointer to
2585 * select the correct group of 16 samplers.
2586 */
2587void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
2588                                      struct brw_reg header,
2589                                      struct brw_reg sampler_index)
2590{
2591   /* The "Sampler Index" field can only store values between 0 and 15.
2592    * However, we can add an offset to the "Sampler State Pointer"
2593    * field, effectively selecting a different set of 16 samplers.
2594    *
2595    * The "Sampler State Pointer" needs to be aligned to a 32-byte
2596    * offset, and each sampler state is only 16-bytes, so we can't
2597    * exclusively use the offset - we have to use both.
2598    */
2599
2600   const struct intel_device_info *devinfo = p->devinfo;
2601
2602   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
2603      const int sampler_state_size = 16; /* 16 bytes */
2604      uint32_t sampler = sampler_index.ud;
2605
2606      if (sampler >= 16) {
2607         assert(devinfo->verx10 >= 75);
2608         brw_ADD(p,
2609                 get_element_ud(header, 3),
2610                 get_element_ud(brw_vec8_grf(0, 0), 3),
2611                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
2612      }
2613   } else {
2614      /* Non-const sampler array indexing case */
2615      if (devinfo->verx10 <= 70) {
2616         return;
2617      }
2618
2619      struct brw_reg temp = get_element_ud(header, 3);
2620
2621      brw_push_insn_state(p);
2622      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
2623      brw_set_default_swsb(p, tgl_swsb_regdist(1));
2624      brw_SHL(p, temp, temp, brw_imm_ud(4));
2625      brw_ADD(p,
2626              get_element_ud(header, 3),
2627              get_element_ud(brw_vec8_grf(0, 0), 3),
2628              temp);
2629      brw_pop_insn_state(p);
2630   }
2631}
2632
2633/* All these variables are pretty confusing - we might be better off
2634 * using bitmasks and macros for this, in the old style.  Or perhaps
2635 * just having the caller instantiate the fields in dword3 itself.
2636 */
void brw_urb_WRITE(struct brw_codegen *p,
		   struct brw_reg dest,
		   unsigned msg_reg_nr,
		   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
		   unsigned msg_length,
		   unsigned response_length,
		   unsigned offset,
		   unsigned swizzle)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* On Gfx6+, move the payload out of the (pre-Gfx6) implied MRF first. */
   gfx6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (devinfo->ver >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      /* OR 0xff00 into dword 5 of the header (element 5 of the MRF). */
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF(devinfo->ver));

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (devinfo->ver < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_urb_message(p,
		       insn,
		       flags,
		       msg_length,
		       response_length,
		       offset,
		       swizzle);
}
2684
2685void
2686brw_send_indirect_message(struct brw_codegen *p,
2687                          unsigned sfid,
2688                          struct brw_reg dst,
2689                          struct brw_reg payload,
2690                          struct brw_reg desc,
2691                          unsigned desc_imm,
2692                          bool eot)
2693{
2694   const struct intel_device_info *devinfo = p->devinfo;
2695   struct brw_inst *send;
2696
2697   dst = retype(dst, BRW_REGISTER_TYPE_UW);
2698
2699   assert(desc.type == BRW_REGISTER_TYPE_UD);
2700
2701   if (desc.file == BRW_IMMEDIATE_VALUE) {
2702      send = next_insn(p, BRW_OPCODE_SEND);
2703      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
2704      brw_set_desc(p, send, desc.ud | desc_imm);
2705   } else {
2706      const struct tgl_swsb swsb = brw_get_default_swsb(p);
2707      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2708
2709      brw_push_insn_state(p);
2710      brw_set_default_access_mode(p, BRW_ALIGN_1);
2711      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2712      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2713      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2714      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
2715
2716      /* Load the indirect descriptor to an address register using OR so the
2717       * caller can specify additional descriptor bits with the desc_imm
2718       * immediate.
2719       */
2720      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
2721
2722      brw_pop_insn_state(p);
2723
2724      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
2725      send = next_insn(p, BRW_OPCODE_SEND);
2726      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
2727
2728      if (devinfo->ver >= 12)
2729         brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
2730      else
2731         brw_set_src1(p, send, addr);
2732   }
2733
2734   brw_set_dest(p, send, dst);
2735   brw_inst_set_sfid(devinfo, send, sfid);
2736   brw_inst_set_eot(devinfo, send, eot);
2737}
2738
2739void
2740brw_send_indirect_split_message(struct brw_codegen *p,
2741                                unsigned sfid,
2742                                struct brw_reg dst,
2743                                struct brw_reg payload0,
2744                                struct brw_reg payload1,
2745                                struct brw_reg desc,
2746                                unsigned desc_imm,
2747                                struct brw_reg ex_desc,
2748                                unsigned ex_desc_imm,
2749                                bool eot)
2750{
2751   const struct intel_device_info *devinfo = p->devinfo;
2752   struct brw_inst *send;
2753
2754   dst = retype(dst, BRW_REGISTER_TYPE_UW);
2755
2756   assert(desc.type == BRW_REGISTER_TYPE_UD);
2757
2758   if (desc.file == BRW_IMMEDIATE_VALUE) {
2759      desc.ud |= desc_imm;
2760   } else {
2761      const struct tgl_swsb swsb = brw_get_default_swsb(p);
2762      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2763
2764      brw_push_insn_state(p);
2765      brw_set_default_access_mode(p, BRW_ALIGN_1);
2766      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2767      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2768      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2769      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
2770
2771      /* Load the indirect descriptor to an address register using OR so the
2772       * caller can specify additional descriptor bits with the desc_imm
2773       * immediate.
2774       */
2775      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
2776
2777      brw_pop_insn_state(p);
2778      desc = addr;
2779
2780      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
2781   }
2782
2783   if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
2784       (devinfo->ver >= 12 ||
2785        ((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
2786      ex_desc.ud |= ex_desc_imm;
2787   } else {
2788      const struct tgl_swsb swsb = brw_get_default_swsb(p);
2789      struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
2790
2791      brw_push_insn_state(p);
2792      brw_set_default_access_mode(p, BRW_ALIGN_1);
2793      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2794      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2795      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2796      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
2797
2798      /* Load the indirect extended descriptor to an address register using OR
2799       * so the caller can specify additional descriptor bits with the
2800       * desc_imm immediate.
2801       *
2802       * Even though the instruction dispatcher always pulls the SFID and EOT
2803       * fields from the instruction itself, actual external unit which
2804       * processes the message gets the SFID and EOT from the extended
2805       * descriptor which comes from the address register.  If we don't OR
2806       * those two bits in, the external unit may get confused and hang.
2807       */
2808      unsigned imm_part = ex_desc_imm | sfid | eot << 5;
2809
2810      if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
2811         /* ex_desc bits 15:12 don't exist in the instruction encoding prior
2812          * to Gfx12, so we may have fallen back to an indirect extended
2813          * descriptor.
2814          */
2815         brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
2816      } else {
2817         brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
2818      }
2819
2820      brw_pop_insn_state(p);
2821      ex_desc = addr;
2822
2823      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
2824   }
2825
2826   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
2827   brw_set_dest(p, send, dst);
2828   brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
2829   brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
2830
2831   if (desc.file == BRW_IMMEDIATE_VALUE) {
2832      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
2833      brw_inst_set_send_desc(devinfo, send, desc.ud);
2834   } else {
2835      assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
2836      assert(desc.nr == BRW_ARF_ADDRESS);
2837      assert(desc.subnr == 0);
2838      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
2839   }
2840
2841   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
2842      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
2843      brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
2844   } else {
2845      assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
2846      assert(ex_desc.nr == BRW_ARF_ADDRESS);
2847      assert((ex_desc.subnr & 0x3) == 0);
2848      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
2849      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
2850   }
2851
2852   brw_inst_set_sfid(devinfo, send, sfid);
2853   brw_inst_set_eot(devinfo, send, eot);
2854}
2855
2856static void
2857brw_send_indirect_surface_message(struct brw_codegen *p,
2858                                  unsigned sfid,
2859                                  struct brw_reg dst,
2860                                  struct brw_reg payload,
2861                                  struct brw_reg surface,
2862                                  unsigned desc_imm)
2863{
2864   if (surface.file != BRW_IMMEDIATE_VALUE) {
2865      const struct tgl_swsb swsb = brw_get_default_swsb(p);
2866      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2867
2868      brw_push_insn_state(p);
2869      brw_set_default_access_mode(p, BRW_ALIGN_1);
2870      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2871      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2872      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2873      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
2874
2875      /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2876       * some surface array is accessed out of bounds.
2877       */
2878      brw_AND(p, addr,
2879              suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
2880                        BRW_GET_SWZ(surface.swizzle, 0)),
2881              brw_imm_ud(0xff));
2882
2883      brw_pop_insn_state(p);
2884
2885      surface = addr;
2886      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
2887   }
2888
2889   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
2890}
2891
2892static bool
2893while_jumps_before_offset(const struct intel_device_info *devinfo,
2894                          brw_inst *insn, int while_offset, int start_offset)
2895{
2896   int scale = 16 / brw_jump_scale(devinfo);
2897   int jip = devinfo->ver == 6 ? brw_inst_gfx6_jump_count(devinfo, insn)
2898                               : brw_inst_jip(devinfo, insn);
2899   assert(jip < 0);
2900   return while_offset + jip * scale <= start_offset;
2901}
2902
2903
/**
 * Scan forward from \p start_offset for the end of the current control-flow
 * block (ENDIF, ELSE, HALT, or a WHILE closing the enclosing loop), skipping
 * over nested IF/ENDIF pairs.  Returns the instruction offset, or 0 if none
 * was found before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   /* Nesting depth of IF blocks opened after start_offset. */
   int depth = 0;

   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      switch (brw_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop.  Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   /* No block end found. */
   return 0;
}
2946
2947/* There is no DO instruction on gfx6, so to find the end of the loop
2948 * we have to see if the loop is jumping back before our start
2949 * instruction.
2950 */
/* There is no DO instruction on gfx6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   void *store = p->store;

   assert(devinfo->ver >= 6);

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      /* The first WHILE branching back past start_offset closes our loop. */
      if (brw_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
	 if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
	    return offset;
      }
   }
   /* Every loop must be terminated by a WHILE. */
   assert(!"not reached");
   return start_offset;
}
2976
2977/* After program generation, go back and update the UIP and JIP of
2978 * BREAK, CONT, and HALT instructions to their correct locations.
2979 */
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   /* Jump distances are encoded in units of 16/br bytes. */
   int br = brw_jump_scale(devinfo);
   int scale = 16 / br;
   void *store = p->store;

   /* Pre-Gfx6 has no UIP/JIP fields to patch. */
   if (devinfo->ver < 6)
      return;

   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
      brw_inst *insn = store + offset;
      /* Compacted instructions would break the fixed 16-byte stride. */
      assert(brw_inst_cmpt_control(devinfo, insn) == 0);

      switch (brw_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_BREAK: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
	 /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
         brw_inst_set_uip(devinfo, insn,
	    (brw_find_loop_end(p, offset) - offset +
             (devinfo->ver == 6 ? 16 : 0)) / scale);
	 break;
      }

      case BRW_OPCODE_CONTINUE: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_inst_set_uip(devinfo, insn,
            (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
	 break;
      }

      case BRW_OPCODE_ENDIF: {
         /* An ENDIF with no following block end jumps to the next
          * instruction (a distance of one instruction, i.e. 1 * br).
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         if (devinfo->ver >= 7)
            brw_inst_set_jip(devinfo, insn, jump);
         else
            brw_inst_set_gfx6_jump_count(devinfo, insn, jump);
	 break;
      }

      case BRW_OPCODE_HALT: {
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same. In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
         int block_end_offset = brw_find_next_block_end(p, offset);
	 if (block_end_offset == 0) {
            brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
	 } else {
            brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
	 }
         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
	 break;
      }

      default:
         break;
      }
   }
}
3059
/**
 * Emit an FF_SYNC URB message (used by pre-Gfx7 geometry fixed-function
 * threads to allocate/synchronize URB handles before writing output).
 */
void brw_ff_sync(struct brw_codegen *p,
		   struct brw_reg dest,
		   unsigned msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   unsigned response_length,
		   bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* On Gfx6+, move the payload out of the (pre-Gfx6) implied MRF first. */
   gfx6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (devinfo->ver < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}
3087
3088/**
3089 * Emit the SEND instruction necessary to generate stream output data on Gfx6
3090 * (for transform feedback).
3091 *
3092 * If send_commit_msg is true, this is the last piece of stream output data
3093 * from this thread, so send the data as a committed write.  According to the
3094 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
3095 *
3096 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
3097 *   writes are complete by sending the final write as a committed write."
3098 */
void
brw_svb_write(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned msg_reg_nr,
              struct brw_reg src0,
              unsigned binding_table_index,
              bool   send_commit_msg)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* Streamed vertex buffer writes only exist on Gfx6. */
   assert(devinfo->ver == 6);
   const unsigned target_cache = GFX6_SFID_DATAPORT_RENDER_CACHE;
   brw_inst *insn;

   gfx6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_inst_set_sfid(devinfo, insn, target_cache);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   /* A committed write returns one register of data, so the response length
    * is 1 exactly when send_commit_msg is set (bool converts to 0/1 here).
    */
   brw_set_desc(p, insn,
                brw_message_desc(devinfo, 1, send_commit_msg, true) |
                brw_dp_write_desc(devinfo, binding_table_index,
                                  0, /* msg_control: ignored */
                                  GFX6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
                                  send_commit_msg)); /* send_commit_msg */
}
3125
/* Number of registers needed to hold one surface message payload/response:
 * SIMD4x2 packs everything into a single register, SIMD8 takes one register
 * per channel, and SIMD16 takes two.
 */
static unsigned
brw_surface_payload_size(unsigned num_channels,
                         unsigned exec_size /**< 0 for SIMD4x2 */)
{
   if (exec_size == 0)
      return 1; /* SIMD4x2 */

   const unsigned regs_per_channel = exec_size <= 8 ? 1 : 2;
   return regs_per_channel * num_channels;
}
3137
/**
 * Emit an untyped atomic operation message.  \p response_expected selects
 * whether the old value is returned (response length 1 "channel" when set,
 * since the bool converts to 0/1 for brw_surface_payload_size()).
 */
void
brw_untyped_atomic(struct brw_codegen *p,
                   struct brw_reg dst,
                   struct brw_reg payload,
                   struct brw_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* HSW+ moved untyped atomics to data cache 1. */
   const unsigned sfid = (devinfo->verx10 >= 75 ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GFX7_SFID_DATAPORT_DATA_CACHE);
   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
   /* SIMD4x2 untyped atomic instructions only exist on HSW+ */
   const bool has_simd4x2 = devinfo->verx10 >= 75;
   /* exec_size 0 encodes SIMD4x2; pre-HSW Align16 falls back to SIMD8. */
   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
                              has_simd4x2 ? 0 : 8;
   const unsigned response_length =
      brw_surface_payload_size(response_expected, exec_size);
   const unsigned desc =
      brw_message_desc(devinfo, msg_length, response_length, header_present) |
      brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op,
                                 response_expected);
   /* Mask out unused components -- This is especially important in Align16
    * mode on generations that don't have native support for SIMD4x2 atomics,
    * because unused but enabled components will cause the dataport to perform
    * additional atomic operations on the addresses that happen to be in the
    * uninitialized Y, Z and W coordinates of the payload.
    */
   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;

   brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
                                     payload, surface, desc);
}
3174
/**
 * Emit an untyped surface read message returning \p num_channels components
 * per enabled channel.
 */
void
brw_untyped_surface_read(struct brw_codegen *p,
                         struct brw_reg dst,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
                         unsigned num_channels)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* HSW+ moved untyped surface access to data cache 1. */
   const unsigned sfid = (devinfo->verx10 >= 75 ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GFX7_SFID_DATAPORT_DATA_CACHE);
   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
   /* exec_size 0 encodes SIMD4x2 for Align16 mode. */
   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
   const unsigned response_length =
      brw_surface_payload_size(num_channels, exec_size);
   const unsigned desc =
      brw_message_desc(devinfo, msg_length, response_length, false) |
      brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false);

   brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
}
3197
3198void
3199brw_untyped_surface_write(struct brw_codegen *p,
3200                          struct brw_reg payload,
3201                          struct brw_reg surface,
3202                          unsigned msg_length,
3203                          unsigned num_channels,
3204                          bool header_present)
3205{
3206   const struct intel_device_info *devinfo = p->devinfo;
3207   const unsigned sfid = (devinfo->verx10 >= 75 ?
3208                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
3209                          GFX7_SFID_DATAPORT_DATA_CACHE);
3210   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
3211   /* SIMD4x2 untyped surface write instructions only exist on HSW+ */
3212   const bool has_simd4x2 = devinfo->verx10 >= 75;
3213   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
3214                              has_simd4x2 ? 0 : 8;
3215   const unsigned desc =
3216      brw_message_desc(devinfo, msg_length, 0, header_present) |
3217      brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true);
3218   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3219   const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW;
3220
3221   brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask),
3222                                     payload, surface, desc);
3223}
3224
3225static void
3226brw_set_memory_fence_message(struct brw_codegen *p,
3227                             struct brw_inst *insn,
3228                             enum brw_message_target sfid,
3229                             bool commit_enable,
3230                             unsigned bti)
3231{
3232   const struct intel_device_info *devinfo = p->devinfo;
3233
3234   brw_set_desc(p, insn, brw_message_desc(
3235                   devinfo, 1, (commit_enable ? 1 : 0), true));
3236
3237   brw_inst_set_sfid(devinfo, insn, sfid);
3238
3239   switch (sfid) {
3240   case GFX6_SFID_DATAPORT_RENDER_CACHE:
3241      brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
3242      break;
3243   case GFX7_SFID_DATAPORT_DATA_CACHE:
3244      brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
3245      break;
3246   default:
3247      unreachable("Not reached");
3248   }
3249
3250   if (commit_enable)
3251      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
3252
3253   assert(devinfo->ver >= 11 || bti == 0);
3254   brw_inst_set_binding_table_index(devinfo, insn, bti);
3255}
3256
3257static void
3258gfx12_set_memory_fence_message(struct brw_codegen *p,
3259                               struct brw_inst *insn,
3260                               enum brw_message_target sfid,
3261                               uint32_t desc)
3262{
3263   const unsigned mlen = 1; /* g0 header */
3264    /* Completion signaled by write to register. No data returned. */
3265   const unsigned rlen = 1;
3266
3267   brw_inst_set_sfid(p->devinfo, insn, sfid);
3268
3269   if (sfid == BRW_SFID_URB) {
3270      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
3271                            brw_message_desc(p->devinfo, mlen, rlen, true));
3272   } else {
3273      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
3274      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);
3275
3276      if (sfid == GFX12_SFID_TGM) {
3277         scope = LSC_FENCE_TILE;
3278         flush_type = LSC_FLUSH_TYPE_EVICT;
3279      }
3280
3281      /* Wa_14014435656:
3282       *
3283       *   "For any fence greater than local scope, always set flush type to
3284       *    at least invalidate so that fence goes on properly."
3285       *
3286       *   "The bug is if flush_type is 'None', the scope is always downgraded
3287       *    to 'local'."
3288       *
3289       * Here set scope to NONE_6 instead of NONE, which has the same effect
3290       * as NONE but avoids the downgrade to scope LOCAL.
3291       */
3292      if (intel_device_info_is_dg2(p->devinfo) &&
3293          scope > LSC_FENCE_LOCAL &&
3294          flush_type == LSC_FLUSH_TYPE_NONE) {
3295         flush_type = LSC_FLUSH_TYPE_NONE_6;
3296      }
3297
3298      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
3299                                               flush_type, false) |
3300                            brw_message_desc(p->devinfo, mlen, rlen, false));
3301   }
3302}
3303
3304void
3305brw_memory_fence(struct brw_codegen *p,
3306                 struct brw_reg dst,
3307                 struct brw_reg src,
3308                 enum opcode send_op,
3309                 enum brw_message_target sfid,
3310                 uint32_t desc,
3311                 bool commit_enable,
3312                 unsigned bti)
3313{
3314   const struct intel_device_info *devinfo = p->devinfo;
3315
3316   dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
3317   src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
3318
3319   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
3320    * message doesn't write anything back.
3321    */
3322   struct brw_inst *insn = next_insn(p, send_op);
3323   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
3324   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
3325   brw_set_dest(p, insn, dst);
3326   brw_set_src0(p, insn, src);
3327
3328   /* All DG2 hardware requires LSC for fence messages, even A-step */
3329   if (devinfo->has_lsc)
3330      gfx12_set_memory_fence_message(p, insn, sfid, desc);
3331   else
3332      brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
3333}
3334
3335void
3336brw_pixel_interpolator_query(struct brw_codegen *p,
3337                             struct brw_reg dest,
3338                             struct brw_reg mrf,
3339                             bool noperspective,
3340                             bool coarse_pixel_rate,
3341                             unsigned mode,
3342                             struct brw_reg data,
3343                             unsigned msg_length,
3344                             unsigned response_length)
3345{
3346   const struct intel_device_info *devinfo = p->devinfo;
3347   const uint16_t exec_size = brw_get_default_exec_size(p);
3348   const unsigned slot_group = brw_get_default_group(p) / 16;
3349   const unsigned simd_mode = (exec_size == BRW_EXECUTE_16);
3350   const unsigned desc =
3351      brw_message_desc(devinfo, msg_length, response_length, false) |
3352      brw_pixel_interp_desc(devinfo, mode, noperspective, coarse_pixel_rate,
3353                            simd_mode, slot_group);
3354
3355   /* brw_send_indirect_message will automatically use a direct send message
3356    * if data is actually immediate.
3357    */
3358   brw_send_indirect_message(p,
3359                             GFX7_SFID_PIXEL_INTERPOLATOR,
3360                             dest,
3361                             mrf,
3362                             vec1(data),
3363                             desc,
3364                             false);
3365}
3366
/**
 * Write into vec1 \p dst the index of the first enabled channel of the
 * current execution mask, or of the last enabled channel when \p last is
 * set (Align1 path only).
 *
 * Gfx7-only helper (see the assert below).  In Align16 (SIMD4x2) mode the
 * result can only be 0 or 1 and \p last appears to be ignored --
 * NOTE(review): confirm Align16 callers never request the last channel.
 */
void
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, bool last)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
   const unsigned qtr_control = brw_get_default_group(p) / 8;
   brw_inst *inst;

   assert(devinfo->ver == 7);

   brw_push_insn_state(p);

   /* The flag register is only used on Gfx7 in align1 mode, so avoid setting
    * unnecessary bits in the instruction words, get the information we need
    * and reset the default flag register. This allows more instructions to be
    * compacted.
    */
   const unsigned flag_subreg = p->current->flag_subreg;
   brw_set_default_flag_reg(p, 0, 0);

   if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      const struct brw_reg flag = brw_flag_subreg(flag_subreg);

      /* Clear the flag before accumulating the execution mask into it. */
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));

      /* Run enough instructions returning zero with execution masking and
       * a conditional modifier enabled in order to get the full execution
       * mask in f1.0.  We could use a single 32-wide move here if it
       * weren't because of the hardware bug that causes channel enables to
       * be applied incorrectly to the second half of 32-wide instructions
       * on Gfx7.
       */
      const unsigned lower_size = MIN2(16, exec_size);
      for (unsigned i = 0; i < exec_size / lower_size; i++) {
         inst = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
                        brw_imm_uw(0));
         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
         brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
         brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
         brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2);
         brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2);
      }

      /* Find the first bit set in the exec_size-wide portion of the flag
       * register that was updated by the last sequence of MOV
       * instructions.
       */
      const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      if (!last) {
         inst = brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
      } else {
         /* LZD counts zeros from the MSB side; the last set bit's index is
          * therefore 31 minus the leading-zero count, computed here as
          * (-LZD) + 31 via a negated source.
          */
         inst = brw_LZD(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
         struct brw_reg neg = vec1(dst);
         neg.negate = true;
         inst = brw_ADD(p, vec1(dst), neg, brw_imm_uw(31));
      }
   } else {
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      /* Overwrite the destination without and with execution masking to
       * find out which of the channels is active.
       */
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_4);
      brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
              brw_imm_ud(1));

      inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
                     brw_imm_ud(0));
      brw_pop_insn_state(p);
      brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
   }

   brw_pop_insn_state(p);
}
3447
/**
 * Emit code copying the scalar component of \p src selected by \p idx into
 * every channel of \p dst.  \p idx may be an immediate or a dynamically
 * computed register; in the latter case the Align1 path uses indirect
 * addressing through a0 and the Align16 (SIMD4x2) path uses a predicated
 * SEL between the two possible slots.
 *
 * \p src must be a direct GRF with no source modifiers and the same type
 * as \p dst (see asserts below).
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
   brw_inst *inst;

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);

   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);
   assert(src.type == dst.type);

   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
       idx.file == BRW_IMMEDIATE_VALUE) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
      src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
                     stride(suboffset(src, 4 * i), 0, 4, 1);

      if (type_sz(src.type) > 4 && !devinfo->has_64bit_float) {
         /* No native 64-bit float moves: copy the low and high dwords
          * separately.
          */
         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
                    subscript(src, BRW_REGISTER_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
                    subscript(src, BRW_REGISTER_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset. The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      if (align1) {
         const struct brw_reg addr =
            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
         unsigned offset = src.nr * REG_SIZE + src.subnr;
         /* Limit in bytes of the signed indirect addressing immediate. */
         const unsigned limit = 512;

         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

         /* Take into account the component size and horizontal stride. */
         assert(src.vstride == src.hstride + src.width);
         brw_SHL(p, addr, vec1(idx),
                 brw_imm_ud(util_logbase2(type_sz(src.type)) +
                            src.hstride - 1));

         /* We can only address up to limit bytes using the indirect
          * addressing immediate, account for the difference if the source
          * register is above this limit.
          */
         if (offset >= limit) {
            brw_set_default_swsb(p, tgl_swsb_regdist(1));
            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
            offset = offset % limit;
         }

         brw_pop_insn_state(p);

         /* The indirect MOV below depends on the a0 write emitted above. */
         brw_set_default_swsb(p, tgl_swsb_regdist(1));

         /* Use indirect addressing to fetch the specified component. */
         if (type_sz(src.type) > 4 &&
             (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo) ||
              !devinfo->has_64bit_float)) {
            /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
             *
             *    "When source or destination datatype is 64b or operation is
             *    integer DWord multiply, indirect addressing must not be
             *    used."
             *
             * To work around both of this issue, we do two integer MOVs
             * insead of one 64-bit MOV.  Because no double value should ever
             * cross a register boundary, it's safe to use the immediate
             * offset in the indirect here to handle adding 4 bytes to the
             * offset and avoid the extra ADD to the register file.
             */
            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
                       retype(brw_vec1_indirect(addr.subnr, offset),
                              BRW_REGISTER_TYPE_D));
            brw_set_default_swsb(p, tgl_swsb_null());
            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
                       retype(brw_vec1_indirect(addr.subnr, offset + 4),
                              BRW_REGISTER_TYPE_D));
         } else {
            brw_MOV(p, dst,
                    retype(brw_vec1_indirect(addr.subnr, offset), src.type));
         }
      } else {
         /* In SIMD4x2 mode the index can be either zero or one, replicate it
          * to all bits of a flag register,
          */
         inst = brw_MOV(p,
                        brw_null_reg(),
                        stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);

         /* and use predicated SEL to pick the right channel. */
         inst = brw_SEL(p, dst,
                        stride(suboffset(src, 4), 4, 4, 1),
                        stride(src, 4, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
      }
   }

   brw_pop_insn_state(p);
}
3581
3582
3583/**
3584 * Emit the SEND message for a barrier
3585 */
3586void
3587brw_barrier(struct brw_codegen *p, struct brw_reg src)
3588{
3589   const struct intel_device_info *devinfo = p->devinfo;
3590   struct brw_inst *inst;
3591
3592   assert(devinfo->ver >= 7);
3593
3594   brw_push_insn_state(p);
3595   brw_set_default_access_mode(p, BRW_ALIGN_1);
3596   inst = next_insn(p, BRW_OPCODE_SEND);
3597   brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
3598   brw_set_src0(p, inst, src);
3599   brw_set_src1(p, inst, brw_null_reg());
3600   brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
3601
3602   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
3603   brw_inst_set_gateway_subfuncid(devinfo, inst,
3604                                  BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
3605
3606   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
3607   brw_pop_insn_state(p);
3608}
3609
3610
3611/**
3612 * Emit the wait instruction for a barrier
3613 */
3614void
3615brw_WAIT(struct brw_codegen *p)
3616{
3617   const struct intel_device_info *devinfo = p->devinfo;
3618   struct brw_inst *insn;
3619
3620   struct brw_reg src = brw_notification_reg();
3621
3622   insn = next_insn(p, BRW_OPCODE_WAIT);
3623   brw_set_dest(p, insn, src);
3624   brw_set_src0(p, insn, src);
3625   brw_set_src1(p, insn, brw_null_reg());
3626
3627   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
3628   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
3629}
3630
3631void
3632brw_float_controls_mode(struct brw_codegen *p,
3633                        unsigned mode, unsigned mask)
3634{
3635   /* From the Skylake PRM, Volume 7, page 760:
3636    *  "Implementation Restriction on Register Access: When the control
3637    *   register is used as an explicit source and/or destination, hardware
3638    *   does not ensure execution pipeline coherency. Software must set the
3639    *   thread control field to ‘switch’ for an instruction that uses
3640    *   control register as an explicit operand."
3641    *
3642    * On Gfx12+ this is implemented in terms of SWSB annotations instead.
3643    */
3644   brw_set_default_swsb(p, tgl_swsb_regdist(1));
3645
3646   brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
3647                            brw_imm_ud(~mask));
3648   brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
3649   if (p->devinfo->ver < 12)
3650      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
3651
3652   if (mode) {
3653      brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
3654                                 brw_imm_ud(mode));
3655      brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
3656      if (p->devinfo->ver < 12)
3657         brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
3658   }
3659
3660   if (p->devinfo->ver >= 12)
3661      brw_SYNC(p, TGL_SYNC_NOP);
3662}
3663
3664void
3665brw_update_reloc_imm(const struct brw_isa_info *isa,
3666                     brw_inst *inst,
3667                     uint32_t value)
3668{
3669   const struct intel_device_info *devinfo = isa->devinfo;
3670
3671   /* Sanity check that the instruction is a MOV of an immediate */
3672   assert(brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
3673   assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
3674
3675   /* If it was compacted, we can't safely rewrite */
3676   assert(brw_inst_cmpt_control(devinfo, inst) == 0);
3677
3678   brw_inst_set_imm_ud(devinfo, inst, value);
3679}
3680
3681/* A default value for constants that will be patched at run-time.
3682 * We pick an arbitrary value that prevents instruction compaction.
3683 */
3684#define DEFAULT_PATCH_IMM 0x4a7cc037
3685
3686void
3687brw_MOV_reloc_imm(struct brw_codegen *p,
3688                  struct brw_reg dst,
3689                  enum brw_reg_type src_type,
3690                  uint32_t id)
3691{
3692   assert(type_sz(src_type) == 4);
3693   assert(type_sz(dst.type) == 4);
3694
3695   brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
3696                 p->next_insn_offset, 0);
3697
3698   brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
3699}
3700