xref: /third_party/mesa3d/src/intel/compiler/brw_eu.h (revision bf215546)
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32
33#ifndef BRW_EU_H
34#define BRW_EU_H
35
36#include <stdbool.h>
37#include <stdio.h>
38#include "brw_inst.h"
39#include "brw_compiler.h"
40#include "brw_eu_defines.h"
41#include "brw_isa_info.h"
42#include "brw_reg.h"
43#include "brw_disasm_info.h"
44
45#include "util/bitset.h"
46
47#ifdef __cplusplus
48extern "C" {
49#endif
50
/* Number of entries in the instruction-state stack embedded in
 * struct brw_codegen (the size of its `stack` array).
 */
#define BRW_EU_MAX_INSN_STACK 5
52
/**
 * Set of per-instruction control fields tracked by the code generator.
 *
 * struct brw_codegen keeps an array of these (its `stack` member) together
 * with a `current` pointer of this type.  Most fields are bitfields; the
 * declaration order determines the layout, so do not reorder them casually.
 */
struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   /* NOTE(review): presumably the value set by brw_set_default_saturate() --
    * confirm against the implementation.
    */
   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   /* NOTE(review): presumably the value set by
    * brw_set_default_predicate_inverse() -- confirm.
    */
   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   /* NOTE(review): presumably the value set by
    * brw_set_default_acc_write_control() -- confirm.
    */
   bool acc_wr_control:1;
};
84
85
/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 *
 * The expansion refers to a variable named `p`, which must be in scope at
 * the point of use, and reads p->store[p->nr_insn - 1]; it is therefore only
 * meaningful once at least one instruction has been emitted (nr_insn > 0).
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
91
/**
 * State of an EU code generation session: the instruction store, the stack
 * of default instruction states, the control-flow patch stacks and the
 * shader relocation list.
 */
struct brw_codegen {
   /* Instruction storage; brw_last_inst indexes it as store[nr_insn - 1]. */
   brw_inst *store;
   /* NOTE(review): presumably the allocated capacity of `store` -- confirm
    * the units against the implementation.
    */
   int store_size;
   /* Count of instructions emitted so far (brw_last_inst uses nr_insn - 1
    * as the index of the most recent one).
    */
   unsigned nr_insn;
   /* NOTE(review): looks like the offset at which the next instruction will
    * be placed -- confirm units.
    */
   unsigned int next_insn_offset;

   /* NOTE(review): presumably the mem_ctx handed to brw_init_codegen() --
    * confirm.
    */
   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register.  If false, it
    * will take whatever is set by brw_set_default_exec_size verbatim.
    *
    * This is set to true by default in brw_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct brw_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer(an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   /**
    * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack.  This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;

   /* Shader relocation array, with its element count and (presumably)
    * allocated capacity; exposed to callers via brw_get_shader_relocs().
    */
   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};
148
/**
 * Node of a singly linked list of labels (chained through `next`), pairing
 * an `offset` with a label `number`.  Used by the label/disassembly helpers
 * declared below (brw_find_label, brw_create_label, brw_label_assembly).
 */
struct brw_label {
   int offset;
   int number;
   struct brw_label *next;
};
154
/* Instruction-state API.  These operate on the code generator `p`; the
 * implementations live outside this header.
 *
 * Push/pop the instruction state (see the `stack`/`current` members of
 * struct brw_codegen).
 */
void brw_pop_insn_state( struct brw_codegen *p );
void brw_push_insn_state( struct brw_codegen *p );
/* Getters for the default instruction state. */
unsigned brw_get_default_exec_size(struct brw_codegen *p);
unsigned brw_get_default_group(struct brw_codegen *p);
unsigned brw_get_default_access_mode(struct brw_codegen *p);
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
/* Setters for the default instruction state.  The two brw_inst_set_*
 * functions interleaved below take an explicit instruction and device info
 * instead of the code generator.
 */
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
void brw_inst_set_compression(const struct intel_device_info *devinfo,
                              brw_inst *inst, bool on);
void brw_set_default_compression(struct brw_codegen *p, bool on);
void brw_inst_set_group(const struct intel_device_info *devinfo,
                        brw_inst *inst, unsigned group);
void brw_set_default_group(struct brw_codegen *p, unsigned group);
void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);
177
/* Code generator setup.  Per the comment on
 * brw_codegen::automatic_exec_sizes, brw_init_codegen enables automatic
 * exec sizes by default.
 */
void brw_init_codegen(const struct brw_isa_info *isa,
                      struct brw_codegen *p, void *mem_ctx);
/* Opcode queries; presumably report whether the opcode carries a JIP/UIP
 * jump field on the given device -- confirm against the implementation.
 */
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
/* Label list helpers operating on struct brw_label lists. */
const struct brw_label *brw_find_label(const struct brw_label *root, int offset);
void brw_create_label(struct brw_label **labels, int offset, void *mem_ctx);
/* Disassembly entry points; output goes to the supplied FILE stream. */
int brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
                         const struct brw_inst *inst, bool is_compacted,
                         int offset, const struct brw_label *root_label);
const struct
brw_label *brw_label_assembly(const struct brw_isa_info *isa,
                              const void *assembly, int start, int end,
                              void *mem_ctx);
void brw_disassemble_with_labels(const struct brw_isa_info *isa,
                                 const void *assembly, int start, int end, FILE *out);
void brw_disassemble(const struct brw_isa_info *isa,
                     const void *assembly, int start, int end,
                     const struct brw_label *root_label, FILE *out);
/* Accessors for the generated relocations and program; each takes an
 * out-parameter (num_relocs / sz) alongside the returned pointer.
 */
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );
199
/* NOTE(review): presumably replaces generated code starting at start_offset
 * with externally supplied assembly selected by `identifier`; the return
 * value presumably reports whether an override happened -- confirm.
 */
bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier);

/* Low-level emission helpers (implemented outside this header). */
void brw_realign(struct brw_codegen *p, unsigned align);
int brw_append_data(struct brw_codegen *p, void *data,
                    unsigned size, unsigned align);
brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
                   enum brw_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
/* Operand setters for an already-allocated instruction. */
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

/* Takes `src` by pointer, so the implementation may rewrite the caller's
 * register -- confirm against the implementation.
 */
void gfx6_resolve_implied_move(struct brw_codegen *p,
			       struct brw_reg *src,
			       unsigned msg_reg_nr);
216
/* Helpers for regular instructions:
 *
 * ALU1/ALU2/ALU3 each expand to the prototype of a function named brw_<OP>
 * that takes the code generator, a destination register and one, two or
 * three source registers respectively, and returns a brw_inst pointer.
 * The macros exist only to stamp out the declarations below and are
 * #undef'd immediately afterwards.
 */
#define ALU1(OP)				\
brw_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0);

#define ALU2(OP)				\
brw_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0,		\
	      struct brw_reg src1);

#define ALU3(OP)				\
brw_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0,		\
	      struct brw_reg src1,		\
	      struct brw_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

/* Keep the helper macros from leaking to includers of this header. */
#undef ALU1
#undef ALU2
#undef ALU3
286
287
288/* Helpers for SEND instruction:
289 */
290
291/**
292 * Construct a message descriptor immediate with the specified common
293 * descriptor controls.
294 */
295static inline uint32_t
296brw_message_desc(const struct intel_device_info *devinfo,
297                 unsigned msg_length,
298                 unsigned response_length,
299                 bool header_present)
300{
301   if (devinfo->ver >= 5) {
302      return (SET_BITS(msg_length, 28, 25) |
303              SET_BITS(response_length, 24, 20) |
304              SET_BITS(header_present, 19, 19));
305   } else {
306      return (SET_BITS(msg_length, 23, 20) |
307              SET_BITS(response_length, 19, 16));
308   }
309}
310
311static inline unsigned
312brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
313{
314   if (devinfo->ver >= 5)
315      return GET_BITS(desc, 28, 25);
316   else
317      return GET_BITS(desc, 23, 20);
318}
319
320static inline unsigned
321brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
322{
323   if (devinfo->ver >= 5)
324      return GET_BITS(desc, 24, 20);
325   else
326      return GET_BITS(desc, 19, 16);
327}
328
329static inline bool
330brw_message_desc_header_present(ASSERTED
331                                const struct intel_device_info *devinfo,
332                                uint32_t desc)
333{
334   assert(devinfo->ver >= 5);
335   return GET_BITS(desc, 19, 19);
336}
337
338static inline unsigned
339brw_message_ex_desc(UNUSED const struct intel_device_info *devinfo,
340                    unsigned ex_msg_length)
341{
342   return SET_BITS(ex_msg_length, 9, 6);
343}
344
345static inline unsigned
346brw_message_ex_desc_ex_mlen(UNUSED const struct intel_device_info *devinfo,
347                            uint32_t ex_desc)
348{
349   return GET_BITS(ex_desc, 9, 6);
350}
351
352static inline uint32_t
353brw_urb_desc(const struct intel_device_info *devinfo,
354             unsigned msg_type,
355             bool per_slot_offset_present,
356             bool channel_mask_present,
357             unsigned global_offset)
358{
359   if (devinfo->ver >= 8) {
360      return (SET_BITS(per_slot_offset_present, 17, 17) |
361              SET_BITS(channel_mask_present, 15, 15) |
362              SET_BITS(global_offset, 14, 4) |
363              SET_BITS(msg_type, 3, 0));
364   } else if (devinfo->ver >= 7) {
365      assert(!channel_mask_present);
366      return (SET_BITS(per_slot_offset_present, 16, 16) |
367              SET_BITS(global_offset, 13, 3) |
368              SET_BITS(msg_type, 3, 0));
369   } else {
370      unreachable("unhandled URB write generation");
371   }
372}
373
374static inline uint32_t
375brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
376                      uint32_t desc)
377{
378   assert(devinfo->ver >= 7);
379   return GET_BITS(desc, 3, 0);
380}
381
382static inline uint32_t
383brw_urb_fence_desc(const struct intel_device_info *devinfo)
384{
385   assert(devinfo->has_lsc);
386   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
387}
388
389/**
390 * Construct a message descriptor immediate with the specified sampler
391 * function controls.
392 */
393static inline uint32_t
394brw_sampler_desc(const struct intel_device_info *devinfo,
395                 unsigned binding_table_index,
396                 unsigned sampler,
397                 unsigned msg_type,
398                 unsigned simd_mode,
399                 unsigned return_format)
400{
401   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
402                          SET_BITS(sampler, 11, 8));
403
404   /* From the CHV Bspec: Shared Functions - Message Descriptor -
405    * Sampling Engine:
406    *
407    *   SIMD Mode[2]  29    This field is the upper bit of the 3-bit
408    *                       SIMD Mode field.
409    */
410   if (devinfo->ver >= 8)
411      return desc | SET_BITS(msg_type, 16, 12) |
412             SET_BITS(simd_mode & 0x3, 18, 17) |
413             SET_BITS(simd_mode >> 2, 29, 29) |
414             SET_BITS(return_format, 30, 30);
415   if (devinfo->ver >= 7)
416      return (desc | SET_BITS(msg_type, 16, 12) |
417              SET_BITS(simd_mode, 18, 17));
418   else if (devinfo->ver >= 5)
419      return (desc | SET_BITS(msg_type, 15, 12) |
420              SET_BITS(simd_mode, 17, 16));
421   else if (devinfo->verx10 >= 45)
422      return desc | SET_BITS(msg_type, 15, 12);
423   else
424      return (desc | SET_BITS(return_format, 13, 12) |
425              SET_BITS(msg_type, 15, 14));
426}
427
428static inline unsigned
429brw_sampler_desc_binding_table_index(UNUSED
430                                     const struct intel_device_info *devinfo,
431                                     uint32_t desc)
432{
433   return GET_BITS(desc, 7, 0);
434}
435
436static inline unsigned
437brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
438                         uint32_t desc)
439{
440   return GET_BITS(desc, 11, 8);
441}
442
443static inline unsigned
444brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
445{
446   if (devinfo->ver >= 7)
447      return GET_BITS(desc, 16, 12);
448   else if (devinfo->verx10 >= 45)
449      return GET_BITS(desc, 15, 12);
450   else
451      return GET_BITS(desc, 15, 14);
452}
453
454static inline unsigned
455brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
456                           uint32_t desc)
457{
458   assert(devinfo->ver >= 5);
459   if (devinfo->ver >= 8)
460      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
461   else if (devinfo->ver >= 7)
462      return GET_BITS(desc, 18, 17);
463   else
464      return GET_BITS(desc, 17, 16);
465}
466
467static  inline unsigned
468brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
469                               uint32_t desc)
470{
471   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
472   if (devinfo->ver >= 8)
473      return GET_BITS(desc, 30, 30);
474   else
475      return GET_BITS(desc, 13, 12);
476}
477
478/**
479 * Construct a message descriptor for the dataport
480 */
481static inline uint32_t
482brw_dp_desc(const struct intel_device_info *devinfo,
483            unsigned binding_table_index,
484            unsigned msg_type,
485            unsigned msg_control)
486{
487   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
488    * helpers instead.
489    */
490   assert(devinfo->ver >= 6);
491   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
492   if (devinfo->ver >= 8) {
493      return (desc | SET_BITS(msg_control, 13, 8) |
494              SET_BITS(msg_type, 18, 14));
495   } else if (devinfo->ver >= 7) {
496      return (desc | SET_BITS(msg_control, 13, 8) |
497              SET_BITS(msg_type, 17, 14));
498   } else {
499      return (desc | SET_BITS(msg_control, 12, 8) |
500              SET_BITS(msg_type, 16, 13));
501   }
502}
503
504static inline unsigned
505brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
506                                uint32_t desc)
507{
508   return GET_BITS(desc, 7, 0);
509}
510
511static inline unsigned
512brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
513{
514   assert(devinfo->ver >= 6);
515   if (devinfo->ver >= 8)
516      return GET_BITS(desc, 18, 14);
517   else if (devinfo->ver >= 7)
518      return GET_BITS(desc, 17, 14);
519   else
520      return GET_BITS(desc, 16, 13);
521}
522
523static inline unsigned
524brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
525{
526   assert(devinfo->ver >= 6);
527   if (devinfo->ver >= 7)
528      return GET_BITS(desc, 13, 8);
529   else
530      return GET_BITS(desc, 12, 8);
531}
532
533/**
534 * Construct a message descriptor immediate with the specified dataport read
535 * function controls.
536 */
537static inline uint32_t
538brw_dp_read_desc(const struct intel_device_info *devinfo,
539                 unsigned binding_table_index,
540                 unsigned msg_control,
541                 unsigned msg_type,
542                 unsigned target_cache)
543{
544   if (devinfo->ver >= 6)
545      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
546   else if (devinfo->verx10 >= 45)
547      return (SET_BITS(binding_table_index, 7, 0) |
548              SET_BITS(msg_control, 10, 8) |
549              SET_BITS(msg_type, 13, 11) |
550              SET_BITS(target_cache, 15, 14));
551   else
552      return (SET_BITS(binding_table_index, 7, 0) |
553              SET_BITS(msg_control, 11, 8) |
554              SET_BITS(msg_type, 13, 12) |
555              SET_BITS(target_cache, 15, 14));
556}
557
558static inline unsigned
559brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
560                          uint32_t desc)
561{
562   if (devinfo->ver >= 6)
563      return brw_dp_desc_msg_type(devinfo, desc);
564   else if (devinfo->verx10 >= 45)
565      return GET_BITS(desc, 13, 11);
566   else
567      return GET_BITS(desc, 13, 12);
568}
569
570static inline unsigned
571brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
572                             uint32_t desc)
573{
574   if (devinfo->ver >= 6)
575      return brw_dp_desc_msg_control(devinfo, desc);
576   else if (devinfo->verx10 >= 45)
577      return GET_BITS(desc, 10, 8);
578   else
579      return GET_BITS(desc, 11, 8);
580}
581
582/**
583 * Construct a message descriptor immediate with the specified dataport write
584 * function controls.
585 */
586static inline uint32_t
587brw_dp_write_desc(const struct intel_device_info *devinfo,
588                  unsigned binding_table_index,
589                  unsigned msg_control,
590                  unsigned msg_type,
591                  unsigned send_commit_msg)
592{
593   assert(devinfo->ver <= 6 || !send_commit_msg);
594   if (devinfo->ver >= 6) {
595      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
596             SET_BITS(send_commit_msg, 17, 17);
597   } else {
598      return (SET_BITS(binding_table_index, 7, 0) |
599              SET_BITS(msg_control, 11, 8) |
600              SET_BITS(msg_type, 14, 12) |
601              SET_BITS(send_commit_msg, 15, 15));
602   }
603}
604
605static inline unsigned
606brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
607                           uint32_t desc)
608{
609   if (devinfo->ver >= 6)
610      return brw_dp_desc_msg_type(devinfo, desc);
611   else
612      return GET_BITS(desc, 14, 12);
613}
614
615static inline unsigned
616brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
617                              uint32_t desc)
618{
619   if (devinfo->ver >= 6)
620      return brw_dp_desc_msg_control(devinfo, desc);
621   else
622      return GET_BITS(desc, 11, 8);
623}
624
625static inline bool
626brw_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
627                               uint32_t desc)
628{
629   assert(devinfo->ver <= 6);
630   if (devinfo->ver >= 6)
631      return GET_BITS(desc, 17, 17);
632   else
633      return GET_BITS(desc, 15, 15);
634}
635
636/**
637 * Construct a message descriptor immediate with the specified dataport
638 * surface function controls.
639 */
640static inline uint32_t
641brw_dp_surface_desc(const struct intel_device_info *devinfo,
642                    unsigned msg_type,
643                    unsigned msg_control)
644{
645   assert(devinfo->ver >= 7);
646   /* We'll OR in the binding table index later */
647   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
648}
649
650static inline uint32_t
651brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
652                           unsigned exec_size, /**< 0 for SIMD4x2 */
653                           unsigned atomic_op,
654                           bool response_expected)
655{
656   assert(exec_size <= 8 || exec_size == 16);
657
658   unsigned msg_type;
659   if (devinfo->verx10 >= 75) {
660      if (exec_size > 0) {
661         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
662      } else {
663         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
664      }
665   } else {
666      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
667   }
668
669   const unsigned msg_control =
670      SET_BITS(atomic_op, 3, 0) |
671      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
672      SET_BITS(response_expected, 5, 5);
673
674   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
675}
676
677static inline uint32_t
678brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
679                                 unsigned exec_size,
680                                 unsigned atomic_op,
681                                 bool response_expected)
682{
683   assert(exec_size <= 8 || exec_size == 16);
684   assert(devinfo->ver >= 9);
685
686   assert(exec_size > 0);
687   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;
688
689   const unsigned msg_control =
690      SET_BITS(atomic_op, 1, 0) |
691      SET_BITS(exec_size <= 8, 4, 4) |
692      SET_BITS(response_expected, 5, 5);
693
694   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
695}
696
/**
 * Compute a 4-bit channel mask in which the low \p num_channels bits are
 * clear and the remaining bits (up to bit 3) are set.
 *
 * For example 0 yields 0xf, 1 yields 0xe, 3 yields 0x8, and any count of
 * four or more yields 0.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* With four or more channels every mask bit is shifted out.  Returning
    * early also keeps the shift below well-defined for any argument: the
    * previous form shifted a signed int constant, which is undefined once
    * the shift count reaches the width of int (or overflows it).
    */
   if (num_channels >= 4)
      return 0;

   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xfu & (0xfu << num_channels);
}
703
/**
 * Return BITSET_MASK(num_channels) for a channel count between 1 and 4
 * inclusive (asserted).
 */
static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);

   const unsigned channel_mask = BITSET_MASK(num_channels);
   return channel_mask;
}
710
711static inline uint32_t
712brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
713                               unsigned exec_size, /**< 0 for SIMD4x2 */
714                               unsigned num_channels,
715                               bool write)
716{
717   assert(exec_size <= 8 || exec_size == 16);
718
719   unsigned msg_type;
720   if (write) {
721      if (devinfo->verx10 >= 75) {
722         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
723      } else {
724         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
725      }
726   } else {
727      /* Read */
728      if (devinfo->verx10 >= 75) {
729         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
730      } else {
731         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
732      }
733   }
734
735   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
736   if (write && devinfo->verx10 == 70 && exec_size == 0)
737      exec_size = 8;
738
739   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
740   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
741                              exec_size <= 8 ? 2 : 1;
742
743   const unsigned msg_control =
744      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
745      SET_BITS(simd_mode, 5, 4);
746
747   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
748}
749
750static inline unsigned
751brw_mdc_ds(unsigned bit_size)
752{
753   switch (bit_size) {
754   case 8:
755      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
756   case 16:
757      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
758   case 32:
759      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
760   default:
761      unreachable("Unsupported bit_size for byte scattered messages");
762   }
763}
764
765static inline uint32_t
766brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
767                              unsigned exec_size,
768                              unsigned bit_size,
769                              bool write)
770{
771   assert(exec_size <= 8 || exec_size == 16);
772
773   assert(devinfo->verx10 >= 75);
774   const unsigned msg_type =
775      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
776              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;
777
778   assert(exec_size > 0);
779   const unsigned msg_control =
780      SET_BITS(exec_size == 16, 0, 0) |
781      SET_BITS(brw_mdc_ds(bit_size), 3, 2);
782
783   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
784}
785
786static inline uint32_t
787brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
788                               unsigned exec_size,
789                               bool write)
790{
791   assert(exec_size == 8 || exec_size == 16);
792
793   unsigned msg_type;
794   if (write) {
795      if (devinfo->ver >= 6) {
796         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
797      } else {
798         msg_type = BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
799      }
800   } else {
801      if (devinfo->ver >= 7) {
802         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
803      } else if (devinfo->verx10 >= 45) {
804         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
805      } else {
806         msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
807      }
808   }
809
810   const unsigned msg_control =
811      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
812      SET_BITS(exec_size == 16, 0, 0);
813
814   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
815}
816
817static inline uint32_t
818brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
819                           bool align_16B,
820                           unsigned num_dwords,
821                           bool write)
822{
823   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
824   assert(!write || align_16B);
825
826   const unsigned msg_type =
827      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
828      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
829                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;
830
831   const unsigned msg_control =
832      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
833
834   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
835}
836
837static inline uint32_t
838brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
839                                   unsigned exec_size, /**< 0 for SIMD4x2 */
840                                   unsigned num_channels,
841                                   bool write)
842{
843   assert(exec_size <= 8 || exec_size == 16);
844   assert(devinfo->ver >= 8);
845
846   unsigned msg_type =
847      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
848              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;
849
850   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
851   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
852                              exec_size <= 8 ? 2 : 1;
853
854   const unsigned msg_control =
855      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
856      SET_BITS(simd_mode, 5, 4);
857
858   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
859                      msg_type, msg_control);
860}
861
862static inline uint32_t
863brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
864                               bool align_16B,
865                               unsigned num_dwords,
866                               bool write)
867{
868   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
869   assert(!write || align_16B);
870
871   unsigned msg_type =
872      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
873              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;
874
875   unsigned msg_control =
876      SET_BITS(!align_16B, 4, 3) |
877      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
878
879   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
880                      msg_type, msg_control);
881}
882
/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 *
 * Maps an element count of 1, 2, 4 or 8 to the encoding 0, 1, 2 or 3
 * respectively.  Any other count hits unreachable().
 */
static inline uint32_t
brw_mdc_a64_ds(unsigned elems)
{
   if (elems == 1)
      return 0;

   if (elems == 2)
      return 1;

   if (elems == 4)
      return 2;

   if (elems == 8)
      return 3;

   unreachable("Unsupported elmeent count for A64 scattered message");
}
899
900static inline uint32_t
901brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
902                                  unsigned exec_size, /**< 0 for SIMD4x2 */
903                                  unsigned bit_size,
904                                  bool write)
905{
906   assert(exec_size <= 8 || exec_size == 16);
907   assert(devinfo->ver >= 8);
908
909   unsigned msg_type =
910      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
911              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;
912
913   const unsigned msg_control =
914      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
915      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
916      SET_BITS(exec_size == 16, 4, 4);
917
918   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
919                      msg_type, msg_control);
920}
921
922static inline uint32_t
923brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
924                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
925                               unsigned bit_size,
926                               unsigned atomic_op,
927                               bool response_expected)
928{
929   assert(exec_size == 8);
930   assert(devinfo->ver >= 8);
931   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
932   assert(devinfo->ver >= 12 || bit_size >= 32);
933
934   const unsigned msg_type = bit_size == 16 ?
935      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
936      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
937
938   const unsigned msg_control =
939      SET_BITS(atomic_op, 3, 0) |
940      SET_BITS(bit_size == 64, 4, 4) |
941      SET_BITS(response_expected, 5, 5);
942
943   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
944                      msg_type, msg_control);
945}
946
947static inline uint32_t
948brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
949                                     ASSERTED unsigned exec_size,
950                                     unsigned bit_size,
951                                     unsigned atomic_op,
952                                     bool response_expected)
953{
954   assert(exec_size == 8);
955   assert(devinfo->ver >= 9);
956   assert(bit_size == 16 || bit_size == 32);
957   assert(devinfo->ver >= 12 || bit_size == 32);
958
959   assert(exec_size > 0);
960   const unsigned msg_type = bit_size == 32 ?
961      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
962      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;
963
964   const unsigned msg_control =
965      SET_BITS(atomic_op, 1, 0) |
966      SET_BITS(response_expected, 5, 5);
967
968   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
969                      msg_type, msg_control);
970}
971
972static inline uint32_t
973brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
974                         unsigned exec_size,
975                         unsigned exec_group,
976                         unsigned atomic_op,
977                         bool response_expected)
978{
979   assert(exec_size > 0 || exec_group == 0);
980   assert(exec_group % 8 == 0);
981
982   unsigned msg_type;
983   if (devinfo->verx10 >= 75) {
984      if (exec_size == 0) {
985         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
986      } else {
987         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
988      }
989   } else {
990      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
991      assert(exec_size > 0);
992      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
993   }
994
995   const bool high_sample_mask = (exec_group / 8) % 2 == 1;
996
997   const unsigned msg_control =
998      SET_BITS(atomic_op, 3, 0) |
999      SET_BITS(high_sample_mask, 4, 4) |
1000      SET_BITS(response_expected, 5, 5);
1001
1002   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
1003}
1004
1005static inline uint32_t
1006brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
1007                             unsigned exec_size,
1008                             unsigned exec_group,
1009                             unsigned num_channels,
1010                             bool write)
1011{
1012   assert(exec_size > 0 || exec_group == 0);
1013   assert(exec_group % 8 == 0);
1014
1015   /* Typed surface reads and writes don't support SIMD16 */
1016   assert(exec_size <= 8);
1017
1018   unsigned msg_type;
1019   if (write) {
1020      if (devinfo->verx10 >= 75) {
1021         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
1022      } else {
1023         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
1024      }
1025   } else {
1026      if (devinfo->verx10 >= 75) {
1027         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
1028      } else {
1029         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
1030      }
1031   }
1032
1033   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
1034   unsigned msg_control;
1035   if (devinfo->verx10 >= 75) {
1036      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
1037      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
1038                                  1 + ((exec_group / 8) % 2);
1039
1040      msg_control =
1041         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
1042         SET_BITS(slot_group, 5, 4);
1043   } else {
1044      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
1045      assert(exec_size > 0);
1046      const unsigned slot_group = ((exec_group / 8) % 2);
1047
1048      msg_control =
1049         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
1050         SET_BITS(slot_group, 5, 5);
1051   }
1052
1053   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
1054}
1055
1056static inline uint32_t
1057brw_fb_desc(const struct intel_device_info *devinfo,
1058            unsigned binding_table_index,
1059            unsigned msg_type,
1060            unsigned msg_control)
1061{
1062   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
1063    * helpers instead.
1064    */
1065   assert(devinfo->ver >= 6);
1066   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
1067   if (devinfo->ver >= 7) {
1068      return (desc | SET_BITS(msg_control, 13, 8) |
1069              SET_BITS(msg_type, 17, 14));
1070   } else {
1071      return (desc | SET_BITS(msg_control, 12, 8) |
1072              SET_BITS(msg_type, 16, 13));
1073   }
1074}
1075
1076static inline unsigned
1077brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
1078                                uint32_t desc)
1079{
1080   return GET_BITS(desc, 7, 0);
1081}
1082
1083static inline uint32_t
1084brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
1085{
1086   assert(devinfo->ver >= 6);
1087   if (devinfo->ver >= 7)
1088      return GET_BITS(desc, 13, 8);
1089   else
1090      return GET_BITS(desc, 12, 8);
1091}
1092
1093static inline unsigned
1094brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
1095{
1096   assert(devinfo->ver >= 6);
1097   if (devinfo->ver >= 7)
1098      return GET_BITS(desc, 17, 14);
1099   else
1100      return GET_BITS(desc, 16, 13);
1101}
1102
1103static inline uint32_t
1104brw_fb_read_desc(const struct intel_device_info *devinfo,
1105                 unsigned binding_table_index,
1106                 unsigned msg_control,
1107                 unsigned exec_size,
1108                 bool per_sample)
1109{
1110   assert(devinfo->ver >= 9);
1111   assert(exec_size == 8 || exec_size == 16);
1112
1113   return brw_fb_desc(devinfo, binding_table_index,
1114                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
1115          SET_BITS(per_sample, 13, 13) |
1116          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
1117}
1118
1119static inline uint32_t
1120brw_fb_write_desc(const struct intel_device_info *devinfo,
1121                  unsigned binding_table_index,
1122                  unsigned msg_control,
1123                  bool last_render_target,
1124                  bool coarse_write)
1125{
1126   const unsigned msg_type =
1127      devinfo->ver >= 6 ?
1128      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
1129      BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1130
1131   assert(devinfo->ver >= 10 || !coarse_write);
1132
1133   if (devinfo->ver >= 6) {
1134      return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
1135             SET_BITS(last_render_target, 12, 12) |
1136             SET_BITS(coarse_write, 18, 18);
1137   } else {
1138      return (SET_BITS(binding_table_index, 7, 0) |
1139              SET_BITS(msg_control, 11, 8) |
1140              SET_BITS(last_render_target, 11, 11) |
1141              SET_BITS(msg_type, 14, 12));
1142   }
1143}
1144
1145static inline unsigned
1146brw_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
1147                           uint32_t desc)
1148{
1149   if (devinfo->ver >= 6)
1150      return brw_fb_desc_msg_type(devinfo, desc);
1151   else
1152      return GET_BITS(desc, 14, 12);
1153}
1154
1155static inline unsigned
1156brw_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
1157                              uint32_t desc)
1158{
1159   if (devinfo->ver >= 6)
1160      return brw_fb_desc_msg_control(devinfo, desc);
1161   else
1162      return GET_BITS(desc, 11, 8);
1163}
1164
1165static inline bool
1166brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
1167                                     uint32_t desc)
1168{
1169   if (devinfo->ver >= 6)
1170      return GET_BITS(desc, 12, 12);
1171   else
1172      return GET_BITS(desc, 11, 11);
1173}
1174
1175static inline bool
1176brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
1177                               uint32_t desc)
1178{
1179   assert(devinfo->ver <= 6);
1180   if (devinfo->ver >= 6)
1181      return GET_BITS(desc, 17, 17);
1182   else
1183      return GET_BITS(desc, 15, 15);
1184}
1185
1186static inline bool
1187brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
1188                               uint32_t desc)
1189{
1190   assert(devinfo->ver >= 10);
1191   return GET_BITS(desc, 18, 18);
1192}
1193
1194static inline bool
1195lsc_opcode_has_cmask(enum lsc_opcode opcode)
1196{
1197   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
1198}
1199
1200static inline bool
1201lsc_opcode_has_transpose(enum lsc_opcode opcode)
1202{
1203   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
1204}
1205
1206static inline uint32_t
1207lsc_data_size_bytes(enum lsc_data_size data_size)
1208{
1209   switch (data_size) {
1210   case LSC_DATA_SIZE_D8:
1211      return 1;
1212   case LSC_DATA_SIZE_D16:
1213      return 2;
1214   case LSC_DATA_SIZE_D32:
1215   case LSC_DATA_SIZE_D8U32:
1216   case LSC_DATA_SIZE_D16U32:
1217   case LSC_DATA_SIZE_D16BF32:
1218      return 4;
1219   case LSC_DATA_SIZE_D64:
1220      return 8;
1221   default:
1222      unreachable("Unsupported data payload size.");
1223   }
1224}
1225
1226static inline uint32_t
1227lsc_addr_size_bytes(enum lsc_addr_size addr_size)
1228{
1229   switch (addr_size) {
1230   case LSC_ADDR_SIZE_A16: return 2;
1231   case LSC_ADDR_SIZE_A32: return 4;
1232   case LSC_ADDR_SIZE_A64: return 8;
1233   default:
1234      unreachable("Unsupported address size.");
1235   }
1236}
1237
1238static inline uint32_t
1239lsc_vector_length(enum lsc_vect_size vect_size)
1240{
1241   switch (vect_size) {
1242   case LSC_VECT_SIZE_V1: return 1;
1243   case LSC_VECT_SIZE_V2: return 2;
1244   case LSC_VECT_SIZE_V3: return 3;
1245   case LSC_VECT_SIZE_V4: return 4;
1246   case LSC_VECT_SIZE_V8: return 8;
1247   case LSC_VECT_SIZE_V16: return 16;
1248   case LSC_VECT_SIZE_V32: return 32;
1249   case LSC_VECT_SIZE_V64: return 64;
1250   default:
1251      unreachable("Unsupported size of vector");
1252   }
1253}
1254
1255static inline enum lsc_vect_size
1256lsc_vect_size(unsigned vect_size)
1257{
1258   switch(vect_size) {
1259   case 1:  return LSC_VECT_SIZE_V1;
1260   case 2:  return LSC_VECT_SIZE_V2;
1261   case 3:  return LSC_VECT_SIZE_V3;
1262   case 4:  return LSC_VECT_SIZE_V4;
1263   case 8:  return LSC_VECT_SIZE_V8;
1264   case 16: return LSC_VECT_SIZE_V16;
1265   case 32: return LSC_VECT_SIZE_V32;
1266   case 64: return LSC_VECT_SIZE_V64;
1267   default:
1268      unreachable("Unsupported vector size for dataport");
1269   }
1270}
1271
1272static inline uint32_t
1273lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
1274             enum lsc_opcode opcode, unsigned simd_size,
1275             enum lsc_addr_surface_type addr_type,
1276             enum lsc_addr_size addr_sz, unsigned num_coordinates,
1277             enum lsc_data_size data_sz, unsigned num_channels,
1278             bool transpose, unsigned cache_ctrl, bool has_dest)
1279{
1280   assert(devinfo->has_lsc);
1281
1282   unsigned dest_length = !has_dest ? 0 :
1283      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
1284                   REG_SIZE);
1285
1286   unsigned src0_length =
1287      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
1288                   REG_SIZE);
1289
1290   assert(!transpose || lsc_opcode_has_transpose(opcode));
1291
1292   unsigned msg_desc =
1293      SET_BITS(opcode, 5, 0) |
1294      SET_BITS(addr_sz, 8, 7) |
1295      SET_BITS(data_sz, 11, 9) |
1296      SET_BITS(transpose, 15, 15) |
1297      SET_BITS(cache_ctrl, 19, 17) |
1298      SET_BITS(dest_length, 24, 20) |
1299      SET_BITS(src0_length, 28, 25) |
1300      SET_BITS(addr_type, 30, 29);
1301
1302   if (lsc_opcode_has_cmask(opcode))
1303      msg_desc |= SET_BITS(lsc_cmask(num_channels), 15, 12);
1304   else
1305      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);
1306
1307   return msg_desc;
1308}
1309
1310static inline enum lsc_opcode
1311lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1312                    uint32_t desc)
1313{
1314   assert(devinfo->has_lsc);
1315   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
1316}
1317
1318static inline enum lsc_addr_size
1319lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1320                       uint32_t desc)
1321{
1322   assert(devinfo->has_lsc);
1323   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1324}
1325
1326static inline enum lsc_data_size
1327lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1328                       uint32_t desc)
1329{
1330   assert(devinfo->has_lsc);
1331   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1332}
1333
1334static inline enum lsc_vect_size
1335lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1336                       uint32_t desc)
1337{
1338   assert(devinfo->has_lsc);
1339   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1340   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1341}
1342
1343static inline enum lsc_cmask
1344lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1345                   uint32_t desc)
1346{
1347   assert(devinfo->has_lsc);
1348   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1349   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1350}
1351
1352static inline bool
1353lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
1354                       uint32_t desc)
1355{
1356   assert(devinfo->has_lsc);
1357   return GET_BITS(desc, 15, 15);
1358}
1359
1360static inline unsigned
1361lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1362                        uint32_t desc)
1363{
1364   assert(devinfo->has_lsc);
1365   return GET_BITS(desc, 19, 17);
1366}
1367
1368static inline unsigned
1369lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
1370                      uint32_t desc)
1371{
1372   assert(devinfo->has_lsc);
1373   return GET_BITS(desc, 24, 20);
1374}
1375
1376static inline unsigned
1377lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
1378                      uint32_t desc)
1379{
1380   assert(devinfo->has_lsc);
1381   return GET_BITS(desc, 28, 25);
1382}
1383
1384static inline enum lsc_addr_surface_type
1385lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1386                       uint32_t desc)
1387{
1388   assert(devinfo->has_lsc);
1389   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1390}
1391
1392static inline uint32_t
1393lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
1394                   enum lsc_fence_scope scope,
1395                   enum lsc_flush_type flush_type,
1396                   bool route_to_lsc)
1397{
1398   assert(devinfo->has_lsc);
1399   return SET_BITS(LSC_OP_FENCE, 5, 0) |
1400          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
1401          SET_BITS(scope, 11, 9) |
1402          SET_BITS(flush_type, 14, 12) |
1403          SET_BITS(route_to_lsc, 18, 18) |
1404          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
1405}
1406
1407static inline enum lsc_fence_scope
1408lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1409                         uint32_t desc)
1410{
1411   assert(devinfo->has_lsc);
1412   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1413}
1414
1415static inline enum lsc_flush_type
1416lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1417                              uint32_t desc)
1418{
1419   assert(devinfo->has_lsc);
1420   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1421}
1422
1423static inline enum lsc_backup_fence_routing
1424lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1425                                  uint32_t desc)
1426{
1427   assert(devinfo->has_lsc);
1428   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1429}
1430
1431static inline uint32_t
1432lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
1433{
1434   assert(devinfo->has_lsc);
1435   return SET_BITS(bti, 31, 24) |
1436          SET_BITS(0, 23, 12);  /* base offset */
1437}
1438
1439static inline unsigned
1440lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1441                            uint32_t ex_desc)
1442{
1443   assert(devinfo->has_lsc);
1444   return GET_BITS(ex_desc, 23, 12);
1445}
1446
1447static inline unsigned
1448lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1449                      uint32_t ex_desc)
1450{
1451   assert(devinfo->has_lsc);
1452   return GET_BITS(ex_desc, 31, 24);
1453}
1454
1455static inline unsigned
1456lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1457                             uint32_t ex_desc)
1458{
1459   assert(devinfo->has_lsc);
1460   return GET_BITS(ex_desc, 31, 12);
1461}
1462
1463static inline uint32_t
1464lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1465                unsigned surface_state_index)
1466{
1467   assert(devinfo->has_lsc);
1468   return SET_BITS(surface_state_index, 31, 6);
1469}
1470
1471static inline unsigned
1472lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1473                      uint32_t ex_desc)
1474{
1475   assert(devinfo->has_lsc);
1476   return GET_BITS(ex_desc, 31, 6);
1477}
1478
/**
 * Encode an execution size as a one-bit SIMD mode: 0 for 8 and 1 for 16.
 *
 * \param exec_size Execution size; asserted to be 8 or 16.
 */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);

   const uint32_t sm2 = exec_size > 8;
   return sm2;
}
1485
/**
 * Decode a one-bit SIMD mode back into an execution size: 8 for 0 and
 * 16 for 1.
 *
 * \param sm2 SIMD mode; asserted to be at most 1.
 */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);

   const uint32_t exec_size = 8 << sm2;
   return exec_size;
}
1492
1493static inline uint32_t
1494brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
1495                   unsigned exec_size, unsigned msg_type)
1496{
1497   assert(devinfo->has_ray_tracing);
1498
1499   return SET_BITS(0, 19, 19) | /* No header */
1500          SET_BITS(msg_type, 17, 14) |
1501          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1502}
1503
1504static inline uint32_t
1505brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
1506                       uint32_t desc)
1507{
1508   return GET_BITS(desc, 17, 14);
1509}
1510
1511static inline uint32_t
1512brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
1513                        uint32_t desc)
1514{
1515   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1516}
1517
1518static inline uint32_t
1519brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
1520                      unsigned exec_size)
1521{
1522   assert(devinfo->has_ray_tracing);
1523
1524   return SET_BITS(0, 19, 19) | /* No header */
1525          SET_BITS(0, 17, 14) | /* Message type */
1526          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1527}
1528
1529static inline uint32_t
1530brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
1531                                uint32_t desc)
1532{
1533   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1534}
1535
1536/**
1537 * Construct a message descriptor immediate with the specified pixel
1538 * interpolator function controls.
1539 */
1540static inline uint32_t
1541brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
1542                      unsigned msg_type,
1543                      bool noperspective,
1544                      bool coarse_pixel_rate,
1545                      unsigned simd_mode,
1546                      unsigned slot_group)
1547{
1548   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
1549   return (SET_BITS(slot_group, 11, 11) |
1550           SET_BITS(msg_type, 13, 12) |
1551           SET_BITS(!!noperspective, 14, 14) |
1552           SET_BITS(coarse_pixel_rate, 15, 15) |
1553           SET_BITS(simd_mode, 16, 16));
1554}
1555
1556void brw_urb_WRITE(struct brw_codegen *p,
1557		   struct brw_reg dest,
1558		   unsigned msg_reg_nr,
1559		   struct brw_reg src0,
1560                   enum brw_urb_write_flags flags,
1561		   unsigned msg_length,
1562		   unsigned response_length,
1563		   unsigned offset,
1564		   unsigned swizzle);
1565
1566/**
1567 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
1568 * desc.  If \p desc is not an immediate it will be transparently loaded to an
1569 * address register using an OR instruction.
1570 */
1571void
1572brw_send_indirect_message(struct brw_codegen *p,
1573                          unsigned sfid,
1574                          struct brw_reg dst,
1575                          struct brw_reg payload,
1576                          struct brw_reg desc,
1577                          unsigned desc_imm,
1578                          bool eot);
1579
1580void
1581brw_send_indirect_split_message(struct brw_codegen *p,
1582                                unsigned sfid,
1583                                struct brw_reg dst,
1584                                struct brw_reg payload0,
1585                                struct brw_reg payload1,
1586                                struct brw_reg desc,
1587                                unsigned desc_imm,
1588                                struct brw_reg ex_desc,
1589                                unsigned ex_desc_imm,
1590                                bool eot);
1591
1592void brw_ff_sync(struct brw_codegen *p,
1593		   struct brw_reg dest,
1594		   unsigned msg_reg_nr,
1595		   struct brw_reg src0,
1596		   bool allocate,
1597		   unsigned response_length,
1598		   bool eot);
1599
1600void brw_svb_write(struct brw_codegen *p,
1601                   struct brw_reg dest,
1602                   unsigned msg_reg_nr,
1603                   struct brw_reg src0,
1604                   unsigned binding_table_index,
1605                   bool   send_commit_msg);
1606
1607brw_inst *brw_fb_WRITE(struct brw_codegen *p,
1608                       struct brw_reg payload,
1609                       struct brw_reg implied_header,
1610                       unsigned msg_control,
1611                       unsigned binding_table_index,
1612                       unsigned msg_length,
1613                       unsigned response_length,
1614                       bool eot,
1615                       bool last_render_target,
1616                       bool header_present);
1617
1618brw_inst *gfx9_fb_READ(struct brw_codegen *p,
1619                       struct brw_reg dst,
1620                       struct brw_reg payload,
1621                       unsigned binding_table_index,
1622                       unsigned msg_length,
1623                       unsigned response_length,
1624                       bool per_sample);
1625
1626void brw_SAMPLE(struct brw_codegen *p,
1627		struct brw_reg dest,
1628		unsigned msg_reg_nr,
1629		struct brw_reg src0,
1630		unsigned binding_table_index,
1631		unsigned sampler,
1632		unsigned msg_type,
1633		unsigned response_length,
1634		unsigned msg_length,
1635		unsigned header_present,
1636		unsigned simd_mode,
1637		unsigned return_format);
1638
1639void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
1640                                      struct brw_reg header,
1641                                      struct brw_reg sampler_index);
1642
1643void gfx4_math(struct brw_codegen *p,
1644	       struct brw_reg dest,
1645	       unsigned function,
1646	       unsigned msg_reg_nr,
1647	       struct brw_reg src,
1648	       unsigned precision );
1649
1650void gfx6_math(struct brw_codegen *p,
1651	       struct brw_reg dest,
1652	       unsigned function,
1653	       struct brw_reg src0,
1654	       struct brw_reg src1);
1655
1656void brw_oword_block_read(struct brw_codegen *p,
1657			  struct brw_reg dest,
1658			  struct brw_reg mrf,
1659			  uint32_t offset,
1660			  uint32_t bind_table_index);
1661
1662unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
1663
1664void brw_oword_block_read_scratch(struct brw_codegen *p,
1665				  struct brw_reg dest,
1666				  struct brw_reg mrf,
1667				  int num_regs,
1668				  unsigned offset);
1669
1670void brw_oword_block_write_scratch(struct brw_codegen *p,
1671				   struct brw_reg mrf,
1672				   int num_regs,
1673				   unsigned offset);
1674
1675void gfx7_block_read_scratch(struct brw_codegen *p,
1676                             struct brw_reg dest,
1677                             int num_regs,
1678                             unsigned offset);
1679
1680/**
1681 * Return the generation-specific jump distance scaling factor.
1682 *
1683 * Given the number of instructions to jump, we need to scale by
1684 * some number to obtain the actual jump distance to program in an
1685 * instruction.
1686 */
1687static inline unsigned
1688brw_jump_scale(const struct intel_device_info *devinfo)
1689{
1690   /* Broadwell measures jump targets in bytes. */
1691   if (devinfo->ver >= 8)
1692      return 16;
1693
1694   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1695    * (to support compaction), so each 128-bit instruction requires 2 chunks.
1696    */
1697   if (devinfo->ver >= 5)
1698      return 2;
1699
1700   /* Gfx4 simply uses the number of 128-bit instructions. */
1701   return 1;
1702}
1703
1704void brw_barrier(struct brw_codegen *p, struct brw_reg src);
1705
1706/* If/else/endif.  Works by manipulating the execution flags on each
1707 * channel.
1708 */
1709brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
1710brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
1711                  struct brw_reg src0, struct brw_reg src1);
1712
1713void brw_ELSE(struct brw_codegen *p);
1714void brw_ENDIF(struct brw_codegen *p);
1715
1716/* DO/WHILE loops:
1717 */
1718brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);
1719
1720brw_inst *brw_WHILE(struct brw_codegen *p);
1721
1722brw_inst *brw_BREAK(struct brw_codegen *p);
1723brw_inst *brw_CONT(struct brw_codegen *p);
1724brw_inst *brw_HALT(struct brw_codegen *p);
1725
1726/* Forward jumps:
1727 */
1728void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);
1729
1730brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1731                   unsigned predicate_control);
1732
1733void brw_NOP(struct brw_codegen *p);
1734
1735void brw_WAIT(struct brw_codegen *p);
1736
1737void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);
1738
1739/* Special case: there is never a destination, execution size will be
1740 * taken from src0:
1741 */
1742void brw_CMP(struct brw_codegen *p,
1743	     struct brw_reg dest,
1744	     unsigned conditional,
1745	     struct brw_reg src0,
1746	     struct brw_reg src1);
1747
1748void brw_CMPN(struct brw_codegen *p,
1749              struct brw_reg dest,
1750              unsigned conditional,
1751              struct brw_reg src0,
1752              struct brw_reg src1);
1753
1754void
1755brw_untyped_atomic(struct brw_codegen *p,
1756                   struct brw_reg dst,
1757                   struct brw_reg payload,
1758                   struct brw_reg surface,
1759                   unsigned atomic_op,
1760                   unsigned msg_length,
1761                   bool response_expected,
1762                   bool header_present);
1763
1764void
1765brw_untyped_surface_read(struct brw_codegen *p,
1766                         struct brw_reg dst,
1767                         struct brw_reg payload,
1768                         struct brw_reg surface,
1769                         unsigned msg_length,
1770                         unsigned num_channels);
1771
1772void
1773brw_untyped_surface_write(struct brw_codegen *p,
1774                          struct brw_reg payload,
1775                          struct brw_reg surface,
1776                          unsigned msg_length,
1777                          unsigned num_channels,
1778                          bool header_present);
1779
1780void
1781brw_memory_fence(struct brw_codegen *p,
1782                 struct brw_reg dst,
1783                 struct brw_reg src,
1784                 enum opcode send_op,
1785                 enum brw_message_target sfid,
1786                 uint32_t desc,
1787                 bool commit_enable,
1788                 unsigned bti);
1789
1790void
1791brw_pixel_interpolator_query(struct brw_codegen *p,
1792                             struct brw_reg dest,
1793                             struct brw_reg mrf,
1794                             bool noperspective,
1795                             bool coarse_pixel_rate,
1796                             unsigned mode,
1797                             struct brw_reg data,
1798                             unsigned msg_length,
1799                             unsigned response_length);
1800
1801void
1802brw_find_live_channel(struct brw_codegen *p,
1803                      struct brw_reg dst,
1804                      bool last);
1805
1806void
1807brw_broadcast(struct brw_codegen *p,
1808              struct brw_reg dst,
1809              struct brw_reg src,
1810              struct brw_reg idx);
1811
1812void
1813brw_float_controls_mode(struct brw_codegen *p,
1814                        unsigned mode, unsigned mask);
1815
1816void
1817brw_update_reloc_imm(const struct brw_isa_info *isa,
1818                     brw_inst *inst,
1819                     uint32_t value);
1820
1821void
1822brw_MOV_reloc_imm(struct brw_codegen *p,
1823                  struct brw_reg dst,
1824                  enum brw_reg_type src_type,
1825                  uint32_t id);
1826
1827/***********************************************************************
1828 * brw_eu_util.c:
1829 */
1830
1831void brw_copy_indirect_to_indirect(struct brw_codegen *p,
1832				   struct brw_indirect dst_ptr,
1833				   struct brw_indirect src_ptr,
1834				   unsigned count);
1835
1836void brw_copy_from_indirect(struct brw_codegen *p,
1837			    struct brw_reg dst,
1838			    struct brw_indirect ptr,
1839			    unsigned count);
1840
1841void brw_copy4(struct brw_codegen *p,
1842	       struct brw_reg dst,
1843	       struct brw_reg src,
1844	       unsigned count);
1845
1846void brw_copy8(struct brw_codegen *p,
1847	       struct brw_reg dst,
1848	       struct brw_reg src,
1849	       unsigned count);
1850
1851void brw_math_invert( struct brw_codegen *p,
1852		      struct brw_reg dst,
1853		      struct brw_reg src);
1854
1855void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
1856
1857void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
1858                     unsigned desc, unsigned ex_desc);
1859
1860static inline void
1861brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
1862{
1863   brw_set_desc_ex(p, insn, desc, 0);
1864}
1865
1866void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
1867
1868enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
1869enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
1870
1871/* brw_eu_compact.c */
1872void brw_compact_instructions(struct brw_codegen *p, int start_offset,
1873                              struct disasm_info *disasm);
1874void brw_uncompact_instruction(const struct brw_isa_info *isa,
1875                               brw_inst *dst, brw_compact_inst *src);
1876bool brw_try_compact_instruction(const struct brw_isa_info *isa,
1877                                 brw_compact_inst *dst, const brw_inst *src);
1878
1879void brw_debug_compact_uncompact(const struct brw_isa_info *isa,
1880                                 brw_inst *orig, brw_inst *uncompacted);
1881
1882/* brw_eu_validate.c */
1883bool brw_validate_instruction(const struct brw_isa_info *isa,
1884                              const brw_inst *inst, int offset,
1885                              unsigned inst_size,
1886                              struct disasm_info *disasm);
1887bool brw_validate_instructions(const struct brw_isa_info *isa,
1888                               const void *assembly, int start_offset, int end_offset,
1889                               struct disasm_info *disasm);
1890
1891static inline int
1892next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1893{
1894   brw_inst *insn = (brw_inst *)((char *)store + offset);
1895
1896   if (brw_inst_cmpt_control(devinfo, insn))
1897      return offset + 8;
1898   else
1899      return offset + 16;
1900}
1901
1902/** Maximum SEND message length */
1903#define BRW_MAX_MSG_LENGTH 15
1904
1905/** First MRF register used by spills */
1906#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)
1907
1908/** First MRF register used by pull loads */
1909#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
1910
1911#ifdef __cplusplus
1912}
1913#endif
1914
1915#endif
1916