xref: /third_party/mesa3d/src/intel/compiler/brw_eu.c (revision bf215546)
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include <sys/stat.h>
#include <fcntl.h>

#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "brw_shader.h"
#include "brw_gfx_ver_enum.h"
#include "dev/intel_debug.h"

#include "util/ralloc.h"

/* Returns a conditional modifier that negates the condition. */
enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod)
{
   switch (cmod) {
   case BRW_CONDITIONAL_Z:
      return BRW_CONDITIONAL_NZ;
   case BRW_CONDITIONAL_NZ:
      return BRW_CONDITIONAL_Z;
   case BRW_CONDITIONAL_G:
      return BRW_CONDITIONAL_LE;
   case BRW_CONDITIONAL_GE:
      return BRW_CONDITIONAL_L;
   case BRW_CONDITIONAL_L:
      return BRW_CONDITIONAL_GE;
   case BRW_CONDITIONAL_LE:
      return BRW_CONDITIONAL_G;
   default:
      unreachable("Can't negate this cmod");
   }
}

/* Returns the corresponding conditional mod for swapping src0 and
 * src1 in e.g. CMP.
 */
enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod)
{
   switch (cmod) {
   case BRW_CONDITIONAL_Z:
   case BRW_CONDITIONAL_NZ:
      return cmod;
   case BRW_CONDITIONAL_G:
      return BRW_CONDITIONAL_L;
   case BRW_CONDITIONAL_GE:
      return BRW_CONDITIONAL_LE;
   case BRW_CONDITIONAL_L:
      return BRW_CONDITIONAL_G;
   case BRW_CONDITIONAL_LE:
      return BRW_CONDITIONAL_GE;
   default:
      return BRW_CONDITIONAL_NONE;
   }
}
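
/* Illustrative note on how these two transforms differ: inverting the sense
 * of a comparison uses brw_negate_cmod(), while exchanging the two sources
 * of a CMP uses brw_swap_cmod().
 *
 *    assert(brw_negate_cmod(BRW_CONDITIONAL_GE) == BRW_CONDITIONAL_L);
 *    assert(brw_swap_cmod(BRW_CONDITIONAL_GE) == BRW_CONDITIONAL_LE);
 *
 * i.e. "a >= b" fails exactly when "a < b" holds, and "a >= b" is the same
 * predicate as "b <= a".
 */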

/**
 * Get the least significant bit offset of the i+1-th component of immediate
 * type \p type.  For \p i equal to the two's complement of j, return the
 * offset of the j-th component starting from the end of the vector.  For
 * scalar register types return zero.
 */
static unsigned
imm_shift(enum brw_reg_type type, unsigned i)
{
   assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V &&
          "Not implemented.");

   if (type == BRW_REGISTER_TYPE_VF)
      return 8 * (i & 3);
   else
      return 0;
}
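
/* For a VF immediate (four packed 8-bit restricted floats) the component
 * offsets work out as follows; note how the two's-complement convention
 * indexes from the end of the vector (illustrative values derived from the
 * definition above):
 *
 *    imm_shift(BRW_REGISTER_TYPE_VF, 0)   ==  0   (first component)
 *    imm_shift(BRW_REGISTER_TYPE_VF, 2)   == 16   (third component)
 *    imm_shift(BRW_REGISTER_TYPE_VF, ~0u) == 24   (last component)
 *    imm_shift(BRW_REGISTER_TYPE_UD, 0)   ==  0   (scalar type)
 */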

/**
 * Swizzle an arbitrary immediate \p x of the given type according to the
 * permutation specified as \p swz.
 */
uint32_t
brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
{
   if (imm_shift(type, 1)) {
      const unsigned n = 32 / imm_shift(type, 1);
      uint32_t y = 0;

      for (unsigned i = 0; i < n; i++) {
         /* Shift the specified component all the way to the right and left to
          * discard any undesired L/MSBs, then shift it right into component i.
          */
         y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))
                << imm_shift(type, ~0u)
                >> imm_shift(type, ~0u - i);
      }

      return y;
   } else {
      return x;
   }
}
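
/* Worked example (assuming the usual BRW_SWIZZLE4()/BRW_GET_SWZ() encoding
 * from brw_reg.h, where component i of the result takes component
 * BRW_GET_SWZ(swz, i) of the source):
 *
 *    uint32_t x = 0x04030201;   // VF components, lowest byte first
 *    uint32_t y = brw_swizzle_immediate(BRW_REGISTER_TYPE_VF, x,
 *                                       BRW_SWIZZLE4(2, 1, 0, 3));
 *    // y == 0x04010203: components 0 and 2 exchanged, 1 and 3 unchanged.
 *
 * Scalar immediate types are returned unchanged, since imm_shift() is zero
 * for them.
 */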

unsigned
brw_get_default_exec_size(struct brw_codegen *p)
{
   return p->current->exec_size;
}

unsigned
brw_get_default_group(struct brw_codegen *p)
{
   return p->current->group;
}

unsigned
brw_get_default_access_mode(struct brw_codegen *p)
{
   return p->current->access_mode;
}

struct tgl_swsb
brw_get_default_swsb(struct brw_codegen *p)
{
   return p->current->swsb;
}

void
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
{
   p->current->exec_size = value;
}

void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)
{
   p->current->predicate = pc;
}

void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
{
   p->current->pred_inv = predicate_inverse;
}

void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
{
   assert(subreg < 2);
   p->current->flag_subreg = reg * 2 + subreg;
}

void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
{
   p->current->access_mode = access_mode;
}

void
brw_set_default_compression_control(struct brw_codegen *p,
                                    enum brw_compression compression_control)
{
   switch (compression_control) {
   case BRW_COMPRESSION_NONE:
      /* This is the "use the first set of bits of dmask/vmask/arf
       * according to execsize" option.
       */
      p->current->group = 0;
      break;
   case BRW_COMPRESSION_2NDHALF:
      /* For SIMD8, this is "use the second set of 8 bits." */
      p->current->group = 8;
      break;
   case BRW_COMPRESSION_COMPRESSED:
      /* For SIMD16 instruction compression, use the first set of 16 bits
       * since we don't do SIMD32 dispatch.
       */
      p->current->group = 0;
      break;
   default:
      unreachable("not reached");
   }

   if (p->devinfo->ver <= 6) {
      p->current->compressed =
         (compression_control == BRW_COMPRESSION_COMPRESSED);
   }
}

/**
 * Enable or disable instruction compression on the given instruction,
 * leaving the currently selected channel enable group untouched.
 */
void
brw_inst_set_compression(const struct intel_device_info *devinfo,
                         brw_inst *inst, bool on)
{
   if (devinfo->ver >= 6) {
      /* No-op, the EU will figure out for us whether the instruction needs to
       * be compressed.
       */
   } else {
      /* The channel group and compression controls are non-orthogonal; there
       * are two possible representations for uncompressed instructions and we
       * may need to preserve the current one to avoid changing the selected
       * channel group inadvertently.
       */
      if (on)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_COMPRESSED);
      else if (brw_inst_qtr_control(devinfo, inst)
               == BRW_COMPRESSION_COMPRESSED)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   }
}

void
brw_set_default_compression(struct brw_codegen *p, bool on)
{
   p->current->compressed = on;
}

/**
 * Apply the range of channel enable signals given by
 * [group, group + exec_size) to the instruction passed as argument.
 */
void
brw_inst_set_group(const struct intel_device_info *devinfo,
                   brw_inst *inst, unsigned group)
{
   if (devinfo->ver >= 7) {
      assert(group % 4 == 0 && group < 32);
      brw_inst_set_qtr_control(devinfo, inst, group / 8);
      brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);

   } else if (devinfo->ver == 6) {
      assert(group % 8 == 0 && group < 32);
      brw_inst_set_qtr_control(devinfo, inst, group / 8);

   } else {
      assert(group % 8 == 0 && group < 16);
      /* The channel group and compression controls are non-orthogonal; there
       * are two possible representations for group zero and we may need to
       * preserve the current one to avoid changing the selected compression
       * enable inadvertently.
       */
      if (group == 8)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_2NDHALF);
      else if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_2NDHALF)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   }
}

void
brw_set_default_group(struct brw_codegen *p, unsigned group)
{
   p->current->group = group;
}

void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
{
   p->current->mask_control = value;
}

void brw_set_default_saturate( struct brw_codegen *p, bool enable )
{
   p->current->saturate = enable;
}

void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
{
   p->current->acc_wr_control = value;
}

void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value)
{
   p->current->swsb = value;
}

void brw_push_insn_state( struct brw_codegen *p )
{
   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
   *(p->current + 1) = *p->current;
   p->current++;
}

void brw_pop_insn_state( struct brw_codegen *p )
{
   assert(p->current != p->stack);
   p->current--;
}
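
/* The stack of default instruction states is typically used to emit a few
 * instructions with temporarily overridden defaults and then restore the
 * previous state, e.g. (illustrative sketch; the operands are hypothetical):
 *
 *    brw_push_insn_state(p);
 *    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 *    brw_set_default_exec_size(p, BRW_EXECUTE_1);
 *    brw_MOV(p, some_dst, some_src);   // hypothetical operands
 *    brw_pop_insn_state(p);
 *
 * At most BRW_EU_MAX_INSN_STACK - 1 nested pushes are allowed.
 */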


/***********************************************************************
 */
void
brw_init_codegen(const struct brw_isa_info *isa,
                 struct brw_codegen *p, void *mem_ctx)
{
   memset(p, 0, sizeof(*p));

   p->isa = isa;
   p->devinfo = isa->devinfo;
   p->automatic_exec_sizes = true;
   /*
    * Set the initial instruction store array size to 1024; if that turns out
    * not to be enough, brw_next_insn() keeps doubling the store size until
    * it runs out of memory.
    */
   p->store_size = 1024;
   p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
   p->nr_insn = 0;
   p->current = p->stack;
   memset(p->current, 0, sizeof(p->current[0]));

   p->mem_ctx = mem_ctx;

   /* Reasonable defaults for the initial instruction state. */
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* use the execution mask normally */
   brw_set_default_saturate(p, 0);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);

   /* Set up control flow stack */
   p->if_stack_depth = 0;
   p->if_stack_array_size = 16;
   p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);

   p->loop_stack_depth = 0;
   p->loop_stack_array_size = 16;
   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
   p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
}
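
/* Typical lifetime of a codegen context (illustrative sketch; the exact
 * emission calls depend on the generator using this file):
 *
 *    struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
 *    brw_init_codegen(isa, p, mem_ctx);
 *
 *    ... emit instructions through the brw_XXX() helpers in brw_eu_emit.c ...
 *
 *    unsigned size;
 *    const unsigned *assembly = brw_get_program(p, &size);
 *
 * The returned buffer is owned by mem_ctx and holds size bytes of EU
 * instructions.
 */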


const unsigned *brw_get_program( struct brw_codegen *p,
                                 unsigned *sz )
{
   *sz = p->next_insn_offset;
   return (const unsigned *)p->store;
}

const struct brw_shader_reloc *
brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)
{
   *num_relocs = p->num_relocs;
   return p->relocs;
}

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier)
{
   const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
   if (!read_path) {
      return false;
   }

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);

   int fd = open(name, O_RDONLY);
   ralloc_free(name);

   if (fd == -1) {
      return false;
   }

   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return false;
   }

   p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
   p->nr_insn += sb.st_size / sizeof(brw_inst);

   p->next_insn_offset = start_offset + sb.st_size;
   p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);
   p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
   assert(p->store);

   ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
   close(fd);
   if (ret != sb.st_size) {
      return false;
   }

   ASSERTED bool valid =
      brw_validate_instructions(p->isa, p->store,
                                start_offset, p->next_insn_offset,
                                NULL);
   assert(valid);

   return true;
}
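
/* Illustrative use of the assembly override hook: if the environment
 * variable INTEL_SHADER_ASM_READ_PATH is set to a directory containing a
 * file named "<identifier>.bin" (e.g. a previously dumped and hand-edited
 * shader), its contents replace everything emitted at or after
 * start_offset.  The directory name here is just an example:
 *
 *    INTEL_SHADER_ASM_READ_PATH=/tmp/shader_overrides
 *    /tmp/shader_overrides/<identifier>.bin
 *
 * Note that the overridden code is checked with brw_validate_instructions()
 * before being accepted.
 */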

const struct brw_label *
brw_find_label(const struct brw_label *root, int offset)
{
   const struct brw_label *curr = root;

   if (curr != NULL)
   {
      do {
         if (curr->offset == offset)
            return curr;

         curr = curr->next;
      } while (curr != NULL);
   }

   return curr;
}

void
brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)
{
   if (*labels != NULL) {
      struct brw_label *curr = *labels;
      struct brw_label *prev;

      do {
         prev = curr;

         if (curr->offset == offset)
            return;

         curr = curr->next;
      } while (curr != NULL);

      curr = ralloc(mem_ctx, struct brw_label);
      curr->offset = offset;
      curr->number = prev->number + 1;
      curr->next = NULL;
      prev->next = curr;
   } else {
      struct brw_label *root = ralloc(mem_ctx, struct brw_label);
      root->number = 0;
      root->offset = offset;
      root->next = NULL;
      *labels = root;
   }
}

const struct brw_label *
brw_label_assembly(const struct brw_isa_info *isa,
                   const void *assembly, int start, int end, void *mem_ctx)
{
   const struct intel_device_info *const devinfo = isa->devinfo;

   struct brw_label *root_label = NULL;

   int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);

   for (int offset = start; offset < end;) {
      const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);
      brw_inst uncompacted;

      bool is_compact = brw_inst_cmpt_control(devinfo, inst);

      if (is_compact) {
         brw_compact_inst *compacted = (brw_compact_inst *)inst;
         brw_uncompact_instruction(isa, &uncompacted, compacted);
         inst = &uncompacted;
      }

      if (brw_has_uip(devinfo, brw_inst_opcode(isa, inst))) {
         /* Instructions that have UIP also have JIP. */
         brw_create_label(&root_label,
            offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
         brw_create_label(&root_label,
            offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
      } else if (brw_has_jip(devinfo, brw_inst_opcode(isa, inst))) {
         int jip;
         if (devinfo->ver >= 7) {
            jip = brw_inst_jip(devinfo, inst);
         } else {
            jip = brw_inst_gfx6_jump_count(devinfo, inst);
         }

         brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
      }

      if (is_compact) {
         offset += sizeof(brw_compact_inst);
      } else {
         offset += sizeof(brw_inst);
      }
   }

   return root_label;
}

void
brw_disassemble_with_labels(const struct brw_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   void *mem_ctx = ralloc_context(NULL);
   const struct brw_label *root_label =
      brw_label_assembly(isa, assembly, start, end, mem_ctx);

   brw_disassemble(isa, assembly, start, end, root_label, out);

   ralloc_free(mem_ctx);
}
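
/* For example, a generator can dump everything it has emitted so far with
 * something like (illustrative; "p" is a struct brw_codegen set up by
 * brw_init_codegen()):
 *
 *    brw_disassemble_with_labels(p->isa, p->store, 0,
 *                                p->next_insn_offset, stderr);
 */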

void
brw_disassemble(const struct brw_isa_info *isa,
                const void *assembly, int start, int end,
                const struct brw_label *root_label, FILE *out)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   bool dump_hex = INTEL_DEBUG(DEBUG_HEX);

   for (int offset = start; offset < end;) {
      const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
      brw_inst uncompacted;

      if (root_label != NULL) {
         const struct brw_label *label = brw_find_label(root_label, offset);
         if (label != NULL) {
            fprintf(out, "\nLABEL%d:\n", label->number);
         }
      }

      bool compacted = brw_inst_cmpt_control(devinfo, insn);
      /* Debugging aid: flip this to 1 to prefix each instruction with its
       * byte offset.
       */
      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (compacted) {
         brw_compact_inst *compacted = (brw_compact_inst *)insn;
         if (dump_hex) {
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            const unsigned int blank_spaces = 24;
            for (int i = 0 ; i < 8; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
            /* Keep the hex dump of compacted (8-byte) instructions vertically
             * aligned with that of uncompacted (16-byte) instructions.
             */
            fprintf(out, "%*c", blank_spaces, ' ');
         }

         brw_uncompact_instruction(isa, &uncompacted, compacted);
         insn = &uncompacted;
      } else {
         if (dump_hex) {
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            for (int i = 0 ; i < 16; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
         }
      }

      brw_disassemble_inst(out, isa, insn, compacted, offset, root_label);

      if (compacted) {
         offset += sizeof(brw_compact_inst);
      } else {
         offset += sizeof(brw_inst);
      }
   }
}

static const struct opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers */
   { BRW_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
   { BRW_OPCODE_SYNC,     1,   "sync",    1,    0,    GFX_GE(GFX12) },
   { BRW_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_GE(GFX45) & GFX_LT(GFX12) },
   { BRW_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_DIM,      10,  "dim",     1,    1,    GFX75 },
   { BRW_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { BRW_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
   { BRW_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_ROR,      14,  "ror",     2,    1,    GFX11 },
   { BRW_OPCODE_ROR,      110, "ror",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_ROL,      15,  "rol",     2,    1,    GFX11 },
   { BRW_OPCODE_ROL,      111, "rol",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { BRW_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_F32TO16,  19,  "f32to16", 1,    1,    GFX7 | GFX75 },
   { BRW_OPCODE_F16TO32,  20,  "f16to32", 1,    1,    GFX7 | GFX75 },
   { BRW_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
   { BRW_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_GE(GFX7) },
   { BRW_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
   { BRW_OPCODE_IFF,      35,  "iff",     0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_GE(GFX7) },
   { BRW_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
   { BRW_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
   { BRW_OPCODE_DO,       38,  "do",      0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_CASE,     38,  "case",    0,    0,    GFX6 },
   { BRW_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
   { BRW_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
   { BRW_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
   { BRW_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
   { BRW_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_GE(GFX75) },
   { BRW_OPCODE_MSAVE,    44,  "msave",   0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_CALL,     44,  "call",    0,    0,    GFX_GE(GFX6) },
   { BRW_OPCODE_MREST,    45,  "mrest",   0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_RET,      45,  "ret",     0,    0,    GFX_GE(GFX6) },
   { BRW_OPCODE_PUSH,     46,  "push",    0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_FORK,     46,  "fork",    0,    0,    GFX6 },
   { BRW_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_GE(GFX8) },
   { BRW_OPCODE_POP,      47,  "pop",     2,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SENDS,    51,  "sends",   2,    1,    GFX_GE(GFX9) & GFX_LT(GFX12) },
   { BRW_OPCODE_SENDSC,   52,  "sendsc",  2,    1,    GFX_GE(GFX9) & GFX_LT(GFX12) },
   { BRW_OPCODE_MATH,     56,  "math",    2,    1,    GFX_GE(GFX6) },
   { BRW_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
   { BRW_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
   { BRW_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
   { BRW_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
   { BRW_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
   { BRW_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
   { BRW_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
   { BRW_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_SAD2,     80,  "sad2",    2,    1,    GFX_ALL },
   { BRW_OPCODE_SADA2,    81,  "sada2",   2,    1,    GFX_ALL },
   { BRW_OPCODE_ADD3,     82,  "add3",    3,    1,    GFX_GE(GFX125) },
   { BRW_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP4A,     88,  "dp4a",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_LINE,     89,  "line",    2,    1,    GFX_LE(GFX10) },
   { BRW_OPCODE_PLN,      90,  "pln",     2,    1,    GFX_GE(GFX45) & GFX_LE(GFX10) },
   { BRW_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_GE(GFX6) },
   { BRW_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX_GE(GFX6) & GFX_LE(GFX10) },
   { BRW_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_GE(GFX8) },
   { BRW_OPCODE_NENOP,    125, "nenop",   0,    0,    GFX45 },
   { BRW_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
   { BRW_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
};

void
brw_init_isa_info(struct brw_isa_info *isa,
                  const struct intel_device_info *devinfo)
{
   isa->devinfo = devinfo;

   enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);

   memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
   memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));

   for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
      if (opcode_descs[i].gfx_vers & ver) {
         const unsigned e = opcode_descs[i].ir;
         const unsigned h = opcode_descs[i].hw;
         assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
         assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
         isa->ir_to_descs[e] = &opcode_descs[i];
         isa->hw_to_descs[h] = &opcode_descs[i];
      }
   }
}

/**
 * Return the matching opcode_desc for the specified IR opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct opcode_desc *
brw_opcode_desc(const struct brw_isa_info *isa, enum opcode op)
{
   return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
}

/**
 * Return the matching opcode_desc for the specified HW opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct opcode_desc *
brw_opcode_desc_from_hw(const struct brw_isa_info *isa, unsigned hw)
{
   return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
}

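/* For instance, looking up the descriptor for an add on any device the isa
 * was initialized for (illustrative):
 *
 *    const struct opcode_desc *desc = brw_opcode_desc(isa, BRW_OPCODE_ADD);
 *    assert(desc != NULL && desc->nsrc == 2 && desc->ndst == 1);
 *
 * while brw_opcode_desc_from_hw() performs the reverse mapping from the raw
 * hardware opcode field of an encoded instruction.
 */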