1/*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that.  The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gfx5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gfx6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gfx7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62 * since the jump count field is not in DW3.
63 *
64 *    break    JIP/UIP
65 *    cont     JIP/UIP
66 *    halt     JIP/UIP
67 *    if       JIP/UIP
68 *    else     JIP (plus UIP on BDW+)
69 *    endif    JIP
70 *    while    JIP (must be negative)
71 *
 * Gfx8 adds support for compacting 3-src instructions.
73 *
 * Gfx12 reduces the number of bits that are available to compacted immediates
 * from 13 to 12, but improves the compaction of floating-point immediates by
 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
 * three most significant bits of the mantissa), rather than the lowest bits of
 * the mantissa.
79 */
80
81#include "brw_eu.h"
82#include "brw_shader.h"
83#include "brw_disasm_info.h"
84#include "dev/intel_debug.h"
85
/* G45 control-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 17-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100,
};
120
/* G45 datatype lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
155
/* G45 sub-register lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000,
};
190
/* G45 source-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100,
};
225
/* Gfx6 control-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 17-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
260
/* Gfx6 datatype lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 *
 * NOTE(review): entries 23 and 25 hold the same pattern; this is kept exactly
 * as found.
 */
static const uint32_t gfx6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
295
/* Gfx6 sub-register lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t gfx6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
330
/* Gfx6 source-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t gfx6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
365
/* Gfx7 control-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 19-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
400
/* Gfx7 datatype lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as an 18-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
435
/* Gfx7 sub-register lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t gfx7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
470
/* Gfx7 source-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t gfx7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
505
/* Gfx8 control-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 19-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
540
/* Gfx8 datatype lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 21-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
575
/* Gfx8 sub-register lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t gfx8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
610
/* Gfx8 source-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 12-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint16_t gfx8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
645
/* Gfx11 datatype lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 21-digit binary literal.  This is
 * one of the per-field lookup tables that let a 16-byte instruction be
 * encoded in 8 bytes.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
680
/* Gfx12 control-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 21-digit binary literal.  The
 * trailing comment on each entry gives the disassembly-style rendering of
 * the pattern (write-enable/predicate prefix, exec size and channel offset,
 * conditional modifier, saturate, AccWrEn).  Entry order is significant:
 * presumably the compacted encoding stores an entry's position in this table
 * (the lookup code is not part of this excerpt), so entries must not be
 * reordered.
 */
static const uint32_t gfx12_control_index_table[32] = {
   0b000000000000000000100, /*        (16|M0)                            */
   0b000000000000000000011, /*        (8|M0)                             */
   0b000000010000000000000, /* (W)    (1|M0)                             */
   0b000000010000000000100, /* (W)    (16|M0)                            */
   0b000000010000000000011, /* (W)    (8|M0)                             */
   0b010000000000000000100, /*        (16|M0)  (ge)f0.0                  */
   0b000000000000000100100, /*        (16|M16)                           */
   0b010100000000000000100, /*        (16|M0)  (lt)f0.0                  */
   0b000000000000000000000, /*        (1|M0)                             */
   0b000010000000000000100, /*        (16|M0)           (sat)            */
   0b000000000000000010011, /*        (8|M8)                             */
   0b001100000000000000100, /*        (16|M0)  (gt)f0.0                  */
   0b000100000000000000100, /*        (16|M0)  (eq)f0.0                  */
   0b000100010000000000100, /* (W)    (16|M0)  (eq)f0.0                  */
   0b001000000000000000100, /*        (16|M0)  (ne)f0.0                  */
   0b000000000000100000100, /* (f0.0) (16|M0)                            */
   0b010100000000000000011, /*        (8|M0)   (lt)f0.0                  */
   0b000000000000110000100, /* (f1.0) (16|M0)                            */
   0b000000010000000000001, /* (W)    (2|M0)                             */
   0b000000000000101000100, /* (f0.1) (16|M0)                            */
   0b000000000000111000100, /* (f1.1) (16|M0)                            */
   0b010000010000000000100, /* (W)    (16|M0)  (ge)f0.0                  */
   0b000000000000000100011, /*        (8|M16)                            */
   0b000000000000000110011, /*        (8|M24)                            */
   0b010100010000000000100, /* (W)    (16|M0)  (lt)f0.0                  */
   0b010000000000000000011, /*        (8|M0)   (ge)f0.0                  */
   0b000100010000000000000, /* (W)    (1|M0)   (eq)f0.0                  */
   0b000010000000000000011, /*        (8|M0)            (sat)            */
   0b010100000000010000100, /*        (16|M0)  (lt)f1.0                  */
   0b000100000000000000011, /*        (8|M0)   (eq)f0.0                  */
   0b000001000000000000011, /*        (8|M0)                   {AccWrEn} */
   0b000000010000000100100, /* (W)    (16|M16)                           */
};
715
/* Gfx12 datatype lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 20-digit binary literal.  The
 * trailing comment on each entry lists, in order, the destination (register
 * file, stride and type) followed by the two source register files/types the
 * pattern stands for.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint32_t gfx12_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
   0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
   0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
   0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
   0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
   0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
   0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
   0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
   0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
   0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
   0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
   0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
   0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
   0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
   0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
   0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
   0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
   0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
   0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
   0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
   0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
   0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
   0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
   0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
   0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
   0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
};
750
/* Gfx12 sub-register lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 15-digit binary literal.  The
 * trailing comment on each entry lists three sub-register offsets; judging
 * by the bit positions, the first value sits in the low 5 bits, the second
 * in the middle 5 and the third in the high 5.  Entry order is significant:
 * presumably the compacted encoding stores an entry's position in this table
 * (the lookup code is not part of this excerpt), so entries must not be
 * reordered.
 */
static const uint16_t gfx12_subreg_table[32] = {
   0b000000000000000, /* .0  .0  .0  */
   0b100000000000000, /* .0  .0  .16 */
   0b001000000000000, /* .0  .0  .4  */
   0b011000000000000, /* .0  .0  .12 */
   0b000000010000000, /* .0  .4  .0  */
   0b010000000000000, /* .0  .0  .8  */
   0b101000000000000, /* .0  .0  .20 */
   0b000000000001000, /* .8  .0  .0  */
   0b000000100000000, /* .0  .8  .0  */
   0b110000000000000, /* .0  .0  .24 */
   0b111000000000000, /* .0  .0  .28 */
   0b000001000000000, /* .0  .16 .0  */
   0b000000000000100, /* .4  .0  .0  */
   0b000001100000000, /* .0  .24 .0  */
   0b000001010000000, /* .0  .20 .0  */
   0b000000110000000, /* .0  .12 .0  */
   0b000001110000000, /* .0  .28 .0  */
   0b000000000011100, /* .28 .0  .0  */
   0b000000000010000, /* .16 .0  .0  */
   0b000000000001100, /* .12 .0  .0  */
   0b000000000011000, /* .24 .0  .0  */
   0b000000000010100, /* .20 .0  .0  */
   0b000000000000010, /* .2  .0  .0  */
   0b000000101000000, /* .0  .10 .0  */
   0b000000001000000, /* .0  .2  .0  */
   0b000000010000100, /* .4  .4  .0  */
   0b000000001011100, /* .28 .2  .0  */
   0b000000001000010, /* .2  .2  .0  */
   0b000000110001100, /* .12 .12 .0  */
   0b000000000100000, /* .0  .1  .0  */
   0b000000001100000, /* .0  .3  .0  */
   0b110001100000000, /* .0  .24 .24 */
};
785
/* Gfx12 source-0 index lookup table for instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-digit binary literal.  The
 * trailing comment on each entry shows the source modifier (negate/abs) and
 * region description the pattern stands for.  Entry order is significant:
 * presumably the compacted encoding stores an entry's position in this table
 * (the lookup code is not part of this excerpt), so entries must not be
 * reordered.
 */
static const uint16_t gfx12_src0_index_table[16] = {
   0b010001100100, /*       r<8;8,1>  */
   0b000000000000, /*       r<0;1,0>  */
   0b010001100110, /*      -r<8;8,1>  */
   0b010001100101, /*  (abs)r<8;8,1>  */
   0b000000000010, /*      -r<0;1,0>  */
   0b001000000000, /*       r<2;1,0>  */
   0b001001000000, /*       r<2;4,0>  */
   0b001101000000, /*       r<4;4,0>  */
   0b001000100100, /*       r<2;2,1>  */
   0b001100000000, /*       r<4;1,0>  */
   0b001000100110, /*      -r<2;2,1>  */
   0b001101000100, /*       r<4;4,1>  */
   0b010001100111, /* -(abs)r<8;8,1>  */
   0b000100000000, /*       r<1;1,0>  */
   0b000000000001, /*  (abs)r<0;1,0>  */
   0b111100010000, /*       r[a]<1,0> */
};
804
/* Gfx12 source-1 index lookup table for instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-digit binary literal.  The
 * trailing comment on each entry shows the source modifier (negate/abs) and
 * region description the pattern stands for.  Entry order is significant:
 * presumably the compacted encoding stores an entry's position in this table
 * (the lookup code is not part of this excerpt), so entries must not be
 * reordered.
 */
static const uint16_t gfx12_src1_index_table[16] = {
   0b000100011001, /*       r<8;8,1> */
   0b000000000000, /*       r<0;1,0> */
   0b100100011001, /*      -r<8;8,1> */
   0b100000000000, /*      -r<0;1,0> */
   0b010100011001, /*  (abs)r<8;8,1> */
   0b100011010000, /*      -r<4;4,0> */
   0b000010000000, /*       r<2;1,0> */
   0b000010001001, /*       r<2;2,1> */
   0b100010001001, /*      -r<2;2,1> */
   0b000011010000, /*       r<4;4,0> */
   0b000011010001, /*       r<4;4,1> */
   0b000011000000, /*       r<4;1,0> */
   0b110100011001, /* -(abs)r<8;8,1> */
   0b010000000000, /*  (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b100011010001, /*      -r<4;4,1> */
};
823
/* XeHP source-0 index lookup table for instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-digit binary literal.  The
 * trailing comment on each entry shows the source modifier (negate/abs) and
 * region description the pattern stands for.  Entry order is significant:
 * presumably the compacted encoding stores an entry's position in this table
 * (the lookup code is not part of this excerpt), so entries must not be
 * reordered.
 */
static const uint16_t xehp_src0_index_table[16] = {
   0b000100000000, /*       r<1;1,0>  */
   0b000000000000, /*       r<0;1,0>  */
   0b000100000010, /*      -r<1;1,0>  */
   0b000100000001, /*  (abs)r<1;1,0>  */
   0b000000000010, /*      -r<0;1,0>  */
   0b001000000000, /*       r<2;1,0>  */
   0b001001000000, /*       r<2;4,0>  */
   0b001101000000, /*       r<4;4,0>  */
   0b001100000000, /*       r<4;1,0>  */
   0b000100000011, /* -(abs)r<1;1,0>  */
   0b000000000001, /*  (abs)r<0;1,0>  */
   0b111100010000, /*       r[a]<1,0> */
   0b010001100000, /*       r<8;8,0>  */
   0b000101000000, /*       r<1;4,0>  */
   0b010001001000, /*       r<8;4,2>  */
   0b001000000010, /*      -r<2;1,0>  */
};
842
/* XeHP source-1 index lookup table for instruction compaction.
 *
 * Holds 16 bit patterns, each written as a 12-digit binary literal.  The
 * trailing comment on each entry shows the source modifier (negate/abs) and
 * region description the pattern stands for.  Entry order is significant:
 * presumably the compacted encoding stores an entry's position in this table
 * (the lookup code is not part of this excerpt), so entries must not be
 * reordered.
 */
static const uint16_t xehp_src1_index_table[16] = {
   0b000001000000, /*       r<1;1,0>    */
   0b000000000000, /*       r<0;1,0>    */
   0b100001000000, /*      -r<1;1,0>    */
   0b100000000000, /*      -r<0;1,0>    */
   0b010001000000, /*  (abs)r<1;1,0>    */
   0b100011010000, /*      -r<4;4,0>    */
   0b000010000000, /*       r<2;1,0>    */
   0b000011010000, /*       r<4;4,0>    */
   0b000011000000, /*       r<4;1,0>    */
   0b110001000000, /* -(abs)r<1;1,0>    */
   0b010000000000, /*  (abs)r<0;1,0>    */
   0b110000000000, /* -(abs)r<0;1,0>    */
   0b000100011000, /*       r<8;8,0>    */
   0b100010000000, /*      -r<2;1,0>    */
   0b100000001001, /*      -r<0;2,1>    */
   0b100001000100, /*      -r[a]<1;1,0> */
};
861
/* Gfx8 3-src control-index lookup table for instruction compaction.
 *
 * This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware, so a single
 * 4-entry table serves both.  Entry order is significant and must be kept.
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
874
/* Gfx8 3-src source-index lookup table for instruction compaction.
 *
 * This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * NOTE(review): "low 44 bits" does not match the 49 - 3 = 46 bits implied by
 * the first paragraph; confirm the intended width against the hardware
 * documentation.
 *
 * Entries need more than 32 bits, hence uint64_t.  Entry order is significant
 * and must be kept.
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
888
/* Gfx12 3-src control-index lookup table for instruction compaction.
 *
 * Holds 32 bit patterns, each written as a 36-digit binary literal (hence
 * uint64_t).  The trailing comment on each entry gives the disassembly-style
 * rendering of the pattern: optional (W) prefix, exec size and channel
 * offset, optional (sat), destination register file/stride/type and the
 * three source types.  Entry order is significant: presumably the compacted
 * encoding stores an entry's position in this table (the lookup code is not
 * part of this excerpt), so entries must not be reordered.
 */
static const uint64_t gfx12_3src_control_index_table[32] = {
   0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
   0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
   0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
   0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
   0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
};
923
/* Uncompacted 3-source control patterns searched by set_3src_control_index()
 * when devinfo->verx10 >= 125.
 *
 * Each entry is a 37-bit value packed, from most to least significant, out of
 * instruction bits 95:92, 90:88, 82:80, 50, 49:48, 42:40, 39, 38:36, 34, 33,
 * 32, 31, 28, 27:24, 23, 22, 21:19 and 18:16.  The position of the matching
 * entry is what gets stored as the compacted control index, so the order of
 * the entries is significant.
 *
 * The per-entry comments appear to be the disassembled form of the controls
 * and types that each pattern encodes.
 */
static const uint64_t xehp_3src_control_index_table[32] = {
   0b0000010010100010101000000000000000100, /*          (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000000011, /*          (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000011, /*          (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000011, /*     (W)  (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000011, /*     (W)  (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000010011, /*          (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000010011, /*          (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000010011, /*     (W)  (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000010011, /*     (W)  (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000100, /*     (W)  (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000100, /*          (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000100, /*          (16|M0)  (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000100100, /*          (16|M16)      grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000100, /*     (W)  (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000000, /*     (W)  (1|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000011, /*          (8|M0)   (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000100011, /*     (W)  (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000110011, /*     (W)  (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000100011, /*     (W)  (8|M16)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000110011, /*     (W)  (8|M24)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000110011, /*          (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000100011, /*          (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b          */
   0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub {Atomic} */
   0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b {Atomic} */
   0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub {Atomic} */
   0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b          */
   0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub          */
   0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b {Atomic} */
   0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub          */
   0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf {Atomic} */
   0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
};
958
/* Uncompacted 3-source "source" patterns searched by set_3src_source_index()
 * when devinfo->ver >= 12 and devinfo->verx10 < 125.
 *
 * Each entry is a 21-bit value packed, from most to least significant, out of
 * instruction bits 114, 113:112, 98, 97:96, 91, 87:86, 85:84, 83, 66, 65:64,
 * 47, 46, 45:44, 43 and 35.  The position of the matching entry is stored as
 * the compacted source index, so the order of the entries is significant.
 *
 * The per-entry comments appear to show the register file, region and
 * negation of the three sources that each pattern encodes.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
   0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
   0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
   0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
   0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
   0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
   0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
   0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
   0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
   0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
   0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
   0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
   0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
   0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
   0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
   0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
   0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
   0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
   0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
   0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
   0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
   0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
   0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
};
993
/* Uncompacted 3-source "source" patterns searched by set_3src_source_index()
 * when devinfo->verx10 >= 125.
 *
 * Entries use the same 21-bit packing as the Gfx12 table: from most to least
 * significant, instruction bits 114, 113:112, 98, 97:96, 91, 87:86, 85:84,
 * 83, 66, 65:64, 47, 46, 45:44, 43 and 35.  The position of the matching
 * entry is stored as the compacted source index, so the order of the entries
 * is significant.
 *
 * The per-entry comments appear to show the sources that each pattern
 * encodes; the "dpas" rows presumably describe how the same bits are
 * interpreted for dpas instructions.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /*           grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000001000000001, /*           arf<1;0>   grf<1;0>     grf<0>      */
   0b101100000001100000001, /*           grf<1;0>   grf<1;0>     grf<1>      */
   0b100100000001100000001, /*           grf<1;0>   grf<1;0>     grf<0>      */
   0b101100000000100000001, /*           grf<1;0>   grf<0;0>     grf<1>      */
   0b101100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<1>      */
   0b101000000001100000001, /*           grf<1;0>   arf<1;0>     grf<1>      */
   0b101100000001100000000, /*           grf<0;0>   grf<1;0>     grf<1>      */
   0b100000000001100000000, /*           grf<0;0>   arf<1;0>     grf<0>      */
   0b101100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<1>      */
   0b101100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<1>      */
   0b101100000000100000000, /*           grf<0;0>   grf<0;0>     grf<1>      */
   0b100000000001100000001, /*           grf<1;0>   arf<1;0>     grf<0>      */
   0b100100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<0>      */
   0b100100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<0>      */
   0b100100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<0>      */
   0b100100000000100000001, /*           grf<1;0>   grf<0;0>     grf<0>      */
   0b100100000001100001000, /*          -grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000000100000000, /*           grf<0;0>   grf<0;0>     grf<0>
                             * dpas.*x1  grf:d      grf:[ub,b]   grf:[ub,b]
                             * dpas.*x1  grf:f      grf:bf       grf:bf
                             */
   0b101100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<1>      */
   0b100100000101100000000, /*           grf<0;0>   grf<1;0>    -grf<0>      */
   0b101000000001100000000, /*           grf<0;0>   arf<1;0>     grf<1>      */
   0b100100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<0>      */
   0b101100000101100001001, /*          -grf<1;0>   grf<1;0>    -grf<1>      */
   0b100100010000100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[ub,b]  */
   0b100100000100100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[ub,b]  */
   0b100100001100100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
};
1031
/* Uncompacted 3-source subregister patterns searched by
 * set_3src_subreg_index(), which requires devinfo->ver >= 12.
 *
 * Each entry is a 20-bit value made of four 5-bit fields taken, from most to
 * least significant, from instruction bits 119:115, 103:99, 71:67 and 55:51.
 * The position of the matching entry is stored as the compacted subreg
 * index, so the order of the entries is significant.
 *
 * The per-entry comments list the decimal value of each 5-bit field, starting
 * with the least significant field (bits 55:51) on the left -- presumably the
 * dst, src0, src1 and src2 subregister numbers, in that order.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0  .0  .0  .0  */
   0b00100000000000000000, /* .0  .0  .0  .4  */
   0b00000000000110000000, /* .0  .12 .0  .0  */
   0b10100000000000000000, /* .0  .0  .0  .20 */
   0b10000000001110000000, /* .0  .28 .0  .16 */
   0b01100000000000000000, /* .0  .0  .0  .12 */
   0b01000000000000000000, /* .0  .0  .0  .8  */
   0b00000010000000000000, /* .0  .0  .8  .0  */
   0b00000001000000000000, /* .0  .0  .4  .0  */
   0b11000000000000000000, /* .0  .0  .0  .24 */
   0b10000000000000000000, /* .0  .0  .0  .16 */
   0b11100000000000000000, /* .0  .0  .0  .28 */
   0b00000110000000000000, /* .0  .0  .24 .0  */
   0b00000000000010000000, /* .0  .4  .0  .0  */
   0b00000100000000000000, /* .0  .0  .16 .0  */
   0b00000011000000000000, /* .0  .0  .12 .0  */
   0b00000101000000000000, /* .0  .0  .20 .0  */
   0b00000111000000000000, /* .0  .0  .28 .0  */
   0b00000000000100000000, /* .0  .8  .0  .0  */
   0b00000000001000000000, /* .0  .16 .0  .0  */
   0b00000000001100000000, /* .0  .24 .0  .0  */
   0b00000000001010000000, /* .0  .20 .0  .0  */
   0b00000000001110000000, /* .0  .28 .0  .0  */
   0b11000000001110000000, /* .0  .28 .0  .24 */
   0b00100000000100000000, /* .0  .8  .0  .4  */
   0b00100000000110000000, /* .0  .12 .0  .4  */
   0b01000000000110000000, /* .0  .12 .0  .8  */
   0b10000000001100000000, /* .0  .24 .0  .16 */
   0b10000000001010000000, /* .0  .20 .0  .16 */
   0b01100000000010000000, /* .0  .4  .0  .12 */
   0b10100000001110000000, /* .0  .28 .0  .20 */
   0b01000000000010000000, /* .0  .4  .0  .8  */
};
1066
/* Lookup tables consulted by the set_*_index() helpers.
 *
 * Each helper gathers bits from the uncompacted instruction and scans the
 * matching table below for an identical value; the position of the match is
 * what gets written into the compacted instruction.  The pointers are
 * presumably filled in by compaction_state_init() -- confirm against its
 * definition.
 */
struct compaction_state {
   /* ISA description; the helpers read isa->devinfo to choose bit layouts. */
   const struct brw_isa_info *isa;
   /* Scanned by set_control_index(), which looks at 32 entries. */
   const uint32_t *control_index_table;
   /* Scanned by set_datatype_index(), which looks at 32 entries. */
   const uint32_t *datatype_table;
   /* Scanned by set_subreg_index(), which looks at 32 entries. */
   const uint16_t *subreg_table;
   /* Scanned by set_src0_index(); the bound depends on devinfo->ver. */
   const uint16_t *src0_index_table;
   /* Scanned by set_src1_index() for non-immediate src1; the bound depends
    * on devinfo->ver.
    */
   const uint16_t *src1_index_table;
};
1075
/* Forward declaration; the definition is not in this part of the file.
 * NOTE(review): presumably fills in the table pointers of *c for the
 * hardware described by isa -- confirm against the definition.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct brw_isa_info *isa);
1078
1079static bool
1080set_control_index(const struct compaction_state *c,
1081                  brw_compact_inst *dst, const brw_inst *src)
1082{
1083   const struct intel_device_info *devinfo = c->isa->devinfo;
1084   uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1085
1086   if (devinfo->ver >= 12) {
1087      uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
1088                    (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1089                    (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1090                    (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1091                    (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1092                    (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1093                    (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1094                    (brw_inst_bits(src, 23, 22) <<  6) | /*  2b */
1095                    (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1096                    (brw_inst_bits(src, 18, 16));        /*  3b */
1097   } else if (devinfo->ver >= 8) {
1098      uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
1099                    (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
1100                    (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
1101                    (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
1102                    (brw_inst_bits(src,  8,  8));        /*  1b */
1103   } else {
1104      uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
1105                    (brw_inst_bits(src, 23,  8));        /* 16b */
1106
1107      /* On gfx7, the flag register and subregister numbers are integrated into
1108       * the control index.
1109       */
1110      if (devinfo->ver == 7)
1111         uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1112   }
1113
1114   for (int i = 0; i < 32; i++) {
1115      if (c->control_index_table[i] == uncompacted) {
1116         brw_compact_inst_set_control_index(devinfo, dst, i);
1117	 return true;
1118      }
1119   }
1120
1121   return false;
1122}
1123
1124static bool
1125set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1126                   const brw_inst *src, bool is_immediate)
1127{
1128   const struct intel_device_info *devinfo = c->isa->devinfo;
1129   uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1130
1131   if (devinfo->ver >= 12) {
1132      uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /*  4b */
1133                    (brw_inst_bits(src, 66, 66) << 14) | /*  1b */
1134                    (brw_inst_bits(src, 50, 50) << 13) | /*  1b */
1135                    (brw_inst_bits(src, 49, 48) << 11) | /*  2b */
1136                    (brw_inst_bits(src, 47, 47) << 10) | /*  1b */
1137                    (brw_inst_bits(src, 46, 46) <<  9) | /*  1b */
1138                    (brw_inst_bits(src, 43, 40) <<  5) | /*  4b */
1139                    (brw_inst_bits(src, 39, 36) <<  1) | /*  4b */
1140                    (brw_inst_bits(src, 35, 35));        /*  1b */
1141
1142      /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1143       * is present
1144       */
1145      if (!is_immediate) {
1146         uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1147      }
1148   } else if (devinfo->ver >= 8) {
1149      uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
1150                    (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
1151                    (brw_inst_bits(src, 46, 35));        /* 12b */
1152   } else {
1153      uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
1154                    (brw_inst_bits(src, 46, 32));        /* 15b */
1155   }
1156
1157   for (int i = 0; i < 32; i++) {
1158      if (c->datatype_table[i] == uncompacted) {
1159         brw_compact_inst_set_datatype_index(devinfo, dst, i);
1160	 return true;
1161      }
1162   }
1163
1164   return false;
1165}
1166
1167static bool
1168set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1169                 const brw_inst *src, bool is_immediate)
1170{
1171   const struct intel_device_info *devinfo = c->isa->devinfo;
1172   uint16_t uncompacted; /* 15b */
1173
1174   if (devinfo->ver >= 12) {
1175      uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
1176                    (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
1177
1178      if (!is_immediate)
1179         uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1180   } else {
1181      uncompacted = (brw_inst_bits(src, 52, 48) << 0) |    /* 5b */
1182                    (brw_inst_bits(src, 68, 64) << 5);     /* 5b */
1183
1184      if (!is_immediate)
1185         uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1186   }
1187
1188   for (int i = 0; i < 32; i++) {
1189      if (c->subreg_table[i] == uncompacted) {
1190         brw_compact_inst_set_subreg_index(devinfo, dst, i);
1191	 return true;
1192      }
1193   }
1194
1195   return false;
1196}
1197
1198static bool
1199set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1200               const brw_inst *src)
1201{
1202   const struct intel_device_info *devinfo = c->isa->devinfo;
1203   uint16_t uncompacted; /* 12b */
1204   int table_len;
1205
1206   if (devinfo->ver >= 12) {
1207      table_len = ARRAY_SIZE(gfx12_src0_index_table);
1208      uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /*  4b */
1209                    (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
1210                    (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
1211                    (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
1212                    (brw_inst_bits(src, 45, 44));       /*  2b */
1213   } else {
1214      table_len = ARRAY_SIZE(gfx8_src_index_table);
1215      uncompacted = brw_inst_bits(src, 88, 77);         /* 12b */
1216   }
1217
1218   for (int i = 0; i < table_len; i++) {
1219      if (c->src0_index_table[i] == uncompacted) {
1220         brw_compact_inst_set_src0_index(devinfo, dst, i);
1221	 return true;
1222      }
1223   }
1224
1225   return false;
1226}
1227
1228static bool
1229set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1230               const brw_inst *src, bool is_immediate, unsigned imm)
1231{
1232   const struct intel_device_info *devinfo = c->isa->devinfo;
1233   if (is_immediate) {
1234      if (devinfo->ver >= 12) {
1235         /* src1 index takes the low 4 bits of the 12-bit compacted value */
1236         brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1237      } else {
1238         /* src1 index takes the high 5 bits of the 13-bit compacted value */
1239         brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1240      }
1241      return true;
1242   } else {
1243      uint16_t uncompacted; /* 12b */
1244      int table_len;
1245
1246      if (devinfo->ver >= 12) {
1247         table_len = ARRAY_SIZE(gfx12_src0_index_table);
1248         uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
1249                       (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
1250                       (brw_inst_bits(src, 115, 113) <<  3) | /*  3b */
1251                       (brw_inst_bits(src, 112, 112) <<  2) | /*  1b */
1252                       (brw_inst_bits(src,  97,  96));        /*  2b */
1253      } else {
1254         table_len = ARRAY_SIZE(gfx8_src_index_table);
1255         uncompacted = brw_inst_bits(src, 120, 109);          /* 12b */
1256      }
1257
1258      for (int i = 0; i < table_len; i++) {
1259         if (c->src1_index_table[i] == uncompacted) {
1260            brw_compact_inst_set_src1_index(devinfo, dst, i);
1261            return true;
1262         }
1263      }
1264   }
1265
1266   return false;
1267}
1268
1269static bool
1270set_3src_control_index(const struct intel_device_info *devinfo,
1271                       brw_compact_inst *dst, const brw_inst *src)
1272{
1273   assert(devinfo->ver >= 8);
1274
1275   if (devinfo->verx10 >= 125) {
1276      uint64_t uncompacted =             /* 37b/XeHP+ */
1277         (brw_inst_bits(src, 95, 92) << 33) | /*  4b */
1278         (brw_inst_bits(src, 90, 88) << 30) | /*  3b */
1279         (brw_inst_bits(src, 82, 80) << 27) | /*  3b */
1280         (brw_inst_bits(src, 50, 50) << 26) | /*  1b */
1281         (brw_inst_bits(src, 49, 48) << 24) | /*  2b */
1282         (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1283         (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1284         (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1285         (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1286         (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1287         (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1288         (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1289         (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1290         (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1291         (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1292         (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1293         (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1294         (brw_inst_bits(src, 18, 16));        /*  3b */
1295
1296      for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1297         if (xehp_3src_control_index_table[i] == uncompacted) {
1298            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1299            return true;
1300         }
1301      }
1302   } else if (devinfo->ver >= 12) {
1303      uint64_t uncompacted =             /* 36b/TGL+ */
1304         (brw_inst_bits(src, 95, 92) << 32) | /*  4b */
1305         (brw_inst_bits(src, 90, 88) << 29) | /*  3b */
1306         (brw_inst_bits(src, 82, 80) << 26) | /*  3b */
1307         (brw_inst_bits(src, 50, 50) << 25) | /*  1b */
1308         (brw_inst_bits(src, 48, 48) << 24) | /*  1b */
1309         (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1310         (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1311         (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1312         (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1313         (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1314         (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1315         (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1316         (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1317         (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1318         (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1319         (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1320         (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1321         (brw_inst_bits(src, 18, 16));        /*  3b */
1322
1323      for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1324         if (gfx12_3src_control_index_table[i] == uncompacted) {
1325            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1326            return true;
1327         }
1328      }
1329   } else {
1330      uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1331         (brw_inst_bits(src, 34, 32) << 21) |  /*  3b */
1332         (brw_inst_bits(src, 28,  8));         /* 21b */
1333
1334      if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1335         uncompacted |=
1336            brw_inst_bits(src, 36, 35) << 24;  /*  2b */
1337      }
1338
1339      for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1340         if (gfx8_3src_control_index_table[i] == uncompacted) {
1341            brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1342            return true;
1343         }
1344      }
1345   }
1346
1347   return false;
1348}
1349
1350static bool
1351set_3src_source_index(const struct intel_device_info *devinfo,
1352                      brw_compact_inst *dst, const brw_inst *src)
1353{
1354   assert(devinfo->ver >= 8);
1355
1356   if (devinfo->ver >= 12) {
1357      uint32_t uncompacted =               /* 21b/TGL+ */
1358         (brw_inst_bits(src, 114, 114) << 20) | /*  1b */
1359         (brw_inst_bits(src, 113, 112) << 18) | /*  2b */
1360         (brw_inst_bits(src,  98,  98) << 17) | /*  1b */
1361         (brw_inst_bits(src,  97,  96) << 15) | /*  2b */
1362         (brw_inst_bits(src,  91,  91) << 14) | /*  1b */
1363         (brw_inst_bits(src,  87,  86) << 12) | /*  2b */
1364         (brw_inst_bits(src,  85,  84) << 10) | /*  2b */
1365         (brw_inst_bits(src,  83,  83) <<  9) | /*  1b */
1366         (brw_inst_bits(src,  66,  66) <<  8) | /*  1b */
1367         (brw_inst_bits(src,  65,  64) <<  6) | /*  2b */
1368         (brw_inst_bits(src,  47,  47) <<  5) | /*  1b */
1369         (brw_inst_bits(src,  46,  46) <<  4) | /*  1b */
1370         (brw_inst_bits(src,  45,  44) <<  2) | /*  2b */
1371         (brw_inst_bits(src,  43,  43) <<  1) | /*  1b */
1372         (brw_inst_bits(src,  35,  35));        /*  1b */
1373
1374      const uint32_t *three_src_source_index_table =
1375         devinfo->verx10 >= 125 ?
1376         xehp_3src_source_index_table : gfx12_3src_source_index_table;
1377      const uint32_t three_src_source_index_table_len =
1378         devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1379                                  ARRAY_SIZE(gfx12_3src_source_index_table);
1380
1381      for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1382         if (three_src_source_index_table[i] == uncompacted) {
1383            brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1384            return true;
1385         }
1386      }
1387   } else {
1388      uint64_t uncompacted =    /* 46b/BDW; 49b/CHV/SKL+ */
1389         (brw_inst_bits(src,  83,  83) << 43) |   /*  1b */
1390         (brw_inst_bits(src, 114, 107) << 35) |   /*  8b */
1391         (brw_inst_bits(src,  93,  86) << 27) |   /*  8b */
1392         (brw_inst_bits(src,  72,  65) << 19) |   /*  8b */
1393         (brw_inst_bits(src,  55,  37));          /* 19b */
1394
1395      if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1396         uncompacted |=
1397            (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1398            (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1399            (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
1400      } else {
1401         uncompacted |=
1402            (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1403            (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
1404      }
1405
1406      for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1407         if (gfx8_3src_source_index_table[i] == uncompacted) {
1408            brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1409            return true;
1410         }
1411      }
1412   }
1413
1414   return false;
1415}
1416
1417static bool
1418set_3src_subreg_index(const struct intel_device_info *devinfo,
1419                      brw_compact_inst *dst, const brw_inst *src)
1420{
1421   assert(devinfo->ver >= 12);
1422
1423   uint32_t uncompacted =               /* 20b/TGL+ */
1424      (brw_inst_bits(src, 119, 115) << 15) | /*  5b */
1425      (brw_inst_bits(src, 103,  99) << 10) | /*  5b */
1426      (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
1427      (brw_inst_bits(src,  55,  51));        /*  5b */
1428
1429   for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
1430      if (gfx12_3src_subreg_table[i] == uncompacted) {
1431         brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1432	 return true;
1433      }
1434   }
1435
1436   return false;
1437}
1438
1439static bool
1440has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src)
1441{
1442   const struct intel_device_info *devinfo = isa->devinfo;
1443
1444   /* EOT can only be mapped on a send if the src1 is an immediate */
1445   if ((brw_inst_opcode(isa, src) == BRW_OPCODE_SENDC ||
1446        brw_inst_opcode(isa, src) == BRW_OPCODE_SEND) &&
1447       brw_inst_eot(devinfo, src))
1448      return true;
1449
1450   /* Check for instruction bits that don't map to any of the fields of the
1451    * compacted instruction.  The instruction cannot be compacted if any of
1452    * them are set.  They overlap with:
1453    *  - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1454    *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1455    *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1456    *  - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1457    *  - UIP[31] (bit 95 on Gfx8)
1458    */
1459   if (devinfo->ver >= 12) {
1460      assert(!brw_inst_bits(src, 7,  7));
1461      return false;
1462   } else if (devinfo->ver >= 8) {
1463      assert(!brw_inst_bits(src, 7,  7));
1464      return brw_inst_bits(src, 95, 95) ||
1465             brw_inst_bits(src, 47, 47) ||
1466             brw_inst_bits(src, 11, 11);
1467   } else {
1468      assert(!brw_inst_bits(src, 7,  7) &&
1469             !(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));
1470      return brw_inst_bits(src, 95, 91) ||
1471             brw_inst_bits(src, 47, 47);
1472   }
1473}
1474
1475static bool
1476has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1477                       const brw_inst *src)
1478{
1479   /* Check for three-source instruction bits that don't map to any of the
1480    * fields of the compacted instruction.  All of them seem to be reserved
1481    * bits currently.
1482    */
1483   if (devinfo->ver >= 12) {
1484      assert(!brw_inst_bits(src, 7, 7));
1485   } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1486      assert(!brw_inst_bits(src, 127, 127) &&
1487             !brw_inst_bits(src, 7,  7));
1488   } else {
1489      assert(devinfo->ver >= 8);
1490      assert(!brw_inst_bits(src, 127, 126) &&
1491             !brw_inst_bits(src, 105, 105) &&
1492             !brw_inst_bits(src, 84, 84) &&
1493             !brw_inst_bits(src, 7,  7));
1494
1495      /* Src1Type and Src2Type, used for mixed-precision floating point */
1496      if (brw_inst_bits(src, 36, 35))
1497         return true;
1498   }
1499
1500   return false;
1501}
1502
1503static bool
1504brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
1505                                 brw_compact_inst *dst, const brw_inst *src)
1506{
1507   assert(devinfo->ver >= 8);
1508
1509   if (has_3src_unmapped_bits(devinfo, src))
1510      return false;
1511
1512#define compact(field) \
1513   brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1514#define compact_a16(field) \
1515   brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1516
1517   compact(hw_opcode);
1518
1519   if (!set_3src_control_index(devinfo, dst, src))
1520      return false;
1521
1522   if (!set_3src_source_index(devinfo, dst, src))
1523      return false;
1524
1525   if (devinfo->ver >= 12) {
1526      if (!set_3src_subreg_index(devinfo, dst, src))
1527         return false;
1528
1529      compact(swsb);
1530      compact(debug_control);
1531      compact(dst_reg_nr);
1532      compact(src0_reg_nr);
1533      compact(src1_reg_nr);
1534      compact(src2_reg_nr);
1535   } else {
1536      compact(dst_reg_nr);
1537      compact_a16(src0_rep_ctrl);
1538      compact(debug_control);
1539      compact(saturate);
1540      compact_a16(src1_rep_ctrl);
1541      compact_a16(src2_rep_ctrl);
1542      compact(src0_reg_nr);
1543      compact(src1_reg_nr);
1544      compact(src2_reg_nr);
1545      compact_a16(src0_subreg_nr);
1546      compact_a16(src1_subreg_nr);
1547      compact_a16(src2_subreg_nr);
1548   }
1549   brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1550
1551#undef compact
1552#undef compact_a16
1553
1554   return true;
1555}
1556
1557/* On SNB through ICL, compacted instructions have 12-bits for immediate
1558 * sources, and a 13th bit that's replicated through the high 20 bits.
1559 *
1560 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1561 * of packed vectors as compactable immediates.
1562 *
1563 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
 * rather than the low 12-bits. For signed integers the 12th bit is replicated,
1565 * while for unsigned integers it is not.
1566 *
1567 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1568 */
1569static int
1570compact_immediate(const struct intel_device_info *devinfo,
1571                  enum brw_reg_type type, unsigned imm)
1572{
1573   if (devinfo->ver >= 12) {
1574      /* 16-bit immediates need to be replicated through the 32-bit immediate
1575       * field
1576       */
1577      switch (type) {
1578      case BRW_REGISTER_TYPE_W:
1579      case BRW_REGISTER_TYPE_UW:
1580      case BRW_REGISTER_TYPE_HF:
1581         if ((imm >> 16) != (imm & 0xffff))
1582            return -1;
1583         break;
1584      default:
1585         break;
1586      }
1587
1588      switch (type) {
1589      case BRW_REGISTER_TYPE_F:
1590         /* We get the high 12-bits as-is; rest must be zero */
1591         if ((imm & 0xfffff) == 0)
1592            return (imm >> 20) & 0xfff;
1593         break;
1594      case BRW_REGISTER_TYPE_HF:
1595         /* We get the high 12-bits as-is; rest must be zero */
1596         if ((imm & 0xf) == 0)
1597            return (imm >> 4) & 0xfff;
1598         break;
1599      case BRW_REGISTER_TYPE_UD:
1600      case BRW_REGISTER_TYPE_VF:
1601      case BRW_REGISTER_TYPE_UV:
1602      case BRW_REGISTER_TYPE_V:
1603         /* We get the low 12-bits as-is; rest must be zero */
1604         if ((imm & 0xfffff000) == 0)
1605            return imm & 0xfff;
1606         break;
1607      case BRW_REGISTER_TYPE_UW:
1608         /* We get the low 12-bits as-is; rest must be zero */
1609         if ((imm & 0xf000) == 0)
1610            return imm & 0xfff;
1611         break;
1612      case BRW_REGISTER_TYPE_D:
1613         /* We get the low 11-bits as-is; 12th is replicated */
1614         if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1615            return imm & 0xfff;
1616         break;
1617      case BRW_REGISTER_TYPE_W:
1618         /* We get the low 11-bits as-is; 12th is replicated */
1619         if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1620            return imm & 0xfff;
1621         break;
1622      case BRW_REGISTER_TYPE_NF:
1623      case BRW_REGISTER_TYPE_DF:
1624      case BRW_REGISTER_TYPE_Q:
1625      case BRW_REGISTER_TYPE_UQ:
1626      case BRW_REGISTER_TYPE_B:
1627      case BRW_REGISTER_TYPE_UB:
1628         return -1;
1629      }
1630   } else {
1631      /* We get the low 12 bits as-is; 13th is replicated */
1632      if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1633         return imm & 0x1fff;
1634      }
1635   }
1636
1637   return -1;
1638}
1639
1640static int
1641uncompact_immediate(const struct intel_device_info *devinfo,
1642                    enum brw_reg_type type, unsigned compact_imm)
1643{
1644   if (devinfo->ver >= 12) {
1645      switch (type) {
1646      case BRW_REGISTER_TYPE_F:
1647         return compact_imm << 20;
1648      case BRW_REGISTER_TYPE_HF:
1649         return (compact_imm << 20) | (compact_imm << 4);
1650      case BRW_REGISTER_TYPE_UD:
1651      case BRW_REGISTER_TYPE_VF:
1652      case BRW_REGISTER_TYPE_UV:
1653      case BRW_REGISTER_TYPE_V:
1654         return compact_imm;
1655      case BRW_REGISTER_TYPE_UW:
1656         /* Replicate */
1657         return compact_imm << 16 | compact_imm;
1658      case BRW_REGISTER_TYPE_D:
1659         /* Extend the 12th bit into the high 20 bits */
1660         return (int)(compact_imm << 20) >> 20;
1661      case BRW_REGISTER_TYPE_W:
1662         /* Extend the 12th bit into the high 4 bits and replicate */
1663         return ((int)(compact_imm << 20) >> 4) |
1664                ((unsigned short)((short)(compact_imm << 4) >> 4));
1665      case BRW_REGISTER_TYPE_NF:
1666      case BRW_REGISTER_TYPE_DF:
1667      case BRW_REGISTER_TYPE_Q:
1668      case BRW_REGISTER_TYPE_UQ:
1669      case BRW_REGISTER_TYPE_B:
1670      case BRW_REGISTER_TYPE_UB:
1671         unreachable("not reached");
1672      }
1673   } else {
1674      /* Replicate the 13th bit into the high 19 bits */
1675      return (int)(compact_imm << 19) >> 19;
1676   }
1677
1678   unreachable("not reached");
1679}
1680
1681static bool
1682has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1683              enum brw_reg_type *type)
1684{
1685   if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1686      *type = brw_inst_src0_type(devinfo, inst);
1687      return *type != INVALID_REG_TYPE;
1688   } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1689      *type = brw_inst_src1_type(devinfo, inst);
1690      return *type != INVALID_REG_TYPE;
1691   }
1692
1693   return false;
1694}
1695
1696/**
1697 * Applies some small changes to instruction types to increase chances of
1698 * compaction.
1699 */
1700static brw_inst
1701precompact(const struct brw_isa_info *isa, brw_inst inst)
1702{
1703   const struct intel_device_info *devinfo = isa->devinfo;
1704
1705   /* In XeHP the compaction tables removed the entries for source regions
1706    * <8;8,1> giving preference to <1;1,0> as the way to indicate
1707    * sequential elements, so convert to those before compacting.
1708    */
1709   if (devinfo->verx10 >= 125) {
1710      if (brw_inst_src0_reg_file(devinfo, &inst) == BRW_GENERAL_REGISTER_FILE &&
1711          brw_inst_src0_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1712          brw_inst_src0_vstride(devinfo, &inst) == (brw_inst_src0_width(devinfo, &inst) + 1) &&
1713          brw_inst_src0_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1714         brw_inst_set_src0_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1715         brw_inst_set_src0_width(devinfo, &inst, BRW_WIDTH_1);
1716         brw_inst_set_src0_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1717      }
1718
1719      if (brw_inst_src1_reg_file(devinfo, &inst) == BRW_GENERAL_REGISTER_FILE &&
1720          brw_inst_src1_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1721          brw_inst_src1_vstride(devinfo, &inst) == (brw_inst_src1_width(devinfo, &inst) + 1) &&
1722          brw_inst_src1_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1723         brw_inst_set_src1_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1724         brw_inst_set_src1_width(devinfo, &inst, BRW_WIDTH_1);
1725         brw_inst_set_src1_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1726      }
1727   }
1728
1729   if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1730      return inst;
1731
1732   /* The Bspec's section titled "Non-present Operands" claims that if src0
1733    * is an immediate that src1's type must be the same as that of src0.
1734    *
1735    * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1736    * that do not follow this rule. E.g., from the IVB/HSW table:
1737    *
1738    *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1739    *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1740    *
1741    * And from the SNB table:
1742    *
1743    *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1744    *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1745    *
1746    * Neither of these cause warnings from the simulator when used,
1747    * compacted or otherwise. In fact, all compaction mappings that have an
1748    * immediate in src0 use a:ud for src1.
1749    *
1750    * The GM45 instruction compaction tables do not contain mapped meanings
1751    * so it's not clear whether it has the restriction. We'll assume it was
1752    * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1753    *
1754    * Don't do any of this for 64-bit immediates, since the src1 fields
1755    * overlap with the immediate and setting them would overwrite the
1756    * immediate we set.
1757    */
1758   if (devinfo->ver >= 6 &&
1759       !(devinfo->platform == INTEL_PLATFORM_HSW &&
1760         brw_inst_opcode(isa, &inst) == BRW_OPCODE_DIM) &&
1761       !(devinfo->ver >= 8 &&
1762         (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1763          brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1764          brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1765      brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1766   }
1767
1768   /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1769    * for immediate values. Presumably the hardware engineers realized
1770    * that the only useful floating-point value that could be represented
1771    * in this format is 0.0, which can also be represented as a VF-typed
1772    * immediate, so they gave us the previously mentioned mapping on IVB+.
1773    *
1774    * Strangely, we do have a mapping for imm:f in src1, so we don't need
1775    * to do this there.
1776    *
1777    * If we see a 0.0:F, change the type to VF so that it can be compacted.
1778    *
1779    * Compaction of floating-point immediates is improved on Gfx12, thus
1780    * removing the need for this.
1781    */
1782   if (devinfo->ver < 12 &&
1783       brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1784       brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1785       brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1786       brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1787      enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1788      brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1789   }
1790
1791   /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1792    * set the types to :UD so the instruction can be compacted.
1793    *
1794    * FINISHME: Use dst:f | imm:f on Gfx12
1795    */
1796   if (devinfo->ver < 12 &&
1797       compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1798                         brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1799       brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1800       brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1801       brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1802      enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1803      enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1804
1805      brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1806      brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1807   }
1808
1809   return inst;
1810}
1811
1812/**
1813 * Tries to compact instruction src into dst.
1814 *
1815 * It doesn't modify dst unless src is compactable, which is relied on by
1816 * brw_compact_instructions().
1817 */
1818static bool
1819try_compact_instruction(const struct compaction_state *c,
1820                        brw_compact_inst *dst, const brw_inst *src)
1821{
1822   const struct intel_device_info *devinfo = c->isa->devinfo;
1823   brw_compact_inst temp;
1824
1825   assert(brw_inst_cmpt_control(devinfo, src) == 0);
1826
1827   if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) {
1828      if (devinfo->ver >= 8) {
1829         memset(&temp, 0, sizeof(temp));
1830         if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1831            *dst = temp;
1832            return true;
1833         } else {
1834            return false;
1835         }
1836      } else {
1837         return false;
1838      }
1839   }
1840
1841   enum brw_reg_type type;
1842   bool is_immediate = has_immediate(devinfo, src, &type);
1843
1844   unsigned compacted_imm = 0;
1845
1846   if (is_immediate) {
1847      /* Instructions with immediates cannot be compacted on Gen < 6 */
1848      if (devinfo->ver < 6)
1849         return false;
1850
1851      compacted_imm = compact_immediate(devinfo, type,
1852                                        brw_inst_imm_ud(devinfo, src));
1853      if (compacted_imm == -1)
1854         return false;
1855   }
1856
1857   if (has_unmapped_bits(c->isa, src))
1858      return false;
1859
1860   memset(&temp, 0, sizeof(temp));
1861
1862#define compact(field) \
1863   brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1864#define compact_reg(field) \
1865   brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1866                                       brw_inst_##field##_da_reg_nr(devinfo, src))
1867
1868   compact(hw_opcode);
1869   compact(debug_control);
1870
1871   if (!set_control_index(c, &temp, src))
1872      return false;
1873   if (!set_datatype_index(c, &temp, src, is_immediate))
1874      return false;
1875   if (!set_subreg_index(c, &temp, src, is_immediate))
1876      return false;
1877   if (!set_src0_index(c, &temp, src))
1878      return false;
1879   if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1880      return false;
1881
1882   if (devinfo->ver >= 12) {
1883      compact(swsb);
1884      compact_reg(dst);
1885      compact_reg(src0);
1886
1887      if (is_immediate) {
1888         /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1889         brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1890      } else {
1891         compact_reg(src1);
1892      }
1893   } else {
1894      if (devinfo->ver >= 6) {
1895         compact(acc_wr_control);
1896      } else {
1897         compact(mask_control_ex);
1898      }
1899
1900      if (devinfo->ver <= 6)
1901         compact(flag_subreg_nr);
1902
1903      compact(cond_modifier);
1904
1905      compact_reg(dst);
1906      compact_reg(src0);
1907
1908      if (is_immediate) {
1909         /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1910         brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1911      } else {
1912         compact_reg(src1);
1913      }
1914   }
1915   brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1916
1917#undef compact
1918#undef compact_reg
1919
1920   *dst = temp;
1921
1922   return true;
1923}
1924
1925bool
1926brw_try_compact_instruction(const struct brw_isa_info *isa,
1927                            brw_compact_inst *dst, const brw_inst *src)
1928{
1929   struct compaction_state c;
1930   compaction_state_init(&c, isa);
1931   return try_compact_instruction(&c, dst, src);
1932}
1933
1934static void
1935set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1936                        brw_compact_inst *src)
1937{
1938   const struct intel_device_info *devinfo = c->isa->devinfo;
1939   uint32_t uncompacted =
1940      c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1941
1942   if (devinfo->ver >= 12) {
1943      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1944      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1945      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1946      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1947      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1948      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1949      brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1950      brw_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
1951      brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1952      brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1953   } else if (devinfo->ver >= 8) {
1954      brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1955      brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1956      brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1957      brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1958      brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1959   } else {
1960      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1961      brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1962
1963      if (devinfo->ver == 7)
1964         brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1965   }
1966}
1967
1968static void
1969set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1970                         brw_compact_inst *src)
1971{
1972   const struct intel_device_info *devinfo = c->isa->devinfo;
1973   uint32_t uncompacted =
1974      c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1975
1976   if (devinfo->ver >= 12) {
1977      brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1978      brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1979      brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1980      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1981      brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1982      brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1983      brw_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
1984      brw_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
1985      brw_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
1986      brw_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
1987   } else if (devinfo->ver >= 8) {
1988      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1989      brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1990      brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1991   } else {
1992      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1993      brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1994   }
1995}
1996
1997static void
1998set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1999                       brw_compact_inst *src)
2000{
2001   const struct intel_device_info *devinfo = c->isa->devinfo;
2002   uint16_t uncompacted =
2003      c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
2004
2005   if (devinfo->ver >= 12) {
2006      brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
2007      brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
2008      brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
2009   } else {
2010      brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
2011      brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
2012      brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
2013   }
2014}
2015
2016static void
2017set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
2018                     brw_compact_inst *src)
2019{
2020   const struct intel_device_info *devinfo = c->isa->devinfo;
2021   uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
2022   uint16_t uncompacted = c->src0_index_table[compacted];
2023
2024   if (devinfo->ver >= 12) {
2025      brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
2026      brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
2027      brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2028      brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2029      brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2030   } else {
2031      brw_inst_set_bits(dst, 88, 77, uncompacted);
2032   }
2033}
2034
2035static void
2036set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
2037                     brw_compact_inst *src)
2038{
2039   const struct intel_device_info *devinfo = c->isa->devinfo;
2040   uint16_t uncompacted =
2041      c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
2042
2043   if (devinfo->ver >= 12) {
2044      brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2045      brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
2046      brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
2047      brw_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
2048      brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
2049   } else {
2050      brw_inst_set_bits(dst, 120, 109, uncompacted);
2051   }
2052}
2053
2054static void
2055set_uncompacted_3src_control_index(const struct compaction_state *c,
2056                                   brw_inst *dst, brw_compact_inst *src)
2057{
2058   const struct intel_device_info *devinfo = c->isa->devinfo;
2059   assert(devinfo->ver >= 8);
2060
2061   if (devinfo->verx10 >= 125) {
2062      uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2063      uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2064
2065      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2066      brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2067      brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2068      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2069      brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2070      brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2071      brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2072      brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2073      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2074      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2075      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2076      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2077      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2078      brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2079      brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2080      brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2081      brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2082      brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2083
2084   } else if (devinfo->ver >= 12) {
2085      uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2086      uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2087
2088      brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2089      brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2090      brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2091      brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2092      brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2093      brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2094      brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2095      brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2096      brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2097      brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2098      brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2099      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2100      brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2101      brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2102      brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2103      brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2104      brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2105      brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2106   } else {
2107      uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2108      uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2109
2110      brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2111      brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
2112
2113      if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV)
2114         brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2115   }
2116}
2117
2118static void
2119set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2120                                  brw_inst *dst, brw_compact_inst *src)
2121{
2122   assert(devinfo->ver >= 8);
2123
2124   uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2125
2126   if (devinfo->ver >= 12) {
2127      const uint32_t *three_src_source_index_table =
2128         devinfo->verx10 >= 125 ?
2129         xehp_3src_source_index_table : gfx12_3src_source_index_table;
2130      uint32_t uncompacted = three_src_source_index_table[compacted];
2131
2132      brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2133      brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2134      brw_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
2135      brw_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
2136      brw_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
2137      brw_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
2138      brw_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
2139      brw_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
2140      brw_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
2141      brw_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
2142      brw_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
2143      brw_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
2144      brw_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
2145      brw_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
2146      brw_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
2147   } else {
2148      uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2149
2150      brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
2151      brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2152      brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
2153      brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
2154      brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
2155
2156      if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
2157         brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2158         brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2159         brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
2160      } else {
2161         brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2162         brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2163      }
2164   }
2165}
2166
2167static void
2168set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2169                                  brw_inst *dst, brw_compact_inst *src)
2170{
2171   assert(devinfo->ver >= 12);
2172
2173   uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2174   uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
2175
2176   brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2177   brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
2178   brw_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
2179   brw_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
2180}
2181
2182static void
2183brw_uncompact_3src_instruction(const struct compaction_state *c,
2184                               brw_inst *dst, brw_compact_inst *src)
2185{
2186   const struct intel_device_info *devinfo = c->isa->devinfo;
2187   assert(devinfo->ver >= 8);
2188
2189#define uncompact(field) \
2190   brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2191#define uncompact_a16(field) \
2192   brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2193
2194   uncompact(hw_opcode);
2195
2196   if (devinfo->ver >= 12) {
2197      set_uncompacted_3src_control_index(c, dst, src);
2198      set_uncompacted_3src_source_index(devinfo, dst, src);
2199      set_uncompacted_3src_subreg_index(devinfo, dst, src);
2200
2201      uncompact(debug_control);
2202      uncompact(swsb);
2203      uncompact(dst_reg_nr);
2204      uncompact(src0_reg_nr);
2205      uncompact(src1_reg_nr);
2206      uncompact(src2_reg_nr);
2207   } else {
2208      set_uncompacted_3src_control_index(c, dst, src);
2209      set_uncompacted_3src_source_index(devinfo, dst, src);
2210
2211      uncompact(dst_reg_nr);
2212      uncompact_a16(src0_rep_ctrl);
2213      uncompact(debug_control);
2214      uncompact(saturate);
2215      uncompact_a16(src1_rep_ctrl);
2216      uncompact_a16(src2_rep_ctrl);
2217      uncompact(src0_reg_nr);
2218      uncompact(src1_reg_nr);
2219      uncompact(src2_reg_nr);
2220      uncompact_a16(src0_subreg_nr);
2221      uncompact_a16(src1_subreg_nr);
2222      uncompact_a16(src2_subreg_nr);
2223   }
2224   brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2225
2226#undef uncompact
2227#undef uncompact_a16
2228}
2229
/**
 * Expands the compacted instruction src into the full-size instruction dst.
 *
 * dst is zeroed first and then rebuilt from the compaction tables held in
 * \p c and from the fields stored directly in src.  On Gfx8+, 3-source
 * opcodes are handed to brw_uncompact_3src_instruction() instead.  The
 * compaction control bit of the result is cleared.
 */
static void
uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
                      brw_compact_inst *src)
{
   const struct intel_device_info *devinfo = c->isa->devinfo;
   memset(dst, 0, sizeof(*dst));

   /* 3-source instructions have their own compact layout, so the opcode is
    * read through the 3src accessor and decoded before anything else.
    */
   if (devinfo->ver >= 8 &&
       is_3src(c->isa, brw_opcode_decode(c->isa,
                  brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
      brw_uncompact_3src_instruction(c, dst, src);
      return;
   }

#define uncompact(field) \
   brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
#define uncompact_reg(field) \
   brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
                                    brw_compact_inst_##field##_reg_nr(devinfo, src))

   uncompact(hw_opcode);
   uncompact(debug_control);

   set_uncompacted_control(c, dst, src);
   set_uncompacted_datatype(c, dst, src);
   set_uncompacted_subreg(c, dst, src);
   set_uncompacted_src0(c, dst, src);

   /* has_immediate() inspects the partially rebuilt dst, so it has to run
    * after the table expansions above (presumably those are what fill in the
    * register file and type bits it reads -- NOTE(review): confirm).
    */
   enum brw_reg_type type;
   if (has_immediate(devinfo, dst, &type)) {
      /* With an immediate, the compact immediate field is expanded instead
       * of the src1 index and register number.
       */
      unsigned imm = uncompact_immediate(devinfo, type,
                                         brw_compact_inst_imm(devinfo, src));
      brw_inst_set_imm_ud(devinfo, dst, imm);
   } else {
      set_uncompacted_src1(c, dst, src);
      uncompact_reg(src1);
   }

   if (devinfo->ver >= 12) {
      uncompact(swsb);
      uncompact_reg(dst);
      uncompact_reg(src0);
   } else {
      /* Gfx6+ copies acc_wr_control here; older parts copy mask_control_ex. */
      if (devinfo->ver >= 6) {
         uncompact(acc_wr_control);
      } else {
         uncompact(mask_control_ex);
      }

      uncompact(cond_modifier);

      /* flag_subreg_nr is only copied directly on Gfx6 and older. */
      if (devinfo->ver <= 6)
         uncompact(flag_subreg_nr);

      uncompact_reg(dst);
      uncompact_reg(src0);
   }
   /* The result is a full-size instruction. */
   brw_inst_set_cmpt_control(devinfo, dst, false);

#undef uncompact
#undef uncompact_reg
}
2292
2293void
2294brw_uncompact_instruction(const struct brw_isa_info *isa,
2295                          brw_inst *dst, brw_compact_inst *src)
2296{
2297   struct compaction_state c;
2298   compaction_state_init(&c, isa);
2299   uncompact_instruction(&c, dst, src);
2300}
2301
2302void
2303brw_debug_compact_uncompact(const struct brw_isa_info *isa,
2304                            brw_inst *orig,
2305                            brw_inst *uncompacted)
2306{
2307   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2308           isa->devinfo->ver);
2309
2310   fprintf(stderr, "  before: ");
2311   brw_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2312
2313   fprintf(stderr, "  after:  ");
2314   brw_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2315
2316   uint32_t *before_bits = (uint32_t *)orig;
2317   uint32_t *after_bits = (uint32_t *)uncompacted;
2318   fprintf(stderr, "  changed bits:\n");
2319   for (int i = 0; i < 128; i++) {
2320      uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2321      uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2322
2323      if (before != after) {
2324         fprintf(stderr, "  bit %d, %s to %s\n", i,
2325                 before ? "set" : "unset",
2326                 after ? "set" : "unset");
2327      }
2328   }
2329}
2330
/**
 * Returns the difference between the compacted_counts entries at
 * old_target_ip and old_ip (target minus source); the result is negative
 * when the target entry is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2338
2339static void
2340update_uip_jip(const struct brw_isa_info *isa, brw_inst *insn,
2341               int this_old_ip, int *compacted_counts)
2342{
2343   const struct intel_device_info *devinfo = isa->devinfo;
2344
2345   /* JIP and UIP are in units of:
2346    *    - bytes on Gfx8+; and
2347    *    - compacted instructions on Gfx6+.
2348    */
2349   int shift = devinfo->ver >= 8 ? 3 : 0;
2350
2351   int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2352   jip_compacted -= compacted_between(this_old_ip,
2353                                      this_old_ip + (jip_compacted / 2),
2354                                      compacted_counts);
2355   brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2356
2357   if (brw_inst_opcode(isa, insn) == BRW_OPCODE_ENDIF ||
2358       brw_inst_opcode(isa, insn) == BRW_OPCODE_WHILE ||
2359       (brw_inst_opcode(isa, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))
2360      return;
2361
2362   int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2363   uip_compacted -= compacted_between(this_old_ip,
2364                                      this_old_ip + (uip_compacted / 2),
2365                                      compacted_counts);
2366   brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2367}
2368
2369static void
2370update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,
2371                       int this_old_ip, int *compacted_counts)
2372{
2373   assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
2374
2375   /* Jump Count is in units of:
2376    *    - uncompacted instructions on G45; and
2377    *    - compacted instructions on Gfx5.
2378    */
2379   int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
2380
2381   int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
2382
2383   int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2384
2385   int this_compacted_count = compacted_counts[this_old_ip];
2386   int target_compacted_count = compacted_counts[target_old_ip];
2387
2388   jump_count_compacted -= (target_compacted_count - this_compacted_count);
2389   brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2390}
2391
2392static void
2393compaction_state_init(struct compaction_state *c,
2394                      const struct brw_isa_info *isa)
2395{
2396   const struct intel_device_info *devinfo = isa->devinfo;
2397
2398   assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2399   assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2400   assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2401   assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2402   assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2403   assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2404   assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2405   assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2406   assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2407   assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2408   assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2409   assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2410   assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2411   assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2412   assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2413   assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2414   assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2415   assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2416   assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2417   assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2418   assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2419   assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2420   assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2421   assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2422
2423   c->isa = isa;
2424   switch (devinfo->ver) {
2425   case 12:
2426      c->control_index_table = gfx12_control_index_table;;
2427      c->datatype_table = gfx12_datatype_table;
2428      c->subreg_table = gfx12_subreg_table;
2429      if (devinfo->verx10 >= 125) {
2430         c->src0_index_table = xehp_src0_index_table;
2431         c->src1_index_table = xehp_src1_index_table;
2432      } else {
2433         c->src0_index_table = gfx12_src0_index_table;
2434         c->src1_index_table = gfx12_src1_index_table;
2435      }
2436      break;
2437   case 11:
2438      c->control_index_table = gfx8_control_index_table;
2439      c->datatype_table = gfx11_datatype_table;
2440      c->subreg_table = gfx8_subreg_table;
2441      c->src0_index_table = gfx8_src_index_table;
2442      c->src1_index_table = gfx8_src_index_table;
2443      break;
2444   case 9:
2445   case 8:
2446      c->control_index_table = gfx8_control_index_table;
2447      c->datatype_table = gfx8_datatype_table;
2448      c->subreg_table = gfx8_subreg_table;
2449      c->src0_index_table = gfx8_src_index_table;
2450      c->src1_index_table = gfx8_src_index_table;
2451      break;
2452   case 7:
2453      c->control_index_table = gfx7_control_index_table;
2454      c->datatype_table = gfx7_datatype_table;
2455      c->subreg_table = gfx7_subreg_table;
2456      c->src0_index_table = gfx7_src_index_table;
2457      c->src1_index_table = gfx7_src_index_table;
2458      break;
2459   case 6:
2460      c->control_index_table = gfx6_control_index_table;
2461      c->datatype_table = gfx6_datatype_table;
2462      c->subreg_table = gfx6_subreg_table;
2463      c->src0_index_table = gfx6_src_index_table;
2464      c->src1_index_table = gfx6_src_index_table;
2465      break;
2466   case 5:
2467   case 4:
2468      c->control_index_table = g45_control_index_table;
2469      c->datatype_table = g45_datatype_table;
2470      c->subreg_table = g45_subreg_table;
2471      c->src0_index_table = g45_src_index_table;
2472      c->src1_index_table = g45_src_index_table;
2473      break;
2474   default:
2475      unreachable("unknown generation");
2476   }
2477}
2478
2479void
2480brw_compact_instructions(struct brw_codegen *p, int start_offset,
2481                         struct disasm_info *disasm)
2482{
2483   if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2484      return;
2485
2486   const struct intel_device_info *devinfo = p->devinfo;
2487   if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
2488      return;
2489
2490   void *store = p->store + start_offset / 16;
2491   /* For an instruction at byte offset 16*i before compaction, this is the
2492    * number of compacted instructions minus the number of padding NOP/NENOPs
2493    * that preceded it.
2494    */
2495   unsigned num_compacted_counts =
2496      (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2497   int *compacted_counts =
2498      calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2499
2500   /* For an instruction at byte offset 8*i after compaction, this was its IP
2501    * (in 16-byte units) before compaction.
2502    */
2503   unsigned num_old_ip =
2504      (p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1;
2505   int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2506
2507   struct compaction_state c;
2508   compaction_state_init(&c, p->isa);
2509
2510   int offset = 0;
2511   int compacted_count = 0;
2512   for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2513        src_offset += sizeof(brw_inst)) {
2514      brw_inst *src = store + src_offset;
2515      void *dst = store + offset;
2516
2517      old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2518      compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2519
2520      brw_inst inst = precompact(p->isa, *src);
2521      brw_inst saved = inst;
2522
2523      if (try_compact_instruction(&c, dst, &inst)) {
2524         compacted_count++;
2525
2526         if (INTEL_DEBUG(DEBUG_ANY)) {
2527            brw_inst uncompacted;
2528            uncompact_instruction(&c, &uncompacted, dst);
2529            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2530               brw_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2531            }
2532         }
2533
2534         offset += sizeof(brw_compact_inst);
2535      } else {
2536         /* All uncompacted instructions need to be aligned on G45. */
2537         if ((offset & sizeof(brw_compact_inst)) != 0 &&
2538             devinfo->platform == INTEL_PLATFORM_G4X) {
2539            brw_compact_inst *align = store + offset;
2540            memset(align, 0, sizeof(*align));
2541            brw_compact_inst_set_hw_opcode(
2542               devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NENOP));
2543            brw_compact_inst_set_cmpt_control(devinfo, align, true);
2544            offset += sizeof(brw_compact_inst);
2545            compacted_count--;
2546            compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2547            old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2548
2549            dst = store + offset;
2550         }
2551
2552         /* If we didn't compact this instruction, we need to move it down into
2553          * place.
2554          */
2555         if (offset != src_offset) {
2556            memmove(dst, src, sizeof(brw_inst));
2557         }
2558         offset += sizeof(brw_inst);
2559      }
2560   }
2561
2562   /* Add an entry for the ending offset of the program. This greatly
2563    * simplifies the linked list walk at the end of the function.
2564    */
2565   old_ip[offset / sizeof(brw_compact_inst)] =
2566      (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2567
2568   /* Fix up control flow offsets. */
2569   p->next_insn_offset = start_offset + offset;
2570   for (offset = 0; offset < p->next_insn_offset - start_offset;
2571        offset = next_offset(devinfo, store, offset)) {
2572      brw_inst *insn = store + offset;
2573      int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2574      int this_compacted_count = compacted_counts[this_old_ip];
2575
2576      switch (brw_inst_opcode(p->isa, insn)) {
2577      case BRW_OPCODE_BREAK:
2578      case BRW_OPCODE_CONTINUE:
2579      case BRW_OPCODE_HALT:
2580         if (devinfo->ver >= 6) {
2581            update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2582         } else {
2583            update_gfx4_jump_count(devinfo, insn, this_old_ip,
2584                                   compacted_counts);
2585         }
2586         break;
2587
2588      case BRW_OPCODE_IF:
2589      case BRW_OPCODE_IFF:
2590      case BRW_OPCODE_ELSE:
2591      case BRW_OPCODE_ENDIF:
2592      case BRW_OPCODE_WHILE:
2593         if (devinfo->ver >= 7) {
2594            if (brw_inst_cmpt_control(devinfo, insn)) {
2595               brw_inst uncompacted;
2596               uncompact_instruction(&c, &uncompacted,
2597                                     (brw_compact_inst *)insn);
2598
2599               update_uip_jip(p->isa, &uncompacted, this_old_ip,
2600                              compacted_counts);
2601
2602               bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2603                                                  &uncompacted);
2604               assert(ret); (void)ret;
2605            } else {
2606               update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2607            }
2608         } else if (devinfo->ver == 6) {
2609            assert(!brw_inst_cmpt_control(devinfo, insn));
2610
2611            /* Jump Count is in units of compacted instructions on Gfx6. */
2612            int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
2613
2614            int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2615            int target_compacted_count = compacted_counts[target_old_ip];
2616            jump_count_compacted -= (target_compacted_count - this_compacted_count);
2617            brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2618         } else {
2619            update_gfx4_jump_count(devinfo, insn, this_old_ip,
2620                                   compacted_counts);
2621         }
2622         break;
2623
2624      case BRW_OPCODE_ADD:
2625         /* Add instructions modifying the IP register use an immediate src1,
2626          * and Gens that use this cannot compact instructions with immediate
2627          * operands.
2628          */
2629         if (brw_inst_cmpt_control(devinfo, insn))
2630            break;
2631
2632         if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2633             brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2634            assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2635
2636            int shift = 3;
2637            int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2638
2639            int target_old_ip = this_old_ip + (jump_compacted / 2);
2640            int target_compacted_count = compacted_counts[target_old_ip];
2641            jump_compacted -= (target_compacted_count - this_compacted_count);
2642            brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2643         }
2644         break;
2645
2646      default:
2647         break;
2648      }
2649   }
2650
2651   /* p->nr_insn is counting the number of uncompacted instructions still, so
2652    * divide.  We do want to be sure there's a valid instruction in any
2653    * alignment padding, so that the next compression pass (for the FS 8/16
2654    * compile passes) parses correctly.
2655    */
2656   if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2657      brw_compact_inst *align = store + offset;
2658      memset(align, 0, sizeof(*align));
2659      brw_compact_inst_set_hw_opcode(
2660         devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NOP));
2661      brw_compact_inst_set_cmpt_control(devinfo, align, true);
2662      p->next_insn_offset += sizeof(brw_compact_inst);
2663   }
2664   p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2665
2666   for (int i = 0; i < p->num_relocs; i++) {
2667      if (p->relocs[i].offset < (uint32_t)start_offset)
2668         continue;
2669
2670      assert(p->relocs[i].offset % 16 == 0);
2671      unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2672      p->relocs[i].offset -= compacted_counts[idx] * 8;
2673   }
2674
2675   /* Update the instruction offsets for each group. */
2676   if (disasm) {
2677      int offset = 0;
2678
2679      foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2680         while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2681                sizeof(brw_inst) != group->offset) {
2682            assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2683                   sizeof(brw_inst) < group->offset);
2684            offset = next_offset(devinfo, store, offset);
2685         }
2686
2687         group->offset = start_offset + offset;
2688
2689         offset = next_offset(devinfo, store, offset);
2690      }
2691   }
2692
2693   free(compacted_counts);
2694   free(old_ip);
2695}
2696