1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018-2019 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #ifndef r600_sfn_alu_defines_h
28 #define r600_sfn_alu_defines_h
29
30 #include "../r600_isa.h"
31
32 #include <map>
33 #include <bitset>
34
35 namespace r600 {
36
/* ALU opcode identifiers.
 *
 * ALU op2 instructions 17:7, top three bits always zero. The op3_* entries
 * at the end of the list carry their opcode shifted left by 6, so they do
 * not overlap the values of the op0_/op1_/op2_ entries.
 *
 * Every enumerator must have a unique value: the enum is used as the key
 * type of std::map<EAluOp, AluOp> (see alu_ops), where two enumerators with
 * the same value would silently describe the same entry.
 *
 * op_invalid (0xffff) is a sentinel that does not name an opcode.
 */
enum EAluOp {
   op2_add = 0,
   op2_mul = 1,
   op2_mul_ieee = 2,
   op2_max = 3,
   op2_min = 4,
   op2_max_dx10 = 5,
   op2_min_dx10 = 6,
   op2_sete = 8,
   op2_setgt = 9,
   op2_setge = 10,
   op2_setne = 11,
   op2_sete_dx10 = 12,
   op2_setgt_dx10 = 13,
   op2_setge_dx10 = 14,
   op2_setne_dx10 = 15,
   op1_fract = 16,
   op1_trunc = 17,
   op1_ceil = 18,
   op1_rndne = 19,
   op1_floor = 20,
   op2_ashr_int = 21,
   op2_lshr_int = 22,
   op2_lshl_int = 23,
   op1_mov = 25,
   op0_nop = 26,
   op2_mul_64 = 27,
   op1_flt64_to_flt32 = 28,
   op1_flt32_to_flt64 = 29,
   op2_pred_setgt_uint = 30,
   op2_pred_setge_uint = 31,
   op2_pred_sete = 32,
   op2_pred_setgt = 33,
   op2_pred_setge = 34,
   op2_pred_setne = 35,
   op1_pred_set_inv = 36,
   op2_pred_set_pop = 37,
   op0_pred_set_clr = 38,
   op1_pred_set_restore = 39,
   op2_pred_sete_push = 40,
   op2_pred_setgt_push = 41,
   op2_pred_setge_push = 42,
   op2_pred_setne_push = 43,
   op2_kille = 44,
   op2_killgt = 45,
   op2_killge = 46,
   op2_killne = 47,
   op2_and_int = 48,
   op2_or_int = 49,
   op2_xor_int = 50,
   op1_not_int = 51,
   op2_add_int = 52,
   op2_sub_int = 53,
   op2_max_int = 54,
   op2_min_int = 55,
   op2_max_uint = 56,
   op2_min_uint = 57,
   op2_sete_int = 58,
   op2_setgt_int = 59,
   op2_setge_int = 60,
   op2_setne_int = 61,
   op2_setgt_uint = 62,
   op2_setge_uint = 63,
   op2_killgt_uint = 64,
   op2_killge_uint = 65,
   op2_prede_int = 66,
   op2_pred_setgt_int = 67,
   op2_pred_setge_int = 68,
   op2_pred_setne_int = 69,
   op2_kille_int = 70,
   op2_killgt_int = 71,
   op2_killge_int = 72,
   op2_killne_int = 73,
   op2_pred_sete_push_int = 74,
   op2_pred_setgt_push_int = 75,
   op2_pred_setge_push_int = 76,
   op2_pred_setne_push_int = 77,
   op2_pred_setlt_push_int = 78,
   op2_pred_setle_push_int = 79,
   op1_flt_to_int = 80,
   op1_bfrev_int = 81,
   op2_addc_uint = 82,
   op2_subb_uint = 83,
   op0_group_barrier = 84,
   op0_group_seq_begin = 85,
   op0_group_seq_end = 86,
   op2_set_mode = 87,
   op1_set_cf_idx0 = 88,
   op1_set_cf_idx1 = 89,
   op2_set_lds_size = 90,
   op1_exp_ieee = 129,
   op1_log_clamped = 130,
   op1_log_ieee = 131,
   op1_recip_clamped = 132,
   op1_recip_ff = 133,
   op1_recip_ieee = 134,
   op1_recipsqrt_clamped = 135,
   op1_recipsqrt_ff = 136,
   op1_recipsqrt_ieee1 = 137,
   op1_sqrt_ieee = 138,
   op1_sin = 141,
   op1_cos = 142,
   op2_mullo_int = 143,
   op2_mulhi_int = 144,
   op2_mullo_uint = 145,
   op2_mulhi_uint = 146,
   op1_recip_int = 147,
   op1_recip_uint = 148,
   op1_recip_64 = 149,
   op1_recip_clamped_64 = 150,
   op1_recipsqrt_64 = 151,
   op1_recipsqrt_clamped_64 = 152,
   op1_sqrt_64 = 153,
   op1_flt_to_uint = 154,
   op1_int_to_flt = 155,
   op1_uint_to_flt = 156,
   op2_bfm_int = 160,
   op1_flt32_to_flt16 = 162,
   op1_flt16_to_flt32 = 163,
   op1_ubyte0_flt = 164,
   op1_ubyte1_flt = 165,
   op1_ubyte2_flt = 166,
   op1_ubyte3_flt = 167,
   op1_bcnt_int = 170,
   op1_ffbh_uint = 171,
   op1_ffbl_int = 172,
   op1_ffbh_int = 173,
   op1_flt_to_uint4 = 174,
   op2_dot_ieee = 175,
   op1_flt_to_int_rpi = 176,
   op1_flt_to_int_floor = 177,
   op2_mulhi_uint24 = 178,
   op1_mbcnt_32hi_int = 179,
   op1_offset_to_flt = 180,
   op2_mul_uint24 = 181,
   op1_bcnt_accum_prev_int = 182,
   op1_mbcnt_32lo_accum_prev_int = 183,
   op2_sete_64 = 184,
   op2_setne_64 = 185,
   op2_setgt_64 = 186,
   op2_setge_64 = 187,
   op2_min_64 = 188,
   op2_max_64 = 189,
   op2_dot4 = 190,
   op2_dot4_ieee = 191,
   op2_cube = 192,
   op1_max4 = 193,
   op1_frexp_64 = 196,
   op1_ldexp_64 = 197,
   op1_fract_64 = 198,
   op2_pred_setgt_64 = 199,
   /* Was 198, which aliased op1_fract_64; 200 is the free slot between
    * op2_pred_setgt_64 and op2_pred_setge_64. */
   op2_pred_sete_64 = 200,
   op2_pred_setge_64 = 201,
   OP2V_MUL_64 = 202,
   op2_add_64 = 203,
   op1_mova_int = 204,
   op1v_flt64_to_flt32 = 205,
   op1v_flt32_to_flt64 = 206,
   op2_sad_accum_prev_uint = 207,
   op2_dot = 208,
   op1_mul_prev = 209,
   op1_mul_ieee_prev = 210,
   op1_add_prev = 211,
   op2_muladd_prev = 212,
   op2_muladd_ieee_prev = 213,
   op2_interp_xy = 214,
   op2_interp_zw = 215,
   op2_interp_x = 216,
   op2_interp_z = 217,
   op0_store_flags = 218,
   op1_load_store_flags = 219,
   op0_lds_1a = 220,
   op0_lds_1a1d = 221,
   op0_lds_2a = 223,
   op1_interp_load_p0 = 224,
   /* NOTE(review): 125/126 break the ascending numbering used everywhere
    * else in this list (the previous entry is 224); they look like typos
    * for 225/226. The values are unique, so they are left untouched here -
    * confirm against the ISA documentation before changing them. */
   op1_interp_load_p10 = 125,
   op1_interp_load_p20 = 126,

   /* op 3 all left shift 6 */
   op3_bfe_uint = 4 << 6,
   op3_bfe_int = 5 << 6,
   op3_bfi_int = 6 << 6,
   op3_fma = 7 << 6,
   op3_cndne_64 = 9 << 6,
   op3_fma_64 = 10 << 6,
   op3_lerp_uint = 11 << 6,
   op3_bit_align_int = 12 << 6,
   op3_byte_align_int = 13 << 6,
   op3_sad_accum_uint = 14 << 6,
   op3_sad_accum_hi_uint = 15 << 6,
   op3_muladd_uint24 = 16 << 6,
   op3_lds_idx_op = 17 << 6,
   op3_muladd = 20 << 6,
   op3_muladd_m2 = 21 << 6,
   op3_muladd_m4 = 22 << 6,
   op3_muladd_d2 = 23 << 6,
   op3_muladd_ieee = 24 << 6,
   op3_cnde = 25 << 6,
   op3_cndgt = 26 << 6,
   op3_cndge = 27 << 6,
   op3_cnde_int = 28 << 6,
   op3_cndgt_int = 29 << 6,
   op3_cndge_int = 30 << 6,
   op3_mul_lit = 31 << 6,

   op_invalid = 0xffff
};
243
/* Flag indices for ALU instructions. The enumerators are numbered
 * consecutively from zero; alu_flag_count comes last and therefore equals
 * the number of flags (it sizes the AluOpFlags bit set). */
enum AluModifiers {
   /* alu_src0_* */
   alu_src0_neg = 0,
   alu_src0_abs,
   alu_src0_rel,

   /* alu_src1_* */
   alu_src1_neg,
   alu_src1_abs,
   alu_src1_rel,

   /* alu_src2_* (there is no alu_src2_abs entry) */
   alu_src2_neg,
   alu_src2_rel,

   /* alu_dst_* */
   alu_dst_clamp,
   alu_dst_rel,

   /* instruction-level flags */
   alu_last_instr,
   alu_update_exec,
   alu_update_pred,
   alu_write,
   alu_op3,
   alu_is_trans,
   alu_is_cayman_trans,

   /* LDS related flags */
   alu_is_lds,
   alu_lds_group_start,
   alu_lds_group_end,
   alu_lds_address,

   alu_no_schedule_bias,
   alu_64bit_op,

   /* number of flags - keep this entry last */
   alu_flag_count
};
270
/* Destination (output) modifier selectors, values 0..3.
 * NOTE(review): going by the names these presumably select "no modifier",
 * "*2", "*4" and "/2" - confirm against the ISA documentation. */
enum AluDstModifiers {
   omod_off   = 0,
   omod_mul2  = 1,
   omod_mul4  = 2,
   omod_divl2 = 3
};
277
/* Predicate select values. Note that the value 1 is not assigned to any
 * enumerator. */
enum AluPredSel {
   pred_off  = 0,
   pred_zero = 2,
   pred_one  = 3
};
283
/* Bank swizzle selectors.
 *
 * Two name families share the same numeric range: alu_vec_* (0..5, with
 * alu_vec_unknown = 6) and sq_alu_scl_* (0..3, with sq_alu_scl_unknown = 4).
 * Enumerators with equal values compare equal, e.g.
 * alu_vec_012 == sq_alu_scl_201. */
enum AluBankSwizzle {
   /* alu_vec_* family */
   alu_vec_012 = 0,
   alu_vec_021 = 1,
   alu_vec_120 = 2,
   alu_vec_102 = 3,
   alu_vec_201 = 4,
   alu_vec_210 = 5,
   alu_vec_unknown = 6,

   /* sq_alu_scl_* family */
   sq_alu_scl_201 = 0,
   sq_alu_scl_122 = 1,
   sq_alu_scl_212 = 2,
   sq_alu_scl_221 = 3,
   sq_alu_scl_unknown = 4
};
298
operator ++(AluBankSwizzle& x)299 inline AluBankSwizzle operator ++(AluBankSwizzle& x) {
300 x = static_cast<AluBankSwizzle>(x + 1);
301 return x;
302 }
303
304 using AluOpFlags=std::bitset<alu_flag_count>;
305
306 struct AluOp {
307 static constexpr int x = 1;
308 static constexpr int y = 2;
309 static constexpr int z = 4;
310 static constexpr int w = 8;
311 static constexpr int v = 15;
312 static constexpr int t = 16;
313 static constexpr int a = 31;
314
AluOpr600::AluOp315 AluOp(int ns, int f, uint8_t um_r600, uint8_t um_r700, uint8_t um_eg, const char *n):
316 nsrc(ns), is_float(f), name(n)
317 {
318 unit_mask[0] = um_r600;
319 unit_mask[1] = um_r700;
320 unit_mask[2] = um_eg;
321 }
322
can_channelr600::AluOp323 bool can_channel(int flags, r600_chip_class unit_type) const {
324 assert(unit_type < 3);
325 return flags & unit_mask[unit_type];
326 }
327
328 int nsrc: 4;
329 int is_float:1;
330 uint8_t unit_mask[3];
331 const char *name;
332 };
333
334 extern const std::map<EAluOp, AluOp> alu_ops;
335
/* Source selector values for inline constants and special ALU sources.
 * The values are not contiguous (225, 226 and 239 are unassigned).
 * ALU_SRC_UNKNOWN is one past ALU_SRC_PARAM_BASE. */
enum AluInlineConstants {
   /* LDS queue / direct access */
   ALU_SRC_LDS_OQ_A = 219,
   ALU_SRC_LDS_OQ_B = 220,
   ALU_SRC_LDS_OQ_A_POP = 221,
   ALU_SRC_LDS_OQ_B_POP = 222,
   ALU_SRC_LDS_DIRECT_A = 223,
   ALU_SRC_LDS_DIRECT_B = 224,

   /* time, masks and hardware ids */
   ALU_SRC_TIME_HI = 227,
   ALU_SRC_TIME_LO = 228,
   ALU_SRC_MASK_HI = 229,
   ALU_SRC_MASK_LO = 230,
   ALU_SRC_HW_WAVE_ID = 231,
   ALU_SRC_SIMD_ID = 232,
   ALU_SRC_SE_ID = 233,
   ALU_SRC_HW_THREADGRP_ID = 234,
   ALU_SRC_WAVE_ID_IN_GRP = 235,
   ALU_SRC_NUM_THREADGRP_WAVES = 236,
   ALU_SRC_HW_ALU_ODD = 237,
   ALU_SRC_LOOP_IDX = 238,

   ALU_SRC_PARAM_BASE_ADDR = 240,
   ALU_SRC_NEW_PRIM_MASK = 241,
   ALU_SRC_PRIM_MASK_HI = 242,
   ALU_SRC_PRIM_MASK_LO = 243,

   /* constant values */
   ALU_SRC_1_DBL_L = 244,
   ALU_SRC_1_DBL_M = 245,
   ALU_SRC_0_5_DBL_L = 246,
   ALU_SRC_0_5_DBL_M = 247,
   ALU_SRC_0 = 248,
   ALU_SRC_1 = 249,
   ALU_SRC_1_INT = 250,
   ALU_SRC_M_1_INT = 251,
   ALU_SRC_0_5 = 252,

   ALU_SRC_LITERAL = 253,
   ALU_SRC_PV = 254,
   ALU_SRC_PS = 255,

   ALU_SRC_PARAM_BASE = 0x1C0,
   ALU_SRC_UNKNOWN = ALU_SRC_PARAM_BASE + 1
};
374
/* Description record for one inline constant (value type of
 * alu_src_const). Plain aggregate, no invariants. */
struct AluInlineConstantDescr {
   /* NOTE(review): presumably tells whether the channel is meaningful for
    * this constant - confirm at the users of alu_src_const. */
   bool use_chan;

   /* Text describing the constant; the pointer is not owned. */
   const char *descr;
};
379
380 extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
381
382 #define LDSOP2(X) LDS_ ## X = LDS_OP2_LDS_ ## X
383
384 enum ESDOp {
385 DS_OP_ADD = 0,
386 DS_OP_SUB = 1,
387 DS_OP_RSUB = 2,
388 DS_OP_INC = 3,
389 DS_OP_DEC = 4,
390 DS_OP_MIN_INT = 5,
391 DS_OP_MAX_INT = 6,
392 DS_OP_MIN_UINT = 7,
393 DS_OP_MAX_UINT = 8,
394 DS_OP_AND = 9,
395 DS_OP_OR = 10,
396 DS_OP_XOR = 11,
397 DS_OP_MSKOR = 12,
398 DS_OP_WRITE = 13,
399 DS_OP_WRITE_REL = 14,
400 DS_OP_WRITE2 = 15,
401 DS_OP_CMP_STORE = 16,
402 DS_OP_CMP_STORE_SPF = 17,
403 DS_OP_BYTE_WRITE = 18,
404 DS_OP_SHORT_WRITE = 19,
405 DS_OP_ADD_RET = 32,
406 DS_OP_SUB_RET = 33,
407 DS_OP_RSUB_RET = 34,
408 DS_OP_INC_RET = 35,
409 DS_OP_DEC_RET = 36,
410 DS_OP_MIN_INT_RET = 37,
411 DS_OP_MAX_INT_RET = 38,
412 DS_OP_MIN_UINT_RET = 39,
413 DS_OP_MAX_UINT_RET = 40,
414 DS_OP_AND_RET = 41,
415 DS_OP_OR_RET = 42,
416 DS_OP_XOR_RET = 43,
417 DS_OP_MSKOR_RET = 44,
418 DS_OP_XCHG_RET = 45,
419 DS_OP_XCHG_REL_RET = 46,
420 DS_OP_XCHG2_RET = 47,
421 DS_OP_CMP_XCHG_RET = 48,
422 DS_OP_CMP_XCHG_SPF_RET = 49,
423 DS_OP_READ_RET = 50,
424 DS_OP_READ_REL_RET = 51,
425 DS_OP_READ2_RET = 52,
426 DS_OP_READWRITE_RET = 53,
427 DS_OP_BYTE_READ_RET = 54,
428 DS_OP_UBYTE_READ_RET = 55,
429 DS_OP_SHORT_READ_RET = 56,
430 DS_OP_USHORT_READ_RET = 57,
431 DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
432 DS_OP_INVALID = 64,
433 LDSOP2(ADD_RET),
434 LDSOP2(ADD),
435 LDSOP2(AND_RET),
436 LDSOP2(AND),
437 LDSOP2(WRITE),
438 LDSOP2(OR_RET),
439 LDSOP2(OR),
440 LDSOP2(MAX_INT_RET),
441 LDSOP2(MAX_INT),
442 LDSOP2(MAX_UINT_RET),
443 LDSOP2(MAX_UINT),
444 LDSOP2(MIN_INT_RET),
445 LDSOP2(MIN_INT),
446 LDSOP2(MIN_UINT_RET),
447 LDSOP2(MIN_UINT),
448 LDSOP2(XOR_RET),
449 LDSOP2(XOR),
450 LDSOP2(XCHG_RET),
451 LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET,
452 LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL
453 };
454
455 #undef LDSOP2
456
/* Description record for one LDS operation (value type of lds_ops).
 * Plain aggregate, no invariants. */
struct LDSOp {
   /* number of sources */
   int nsrc;

   /* name of the operation; the pointer is not owned */
   const char *name;
};
461
462 extern const std::map<ESDOp, LDSOp> lds_ops;
463
/* State of one kcache line (NOTE(review): presumably a constant-cache
 * line - confirm at the users). A default-constructed object has
 * bank, addr and len set to zero and mode set to free. */
struct KCacheLine {
   /* Lock mode of the line: free = 0, lock_1 = 1, lock_2 = 2. */
   enum KCacheLockMode {
      free,
      lock_1,
      lock_2
   };

   int bank = 0;
   int addr = 0;
   int len = 0;
   KCacheLockMode mode = free;
};
474
475
476 }
477
#endif // r600_sfn_alu_defines_h
479