Lines Matching refs:v1
31 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
32 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
37 SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
42 sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
52 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
53 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
57 //~gfx(7|11)! SDWA is GFX8 to GFX10.3 only: v1: %t0 = v_mul_f32 %a, %b dst_sel:dword src0_sel:dword src1_sel:dword
60 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
68 //>> v1: %vgpr0, v1: %vgp1, s1: %sgpr0, s1: %sgpr1 = p_startpgm
69 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
73 //~gfx8! Wrong source position for SGPR argument: v1: %_ = v_mul_f32 %sgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
74 //~gfx8! Wrong source position for SGPR argument: v1: %_ = v_mul_f32 %vgpr0, %sgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
75 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], inputs[1]);
76 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[3]);
78 //~gfx8! Wrong source position for constant argument: v1: %_ = v_mul_f32 4, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
79 //~gfx8! Wrong source position for constant argument: v1: %_ = v_mul_f32 %vgpr0, 4 dst_sel:dword src0_sel:dword src1_sel:dword
80 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(4u), inputs[1]);
81 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand::c32(4u));
83 //! Literal applied on wrong instruction format: v1: %_ = v_mul_f32 0x1234, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
84 //! Literal applied on wrong instruction format: v1: %_ = v_mul_f32 %vgpr0, 0x1234 dst_sel:dword src0_sel:dword src1_sel:dword
85 //! Wrong source position for Literal argument: v1: %_ = v_mul_f32 %vgpr0, 0x1234 dst_sel:dword src0_sel:dword src1_sel:dword
86 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x1234u), inputs[1]);
87 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand::c32(0x1234u));
97 //>> v1: %vgpr0, v1: %vgp1, s1: %sgpr0, s1: %sgpr1 = p_startpgm
98 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
118 //>> v1: %vgpr0, v1: %vgp1, s1: %sgpr0, s1: %sgpr1 = p_startpgm
119 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
123 //~gfx8! SDWA omod only supported on GFX9+: v1: %_ = v_mul_f32 %vgpr0, %vgpr1 *2 dst_sel:dword src0_sel:dword src1_sel:dword
126 bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa().omod = 1;
134 //>> v1: %vgpr0, v1: %vgpr1, s2: %sgpr0 = p_startpgm
135 if (!setup_cs("v1 v1 s2", (amd_gfx_level)i))
139 //! 3rd operand must be fixed to vcc with SDWA: v1: %_ = v_cndmask_b32 %vgpr0, %vgpr1, %_ dst_sel:dword src0_sel:dword src1_sel:dword
140 bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], inputs[2]);
141 bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], bld.vcc(inputs[2]));
143 //! 2nd definition must be fixed to vcc with SDWA: v1: %_, s2: %_ = v_add_co_u32 %vgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
144 bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm), inputs[0], inputs[1]);
145 bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0], inputs[1]);
156 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
157 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
161 //; res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
171 Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
173 writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
176 Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
178 writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
181 Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
183 writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
186 Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
188 writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
191 Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
193 writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
196 Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
198 writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
201 Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
202 writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
206 bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
207 writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
214 //! v1: %tmp8 = p_insert %b, 1, 8
215 //! v1: %res8 = v_mul_f32 %a, %tmp8
218 bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
219 writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
222 //~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
223 //~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
224 //~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
225 //~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
227 Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
229 writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
231 //~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
232 //~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
233 //~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
234 //~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
236 Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
238 writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
240 //~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
241 //~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
242 //~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
243 //~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
245 Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
247 writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
249 //~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
250 //~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
251 //~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
252 //~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
254 Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
256 writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
259 //~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
261 Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
263 writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
265 //~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
267 Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
269 writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
279 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
280 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
285 //! v1: %res0 = v_mul_f32 %a, -%b dst_sel:dword src0_sel:dword src1_sel:ubyte0
287 Temp byte0 = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
290 writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_byte0));
292 //~gfx8! v1: %neg = v_mul_f32 -1.0, %b
293 //~gfx8! v1: %res1 = v_mul_f32 %a, %neg dst_sel:dword src0_sel:dword src1_sel:ubyte0
294 //~gfx(9|10)! v1: %neg_byte0 = v_mul_f32 -1.0, %b dst_sel:ubyte0 src0_sel:dword src1_sel:dword
295 //~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %neg_byte0
299 bld.pseudo(ext, bld.def(v1), neg, Operand::zero(), Operand::c32(8u), Operand::zero());
300 writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_neg));
302 //! v1: %res2 = v_mul_f32 %a, |%b| dst_sel:dword src0_sel:dword src1_sel:ubyte0
305 writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], abs_byte0));
307 //! v1: %abs = v_mul_f32 1.0, |%b|
308 //! v1: %res3 = v_mul_f32 %a, %abs dst_sel:dword src0_sel:dword src1_sel:ubyte0
312 bld.pseudo(ext, bld.def(v1), abs, Operand::zero(), Operand::c32(8u), Operand::zero());
313 writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_abs));
315 //! v1: %res4 = v_mul_f32 %1, -|%2| dst_sel:dword src0_sel:dword src1_sel:ubyte0
318 writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_abs_byte0));
320 //~gfx8! v1: %neg_abs = v_mul_f32 -1.0, %abs
321 //~gfx8! v1: %res5 = v_mul_f32 %a, %neg_abs dst_sel:dword src0_sel:dword src1_sel:ubyte0
322 //~gfx(9|10)! v1: %neg_abs_byte0 = v_mul_f32 -1.0, %abs dst_sel:ubyte0 src0_sel:dword src1_sel:dword
323 //~gfx(9|10)! v1: %res5 = v_mul_f32 %a, %neg_abs_byte0
327 bld.pseudo(ext, bld.def(v1), neg_abs, Operand::zero(), Operand::c32(8u), Operand::zero());
328 writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_neg_abs));
336 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
337 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
342 //~gfx8! v1: %byte0_b = p_extract %b, 0, 8, 0
343 //~gfx8! v1: %res1 = v_mul_f32 %c, %byte0_b
344 //~gfx(9|10)! v1: %res1 = v_mul_f32 %c, %b dst_sel:dword src0_sel:dword src1_sel:ubyte0
346 Temp byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
348 writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], byte0_b));
350 //~gfx8! v1: %byte0_c = p_extract %c, 0, 8, 0
351 //~gfx8! v1: %res2 = v_mul_f32 %a, %byte0_c
352 //~gfx(9|10)! v1: %res2 = v_mul_f32 %a, %c dst_sel:dword src0_sel:dword src1_sel:ubyte0
354 Temp byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u),
356 writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_c));
358 //~gfx8! v1: %byte0_c_2 = p_extract %c, 0, 8, 0
359 //~gfx8! v1: %res3 = v_mul_f32 %c, %byte0_c_2
360 //~gfx(9|10)! v1: %res3 = v_mul_f32 %c, %c dst_sel:dword src0_sel:dword src1_sel:ubyte0
362 byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u),
364 writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], byte0_c));
366 //~gfx(8|9)! v1: %byte0_c_3 = p_extract %c, 0, 8, 0
367 //~gfx(8|9)! v1: %res4 = v_mul_f32 %d, %byte0_c_3
368 //~gfx10! v1: %res4 = v_mul_f32 %d, %c dst_sel:dword src0_sel:dword src1_sel:ubyte0
370 byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u),
372 writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[3], byte0_c));
380 //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
381 if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
384 //! v1: %res0 = v_mul_f32 -|%a|, %b dst_sel:dword src0_sel:dword src1_sel:ubyte0
386 Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
388 VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3();
393 //~gfx8! v1: %byte0_b_0 = p_extract %b, 0, 8, 0
394 //~gfx8! v1: %res1 = v_mul_f32 %a, %byte0_b_0 *4
395 //~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %b *4 dst_sel:dword src0_sel:dword src1_sel:ubyte0
397 byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
399 mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3();
403 //~gfx8! v1: %byte0_b_1 = p_extract %b, 0, 8, 0
404 //~gfx8! v1: %res2 = v_mul_f32 %byte0_b_1, %c
405 //~gfx(9|10)! v1: %res2 = v_mul_f32 %b, %c dst_sel:dword src0_sel:ubyte0 src1_sel:dword
407 byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
409 writeout(2, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, inputs[2]));
412 //~gfx10! v1: %byte0_b_2 = p_extract %b, 0, 8, 0
413 //~gfx10! v1: %res3 = v_mul_f32 %byte0_b_2, 0x1234
415 byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
418 bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, Operand::c32(0x1234u)));
427 //>> v1: %a, v1: %b = p_startpgm
428 if (!setup_cs("v1 v1", (amd_gfx_level)i))
434 //~gfx[^7]! v1: %res0 = v_mul_f32 %a, %b dst_sel:ubyte0 src0_sel:dword src1_sel:dword
436 Temp val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
437 writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
439 //~gfx[^7]! v1: %res1 = v_mul_f32 %a, %b dst_sel:ubyte1 src0_sel:dword src1_sel:dword
441 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
442 writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(8u)));
444 //~gfx[^7]! v1: %res2 = v_mul_f32 %a, %b dst_sel:ubyte2 src0_sel:dword src1_sel:dword
446 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
447 writeout(2, bld.pseudo(ins, bld.def(v1), val, Operand::c32(2u), Operand::c32(8u)));
449 //~gfx[^7]! v1: %res3 = v_mul_f32 %a, %b dst_sel:ubyte3 src0_sel:dword src1_sel:dword
451 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
452 writeout(3, bld.pseudo(ins, bld.def(v1), val, Operand::c32(3u), Operand::c32(8u)));
454 //~gfx[^7]! v1: %res4 = v_mul_f32 %a, %b dst_sel:uword0 src0_sel:dword src1_sel:dword
456 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
457 writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
459 //~gfx[^7]! v1: %res5 = v_mul_f32 %a, %b dst_sel:uword1 src0_sel:dword src1_sel:dword
461 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
462 writeout(5, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
464 //~gfx[^7]! v1: %res6 = v_mul_f32 %a, %b dst_sel:ubyte0 src0_sel:dword src1_sel:dword
466 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
468 6, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(8u), Operand::zero()));
470 //~gfx[^7]! v1: %res7 = v_mul_f32 %a, %b dst_sel:uword0 src0_sel:dword src1_sel:dword
472 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
474 7, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(16u), Operand::zero()));
476 //~gfx[^7]! v1: %tmp8 = v_mul_f32 %a, %b
477 //~gfx[^7]! v1: %res8 = p_extract %tmp8, 2, 8, 0
479 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
481 8, bld.pseudo(ext, bld.def(v1), val, Operand::c32(2u), Operand::c32(8u), Operand::zero()));
483 //~gfx[^7]! v1: %tmp9 = v_mul_f32 %a, %b
484 //~gfx[^7]! v1: %res9 = p_extract %tmp9, 0, 8, 1
486 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
488 9, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(8u), Operand::c32(1u)));
493 //! v1: %res10 = v_mul_f32 %a, %b
495 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]);
496 bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
499 //~gfx[^7]! v1: %tmp11 = v_sub_i16 %a, %b
500 //~gfx[^7]! v1: %res11 = p_insert %tmp11, 0, 16
502 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
503 writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
505 //~gfx[^7]! v1: %tmp12 = v_sub_i16 %a, %b
506 //~gfx[^7]! v1: %res12 = p_insert %tmp12, 1, 16
508 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
509 writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
511 //~gfx[^7]! v1: %tmp13 = v_sub_i16 %a, %b
512 //~gfx[^7]! v1: %res13 = p_insert %tmp13, 0, 8
514 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
515 writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
523 //>> v1: %a = p_startpgm
524 if (!setup_cs("v1", (amd_gfx_level)i))
529 //~gfx8! v1: %tmp0 = v_rcp_f32 %a *2
530 //~gfx8! v1: %res0 = p_insert %tmp0, 0, 8
531 //~gfx9! v1: %res0 = v_rcp_f32 %a *2 dst_sel:ubyte0 src0_sel:dword
533 Temp val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
534 val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u));
535 writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
537 //! v1: %res1 = v_rcp_f32 %a clamp dst_sel:ubyte0 src0_sel:dword
539 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
540 val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(),
542 writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));
544 //! v1: %tmp2 = v_rcp_f32 %a dst_sel:ubyte0 src0_sel:dword
545 //! v1: %res2 = v_mul_f32 %tmp2, 2.0
547 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
548 val = bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u));
549 val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u));
552 //! v1: %tmp3 = v_rcp_f32 %a dst_sel:ubyte0 src0_sel:dword
553 //! v1: %res3 = v_med3_f32 %tmp3, 0, 1.0
555 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
556 val = bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u));
557 val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(),
561 //~gfx8! v1: %tmp4 = v_rcp_f32 %a *2 clamp
562 //~gfx8! v1: %res4 = p_insert %tmp4, 0, 8
563 //~gfx9! v1: %res4 = v_rcp_f32 %a *2 clamp dst_sel:ubyte0 src0_sel:dword
565 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]);
566 val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u));
567 val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(),
569 writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)));