Lines Matching refs:v2b

127    //! v2b: %res5 = v_add_f16 %a, %b *0.5
129 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
130 writeout(5, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x3800u), tmp));
132 //! v2b: %res6 = v_add_f16 %a, %b *2
134 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
135 writeout(6, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
137 //! v2b: %res7 = v_add_f16 %a, %b *4
139 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
140 writeout(7, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4400u), tmp));
142 //! v2b: %res8 = v_add_f16 %a, %b clamp
144 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
145 writeout(8, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
148 //! v2b: %res9 = v_add_f16 %a, %b *2 clamp
150 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
151 tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000), tmp);
152 writeout(9, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
215 //! v2b: %res16_tmp = v_add_f16 %a, %b
216 //! v2b: %res16 = v_mul_f16 2.0, %res16_tmp
218 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
219 writeout(16, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
221 //! v2b: %res17 = v_add_f16 %a, %b clamp
223 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
224 writeout(17, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
253 //! v2b: %res20_tmp = v_add_f16 %a, %b
254 //! v2b: %res20 = v_mul_f16 2.0, %res20_tmp
256 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
257 writeout(20, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
258 //! v2b: %res21 = v_add_f16 %a, %b clamp
260 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
261 writeout(21, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
642 bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand::zero());
1095 //>> v1: %a, v2b: %a16 = p_startpgm
1096 if (!setup_cs("v1 v2b", GFX10_3))
1105 //! v2b: %res0 = v_mul_f16 %res0_tmp, %a16
1109 //! v2b: %res1_tmp = v_mul_f16 -1.0, %a16
1115 //! v2b: %res2 = v_mul_f16 %res2_tmp, %a16
1120 //! v2b: %res3 = v_med3_f16 0, 1.0, %res3_tmp
1124 //! v2b: %res4_tmp = v_mul_f16 %a16, %a16
1130 //! v2b: %res5 = v_mul_f16 2.0, %res5_tmp
1132 writeout(5, fmul(u2u16(fmul(a, a)), bld.copy(bld.def(v2b), Operand::c16(0x4000))));
1134 //! v2b: %res6_tmp = v_mul_f16 %a16, %a16
1140 //! v2b: %res7 = v_add_f16 %res7_tmp, %a16
1144 //! v2b: %res8_tmp = v_mul_f16 %a16, %a16
1150 //! v2b: %res9 = v_mul_f16 -1.0, %res9_tmp
1154 //! v2b: %res10_tmp = v_mul_f16 %a16, %a16
1164 //>> v1: %a, v2b: %a16 = p_startpgm
1165 if (!setup_cs("v1 v2b", (amd_gfx_level)i))
1199 //>> v1: %a, v2b: %a16 = p_startpgm
1200 if (!setup_cs("v1 v2b", (amd_gfx_level)i))
1215 //! v2b: %res1_cvt = v_cvt_f16_f32 %a
1216 //! v2b: (precise)%res1 = v_mul_f16 %a16, %res1_cvt
1220 //! v2b: %res2_cvt = v_cvt_f16_f32 %a
1221 //! v2b: (precise)%res2 = v_add_f16 %a16, %res2_cvt
1225 //! v2b: %res3_cvt = v_cvt_f16_f32 %a
1226 //! v2b: (precise)%res3 = v_fma_f16 %a16, %a16, %res3_cvt
1231 //! v2b: (precise)%res4_cvt = v_cvt_f16_f32 %a
1232 //! v2b: %res4 = v_mul_f16 %a16, %res4_cvt
1236 //! v2b: (precise)%res5_cvt = v_cvt_f16_f32 %a
1237 //! v2b: %res5 = v_add_f16 %a16, %res5_cvt
1241 //! v2b: (precise)%res6_cvt = v_cvt_f16_f32 %a
1242 //! v2b: %res6 = v_fma_f16 %a16, %a16, %res6_cvt
1252 //>> v1: %a, v2b: %a16 = p_startpgm
1253 if (!setup_cs("v1 v2b", (amd_gfx_level)i))
1347 //>> v1: %a, v1: %b, v1: %c, v2b: %a16, v2b: %b16 = p_startpgm
1348 if (!setup_cs("v1 v1 v1 v2b v2b", (amd_gfx_level)i))
1359 //! v2b: %res0 = v_fma_mixlo_f16 %a, %b, -0
1363 //! v2b: %res1 = v_fma_mixlo_f16 1.0, %a, %b
1367 //! v2b: %res2 = v_fma_mixlo_f16 %a, %b, %c
1371 //! v2b: %res3 = v_fma_mixlo_f16 lo(%a16), %b, -0
1375 //! v2b: %res4 = v_fma_mixlo_f16 1.0, %a, lo(%b16)
1379 //! v2b: %res5 = v_fma_mixlo_f16 %a, lo(%b16), %c
1389 //>> v2b: %a16 = p_startpgm
1390 if (!setup_cs("v2b", (amd_gfx_level)i))
1397 //! v2b: %res0_tmp = v_mul_f16 %a16, %a16
1402 //! v2b: (precise)%res1_tmp = v_mul_f16 %a16, %a16
1413 //>> v1: %a, v1: %b, v2b: %a16, v2b: %b16 = p_startpgm
1414 if (!setup_cs("v1 v1 v2b v2b", (amd_gfx_level)i))
1426 //! v2b: %res0 = v_cvt_f16_f32 |%res0_add|
1431 //! v2b: %res1 = v_cvt_f16_f32 -%res1_add
1435 //! v2b: %res2_add = v_add_f16 %3, %4
1440 //! v2b: %res3_add = v_add_f16 %3, %4
1446 //! v2b: %res4_add = v_fma_mixlo_f16 1.0, %a, %b
1447 //! v2b: %res4 = p_extract %res4_add, 0, 8, 0
1452 //! v2b: %res5 = v_cvt_f16_f32 %res5_mul
1462 //>> v1: %a, v1: %b, v1: %c, v2b: %a16, v2b: %c16 = p_startpgm
1463 if (!setup_cs("v1 v1 v1 v2b v2b", (amd_gfx_level)i))
1506 //! v2b: %res7 = v_fma_mixlo_f16 %a, %b, %c
1516 //>> v1: %a, v1: %b, v1: %c, v2b: %a16, v2b: %b16 = p_startpgm
1517 if (!setup_cs("v1 v1 v1 v2b v2b", (amd_gfx_level)i))
1542 //! v2b: %res2_tmp = v_cvt_f16_f32 %a
1543 //! v2b: %res2 = v_add_f16 %res2_tmp, %b16
1547 //! v2b: %res3_tmp = v_cvt_f16_f32 %a
1548 //! v2b: %res3 = v_mul_f16 %res3_tmp, %b16
1552 //! v2b: %res4_tmp = v_mul_f16 %a16, %b16
1557 //! v2b: %res5_tmp = v_add_f16 %a16, %b16
1562 //! v2b: %res6_tmp = v_fma_mixlo_f16 %a, %b, -0
1563 //! v2b: %res6 = v_add_f16 %res6_tmp, %a16
1567 //! v2b: %res7_tmp = v_mul_f16 %a16, %b16
1578 //>> v1: %a, v2b: %a16 = p_startpgm
1579 if (!setup_cs("v1 v2b", (amd_gfx_level)i))
1591 //! v2b: %res1 = v_fma_mixlo_f16 %a, %a, -0 clamp
1595 //! v2b: %res2 = v_fma_mixlo_f16 %a, %a, -0 clamp
1605 //>> v1: %a, v2b: %a16 = p_startpgm
1606 if (!setup_cs("v1 v2b", (amd_gfx_level)i))
1614 /* The optimizer copy-propagates v2b=p_extract_vector(v1, 0) and p_as_uniform, so the
1619 //! v2b: %res0 = v_mul_f16 %res0_cvt, %a16
1623 //! v2b: %res1_cvt = v_cvt_f16_f32 %a
1628 //! v2b: %res2_mul = v_mul_f16 %a16, %a16
1629 //! v2b: %res2 = v_cvt_f16_f32 %res2_mul
1639 //! v2b: %res4 = v_med3_f16 0, 1.0, %res4_mul
1643 //! v2b: %res5_mul = v_fma_mixlo_f16 %a, %a, -0
1653 //! v2b: %res7_mul = v_mul_f16 %a16, %a16