Lines Matching defs:def

129 get_ssa_temp(struct isel_context* ctx, nir_ssa_def* def)
131 uint32_t id = ctx->first_temp_id + def->index;
153 bld.pseudo(aco_opcode::p_split_vector, bld.def(rc), bld.def(rc), mask);
154 mask_lo = Operand(mask_split.def(0).getTemp());
155 mask_hi = Operand(mask_split.def(1).getTemp());
161 Temp mbcnt_lo = bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, bld.def(v1), mask_lo, base);
191 return bld.readlane(bld.def(s1), data, index);
200 return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(bld.lm), bld.def(bld.lm, vcc),
206 bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand::c32(31u), index);
208 bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), bld.def(s1), index_is_lo);
209 Temp index_is_lo_n1 = bld.sop1(aco_opcode::s_not_b32, bld.def(s1), bld.def(s1, scc),
210 index_is_lo_split.def(1).getTemp());
211 Operand same_half = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
212 index_is_lo_split.def(0).getTemp(), index_is_lo_n1);
213 Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
224 return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(s2), bld.def(s1, scc),
228 Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
229 return bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, data);
257 Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
265 return bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
268 return bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, mask, 0, false);
275 return bld.copy(bld.def(RegType::vgpr, val.size()), val);
330 bld.copy(bld.def(v1), Operand::c32(info.multiplier)));
364 return bld.copy(bld.def(dst_rc), it->second[idx]);
373 return bld.copy(bld.def(dst_rc), src);
450 padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes));
485 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand::c32(3u));
487 shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.scc(Definition(select)), tmp,
492 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), vec, shift);
495 bld.sop2(aco_opcode::s_lshr_b64, Definition(tmp), bld.def(s1, scc), vec, shift);
509 hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand::zero());
513 bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), hi, Operand::zero(), bld.scc(select));
514 lo = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), lo, shift);
516 lo = bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), Definition(mid), lo);
517 hi = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), hi, shift);
518 mid = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), hi, mid);
544 tmp[i] = bld.vop3(aco_opcode::v_alignbyte_b32, bld.def(v1), tmp[i + 1], tmp[i], offset);
548 vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), tmp[0], tmp[1]);
582 vec = bld.pseudo(aco_opcode::p_as_uniform, bld.def(RegClass(RegType::sgpr, vec.size())), vec);
593 get_ssa_temp_tex(struct isel_context* ctx, nir_ssa_def* def, bool is_16bit)
595 RegClass rc = RegClass::get(RegType::vgpr, (is_16bit ? 2 : 4) * def->num_components);
596 Temp tmp = get_ssa_temp(ctx, def);
628 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.scc(Definition(dst)), val, Operand(exec, bld.lm));
673 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(),
684 bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(31u));
687 Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), tmp);
722 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), Operand(vec),
808 return bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), it->second[index],
836 return bld.pseudo(aco_opcode::p_create_vector, bld.def(RegClass(ptr.type(), 2)), ptr,
901 Temp tmp = bld.vop2(opc, bld.def(v1), op[0], op[1]);
932 Temp lo = bld.vop2(op, bld.def(v1), src00, src10);
933 Temp hi = bld.vop2(op, bld.def(v1), src01, src11);
957 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1], src[2]);
959 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]);
1018 bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0])));
1080 Temp cmp = bld.sopc(op, bld.scc(bld.def(s1)), src0, src1);
1121 bld.sop2(op, Definition(dst), bld.def(s1, scc), src0, src1);
1147 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, cond);
1148 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, cond);
1183 then = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), cond, then);
1188 bld.sop2(Builder::s_or, Definition(dst), bld.def(s1, scc), then,
1189 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), els, cond));
1197 Temp is_denormal = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), as_vgpr(ctx, val),
1198 bld.copy(bld.def(v1), Operand::c32((1u << 7) | (1u << 4))));
1199 Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x4b800000u), val);
1200 scaled = bld.vop1(op, bld.def(v1), scaled);
1201 scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(undo), scaled);
1203 Temp not_scaled = bld.vop1(op, bld.def(v1), val);
1269 bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), val_hi, Operand::c32(20u), Operand::c32(11u));
1270 exponent = bld.vsub32(bld.def(v1), exponent, Operand::c32(1023u));
1273 Temp fract_mask = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
1275 fract_mask = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), fract_mask, exponent);
1282 Temp tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_lo);
1283 fract_lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_lo, tmp);
1284 tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_hi);
1285 fract_hi = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_hi, tmp);
1288 Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x80000000u), val_hi);
1292 bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm), exponent, Operand::zero());
1293 Temp dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_lo,
1294 bld.copy(bld.def(v1), Operand::zero()), exp_lt0);
1295 Temp dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_hi, sign, exp_lt0);
1296 Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand::c32(51u));
1297 dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_lo, val_lo, exp_gt51);
1298 dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_hi, val_hi, exp_gt51);
1313 Temp mask = bld.copy(bld.def(s1), Operand::c32(3u)); /* isnan */
1314 Temp min_val = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::c32(-1u),
1317 Temp isnan = bld.vopc_e64(aco_opcode::v_cmp_class_f64, bld.def(bld.lm), src0, mask);
1318 Temp fract = bld.vop1(aco_opcode::v_fract_f64, bld.def(v2), src0);
1319 Temp min = bld.vop3(aco_opcode::v_min_f64, bld.def(v2), fract, min_val);
1326 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, isnan);
1327 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, isnan);
1329 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
1341 Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true);
1342 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand::c32(-1),
1343 add.def(1).getTemp());
1350 add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1);
1360 Builder::Result sub = bld.vsub32(bld.def(v1), src0, src1, true);
1361 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, sub.def(0).getTemp(), Operand::c32(0u),
1362 sub.def(1).getTemp());
1369 sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1);
1411 Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->dest.dest.ssa.bit_size) - 1));
1426 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), elems[i], mask);
1429 elems[i] = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), elems[i],
1433 packed[idx] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), elems[i],
1444 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2],
1447 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1),
1450 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2],
1462 packed[i] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
1465 packed[i] = bld.copy(bld.def(s1), Operand::c32(const_vals[i]));
1497 lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), lo);
1498 hi = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), hi);
1502 bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
1522 bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src);
1525 bld.vsub32(bld.def(v1), Operand::zero(), src));
1543 bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(-1));
1544 bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand::c32(1u));
1547 bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand::c32(63u));
1550 neqz = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), src, Operand::zero());
1553 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), src, Operand::zero())
1554 .def(1)
1557 bld.sop2(aco_opcode::s_or_b64, Definition(dst), bld.def(s1, scc), neg, bld.scc(neqz));
1568 Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), upper);
1569 Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i64, bld.def(bld.lm), Operand::zero(), src);
1570 Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(1u), neg, gtz);
1571 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), neg, gtz);
1782 Temp msb_rev = bld.sop1(op, bld.def(s1), src);
1784 Builder::Result sub = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
1786 Temp msb = sub.def(0).getTemp();
1787 Temp carry = sub.def(1).getTemp();
1798 bld.vsub32(Definition(msb), Operand::c32(31u), Operand(msb_rev), true).def(1).getTemp();
1807 lo = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)),
1808 bld.vop1(op, bld.def(v1), lo));
1809 hi = bld.vop1(op, bld.def(v1), hi);
1810 Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi);
1812 Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi);
1816 bld.vsub32(Definition(msb), Operand::c32(63u), Operand(msb_rev), true).def(1).getTemp();
1826 Temp msb_rev = bld.sop1(aco_opcode::s_flbit_i32_b32, bld.def(s1), src);
1829 Temp msb_rev = bld.vop1(aco_opcode::v_ffbh_u32, bld.def(v1), src);
1879 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src10);
1880 Temp dst1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
1885 Temp carry = bld.vadd32(Definition(dst0), src00, src10, true).def(1).getTemp();
1886 Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry);
1939 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10);
1940 Temp no_sat1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(Definition(carry1)),
1943 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
1952 Temp carry0 = bld.vadd32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
1963 carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
1986 Temp cond = bld.sopc(aco_opcode::s_cmp_lt_i32, bld.def(s1, scc), src1, Operand::zero());
1987 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
1991 bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1);
2015 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1);
2019 Temp carry = bld.vadd32(bld.def(v1), src0, src1, true).def(1).getTemp();
2033 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src10);
2034 carry = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(bld.def(s1)), src01, src11,
2036 .def(1)
2040 Temp carry = bld.vadd32(bld.def(v1), src00, src10, true).def(1).getTemp();
2041 carry = bld.vadd32(bld.def(v1), src01, src11, true, carry).def(1).getTemp();
2042 carry = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
2085 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), src00, src10);
2086 Temp dst1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
2091 Temp borrow = bld.vsub32(Definition(lower), src00, src10, true).def(1).getTemp();
2092 Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow);
2103 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1);
2106 Temp borrow = bld.vsub32(bld.def(v1), src0, src1, true).def(1).getTemp();
2120 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), src00, src10);
2121 borrow = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(bld.def(s1)), src01, src11,
2123 .def(1)
2127 Temp borrow = bld.vsub32(bld.def(v1), src00, src10, true).def(1).getTemp();
2128 borrow = bld.vsub32(bld.def(v1), src01, src11, true, Operand(borrow)).def(1).getTemp();
2129 borrow = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
2182 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10);
2183 Temp no_sat1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(Definition(carry1)),
2186 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
2195 Temp carry0 = bld.vsub32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
2206 carry1 = bld.vsub32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
2228 Temp cond = bld.sopc(aco_opcode::s_cmp_gt_i32, bld.def(s1, scc), src1, Operand::zero());
2229 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
2233 bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1);
2311 Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]),
2492 Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]);
2493 ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma);
2494 Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
2495 Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
2496 sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/),
2497 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma));
2498 tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/),
2499 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma));
2546 src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
2550 upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand::c32(0x80000000u), upper);
2581 src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
2585 upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7FFFFFFFu), upper);
2708 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src0);
2710 bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, Operand::zero());
2711 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f64, bld.def(bld.lm), src0, trunc);
2712 Temp cond = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc), tmp0, tmp1);
2713 Temp add = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
2714 bld.copy(bld.def(v1), Operand::zero()),
2715 bld.copy(bld.def(v1), Operand::c32(0x3ff00000u)), cond);
2716 add = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2),
2717 bld.copy(bld.def(v1), Operand::zero()), add);
2752 Temp bitmask = bld.sop1(aco_opcode::s_brev_b32, bld.def(s1),
2753 bld.copy(bld.def(s1), Operand::c32(-2u)));
2755 bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask,
2756 bld.copy(bld.def(v1), Operand::c32(0x43300000u)), as_vgpr(ctx, src0_hi));
2758 bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0,
2759 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi));
2761 bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp,
2762 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi));
2766 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
2768 Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, v);
2774 Temp dst0 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_lo,
2776 Temp dst1 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_hi,
2797 src = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), src);
2834 Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src);
2835 tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), tmp, Operand::zero());
2851 src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
2853 bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u));
2856 src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
2858 bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u));
2861 Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
2862 Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
2863 Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
2866 cond = bld.vopc(aco_opcode::v_cmp_le_f64, bld.def(bld.lm), Operand::zero(), src);
2867 tmp = bld.copy(bld.def(v1), Operand::c32(0xBFF00000u));
2868 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);
2880 src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
2893 src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
2915 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
2949 src = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), src);
2969 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
2970 upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper);
2971 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
2972 upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper);
2989 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
2990 upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper);
2991 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
3024 src = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), src);
3044 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
3045 upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper);
3046 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
3047 upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper);
3063 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
3064 upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper);
3065 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
3081 tmp = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp);
3104 tmp = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp);
3121 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3126 bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp));
3140 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3145 bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp));
3159 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3162 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3163 exponent = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::zero(), exponent,
3165 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3166 Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), src);
3167 mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(0x800000u), mantissa);
3168 mantissa = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(7u), mantissa);
3169 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), mantissa);
3172 bld.vsub32(Definition(new_exponent), Operand::c32(63u), exponent, true).def(1).getTemp();
3174 mantissa = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), new_exponent, mantissa);
3176 mantissa = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), mantissa, new_exponent);
3177 Temp saturate = bld.vop1(aco_opcode::v_bfrev_b32, bld.def(v1), Operand::c32(0xfffffffeu));
3180 lower = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), lower,
3182 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), upper, saturate, borrow);
3183 lower = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), sign, lower);
3184 upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), sign, upper);
3186 borrow = bld.vsub32(Definition(new_lower), lower, sign, true).def(1).getTemp();
3187 Temp new_upper = bld.vsub32(bld.def(v1), upper, sign, false, borrow);
3193 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3195 exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent,
3197 exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand::zero(),
3199 exponent = bld.sop2(aco_opcode::s_min_i32, bld.def(s1), bld.def(s1, scc),
3201 Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3204 bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(31u));
3205 mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
3207 mantissa = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), mantissa,
3209 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), mantissa);
3210 exponent = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3213 bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), mantissa, exponent);
3214 Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), exponent,
3216 Temp saturate = bld.sop1(aco_opcode::s_brev_b64, bld.def(s2), Operand::c32(0xfffffffeu));
3217 mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), saturate, mantissa, cond);
3220 lower = bld.sop2(aco_opcode::s_xor_b32, bld.def(s1), bld.def(s1, scc), sign, lower);
3221 upper = bld.sop2(aco_opcode::s_xor_b32, bld.def(s1), bld.def(s1, scc), sign, upper);
3224 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), lower, sign);
3225 upper = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), upper, sign,
3230 Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3232 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3233 Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3234 vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3236 Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3237 Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3238 Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3239 Temp upper = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), floor);
3254 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3257 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3259 bld.vopc(aco_opcode::v_cmp_ge_i32, bld.def(bld.lm), Operand::c32(64u), exponent);
3260 exponent = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::zero(), exponent);
3261 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3262 mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(0x800000u), mantissa);
3263 Temp exponent_small = bld.vsub32(bld.def(v1), Operand::c32(24u), exponent);
3264 Temp small = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), exponent_small, mantissa);
3265 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), mantissa);
3268 bld.vsub32(Definition(new_exponent), exponent, Operand::c32(24u), true).def(1).getTemp();
3270 mantissa = bld.vop3(aco_opcode::v_lshlrev_b64, bld.def(v2), new_exponent, mantissa);
3272 mantissa = bld.vop3(aco_opcode::v_lshl_b64, bld.def(v2), mantissa, new_exponent);
3275 lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lower, small, cond_small);
3276 upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), upper, Operand::zero(),
3278 lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0xffffffffu), lower,
3280 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0xffffffffu), upper,
3287 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3289 exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent,
3291 exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand::zero(),
3293 Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3295 mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
3297 Temp exponent_small = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3299 Temp small = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), mantissa,
3301 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), mantissa);
3302 Temp exponent_large = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3304 mantissa = bld.sop2(aco_opcode::s_lshl_b64, bld.def(s2), bld.def(s1, scc), mantissa,
3307 bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent);
3308 mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa,
3313 bld.sopc(aco_opcode::s_cmp_le_i32, bld.def(s1, scc), exponent, Operand::c32(24u));
3314 lower = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), small, lower, cond_small);
3316 bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::zero(), upper, cond_small);
3320 Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3322 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3323 Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3324 vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3326 Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3327 Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3328 Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3329 Temp upper = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), floor);
3349 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3c00u));
3380 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
3382 bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), one, src);
3460 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), Operand::zero(), src)
3461 .def(1)
3465 bld.scc(bld.def(s1)), Operand::zero(), src);
3485 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()),
3489 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst),
3494 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()),
3502 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst),
3505 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
3518 src0 = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), src0,
3520 src1 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), src1,
3522 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), src0, src1);
3562 src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src);
3576 src = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), src,
3580 bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp();
3597 Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
3602 bld.def(s1), Operand::c32(0x36Fu)); /* value is NOT negative/positive denormal value */
3603 cmp_res = bld.vopc_e64(aco_opcode::v_cmp_class_f16, bld.def(bld.lm), f16, mask);
3604 f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
3609 f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
3610 Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u));
3611 Instruction* tmp0 = bld.vopc_e64(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), f32, smallest);
3613 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f32, bld.def(bld.lm), Operand::zero(), f32);
3614 cmp_res = bld.sop2(aco_opcode::s_nand_b64, bld.def(s2), bld.def(s1, scc),
3620 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::zero(), as_vgpr(ctx, src));
3655 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), insert, bitmask);
3664 base = bld.sop2(aco_opcode::s_andn2_b32, bld.def(s1), bld.def(s1, scc), base, bitmask);
3668 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), rhs, lhs);
3691 bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract));
3698 Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
3700 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
3701 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
3704 : bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1),
3705 bld.def(s1, scc), bits, Operand::c32(16u));
3708 : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3712 bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op);
3713 bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
3742 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(vec),
3746 Definition def(dst);
3750 def = bld.def(src.type(), 1);
3752 assert(def.bytes() <= 4);
3753 if (def.regClass() == s1) {
3754 bld.pseudo(aco_opcode::p_extract, def, bld.def(s1, scc), Operand(src),
3757 src = emit_extract_vector(ctx, src, 0, def.regClass());
3758 bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index),
3762 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
3777 Definition def(dst);
3783 def = bld.def(src.type(), 1);
3785 if (def.regClass() == s1) {
3786 bld.pseudo(aco_opcode::p_insert, def, bld.def(s1, scc), Operand(src),
3789 src = emit_extract_vector(ctx, src, 0, def.regClass());
3790 bld.pseudo(aco_opcode::p_insert, def, Operand(src), Operand::c32(index),
3795 def.getTemp());
3797 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
3805 bld.sop1(aco_opcode::s_bcnt1_i32_b32, Definition(dst), bld.def(s1, scc), src);
3810 bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1),
3813 bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src);
3911 Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
3912 tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), src, tl, dpp_ctrl2);
3914 Temp tl = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl1);
3915 Temp tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2);
3916 tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), tr, tl);
3928 Temp dst = get_ssa_temp(ctx, &instr->def);
3933 assert(instr->def.num_components == 1 && "Vector load_const should be lowered to scalar.");
3938 if (instr->def.bit_size == 1) {
3943 } else if (instr->def.bit_size == 8) {
3945 } else if (instr->def.bit_size == 16) {
3954 if (instr->def.bit_size == 64)
4078 offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), offset_tmp,
4081 offset = bld.vadd32(bld.def(v1), offset_tmp, Operand::c32(to_add));
4089 lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), lo,
4091 hi = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), hi, carry);
4092 offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), lo, hi);
4096 bld.vadd32(Definition(new_lo), lo, Operand::c32(to_add), true).def(1).getTemp();
4097 hi = bld.vadd32(bld.def(v1), hi, Operand::zero(), false, carry);
4098 offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), new_lo, hi);
4112 aligned_offset = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
4115 aligned_offset = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc),
4119 bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xfffffffcu), offset_tmp);
4123 lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xfffffffcu), lo);
4124 aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi);
4128 aligned_offset.isTemp() ? aligned_offset.getTemp() : bld.copy(bld.def(s1), aligned_offset);
4205 bld.pseudo(aco_opcode::p_extract_vector, bld.def(new_rc), tmp[0], Operand::zero());
4218 for (auto& def : split->definitions) {
4221 def = Definition(component);
4267 return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0xffffffffu)));
4274 offset = offset.regClass() == s1 ? bld.copy(bld.def(v1), offset) : offset;
4317 offset = bld.vadd32(bld.def(v1), offset, Operand::c32(excess));
4376 offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
4383 load->operands[1] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
4411 vaddr = bld.copy(bld.def(v1), soffset);
4506 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), Operand::zero(), Operand::zero(),
4508 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), addr, Operand::c32(-1u),
4521 Temp carry = bld.vadd32(Definition(dst0), src00, src1, true).def(1).getTemp();
4522 Temp dst1 = bld.vadd32(bld.def(v1), src01, Operand::zero(), false, carry);
4523 return bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
4527 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src1);
4528 Temp dst1 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), src01, carry);
4529 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), dst0, dst1);
4552 address = add64_32(bld, address, bld.copy(bld.def(s1), Operand::c32(UINT32_MAX)));
4556 offset = bld.copy(bld.def(s1), Operand::c32(excess_offset));
4566 address = add64_32(bld, address, bld.copy(bld.def(s1), Operand::c32(src2)));
4577 offset = offset.id() ? offset : bld.copy(bld.def(s1), Operand::zero());
4594 offset = bld.copy(bld.def(v1), bld.copy(bld.def(s1), Operand::zero()));
4916 address_offset = bld.vadd32(bld.def(v1), Operand::c32(base_offset), address_offset);
5032 Temp zero = bld.copy(bld.def(RegClass(reg_type, dword_size)),
5057 voffset = bld.copy(bld.def(v1), Operand::c32(excess_const_offset));
5059 voffset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
5062 voffset = bld.vadd32(bld.def(v1), Operand(voffset), Operand::c32(excess_const_offset));
5148 return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
5165 bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), wave_id_in_tg,
5167 return bld.vadd32(bld.def(v1), Operand(num_pre_threads), Operand(tid_in_wave));
5271 bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand::c32(2u) /* P0 */,
5273 interp_p1 = bld.vintrp(aco_opcode::v_interp_p1lv_f16, bld.def(v2b), coord1,
5283 Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1ll_f16, bld.def(v1), coord1,
5289 Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
5316 bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_arg(ctx, ctx->args->ac.frag_pos[3]));
5337 Temp x_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5339 Temp y_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5343 cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(x_rate));
5344 x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand::zero()),
5345 bld.copy(bld.def(v1), Operand::c32(4u)), cond);
5348 cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(y_rate));
5349 y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand::zero()),
5350 bld.copy(bld.def(v1), Operand::c32(1u)), cond);
5474 elems[i] = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), elems[i]);
5476 elems[i] = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), elems[i],
5513 Operand off = bld.copy(bld.def(s1), Operand::c32(desc_index * 16u));
5514 Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
5525 index = bld.vadd32(bld.def(v1), start_instance, divided);
5527 index = bld.vadd32(bld.def(v1), start_instance, instance_id);
5530 index = bld.copy(bld.def(v1), start_instance);
5533 index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.base_vertex),
5575 bld.vadd32(bld.def(v1), Operand::c32(fetch_offset / attrib_stride), fetch_index);
5581 soffset = bld.copy(bld.def(s1), Operand::c32(fetch_offset / 4096 * 4096));
5733 aco_opcode::v_interp_mov_f32, bld.def(instr->dest.ssa.bit_size == 16 ? v2b : v1),
5780 Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tes_u, tes_v);
5781 tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::c32(0x3f800000u /* 1.0f */), tmp);
5866 index = bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
5943 offset = bld.nuw().sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
5946 offset = bld.vadd32(bld.def(v1), Operand::c32(base), offset);
5948 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
5949 bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc),
5973 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, high);
5987 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, Operand(v2b));
6041 coord = bld.copy(bld.def(v1), coord);
6061 coord = bld.copy(bld.def(v1), coord);
6136 coords[1] = bld.copy(bld.def(v1), Operand::zero());
6155 bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), rsrc_word5,
6312 tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, tmp.size() + 1), tmp,
6393 if (comp.def->parent_instr->type == nir_instr_type_ssa_undef ||
6450 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(is_64bit ? v4 : v2),
6534 Definition def =
6535 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6537 mubuf->definitions[0] = def;
6547 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6553 Definition def =
6554 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6556 emit_mimg(bld, image_op, def, resource, Operand(s4), coords, 0, Operand(data));
6567 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6580 Temp size_div3 = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1),
6581 bld.copy(bld.def(v1), Operand::c32(0xaaaaaaabu)), size);
6582 size_div3 = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
6586 stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), stride,
6589 Temp is12 = bld.sopc(aco_opcode::s_cmp_eq_i32, bld.def(s1, scc), stride, Operand::c32(12u));
6590 size = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), size_div3, size, bld.scc(is12));
6593 bld.sop2(aco_opcode::s_lshr_b32, Definition(shr_dst), bld.def(s1, scc), size,
6594 bld.sop1(aco_opcode::s_ff1_i32_b32, bld.def(s1), stride));
6618 std::vector<Temp> lod{bld.copy(bld.def(v1), Operand::zero())};
6646 Temp samples_log2 = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6648 Temp samples = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand::c32(1u),
6650 Temp type = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6660 bld.sopc(aco_opcode::s_cmp_gt_u32, bld.def(s1, scc), dword1, Operand::zero());
6664 Temp is_msaa = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), type, Operand::c32(14u));
6754 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
6823 Definition def =
6824 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6826 mubuf->definitions[0] = def;
6836 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
7007 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
7165 Definition def =
7166 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
7168 mubuf->definitions[0] = def;
7178 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
7278 bld.smem(opcode, bld.def(RegType::sgpr, size), base, offset), Operand::c32(0u));
7488 address = bld.vadd32(bld.def(v1), Operand::c32(offset), address);
7558 Temp comp0 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[0], comp[1]);
7559 Temp comp1 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[2], comp[3]);
7580 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
7599 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand::c32(-1u),
7619 bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(nir_src_as_uint(instr->src[0]), max)));
7676 saddr = bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(const_offset, max)));
7700 next_vertex = bld.v_mul_imm(bld.def(v1), next_vertex, 4u);
7705 bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer,
7728 Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset));
7731 gsvs_dwords[0] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)),
7733 gsvs_dwords[1] = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc),
7737 gsvs_dwords[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), gsvs_dwords[1],
7739 gsvs_dwords[2] = bld.copy(bld.def(s1), Operand::c32(ctx->program->wave_size));
7741 gsvs_ring = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), gsvs_dwords[0], gsvs_dwords[1],
7758 vaddr_offset = bld.copy(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u));
7760 vaddr_offset = bld.vadd32(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u),
7801 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
7802 return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc),
7803 bld.sop1(Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc), tmp));
7807 Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc),
7808 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)));
7812 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
7813 .def(1)
7816 return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), cond);
7820 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm))
7821 .def(1)
7827 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7828 tmp = bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), tmp);
7829 tmp = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(1u))
7830 .def(1)
7845 Temp cluster_offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1),
7850 tmp = bld.sop2(Builder::s_orn2, bld.def(bld.lm), bld.def(s1, scc), src,
7854 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7859 tmp = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), tmp, cluster_offset);
7861 tmp = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), cluster_offset, tmp);
7863 tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), cluster_offset, tmp);
7866 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(cluster_mask), tmp);
7869 return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm), Operand::c32(cluster_mask),
7872 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp);
7874 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u),
7875 bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), tmp, Operand::zero()));
7876 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp);
7896 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
7898 tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7903 return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm), Operand::zero(), mbcnt);
7905 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), mbcnt);
7907 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(),
7908 bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), mbcnt));
7925 return bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7927 return bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7929 return bld.sop2(Builder::s_xor, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7990 count = bld.vop1(aco_opcode::v_cvt_f16_u16, bld.def(v2b), count);
7994 count = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), count);
8009 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), count, Operand::c32(1u));
8011 count = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), count);
8052 bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm));
8096 Temp lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
8106 bld.writelane(bld.def(v1), bld.copy(bld.def(s1, m0), Operand::c32(identity_lo)), lane, lo);
8108 bld.writelane(bld.def(v1), bld.copy(bld.def(s1, m0), Operand::c32(identity_hi)), lane, hi);
8112 bld.writelane(dst, bld.copy(bld.def(s1, m0), Operand::c32(identity)), lane,
8131 defs[num_defs++] = bld.def(bld.lm); /* used internally to save/restore exec */
8143 defs[num_defs++] = bld.def(RegType::sgpr, dst.size());
8146 defs[num_defs++] = bld.def(s1, scc);
8158 defs[num_defs++] = bld.def(bld.lm, vcc);
8189 Temp tl_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p1, dpp_ctrl0);
8190 ddx_1 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p1, tl_1, dpp_ctrl1);
8191 ddy_1 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p1, tl_1, dpp_ctrl2);
8192 Temp tl_2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p2, dpp_ctrl0);
8193 ddx_2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p2, tl_2, dpp_ctrl1);
8194 ddy_2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p2, tl_2, dpp_ctrl2);
8196 Temp tl_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl0);
8197 ddx_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl1);
8198 ddx_1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddx_1, tl_1);
8199 ddy_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl2);
8200 ddy_1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddy_1, tl_1);
8202 Temp tl_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl0);
8203 ddx_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl1);
8204 ddx_2 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddx_2, tl_2);
8205 ddy_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl2);
8206 ddy_2 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddy_2, tl_2);
8212 Temp tmp1 = bld.vop3(mad, bld.def(v1), ddx_1, pos1, p1);
8213 Temp tmp2 = bld.vop3(mad, bld.def(v1), ddx_2, pos1, p2);
8214 tmp1 = bld.vop3(mad, bld.def(v1), ddy_1, pos2, tmp1);
8215 tmp2 = bld.vop3(mad, bld.def(v1), ddy_2, pos2, tmp2);
8298 offset = bld.sop2(aco_opcode::s_lshl3_add_u32, bld.def(s1), bld.def(s1, scc), addr,
8301 offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr,
8303 offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
8307 Operand off = bld.copy(bld.def(s1), Operand(offset));
8309 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, off);
8312 addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
8313 sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr,
8321 Definition scc_tmp = bld.def(s1, scc);
8322 tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0,
8324 tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1,
8326 addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
8328 Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp();
8330 Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), bld.def(bld.lm), tmp1,
8332 addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), pck0, pck1);
8335 sample_pos = bld.flat(aco_opcode::flat_load_dwordx2, bld.def(v2), addr, Operand(s1));
8341 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
8344 addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
8345 addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), addr, Operand::zero());
8368 pos1 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos1, Operand::c32(0x3f000000u));
8369 pos2 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos2, Operand::c32(0x3f000000u));
8405 posx.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posx) : Operand::zero(),
8406 posy.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posy) : Operand::zero());
8494 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, Operand::zero()),
8495 bld.smem(aco_opcode::s_load_dword, bld.def(s1), addr, Operand::c32(8)));
8514 local_ids[i] = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
8546 bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
8549 Temp temp = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), wave_id,
8575 Temp tg_num = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
8582 bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
8592 bld.def(s1, scc), get_arg(ctx, ctx->args->ac.tg_size),
8597 bld.def(s1, scc), get_arg(ctx, ctx->args->ac.merged_wave_info),
8611 bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->ac.tg_size));
8614 bld.def(s1, scc), get_arg(ctx, ctx->args->ac.merged_wave_info),
8627 src = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8629 src = bld.vopc(aco_opcode::v_cmp_lg_u64, bld.def(bld.lm), Operand::zero(), src);
8636 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
8640 bld.pseudo(aco_opcode::p_create_vector, bld.def(dst.regClass()), src, Operand::zero());
8666 bld.def(src.regClass() == v1b ? v3b : v2b), tmp);
8680 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, tid);
8686 tmp = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), src, tid);
8688 tmp = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), tid, src);
8690 tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), tid, src);
8692 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), tmp);
8693 emit_wqm(bld, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp),
8710 emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), src), dst);
8714 lo = emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), lo));
8715 hi = emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), hi));
8720 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
8721 bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)));
8735 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
8736 .def(1)
8739 bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), cond);
8819 bld.def(dst.regClass()), src);
8861 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
8879 : bld.pseudo(aco_opcode::p_create_vector, bld.def(bld.lm),
8883 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
8884 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, src);
8888 Definition def = excess_bytes ? bld.def(v1) : Definition(tmp);
8891 bld.vop1_dpp(aco_opcode::v_mov_b32, def, src, dpp_ctrl);
8893 bld.ds(aco_opcode::ds_swizzle_b32, def, src, (1 << 15) | dpp_ctrl);
8897 bld.def(RegClass::get(tmp.type(), excess_bytes)), def.getTemp());
8903 lo = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl);
8904 hi = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl);
8906 lo = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) | dpp_ctrl);
8907 hi = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) | dpp_ctrl);
8918 tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp);
8940 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
8943 Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8972 emit_wqm(bld, bld.writelane(bld.def(v1), val, lane, src), dst);
8978 Temp lo = emit_wqm(bld, bld.writelane(bld.def(v1), val_lo, lane, src_hi));
8979 Temp hi = emit_wqm(bld, bld.writelane(bld.def(v1), val_hi, lane, src_hi));
9044 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
9063 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
9075 emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
9080 Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm));
9081 Temp last = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc),
9091 Temp elected = bld.pseudo(aco_opcode::p_elect, bld.def(bld.lm), Operand(exec, bld.lm));
9101 Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);
9229 Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), bld.as_uniform(m0_val)));
9294 Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma);
9295 Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive);
9296 Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma);
9298 Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id);
9299 Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id);
9300 is_ma_y = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), is_ma_y, is_ma_z);
9302 bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), is_ma_z, is_ma_y);
9305 Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x);
9307 aco_opcode::v_cndmask_b32, bld.def(v1),
9308 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_sgn_ma, sgn_ma, is_ma_z), one, is_ma_y);
9309 *out_sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
9312 tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_y, deriv_z, is_ma_y);
9313 sgn = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, sgn_ma, is_ma_y);
9314 *out_tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
9317 tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
9318 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_x, deriv_y, is_ma_y),
9320 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffffu), tmp);
9321 *out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp);
9337 coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), coords[3]);
9339 ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9349 sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9351 sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand::c32(0x3fc00000u /*1.5*/));
9353 tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9355 tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand::c32(0x3fc00000u /*1.5*/));
9357 id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9360 sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, invma);
9361 tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, invma);
9369 deriv_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, invma);
9371 Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9372 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_sc, invma),
9373 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, sc));
9374 Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9375 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_tc, invma),
9376 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, tc));
9377 *(i ? ddy : ddx) = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), x, y);
9380 sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), sc);
9381 tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), tc);
9385 id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand::c32(0x41000000u /*8.0*/));
9540 acc = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), acc,
9544 acc = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), acc,
9551 pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), pack, acc);
9556 pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
9564 acc = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x3Fu), acc);
9567 acc = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(8u * i), acc);
9573 pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), pack, acc);
9578 pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(pack_const), pack);
9581 offset = bld.copy(bld.def(v1), Operand::c32(pack_const));
9604 unpacked_coord.insert(std::next(unpacked_coord.begin()), bld.copy(bld.def(rc), coord2d));
9636 Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes()));
9680 lod = bld.copy(bld.def(v1), Operand::zero());
9702 Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
9714 half_texel[i] = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), half_texel[i]);
9715 half_texel[i] = bld.vop1(aco_opcode::v_rcp_iflag_f32, bld.def(v1), half_texel[i]);
9716 half_texel[i] = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1),
9730 bld.sopc(aco_opcode::s_bitcmp0_b32, bld.def(s1, scc), sampler, Operand::c32(bit_idx));
9733 half_texel[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
9735 half_texel[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
9739 Temp new_coords[2] = {bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]),
9740 bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[1], half_texel[1])};
9754 Temp dfmt = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), desc[1],
9756 Temp compare_cube_wa = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), dfmt,
9761 nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
9765 nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
9772 nfmt = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), nfmt,
9775 desc[1] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), desc[1],
9777 desc[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), desc[1], nfmt);
9787 new_coords[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), new_coords[0], coords[0],
9789 new_coords[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), new_coords[1], coords[1],
9879 Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
9889 bld.copy(bld.def(v1), Operand::c32(0x76543210)), tmp_dst, is_not_null);
10049 cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]);
10051 cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);
10052 val[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), val[i], cvt_val,
10186 Temp dst = get_ssa_temp(ctx, &instr->def);
10209 bld.branch(aco_opcode::p_branch, bld.def(s2));
10253 bld.branch(aco_opcode::p_branch, bld.def(s2));
10260 bld.branch(aco_opcode::p_branch, bld.def(s2));
10276 bld.branch(aco_opcode::p_branch, bld.def(s2));
10335 bld.branch(aco_opcode::p_branch, bld.def(s2));
10349 bld.branch(aco_opcode::p_branch, bld.def(s2));
10366 bld.branch(aco_opcode::p_branch, bld.def(s2));
10375 bld.branch(aco_opcode::p_branch, bld.def(s2));
10490 Temp cond = bld.copy(bld.def(s1, scc), Operand::zero());
10949 Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u),
10952 out = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(out), exp->operands[2]);
11038 Temp ch2 = bld.copy(bld.def(v1), Operand::c32(0));
11044 ch2 = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), tmp, Operand::c32(17), ch2);
11049 ch2 = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), tmp, Operand::c32(20), ch2);
11054 ch2 = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), tmp, ch2);
11094 values[0] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), values[0]);
11110 bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), mrtz_alpha);
11113 values[1] = bld.vop3(aco_opcode::v_and_or_b32, bld.def(v1), values[1],
11194 Temp isnan = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), values[i],
11195 bld.copy(bld.def(v1), Operand::c32(3u)));
11196 values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), values[i],
11197 bld.copy(bld.def(v1), Operand::zero()), isnan);
11225 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1),
11230 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1),
11274 values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
11295 values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), Operand::c32(max), values[i]);
11296 values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
11318 compr_op, bld.def(v1), values[i * 2].isUndefined() ? Operand::zero() : values[i * 2],
11386 chan = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), chan);
11492 bld.vadd32(bld.def(v1), Operand::c32(offset), Operand(so_write_offset[buf]));
11510 bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
11515 Temp can_emit = bld.vopc(aco_opcode::v_cmp_gt_i32, bld.def(bld.lm), so_vtx_count, tid);
11523 bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.streamout_write_index), tid);
11534 so_buffers[i] = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), buf_ptr,
11535 bld.copy(bld.def(s1), Operand::c32(i * 16u)));
11538 Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
11541 Temp new_offset = bld.vadd32(bld.def(v1), offset, tid);
11544 bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), new_offset);
11546 Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u);
11547 Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(4u),
11549 so_write_offset[i] = bld.vadd32(bld.def(v1), offset, offset2);
11618 bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
11626 Definition def(get_arg(ctx, ctx->args->vs_inputs[i]));
11629 def.setFixed(PhysReg(256 + ctx->args->ac.args[idx].offset));
11631 ctx->program->vs_inputs.push_back(def);
11644 Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
11647 Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
11652 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.vertex_id),
11655 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_rel_ids),
11658 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_patch_id),
11697 Temp sel = bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm),
11708 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), persp_centroid, persp_center, sel);
11723 new_coord[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), linear_centroid,
11807 Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand::zero());
11816 Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count,
11819 bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand::c32(-1u), mask, bld.scc(active_64));
11837 : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
11856 prm_cnt_0 = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), prm_cnt, Operand::zero());
11857 prm_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), prm_cnt,
11859 vtx_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), vtx_cnt,
11865 bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), prm_cnt, Operand::c32(12u));
11866 tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc), tmp, vtx_cnt);
11878 Temp first_lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
11879 Temp cond = bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc),
11881 cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), cond,
11890 Temp zero = bld.copy(bld.def(v1), Operand::zero());
11892 Temp nan_coord = bld.copy(bld.def(v1), Operand::c32(-1u));
11985 ctx.gs_wave_id = bld.pseudo(aco_opcode::p_extract, bld.def(s1, m0), bld.def(s1, scc),
12053 Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4),
12058 stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
12061 Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u),
12078 bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand::c32(stream));