Lines Matching defs:bld
138 Builder bld(ctx->program, ctx->block);
140 assert(mask.isUndefined() || mask.bytes() == bld.lm.bytes());
144 return bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(dst), mask_lo, base);
153 bld.pseudo(aco_opcode::p_split_vector, bld.def(rc), bld.def(rc), mask);
161 Temp mbcnt_lo = bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, bld.def(v1), mask_lo, base);
164 return bld.vop2(aco_opcode::v_mbcnt_hi_u32_b32, Definition(dst), mask_hi, mbcnt_lo);
166 return bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(dst), mask_hi, mbcnt_lo);
170 emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1), bool program_needs_wqm = false)
172 if (bld.program->stage != fragment_fs) {
176 return bld.copy(Definition(dst), src);
178 dst = bld.tmp(src.regClass());
182 bld.pseudo(aco_opcode::p_wqm, Definition(dst), src);
183 bld.program->needs_wqm |= program_needs_wqm;
188 emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
191 return bld.readlane(bld.def(s1), data, index);
200 return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(bld.lm), bld.def(bld.lm, vcc),
206 bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand::c32(31u), index);
208 bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), bld.def(s1), index_is_lo);
209 Temp index_is_lo_n1 = bld.sop1(aco_opcode::s_not_b32, bld.def(s1), bld.def(s1, scc),
211 Operand same_half = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
213 Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
224 return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(s2), bld.def(s1, scc),
228 Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
229 return bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, data);
234 emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
257 Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
265 return bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
268 return bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, mask, 0, false);
272 as_vgpr(Builder& bld, Temp val)
275 return bld.copy(bld.def(RegType::vgpr, val.size()), val);
283 Builder bld(ctx->program, ctx->block);
284 return as_vgpr(bld, val);
292 Builder bld(ctx->program, ctx->block);
295 bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(util_logbase2(b)), a);
309 bld.copy(Definition(dst), a);
315 pre_shift_dst = (increment || multiply || post_shift) ? bld.tmp(v1) : dst;
316 bld.vop2(aco_opcode::v_lshrrev_b32, Definition(pre_shift_dst), Operand::c32(info.pre_shift),
322 increment_dst = (post_shift || multiply) ? bld.tmp(v1) : dst;
323 bld.vadd32(Definition(increment_dst), Operand::c32(info.increment), pre_shift_dst);
328 multiply_dst = post_shift ? bld.tmp(v1) : dst;
329 bld.vop3(aco_opcode::v_mul_hi_u32, Definition(multiply_dst), increment_dst,
330 bld.copy(bld.def(v1), Operand::c32(info.multiplier)));
334 bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(info.post_shift),
342 Builder bld(ctx->program, ctx->block);
343 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::c32(idx));
356 Builder bld(ctx->program, ctx->block);
364 return bld.copy(bld.def(dst_rc), it->second[idx]);
373 return bld.copy(bld.def(dst_rc), src);
375 Temp dst = bld.tmp(dst_rc);
419 Builder bld(ctx->program, ctx->block);
422 Temp tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, 2 * num_components));
424 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp_dst);
436 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec_src);
438 bld.copy(Definition(dst), vec_src);
450 padding = bld.copy(bld.def(dst_rc), Operand::zero(component_bytes));
460 src = bld.as_uniform(src);
476 Builder bld(ctx->program, ctx->block);
485 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand::c32(3u));
486 select = bld.tmp(s1);
487 shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.scc(Definition(select)), tmp,
492 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), vec, shift);
494 Temp tmp = dst.size() == 2 ? dst : bld.tmp(s2);
495 bld.sop2(aco_opcode::s_lshr_b64, Definition(tmp), bld.def(s1, scc), vec, shift);
501 Temp lo = bld.tmp(s2), hi;
504 hi = bld.tmp(s1);
505 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
507 hi = bld.tmp(s2);
508 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec);
509 hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand::zero());
513 bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), hi, Operand::zero(), bld.scc(select));
514 lo = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), lo, shift);
515 Temp mid = bld.tmp(s1);
516 lo = bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), Definition(mid), lo);
517 hi = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), hi, shift);
518 mid = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), hi, mid);
519 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, mid);
527 Builder bld(ctx->program, ctx->block);
532 tmp[0] = bld.tmp(v1), tmp[1] = bld.tmp(v1), tmp[2] = bld.tmp(v1), tmp[3] = bld.tmp(v1);
533 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp[0]), Definition(tmp[1]),
536 tmp[0] = bld.tmp(v1), tmp[1] = bld.tmp(v1), tmp[2] = bld.tmp(v1);
537 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp[0]), Definition(tmp[1]),
540 tmp[0] = bld.tmp(v1), tmp[1] = bld.tmp(v1), tmp[2] = tmp[1];
541 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp[0]), Definition(tmp[1]), vec);
544 tmp[i] = bld.vop3(aco_opcode::v_alignbyte_b32, bld.def(v1), tmp[i + 1], tmp[i], offset);
548 vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), tmp[0], tmp[1]);
556 bld.copy(Definition(dst), vec);
578 bld.insert(std::move(create_vec));
582 vec = bld.pseudo(aco_opcode::p_as_uniform, bld.def(RegClass(RegType::sgpr, vec.size())), vec);
586 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec);
606 Builder bld(ctx->program, ctx->block);
608 dst = bld.tmp(bld.lm);
611 assert(dst.regClass() == bld.lm);
613 return bld.sop2(Builder::s_cselect, Definition(dst), Operand::c32(-1), Operand::zero(),
614 bld.scc(val));
620 Builder bld(ctx->program, ctx->block);
622 dst = bld.tmp(s1);
624 assert(val.regClass() == bld.lm);
628 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.scc(Definition(dst)), val, Operand(exec, bld.lm));
642 convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits,
650 dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u));
652 dst = bld.tmp(RegClass(RegType::vgpr, dst_bits / 8u).as_subdword());
661 return bld.copy(Definition(dst), src);
663 return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::zero());
668 tmp = src_bits == 32 ? src : bld.tmp(src.type(), 1);
673 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(),
677 bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
684 bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(31u));
685 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
687 Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), tmp);
688 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
690 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero());
716 Builder bld(ctx->program, ctx->block);
717 Temp tmp = dst.regClass() == s2 ? bld.tmp(s1) : dst;
720 bld.copy(Definition(tmp), vec);
722 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), Operand(vec),
727 convert_int(ctx, bld, tmp, 32, 64, mode == sgpr_extract_sext, dst);
806 Builder bld(ctx->program, ctx->block);
808 return bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), it->second[index],
833 Builder bld(ctx->program, ctx->block);
835 ptr = bld.as_uniform(ptr);
836 return bld.pseudo(aco_opcode::p_create_vector, bld.def(RegClass(ptr.type(), 2)), ptr,
872 Builder bld(ctx->program, ctx->block);
873 bld.is_precise = instr->exact;
901 Temp tmp = bld.vop2(opc, bld.def(v1), op[0], op[1]);
902 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp);
905 bld.nuw().vop2(opc, Definition(dst), op[0], op[1]);
907 bld.vop2(opc, Definition(dst), op[0], op[1]);
915 Builder bld(ctx->program, ctx->block);
916 bld.is_precise = instr->exact;
926 Temp src00 = bld.tmp(src0.type(), 1);
927 Temp src01 = bld.tmp(src0.type(), 1);
928 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
929 Temp src10 = bld.tmp(v1);
930 Temp src11 = bld.tmp(v1);
931 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
932 Temp lo = bld.vop2(op, bld.def(v1), src00, src10);
933 Temp hi = bld.vop2(op, bld.def(v1), src01, src11);
934 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
952 Builder bld(ctx->program, ctx->block);
953 bld.is_precise = instr->exact;
957 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1], src[2]);
959 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]);
961 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp);
963 bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand::c64(0x3FF0000000000000), tmp);
965 bld.vop3(op, Definition(dst), src[0], src[1], src[2]);
967 bld.vop3(op, Definition(dst), src[0], src[1]);
987 Builder bld(ctx->program, ctx->block);
988 bld.is_precise = instr->exact;
989 Builder::Result res = bld.vop3p(op, Definition(dst), src0, src1, opsel_lo, opsel_hi);
1006 Builder bld(ctx->program, ctx->block);
1007 bld.is_precise = instr->exact;
1008 bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p().clamp = clamp;
1014 Builder bld(ctx->program, ctx->block);
1015 bld.is_precise = instr->exact;
1017 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
1018 bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0])));
1020 bld.vop1(op, Definition(dst), get_alu_src(ctx, instr->src[0]));
1063 Builder bld(ctx->program, ctx->block);
1064 bld.vopc(op, Definition(dst), src0, src1);
1072 Builder bld(ctx->program, ctx->block);
1074 assert(dst.regClass() == bld.lm);
1080 Temp cmp = bld.sopc(op, bld.scc(bld.def(s1)), src0, src1);
1113 Builder bld(ctx->program, ctx->block);
1117 assert(dst.regClass() == bld.lm);
1118 assert(src0.regClass() == bld.lm);
1119 assert(src1.regClass() == bld.lm);
1121 bld.sop2(op, Definition(dst), bld.def(s1, scc), src0, src1);
1127 Builder bld(ctx->program, ctx->block);
1132 assert(cond.regClass() == bld.lm);
1140 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), els, then, cond);
1142 Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
1143 bld.pseudo(aco_opcode::p_split_vector, Definition(then_lo), Definition(then_hi), then);
1144 Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
1145 bld.pseudo(aco_opcode::p_split_vector, Definition(else_lo), Definition(else_hi), els);
1147 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, cond);
1148 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, cond);
1150 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1158 assert(dst.regClass() == bld.lm);
1159 assert(then.regClass() == bld.lm);
1160 assert(els.regClass() == bld.lm);
1170 bld.sop2(op, Definition(dst), then, els, bld.scc(bool_to_scalar_condition(ctx, cond)));
1183 then = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), cond, then);
1186 bld.copy(Definition(dst), then);
1188 bld.sop2(Builder::s_or, Definition(dst), bld.def(s1, scc), then,
1189 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), els, cond));
1193 emit_scaled_op(isel_context* ctx, Builder& bld, Definition dst, Temp val, aco_opcode op,
1197 Temp is_denormal = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), as_vgpr(ctx, val),
1198 bld.copy(bld.def(v1), Operand::c32((1u << 7) | (1u << 4))));
1199 Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x4b800000u), val);
1200 scaled = bld.vop1(op, bld.def(v1), scaled);
1201 scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(undo), scaled);
1203 Temp not_scaled = bld.vop1(op, bld.def(v1), val);
1205 bld.vop2(aco_opcode::v_cndmask_b32, dst, not_scaled, scaled, is_denormal);
1209 emit_rcp(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1212 bld.vop1(aco_opcode::v_rcp_f32, dst, val);
1216 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_rcp_f32, 0x4b800000u);
1220 emit_rsq(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1223 bld.vop1(aco_opcode::v_rsq_f32, dst, val);
1227 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_rsq_f32, 0x45800000u);
1231 emit_sqrt(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1234 bld.vop1(aco_opcode::v_sqrt_f32, dst, val);
1238 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_sqrt_f32, 0x39800000u);
1242 emit_log2(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1245 bld.vop1(aco_opcode::v_log_f32, dst, val);
1249 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_log_f32, 0xc1c00000u);
1253 emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1256 return bld.vop1(aco_opcode::v_trunc_f64, Definition(dst), val);
1264 Temp val_lo = bld.tmp(v1), val_hi = bld.tmp(v1);
1265 bld.pseudo(aco_opcode::p_split_vector, Definition(val_lo), Definition(val_hi), val);
1269 bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), val_hi, Operand::c32(20u), Operand::c32(11u));
1270 exponent = bld.vsub32(bld.def(v1), exponent, Operand::c32(1023u));
1273 Temp fract_mask = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
1275 fract_mask = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), fract_mask, exponent);
1277 Temp fract_mask_lo = bld.tmp(v1), fract_mask_hi = bld.tmp(v1);
1278 bld.pseudo(aco_opcode::p_split_vector, Definition(fract_mask_lo), Definition(fract_mask_hi),
1281 Temp fract_lo = bld.tmp(v1), fract_hi = bld.tmp(v1);
1282 Temp tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_lo);
1283 fract_lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_lo, tmp);
1284 tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_hi);
1285 fract_hi = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_hi, tmp);
1288 Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x80000000u), val_hi);
1292 bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm), exponent, Operand::zero());
1293 Temp dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_lo,
1294 bld.copy(bld.def(v1), Operand::zero()), exp_lt0);
1295 Temp dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_hi, sign, exp_lt0);
1296 Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand::c32(51u));
1297 dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_lo, val_lo, exp_gt51);
1298 dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_hi, val_hi, exp_gt51);
1300 return bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst_lo, dst_hi);
1304 emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1307 return bld.vop1(aco_opcode::v_floor_f64, Definition(dst), val);
1313 Temp mask = bld.copy(bld.def(s1), Operand::c32(3u)); /* isnan */
1314 Temp min_val = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::c32(-1u),
1317 Temp isnan = bld.vopc_e64(aco_opcode::v_cmp_class_f64, bld.def(bld.lm), src0, mask);
1318 Temp fract = bld.vop1(aco_opcode::v_fract_f64, bld.def(v2), src0);
1319 Temp min = bld.vop3(aco_opcode::v_min_f64, bld.def(v2), fract, min_val);
1321 Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
1322 bld.pseudo(aco_opcode::p_split_vector, Definition(then_lo), Definition(then_hi), src0);
1323 Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
1324 bld.pseudo(aco_opcode::p_split_vector, Definition(else_lo), Definition(else_hi), min);
1326 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, isnan);
1327 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, isnan);
1329 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
1331 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
1338 uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1340 if (bld.program->gfx_level < GFX8) {
1341 Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true);
1342 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand::c32(-1),
1347 if (bld.program->gfx_level >= GFX9) {
1348 add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
1350 add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1);
1357 usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1359 if (bld.program->gfx_level < GFX8) {
1360 Builder::Result sub = bld.vsub32(bld.def(v1), src0, src1, true);
1361 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, sub.def(0).getTemp(), Operand::c32(0u),
1366 if (bld.program->gfx_level >= GFX9) {
1367 sub = bld.vop2_e64(aco_opcode::v_sub_u32, dst, src0, src1);
1369 sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1);
1382 Builder bld(ctx->program, ctx->block);
1383 bld.is_precise = instr->exact;
1411 Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->dest.dest.ssa.bit_size) - 1));
1426 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), elems[i], mask);
1429 elems[i] = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), elems[i],
1433 packed[idx] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), elems[i],
1444 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2],
1447 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1),
1450 packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2],
1462 packed[i] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
1465 packed[i] = bld.copy(bld.def(s1), Operand::c32(const_vals[i]));
1469 bld.copy(Definition(dst), packed[0]);
1471 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), packed[0], packed[1]);
1473 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), packed[0], packed[1],
1483 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
1486 bld.copy(Definition(dst), src);
1495 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
1496 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
1497 lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), lo);
1498 hi = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), hi);
1499 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
1502 bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
1515 Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
1517 bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
1522 bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src);
1524 bld.vop2(aco_opcode::v_max_i32, Definition(dst), src,
1525 bld.vsub32(bld.def(v1), Operand::zero(), src));
1527 bld.vop3(
1529 bld.vop3(aco_opcode::v_sub_u16_e64, Definition(bld.tmp(v2b)), Operand::zero(2), src));
1532 bld.vop2(aco_opcode::v_max_i16, Definition(dst), src,
1533 bld.vop2(aco_opcode::v_sub_u16, Definition(bld.tmp(v2b)), Operand::zero(2), src));
1543 bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(-1));
1544 bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand::c32(1u));
1547 bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand::c32(63u));
1550 neqz = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), src, Operand::zero());
1553 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), src, Operand::zero())
1557 bld.sop2(aco_opcode::s_or_b64, Definition(dst), bld.def(s1, scc), neg, bld.scc(neqz));
1559 bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand::c32(-1), src, Operand::c32(1u));
1561 bld.vop3(aco_opcode::v_med3_i16, Definition(dst), Operand::c16(-1), src, Operand::c16(1u));
1564 bld.vop2(aco_opcode::v_max_i16, Definition(dst), Operand::c16(-1),
1565 bld.vop2(aco_opcode::v_min_i16, Definition(bld.tmp(v1)), Operand::c16(1u), src));
1568 Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), upper);
1569 Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i64, bld.def(bld.lm), Operand::zero(), src);
1570 Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(1u), neg, gtz);
1571 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), neg, gtz);
1572 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
1700 bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1724 bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1747 bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1763 bld.sop1(aco_opcode::s_ff1_i32_b32, Definition(dst), src);
1767 bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src);
1782 Temp msb_rev = bld.sop1(op, bld.def(s1), src);
1784 Builder::Result sub = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
1789 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), msb,
1790 bld.scc(carry));
1794 Temp msb_rev = bld.tmp(v1);
1796 Temp msb = bld.tmp(v1);
1798 bld.vsub32(Definition(msb), Operand::c32(31u), Operand(msb_rev), true).def(1).getTemp();
1799 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), msb, msb_rev, carry);
1804 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
1805 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
1807 lo = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)),
1808 bld.vop1(op, bld.def(v1), lo));
1809 hi = bld.vop1(op, bld.def(v1), hi);
1810 Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi);
1812 Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi);
1814 Temp msb = bld.tmp(v1);
1816 bld.vsub32(Definition(msb), Operand::c32(63u), Operand(msb_rev), true).def(1).getTemp();
1817 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), msb, msb_rev, carry);
1826 Temp msb_rev = bld.sop1(aco_opcode::s_flbit_i32_b32, bld.def(s1), src);
1827 bld.sop2(aco_opcode::s_min_u32, Definition(dst), Operand::c32(32u), msb_rev);
1829 Temp msb_rev = bld.vop1(aco_opcode::v_ffbh_u32, bld.def(v1), src);
1830 bld.vop2(aco_opcode::v_min_u32, Definition(dst), Operand::c32(32u), msb_rev);
1838 bld.sop1(aco_opcode::s_brev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1840 bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1864 bld.vadd32(Definition(dst), Operand(src0), Operand(src1));
1869 Temp src00 = bld.tmp(src0.type(), 1);
1870 Temp src01 = bld.tmp(dst.type(), 1);
1871 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
1872 Temp src10 = bld.tmp(src1.type(), 1);
1873 Temp src11 = bld.tmp(dst.type(), 1);
1874 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
1877 Temp carry = bld.tmp(s1);
1879 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src10);
1880 Temp dst1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
1881 bld.scc(carry));
1882 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1884 Temp dst0 = bld.tmp(v1);
1885 Temp carry = bld.vadd32(Definition(dst0), src00, src10, true).def(1).getTemp();
1886 Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry);
1887 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1903 Temp tmp = bld.tmp(s1), carry = bld.tmp(s1);
1904 bld.sop2(aco_opcode::s_add_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1);
1905 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), tmp,
1906 bld.scc(carry));
1911 add_instr = bld.vop3(aco_opcode::v_add_u16_e64, Definition(dst), src0, src1).instr;
1916 bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
1921 uadd32_sat(bld, Definition(dst), src0, src1);
1927 Temp src00 = bld.tmp(src0.type(), 1);
1928 Temp src01 = bld.tmp(src0.type(), 1);
1929 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
1930 Temp src10 = bld.tmp(src1.type(), 1);
1931 Temp src11 = bld.tmp(src1.type(), 1);
1932 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
1935 Temp carry0 = bld.tmp(s1);
1936 Temp carry1 = bld.tmp(s1);
1939 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10);
1940 Temp no_sat1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(Definition(carry1)),
1941 src01, src11, bld.scc(carry0));
1943 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
1945 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(-1), no_sat,
1946 bld.scc(carry1));
1948 Temp no_sat0 = bld.tmp(v1);
1949 Temp dst0 = bld.tmp(v1);
1950 Temp dst1 = bld.tmp(v1);
1952 Temp carry0 = bld.vadd32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
1956 carry1 = bld.tmp(bld.lm);
1957 bld.vop2_e64(aco_opcode::v_addc_co_u32, Definition(dst1), Definition(carry1),
1962 Temp no_sat1 = bld.tmp(v1);
1963 carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
1964 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst1), no_sat1, Operand::c32(-1),
1968 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst0), no_sat0, Operand::c32(-1),
1970 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1986 Temp cond = bld.sopc(aco_opcode::s_cmp_lt_i32, bld.def(s1, scc), src1, Operand::zero());
1987 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
1989 Temp overflow = bld.tmp(s1);
1991 bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1);
1992 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, add, bld.scc(overflow));
2000 bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr;
2004 bld.vop3(aco_opcode::v_add_i32, Definition(dst), src0, src1).instr;
2015 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1);
2019 Temp carry = bld.vadd32(bld.def(v1), src0, src1, true).def(1).getTemp();
2020 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u),
2025 Temp src00 = bld.tmp(src0.type(), 1);
2026 Temp src01 = bld.tmp(dst.type(), 1);
2027 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
2028 Temp src10 = bld.tmp(src1.type(), 1);
2029 Temp src11 = bld.tmp(dst.type(), 1);
2030 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
2032 Temp carry = bld.tmp(s1);
2033 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src10);
2034 carry = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(bld.def(s1)), src01, src11,
2035 bld.scc(carry))
2038 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero());
2040 Temp carry = bld.vadd32(bld.def(v1), src00, src10, true).def(1).getTemp();
2041 carry = bld.vadd32(bld.def(v1), src01, src11, true, carry).def(1).getTemp();
2042 carry = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
2044 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero());
2062 bld.vsub32(Definition(dst), src0, src1);
2066 bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1);
2068 bld.vop2(aco_opcode::v_subrev_u16, Definition(dst), src1, as_vgpr(ctx, src0));
2070 bld.vop2(aco_opcode::v_sub_u16, Definition(dst), src0, as_vgpr(ctx, src1));
2072 bld.vsub32(Definition(dst), src0, src1);
2076 Temp src00 = bld.tmp(src0.type(), 1);
2077 Temp src01 = bld.tmp(dst.type(), 1);
2078 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
2079 Temp src10 = bld.tmp(src1.type(), 1);
2080 Temp src11 = bld.tmp(dst.type(), 1);
2081 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
2083 Temp borrow = bld.tmp(s1);
2085 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), src00, src10);
2086 Temp dst1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
2087 bld.scc(borrow));
2088 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
2090 Temp lower = bld.tmp(v1);
2091 Temp borrow = bld.vsub32(Definition(lower), src00, src10, true).def(1).getTemp();
2092 Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow);
2093 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
2103 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1);
2106 Temp borrow = bld.vsub32(bld.def(v1), src0, src1, true).def(1).getTemp();
2107 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u),
2112 Temp src00 = bld.tmp(src0.type(), 1);
2113 Temp src01 = bld.tmp(dst.type(), 1);
2114 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
2115 Temp src10 = bld.tmp(src1.type(), 1);
2116 Temp src11 = bld.tmp(dst.type(), 1);
2117 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
2119 Temp borrow = bld.tmp(s1);
2120 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), src00, src10);
2121 borrow = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(bld.def(s1)), src01, src11,
2122 bld.scc(borrow))
2125 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero());
2127 Temp borrow = bld.vsub32(bld.def(v1), src00, src10, true).def(1).getTemp();
2128 borrow = bld.vsub32(bld.def(v1), src01, src11, true, Operand(borrow)).def(1).getTemp();
2129 borrow = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
2131 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero());
2146 Temp tmp = bld.tmp(s1), carry = bld.tmp(s1);
2147 bld.sop2(aco_opcode::s_sub_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1);
2148 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(0), tmp, bld.scc(carry));
2153 sub_instr = bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1).instr;
2160 sub_instr = bld.vop2_e64(op, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
2165 usub32_sat(bld, Definition(dst), src0, as_vgpr(ctx, src1));
2170 Temp src00 = bld.tmp(src0.type(), 1);
2171 Temp src01 = bld.tmp(src0.type(), 1);
2172 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
2173 Temp src10 = bld.tmp(src1.type(), 1);
2174 Temp src11 = bld.tmp(src1.type(), 1);
2175 bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
2178 Temp carry0 = bld.tmp(s1);
2179 Temp carry1 = bld.tmp(s1);
2182 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10);
2183 Temp no_sat1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(Definition(carry1)),
2184 src01, src11, bld.scc(carry0));
2186 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
2188 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(0ull), no_sat,
2189 bld.scc(carry1));
2191 Temp no_sat0 = bld.tmp(v1);
2192 Temp dst0 = bld.tmp(v1);
2193 Temp dst1 = bld.tmp(v1);
2195 Temp carry0 = bld.vsub32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
2199 carry1 = bld.tmp(bld.lm);
2200 bld.vop2_e64(aco_opcode::v_subb_co_u32, Definition(dst1), Definition(carry1),
2205 Temp no_sat1 = bld.tmp(v1);
2206 carry1 = bld.vsub32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
2207 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst1), no_sat1, Operand::c32(0u),
2211 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst0), no_sat0, Operand::c32(0u),
2213 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
2228 Temp cond = bld.sopc(aco_opcode::s_cmp_gt_i32, bld.def(s1, scc), src1, Operand::zero());
2229 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
2231 Temp overflow = bld.tmp(s1);
2233 bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1);
2234 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, sub, bld.scc(overflow));
2242 bld.vop3(aco_opcode::v_sub_i16, Definition(dst), src0, src1).instr;
2246 bld.vop3(aco_opcode::v_sub_i32, Definition(dst), src0, src1).instr;
2269 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[1]),
2272 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[0]),
2291 Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst;
2299 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
2311 Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]),
2313 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
2377 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0),
2402 bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi);
2492 Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]);
2493 ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma);
2494 Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
2495 Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
2496 sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/),
2497 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma));
2498 tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/),
2499 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma));
2500 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
2507 bld.vop3(aco_opcode::v_cubeid_f32, Definition(dst), src[0], src[1], src[2]);
2519 emit_rsq(ctx, bld, Definition(dst), src);
2532 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
2540 bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0xbc00u), as_vgpr(ctx, src));
2542 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0xbf800000u),
2546 src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
2548 Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
2549 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2550 upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand::c32(0x80000000u), upper);
2551 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
2561 bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
2570 Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
2575 Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f32, Definition(dst),
2581 src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
2583 Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
2584 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2585 upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7FFFFFFFu), upper);
2586 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
2596 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
2603 bld.vop3(aco_opcode::v_med3_f16, Definition(dst), Operand::c16(0u), Operand::c16(0x3c00),
2606 bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::zero(),
2612 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand::zero());
2624 emit_log2(ctx, bld, Definition(dst), src);
2635 emit_rcp(ctx, bld, Definition(dst), src);
2659 emit_sqrt(ctx, bld, Definition(dst), src);
2687 emit_floor_f64(ctx, bld, Definition(dst), src);
2708 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src0);
2710 bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, Operand::zero());
2711 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f64, bld.def(bld.lm), src0, trunc);
2712 Temp cond = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc), tmp0, tmp1);
2713 Temp add = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
2714 bld.copy(bld.def(v1), Operand::zero()),
2715 bld.copy(bld.def(v1), Operand::c32(0x3ff00000u)), cond);
2716 add = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2),
2717 bld.copy(bld.def(v1), Operand::zero()), add);
2718 bld.vop3(aco_opcode::v_add_f64, Definition(dst), trunc, add);
2732 emit_trunc_f64(ctx, bld, Definition(dst), src);
2748 Temp src0_lo = bld.tmp(v1), src0_hi = bld.tmp(v1);
2750 bld.pseudo(aco_opcode::p_split_vector, Definition(src0_lo), Definition(src0_hi), src0);
2752 Temp bitmask = bld.sop1(aco_opcode::s_brev_b32, bld.def(s1),
2753 bld.copy(bld.def(s1), Operand::c32(-2u)));
2755 bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask,
2756 bld.copy(bld.def(v1), Operand::c32(0x43300000u)), as_vgpr(ctx, src0_hi));
2758 bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0,
2759 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi));
2761 bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp,
2762 bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi));
2766 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
2768 Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, v);
2772 Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
2773 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp_lo), Definition(tmp_hi), tmp);
2774 Temp dst0 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_lo,
2776 Temp dst1 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_hi,
2779 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
2793 bld.vop1(opcode, Definition(dst), src);
2797 src = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), src);
2801 bld.vop1(opcode, Definition(dst), src);
2834 Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src);
2835 tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), tmp, Operand::zero());
2836 convert_int(ctx, bld, tmp, 8, 32, true, dst);
2851 src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
2853 bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u));
2854 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
2856 src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
2858 bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u));
2859 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
2861 Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
2862 Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
2863 Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
2866 cond = bld.vopc(aco_opcode::v_cmp_le_f64, bld.def(bld.lm), Operand::zero(), src);
2867 tmp = bld.copy(bld.def(v1), Operand::c32(0xBFF00000u));
2868 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);
2870 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
2880 src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
2885 bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
2887 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2893 src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
2895 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2897 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand::zero());
2899 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, as_vgpr(ctx, src));
2915 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
2916 bld.vop1(aco_opcode::v_cvt_f64_f32, Definition(dst), src);
2927 src = convert_int(ctx, bld, src, input_size, target_size, true);
2935 src = convert_int(ctx, bld, src, 64, 32, false);
2939 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
2949 src = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), src);
2950 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2961 src = convert_int(ctx, bld, src, input_size, 32, true);
2963 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
2967 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
2968 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2969 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
2970 upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper);
2971 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
2972 upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper);
2973 bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper);
2982 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true);
2983 bld.vop1(aco_opcode::v_cvt_f64_i32, Definition(dst), src);
2987 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
2988 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2989 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
2990 upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper);
2991 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
2992 bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
3007 src = convert_int(ctx, bld, src, input_size, target_size, false);
3015 src = convert_int(ctx, bld, src, 64, 32, false);
3021 bld.vop1(aco_opcode::v_cvt_f16_u16, Definition(dst), src);
3024 src = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), src);
3025 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
3034 bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src);
3037 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false);
3038 bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(dst), src);
3042 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3043 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
3044 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
3045 upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper);
3046 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
3047 upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper);
3048 bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper);
3056 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false);
3057 bld.vop1(aco_opcode::v_cvt_f64_u32, Definition(dst), src);
3061 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3062 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
3063 lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower);
3064 upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper);
3065 upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u));
3066 bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
3079 Temp tmp = bld.tmp(v1);
3081 tmp = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp);
3082 tmp = convert_int(ctx, bld, tmp, 32, instr->dest.dest.ssa.bit_size, false,
3085 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
3102 Temp tmp = bld.tmp(v1);
3104 tmp = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp);
3105 tmp = convert_int(ctx, bld, tmp, 32, instr->dest.dest.ssa.bit_size, false,
3108 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
3121 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3123 bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(dst), tmp);
3125 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
3126 bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), tmp));
3140 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3142 bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(dst), tmp);
3144 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
3145 bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), tmp));
3159 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3162 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3163 exponent = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::zero(), exponent,
3165 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3166 Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), src);
3167 mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(0x800000u), mantissa);
3168 mantissa = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(7u), mantissa);
3169 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), mantissa);
3170 Temp new_exponent = bld.tmp(v1);
3172 bld.vsub32(Definition(new_exponent), Operand::c32(63u), exponent, true).def(1).getTemp();
3174 mantissa = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), new_exponent, mantissa);
3176 mantissa = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), mantissa, new_exponent);
3177 Temp saturate = bld.vop1(aco_opcode::v_bfrev_b32, bld.def(v1), Operand::c32(0xfffffffeu));
3178 Temp lower = bld.tmp(v1), upper = bld.tmp(v1);
3179 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
3180 lower = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), lower,
3182 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), upper, saturate, borrow);
3183 lower = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), sign, lower);
3184 upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), sign, upper);
3185 Temp new_lower = bld.tmp(v1);
3186 borrow = bld.vsub32(Definition(new_lower), lower, sign, true).def(1).getTemp();
3187 Temp new_upper = bld.vsub32(bld.def(v1), upper, sign, false, borrow);
3188 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), new_lower, new_upper);
3192 src = bld.as_uniform(src);
3193 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3195 exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent,
3197 exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand::zero(),
3199 exponent = bld.sop2(aco_opcode::s_min_i32, bld.def(s1), bld.def(s1, scc),
3201 Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3204 bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(31u));
3205 mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
3207 mantissa = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), mantissa,
3209 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), mantissa);
3210 exponent = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3213 bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), mantissa, exponent);
3214 Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), exponent,
3216 Temp saturate = bld.sop1(aco_opcode::s_brev_b64, bld.def(s2), Operand::c32(0xfffffffeu));
3217 mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), saturate, mantissa, cond);
3218 Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
3219 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
3220 lower = bld.sop2(aco_opcode::s_xor_b32, bld.def(s1), bld.def(s1, scc), sign, lower);
3221 upper = bld.sop2(aco_opcode::s_xor_b32, bld.def(s1), bld.def(s1, scc), sign, upper);
3222 Temp borrow = bld.tmp(s1);
3224 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(borrow)), lower, sign);
3225 upper = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), upper, sign,
3226 bld.scc(borrow));
3227 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3230 Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3232 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3233 Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3234 vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3236 Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3237 Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3238 Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3239 Temp upper = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), floor);
3241 lower = bld.as_uniform(lower);
3242 upper = bld.as_uniform(upper);
3244 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3254 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3257 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3259 bld.vopc(aco_opcode::v_cmp_ge_i32, bld.def(bld.lm), Operand::c32(64u), exponent);
3260 exponent = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::zero(), exponent);
3261 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3262 mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(0x800000u), mantissa);
3263 Temp exponent_small = bld.vsub32(bld.def(v1), Operand::c32(24u), exponent);
3264 Temp small = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), exponent_small, mantissa);
3265 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), mantissa);
3266 Temp new_exponent = bld.tmp(v1);
3268 bld.vsub32(Definition(new_exponent), exponent, Operand::c32(24u), true).def(1).getTemp();
3270 mantissa = bld.vop3(aco_opcode::v_lshlrev_b64, bld.def(v2), new_exponent, mantissa);
3272 mantissa = bld.vop3(aco_opcode::v_lshl_b64, bld.def(v2), mantissa, new_exponent);
3273 Temp lower = bld.tmp(v1), upper = bld.tmp(v1);
3274 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
3275 lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lower, small, cond_small);
3276 upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), upper, Operand::zero(),
3278 lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0xffffffffu), lower,
3280 upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0xffffffffu), upper,
3282 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3286 src = bld.as_uniform(src);
3287 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3289 exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent,
3291 exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand::zero(),
3293 Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3295 mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
3297 Temp exponent_small = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3299 Temp small = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), mantissa,
3301 mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), mantissa);
3302 Temp exponent_large = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3304 mantissa = bld.sop2(aco_opcode::s_lshl_b64, bld.def(s2), bld.def(s1, scc), mantissa,
3307 bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent);
3308 mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa,
3310 Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
3311 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
3313 bld.sopc(aco_opcode::s_cmp_le_i32, bld.def(s1, scc), exponent, Operand::c32(24u));
3314 lower = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), small, lower, cond_small);
3316 bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::zero(), upper, cond_small);
3317 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3320 Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3322 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3323 Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3324 vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3326 Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3327 Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3328 Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3329 Temp upper = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), floor);
3331 lower = bld.as_uniform(lower);
3332 upper = bld.as_uniform(upper);
3334 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3343 assert(src.regClass() == bld.lm);
3347 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3c00u), src);
3349 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3c00u));
3350 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), one, src);
3358 assert(src.regClass() == bld.lm);
3362 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3f800000u), src);
3364 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(),
3373 assert(src.regClass() == bld.lm);
3377 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c32(0x3f800000u),
3378 Operand::zero(), bld.scc(src));
3380 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
3382 bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), one, src);
3383 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
3402 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), input_bitsize, output_bitsize,
3418 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), instr->src[0].src.ssa->bit_size,
3429 assert(src.regClass() == bld.lm);
3431 Temp tmp = dst.bytes() == 8 ? bld.tmp(RegClass::get(dst.type(), 4)) : dst;
3435 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(tmp), Operand::zero(), Operand::c32(1u),
3442 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero());
3448 assert(dst.regClass() == bld.lm);
3452 assert(dst.regClass() == bld.lm);
3453 bld.vopc(src.size() == 2 ? aco_opcode::v_cmp_lg_u64 : aco_opcode::v_cmp_lg_u32,
3460 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), Operand::zero(), src)
3464 tmp = bld.sopc(src.size() == 2 ? aco_opcode::s_cmp_lg_u64 : aco_opcode::s_cmp_lg_u32,
3465 bld.scc(bld.def(s1)), Operand::zero(), src);
3474 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3481 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1);
3485 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()),
3489 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst),
3494 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()),
3497 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3502 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst),
3505 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
3516 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1);
3518 src0 = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), src0,
3520 src1 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), src1,
3522 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), src0, src1);
3526 case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
3545 bld.vop3(opcode, Definition(dst), src0, src1);
3555 bld.vop3(opcode, Definition(dst), src0, src1);
3562 src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src);
3566 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
3576 src = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), src,
3580 bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp();
3584 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
3597 Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
3601 Temp mask = bld.copy(
3602 bld.def(s1), Operand::c32(0x36Fu)); /* value is NOT negative/positive denormal value */
3603 cmp_res = bld.vopc_e64(aco_opcode::v_cmp_class_f16, bld.def(bld.lm), f16, mask);
3604 f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
3609 f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
3610 Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u));
3611 Instruction* tmp0 = bld.vopc_e64(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), f32, smallest);
3613 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f32, bld.def(bld.lm), Operand::zero(), f32);
3614 cmp_res = bld.sop2(aco_opcode::s_nand_b64, bld.def(s2), bld.def(s1, scc),
3620 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::zero(), as_vgpr(ctx, src));
3621 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), copysign_0, f32, cmp_res);
3623 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), f32, cmp_res);
3632 bld.sop2(aco_opcode::s_bfm_b32, Definition(dst), bits, offset);
3634 bld.vop3(aco_opcode::v_bfm_b32, Definition(dst), bits, offset);
3655 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), insert, bitmask);
3664 base = bld.sop2(aco_opcode::s_andn2_b32, bld.def(s1), bld.def(s1, scc), base, bitmask);
3668 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), rhs, lhs);
3691 bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract));
3698 Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
3700 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
3701 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
3704 : bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1),
3705 bld.def(s1, scc), bits, Operand::c32(16u));
3708 : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3712 bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op);
3713 bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
3733 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3742 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(vec),
3750 def = bld.def(src.type(), 1);
3754 bld.pseudo(aco_opcode::p_extract, def, bld.def(s1, scc), Operand(src),
3758 bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index),
3762 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
3774 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3783 def = bld.def(src.type(), 1);
3786 bld.pseudo(aco_opcode::p_insert, def, bld.def(s1, scc), Operand(src),
3790 bld.pseudo(aco_opcode::p_insert, def, Operand(src), Operand::c32(index),
3794 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(),
3797 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
3805 bld.sop1(aco_opcode::s_bcnt1_i32_b32, Definition(dst), bld.def(s1, scc), src);
3807 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), src, Operand::zero());
3809 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), emit_extract_vector(ctx, src, 1, v1),
3810 bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1),
3813 bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src);
3889 bld.copy(Definition(dst), Operand::zero());
3911 Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
3912 tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), src, tl, dpp_ctrl2);
3914 Temp tl = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl1);
3915 Temp tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2);
3916 tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), tr, tl);
3918 emit_wqm(bld, tmp, dst, true);
3936 Builder bld(ctx->program, ctx->block);
3939 assert(dst.regClass() == bld.lm);
3941 Operand op = bld.lm.size() == 1 ? Operand::c32(val) : Operand::c64(val);
3942 bld.copy(Definition(dst), op);
3944 bld.copy(Definition(dst), Operand::c32(instr->value[0].u8));
3947 bld.copy(Definition(dst), Operand::c32(instr->value[0].i16));
3949 bld.copy(Definition(dst), Operand::c32(instr->value[0].u32));
4002 using Callback = Temp (*)(Builder& bld, const LoadEmitInfo& info, Temp offset,
4013 emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
4078 offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), offset_tmp,
4081 offset = bld.vadd32(bld.def(v1), offset_tmp, Operand::c32(to_add));
4083 Temp lo = bld.tmp(offset_tmp.type(), 1);
4084 Temp hi = bld.tmp(offset_tmp.type(), 1);
4085 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), offset_tmp);
4088 Temp carry = bld.tmp(s1);
4089 lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), lo,
4091 hi = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), hi, carry);
4092 offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), lo, hi);
4094 Temp new_lo = bld.tmp(v1);
4096 bld.vadd32(Definition(new_lo), lo, Operand::c32(to_add), true).def(1).getTemp();
4097 hi = bld.vadd32(bld.def(v1), hi, Operand::zero(), false, carry);
4098 offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), new_lo, hi);
4112 aligned_offset = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
4115 aligned_offset = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc),
4119 bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xfffffffcu), offset_tmp);
4121 Temp hi = bld.tmp(v1), lo = bld.tmp(v1);
4122 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), offset_tmp);
4123 lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xfffffffcu), lo);
4124 aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi);
4128 aligned_offset.isTemp() ? aligned_offset.getTemp() : bld.copy(bld.def(s1), aligned_offset);
4130 Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
4194 tmp[0] = bld.tmp(RegClass::get(reg_type, tmp_size));
4196 bld.insert(std::move(vec));
4205 bld.pseudo(aco_opcode::p_extract_vector, bld.def(new_rc), tmp[0], Operand::zero());
4219 Temp component = bld.tmp(elem_rc);
4224 bld.insert(std::move(split));
4231 allocated_vec[j] = bld.as_uniform(allocated_vec[j]);
4250 Temp tmp = bld.tmp(RegType::vgpr, info.dst.size());
4252 bld.insert(std::move(vec));
4253 bld.pseudo(aco_opcode::p_as_uniform, Definition(info.dst), tmp);
4256 bld.insert(std::move(vec));
4261 load_lds_size_m0(Builder& bld)
4264 if (bld.program->gfx_level >= GFX9)
4267 return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0xffffffffu)));
4271 lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4274 offset = offset.regClass() == s1 ? bld.copy(bld.def(v1), offset) : offset;
4276 Operand m = load_lds_size_m0(bld);
4278 bool large_ds_read = bld.program->gfx_level >= GFX7;
4279 bool usable_read2 = bld.program->gfx_level >= GFX7;
4306 op = bld.program->gfx_level >= GFX9 ? aco_opcode::ds_read_u16_d16 : aco_opcode::ds_read_u16;
4309 op = bld.program->gfx_level >= GFX9 ? aco_opcode::ds_read_u8_d16 : aco_opcode::ds_read_u8;
4317 offset = bld.vadd32(bld.def(v1), offset, Operand::c32(excess));
4324 Temp val = rc == info.dst.regClass() && dst_hint.id() ? dst_hint : bld.tmp(rc);
4327 instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1);
4329 instr = bld.ds(op, Definition(val), offset, m, const_offset);
4341 smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4376 offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
4383 load->operands[1] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
4391 Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
4394 load->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
4396 bld.insert(std::move(load));
4403 mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4411 vaddr = bld.copy(bld.def(v1), soffset);
4429 } else if (bytes_needed <= 12 && bld.program->gfx_level > GFX6) {
4443 info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
4449 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4451 bld.insert(std::move(mubuf));
4459 scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4484 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4491 bld.insert(std::move(flat));
4500 get_gfx6_global_rsrc(Builder& bld, Temp addr)
4506 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), Operand::zero(), Operand::zero(),
4508 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), addr, Operand::c32(-1u),
4513 add64_32(Builder& bld, Temp src0, Temp src1)
4515 Temp src00 = bld.tmp(src0.type(), 1);
4516 Temp src01 = bld.tmp(src0.type(), 1);
4517 bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
4520 Temp dst0 = bld.tmp(v1);
4521 Temp carry = bld.vadd32(Definition(dst0), src00, src1, true).def(1).getTemp();
4522 Temp dst1 = bld.vadd32(bld.def(v1), src01, Operand::zero(), false, carry);
4523 return bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
4525 Temp carry = bld.tmp(s1);
4527 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), src00, src1);
4528 Temp dst1 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), src01, carry);
4529 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), dst0, dst1);
4534 lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
4543 if (bld.program->gfx_level >= GFX9)
4544 max_const_offset_plus_one = bld.program->dev.scratch_global_offset_max;
4545 else if (bld.program->gfx_level == GFX6)
4552 address = add64_32(bld, address, bld.copy(bld.def(s1), Operand::c32(UINT32_MAX)));
4556 offset = bld.copy(bld.def(s1), Operand::c32(excess_offset));
4566 address = add64_32(bld, address, bld.copy(bld.def(s1), Operand::c32(src2)));
4571 if (bld.program->gfx_level == GFX6) {
4574 address = add64_32(bld, address, offset);
4577 offset = offset.id() ? offset : bld.copy(bld.def(s1), Operand::zero());
4578 } else if (bld.program->gfx_level <= GFX8) {
4581 address = add64_32(bld, address, offset);
4584 address = as_vgpr(bld, address);
4588 address = add64_32(bld, address, offset);
4591 offset = as_vgpr(bld, offset);
4594 offset = bld.copy(bld.def(v1), bld.copy(bld.def(s1), Operand::zero()));
4603 global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4611 lower_global_address(bld, 0, &addr, &const_offset, &offset);
4614 bool use_mubuf = bld.program->gfx_level == GFX6;
4615 bool global = bld.program->gfx_level >= GFX9;
4647 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4651 mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, addr));
4661 bld.insert(std::move(mubuf));
4676 info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
4681 bld.insert(std::move(flat));
4695 Builder bld(ctx->program, ctx->block);
4702 emit_load(ctx, bld, info, lds_load_params);
4714 Builder bld(ctx->program, ctx->block);
4719 dst[0] = bld.as_uniform(src);
4733 dst[i] = bld.tmp(RegClass::get(dst_type, bytes[i]));
4761 src = bld.as_uniform(src);
4768 temps.emplace_back(bld.tmp(RegClass::get(dst_type, elem_size_bytes)));
4771 bld.insert(std::move(split));
4779 dst[i] = bld.as_uniform(temps[idx++]);
4790 tmp = bld.as_uniform(tmp);
4794 bld.insert(std::move(vec));
4827 Builder bld(ctx->program, ctx->block);
4887 Operand m = load_lds_size_m0(bld);
4916 address_offset = bld.vadd32(bld.def(v1), Operand::c32(base_offset), address_offset);
4927 instr = bld.ds(op, address_offset, split_data, second_data, m, inline_offset,
4930 instr = bld.ds(op, address_offset, split_data, m, inline_offset);
5015 Builder bld(ctx->program, ctx->block);
5019 dst = bld.tmp(RegClass(reg_type, cnt * dword_size));
5032 Temp zero = bld.copy(bld.def(RegClass(reg_type, dword_size)),
5039 bld.insert(std::move(instr));
5050 resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_offset)
5057 voffset = bld.copy(bld.def(v1), Operand::c32(excess_const_offset));
5059 voffset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
5062 voffset = bld.vadd32(bld.def(v1), Operand(voffset), Operand::c32(excess_const_offset));
5079 Builder bld(ctx->program, ctx->block);
5081 const_offset = resolve_excess_vmem_const_offset(bld, voffset, const_offset);
5087 bld.mubuf(op, Operand(descriptor), voffset_op, soffset_op, Operand(vdata), const_offset,
5101 Builder bld(ctx->program, ctx->block);
5129 Builder bld(ctx->program, ctx->block);
5141 emit_load(ctx, bld, info, mubuf_load_params);
5147 Builder bld(ctx->program, ctx->block);
5148 return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
5157 Builder bld(ctx->program, ctx->block);
5158 Temp tid_in_wave = emit_mbcnt(ctx, bld.tmp(v1));
5165 bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), wave_id_in_tg,
5167 return bld.vadd32(bld.def(v1), Operand(num_pre_threads), Operand(tid_in_wave));
5265 Builder bld(ctx->program, ctx->block);
5271 bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand::c32(2u) /* P0 */,
5272 bld.m0(prim_mask), idx, component);
5273 interp_p1 = bld.vintrp(aco_opcode::v_interp_p1lv_f16, bld.def(v2b), coord1,
5274 bld.m0(prim_mask), interp_p1, idx, component);
5275 bld.vintrp(aco_opcode::v_interp_p2_legacy_f16, Definition(dst), coord2, bld.m0(prim_mask),
5283 Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1ll_f16, bld.def(v1), coord1,
5284 bld.m0(prim_mask), idx, component);
5285 bld.vintrp(interp_p2_op, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx,
5289 Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
5290 bld.m0(prim_mask), idx, component);
5295 bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1,
5303 Builder bld(ctx->program, ctx->block);
5316 bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_arg(ctx, ctx->args->ac.frag_pos[3]));
5331 Builder bld(ctx->program, ctx->block);
5337 Temp x_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5339 Temp y_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5343 cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(x_rate));
5344 x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand::zero()),
5345 bld.copy(bld.def(v1), Operand::c32(4u)), cond);
5348 cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(y_rate));
5349 y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand::zero()),
5350 bld.copy(bld.def(v1), Operand::c32(1u)), cond);
5352 bld.vop2(aco_opcode::v_or_b32, Definition(dst), Operand(x_rate), Operand(y_rate));
5451 Builder bld(ctx->program, ctx->block);
5474 elems[i] = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), elems[i]);
5476 elems[i] = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), elems[i],
5513 Operand off = bld.copy(bld.def(s1), Operand::c32(desc_index * 16u));
5514 Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
5523 Temp divided = bld.tmp(v1);
5525 index = bld.vadd32(bld.def(v1), start_instance, divided);
5527 index = bld.vadd32(bld.def(v1), start_instance, instance_id);
5530 index = bld.copy(bld.def(v1), start_instance);
5533 index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.base_vertex),
5575 bld.vadd32(bld.def(v1), Operand::c32(fetch_offset / attrib_stride), fetch_index);
5581 soffset = bld.copy(bld.def(s1), Operand::c32(fetch_offset / 4096 * 4096));
5631 fetch_dst = bld.tmp(RegClass::get(RegType::vgpr, fetch_bytes));
5635 Instruction* mubuf = bld.mubuf(opcode, Definition(fetch_dst), list, fetch_index,
5640 Instruction* mtbuf = bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index,
5721 bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id),
5722 bld.m0(prim_mask), idx, component);
5732 vec->operands[i] = bld.vintrp(
5733 aco_opcode::v_interp_mov_f32, bld.def(instr->dest.ssa.bit_size == 16 ? v2b : v1),
5734 Operand::c32(vertex_id), bld.m0(prim_mask), chan_idx, chan_component);
5737 bld.insert(std::move(vec));
5749 Builder bld(ctx->program, ctx->block);
5772 Builder bld(ctx->program, ctx->block);
5780 Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tes_u, tes_v);
5781 tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::c32(0x3f800000u /* 1.0f */), tmp);
5785 Temp tess_coord = bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tes_u, tes_v, tes_w);
5794 Builder bld(ctx->program, ctx->block);
5799 offset = bld.as_uniform(offset);
5814 emit_load(ctx, bld, info, smem_load_params);
5816 emit_load(ctx, bld, info, mubuf_load_params);
5823 Builder bld(ctx->program, ctx->block);
5824 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5834 Builder bld(ctx->program, ctx->block);
5864 Temp index = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5866 index = bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
5877 vec = fits_in_dword ? bld.tmp(s1) : bld.tmp(s2);
5881 vec = count == 4 ? bld.tmp(s4) : count > 1 ? bld.tmp(s2) : bld.tmp(s1);
5890 vec = bld.tmp(s4);
5895 vec = bld.tmp(s8);
5902 bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true;
5913 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), emit_extract_vector(ctx, vec, 0, rc),
5924 Builder bld(ctx->program, ctx->block);
5943 offset = bld.nuw().sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
5946 offset = bld.vadd32(bld.def(v1), Operand::c32(base), offset);
5948 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
5949 bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc),
5964 Builder bld(ctx->program, ctx->block);
5973 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, high);
5987 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, Operand(v2b));
6022 emit_mimg(Builder& bld, aco_opcode op, Definition dst, Temp rsrc, Operand samp,
6026 unsigned max_nsa_size = bld.program->gfx_level >= GFX10_3 ? 13 : 5;
6027 bool use_nsa = bld.program->gfx_level >= GFX10 && coords.size() <= max_nsa_size;
6032 coord = bld.tmp(RegType::vgpr, coords.size());
6039 bld.insert(std::move(vec));
6041 coord = bld.copy(bld.def(v1), coord);
6048 coord = emit_wqm(bld, coord, bld.tmp(coord.regClass()), true);
6056 coords[i] = emit_wqm(bld, coords[i], bld.tmp(coords[i].regClass()), true);
6061 coord = bld.copy(bld.def(v1), coord);
6076 bld.insert(std::move(mimg));
6083 Builder bld(ctx->program, ctx->block);
6106 MIMG_instruction* mimg = emit_mimg(bld, aco_opcode::image_bvh64_intersect_ray, Definition(dst),
6128 Builder bld(ctx->program, ctx->block);
6136 coords[1] = bld.copy(bld.def(v1), Operand::zero());
6151 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6155 bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), rsrc_word5,
6192 emit_tfe_init(Builder& bld, Temp dst)
6194 Temp tmp = bld.tmp(dst.regClass());
6206 bld.insert(std::move(vec));
6214 Builder bld(ctx->program, ctx->block);
6247 tmp = bld.tmp(RegClass::get(RegType::vgpr, num_bytes));
6249 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6285 load->operands[3] = emit_tfe_init(bld, tmp);
6293 Operand vdata = is_sparse ? emit_tfe_init(bld, tmp) : Operand(v1);
6295 emit_mimg(bld, opcode, Definition(tmp), resource, Operand(s4), coords, 0, vdata);
6312 tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, tmp.size() + 1), tmp,
6323 Builder bld(ctx->program, ctx->block);
6343 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6381 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6414 data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes()));
6416 bld.insert(std::move(vec));
6422 emit_mimg(bld, opcode, Definition(), resource, Operand(s4), coords, 0, Operand(data));
6442 Builder bld(ctx->program, ctx->block);
6450 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(is_64bit ? v4 : v2),
6525 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6535 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6547 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6552 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6554 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6556 emit_mimg(bld, image_op, def, resource, Operand(s4), coords, 0, Operand(data));
6567 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6576 Builder bld(ctx->program, ctx->block);
6580 Temp size_div3 = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1),
6581 bld.copy(bld.def(v1), Operand::c32(0xaaaaaaabu)), size);
6582 size_div3 = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
6583 bld.as_uniform(size_div3), Operand::c32(1u));
6586 stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), stride,
6589 Temp is12 = bld.sopc(aco_opcode::s_cmp_eq_i32, bld.def(s1, scc), stride, Operand::c32(12u));
6590 size = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), size_div3, size, bld.scc(is12));
6592 Temp shr_dst = dst.type() == RegType::vgpr ? bld.tmp(s1) : dst;
6593 bld.sop2(aco_opcode::s_lshr_b32, Definition(shr_dst), bld.def(s1, scc), size,
6594 bld.sop1(aco_opcode::s_ff1_i32_b32, bld.def(s1), stride));
6596 bld.copy(Definition(dst), shr_dst);
6609 Builder bld(ctx->program, ctx->block);
6612 Temp desc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6618 std::vector<Temp> lod{bld.copy(bld.def(v1), Operand::zero())};
6621 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6626 emit_mimg(bld, aco_opcode::image_get_resinfo, Definition(dst), resource, Operand(s4), lod);
6643 Builder bld(ctx->program, ctx->block);
6646 Temp samples_log2 = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6648 Temp samples = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand::c32(1u),
6650 Temp type = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6660 bld.sopc(aco_opcode::s_cmp_gt_u32, bld.def(s1, scc), dword1, Operand::zero());
6664 Temp is_msaa = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), type, Operand::c32(14u));
6665 bld.sop2(aco_opcode::s_cselect_b32, dst, samples, default_sample, bld.scc(is_msaa));
6671 Builder bld(ctx->program, ctx->block);
6673 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6680 Builder bld(ctx->program, ctx->block);
6684 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6700 Builder bld(ctx->program, ctx->block);
6706 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
6748 Builder bld(ctx->program, ctx->block);
6754 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
6758 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6824 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6836 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6859 Builder bld(ctx->program, ctx->block);
6895 emit_load(ctx, bld, info, params);
6898 info.resource = bld.as_uniform(info.resource);
6899 info.offset = Operand(bld.as_uniform(info.offset));
6900 emit_load(ctx, bld, info, smem_load_params);
6907 Builder bld(ctx->program, ctx->block);
6931 lower_global_address(bld, offsets[i], &write_address, &write_const_offset, &write_offset);
6977 Temp rsrc = get_gfx6_global_rsrc(bld, write_address);
7001 Builder bld(ctx->program, ctx->block);
7007 data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
7017 lower_global_address(bld, 0, &addr, &const_offset, &offset);
7155 Temp rsrc = get_gfx6_global_rsrc(bld, addr);
7166 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
7178 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
7204 Builder bld(ctx->program, ctx->block);
7207 Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[0].ssa));
7209 Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
7252 Builder bld(ctx->program, ctx->block);
7254 Temp base = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
7255 Temp offset = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
7277 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst),
7278 bld.smem(opcode, bld.def(RegType::sgpr, size), base, offset), Operand::c32(0u));
7280 bld.smem(opcode, Definition(dst), base, offset);
7303 Builder bld(ctx->program, ctx->block);
7351 bld.barrier(aco_opcode::p_barrier,
7362 Builder bld(ctx->program, ctx->block);
7386 Builder bld(ctx->program, ctx->block);
7387 Operand m = load_lds_size_m0(bld);
7488 address = bld.vadd32(bld.def(v1), Operand::c32(offset), address);
7518 Builder bld(ctx->program, ctx->block);
7520 assert(bld.program->gfx_level >= GFX7);
7527 Operand m = load_lds_size_m0(bld);
7537 ds = bld.ds(op, address, data0, data1, m, offset0, offset1);
7540 Definition tmp_dst(dst.type() == RegType::vgpr ? dst : bld.tmp(is64bit ? v4 : v2));
7543 ds = bld.ds(op, tmp_dst, address, m, offset0, offset1);
7556 comp[i] = bld.as_uniform(emit_extract_vector(ctx, ds->definitions[0].getTemp(), i, v1));
7558 Temp comp0 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[0], comp[1]);
7559 Temp comp1 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[2], comp[3]);
7562 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), comp0, comp1);
7565 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), comp[0], comp[1]);
7576 Builder bld(ctx->program, ctx->block);
7580 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
7599 return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand::c32(-1u),
7606 Builder bld(ctx->program, ctx->block);
7619 bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(nir_src_as_uint(instr->src[0]), max)));
7626 emit_load(ctx, bld, info, params);
7631 emit_load(ctx, bld, info, scratch_mubuf_load_params);
7638 Builder bld(ctx->program, ctx->block);
7676 saddr = bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(const_offset, max)));
7678 bld.scratch(op, addr, saddr, write_datas[i], const_offset % max,
7686 Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset,
7696 Builder bld(ctx->program, ctx->block);
7700 next_vertex = bld.v_mul_imm(bld.def(v1), next_vertex, 4u);
7705 bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer,
7723 gsvs_dwords[i] = bld.tmp(s1);
7724 bld.pseudo(aco_opcode::p_split_vector, Definition(gsvs_dwords[0]), Definition(gsvs_dwords[1]),
7728 Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset));
7730 Temp carry = bld.tmp(s1);
7731 gsvs_dwords[0] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)),
7733 gsvs_dwords[1] = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc),
7734 gsvs_dwords[1], Operand::zero(), bld.scc(carry));
7737 gsvs_dwords[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), gsvs_dwords[1],
7739 gsvs_dwords[2] = bld.copy(bld.def(s1), Operand::c32(ctx->program->wave_size));
7741 gsvs_ring = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), gsvs_dwords[0], gsvs_dwords[1],
7758 vaddr_offset = bld.copy(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u));
7760 vaddr_offset = bld.vadd32(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u),
7776 bld.insert(std::move(mubuf));
7787 bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
7793 Builder bld(ctx->program, ctx->block);
7801 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
7802 return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc),
7803 bld.sop1(Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc), tmp));
7806 return bld.sop1(
7807 Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc),
7808 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)));
7812 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
7815 Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
7816 return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), cond);
7820 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm))
7827 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7828 tmp = bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), tmp);
7829 tmp = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(1u))
7844 Temp lane_id = emit_mbcnt(ctx, bld.tmp(v1));
7845 Temp cluster_offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1),
7850 tmp = bld.sop2(Builder::s_orn2, bld.def(bld.lm), bld.def(s1, scc), src,
7851 Operand(exec, bld.lm));
7854 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7859 tmp = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), tmp, cluster_offset);
7861 tmp = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), cluster_offset, tmp);
7863 tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), cluster_offset, tmp);
7866 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(cluster_mask), tmp);
7869 return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm), Operand::c32(cluster_mask),
7872 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp);
7874 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u),
7875 bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), tmp, Operand::zero()));
7876 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp);
7886 Builder bld(ctx->program, ctx->block);
7887 assert(src.regClass() == bld.lm);
7896 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
7898 tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7900 Temp mbcnt = emit_mbcnt(ctx, bld.tmp(v1), Operand(tmp));
7903 return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm), Operand::zero(), mbcnt);
7905 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), mbcnt);
7907 return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(),
7908 bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), mbcnt));
7917 Builder bld(ctx->program, ctx->block);
7925 return bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7927 return bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7929 return bld.sop2(Builder::s_xor, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7969 Builder bld(ctx->program, ctx->block);
7973 bld.pseudo(aco_opcode::p_as_uniform, dst, src);
7975 bld.copy(dst, src);
7981 Builder bld(ctx->program, ctx->block);
7986 Temp tmp = dst.regClass() == s1 ? bld.tmp(RegClass::get(RegType::vgpr, src.ssa->bit_size / 8))
7990 count = bld.vop1(aco_opcode::v_cvt_f16_u16, bld.def(v2b), count);
7991 bld.vop2(aco_opcode::v_mul_f16, Definition(tmp), count, src_tmp);
7994 count = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), count);
7995 bld.vop2(aco_opcode::v_mul_f32, Definition(tmp), count, src_tmp);
7999 bld.pseudo(aco_opcode::p_as_uniform, dst, tmp);
8005 src_tmp = bld.as_uniform(src_tmp);
8009 bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), count, Operand::c32(1u));
8011 count = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), count);
8017 bld.pseudo(aco_opcode::p_extract_vector, dst, count, Operand::zero());
8019 bld.copy(dst, count);
8021 bld.copy(dst, Operand::zero(dst.bytes()));
8023 bld.v_mul_imm(dst, count, nir_src_as_uint(src));
8025 bld.sop2(aco_opcode::s_mul_i32, dst, src_tmp, count);
8027 bld.vop3(aco_opcode::v_mul_lo_u16_e64, dst, src_tmp, count);
8029 bld.vop2(aco_opcode::v_mul_lo_u16, dst, src_tmp, count);
8031 bld.vop3(aco_opcode::v_mul_lo_u32, dst, src_tmp, count);
8033 bld.sop2(aco_opcode::s_mul_i32, dst, src_tmp, count);
8045 Builder bld(ctx->program, ctx->block);
8052 bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm));
8065 Builder bld(ctx->program, ctx->block);
8079 packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm), Operand::c32(1u));
8081 packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm));
8096 Temp lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
8100 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8101 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8106 bld.writelane(bld.def(v1), bld.copy(bld.def(s1, m0), Operand::c32(identity_lo)), lane, lo);
8108 bld.writelane(bld.def(v1), bld.copy(bld.def(s1, m0), Operand::c32(identity_hi)), lane, hi);
8109 bld.pseudo(aco_opcode::p_create_vector, dst, lo, hi);
8112 bld.writelane(dst, bld.copy(bld.def(s1, m0), Operand::c32(identity)), lane,
8126 Builder bld(ctx->program, ctx->block);
8131 defs[num_defs++] = bld.def(bld.lm); /* used internally to save/restore exec */
8143 defs[num_defs++] = bld.def(RegType::sgpr, dst.size());
8146 defs[num_defs++] = bld.def(s1, scc);
8158 defs[num_defs++] = bld.def(bld.lm, vcc);
8170 bld.insert(std::move(reduce));
8178 Builder bld(ctx->program, ctx->block);
8189 Temp tl_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p1, dpp_ctrl0);
8190 ddx_1 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p1, tl_1, dpp_ctrl1);
8191 ddy_1 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p1, tl_1, dpp_ctrl2);
8192 Temp tl_2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p2, dpp_ctrl0);
8193 ddx_2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p2, tl_2, dpp_ctrl1);
8194 ddy_2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), p2, tl_2, dpp_ctrl2);
8196 Temp tl_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl0);
8197 ddx_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl1);
8198 ddx_1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddx_1, tl_1);
8199 ddy_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl2);
8200 ddy_1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddy_1, tl_1);
8202 Temp tl_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl0);
8203 ddx_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl1);
8204 ddx_2 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddx_2, tl_2);
8205 ddy_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl2);
8206 ddy_2 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), ddy_2, tl_2);
8212 Temp tmp1 = bld.vop3(mad, bld.def(v1), ddx_1, pos1, p1);
8213 Temp tmp2 = bld.vop3(mad, bld.def(v1), ddx_2, pos1, p2);
8214 tmp1 = bld.vop3(mad, bld.def(v1), ddy_1, pos2, tmp1);
8215 tmp2 = bld.vop3(mad, bld.def(v1), ddy_2, pos2, tmp2);
8216 Temp wqm1 = bld.tmp(v1);
8217 emit_wqm(bld, tmp1, wqm1, true);
8218 Temp wqm2 = bld.tmp(v1);
8219 emit_wqm(bld, tmp2, wqm2, true);
8220 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), wqm1, wqm2);
8249 Builder bld(ctx->program, ctx->block);
8258 bld.copy(Definition(dst), bary);
8266 bld.copy(Definition(dst), model);
8282 bld.copy(Definition(dst), bary);
8298 offset = bld.sop2(aco_opcode::s_lshl3_add_u32, bld.def(s1), bld.def(s1, scc), addr,
8301 offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr,
8303 offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
8307 Operand off = bld.copy(bld.def(s1), Operand(offset));
8309 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, off);
8312 addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
8313 sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr,
8317 Temp tmp0 = bld.tmp(s1);
8318 Temp tmp1 = bld.tmp(s1);
8319 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1),
8321 Definition scc_tmp = bld.def(s1, scc);
8322 tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0,
8324 tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1,
8325 Operand::zero(), bld.scc(scc_tmp.getTemp()));
8326 addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
8327 Temp pck0 = bld.tmp(v1);
8328 Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp();
8330 Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), bld.def(bld.lm), tmp1,
8332 addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), pck0, pck1);
8335 sample_pos = bld.flat(aco_opcode::flat_load_dwordx2, bld.def(v2), addr, Operand(s1));
8341 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
8344 addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
8345 addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), addr, Operand::zero());
8347 sample_pos = bld.tmp(v2);
8365 Temp pos1 = bld.tmp(RegClass(sample_pos.type(), 1));
8366 Temp pos2 = bld.tmp(RegClass(sample_pos.type(), 1));
8367 bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), sample_pos);
8368 pos1 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos1, Operand::c32(0x3f000000u));
8369 pos2 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos2, Operand::c32(0x3f000000u));
8377 Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
8378 bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), offset);
8384 bld.vopc(aco_opcode::v_cmp_lg_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8390 bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.view_index)));
8403 bld.pseudo(
8405 posx.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posx) : Operand::zero(),
8406 posy.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posy) : Operand::zero());
8489 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.num_work_groups));
8493 bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
8494 bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, Operand::zero()),
8495 bld.smem(aco_opcode::s_load_dword, bld.def(s1), addr, Operand::c32(8)));
8504 bld.copy(Definition(dst), Operand(addr));
8514 local_ids[i] = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
8519 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), local_ids[0], local_ids[1],
8522 bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.local_invocation_ids)));
8531 bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
8546 bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
8549 Temp temp = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), wave_id,
8551 Temp thread_id = emit_mbcnt(ctx, bld.tmp(v1));
8553 bld.vadd32(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), temp, thread_id);
8555 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8560 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
8567 Temp id = emit_mbcnt(ctx, bld.tmp(v1));
8575 Temp tg_num = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
8577 bld.vop2(aco_opcode::v_or_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), tg_num,
8582 bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
8584 bld.vop3(aco_opcode::v_lshl_or_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8591 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8592 bld.def(s1, scc), get_arg(ctx, ctx->args->ac.tg_size),
8596 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8597 bld.def(s1, scc), get_arg(ctx, ctx->args->ac.merged_wave_info),
8600 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand::zero());
8610 bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8611 bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->ac.tg_size));
8613 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8614 bld.def(s1, scc), get_arg(ctx, ctx->args->ac.merged_wave_info),
8617 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand::c32(0x1u));
8625 assert(src.regClass() == bld.lm);
8627 src = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8629 src = bld.vopc(aco_opcode::v_cmp_lg_u64, bld.def(bld.lm), Operand::zero(), src);
8636 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
8637 if (dst.size() != bld.lm.size()) {
8640 bld.pseudo(aco_opcode::p_create_vector, bld.def(dst.regClass()), src, Operand::zero());
8643 emit_wqm(bld, src, dst);
8655 tid = bld.as_uniform(tid);
8662 Temp tmp = bld.tmp(v1);
8663 tmp = emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), tmp);
8665 bld.pseudo(aco_opcode::p_split_vector, Definition(dst),
8666 bld.def(src.regClass() == v1b ? v3b : v2b), tmp);
8668 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
8670 emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), dst);
8672 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8673 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8674 lo = emit_wqm(bld, emit_bpermute(ctx, bld, tid, lo));
8675 hi = emit_wqm(bld, emit_bpermute(ctx, bld, tid, hi));
8676 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8679 assert(src.regClass() == bld.lm);
8680 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, tid);
8681 bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
8683 assert(src.regClass() == bld.lm);
8686 tmp = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), src, tid);
8688 tmp = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), tid, src);
8690 tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), tid, src);
8692 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), tmp);
8693 emit_wqm(bld, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp),
8702 bld.vop3(aco_opcode::v_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8710 emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), src), dst);
8712 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8713 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8714 lo = emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), lo));
8715 hi = emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), hi));
8716 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8719 assert(src.regClass() == bld.lm);
8720 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
8721 bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)));
8722 bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
8724 bld.copy(Definition(dst), src);
8731 assert(src.regClass() == bld.lm);
8732 assert(dst.regClass() == bld.lm);
8735 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
8738 Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
8739 bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), cond);
8745 assert(src.regClass() == bld.lm);
8746 assert(dst.regClass() == bld.lm);
8749 bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
8791 emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
8794 emit_wqm(bld, emit_boolean_exclusive_scan(ctx, op, src), dst);
8797 emit_wqm(bld, emit_boolean_inclusive_scan(ctx, op, src), dst);
8802 bld.copy(Definition(dst), src);
8819 bld.def(dst.regClass()), src);
8820 emit_wqm(bld, tmp_dst, dst);
8861 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
8868 tmp = bld.tmp(v1);
8870 tmp = bld.tmp(dst.regClass());
8874 assert(src.regClass() == bld.lm && tmp.regClass() == bld.lm);
8877 Operand mask_tmp = bld.lm.bytes() == 4
8879 : bld.pseudo(aco_opcode::p_create_vector, bld.def(bld.lm),
8883 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
8884 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, src);
8885 bld.sop1(Builder::s_wqm, Definition(tmp), src);
8888 Definition def = excess_bytes ? bld.def(v1) : Definition(tmp);
8891 bld.vop1_dpp(aco_opcode::v_mov_b32, def, src, dpp_ctrl);
8893 bld.ds(aco_opcode::ds_swizzle_b32, def, src, (1 << 15) | dpp_ctrl);
8896 bld.pseudo(aco_opcode::p_split_vector, Definition(tmp),
8897 bld.def(RegClass::get(tmp.type(), excess_bytes)), def.getTemp());
8899 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8900 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8903 lo = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl);
8904 hi = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl);
8906 lo = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) | dpp_ctrl);
8907 hi = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) | dpp_ctrl);
8910 bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), lo, hi);
8918 tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp);
8921 emit_wqm(bld, tmp, dst, true);
8939 assert(src.regClass() == bld.lm);
8940 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
8942 src = emit_masked_swizzle(ctx, bld, src, mask);
8943 Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8944 emit_wqm(bld, tmp, dst);
8946 Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8949 Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8952 emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask), dst);
8954 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8955 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8956 lo = emit_wqm(bld, emit_masked_swizzle(ctx, bld, lo, mask));
8957 hi = emit_wqm(bld, emit_masked_swizzle(ctx, bld, hi, mask));
8958 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8967 Temp val = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
8968 Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa));
8972 emit_wqm(bld, bld.writelane(bld.def(v1), val, lane, src), dst);
8974 Temp src_lo = bld.tmp(v1), src_hi = bld.tmp(v1);
8975 Temp val_lo = bld.tmp(s1), val_hi = bld.tmp(s1);
8976 bld.pseudo(aco_opcode::p_split_vector, Definition(src_lo), Definition(src_hi), src);
8977 bld.pseudo(aco_opcode::p_split_vector, Definition(val_lo), Definition(val_hi), val);
8978 Temp lo = emit_wqm(bld, bld.writelane(bld.def(v1), val_lo, lane, src_hi));
8979 Temp hi = emit_wqm(bld, bld.writelane(bld.def(v1), val_hi, lane, src_hi));
8980 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8992 src = emit_extract_vector(ctx, src, 0, RegClass(src.type(), bld.lm.size()));
8993 Temp wqm_tmp = emit_mbcnt(ctx, bld.tmp(v1), Operand(src), Operand(add_src));
8994 emit_wqm(bld, wqm_tmp, dst);
9001 bld.vop3(aco_opcode::v_perm_b32, Definition(dst), get_ssa_temp(ctx, instr->src[0].ssa),
9012 bld.copy(Definition(dst), src);
9014 bld.vop3(aco_opcode::v_permlane16_b32, Definition(dst), src,
9015 bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)),
9016 bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa)));
9027 bld.pseudo(aco_opcode::p_is_helper, Definition(dst), Operand(exec, bld.lm));
9033 bld.pseudo(aco_opcode::p_demote_to_helper, Operand::c32(-1u));
9042 assert(src.regClass() == bld.lm);
9044 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
9045 bld.pseudo(aco_opcode::p_demote_to_helper, cond);
9061 assert(src.regClass() == bld.lm);
9063 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
9066 bld.pseudo(aco_opcode::p_discard_if, cond);
9075 emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
9080 Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm));
9081 Temp last = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc),
9083 emit_wqm(bld, last, get_ssa_temp(ctx, &instr->dest.ssa));
9091 Temp elected = bld.pseudo(aco_opcode::p_elect, bld.def(bld.lm), Operand(exec, bld.lm));
9092 emit_wqm(bld, elected, get_ssa_temp(ctx, &instr->dest.ssa));
9101 Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);
9102 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand::zero());
9107 bld.smem(opcode, Definition(dst), memory_sync_info(0, semantic_volatile));
9114 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.vertex_id));
9119 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.base_vertex));
9124 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.start_instance));
9129 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.instance_id));
9134 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.draw_id));
9142 bld.vop2_e64(aco_opcode::v_and_b32, Definition(dst), Operand::c32(127u),
9145 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_invocation_id));
9147 bld.vop3(aco_opcode::v_bfe_u32, Definition(dst), get_arg(ctx, ctx->args->ac.tcs_rel_ids),
9160 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_prim_id));
9163 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.tcs_patch_id));
9166 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.tes_patch_id));
9172 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_prim_id));
9175 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.vs_prim_id));
9191 bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1,
9205 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), merged_wave_info_to_mask(ctx, i));
9229 Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), bld.as_uniform(m0_val)));
9230 bld.ds(aco_opcode::ds_add_u32, as_vgpr(ctx, gds_addr), as_vgpr(ctx, store_val), m, 0u, 0u,
9238 bld.copy(Definition(dst), Operand(addr));
9256 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
9267 bld.copy(Definition(dst), src);
9283 Builder bld(ctx->program, ctx->block);
9294 Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma);
9295 Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive);
9296 Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma);
9298 Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id);
9299 Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id);
9300 is_ma_y = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), is_ma_y, is_ma_z);
9302 bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), is_ma_z, is_ma_y);
9305 Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x);
9306 Temp sgn = bld.vop2_e64(
9307 aco_opcode::v_cndmask_b32, bld.def(v1),
9308 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_sgn_ma, sgn_ma, is_ma_z), one, is_ma_y);
9309 *out_sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
9312 tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_y, deriv_z, is_ma_y);
9313 sgn = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, sgn_ma, is_ma_y);
9314 *out_tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
9317 tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
9318 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_x, deriv_y, is_ma_y),
9320 tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffffu), tmp);
9321 *out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp);
9328 Builder bld(ctx->program, ctx->block);
9337 coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), coords[3]);
9339 ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9345 Temp invma = bld.tmp(v1);
9349 sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9351 sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand::c32(0x3fc00000u /*1.5*/));
9353 tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9355 tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand::c32(0x3fc00000u /*1.5*/));
9357 id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
9360 sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, invma);
9361 tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, invma);
9369 deriv_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, invma);
9371 Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9372 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_sc, invma),
9373 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, sc));
9374 Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9375 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_tc, invma),
9376 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, tc));
9377 *(i ? ddy : ddx) = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), x, y);
9380 sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), sc);
9381 tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), tc);
9385 id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand::c32(0x41000000u /*8.0*/));
9413 Builder bld(ctx->program, ctx->block);
9427 resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
9430 sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
9540 acc = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), acc,
9544 acc = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), acc,
9551 pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), pack, acc);
9556 pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc),
9564 acc = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x3Fu), acc);
9567 acc = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(8u * i), acc);
9573 pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), pack, acc);
9578 pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(pack_const), pack);
9581 offset = bld.copy(bld.def(v1), Operand::c32(pack_const));
9604 unpacked_coord.insert(std::next(unpacked_coord.begin()), bld.copy(bld.def(rc), coord2d));
9636 Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes()));
9669 tmp_dst = bld.tmp(instr->is_sparse ? v5 : (d16 ? v2 : v4));
9671 tmp_dst = bld.tmp(v1);
9675 tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, bytes));
9680 lod = bld.copy(bld.def(v1), Operand::zero());
9682 MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, Definition(tmp_dst),
9702 Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
9703 Temp size = bld.tmp(v2);
9704 MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, Definition(size),
9714 half_texel[i] = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), half_texel[i]);
9715 half_texel[i] = bld.vop1(aco_opcode::v_rcp_iflag_f32, bld.def(v1), half_texel[i]);
9716 half_texel[i] = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1),
9730 bld.sopc(aco_opcode::s_bitcmp0_b32, bld.def(s1, scc), sampler, Operand::c32(bit_idx));
9733 half_texel[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
9735 half_texel[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
9739 Temp new_coords[2] = {bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]),
9740 bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[1], half_texel[1])};
9749 desc[i] = bld.tmp(s1);
9754 Temp dfmt = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), desc[1],
9756 Temp compare_cube_wa = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), dfmt,
9761 nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
9763 Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa));
9765 nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
9767 Operand::c32(V_008F14_IMG_NUM_FORMAT_SINT), bld.scc(compare_cube_wa));
9769 tg4_compare_cube_wa64 = bld.tmp(bld.lm);
9772 nfmt = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), nfmt,
9775 desc[1] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), desc[1],
9777 desc[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), desc[1], nfmt);
9783 resource = bld.tmp(resource.regClass());
9787 new_coords[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), new_coords[0], coords[0],
9789 new_coords[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), new_coords[1], coords[1],
9829 mubuf->operands[3] = emit_tfe_init(bld, tmp_dst);
9859 Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
9861 emit_mimg(bld, op, Definition(tmp_dst), resource, Operand(s4), args, 0, vdata);
9879 Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
9881 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
9882 bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
9883 bld.scc(is_not_null));
9885 Temp is_not_null = bld.tmp(bld.lm);
9886 bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
9888 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst),
9889 bld.copy(bld.def(v1), Operand::c32(0x76543210)), tmp_dst, is_not_null);
10025 bool implicit_derivs = bld.program->stage == fragment_fs && !has_derivs && !has_lod &&
10029 Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
10030 MIMG_instruction* tex = emit_mimg(bld, opcode, Definition(tmp_dst), resource, Operand(sampler),
10049 cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]);
10051 cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);
10052 val[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), val[i], cvt_val,
10056 Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass());
10058 tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), val[0], val[1], val[2],
10061 tmp_dst = bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), val[0], val[1], val[2],
10208 Builder bld(ctx->program, ctx->block);
10209 bld.branch(aco_opcode::p_branch, bld.def(s2));
10237 Builder bld(ctx->program, ctx->block);
10252 bld.reset(break_block);
10253 bld.branch(aco_opcode::p_branch, bld.def(s2));
10259 bld.reset(continue_block);
10260 bld.branch(aco_opcode::p_branch, bld.def(s2));
10275 bld.reset(ctx->block);
10276 bld.branch(aco_opcode::p_branch, bld.def(s2));
10320 Builder bld(ctx->program, ctx->block);
10335 bld.branch(aco_opcode::p_branch, bld.def(s2));
10349 bld.branch(aco_opcode::p_branch, bld.def(s2));
10366 bld.branch(aco_opcode::p_branch, bld.def(s2));
10374 bld.reset(break_block);
10375 bld.branch(aco_opcode::p_branch, bld.def(s2));
10489 Builder bld(ctx->program, ctx->block);
10490 Temp cond = bld.copy(bld.def(s1, scc), Operand::zero());
10798 Builder bld(ctx->program, ctx->block);
10947 Builder bld(ctx->program, ctx->block);
10949 Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u),
10952 out = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(out), exp->operands[2]);
11029 Builder bld(ctx->program, ctx->block);
11038 Temp ch2 = bld.copy(bld.def(v1), Operand::c32(0));
11044 ch2 = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), tmp, Operand::c32(17), ch2);
11049 ch2 = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), tmp, Operand::c32(20), ch2);
11054 ch2 = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), tmp, ch2);
11059 bld.exp(aco_opcode::exp, prim_ch1, prim_ch2, Operand(v1), Operand(v1),
11077 Builder bld(ctx->program, ctx->block);
11094 values[0] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), values[0]);
11110 bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), mrtz_alpha);
11113 values[1] = bld.vop3(aco_opcode::v_and_or_b32, bld.def(v1), values[1],
11152 bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels,
11174 Builder bld(ctx->program, ctx->block);
11194 Temp isnan = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), values[i],
11195 bld.copy(bld.def(v1), Operand::c32(3u)));
11196 values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), values[i],
11197 bld.copy(bld.def(v1), Operand::zero()), isnan);
11225 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1),
11230 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1),
11274 values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
11295 values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), Operand::c32(max), values[i]);
11296 values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
11317 values[i] = bld.vop3(
11318 compr_op, bld.def(v1), values[i * 2].isUndefined() ? Operand::zero() : values[i * 2],
11340 bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, target,
11352 Builder bld(ctx->program, ctx->block);
11355 bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1),
11362 Builder bld(ctx->program, ctx->block);
11386 chan = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), chan);
11389 Temp tmp = convert_int(ctx, bld, chan.getTemp(), 16, 32, sign_ext);
11412 Builder bld(ctx->program, ctx->block);
11490 Builder bld(ctx->program, ctx->block);
11492 bld.vadd32(bld.def(v1), Operand::c32(offset), Operand(so_write_offset[buf]));
11507 Builder bld(ctx->program, ctx->block);
11510 bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
11513 Temp tid = emit_mbcnt(ctx, bld.tmp(v1));
11515 Temp can_emit = bld.vopc(aco_opcode::v_cmp_gt_i32, bld.def(bld.lm), so_vtx_count, tid);
11520 bld.reset(ctx->block);
11523 bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.streamout_write_index), tid);
11534 so_buffers[i] = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), buf_ptr,
11535 bld.copy(bld.def(s1), Operand::c32(i * 16u)));
11538 Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
11541 Temp new_offset = bld.vadd32(bld.def(v1), offset, tid);
11544 bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), new_offset);
11546 Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u);
11547 Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(4u),
11549 so_write_offset[i] = bld.vadd32(bld.def(v1), offset, offset2);
11617 Builder bld(ctx->program, ctx->block);
11618 bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
11642 Builder bld(ctx->program, ctx->block);
11644 Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
11652 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.vertex_id),
11655 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_rel_ids),
11658 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_patch_id),
11684 Builder bld(ctx->program, ctx->block);
11697 Temp sel = bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm),
11708 bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), persp_centroid, persp_center, sel);
11710 ctx->persp_centroid = bld.tmp(v2);
11711 bld.pseudo(aco_opcode::p_create_vector, Definition(ctx->persp_centroid),
11723 new_coord[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), linear_centroid,
11726 ctx->linear_centroid = bld.tmp(v2);
11727 bld.pseudo(aco_opcode::p_create_vector, Definition(ctx->linear_centroid),
11806 Builder bld(ctx->program, ctx->block);
11807 Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand::zero());
11816 Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count,
11819 bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand::c32(-1u), mask, bld.scc(active_64));
11823 cond = emit_extract_vector(ctx, mask, 0, bld.lm);
11832 Builder bld(ctx->program, ctx->block);
11837 : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
11848 Builder bld(ctx->program, ctx->block);
11856 prm_cnt_0 = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), prm_cnt, Operand::zero());
11857 prm_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), prm_cnt,
11858 bld.scc(prm_cnt_0));
11859 vtx_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), vtx_cnt,
11860 bld.scc(prm_cnt_0));
11865 bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), prm_cnt, Operand::c32(12u));
11866 tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc), tmp, vtx_cnt);
11871 bld.sopp(aco_opcode::s_sendmsg, bld.m0(tmp), -1, sendmsg_gs_alloc_req);
11878 Temp first_lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
11879 Temp cond = bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc),
11881 cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), cond,
11882 Operand::zero(ctx->program->wave_size == 64 ? 8 : 4), bld.scc(prm_cnt_0));
11886 bld.reset(ctx->block);
11890 Temp zero = bld.copy(bld.def(v1), Operand::zero());
11892 Temp nan_coord = bld.copy(bld.def(v1), Operand::c32(-1u));
11894 bld.exp(aco_opcode::exp, zero, Operand(v1), Operand(v1), Operand(v1), 1 /* enabled mask */,
11897 bld.exp(aco_opcode::exp, nan_coord, nan_coord, nan_coord, nan_coord, 0xf /* enabled mask */,
11903 bld.reset(ctx->block);
11967 Builder bld(ctx.program, ctx.block);
11980 bld.barrier(aco_opcode::p_barrier,
11985 ctx.gs_wave_id = bld.pseudo(aco_opcode::p_extract, bld.def(s1, m0), bld.def(s1, scc),
12003 Builder bld(ctx.program, ctx.block);
12004 bld.barrier(aco_opcode::p_barrier,
12006 bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx.gs_wave_id), -1,
12032 Builder bld(ctx.program, ctx.block);
12033 bld.sopp(aco_opcode::s_endpgm);
12051 Builder bld(ctx.program, ctx.block);
12053 Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4),
12058 stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
12061 Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u),
12078 bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand::c32(stream));
12081 bld.reset(ctx.block);
12095 Temp val = bld.tmp(v1);
12109 bld.reset(ctx.block);
12118 bld.reset(ctx.block);
12131 bld.reset(ctx.block);
12132 bld.sopp(aco_opcode::s_endpgm);
12162 Builder bld(ctx.program, ctx.block);
12165 bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4), Operand(PhysReg{tma}, s2),
12169 bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4), Operand::zero(),
12182 bld.sopk(aco_opcode::s_getreg_b32, Definition(PhysReg{ttmp8}, s1),
12185 bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4),
12193 bld.sopp(aco_opcode::s_endpgm);
12212 load_vb_descs(Builder& bld, PhysReg dest, Operand base, unsigned start, unsigned max)
12214 unsigned count = MIN2((bld.program->dev.sgpr_limit - dest.reg()) / 4u, max);
12217 if (bld.program->gfx_level >= GFX10 && num_loads > 1)
12218 bld.sopp(aco_opcode::s_clause, -1, num_loads - 1);
12224 bld.smem(aco_opcode::s_load_dwordx16, Definition(dest, s16), base,
12227 bld.smem(aco_opcode::s_load_dwordx8, Definition(dest, s8), base,
12230 bld.smem(aco_opcode::s_load_dwordx4, Definition(dest, s4), base,
12241 calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, unsigned index,
12245 bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2),
12250 bld.sopp(aco_opcode::s_waitcnt, -1, lgkm_imm.pack(bld.program->gfx_level));
12256 if (bld.program->gfx_level >= GFX8 && bld.program->gfx_level < GFX11) {
12258 if (bld.program->gfx_level < GFX9) {
12259 bld.vop1(aco_opcode::v_mov_b32, Definition(tmp_vgpr1, v1), div_info);
12263 bld.vop2(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, instance_id);
12266 if (bld.program->gfx_level >= GFX9)
12267 instr = bld.vop2_sdwa(aco_opcode::v_add_u32, fetch_index_def, div_info, fetch_index).instr;
12269 instr = bld.vop2_sdwa(aco_opcode::v_add_co_u32, fetch_index_def, Definition(vcc, bld.lm),
12274 bld.vop3(aco_opcode::v_mul_hi_u32, fetch_index_def, Operand(tmp_sgpr.advance(4), s1),
12278 bld.vop2_sdwa(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, fetch_index).instr;
12284 bld.vop2(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, instance_id);
12286 bld.vop3(aco_opcode::v_bfe_u32, tmp_def, div_info, Operand::c32(8u), Operand::c32(8u));
12287 bld.vadd32(fetch_index_def, tmp_op, fetch_index, false, Operand(s2), true);
12289 bld.vop3(aco_opcode::v_mul_hi_u32, fetch_index_def, fetch_index,
12292 bld.vop3(aco_opcode::v_bfe_u32, tmp_def, div_info, Operand::c32(16u), Operand::c32(8u));
12293 bld.vop2(aco_opcode::v_lshrrev_b32, fetch_index_def, tmp_op, fetch_index);
12296 bld.vadd32(fetch_index_def, start_instance, fetch_index, false, Operand(s2), true);
12322 Builder bld(program, block);
12326 bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
12351 bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
12354 bld.sopk(aco_opcode::s_movk_i32, Definition(vertex_buffers.advance(4), s1),
12357 bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers.advance(4), s1),
12370 load_vb_descs(bld, desc, Operand(vertex_buffers, s2), loc, key->num_attributes - loc);
12377 bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), count, Operand::c32(0u));
12379 bld.sopc(aco_opcode::s_bitcmp1_b32, Definition(scc, s1), count,
12381 bld.sop2(aco_opcode::s_cselect_b64, Definition(exec, s2), Operand::c64(UINT64_MAX),
12395 bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->ac.base_vertex),
12398 bld.vadd32(Definition(instance_index, v1), start_instance, instance_id, false,
12401 bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance);
12404 bld.sopp(aco_opcode::s_waitcnt, -1, lgkm_imm.pack(program->gfx_level));
12419 bld, args, index, instance_id, start_instance, prolog_input,
12445 bld.mubuf(aco_opcode::buffer_load_dword, Definition(dest.advance(j * 4u), v1),
12449 bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(dest.advance(j * 4u), v1),
12458 bld.vop1(aco_opcode::v_mov_b32, Definition(dest.advance(j * 4u), v1),
12462 bld.mubuf(aco_opcode::buffer_load_format_xyzw, Definition(dest, v4),
12471 bld.sopp(aco_opcode::s_waitcnt, -1, vm_imm.pack(program->gfx_level));
12484 bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
12493 bld.vop3(aco_opcode::v_bfe_i32, Definition(alpha, v1), Operand(alpha, v1),
12498 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
12499 bld.vop2(aco_opcode::v_max_f32, Definition(alpha, v1), Operand::c32(0xbf800000u),
12502 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
12511 bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2),
12513 bld.sopp(aco_opcode::s_waitcnt, -1, lgkm_imm.pack(program->gfx_level));
12517 bld.sop1(aco_opcode::s_setpc_b64, continue_pc);
12539 Builder bld(ctx.program, ctx.block);
12574 bld.reset(ctx.block);
12575 bld.sopp(aco_opcode::s_endpgm);