Lines matching references to dst (ACO instruction selection; leading numbers are source line numbers)

136 emit_mbcnt(isel_context* ctx, Temp dst, Operand mask = Operand(), Operand base = Operand::zero())
144 return bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(dst), mask_lo, base);
164 return bld.vop2(aco_opcode::v_mbcnt_hi_u32_b32, Definition(dst), mask_hi, mbcnt_lo);
166 return bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(dst), mask_hi, mbcnt_lo);
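
A note on the semantics: the v_mbcnt_lo/_hi pair counts, for each lane, the set bits of a 64-bit mask below that lane's index, adding the optional base: in effect a per-lane prefix popcount. A standalone C++ sketch of that behavior (an illustrative model with an explicit lane index, not ACO code):

    #include <bit>
    #include <cstdint>

    // What v_mbcnt_{lo,hi} compute per lane: the number of set bits of
    // `mask` strictly below `lane` (0..63), plus `base`.
    uint32_t mbcnt(uint64_t mask, unsigned lane, uint32_t base = 0)
    {
        return base + std::popcount(mask & ((uint64_t(1) << lane) - 1));
    }
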
170 emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1), bool program_needs_wqm = false)
173 if (!dst.id())
176 return bld.copy(Definition(dst), src);
177 } else if (!dst.id()) {
178 dst = bld.tmp(src.regClass());
181 assert(src.size() == dst.size());
182 bld.pseudo(aco_opcode::p_wqm, Definition(dst), src);
184 return dst;
289 emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
295 bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(util_logbase2(b)), a);
309 bld.copy(Definition(dst), a);
315 pre_shift_dst = (increment || multiply || post_shift) ? bld.tmp(v1) : dst;
322 increment_dst = (post_shift || multiply) ? bld.tmp(v1) : dst;
328 multiply_dst = post_shift ? bld.tmp(v1) : dst;
334 bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(info.post_shift),
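
emit_v_div_u32 lowers unsigned division by a compile-time constant: a power of two becomes the single v_lshrrev shift above, and other divisors use the precomputed pre-shift/multiply/increment/post-shift chain. The underlying identity, sketched standalone for divisor 3 (the magic constant below is the textbook one for 3, not taken from ACO):

    #include <cassert>
    #include <cstdint>

    // x / 3 == (x * 0xAAAAAAAB) >> 33 for all 32-bit x, since
    // 0xAAAAAAAB == ceil(2^33 / 3): constant division becomes a
    // widening multiply plus a shift.
    uint32_t div3(uint32_t x)
    {
        return uint32_t((uint64_t(x) * 0xAAAAAAABull) >> 33);
    }

    int main()
    {
        for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 65537)
            assert(div3(uint32_t(x)) == uint32_t(x) / 3);
    }
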
340 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst)
343 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::c32(idx));
375 Temp dst = bld.tmp(dst_rc);
376 emit_extract_vector(ctx, src, idx, dst);
377 return dst;
415 expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components, unsigned mask,
421 if (dst.type() == RegType::sgpr && num_components > dst.size()) {
424 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp_dst);
425 ctx->allocated_vec[dst.id()] = ctx->allocated_vec[tmp_dst.id()];
431 if (vec_src == dst)
435 if (dst.type() == RegType::sgpr)
436 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec_src);
438 bld.copy(Definition(dst), vec_src);
442 unsigned component_bytes = dst.bytes() / num_components;
444 RegClass dst_rc = RegClass::get(dst.type(), component_bytes);
445 assert(dst.type() == RegType::vgpr || !src_rc.is_subdword());
454 vec->definitions[0] = Definition(dst);
459 if (dst.type() == RegType::sgpr)
469 ctx->allocated_vec.emplace(dst.id(), elems);
474 byte_align_scalar(isel_context* ctx, Temp vec, Operand offset, Temp dst)
492 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), vec, shift);
494 Temp tmp = dst.size() == 2 ? dst : bld.tmp(s2);
496 if (tmp == dst)
497 emit_split_vector(ctx, dst, 2);
499 emit_extract_vector(ctx, tmp, 0, dst);
519 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, mid);
520 emit_split_vector(ctx, dst, 2);
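
byte_align_scalar shifts a value loaded at a misaligned address into place; for a dword pair this is in effect a 64-bit shift right by 8x the byte offset (the single-dword case is the s_lshr_b32 above). A minimal model, assuming little-endian dword packing and a byte offset of 0-3:

    #include <cstdint>

    // Extract the 32-bit value that starts `offset` bytes into the
    // concatenation of two dwords (lo at the lower address).
    uint32_t byte_align(uint32_t lo, uint32_t hi, unsigned offset)
    {
        uint64_t pair = (uint64_t(hi) << 32) | lo;
        return uint32_t(pair >> (offset * 8));
    }
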
525 byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigned component_size)
543 for (unsigned i = 0; i < dst.size(); i++)
547 if (dst.size() == 2)
554 if (vec.regClass() == dst.regClass()) {
556 bld.copy(Definition(dst), vec);
557 emit_split_vector(ctx, dst, num_components);
570 if (dst.type() == RegType::vgpr) {
571 /* if dst is vgpr - split the src and create a shrunk version according to the mask. */
572 num_components = dst.bytes() / component_size;
577 create_vec->definitions[0] = Definition(dst);
581 /* if dst is sgpr - split the src, but move the original to sgpr. */
583 byte_align_scalar(ctx, vec, offset, dst);
585 assert(dst.size() == vec.size());
586 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), vec);
589 ctx->allocated_vec.emplace(dst.id(), elems);
604 bool_to_vector_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s2))
607 if (!dst.id())
608 dst = bld.tmp(bld.lm);
611 assert(dst.regClass() == bld.lm);
613 return bld.sop2(Builder::s_cselect, Definition(dst), Operand::c32(-1), Operand::zero(),
618 bool_to_scalar_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s1))
621 if (!dst.id())
622 dst = bld.tmp(s1);
625 assert(dst.regClass() == s1);
628 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.scc(Definition(dst)), val, Operand(exec, bld.lm));
629 return dst;
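
These two helpers convert between ACO's boolean shapes: a "vector" condition is a per-lane bitmask in a lane-mask register (bld.lm: s1 on wave32, s2 on wave64), built with s_cselect from -1/0, while a "scalar" condition ANDs the mask with exec so SCC means "true in some active lane". A rough wave64 model, with plain integers standing in for lane-mask registers:

    #include <cstdint>

    // Vector condition: broadcast a uniform bool to one bit per lane.
    uint64_t bool_to_vector(bool scc) { return scc ? ~uint64_t(0) : 0; }

    // Scalar condition: true iff the bool holds in at least one active lane.
    bool bool_to_scalar(uint64_t lane_mask, uint64_t exec) { return (lane_mask & exec) != 0; }
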
639 * If dst.bytes() is larger than dst_bits/8, the value of the upper bits is undefined.
643 bool sign_extend, Temp dst = Temp())
648 if (!dst.id()) {
650 dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u));
652 dst = bld.tmp(RegClass(RegType::vgpr, dst_bits / 8u).as_subdword());
656 assert(dst.type() == RegType::sgpr || dst_bits == dst.bytes() * 8);
658 if (dst.bytes() == src.bytes() && dst_bits < src_bits) {
661 return bld.copy(Definition(dst), src);
662 } else if (dst.bytes() < src.bytes()) {
663 return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::zero());
666 Temp tmp = dst;
682 if (sign_extend && dst.regClass() == s2) {
685 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
686 } else if (sign_extend && dst.regClass() == v2) {
688 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high);
690 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero());
694 return dst;
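
convert_int covers all the integer width changes: narrowing keeps the low bits (the upper bits of a wider dst stay undefined, per the comment above), widening zero- or sign-extends, and the 64-bit sign-extend paths build the high dword from the sign. The scalar equivalent, assuming two's complement and widths up to 64:

    #include <cstdint>

    uint64_t convert_int(uint64_t src, unsigned src_bits, unsigned dst_bits, bool sign_extend)
    {
        uint64_t mask = src_bits == 64 ? ~uint64_t(0) : (uint64_t(1) << src_bits) - 1;
        uint64_t v = src & mask;
        if (sign_extend && src_bits < 64 && (v >> (src_bits - 1)))
            v |= ~mask;                       // replicate the sign bit upward
        if (dst_bits < 64)
            v &= (uint64_t(1) << dst_bits) - 1;
        return v;
    }
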
704 extract_8_16_bit_sgpr_element(isel_context* ctx, Temp dst, nir_alu_src* src, sgpr_extract_mode mode)
717 Temp tmp = dst.regClass() == s2 ? bld.tmp(s1) : dst;
726 if (dst.regClass() == s2)
727 convert_int(ctx, bld, tmp, 32, 64, mode == sgpr_extract_sext, dst);
729 return dst;
775 Temp dst = ctx->program->allocateTmp(RegClass(vec.type(), elem_size * size / 4));
776 vec_instr->definitions[0] = Definition(dst);
778 ctx->allocated_vec.emplace(dst.id(), elems);
779 return vec.type() == RegType::sgpr ? Builder(ctx->program, ctx->block).as_uniform(dst) : dst;
841 emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
848 sop2->definitions[0] = Definition(dst);
868 emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode opc, Temp dst,
900 assert(dst.size() == 1);
902 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp);
905 bld.nuw().vop2(opc, Definition(dst), op[0], op[1]);
907 bld.vop2(opc, Definition(dst), op[0], op[1]);
913 emit_vop2_instruction_logic64(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
934 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
938 emit_vop3a_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
957 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1], src[2]);
959 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]);
960 if (dst.size() == 1)
961 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp);
963 bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand::c64(0x3FF0000000000000), tmp);
965 bld.vop3(op, Definition(dst), src[0], src[1], src[2]);
967 bld.vop3(op, Definition(dst), src[0], src[1]);
972 emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
989 Builder::Result res = bld.vop3p(op, Definition(dst), src0, src1, opsel_lo, opsel_hi);
994 emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
1008 bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p().clamp = clamp;
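
The idot helpers map NIR dot-product ALU ops onto the packed-dot instructions (v_dot4_i32_i8 and friends): four packed 8-bit (or two 16-bit) products are summed into a 32-bit accumulator, optionally clamped. A scalar model of the unsigned dot4 case, clamp omitted:

    #include <cstdint>

    uint32_t dot4_u32_u8(uint32_t a, uint32_t b, uint32_t acc)
    {
        for (unsigned i = 0; i < 4; i++)
            acc += ((a >> (8 * i)) & 0xff) * ((b >> (8 * i)) & 0xff);
        return acc;
    }
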
1012 emit_vop1_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1016 if (dst.type() == RegType::sgpr)
1017 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
1018 bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0])));
1020 bld.vop1(op, Definition(dst), get_alu_src(ctx, instr->src[0]));
1024 emit_vopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1064 bld.vopc(op, Definition(dst), src0, src1);
1068 emit_sopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1074 assert(dst.regClass() == bld.lm);
1082 bool_to_vector_condition(ctx, cmp, dst);
1086 emit_comparison(isel_context* ctx, nir_alu_instr* instr, Temp dst, aco_opcode v16_op,
1101 assert(dst.regClass() == ctx->program->lane_mask);
1104 emit_vopc_instruction(ctx, instr, op, dst);
1106 emit_sopc_instruction(ctx, instr, op, dst);
1111 Temp dst)
1117 assert(dst.regClass() == bld.lm);
1121 bld.sop2(op, Definition(dst), bld.def(s1, scc), src0, src1);
1125 emit_bcsel(isel_context* ctx, nir_alu_instr* instr, Temp dst)
1134 if (dst.type() == RegType::vgpr) {
1136 if (dst.size() == 1) {
1140 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), els, then, cond);
1141 } else if (dst.size() == 2) {
1150 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1158 assert(dst.regClass() == bld.lm);
1164 if (dst.regClass() == s1 || dst.regClass() == s2) {
1167 assert(dst.size() == then.size());
1169 dst.regClass() == s1 ? aco_opcode::s_cselect_b32 : aco_opcode::s_cselect_b64;
1170 bld.sop2(op, Definition(dst), then, els, bld.scc(bool_to_scalar_condition(ctx, cond)));
1178 * this implements bcsel on bools: dst = s0 ? s1 : s2,
1179 * which lowers to: dst = (s0 & s1) | (~s0 & s2) */
1186 bld.copy(Definition(dst), then);
1188 bld.sop2(Builder::s_or, Definition(dst), bld.def(s1, scc), then,
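
For the all-boolean case the comment spells out the trick: with lane masks, select is pure bit math, so no cndmask is needed. Standalone:

    #include <cstdint>

    // bcsel on lane-mask bools: each lane takes s1 where s0 is set,
    // s2 elsewhere -- dst = (s0 & s1) | (~s0 & s2).
    uint64_t bcsel_bool(uint64_t s0, uint64_t s1, uint64_t s2)
    {
        return (s0 & s1) | (~s0 & s2);
    }
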
1193 emit_scaled_op(isel_context* ctx, Builder& bld, Definition dst, Temp val, aco_opcode op,
1205 bld.vop2(aco_opcode::v_cndmask_b32, dst, not_scaled, scaled, is_denormal);
1209 emit_rcp(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1212 bld.vop1(aco_opcode::v_rcp_f32, dst, val);
1216 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_rcp_f32, 0x4b800000u);
1220 emit_rsq(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1223 bld.vop1(aco_opcode::v_rsq_f32, dst, val);
1227 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_rsq_f32, 0x45800000u);
1231 emit_sqrt(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1234 bld.vop1(aco_opcode::v_sqrt_f32, dst, val);
1238 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_sqrt_f32, 0x39800000u);
1242 emit_log2(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1245 bld.vop1(aco_opcode::v_log_f32, dst, val);
1249 emit_scaled_op(ctx, bld, dst, val, aco_opcode::v_log_f32, 0xc1c00000u);
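
The emit_scaled_op wrappers exist because v_rcp_f32 and friends flush denormal inputs on most hardware: the input is pre-scaled into the normal range, the op runs, and the cndmask keeps the compensated result only for denormal inputs (0x4b800000 is the f32 bit pattern of 2^24). The identity for rcp, as a plain sketch:

    #include <cstdio>

    // 1/x == (1/(x * 2^24)) * 2^24, so a denormal x can be scaled into
    // normal range first; cf. the 0x4b800000 (== 2^24) constant above.
    float rcp_scaled(float x)
    {
        return (1.0f / (x * 0x1p24f)) * 0x1p24f;
    }

    int main()
    {
        float denorm = 0x1p-127f;   // below FLT_MIN, would flush under FTZ
        std::printf("%a vs %a\n", double(rcp_scaled(denorm)), double(1.0f / denorm));
    }
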
1253 emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1256 return bld.vop1(aco_opcode::v_trunc_f64, Definition(dst), val);
1300 return bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst_lo, dst_hi);
1304 emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1307 return bld.vop1(aco_opcode::v_floor_f64, Definition(dst), val);
1331 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
1338 uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1342 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand::c32(-1),
1348 add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
1350 add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1);
1353 return dst.getTemp();
1357 usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1361 return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, sub.def(0).getTemp(), Operand::c32(0u),
1367 sub = bld.vop2_e64(aco_opcode::v_sub_u32, dst, src0, src1);
1369 sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1);
1372 return dst.getTemp();
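
Both saturating helpers pick the opcode by gfx level but implement the same clamp-on-carry logic. Scalar equivalents:

    #include <cstdint>

    uint32_t uadd32_sat(uint32_t a, uint32_t b)
    {
        uint32_t sum = a + b;
        return sum < a ? UINT32_MAX : sum;   // carry out -> clamp to max
    }

    uint32_t usub32_sat(uint32_t a, uint32_t b)
    {
        return a < b ? 0 : a - b;            // borrow -> clamp to zero
    }
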
1379 isel_err(&instr->instr, "nir alu dst not in ssa");
1384 Temp dst = get_ssa_temp(ctx, &instr->dest.dest.ssa);
1397 if (instr->dest.dest.ssa.bit_size >= 32 || dst.type() == RegType::vgpr) {
1406 vec->definitions[0] = Definition(dst);
1408 ctx->allocated_vec.emplace(dst.id(), elems);
1440 for (unsigned i = 0; i < dst.size(); i++) {
1460 for (unsigned i = 0; i < dst.size(); i++) {
1468 if (dst.size() == 1)
1469 bld.copy(Definition(dst), packed[0]);
1470 else if (dst.size() == 2)
1471 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), packed[0], packed[1]);
1473 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), packed[0], packed[1],
1480 if (src.type() == RegType::vgpr && dst.type() == RegType::sgpr) {
1482 assert(src.size() == dst.size() && "wrong src or dst register class for nir_op_mov");
1483 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
1485 assert(src.bytes() == dst.bytes() && "wrong src or dst register class for nir_op_mov");
1486 bld.copy(Definition(dst), src);
1492 if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
1493 emit_vop1_instruction(ctx, instr, aco_opcode::v_not_b32, dst);
1494 } else if (dst.regClass() == v2) {
1499 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
1500 } else if (dst.type() == RegType::sgpr) {
1501 aco_opcode opcode = dst.size() == 1 ? aco_opcode::s_not_b32 : aco_opcode::s_not_b64;
1502 bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
1509 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1517 bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
1521 if (dst.regClass() == s1) {
1522 bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src);
1523 } else if (dst.regClass() == v1) {
1524 bld.vop2(aco_opcode::v_max_i32, Definition(dst), src,
1526 } else if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1528 aco_opcode::v_max_i16_e64, Definition(dst), src,
1530 } else if (dst.regClass() == v2b) {
1532 bld.vop2(aco_opcode::v_max_i16, Definition(dst), src,
1541 if (dst.regClass() == s1) {
1544 bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand::c32(1u));
1545 } else if (dst.regClass() == s2) {
1557 bld.sop2(aco_opcode::s_or_b64, Definition(dst), bld.def(s1, scc), neg, bld.scc(neqz));
1558 } else if (dst.regClass() == v1) {
1559 bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand::c32(-1), src, Operand::c32(1u));
1560 } else if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX9) {
1561 bld.vop3(aco_opcode::v_med3_i16, Definition(dst), Operand::c16(-1), src, Operand::c16(1u));
1562 } else if (dst.regClass() == v2b) {
1564 bld.vop2(aco_opcode::v_max_i16, Definition(dst), Operand::c16(-1),
1566 } else if (dst.regClass() == v2) {
1572 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
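
The isign lowering leans on a clamp identity: for integers, med3(-1, x, 1) == clamp(x, -1, 1) == sign(x), which is why the v1 case is a single v_med3_i32 (and the s1 path builds the same clamp from s_max/s_min). In plain C++:

    #include <algorithm>
    #include <cstdint>

    int32_t isign(int32_t x)
    {
        return std::clamp<int32_t>(x, -1, 1);   // med3(-1, x, 1)
    }
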
1579 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1580 emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_i16_e64, dst);
1581 } else if (dst.regClass() == v2b) {
1582 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_i16, dst, true);
1583 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1584 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_max_i16, dst);
1585 } else if (dst.regClass() == v1) {
1586 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_i32, dst, true);
1587 } else if (dst.regClass() == s1) {
1588 emit_sop2_instruction(ctx, instr, aco_opcode::s_max_i32, dst, true);
1595 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1596 emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_u16_e64, dst);
1597 } else if (dst.regClass() == v2b) {
1598 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_u16, dst, true);
1599 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1600 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_max_u16, dst);
1601 } else if (dst.regClass() == v1) {
1602 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_u32, dst, true);
1603 } else if (dst.regClass() == s1) {
1604 emit_sop2_instruction(ctx, instr, aco_opcode::s_max_u32, dst, true);
1611 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1612 emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_i16_e64, dst);
1613 } else if (dst.regClass() == v2b) {
1614 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_i16, dst, true);
1615 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1616 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_min_i16, dst);
1617 } else if (dst.regClass() == v1) {
1618 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_i32, dst, true);
1619 } else if (dst.regClass() == s1) {
1620 emit_sop2_instruction(ctx, instr, aco_opcode::s_min_i32, dst, true);
1627 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1628 emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_u16_e64, dst);
1629 } else if (dst.regClass() == v2b) {
1630 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_u16, dst, true);
1631 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1632 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_min_u16, dst);
1633 } else if (dst.regClass() == v1) {
1634 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_u32, dst, true);
1635 } else if (dst.regClass() == s1) {
1636 emit_sop2_instruction(ctx, instr, aco_opcode::s_min_u32, dst, true);
1644 emit_boolean_logic(ctx, instr, Builder::s_or, dst);
1645 } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
1646 emit_vop2_instruction(ctx, instr, aco_opcode::v_or_b32, dst, true);
1647 } else if (dst.regClass() == v2) {
1648 emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_or_b32, dst);
1649 } else if (dst.regClass() == s1) {
1650 emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b32, dst, true);
1651 } else if (dst.regClass() == s2) {
1652 emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b64, dst, true);
1660 emit_boolean_logic(ctx, instr, Builder::s_and, dst);
1661 } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
1662 emit_vop2_instruction(ctx, instr, aco_opcode::v_and_b32, dst, true);
1663 } else if (dst.regClass() == v2) {
1664 emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_and_b32, dst);
1665 } else if (dst.regClass() == s1) {
1666 emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b32, dst, true);
1667 } else if (dst.regClass() == s2) {
1668 emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b64, dst, true);
1676 emit_boolean_logic(ctx, instr, Builder::s_xor, dst);
1677 } else if (dst.regClass() == v1 || dst.regClass() == v2b || dst.regClass() == v1b) {
1678 emit_vop2_instruction(ctx, instr, aco_opcode::v_xor_b32, dst, true);
1679 } else if (dst.regClass() == v2) {
1680 emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_xor_b32, dst);
1681 } else if (dst.regClass() == s1) {
1682 emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b32, dst, true);
1683 } else if (dst.regClass() == s2) {
1684 emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b64, dst, true);
1691 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1692 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshrrev_b16_e64, dst, false, 2, true);
1693 } else if (dst.regClass() == v2b) {
1694 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshrrev_b16, dst, false, true);
1695 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1696 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_lshrrev_b16, dst, true);
1697 } else if (dst.regClass() == v1) {
1698 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshrrev_b32, dst, false, true);
1699 } else if (dst.regClass() == v2 && ctx->program->gfx_level >= GFX8) {
1700 bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1702 } else if (dst.regClass() == v2) {
1703 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshr_b64, dst);
1704 } else if (dst.regClass() == s2) {
1705 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b64, dst, true);
1706 } else if (dst.regClass() == s1) {
1707 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b32, dst, true);
1714 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1715 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshlrev_b16_e64, dst, false, 2, true);
1716 } else if (dst.regClass() == v2b) {
1717 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshlrev_b16, dst, false, true);
1718 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1719 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_lshlrev_b16, dst, true);
1720 } else if (dst.regClass() == v1) {
1721 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshlrev_b32, dst, false, true, false,
1723 } else if (dst.regClass() == v2 && ctx->program->gfx_level >= GFX8) {
1724 bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1726 } else if (dst.regClass() == v2) {
1727 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshl_b64, dst);
1728 } else if (dst.regClass() == s1) {
1729 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b32, dst, true, 1);
1730 } else if (dst.regClass() == s2) {
1731 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b64, dst, true);
1738 if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
1739 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ashrrev_i16_e64, dst, false, 2, true);
1740 } else if (dst.regClass() == v2b) {
1741 emit_vop2_instruction(ctx, instr, aco_opcode::v_ashrrev_i16, dst, false, true);
1742 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1743 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_ashrrev_i16, dst, true);
1744 } else if (dst.regClass() == v1) {
1745 emit_vop2_instruction(ctx, instr, aco_opcode::v_ashrrev_i32, dst, false, true);
1746 } else if (dst.regClass() == v2 && ctx->program->gfx_level >= GFX8) {
1747 bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1749 } else if (dst.regClass() == v2) {
1750 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ashr_i64, dst);
1751 } else if (dst.regClass() == s1) {
1752 emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i32, dst, true);
1753 } else if (dst.regClass() == s2) {
1754 emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i64, dst, true);
1763 bld.sop1(aco_opcode::s_ff1_i32_b32, Definition(dst), src);
1765 emit_vop1_instruction(ctx, instr, aco_opcode::v_ffbl_b32, dst);
1767 bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src);
1789 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), msb,
1799 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), msb, msb_rev, carry);
1817 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), msb, msb_rev, carry);
1827 bld.sop2(aco_opcode::s_min_u32, Definition(dst), Operand::c32(32u), msb_rev);
1830 bld.vop2(aco_opcode::v_min_u32, Definition(dst), Operand::c32(32u), msb_rev);
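
The bit-scan lowerings normalize to NIR's conventions: find_lsb is a trailing-zero count (s_ff1/v_ffbl) and the find-MSB forms return -1 for a zero input, hence the cselect/cndmask and min fix-ups around the raw results. As C++20 one-liners:

    #include <bit>
    #include <cstdint>

    int find_lsb(uint32_t v)  { return v ? std::countr_zero(v) : -1; }
    int ufind_msb(uint32_t v) { return v ? 31 - std::countl_zero(v) : -1; }
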
1837 if (dst.regClass() == s1) {
1838 bld.sop1(aco_opcode::s_brev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1839 } else if (dst.regClass() == v1) {
1840 bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1847 if (dst.regClass() == s1) {
1848 emit_sop2_instruction(ctx, instr, aco_opcode::s_add_u32, dst, true);
1850 } else if (dst.bytes() <= 2 && ctx->program->gfx_level >= GFX10) {
1851 emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_u16_e64, dst);
1853 } else if (dst.bytes() <= 2 && ctx->program->gfx_level >= GFX8) {
1854 emit_vop2_instruction(ctx, instr, aco_opcode::v_add_u16, dst, true);
1856 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1857 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
1863 if (dst.type() == RegType::vgpr && dst.bytes() <= 4) {
1864 bld.vadd32(Definition(dst), Operand(src0), Operand(src1));
1870 Temp src01 = bld.tmp(dst.type(), 1);
1873 Temp src11 = bld.tmp(dst.type(), 1);
1876 if (dst.regClass() == s2) {
1882 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1883 } else if (dst.regClass() == v2) {
1887 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1894 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1896 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
1902 if (dst.regClass() == s1) {
1905 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), tmp,
1908 } else if (dst.regClass() == v2b) {
1911 add_instr = bld.vop3(aco_opcode::v_add_u16_e64, Definition(dst), src0, src1).instr;
1916 bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
1920 } else if (dst.regClass() == v1) {
1921 uadd32_sat(bld, Definition(dst), src0, src1);
1934 if (dst.regClass() == s2) {
1945 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(-1), no_sat,
1947 } else if (dst.regClass() == v2) {
1970 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
1977 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1979 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
1985 if (dst.regClass() == s1) {
1992 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, add, bld.scc(overflow));
1998 if (dst.regClass() == v2b) {
2000 bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr;
2002 } else if (dst.regClass() == v1) {
2004 bld.vop3(aco_opcode::v_add_i32, Definition(dst), src0, src1).instr;
2014 if (dst.regClass() == s1) {
2015 bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1);
2018 if (dst.regClass() == v1) {
2020 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u),
2026 Temp src01 = bld.tmp(dst.type(), 1);
2029 Temp src11 = bld.tmp(dst.type(), 1);
2031 if (dst.regClass() == s2) {
2038 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero());
2039 } else if (dst.regClass() == v2) {
2044 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero());
2051 if (dst.regClass() == s1) {
2052 emit_sop2_instruction(ctx, instr, aco_opcode::s_sub_i32, dst, true);
2054 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2055 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_u16, dst);
2061 if (dst.regClass() == v1) {
2062 bld.vsub32(Definition(dst), src0, src1);
2064 } else if (dst.bytes() <= 2) {
2066 bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1);
2068 bld.vop2(aco_opcode::v_subrev_u16, Definition(dst), src1, as_vgpr(ctx, src0));
2070 bld.vop2(aco_opcode::v_sub_u16, Definition(dst), src0, as_vgpr(ctx, src1));
2072 bld.vsub32(Definition(dst), src0, src1);
2077 Temp src01 = bld.tmp(dst.type(), 1);
2080 Temp src11 = bld.tmp(dst.type(), 1);
2082 if (dst.regClass() == s2) {
2088 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
2089 } else if (dst.regClass() == v2) {
2093 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
2102 if (dst.regClass() == s1) {
2103 bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.scc(Definition(dst)), src0, src1);
2105 } else if (dst.regClass() == v1) {
2107 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u),
2113 Temp src01 = bld.tmp(dst.type(), 1);
2116 Temp src11 = bld.tmp(dst.type(), 1);
2118 if (dst.regClass() == s2) {
2125 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero());
2126 } else if (dst.regClass() == v2) {
2131 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero());
2138 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2139 Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_u16, dst);
2145 if (dst.regClass() == s1) {
2148 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(0), tmp, bld.scc(carry));
2150 } else if (dst.regClass() == v2b) {
2153 sub_instr = bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1).instr;
2160 sub_instr = bld.vop2_e64(op, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
2164 } else if (dst.regClass() == v1) {
2165 usub32_sat(bld, Definition(dst), src0, as_vgpr(ctx, src1));
2177 if (dst.regClass() == s2) {
2188 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(0ull), no_sat,
2190 } else if (dst.regClass() == v2) {
2213 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
2220 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2221 Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_i16, dst);
2227 if (dst.regClass() == s1) {
2234 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, sub, bld.scc(overflow));
2240 if (dst.regClass() == v2b) {
2242 bld.vop3(aco_opcode::v_sub_i16, Definition(dst), src0, src1).instr;
2244 } else if (dst.regClass() == v1) {
2246 bld.vop3(aco_opcode::v_sub_i32, Definition(dst), src0, src1).instr;
2254 if (dst.bytes() <= 2 && ctx->program->gfx_level >= GFX10) {
2255 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u16_e64, dst);
2256 } else if (dst.bytes() <= 2 && ctx->program->gfx_level >= GFX8) {
2257 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_lo_u16, dst, true);
2258 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2259 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_mul_lo_u16, dst);
2260 } else if (dst.type() == RegType::vgpr) {
2266 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_u32_u24, dst,
2269 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[1]),
2272 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[0]),
2275 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
2277 } else if (dst.regClass() == s1) {
2278 emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
2285 if (dst.regClass() == s1 && ctx->options->gfx_level >= GFX9) {
2286 emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_u32, dst, false);
2287 } else if (dst.bytes() == 4) {
2291 Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst;
2298 if (dst.regClass() == s1)
2299 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
2306 if (dst.regClass() == v1) {
2307 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_i32, dst);
2308 } else if (dst.regClass() == s1 && ctx->options->gfx_level >= GFX9) {
2309 emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_i32, dst, false);
2310 } else if (dst.regClass() == s1) {
2313 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
2320 if (dst.regClass() == v2b) {
2321 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f16, dst, true);
2322 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2323 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_mul_f16, dst);
2324 } else if (dst.regClass() == v1) {
2325 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true);
2326 } else if (dst.regClass() == v2) {
2327 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_f64, dst);
2334 if (dst.regClass() == v1) {
2335 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_legacy_f32, dst, true);
2342 if (dst.regClass() == v2b) {
2343 emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f16, dst, true);
2344 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2345 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
2346 } else if (dst.regClass() == v1) {
2347 emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true);
2348 } else if (dst.regClass() == v2) {
2349 emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_f64, dst);
2356 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2357 Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
2366 if (dst.regClass() == v2b) {
2368 emit_vop2_instruction(ctx, instr, aco_opcode::v_sub_f16, dst, false);
2370 emit_vop2_instruction(ctx, instr, aco_opcode::v_subrev_f16, dst, true);
2371 } else if (dst.regClass() == v1) {
2373 emit_vop2_instruction(ctx, instr, aco_opcode::v_sub_f32, dst, false);
2375 emit_vop2_instruction(ctx, instr, aco_opcode::v_subrev_f32, dst, true);
2376 } else if (dst.regClass() == v2) {
2377 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0),
2386 if (dst.regClass() == v2b) {
2387 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
2388 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2402 bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi);
2403 } else if (dst.regClass() == v1) {
2404 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f32, dst,
2406 } else if (dst.regClass() == v2) {
2407 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f64, dst, false, 3);
2414 if (dst.regClass() == v1) {
2415 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
2423 if (dst.regClass() == v2b) {
2425 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f16, dst, true);
2426 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2427 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_max_f16, dst);
2428 } else if (dst.regClass() == v1) {
2429 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f32, dst, true, false,
2431 } else if (dst.regClass() == v2) {
2432 emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_f64, dst,
2440 if (dst.regClass() == v2b) {
2442 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f16, dst, true);
2443 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2444 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_min_f16, dst, true);
2445 } else if (dst.regClass() == v1) {
2446 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f32, dst, true, false,
2448 } else if (dst.regClass() == v2) {
2449 emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_f64, dst,
2457 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
2461 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
2465 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_u32_u8, dst, false);
2469 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_u32_u8, dst, true);
2473 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_i32_i16, dst, false);
2477 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_i32_i16, dst, true);
2481 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, false);
2485 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, true);
2500 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
2507 bld.vop3(aco_opcode::v_cubeid_f32, Definition(dst), src[0], src[1], src[2]);
2511 emit_bcsel(ctx, instr, dst);
2515 if (dst.regClass() == v2b) {
2516 emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f16, dst);
2517 } else if (dst.regClass() == v1) {
2519 emit_rsq(ctx, bld, Definition(dst), src);
2520 } else if (dst.regClass() == v2) {
2522 emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst);
2529 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2532 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
2539 if (dst.regClass() == v2b) {
2540 bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0xbc00u), as_vgpr(ctx, src));
2541 } else if (dst.regClass() == v1) {
2542 bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0xbf800000u),
2544 } else if (dst.regClass() == v2) {
2551 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
2558 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2561 bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
2569 if (dst.regClass() == v2b) {
2570 Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
2574 } else if (dst.regClass() == v1) {
2575 Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f32, Definition(dst),
2579 } else if (dst.regClass() == v2) {
2586 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
2593 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2596 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
2602 if (dst.regClass() == v2b) {
2603 bld.vop3(aco_opcode::v_med3_f16, Definition(dst), Operand::c16(0u), Operand::c16(0x3c00),
2605 } else if (dst.regClass() == v1) {
2606 bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::zero(),
2611 } else if (dst.regClass() == v2) {
2612 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand::zero());
2620 if (dst.regClass() == v2b) {
2621 emit_vop1_instruction(ctx, instr, aco_opcode::v_log_f16, dst);
2622 } else if (dst.regClass() == v1) {
2624 emit_log2(ctx, bld, Definition(dst), src);
2631 if (dst.regClass() == v2b) {
2632 emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f16, dst);
2633 } else if (dst.regClass() == v1) {
2635 emit_rcp(ctx, bld, Definition(dst), src);
2636 } else if (dst.regClass() == v2) {
2638 emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst);
2645 if (dst.regClass() == v2b) {
2646 emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f16, dst);
2647 } else if (dst.regClass() == v1) {
2648 emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f32, dst);
2655 if (dst.regClass() == v2b) {
2656 emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f16, dst);
2657 } else if (dst.regClass() == v1) {
2659 emit_sqrt(ctx, bld, Definition(dst), src);
2660 } else if (dst.regClass() == v2) {
2662 emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst);
2669 if (dst.regClass() == v2b) {
2670 emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f16, dst);
2671 } else if (dst.regClass() == v1) {
2672 emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f32, dst);
2673 } else if (dst.regClass() == v2) {
2674 emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f64, dst);
2681 if (dst.regClass() == v2b) {
2682 emit_vop1_instruction(ctx, instr, aco_opcode::v_floor_f16, dst);
2683 } else if (dst.regClass() == v1) {
2684 emit_vop1_instruction(ctx, instr, aco_opcode::v_floor_f32, dst);
2685 } else if (dst.regClass() == v2) {
2687 emit_floor_f64(ctx, bld, Definition(dst), src);
2694 if (dst.regClass() == v2b) {
2695 emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f16, dst);
2696 } else if (dst.regClass() == v1) {
2697 emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f32, dst);
2698 } else if (dst.regClass() == v2) {
2700 emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f64, dst);
2718 bld.vop3(aco_opcode::v_add_f64, Definition(dst), trunc, add);
2726 if (dst.regClass() == v2b) {
2727 emit_vop1_instruction(ctx, instr, aco_opcode::v_trunc_f16, dst);
2728 } else if (dst.regClass() == v1) {
2729 emit_vop1_instruction(ctx, instr, aco_opcode::v_trunc_f32, dst);
2730 } else if (dst.regClass() == v2) {
2732 emit_trunc_f64(ctx, bld, Definition(dst), src);
2739 if (dst.regClass() == v2b) {
2740 emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f16, dst);
2741 } else if (dst.regClass() == v1) {
2742 emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f32, dst);
2743 } else if (dst.regClass() == v2) {
2745 emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f64, dst);
2779 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
2790 if (dst.regClass() == v2b) {
2793 bld.vop1(opcode, Definition(dst), src);
2794 } else if (dst.regClass() == v1) {
2801 bld.vop1(opcode, Definition(dst), src);
2808 if (dst.regClass() == v2b) {
2809 emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
2810 } else if (dst.regClass() == v1) {
2811 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
2812 } else if (dst.regClass() == v2) {
2813 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f64, dst);
2820 if (dst.regClass() == v2b) {
2821 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_mant_f16, dst);
2822 } else if (dst.regClass() == v1) {
2823 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_mant_f32, dst);
2824 } else if (dst.regClass() == v2) {
2825 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_mant_f64, dst);
2836 convert_int(ctx, bld, tmp, 8, 32, true, dst);
2838 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_exp_i32_f32, dst);
2840 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_exp_i32_f64, dst);
2848 if (dst.regClass() == v2b) {
2854 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
2855 } else if (dst.regClass() == v1) {
2859 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
2860 } else if (dst.regClass() == v2) {
2870 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
2885 bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
2887 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2895 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2897 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand::zero());
2899 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, as_vgpr(ctx, src));
2904 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f16, dst);
2906 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f64, dst);
2916 bld.vop1(aco_opcode::v_cvt_f64_f32, Definition(dst), src);
2920 assert(dst.regClass() == v2b);
2939 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
2950 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2955 assert(dst.size() == 1);
2963 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
2973 bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper);
2983 bld.vop1(aco_opcode::v_cvt_f64_i32, Definition(dst), src);
2992 bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
3000 assert(dst.regClass() == v2b);
3021 bld.vop1(aco_opcode::v_cvt_f16_u16, Definition(dst), src);
3025 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
3030 assert(dst.size() == 1);
3034 bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src);
3038 bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(dst), src);
3048 bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper);
3057 bld.vop1(aco_opcode::v_cvt_f64_u32, Definition(dst), src);
3066 bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper);
3076 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i16_f16, dst);
3083 (dst.type() == RegType::sgpr) ? Temp() : dst);
3084 if (dst.type() == RegType::sgpr) {
3085 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
3089 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f32, dst);
3091 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f64, dst);
3099 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u16_f16, dst);
3106 (dst.type() == RegType::sgpr) ? Temp() : dst);
3107 if (dst.type() == RegType::sgpr) {
3108 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
3112 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f32, dst);
3114 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
3122 if (dst.type() == RegType::vgpr) {
3123 bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(dst), tmp);
3125 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
3129 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f32, dst);
3131 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f64, dst);
3141 if (dst.type() == RegType::vgpr) {
3142 bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(dst), tmp);
3144 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst),
3148 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f32, dst);
3150 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
3161 if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) {
3188 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), new_lower, new_upper);
3190 } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) {
3227 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3240 if (dst.type() == RegType::sgpr) {
3244 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3256 if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) {
3282 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3284 } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) {
3317 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3330 if (dst.type() == RegType::sgpr) {
3334 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper);
3345 if (dst.regClass() == s1) {
3347 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3c00u), src);
3348 } else if (dst.regClass() == v2b) {
3350 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), one, src);
3360 if (dst.regClass() == s1) {
3362 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3f800000u), src);
3363 } else if (dst.regClass() == v1) {
3364 bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(),
3375 if (dst.regClass() == s2) {
3377 bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c32(0x3f800000u),
3379 } else if (dst.regClass() == v2) {
3383 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
3393 if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
3398 extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
3403 output_bitsize > input_bitsize, dst);
3411 if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
3416 extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
3419 instr->dest.dest.ssa.bit_size, false, dst);
3431 Temp tmp = dst.bytes() == 8 ? bld.tmp(RegClass::get(dst.type(), 4)) : dst;
3441 if (tmp != dst)
3442 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero());
3448 assert(dst.regClass() == bld.lm);
3452 assert(dst.regClass() == bld.lm);
3454 Definition(dst), Operand::zero(), src);
3467 bool_to_vector_condition(ctx, tmp, dst);
3474 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3475 emit_split_vector(ctx, dst, instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
3481 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1);
3485 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()),
3489 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst),
3493 if (dst.type() == RegType::vgpr) {
3494 bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(dst.regClass()),
3497 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3501 if (dst.type() == RegType::vgpr) {
3502 bld.pseudo(aco_opcode::p_split_vector, bld.def(dst.regClass()), Definition(dst),
3505 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
3513 if (dst.regClass() == v1) {
3516 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1);
3522 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), src0, src1);
3526 case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
3528 if (dst.regClass() == v1) {
3530 emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
3532 emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
3545 bld.vop3(opcode, Definition(dst), src0, src1);
3555 bld.vop3(opcode, Definition(dst), src0, src1);
3563 if (dst.regClass() == v1) {
3566 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
3581 if (dst.regClass() == v1) {
3584 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
3591 assert(dst.regClass() == v1);
3592 emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false);
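
nir_op_sad_u8 maps directly onto v_sad_u8: both operands are treated as four packed bytes and the sum of absolute byte differences is accumulated into the third source. Scalar model:

    #include <cstdint>
    #include <cstdlib>

    uint32_t sad_u8(uint32_t a, uint32_t b, uint32_t acc)
    {
        for (unsigned i = 0; i < 4; i++) {
            int d = int((a >> (8 * i)) & 0xff) - int((b >> (8 * i)) & 0xff);
            acc += std::abs(d);
        }
        return acc;
    }
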
3621 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), copysign_0, f32, cmp_res);
3623 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), f32, cmp_res);
3631 if (dst.regClass() == s1) {
3632 bld.sop2(aco_opcode::s_bfm_b32, Definition(dst), bits, offset);
3633 } else if (dst.regClass() == v1) {
3634 bld.vop3(aco_opcode::v_bfm_b32, Definition(dst), bits, offset);
3642 /* dst = (insert & bitmask) | (base & ~bitmask) */
3643 if (dst.regClass() == s1) {
3668 bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), rhs, lhs);
3670 } else if (dst.regClass() == v1) {
3671 emit_vop3a_instruction(ctx, instr, aco_opcode::v_bfi_b32, dst, false, 3);
3679 if (dst.bytes() != 4)
3682 if (dst.type() == RegType::sgpr) {
3691 bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract));
3701 bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
3713 bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
3719 emit_vop3a_instruction(ctx, instr, opcode, dst, false, 3);
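
The bitfield group boils down to three primitives: bfm builds `bits` ones at `offset`, bfi merges exactly as the comment says (dst = (insert & bitmask) | (base & ~bitmask)), and bfe extracts a field (sign-extending in the signed form). Standalone equivalents; note the real opcodes read width/offset from narrow bit ranges, which these guards only approximate:

    #include <cstdint>

    uint32_t bfm(unsigned bits, unsigned offset)
    {
        uint32_t mask = bits >= 32 ? ~0u : (uint32_t(1) << bits) - 1;
        return mask << offset;
    }

    uint32_t bfi(uint32_t bitmask, uint32_t insert, uint32_t base)
    {
        return (insert & bitmask) | (base & ~bitmask);
    }

    uint32_t ubfe(uint32_t base, unsigned offset, unsigned bits)
    {
        return (base >> offset) & (bits >= 32 ? ~0u : (uint32_t(1) << bits) - 1);
    }
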
3733 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3734 } else if (dst.regClass() == s1 && instr->dest.dest.ssa.bit_size == 16) {
3742 bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(vec),
3746 Definition def(dst);
3747 if (dst.bytes() == 8) {
3761 if (dst.size() == 2)
3762 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
3774 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3777 Definition def(dst);
3779 if (dst.bytes() == 8) {
3793 if (dst.size() == 2 && swap)
3794 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(),
3796 else if (dst.size() == 2)
3797 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(),
3805 bld.sop1(aco_opcode::s_bcnt1_i32_b32, Definition(dst), bld.def(s1, scc), src);
3807 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), src, Operand::zero());
3809 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), emit_extract_vector(ctx, src, 1, v1),
3813 bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src);
3820 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_f16, aco_opcode::v_cmp_lt_f32,
3825 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_f16, aco_opcode::v_cmp_ge_f32,
3830 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32,
3835 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32,
3840 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_i16, aco_opcode::v_cmp_lt_i32,
3845 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_i16, aco_opcode::v_cmp_ge_i32,
3851 emit_boolean_logic(ctx, instr, Builder::s_xnor, dst);
3854 ctx, instr, dst, aco_opcode::v_cmp_eq_i16, aco_opcode::v_cmp_eq_i32,
3861 emit_boolean_logic(ctx, instr, Builder::s_xor, dst);
3864 ctx, instr, dst, aco_opcode::v_cmp_lg_i16, aco_opcode::v_cmp_lg_i32,
3870 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_u16, aco_opcode::v_cmp_lt_u32,
3875 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_u16, aco_opcode::v_cmp_ge_u32,
3889 bld.copy(Definition(dst), Operand::zero());
3918 emit_wqm(bld, tmp, dst, true);
3928 Temp dst = get_ssa_temp(ctx, &instr->def);
3934 assert(dst.type() == RegType::sgpr);
3939 assert(dst.regClass() == bld.lm);
3942 bld.copy(Definition(dst), op);
3944 bld.copy(Definition(dst), Operand::c32(instr->value[0].u8));
3947 bld.copy(Definition(dst), Operand::c32(instr->value[0].i16));
3948 } else if (dst.size() == 1) {
3949 bld.copy(Definition(dst), Operand::c32(instr->value[0].u32));
3951 assert(dst.size() != 1);
3953 aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
3955 for (unsigned i = 0; i < dst.size(); i++)
3958 for (unsigned i = 0; i < dst.size(); i++)
3961 vec->definitions[0] = Definition(dst);
3985 Temp dst;
4020 Temp* const vals = (Temp*)alloca(info.dst.bytes() * sizeof(Temp));
4131 reduced_const_offset, byte_align ? Temp() : info.dst);
4133 /* the callback wrote directly to dst */
4134 if (val == info.dst) {
4136 emit_split_vector(ctx, info.dst, info.num_components);
4155 byte_align_scalar(ctx, val, byte_align_off, info.dst);
4157 byte_align_vector(ctx, val, byte_align_off, info.dst, component_size);
4230 if (allocated_vec[j].bytes() % 4 == 0 && info.dst.type() == RegType::sgpr)
4237 if (info.dst.type() == RegType::vgpr || !has_vgprs)
4238 ctx->allocated_vec.emplace(info.dst.id(), allocated_vec);
4241 MAX2((int)info.dst.bytes() - int(allocated_vec[0].bytes() * info.num_components), 0);
4249 if (info.dst.type() == RegType::sgpr && has_vgprs) {
4250 Temp tmp = bld.tmp(RegType::vgpr, info.dst.size());
4253 bld.pseudo(aco_opcode::p_as_uniform, Definition(info.dst), tmp);
4255 vec->definitions[0] = Definition(info.dst);
4324 Temp val = rc == info.dst.regClass() && dst_hint.id() ? dst_hint : bld.tmp(rc);
4690 load_lds(isel_context* ctx, unsigned elem_size_bytes, unsigned num_components, Temp dst,
4697 LoadEmitInfo info = {Operand(as_vgpr(ctx, address)), dst, num_components, elem_size_bytes};
4704 return dst;
4708 split_store_data(isel_context* ctx, RegType dst_type, unsigned count, Temp* dst, unsigned* bytes,
4719 dst[0] = bld.as_uniform(src);
4721 dst[0] = as_vgpr(ctx, src);
4733 dst[i] = bld.tmp(RegClass::get(dst_type, bytes[i]));
4776 unsigned op_count = dst[i].bytes() / elem_size_bytes;
4779 dst[i] = bld.as_uniform(temps[idx++]);
4781 dst[i] = as_vgpr(ctx, temps[idx++]);
4793 vec->definitions[0] = Definition(dst[i]);
5013 unsigned elem_size_bytes, unsigned split_cnt = 0u, Temp dst = Temp())
5018 if (!dst.id())
5019 dst = bld.tmp(RegClass(reg_type, cnt * dword_size));
5024 instr->definitions[0] = Definition(dst);
5042 emit_split_vector(ctx, dst, split_cnt);
5044 ctx->allocated_vec.emplace(dst.id(), allocated_vec); /* emit_split_vector already does this */
5046 return dst;
5120 load_vmem_mubuf(isel_context* ctx, Temp dst, Temp descriptor, Temp voffset, Temp soffset,
5126 assert((num_components * elem_size_bytes) == dst.bytes());
5131 LoadEmitInfo info = {Operand(voffset), dst, num_components, elem_size_bytes, descriptor};
5212 load_input_from_temps(isel_context* ctx, nir_intrinsic_instr* instr, Temp dst)
5234 create_vec_from_array(ctx, src, dst.size(), dst.regClass().type(), 4u, 0, dst);
5259 emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
5267 if (dst.regClass() == v2b) {
5275 bld.vintrp(aco_opcode::v_interp_p2_legacy_f16, Definition(dst), coord2, bld.m0(prim_mask),
5285 bld.vintrp(interp_p2_op, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx,
5295 bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1,
5301 emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
5322 vec->definitions[0] = Definition(dst);
5324 emit_split_vector(ctx, dst, num_components);
5329 emit_load_frag_shading_rate(isel_context* ctx, Temp dst)
5352 bld.vop2(aco_opcode::v_or_b32, Definition(dst), Operand(x_rate), Operand(y_rate));
5358 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5367 emit_interp_instr(ctx, idx, component, coords, dst, prim_mask);
5376 vec->definitions[0] = Definition(dst);
5452 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5481 vec->definitions[0] = Definition(dst);
5483 ctx->allocated_vec.emplace(dst.id(), elems);
5627 if (channel_start == 0 && fetch_bytes == dst.bytes() && num_channels <= 3) {
5629 fetch_dst = dst;
5685 vec->definitions[0] = Definition(dst);
5687 emit_split_vector(ctx, dst, num_components);
5690 ctx->allocated_vec.emplace(dst.id(), elems);
5721 bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id),
5736 vec->definitions[0] = Definition(dst);
5750 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5752 if (load_input_from_temps(ctx, instr, dst))
5773 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5785 Temp tess_coord = bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tes_u, tes_v, tes_w);
5790 load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size, Temp dst,
5797 dst.type() != RegType::vgpr && (!glc || ctx->options->gfx_level >= GFX8) && allow_smem;
5808 LoadEmitInfo info = {Operand(offset), dst, num_components, component_size, rsrc};
5822 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5827 load_buffer(ctx, instr->num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
5835 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5857 vec->definitions[0] = Definition(dst);
5859 ctx->allocated_vec.emplace(dst.id(), elems);
5869 Temp vec = dst;
5906 byte_align_scalar(ctx, vec, byte_offset, dst);
5912 RegClass rc = dst.size() == 3 ? s1 : s2;
5913 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), emit_extract_vector(ctx, vec, 0, rc),
5916 emit_split_vector(ctx, dst, instr->dest.ssa.num_components);
5922 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5955 load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0);
6022 emit_mimg(Builder& bld, aco_opcode op, Definition dst, Temp rsrc, Operand samp,
6066 create_instruction<MIMG_instruction>(op, Format::MIMG, 3 + coords.size(), dst.isTemp())};
6067 if (dst.isTemp())
6068 mimg->definitions[0] = dst;
6084 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6106 MIMG_instruction* mimg = emit_mimg(bld, aco_opcode::image_bvh64_intersect_ray, Definition(dst),
6192 emit_tfe_init(Builder& bld, Temp dst)
6194 Temp tmp = bld.tmp(dst.regClass());
6197 aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
6198 for (unsigned i = 0; i < dst.size(); i++)
6218 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6244 if (num_bytes == dst.bytes() && dst.type() == RegType::vgpr)
6245 tmp = dst;
6316 expand_vector(ctx, tmp, dst, instr->dest.ssa.num_components, expand_mask,
6520 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6535 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6547 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6554 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6567 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
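Both atomic paths above share one pattern: the old value is only defined when the NIR destination is actually read (return_previous), and for compare-swap the definition takes the register class of the packed data/compare pair, so the previous value is then pulled out of element 0 with p_extract_vector. The memory semantics being selected, as a plain sketch:

    #include <cstdint>

    // Plain CAS semantics; the returned "previous" value is what the
    // p_extract_vector on the quoted lines recovers from the paired result.
    uint32_t atomic_cmpswap(uint32_t* mem, uint32_t compare, uint32_t data)
    {
        uint32_t old = *mem;
        if (old == compare)
            *mem = data;
        return old;
    }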
6572 get_buffer_size(isel_context* ctx, Temp desc, Temp dst)
6592 Temp shr_dst = dst.type() == RegType::vgpr ? bld.tmp(s1) : dst;
6595 if (dst.type() == RegType::vgpr)
6596 bld.copy(Definition(dst), shr_dst);
6600 emit_extract_vector(ctx, desc, 2, dst);
6623 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6626 emit_mimg(bld, aco_opcode::image_get_resinfo, Definition(dst), resource, Operand(s4), lod);
6637 emit_split_vector(ctx, dst, instr->dest.ssa.num_components);
6641 get_image_samples(isel_context* ctx, Definition dst, Temp resource)
6665 bld.sop2(aco_opcode::s_cselect_b32, dst, samples, default_sample, bld.scc(is_msaa));
6672 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6674 get_image_samples(ctx, Definition(dst), resource);
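get_image_samples decodes the sample count from the resource descriptor; the s_cselect_b32 above falls back to a default when the descriptor type is not an MSAA image. A scalar sketch, assuming the count is stored as a log2 field and the non-MSAA default is a single sample:

    #include <cstdint>

    // Decoded count is 1 << log2_samples, but only for MSAA resource
    // types; otherwise the cselect above picks the default.
    uint32_t image_samples(uint32_t log2_samples_field, bool is_msaa)
    {
        return is_msaa ? (1u << log2_samples_field) : 1u;
    }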
6683 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6692 load_buffer(ctx, num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
6760 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6824 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
6836 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
6891 if (info.dst.type() == RegType::vgpr || (info.glc && ctx->options->gfx_level < GFX8) ||
7010 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7089 flat->definitions[0] = Definition(dst);
7166 return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
7178 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
7206 Temp dst = get_ssa_temp(ctx, &intrin->dest.ssa);
7223 load_vmem_mubuf(ctx, dst, descriptor, v_offset, s_offset, const_offset, elem_size_bytes,
7253 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7260 assert(dst.bytes() <= 64);
7262 if (dst.bytes() > 32) {
7265 } else if (dst.bytes() > 16) {
7268 } else if (dst.bytes() > 8) {
7271 } else if (dst.bytes() > 4) {
7276 if (dst.size() != size) {
7277 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst),
7280 bld.smem(opcode, Definition(dst), base, offset);
7282 emit_split_vector(ctx, dst, instr->dest.ssa.num_components);
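The dst.bytes() ladder above picks the narrowest s_load_dword{,x2,x4,x8,x16} that covers the request, then trims the surplus dwords with p_extract_vector when the load is wider than dst. The same ladder as a standalone function:

    // Dwords fetched by the scalar load, following the quoted thresholds
    // (>32 bytes -> x16, >16 -> x8, >8 -> x4, >4 -> x2, else x1).
    unsigned smem_load_dwords(unsigned dst_bytes)
    {
        if (dst_bytes > 32) return 16;
        if (dst_bytes > 16) return 8;
        if (dst_bytes > 8)  return 4;
        if (dst_bytes > 4)  return 2;
        return 1;
    }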
7360 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7367 load_lds(ctx, elem_size_bytes, num_components, dst, address, nir_intrinsic_base(instr), align);
7539 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7540 Definition tmp_dst(dst.type() == RegType::vgpr ? dst : bld.tmp(is64bit ? v4 : v2));
7550 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7551 if (dst.type() == RegType::sgpr) {
7552 emit_split_vector(ctx, ds->definitions[0].getTemp(), dst.size());
7555 for (unsigned i = 0; i < dst.size(); i++)
7562 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), comp0, comp1);
7563 ctx->allocated_vec[dst.id()] = {comp0, comp1};
7565 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), comp[0], comp[1]);
7569 emit_split_vector(ctx, dst, 2);
7607 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7609 LoadEmitInfo info = {Operand(v1), dst, instr->dest.ssa.num_components,
7970 Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
7971 assert(dst.regClass().type() != RegType::vgpr);
7973 bld.pseudo(aco_opcode::p_as_uniform, dst, src);
7975 bld.copy(dst, src);
7979 emit_addition_uniform_reduce(isel_context* ctx, nir_op op, Definition dst, nir_src src, Temp count)
7986 Temp tmp = dst.regClass() == s1 ? bld.tmp(RegClass::get(RegType::vgpr, src.ssa->bit_size / 8))
7987 : dst.getTemp();
7998 if (tmp != dst.getTemp())
7999 bld.pseudo(aco_opcode::p_as_uniform, dst, tmp);
8004 if (dst.regClass() == s1)
8013 assert(dst.getTemp().type() == count.type());
8016 if (nir_src_as_uint(src) == 1 && dst.bytes() <= 2)
8017 bld.pseudo(aco_opcode::p_extract_vector, dst, count, Operand::zero());
8019 bld.copy(dst, count);
8021 bld.copy(dst, Operand::zero(dst.bytes()));
8023 bld.v_mul_imm(dst, count, nir_src_as_uint(src));
8025 bld.sop2(aco_opcode::s_mul_i32, dst, src_tmp, count);
8026 } else if (dst.bytes() <= 2 && ctx->program->gfx_level >= GFX10) {
8027 bld.vop3(aco_opcode::v_mul_lo_u16_e64, dst, src_tmp, count);
8028 } else if (dst.bytes() <= 2 && ctx->program->gfx_level >= GFX8) {
8029 bld.vop2(aco_opcode::v_mul_lo_u16, dst, src_tmp, count);
8030 } else if (dst.getTemp().type() == RegType::vgpr) {
8031 bld.vop3(aco_opcode::v_mul_lo_u32, dst, src_tmp, count);
8033 bld.sop2(aco_opcode::s_mul_i32, dst, src_tmp, count);
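emit_addition_uniform_reduce exploits the fact that reducing a wave-uniform value needs no lane scan: every active lane contributes the same value, so an iadd reduction collapses to one multiply by the active-lane count. The v_mul_lo_u16/v_mul_lo_u32/s_mul_i32 variants above merely match the dst size, register type, and gfx level. A sketch:

    #include <bit>
    #include <cstdint>

    // Uniform-value iadd reduction: value * popcount(exec).
    uint32_t uniform_iadd_reduce(uint32_t value, uint64_t exec_mask)
    {
        return value * static_cast<uint32_t>(std::popcount(exec_mask));
    }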
8046 Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
8054 emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
8066 Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
8083 emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], packed_tid);
8099 if (dst.bytes() == 8) {
8109 bld.pseudo(aco_opcode::p_create_vector, dst, lo, hi);
8112 bld.writelane(dst, bld.copy(bld.def(s1, m0), Operand::c32(identity)), lane,
8121 Definition dst, Temp src)
8130 defs[num_defs++] = dst;
8143 defs[num_defs++] = bld.def(RegType::sgpr, dst.size());
8164 reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
8172 return dst.getTemp();
8176 emit_interp_center(isel_context* ctx, Temp dst, Temp bary, Temp pos1, Temp pos2)
8220 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), wqm1, wqm2);
8257 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8258 bld.copy(Definition(dst), bary);
8259 emit_split_vector(ctx, dst, 2);
8265 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8266 bld.copy(Definition(dst), model);
8267 emit_split_vector(ctx, dst, 3);
8272 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8282 bld.copy(Definition(dst), bary);
8283 emit_split_vector(ctx, dst, 2);
8371 emit_interp_center(ctx, dst, bary, pos1, pos2);
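emit_interp_center turns center barycentrics into at-offset ones by linear extrapolation with the quad derivatives; pos1/pos2 are the x/y offsets from the pixel center. A scalar model of one coordinate (the real code uses two fmas per coordinate, obtains the derivatives via quad swaps, and wraps the results in p_wqm, hence the wqm1/wqm2 vector above):

    // i' = i + dx * ddx(i) + dy * ddy(i); the derivatives come from the
    // neighbouring pixels of the 2x2 quad.
    float interp_at_offset(float i_center, float ddx_i, float ddy_i,
                           float dx, float dy)
    {
        return i_center + dx * ddx_i + dy * ddy_i;
    }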
8389 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8390 bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.view_index)));
8487 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8489 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.num_work_groups));
8493 bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
8497 emit_split_vector(ctx, dst, 3);
8501 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8504 bld.copy(Definition(dst), Operand(addr));
8508 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8519 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), local_ids[0], local_ids[1],
8522 bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.local_invocation_ids)));
8524 emit_split_vector(ctx, dst, 3);
8528 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8531 bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
8535 emit_split_vector(ctx, dst, 3);
8622 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8637 if (dst.size() != bld.lm.size()) {
8640 bld.pseudo(aco_opcode::p_create_vector, bld.def(dst.regClass()), src, Operand::zero());
8643 emit_wqm(bld, src, dst);
8656 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8664 if (dst.type() == RegType::vgpr)
8665 bld.pseudo(aco_opcode::p_split_vector, Definition(dst),
8668 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
8670 emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), dst);
8676 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8677 emit_split_vector(ctx, dst, 2);
8681 bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
8694 dst);
8708 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8710 emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), src), dst);
8716 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8717 emit_split_vector(ctx, dst, 2);
8722 bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
8724 bld.copy(Definition(dst), src);
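read_first_invocation (and the read_invocation path above it) handles 64-bit sources by splitting them into dwords, reading each half across lanes independently, and reassembling with p_create_vector. The per-lane semantics, modelled on plain arrays standing in for the lo/hi VGPRs:

    #include <cstdint>

    // A 64-bit cross-lane read is two independent 32-bit reads of the
    // same lane, recombined (the lo/hi p_create_vector in the quoted lines).
    uint64_t read_lane_u64(const uint32_t* lo, const uint32_t* hi, unsigned lane)
    {
        return (uint64_t(hi[lane]) << 32) | lo[lane];
    }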
8730 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8732 assert(dst.regClass() == bld.lm);
8739 bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), cond);
8744 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8746 assert(dst.regClass() == bld.lm);
8749 bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
8756 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8791 emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
8794 emit_wqm(bld, emit_boolean_exclusive_scan(ctx, op, src), dst);
8797 emit_wqm(bld, emit_boolean_inclusive_scan(ctx, op, src), dst);
8802 bld.copy(Definition(dst), src);
8819 bld.def(dst.regClass()), src);
8820 emit_wqm(bld, tmp_dst, dst);
8856 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8857 Temp tmp(dst);
8870 tmp = bld.tmp(dst.regClass());
8916 if (tmp.id() != dst.id()) {
8921 emit_wqm(bld, tmp, dst, true);
8932 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8944 emit_wqm(bld, tmp, dst);
8945 } else if (dst.regClass() == v1b) {
8947 emit_extract_vector(ctx, tmp, 0, dst);
8948 } else if (dst.regClass() == v2b) {
8950 emit_extract_vector(ctx, tmp, 0, dst);
8951 } else if (dst.regClass() == v1) {
8952 emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask), dst);
8953 } else if (dst.regClass() == v2) {
8958 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8959 emit_split_vector(ctx, dst, 2);
8969 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8970 if (dst.regClass() == v1) {
8971 /* src2 is ignored for writelane. RA assigns the same reg for dst */
8972 emit_wqm(bld, bld.writelane(bld.def(v1), val, lane, src), dst);
8973 } else if (dst.regClass() == v2) {
8980 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
8981 emit_split_vector(ctx, dst, 2);
8990 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8994 emit_wqm(bld, wqm_tmp, dst);
8998 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8999 assert(dst.regClass() == v1);
9001 bld.vop3(aco_opcode::v_perm_b32, Definition(dst), get_ssa_temp(ctx, instr->src[0].ssa),
9008 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9012 bld.copy(Definition(dst), src);
9013 } else if (dst.regClass() == v1 && src.regClass() == v1) {
9014 bld.vop3(aco_opcode::v_permlane16_b32, Definition(dst), src,
9026 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9027 bld.pseudo(aco_opcode::p_is_helper, Definition(dst), Operand(exec, bld.lm));
9097 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9102 bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand::zero());
9107 bld.smem(opcode, Definition(dst), memory_sync_info(0, semantic_volatile));
9109 emit_split_vector(ctx, dst, 2);
9113 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9114 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.vertex_id));
9118 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9119 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.base_vertex));
9123 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9124 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.start_instance));
9128 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9129 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.instance_id));
9133 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9134 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.draw_id));
9138 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9142 bld.vop2_e64(aco_opcode::v_and_b32, Definition(dst), Operand::c32(127u),
9145 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_invocation_id));
9147 bld.vop3(aco_opcode::v_bfe_u32, Definition(dst), get_arg(ctx, ctx->args->ac.tcs_rel_ids),
9156 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9160 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_prim_id));
9163 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.tcs_patch_id));
9166 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.tes_patch_id));
9172 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_prim_id));
9175 bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.vs_prim_id));
9235 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9238 bld.copy(Definition(dst), Operand(addr));
9263 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9267 bld.copy(Definition(dst), src);
9268 emit_split_vector(ctx, dst, dst.size());
9658 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9659 Temp tmp_dst = dst;
9668 if (tg4_integer_cube_workaround || dst.type() == RegType::sgpr)
9673 dst.type() == RegType::sgpr) {
9695 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
9832 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
9875 assert(dmask == 1 && dst.bytes() == 4);
9876 assert(dst.id() != tmp_dst.id());
9878 if (dst.regClass() == s1) {
9881 bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
9888 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst),
9892 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
10040 assert(tmp_dst.id() != dst.id());
10041 assert(tmp_dst.size() == dst.size());
10056 Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass());
10065 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, mask);
10091 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
10092 assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
10094 bool logical = !dst.is_linear() || nir_dest_is_divergent(instr->dest);
10119 operands[num_operands++] = Operand(dst.regClass());
10130 Operand op = get_phi_operand(ctx, src.second, dst.regClass(), logical);
10136 operands[num_operands++] = Operand(dst.regClass());
10150 if (dst.is_linear() && ctx->block->kind & block_kind_merge && num_defined == 1) {
10170 phi->operands[1] = Operand(dst.regClass());
10171 phi->definitions[0] = Definition(dst);
10179 phi->definitions[0] = Definition(dst);
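Phi lowering first decides between a logical and a linear phi: divergent or non-linear destinations must merge per lane through the logical CFG, while uniform linear values can use the linear CFG (the `logical` flag above). Incoming values from unreachable or undefined predecessors become bare register-class operands, which is why `Operand(dst.regClass())` appears repeatedly. The selection, restated as a sketch:

    enum class PhiKind { logical, linear };

    // Mirrors `logical = !dst.is_linear() || nir_dest_is_divergent(...)`.
    PhiKind phi_kind(bool dst_is_linear, bool divergent)
    {
        return (!dst_is_linear || divergent) ? PhiKind::logical : PhiKind::linear;
    }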
10186 Temp dst = get_ssa_temp(ctx, &instr->def);
10188 assert(dst.type() == RegType::sgpr);
10190 if (dst.size() == 1) {
10191 Builder(ctx->program, ctx->block).copy(Definition(dst), Operand::zero());
10194 aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
10195 for (unsigned i = 0; i < dst.size(); i++)
10197 vec->definitions[0] = Definition(dst);
11599 Temp dst = ctx->program->allocateTmp(type);
11600 ctx->arg_temps[i] = dst;
11601 startpgm->definitions[arg] = Definition(dst);