Lines matching refs:instr (all source lines referencing `instr` in the ACO NIR instruction-selection pass; the leading number on each match is the original file line)
52 _isel_err(isel_context* ctx, const char* file, unsigned line, const nir_instr* instr,
62 nir_print_instr(instr, memf);
259 ret.instr->dpp8().lane_sel[i] = (((i & and_mask) | or_mask) ^ xor_mask) & 0x7;
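
The masked-swizzle match at line 259 packs a lane permutation into the eight DPP8 lane selects. A minimal standalone sketch of just that bit arithmetic, with illustrative mask values (the masks themselves are not shown in the matches):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
       /* xor_mask = 1 with a full and_mask swaps lanes within each pair. */
       unsigned and_mask = 0x7, or_mask = 0x0, xor_mask = 0x1;
       for (unsigned i = 0; i < 8; i++) {
          uint8_t sel = (((i & and_mask) | or_mask) ^ xor_mask) & 0x7;
          printf("lane_sel[%u] = %u\n", i, sel); /* prints 1,0,3,2,5,4,7,6 */
       }
       return 0;
    }
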
821 get_alu_src_ub(isel_context* ctx, nir_alu_instr* instr, int src_idx)
824 nir_ssa_scalar{instr->src[src_idx].src.ssa, instr->src[src_idx].swizzle[0]};
841 emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
846 sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0]));
847 sop2->operands[1] = Operand(get_alu_src(ctx, instr->src[1]));
849 if (instr->no_unsigned_wrap)
856 uint32_t src_ub = get_alu_src_ub(ctx, instr, i);
868 emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode opc, Temp dst,
873 bld.is_precise = instr->exact;
875 Temp src0 = get_alu_src(ctx, instr->src[swap_srcs ? 1 : 0]);
876 Temp src1 = get_alu_src(ctx, instr->src[swap_srcs ? 0 : 1]);
891 uint32_t src_ub = get_alu_src_ub(ctx, instr, swap_srcs ? !i : i);
913 emit_vop2_instruction_logic64(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
916 bld.is_precise = instr->exact;
918 Temp src0 = get_alu_src(ctx, instr->src[0]);
919 Temp src1 = get_alu_src(ctx, instr->src[1]);
938 emit_vop3a_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
945 src[i] = get_alu_src(ctx, instr->src[swap_srcs ? 1 - i : i]);
953 bld.is_precise = instr->exact;
972 emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
975 Temp src0 = get_alu_src_vop3p(ctx, instr->src[swap_srcs]);
976 Temp src1 = get_alu_src_vop3p(ctx, instr->src[!swap_srcs]);
979 assert(instr->dest.dest.ssa.num_components == 2);
983 (instr->src[!swap_srcs].swizzle[0] & 1) << 1 | (instr->src[swap_srcs].swizzle[0] & 1);
985 (instr->src[!swap_srcs].swizzle[1] & 1) << 1 | (instr->src[swap_srcs].swizzle[1] & 1);
988 bld.is_precise = instr->exact;
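
Lines 975-988 show emit_vop3p_instruction folding the per-component NIR swizzles into the packed-math opsel bits. A self-contained sketch of only that computation, assuming swap_srcs is false and using plain arrays in place of the NIR source types:

    #include <cstdio>

    int main()
    {
       /* swizzle[c] selects which 16-bit half each source reads for output
        * component c; bit 0 of opsel_lo/hi belongs to src0 and bit 1 to
        * src1, mirroring lines 983 and 985. */
       unsigned src0_swizzle[2] = {1, 0}; /* src0 reads hi half, then lo */
       unsigned src1_swizzle[2] = {0, 1};
       unsigned opsel_lo = (src1_swizzle[0] & 1) << 1 | (src0_swizzle[0] & 1);
       unsigned opsel_hi = (src1_swizzle[1] & 1) << 1 | (src0_swizzle[1] & 1);
       printf("opsel_lo=%u opsel_hi=%u\n", opsel_lo, opsel_hi); /* 1 and 2 */
       return 0;
    }
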
994 emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
999 src[i] = get_alu_src(ctx, instr->src[i]);
1007 bld.is_precise = instr->exact;
1008 bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p().clamp = clamp;
1012 emit_vop1_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1015 bld.is_precise = instr->exact;
1018 bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0])));
1020 bld.vop1(op, Definition(dst), get_alu_src(ctx, instr->src[0]));
1024 emit_vopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1026 Temp src0 = get_alu_src(ctx, instr->src[0]);
1027 Temp src1 = get_alu_src(ctx, instr->src[1]);
1068 emit_sopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1070 Temp src0 = get_alu_src(ctx, instr->src[0]);
1071 Temp src1 = get_alu_src(ctx, instr->src[1]);
1086 emit_comparison(isel_context* ctx, nir_alu_instr* instr, Temp dst, aco_opcode v16_op,
1090 aco_opcode s_op = instr->src[0].src.ssa->bit_size == 64 ? s64_op
1091 : instr->src[0].src.ssa->bit_size == 32 ? s32_op
1093 aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op
1094 : instr->src[0].src.ssa->bit_size == 32 ? v32_op
1096 bool use_valu = s_op == aco_opcode::num_opcodes || nir_dest_is_divergent(instr->dest.dest) ||
1097 get_ssa_temp(ctx, instr->src[0].src.ssa).type() == RegType::vgpr ||
1098 get_ssa_temp(ctx, instr->src[1].src.ssa).type() == RegType::vgpr;
1104 emit_vopc_instruction(ctx, instr, op, dst);
1106 emit_sopc_instruction(ctx, instr, op, dst);
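
The emit_comparison fragments (lines 1086-1106) pick the scalar or vector compare per operand bit size and divergence. A reduced, compilable model of that decision; the enum here is a stand-in, with num_opcodes playing the role of the real aco_opcode::num_opcodes sentinel:

    #include <cstdio>

    enum class Op { s32_cmp, s64_cmp, v32_cmp, num_opcodes };

    /* Mirrors lines 1096-1098: fall back to the vector unit when no scalar
     * opcode exists, when the result is divergent, or when either source
     * already lives in a VGPR. */
    static bool use_valu(Op s_op, bool divergent, bool src0_vgpr, bool src1_vgpr)
    {
       return s_op == Op::num_opcodes || divergent || src0_vgpr || src1_vgpr;
    }

    int main()
    {
       printf("%d\n", use_valu(Op::s32_cmp, false, false, false)); /* 0: SALU */
       printf("%d\n", use_valu(Op::s32_cmp, true, false, false));  /* 1: VALU */
       return 0;
    }
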
1110 emit_boolean_logic(isel_context* ctx, nir_alu_instr* instr, Builder::WaveSpecificOpcode op,
1114 Temp src0 = get_alu_src(ctx, instr->src[0]);
1115 Temp src1 = get_alu_src(ctx, instr->src[1]);
1125 emit_bcsel(isel_context* ctx, nir_alu_instr* instr, Temp dst)
1128 Temp cond = get_alu_src(ctx, instr->src[0]);
1129 Temp then = get_alu_src(ctx, instr->src[1]);
1130 Temp els = get_alu_src(ctx, instr->src[2]);
1152 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1157 if (instr->dest.dest.ssa.bit_size == 1) {
1163 if (!nir_src_is_divergent(instr->src[0].src)) { /* uniform condition and values in sgpr */
1172 isel_err(&instr->instr, "Unimplemented uniform bcsel bit size");
1180 assert(instr->dest.dest.ssa.bit_size == 1);
1352 add.instr->vop3().clamp = 1;
1371 sub.instr->vop3().clamp = 1;
1376 visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
1378 if (!instr->dest.dest.is_ssa) {
1379 isel_err(&instr->instr, "nir alu dst not in ssa");
1383 bld.is_precise = instr->exact;
1384 Temp dst = get_ssa_temp(ctx, &instr->dest.dest.ssa);
1385 switch (instr->op) {
1393 unsigned num = instr->dest.dest.ssa.num_components;
1395 elems[i] = get_alu_src(ctx, instr->src[i]);
1397 if (instr->dest.dest.ssa.bit_size >= 32 || dst.type() == RegType::vgpr) {
1399 aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.dest.ssa.num_components, 1)};
1400 RegClass elem_rc = RegClass::get(RegType::vgpr, instr->dest.dest.ssa.bit_size / 8u);
1411 Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->dest.dest.ssa.bit_size) - 1));
1417 unsigned idx = i * instr->dest.dest.ssa.bit_size / packed_size;
1418 unsigned offset = i * instr->dest.dest.ssa.bit_size % packed_size;
1419 if (nir_src_is_const(instr->src[i].src)) {
1420 const_vals[idx] |= nir_src_as_uint(instr->src[i].src) << offset;
1424 if (offset != packed_size - instr->dest.dest.ssa.bit_size)
1479 Temp src = get_alu_src(ctx, instr->src[0]);
1491 Temp src = get_alu_src(ctx, instr->src[0]);
1493 emit_vop1_instruction(ctx, instr, aco_opcode::v_not_b32, dst);
1504 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1509 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1510 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
1512 unsigned opsel_lo = (instr->src[0].swizzle[0] & 1) << 1;
1513 unsigned opsel_hi = ((instr->src[0].swizzle[1] & 1) << 1) | 1;
1520 Temp src = get_alu_src(ctx, instr->src[0]);
1535 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1540 Temp src = get_alu_src(ctx, instr->src[0]);
1574 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1580 emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_i16_e64, dst);
1582 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_i16, dst, true);
1583 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1584 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_max_i16, dst);
1586 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_i32, dst, true);
1588 emit_sop2_instruction(ctx, instr, aco_opcode::s_max_i32, dst, true);
1590 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1596 emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_u16_e64, dst);
1598 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_u16, dst, true);
1599 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1600 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_max_u16, dst);
1602 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_u32, dst, true);
1604 emit_sop2_instruction(ctx, instr, aco_opcode::s_max_u32, dst, true);
1606 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1612 emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_i16_e64, dst);
1614 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_i16, dst, true);
1615 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1616 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_min_i16, dst);
1618 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_i32, dst, true);
1620 emit_sop2_instruction(ctx, instr, aco_opcode::s_min_i32, dst, true);
1622 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1628 emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_u16_e64, dst);
1630 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_u16, dst, true);
1631 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1632 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_min_u16, dst);
1634 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_u32, dst, true);
1636 emit_sop2_instruction(ctx, instr, aco_opcode::s_min_u32, dst, true);
1638 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1643 if (instr->dest.dest.ssa.bit_size == 1) {
1644 emit_boolean_logic(ctx, instr, Builder::s_or, dst);
1646 emit_vop2_instruction(ctx, instr, aco_opcode::v_or_b32, dst, true);
1648 emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_or_b32, dst);
1650 emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b32, dst, true);
1652 emit_sop2_instruction(ctx, instr, aco_opcode::s_or_b64, dst, true);
1654 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1659 if (instr->dest.dest.ssa.bit_size == 1) {
1660 emit_boolean_logic(ctx, instr, Builder::s_and, dst);
1662 emit_vop2_instruction(ctx, instr, aco_opcode::v_and_b32, dst, true);
1664 emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_and_b32, dst);
1666 emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b32, dst, true);
1668 emit_sop2_instruction(ctx, instr, aco_opcode::s_and_b64, dst, true);
1670 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1675 if (instr->dest.dest.ssa.bit_size == 1) {
1676 emit_boolean_logic(ctx, instr, Builder::s_xor, dst);
1678 emit_vop2_instruction(ctx, instr, aco_opcode::v_xor_b32, dst, true);
1680 emit_vop2_instruction_logic64(ctx, instr, aco_opcode::v_xor_b32, dst);
1682 emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b32, dst, true);
1684 emit_sop2_instruction(ctx, instr, aco_opcode::s_xor_b64, dst, true);
1686 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
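
The ior/iand/ixor cases (lines 1643-1686) repeat one dispatch pattern: 1-bit booleans go to the wave-specific SALU helper, VGPR destinations use VOP2 (a two-half helper for 64 bits), and SGPR destinations use SOP2 in 32- or 64-bit width. A compilable caricature of that dispatch; the RegClass enum below is a simplification, not the ACO definition:

    #include <cstdio>

    enum RegClass { v1, v2, s1, s2 };

    static const char* select_ior(RegClass dst_rc, unsigned bit_size)
    {
       if (bit_size == 1)
          return "s_or (wave-specific)";   /* emit_boolean_logic, line 1644 */
       if (dst_rc == v1)
          return "v_or_b32";               /* VOP2, line 1646 */
       if (dst_rc == v2)
          return "v_or_b32 x2 (logic64)";  /* split halves, line 1648 */
       if (dst_rc == s1)
          return "s_or_b32";               /* SOP2, line 1650 */
       if (dst_rc == s2)
          return "s_or_b64";               /* SOP2, line 1652 */
       return "isel_err: unimplemented bit size";
    }

    int main()
    {
       printf("%s\n", select_ior(v2, 64));
       return 0;
    }
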
1692 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshrrev_b16_e64, dst, false, 2, true);
1694 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshrrev_b16, dst, false, true);
1695 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1696 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_lshrrev_b16, dst, true);
1698 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshrrev_b32, dst, false, true);
1700 bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1701 get_alu_src(ctx, instr->src[0]));
1703 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshr_b64, dst);
1705 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b64, dst, true);
1707 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b32, dst, true);
1709 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1715 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshlrev_b16_e64, dst, false, 2, true);
1717 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshlrev_b16, dst, false, true);
1718 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1719 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_lshlrev_b16, dst, true);
1721 emit_vop2_instruction(ctx, instr, aco_opcode::v_lshlrev_b32, dst, false, true, false,
1724 bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1725 get_alu_src(ctx, instr->src[0]));
1727 emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshl_b64, dst);
1729 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b32, dst, true, 1);
1731 emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b64, dst, true);
1733 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1739 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ashrrev_i16_e64, dst, false, 2, true);
1741 emit_vop2_instruction(ctx, instr, aco_opcode::v_ashrrev_i16, dst, false, true);
1742 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1743 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_ashrrev_i16, dst, true);
1745 emit_vop2_instruction(ctx, instr, aco_opcode::v_ashrrev_i32, dst, false, true);
1747 bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1748 get_alu_src(ctx, instr->src[0]));
1750 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ashr_i64, dst);
1752 emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i32, dst, true);
1754 emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i64, dst, true);
1756 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1761 Temp src = get_alu_src(ctx, instr->src[0]);
1765 emit_vop1_instruction(ctx, instr, aco_opcode::v_ffbl_b32, dst);
1769 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1775 Temp src = get_alu_src(ctx, instr->src[0]);
1778 ? (instr->op == nir_op_ufind_msb ? aco_opcode::s_flbit_i32_b64
1780 : (instr->op == nir_op_ufind_msb ? aco_opcode::s_flbit_i32_b32
1793 instr->op == nir_op_ufind_msb ? aco_opcode::v_ffbh_u32 : aco_opcode::v_ffbh_i32;
1795 emit_vop1_instruction(ctx, instr, op, msb_rev);
1802 instr->op == nir_op_ufind_msb ? aco_opcode::v_ffbh_u32 : aco_opcode::v_ffbh_i32;
1819 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1824 Temp src = get_alu_src(ctx, instr->src[0]);
1832 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1838 bld.sop1(aco_opcode::s_brev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1840 bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1842 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1848 emit_sop2_instruction(ctx, instr, aco_opcode::s_add_u32, dst, true);
1851 emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_u16_e64, dst);
1854 emit_vop2_instruction(ctx, instr, aco_opcode::v_add_u16, dst, true);
1856 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1857 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
1861 Temp src0 = get_alu_src(ctx, instr->src[0]);
1862 Temp src1 = get_alu_src(ctx, instr->src[1]);
1889 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1894 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1896 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
1900 Temp src0 = get_alu_src(ctx, instr->src[0]);
1901 Temp src1 = get_alu_src(ctx, instr->src[1]);
1911 add_instr = bld.vop3(aco_opcode::v_add_u16_e64, Definition(dst), src0, src1).instr;
1916 bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
1959 .instr->vop3()
1972 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
1977 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
1979 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
1983 Temp src0 = get_alu_src(ctx, instr->src[0]);
1984 Temp src1 = get_alu_src(ctx, instr->src[1]);
2000 bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr;
2004 bld.vop3(aco_opcode::v_add_i32, Definition(dst), src0, src1).instr;
2007 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2012 Temp src0 = get_alu_src(ctx, instr->src[0]);
2013 Temp src1 = get_alu_src(ctx, instr->src[1]);
2046 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2052 emit_sop2_instruction(ctx, instr, aco_opcode::s_sub_i32, dst, true);
2054 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2055 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_u16, dst);
2059 Temp src0 = get_alu_src(ctx, instr->src[0]);
2060 Temp src1 = get_alu_src(ctx, instr->src[1]);
2095 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2100 Temp src0 = get_alu_src(ctx, instr->src[0]);
2101 Temp src1 = get_alu_src(ctx, instr->src[1]);
2133 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2138 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2139 Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_u16, dst);
2143 Temp src0 = get_alu_src(ctx, instr->src[0]);
2144 Temp src1 = get_alu_src(ctx, instr->src[1]);
2153 sub_instr = bld.vop3(aco_opcode::v_sub_u16_e64, Definition(dst), src0, src1).instr;
2160 sub_instr = bld.vop2_e64(op, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
2202 .instr->vop3()
2215 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2220 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2221 Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_i16, dst);
2225 Temp src0 = get_alu_src(ctx, instr->src[0]);
2226 Temp src1 = get_alu_src(ctx, instr->src[1]);
2242 bld.vop3(aco_opcode::v_sub_i16, Definition(dst), src0, src1).instr;
2246 bld.vop3(aco_opcode::v_sub_i32, Definition(dst), src0, src1).instr;
2249 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2255 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u16_e64, dst);
2257 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_lo_u16, dst, true);
2258 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2259 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_mul_lo_u16, dst);
2261 uint32_t src0_ub = get_alu_src_ub(ctx, instr, 0);
2262 uint32_t src1_ub = get_alu_src_ub(ctx, instr, 1);
2266 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_u32_u24, dst,
2268 } else if (nir_src_is_const(instr->src[0].src)) {
2269 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[1]),
2270 nir_src_as_uint(instr->src[0].src), false);
2271 } else if (nir_src_is_const(instr->src[1].src)) {
2272 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[0]),
2273 nir_src_as_uint(instr->src[1].src), false);
2275 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
2278 emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
2280 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
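
For nir_op_imul (lines 2255-2280) the matches show three 32-bit strategies: a 24-bit multiply when both operands' upper bounds are small enough, v_mul_imm folding when one source is constant, and plain v_mul_lo_u32 otherwise. A sketch; the exact bound check is outside the matched lines, so the 0xffffff test is an assumption:

    #include <cstdint>
    #include <cstdio>

    static const char* pick_mul32(uint32_t src0_ub, uint32_t src1_ub,
                                  bool src0_const, bool src1_const)
    {
       if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) /* assumed bound check */
          return "v_mul_u32_u24";          /* line 2266 */
       if (src0_const || src1_const)
          return "v_mul_imm";              /* lines 2269 / 2272 */
       return "v_mul_lo_u32";              /* line 2275 */
    }

    int main()
    {
       printf("%s\n", pick_mul32(0x10000, 0xff, false, false)); /* u24 path */
       return 0;
    }
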
2286 emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_u32, dst, false);
2288 uint32_t src0_ub = get_alu_src_ub(ctx, instr, 0);
2289 uint32_t src1_ub = get_alu_src_ub(ctx, instr, 1);
2293 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_hi_u32_u24, tmp, true);
2295 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_u32, tmp);
2301 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2307 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_i32, dst);
2309 emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_i32, dst, false);
2311 Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]),
2312 as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
2315 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2321 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f16, dst, true);
2322 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2323 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_mul_f16, dst);
2325 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true);
2327 emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_f64, dst);
2329 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2335 emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_legacy_f32, dst, true);
2337 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2343 emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f16, dst, true);
2344 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2345 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
2347 emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true);
2349 emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_f64, dst);
2351 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2356 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2357 Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
2364 Temp src0 = get_alu_src(ctx, instr->src[0]);
2365 Temp src1 = get_alu_src(ctx, instr->src[1]);
2368 emit_vop2_instruction(ctx, instr, aco_opcode::v_sub_f16, dst, false);
2370 emit_vop2_instruction(ctx, instr, aco_opcode::v_subrev_f16, dst, true);
2373 emit_vop2_instruction(ctx, instr, aco_opcode::v_sub_f32, dst, false);
2375 emit_vop2_instruction(ctx, instr, aco_opcode::v_subrev_f32, dst, true);
2381 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2387 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
2388 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2389 assert(instr->dest.dest.ssa.num_components == 2);
2391 Temp src0 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[0]));
2392 Temp src1 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[1]));
2393 Temp src2 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[2]));
2398 opsel_lo |= (instr->src[i].swizzle[0] & 1) << i;
2399 opsel_hi |= (instr->src[i].swizzle[1] & 1) << i;
2404 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f32, dst,
2407 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f64, dst, false, 3);
2409 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2415 emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_legacy_f32, dst,
2418 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2425 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f16, dst, true);
2426 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2427 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_max_f16, dst);
2429 emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f32, dst, true, false,
2432 emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_f64, dst,
2435 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2442 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f16, dst, true);
2443 } else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2444 emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_min_f16, dst, true);
2446 emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f32, dst, true, false,
2449 emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_f64, dst,
2452 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2457 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
2461 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
2465 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_u32_u8, dst, false);
2469 emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_u32_u8, dst, true);
2473 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_i32_i16, dst, false);
2477 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_i32_i16, dst, true);
2481 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, false);
2485 emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, true);
2489 Temp in = get_alu_src(ctx, instr->src[0], 3);
2504 Temp in = get_alu_src(ctx, instr->src[0], 3);
2511 emit_bcsel(ctx, instr, dst);
2516 emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f16, dst);
2518 Temp src = get_alu_src(ctx, instr->src[0]);
2522 emit_vop1_instruction(ctx, instr, aco_opcode::v_rsq_f64, dst);
2524 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2529 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2530 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2533 instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
2538 Temp src = get_alu_src(ctx, instr->src[0]);
2553 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2558 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2559 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2562 instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
2563 .instr;
2568 Temp src = get_alu_src(ctx, instr->src[0]);
2572 .instr;
2577 .instr;
2588 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2593 if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
2594 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2597 instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
2601 Temp src = get_alu_src(ctx, instr->src[0]);
2615 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2621 emit_vop1_instruction(ctx, instr, aco_opcode::v_log_f16, dst);
2623 Temp src = get_alu_src(ctx, instr->src[0]);
2626 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2632 emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f16, dst);
2634 Temp src = get_alu_src(ctx, instr->src[0]);
2638 emit_vop1_instruction(ctx, instr, aco_opcode::v_rcp_f64, dst);
2640 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2646 emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f16, dst);
2648 emit_vop1_instruction(ctx, instr, aco_opcode::v_exp_f32, dst);
2650 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2656 emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f16, dst);
2658 Temp src = get_alu_src(ctx, instr->src[0]);
2662 emit_vop1_instruction(ctx, instr, aco_opcode::v_sqrt_f64, dst);
2664 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2670 emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f16, dst);
2672 emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f32, dst);
2674 emit_vop1_instruction(ctx, instr, aco_opcode::v_fract_f64, dst);
2676 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2682 emit_vop1_instruction(ctx, instr, aco_opcode::v_floor_f16, dst);
2684 emit_vop1_instruction(ctx, instr, aco_opcode::v_floor_f32, dst);
2686 Temp src = get_alu_src(ctx, instr->src[0]);
2689 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2695 emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f16, dst);
2697 emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f32, dst);
2700 emit_vop1_instruction(ctx, instr, aco_opcode::v_ceil_f64, dst);
2707 Temp src0 = get_alu_src(ctx, instr->src[0]);
2721 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2727 emit_vop1_instruction(ctx, instr, aco_opcode::v_trunc_f16, dst);
2729 emit_vop1_instruction(ctx, instr, aco_opcode::v_trunc_f32, dst);
2731 Temp src = get_alu_src(ctx, instr->src[0]);
2734 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2740 emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f16, dst);
2742 emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f32, dst);
2745 emit_vop1_instruction(ctx, instr, aco_opcode::v_rndne_f64, dst);
2749 Temp src0 = get_alu_src(ctx, instr->src[0]);
2782 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2788 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2792 instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16;
2800 instr->op == nir_op_fsin_amd ? aco_opcode::v_sin_f32 : aco_opcode::v_cos_f32;
2803 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2809 emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
2811 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
2813 emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f64, dst);
2815 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2821 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_mant_f16, dst);
2823 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_mant_f32, dst);
2825 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_mant_f64, dst);
2827 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2832 if (instr->src[0].src.ssa->bit_size == 16) {
2833 Temp src = get_alu_src(ctx, instr->src[0]);
2837 } else if (instr->src[0].src.ssa->bit_size == 32) {
2838 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_exp_i32_f32, dst);
2839 } else if (instr->src[0].src.ssa->bit_size == 64) {
2840 emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_exp_i32_f64, dst);
2842 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2847 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2872 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2878 Temp src = get_alu_src(ctx, instr->src[0]);
2879 if (instr->src[0].src.ssa->bit_size == 64)
2881 if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne)
2891 Temp src = get_alu_src(ctx, instr->src[0]);
2892 if (instr->src[0].src.ssa->bit_size == 64)
2903 if (instr->src[0].src.ssa->bit_size == 16) {
2904 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f16, dst);
2905 } else if (instr->src[0].src.ssa->bit_size == 64) {
2906 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f64, dst);
2908 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
2913 Temp src = get_alu_src(ctx, instr->src[0]);
2914 if (instr->src[0].src.ssa->bit_size == 16)
2921 Temp src = get_alu_src(ctx, instr->src[0]);
2922 const unsigned input_size = instr->src[0].src.ssa->bit_size;
2956 Temp src = get_alu_src(ctx, instr->src[0]);
2957 const unsigned input_size = instr->src[0].src.ssa->bit_size;
2979 if (instr->src[0].src.ssa->bit_size <= 32) {
2980 Temp src = get_alu_src(ctx, instr->src[0]);
2981 if (instr->src[0].src.ssa->bit_size <= 16)
2982 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true);
2984 } else if (instr->src[0].src.ssa->bit_size == 64) {
2985 Temp src = get_alu_src(ctx, instr->src[0]);
2995 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3001 Temp src = get_alu_src(ctx, instr->src[0]);
3002 const unsigned input_size = instr->src[0].src.ssa->bit_size;
3031 Temp src = get_alu_src(ctx, instr->src[0]);
3032 const unsigned input_size = instr->src[0].src.ssa->bit_size;
3037 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false);
3053 if (instr->src[0].src.ssa->bit_size <= 32) {
3054 Temp src = get_alu_src(ctx, instr->src[0]);
3055 if (instr->src[0].src.ssa->bit_size <= 16)
3056 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false);
3058 } else if (instr->src[0].src.ssa->bit_size == 64) {
3059 Temp src = get_alu_src(ctx, instr->src[0]);
3068 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3074 if (instr->src[0].src.ssa->bit_size == 16) {
3076 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i16_f16, dst);
3080 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f16, tmp);
3082 tmp = convert_int(ctx, bld, tmp, 32, instr->dest.dest.ssa.bit_size, false,
3088 } else if (instr->src[0].src.ssa->bit_size == 32) {
3089 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f32, dst);
3091 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f64, dst);
3097 if (instr->src[0].src.ssa->bit_size == 16) {
3099 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u16_f16, dst);
3103 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_f32_f16, tmp);
3105 tmp = convert_int(ctx, bld, tmp, 32, instr->dest.dest.ssa.bit_size, false,
3111 } else if (instr->src[0].src.ssa->bit_size == 32) {
3112 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f32, dst);
3114 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
3119 Temp src = get_alu_src(ctx, instr->src[0]);
3120 if (instr->src[0].src.ssa->bit_size == 16) {
3128 } else if (instr->src[0].src.ssa->bit_size == 32) {
3129 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f32, dst);
3130 } else if (instr->src[0].src.ssa->bit_size == 64) {
3131 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_i32_f64, dst);
3133 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3138 Temp src = get_alu_src(ctx, instr->src[0]);
3139 if (instr->src[0].src.ssa->bit_size == 16) {
3147 } else if (instr->src[0].src.ssa->bit_size == 32) {
3148 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f32, dst);
3149 } else if (instr->src[0].src.ssa->bit_size == 64) {
3150 emit_vop1_instruction(ctx, instr, aco_opcode::v_cvt_u32_f64, dst);
3152 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3157 Temp src = get_alu_src(ctx, instr->src[0]);
3158 if (instr->src[0].src.ssa->bit_size == 16)
3161 if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) {
3190 } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) {
3229 } else if (instr->src[0].src.ssa->bit_size == 64) {
3247 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3252 Temp src = get_alu_src(ctx, instr->src[0]);
3253 if (instr->src[0].src.ssa->bit_size == 16)
3256 if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) {
3284 } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) {
3319 } else if (instr->src[0].src.ssa->bit_size == 64) {
3337 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3342 Temp src = get_alu_src(ctx, instr->src[0]);
3357 Temp src = get_alu_src(ctx, instr->src[0]);
3372 Temp src = get_alu_src(ctx, instr->src[0]);
3393 if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
3395 sgpr_extract_mode mode = instr->dest.dest.ssa.bit_size > instr->src[0].src.ssa->bit_size
3398 extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
3400 const unsigned input_bitsize = instr->src[0].src.ssa->bit_size;
3401 const unsigned output_bitsize = instr->dest.dest.ssa.bit_size;
3402 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), input_bitsize, output_bitsize,
3411 if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
3413 sgpr_extract_mode mode = instr->dest.dest.ssa.bit_size > instr->src[0].src.ssa->bit_size
3416 extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
3418 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), instr->src[0].src.ssa->bit_size,
3419 instr->dest.dest.ssa.bit_size, false, dst);
3428 Temp src = get_alu_src(ctx, instr->src[0]);
3447 Temp src = get_alu_src(ctx, instr->src[0]);
3474 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3475 emit_split_vector(ctx, dst, instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
3478 Temp src0 = get_alu_src(ctx, instr->src[0]);
3479 Temp src1 = get_alu_src(ctx, instr->src[1]);
3486 get_alu_src(ctx, instr->src[0]));
3490 get_alu_src(ctx, instr->src[0]));
3495 get_alu_src(ctx, instr->src[0]));
3497 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3503 get_alu_src(ctx, instr->src[0]));
3506 get_alu_src(ctx, instr->src[0]), Operand::c32(1u), Operand::c32(16u),
3511 Temp src0 = get_alu_src(ctx, instr->src[0]);
3512 Temp src1 = get_alu_src(ctx, instr->src[1]);
3526 case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
3530 emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
3532 emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
3534 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3540 Temp src = get_alu_src(ctx, instr->src[0], 2);
3543 aco_opcode opcode = instr->op == nir_op_pack_unorm_2x16 ? aco_opcode::v_cvt_pknorm_u16_f32
3550 Temp src = get_alu_src(ctx, instr->src[0], 2);
3553 aco_opcode opcode = instr->op == nir_op_pack_uint_2x16 ? aco_opcode::v_cvt_pk_u16_u32
3560 Temp src = get_alu_src(ctx, instr->src[0]);
3565 (instr->op == nir_op_unpack_half_2x16_split_x_flush_to_zero));
3568 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3574 Temp src = get_alu_src(ctx, instr->src[0]);
3583 (instr->op == nir_op_unpack_half_2x16_split_y_flush_to_zero));
3586 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3592 emit_vop3a_instruction(ctx, instr, aco_opcode::v_sad_u8, dst, false, 3u, false);
3596 Temp src = get_alu_src(ctx, instr->src[0]);
3628 Temp bits = get_alu_src(ctx, instr->src[0]);
3629 Temp offset = get_alu_src(ctx, instr->src[1]);
3636 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3644 Temp bitmask = get_alu_src(ctx, instr->src[0]);
3645 Temp insert = get_alu_src(ctx, instr->src[1]);
3646 Temp base = get_alu_src(ctx, instr->src[2]);
3648 nir_const_value* const_bitmask = nir_src_as_const_value(instr->src[0].src);
3649 nir_const_value* const_insert = nir_src_as_const_value(instr->src[1].src);
3660 nir_const_value* const_base = nir_src_as_const_value(instr->src[2].src);
3671 emit_vop3a_instruction(ctx, instr, aco_opcode::v_bfi_b32, dst, false, 3);
3673 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3683 Temp base = get_alu_src(ctx, instr->src[0]);
3685 nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
3686 nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
3690 instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
3695 Temp offset = get_alu_src(ctx, instr->src[1]);
3696 Temp bits = get_alu_src(ctx, instr->src[2]);
3697 if (instr->op == nir_op_ubfe) {
3718 instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32;
3719 emit_vop3a_instruction(ctx, instr, opcode, dst, false, 3);
3727 bool is_signed = instr->op == nir_op_extract_i16 || instr->op == nir_op_extract_i8;
3728 unsigned comp = instr->op == nir_op_extract_u8 || instr->op == nir_op_extract_i8 ? 4 : 2;
3730 unsigned index = nir_src_as_uint(instr->src[1].src);
3731 if (bits >= instr->dest.dest.ssa.bit_size || index * bits >= instr->dest.dest.ssa.bit_size) {
3733 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3734 } else if (dst.regClass() == s1 && instr->dest.dest.ssa.bit_size == 16) {
3735 Temp vec = get_ssa_temp(ctx, instr->src[0].src.ssa);
3736 unsigned swizzle = instr->src[0].swizzle[0];
3741 index += swizzle * instr->dest.dest.ssa.bit_size / bits;
3745 Temp src = get_alu_src(ctx, instr->src[0]);
3769 unsigned comp = instr->op == nir_op_insert_u8 ? 4 : 2;
3771 unsigned index = nir_src_as_uint(instr->src[1].src);
3772 if (bits >= instr->dest.dest.ssa.bit_size || index * bits >= instr->dest.dest.ssa.bit_size) {
3774 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3776 Temp src = get_alu_src(ctx, instr->src[0]);
3803 Temp src = get_alu_src(ctx, instr->src[0]);
3815 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
3820 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_f16, aco_opcode::v_cmp_lt_f32,
3825 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_f16, aco_opcode::v_cmp_ge_f32,
3830 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32,
3835 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32,
3840 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_i16, aco_opcode::v_cmp_lt_i32,
3845 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_i16, aco_opcode::v_cmp_ge_i32,
3850 if (instr->src[0].src.ssa->bit_size == 1)
3851 emit_boolean_logic(ctx, instr, Builder::s_xnor, dst);
3854 ctx, instr, dst, aco_opcode::v_cmp_eq_i16, aco_opcode::v_cmp_eq_i32,
3860 if (instr->src[0].src.ssa->bit_size == 1)
3861 emit_boolean_logic(ctx, instr, Builder::s_xor, dst);
3864 ctx, instr, dst, aco_opcode::v_cmp_lg_i16, aco_opcode::v_cmp_lg_i32,
3870 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_u16, aco_opcode::v_cmp_lt_u32,
3875 emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_u16, aco_opcode::v_cmp_ge_u32,
3885 if (!nir_src_is_divergent(instr->src[0].src)) {
3893 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
3895 if (instr->op == nir_op_fddx_fine) {
3898 } else if (instr->op == nir_op_fddy_fine) {
3903 if (instr->op == nir_op_fddx || instr->op == nir_op_fddx_coarse)
3921 default: isel_err(&instr->instr, "Unknown NIR ALU instr");
3926 visit_load_const(isel_context* ctx, nir_load_const_instr* instr)
3928 Temp dst = get_ssa_temp(ctx, &instr->def);
3933 assert(instr->def.num_components == 1 && "Vector load_const should be lowered to scalar.");
3938 if (instr->def.bit_size == 1) {
3940 int val = instr->value[0].b ? -1 : 0;
3943 } else if (instr->def.bit_size == 8) {
3944 bld.copy(Definition(dst), Operand::c32(instr->value[0].u8));
3945 } else if (instr->def.bit_size == 16) {
3947 bld.copy(Definition(dst), Operand::c32(instr->value[0].i16));
3949 bld.copy(Definition(dst), Operand::c32(instr->value[0].u32));
3954 if (instr->def.bit_size == 64)
3956 vec->operands[i] = Operand::c32(instr->value[0].u64 >> i * 32);
3959 vec->operands[i] = Operand::c32(instr->value[i].u32);
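
visit_load_const (lines 3926-3959) materializes scalar constants: booleans become 0 or -1, 8- and 16-bit values are copied as 32-bit immediates, and 64-bit values are split into two 32-bit dwords. A standalone sketch of the 64-bit split on line 3956:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
       uint64_t value = 0x1122334455667788ull;
       /* One 32-bit operand per dword, matching
        * Operand::c32(instr->value[0].u64 >> i * 32). */
       for (unsigned i = 0; i < 2; i++)
          printf("dword %u: 0x%08x\n", i, (uint32_t)(value >> (i * 32)));
       return 0;
    }
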
4325 Instruction* instr;
4327 instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1);
4329 instr = bld.ds(op, Definition(val), offset, m, const_offset);
4330 instr->ds().sync = info.sync;
4333 instr->operands.pop_back();
4923 Instruction* instr;
4927 instr = bld.ds(op, address_offset, split_data, second_data, m, inline_offset,
4930 instr = bld.ds(op, address_offset, split_data, m, inline_offset);
4932 instr->ds().sync = memory_sync_info(storage_shared);
4935 instr->operands.pop_back();
4955 split_buffer_store(isel_context* ctx, nir_intrinsic_instr* instr, bool smem, RegType dst_type,
4987 unsigned align_mul = instr ? nir_intrinsic_align_mul(instr) : 4;
4988 unsigned align_offset = (instr ? nir_intrinsic_align_offset(instr) : 0) + offset;
5022 aco_ptr<Pseudo_instruction> instr{
5024 instr->definitions[0] = Definition(dst);
5030 instr->operands[i] = Operand(arr[i]);
5035 instr->operands[i] = Operand(zero);
5039 bld.insert(std::move(instr));
5092 r.instr->mubuf().sync = sync;
5171 store_output_to_temps(isel_context* ctx, nir_intrinsic_instr* instr)
5173 unsigned write_mask = nir_intrinsic_write_mask(instr);
5174 unsigned component = nir_intrinsic_component(instr);
5175 unsigned idx = nir_intrinsic_base(instr) * 4u + component;
5176 nir_src offset = *nir_get_io_offset_src(instr);
5181 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
5183 if (instr->src[0].ssa->bit_size == 64)
5186 RegClass rc = instr->src[0].ssa->bit_size == 16 ? v2b : v1;
5197 unsigned index = nir_intrinsic_base(instr) - FRAG_RESULT_DATA0;
5199 if (nir_intrinsic_src_type(instr) == nir_type_float16) {
5201 } else if (nir_intrinsic_src_type(instr) == nir_type_int16) {
5203 } else if (nir_intrinsic_src_type(instr) == nir_type_uint16) {
5212 load_input_from_temps(isel_context* ctx, nir_intrinsic_instr* instr, Temp dst)
5221 nir_src* off_src = nir_get_io_offset_src(instr);
5222 nir_src* vertex_index_src = nir_get_io_arrayed_index_src(instr);
5231 unsigned idx = nir_intrinsic_base(instr) * 4u + nir_intrinsic_component(instr) +
5242 visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
5248 bool stored_to_temps = store_output_to_temps(ctx, instr);
5250 isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
5293 interp_p1.instr->operands[0].setLateKill(true);
5356 visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
5358 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5359 Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
5360 unsigned idx = nir_intrinsic_base(instr);
5361 unsigned component = nir_intrinsic_component(instr);
5364 assert(nir_src_is_const(instr->src[1]) && !nir_src_as_uint(instr->src[1]));
5366 if (instr->dest.ssa.num_components == 1) {
5370 aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.ssa.num_components, 1));
5371 for (unsigned i = 0; i < instr->dest.ssa.num_components; i++) {
5372 Temp tmp = ctx->program->allocateTmp(instr->dest.ssa.bit_size == 16 ? v2b : v1);
5449 visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
5452 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5453 nir_src offset = *nir_get_io_offset_src(instr);
5460 unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
5461 unsigned component = nir_intrinsic_component(instr);
5462 unsigned bitsize = instr->dest.ssa.bit_size;
5463 unsigned num_components = instr->dest.ssa.num_components;
5473 if (nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float)
5493 unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
5494 unsigned component = nir_intrinsic_component(instr);
5495 unsigned bitsize = instr->dest.ssa.bit_size;
5506 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component;
5637 .instr;
5642 .instr;
5663 unsigned num_components = instr->dest.ssa.num_components;
5699 unsigned idx = nir_intrinsic_base(instr);
5700 unsigned component = nir_intrinsic_component(instr);
5703 if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
5704 nir_const_value* src0 = nir_src_as_const_value(instr->src[0]);
5719 if (instr->dest.ssa.num_components == 1 &&
5720 instr->dest.ssa.bit_size != 64) {
5724 unsigned num_components = instr->dest.ssa.num_components;
5725 if (instr->dest.ssa.bit_size == 64)
5733 aco_opcode::v_interp_mov_f32, bld.def(instr->dest.ssa.bit_size == 16 ? v2b : v1),
5745 visit_load_tcs_per_vertex_input(isel_context* ctx, nir_intrinsic_instr* instr)
5750 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5752 if (load_input_from_temps(ctx, instr, dst))
5759 visit_load_per_vertex_input(isel_context* ctx, nir_intrinsic_instr* instr)
5762 case MESA_SHADER_TESS_CTRL: visit_load_tcs_per_vertex_input(ctx, instr); break;
5768 visit_load_tess_coord(isel_context* ctx, nir_intrinsic_instr* instr)
5773 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5820 visit_load_ubo(isel_context* ctx, nir_intrinsic_instr* instr)
5822 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5824 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5826 unsigned size = instr->dest.ssa.bit_size / 8;
5827 load_buffer(ctx, instr->num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
5828 nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr));
5832 visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
5835 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5836 unsigned offset = nir_intrinsic_base(instr);
5837 unsigned count = instr->dest.ssa.num_components;
5838 nir_const_value* index_cv = nir_src_as_const_value(instr->src[0]);
5840 if (instr->dest.ssa.bit_size == 64)
5843 if (index_cv && instr->dest.ssa.bit_size >= 32) {
5864 Temp index = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5873 if (instr->dest.ssa.bit_size == 8) {
5878 } else if (instr->dest.ssa.bit_size == 16) {
5902 bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true;
5916 emit_split_vector(ctx, dst, instr->dest.ssa.num_components);
5920 visit_load_constant(isel_context* ctx, nir_intrinsic_instr* instr)
5922 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5938 unsigned base = nir_intrinsic_base(instr);
5939 unsigned range = nir_intrinsic_range(instr);
5941 Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
5953 unsigned size = instr->dest.ssa.bit_size / 8;
5955 load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0);
6081 visit_bvh64_intersect_ray_amd(isel_context* ctx, nir_intrinsic_instr* instr)
6084 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6085 Temp resource = get_ssa_temp(ctx, instr->src[0].ssa);
6086 Temp node = get_ssa_temp(ctx, instr->src[1].ssa);
6087 Temp tmax = get_ssa_temp(ctx, instr->src[2].ssa);
6088 Temp origin = get_ssa_temp(ctx, instr->src[3].ssa);
6089 Temp dir = get_ssa_temp(ctx, instr->src[4].ssa);
6090 Temp inv_dir = get_ssa_temp(ctx, instr->src[5].ssa);
6115 get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr)
6118 Temp src0 = get_ssa_temp(ctx, instr->src[1].ssa);
6119 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
6120 bool is_array = nir_intrinsic_image_array(instr);
6131 coords[--count] = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[2].ssa), 0, v1);
6151 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6160 if (instr->intrinsic == nir_intrinsic_bindless_image_load ||
6161 instr->intrinsic == nir_intrinsic_bindless_image_sparse_load ||
6162 instr->intrinsic == nir_intrinsic_bindless_image_store) {
6163 int lod_index = instr->intrinsic == nir_intrinsic_bindless_image_store ? 4 : 3;
6165 nir_src_is_const(instr->src[lod_index]) && nir_src_as_uint(instr->src[lod_index]) == 0;
6168 coords.emplace_back(get_ssa_temp(ctx, instr->src[lod_index].ssa));
6175 get_memory_sync_info(nir_intrinsic_instr* instr, storage_class storage, unsigned semantics)
6181 unsigned access = nir_intrinsic_access(instr);
6212 visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
6215 const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
6216 bool is_array = nir_intrinsic_image_array(instr);
6217 bool is_sparse = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
6218 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6220 memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
6221 unsigned access = nir_intrinsic_access(instr);
6223 unsigned result_size = instr->dest.ssa.num_components - is_sparse;
6225 nir_ssa_def_components_read(&instr->dest.ssa) & u_bit_consecutive(0, result_size);
6230 if (instr->dest.ssa.bit_size == 64) {
6238 bool d16 = instr->dest.ssa.bit_size == 16;
6249 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6252 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6288 std::vector<Temp> coords = get_image_coords(ctx, instr);
6290 bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
6308 if (is_sparse && instr->dest.ssa.bit_size == 64) {
6316 expand_vector(ctx, tmp, dst, instr->dest.ssa.num_components, expand_mask,
6317 instr->dest.ssa.bit_size == 64);
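
visit_image_load (lines 6212-6317) requests only the components NIR actually reads: the mask is derived from nir_ssa_def_components_read limited to the result size (lines 6223-6225). A tiny model of that computation; the bit-range helper is inlined here since u_bit_consecutive is not among the matches:

    #include <cstdio>

    int main()
    {
       unsigned num_components = 4, is_sparse = 0;
       unsigned components_read = 0b0101;  /* shader reads .x and .z */
       unsigned result_size = num_components - is_sparse;
       unsigned dmask = components_read & ((1u << result_size) - 1);
       printf("dmask = 0x%x\n", dmask);    /* 0x5 */
       return 0;
    }
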
6321 visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
6324 const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
6325 bool is_array = nir_intrinsic_image_array(instr);
6326 Temp data = get_ssa_temp(ctx, instr->src[3].ssa);
6327 bool d16 = instr->src[3].ssa->bit_size == 16;
6330 if (instr->src[3].ssa->bit_size == 64 && data.bytes() > 8)
6334 uint32_t num_components = d16 ? instr->src[3].ssa->num_components : data.size();
6336 memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
6337 unsigned access = nir_intrinsic_access(instr);
6343 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6344 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6380 std::vector<Temp> coords = get_image_coords(ctx, instr);
6381 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6383 bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
6390 if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) {
6391 for (uint32_t i = 0; i < instr->num_components; i++) {
6392 nir_ssa_scalar comp = nir_ssa_scalar_resolved(instr->src[3].ssa, i);
6437 visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
6439 bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
6440 const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
6441 bool is_array = nir_intrinsic_image_array(instr);
6444 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
6445 bool cmpswap = instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
6451 get_ssa_temp(ctx, instr->src[4].ssa), data);
6454 switch (instr->intrinsic) {
6520 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6521 memory_sync_info sync = get_memory_sync_info(instr, storage_image, semantic_atomicrmw);
6524 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6525 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6551 std::vector<Temp> coords = get_image_coords(ctx, instr);
6552 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6605 visit_image_size(isel_context* ctx, nir_intrinsic_instr* instr)
6607 const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
6608 bool is_array = nir_intrinsic_image_array(instr);
6612 Temp desc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6613 return get_buffer_size(ctx, desc, get_ssa_temp(ctx, &instr->dest.ssa));
6617 assert(nir_src_as_uint(instr->src[1]) == 0);
6621 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6623 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6629 mimg->dmask = (1 << instr->dest.ssa.num_components) - 1;
6633 assert(instr->dest.ssa.num_components == 2);
6637 emit_split_vector(ctx, dst, instr->dest.ssa.num_components);
6669 visit_image_samples(isel_context* ctx, nir_intrinsic_instr* instr)
6672 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6673 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6678 visit_load_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
6681 unsigned num_components = instr->num_components;
6683 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6684 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6686 unsigned access = nir_intrinsic_access(instr);
6688 unsigned size = instr->dest.ssa.bit_size / 8;
6692 load_buffer(ctx, num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
6693 nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr), glc, allow_smem,
6694 get_memory_sync_info(instr, storage_buffer, 0));
6698 visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
6701 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
6702 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
6703 unsigned writemask = util_widen_mask(nir_intrinsic_write_mask(instr), elem_size_bytes);
6704 Temp offset = get_ssa_temp(ctx, instr->src[2].ssa);
6706 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
6708 memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
6710 (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE)) &&
6716 split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask, 16, &write_count,
6746 visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
6749 bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
6750 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
6751 bool cmpswap = instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap;
6755 get_ssa_temp(ctx, instr->src[3].ssa), data);
6757 Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
6758 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6760 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6763 switch (instr->intrinsic) {
6816 aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
6832 mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
6857 visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
6860 unsigned num_components = instr->num_components;
6861 unsigned component_size = instr->dest.ssa.bit_size / 8;
6865 parse_global(ctx, instr, &addr, &const_offset, &offset);
6867 LoadEmitInfo info = {Operand(addr), get_ssa_temp(ctx, &instr->dest.ssa), num_components,
6874 info.glc = nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT);
6875 info.align_mul = nir_intrinsic_align_mul(instr);
6876 info.align_offset = nir_intrinsic_align_offset(instr);
6877 info.sync = get_memory_sync_info(instr, storage_buffer, 0);
6883 unsigned align = nir_intrinsic_align(instr);
6890 (nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE) && byte_align_for_smem_mubuf;
6905 visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
6908 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
6909 unsigned writemask = util_widen_mask(nir_intrinsic_write_mask(instr), elem_size_bytes);
6911 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
6912 memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
6914 (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE)) &&
6920 split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask, 16, &write_count,
6925 parse_global(ctx, instr, &addr, &const_offset, &offset);
6999 visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
7002 bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
7003 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7004 bool cmpswap = instr->intrinsic == nir_intrinsic_global_atomic_comp_swap_amd;
7008 get_ssa_temp(ctx, instr->src[2].ssa), data);
7010 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7016 parse_global(ctx, instr, &addr, &const_offset, &offset);
7021 switch (instr->intrinsic) {
7075 aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
7095 flat->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
7101 switch (instr->intrinsic) {
7157 aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
7174 mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
7250 visit_load_smem(isel_context* ctx, nir_intrinsic_instr* instr)
7253 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7254 Temp base = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
7255 Temp offset = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
7282 emit_split_vector(ctx, dst, instr->dest.ssa.num_components);
7301 emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
7307 sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr));
7308 sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr));
7338 unsigned nir_storage = nir_intrinsic_memory_modes(instr);
7342 unsigned nir_semantics = nir_intrinsic_memory_semantics(instr);
7357 visit_load_shared(isel_context* ctx, nir_intrinsic_instr* instr)
7360 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7361 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7364 unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8;
7365 unsigned num_components = instr->dest.ssa.num_components;
7366 unsigned align = nir_intrinsic_align_mul(instr) ? nir_intrinsic_align(instr) : elem_size_bytes;
7367 load_lds(ctx, elem_size_bytes, num_components, dst, address, nir_intrinsic_base(instr), align);
7371 visit_store_shared(isel_context* ctx, nir_intrinsic_instr* instr)
7373 unsigned writemask = nir_intrinsic_write_mask(instr);
7374 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7375 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7376 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
7378 unsigned align = nir_intrinsic_align_mul(instr) ? nir_intrinsic_align(instr) : elem_size_bytes;
7379 store_lds(ctx, elem_size_bytes, data, writemask, address, nir_intrinsic_base(instr), align);
7383 visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
7385 unsigned offset = nir_intrinsic_base(instr);
7388 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7389 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7393 switch (instr->intrinsic) {
7476 bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
7480 assert(instr->dest.ssa.bit_size == 32);
7483 assert(instr->dest.ssa.bit_size == 64);
7498 Temp data2 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
7504 ds->definitions[0] = Definition(get_ssa_temp(ctx, &instr->dest.ssa));
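/* [editor's note, hedged] LDS atomics use the same selection scheme as the
 * buffer/global paths: the big switch above fills a DS opcode quartet, the
 * operand bit size picks 32 vs 64, and a "_rtn" variant is chosen only when
 * the result is read (return_previous). For the iadd case this is roughly: */
case nir_intrinsic_shared_atomic_add:
   op32 = aco_opcode::ds_add_u32;
   op64 = aco_opcode::ds_add_u64;
   op32_rtn = aco_opcode::ds_add_rtn_u32;
   op64_rtn = aco_opcode::ds_add_rtn_u64;
   break;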
7514 visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
7516 bool is_store = instr->intrinsic == nir_intrinsic_store_shared2_amd;
7517 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
7522 bool is64bit = (is_store ? instr->src[0].ssa->bit_size : instr->dest.ssa.bit_size) == 64;
7523 uint8_t offset0 = nir_intrinsic_offset0(instr);
7524 uint8_t offset1 = nir_intrinsic_offset1(instr);
7525 bool st64 = nir_intrinsic_st64(instr);
7533 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7539 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7550 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
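/* [editor's note] load/store_shared2_amd map onto the DS read2/write2 family:
 * two element-sized slots addressed by two 8-bit offsets scaled by the
 * element size, or by 64x that for the st64 variants. The byte offsets the
 * hardware ends up using are: */
unsigned stride = (is64bit ? 8u : 4u) * (st64 ? 64u : 1u);
unsigned byte_offset0 = offset0 * stride; /* offsets are 8-bit fields, so */
unsigned byte_offset1 = offset1 * stride; /* each slot reaches 255 * stride */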
7604 visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
7607 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7609 LoadEmitInfo info = {Operand(v1), dst, instr->dest.ssa.num_components,
7610 instr->dest.ssa.bit_size / 8u};
7611 info.align_mul = nir_intrinsic_align_mul(instr);
7612 info.align_offset = nir_intrinsic_align_offset(instr);
7616 if (nir_src_is_const(instr->src[0])) {
7619 bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(nir_src_as_uint(instr->src[0]), max)));
7620 info.const_offset = nir_src_as_uint(instr->src[0]) % max;
7622 info.offset = Operand(get_ssa_temp(ctx, instr->src[0].ssa));
7629 info.offset = Operand(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)));
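/* [editor's note, hedged] A constant scratch offset is split into a base that
 * is materialized in an SGPR and a small remainder kept as the instruction's
 * immediate, so the immediate always fits its encoding ("max" above is the
 * largest encodable immediate plus one, a power of two). Generic sketch: */
static void split_const_offset(uint32_t addr, uint32_t max /* power of two */,
                               uint32_t* base, uint32_t* imm)
{
   *base = addr - addr % max; /* == ROUND_DOWN_TO(addr, max) */
   *imm = addr % max;
}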
7636 visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
7639 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7640 Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
7642 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
7643 unsigned writemask = util_widen_mask(nir_intrinsic_write_mask(instr), elem_size_bytes);
7649 split_buffer_store(ctx, instr, false, RegType::vgpr, data, writemask, swizzle_component_size,
7654 offset = nir_src_is_const(instr->src[1]) ? Temp(0, s1) : offset;
7656 nir_src_is_const(instr->src[1]) ? nir_src_as_uint(instr->src[1]) : 0;
7694 visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
7698 unsigned stream = nir_intrinsic_stream_id(instr);
7699 Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7701 nir_const_value* next_vertex_cv = nir_src_as_const_value(instr->src[0]);
7967 emit_uniform_subgroup(isel_context* ctx, nir_intrinsic_instr* instr, Temp src)
7970 Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
8038 emit_uniform_reduce(isel_context* ctx, nir_intrinsic_instr* instr)
8040 nir_op op = (nir_op)nir_intrinsic_reduction_op(instr);
8046 Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
8047 unsigned bit_size = instr->src[0].ssa->bit_size;
8054 emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
8056 emit_uniform_subgroup(ctx, instr, get_ssa_temp(ctx, instr->src[0].ssa));
8063 emit_uniform_scan(isel_context* ctx, nir_intrinsic_instr* instr)
8066 Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
8067 nir_op op = (nir_op)nir_intrinsic_reduction_op(instr);
8068 bool inc = instr->intrinsic == nir_intrinsic_inclusive_scan;
8074 if (instr->src[0].ssa->bit_size > 32)
8083 emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], packed_tid);
8091 emit_uniform_subgroup(ctx, instr, get_ssa_temp(ctx, instr->src[0].ssa));
8097 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8098 ReduceOp reduce_op = get_reduce_op(op, instr->src[0].ssa->bit_size);
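/* [editor's note] With a subgroup-uniform source, additive reductions and
 * scans collapse to a single multiply, because every active lane contributes
 * the same x:
 *    reduce(x, iadd)         = x * bit_count(exec)
 *    exclusive_scan(x, iadd) = x * mbcnt(exec)        (active lanes below)
 *    inclusive_scan(x, iadd) = x * (mbcnt(exec) + 1)
 * emit_addition_uniform_reduce computes exactly this (ixor behaves the same,
 * modulo 2); opcodes that are idempotent on identical inputs (min/max/and/or)
 * reduce to a plain broadcast via emit_uniform_subgroup. */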
8247 visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
8250 switch (instr->intrinsic) {
8254 glsl_interp_mode mode = (glsl_interp_mode)nir_intrinsic_interp_mode(instr);
8255 Temp bary = get_interp_param(ctx, instr->intrinsic, mode);
8257 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8265 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8271 Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
8272 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8288 Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
8289 nir_const_value* const_addr = nir_src_as_const_value(instr->src[0]);
8375 Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
8379 Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
8380 emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), bary, pos1, pos2);
8384 bld.vopc(aco_opcode::v_cmp_lg_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8389 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8394 emit_load_frag_coord(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 4);
8398 emit_load_frag_shading_rate(ctx, get_ssa_temp(ctx, &instr->dest.ssa));
8404 aco_opcode::p_create_vector, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8409 case nir_intrinsic_load_tess_coord: visit_load_tess_coord(ctx, instr); break;
8410 case nir_intrinsic_load_interpolated_input: visit_load_interpolated_input(ctx, instr); break;
8411 case nir_intrinsic_store_output: visit_store_output(ctx, instr); break;
8413 case nir_intrinsic_load_input_vertex: visit_load_input(ctx, instr); break;
8414 case nir_intrinsic_load_per_vertex_input: visit_load_per_vertex_input(ctx, instr); break;
8415 case nir_intrinsic_load_ubo: visit_load_ubo(ctx, instr); break;
8416 case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
8417 case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
8418 case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
8419 case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
8432 case nir_intrinsic_shared_atomic_fmax: visit_shared_atomic(ctx, instr); break;
8434 case nir_intrinsic_store_shared2_amd: visit_access_shared2_amd(ctx, instr); break;
8436 case nir_intrinsic_bindless_image_sparse_load: visit_image_load(ctx, instr); break;
8437 case nir_intrinsic_bindless_image_store: visit_image_store(ctx, instr); break;
8449 case nir_intrinsic_bindless_image_atomic_fmax: visit_image_atomic(ctx, instr); break;
8450 case nir_intrinsic_bindless_image_size: visit_image_size(ctx, instr); break;
8451 case nir_intrinsic_bindless_image_samples: visit_image_samples(ctx, instr); break;
8452 case nir_intrinsic_load_ssbo: visit_load_ssbo(ctx, instr); break;
8453 case nir_intrinsic_store_ssbo: visit_store_ssbo(ctx, instr); break;
8454 case nir_intrinsic_load_buffer_amd: visit_load_buffer(ctx, instr); break;
8455 case nir_intrinsic_store_buffer_amd: visit_store_buffer(ctx, instr); break;
8456 case nir_intrinsic_load_smem_amd: visit_load_smem(ctx, instr); break;
8457 case nir_intrinsic_load_global_amd: visit_load_global(ctx, instr); break;
8458 case nir_intrinsic_store_global_amd: visit_store_global(ctx, instr); break;
8470 case nir_intrinsic_global_atomic_fmax_amd: visit_global_atomic(ctx, instr); break;
8482 case nir_intrinsic_ssbo_atomic_fmax: visit_atomic_ssbo(ctx, instr); break;
8483 case nir_intrinsic_load_scratch: visit_load_scratch(ctx, instr); break;
8484 case nir_intrinsic_store_scratch: visit_store_scratch(ctx, instr); break;
8485 case nir_intrinsic_scoped_barrier: emit_scoped_barrier(ctx, instr); break;
8487 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8501 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8508 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8528 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8537 isel_err(&instr->instr, "Unsupported stage for load_workgroup_id");
8553 bld.vadd32(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), temp, thread_id);
8555 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8560 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
8563 emit_mbcnt(ctx, get_ssa_temp(ctx, &instr->dest.ssa));
8577 bld.vop2(aco_opcode::v_or_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), tg_num,
8584 bld.vop3(aco_opcode::v_lshl_or_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8591 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8596 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8600 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand::zero());
8605 emit_mbcnt(ctx, get_ssa_temp(ctx, &instr->dest.ssa));
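/* [editor's note] mbcnt gives each lane the number of set mask bits at
 * positions strictly below it: v_mbcnt_lo_u32_b32 counts bits 0-31 and
 * v_mbcnt_hi_u32_b32 adds bits 32-63. With an all-ones mask that is exactly
 * the subgroup invocation id. Scalar model of what one lane computes: */
static unsigned mbcnt_model(uint64_t mask, unsigned lane /* < 64 */)
{
   return __builtin_popcountll(mask & ((1ull << lane) - 1ull));
}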
8610 bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8613 bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8617 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand::c32(0x1u));
8621 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8622 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8624 if (instr->src[0].ssa->bit_size == 1) {
8626 } else if (instr->src[0].ssa->bit_size == 32 && src.regClass() == v1) {
8628 } else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) {
8631 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
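/* [editor's note, hedged sketch] ballot is "lane mask of active lanes whose
 * source is non-zero": a 1-bit source already is a lane mask and only needs
 * an AND with exec, while 32/64-bit sources are first compared against zero,
 * e.g. for the 32-bit case: */
Temp ballot = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm),
                       Operand::zero(), Operand(src));
/* VOPC writes a lane mask (s1 in wave32, s2 in wave64); like the other
 * subgroup ops in this listing, the result then goes through emit_wqm so it
 * is also valid in whole-quad mode. */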
8648 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8649 if (!nir_src_is_divergent(instr->src[0])) {
8650 emit_uniform_subgroup(ctx, instr, src);
8652 Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
8653 if (instr->intrinsic == nir_intrinsic_read_invocation ||
8654 !nir_src_is_divergent(instr->src[1]))
8656 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8658 if (instr->dest.ssa.bit_size != 1)
8678 } else if (instr->dest.ssa.bit_size == 1 && tid.regClass() == s1) {
8682 } else if (instr->dest.ssa.bit_size == 1 && tid.regClass() == v1) {
8696 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
8702 bld.vop3(aco_opcode::v_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
8707 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8708 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8718 } else if (instr->dest.ssa.bit_size == 1) {
8729 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8730 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8743 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8744 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8755 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8756 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8757 nir_op op = (nir_op)nir_intrinsic_reduction_op(instr);
8759 instr->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(instr) : 0;
8763 if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size &&
8764 instr->dest.ssa.bit_size != 1) {
8767 ASSERTED bool expected_divergent = instr->intrinsic == nir_intrinsic_exclusive_scan;
8768 if (instr->intrinsic == nir_intrinsic_inclusive_scan)
8770 assert(nir_dest_is_divergent(instr->dest) == expected_divergent);
8772 if (instr->intrinsic == nir_intrinsic_reduce) {
8773 if (emit_uniform_reduce(ctx, instr))
8775 } else if (emit_uniform_scan(ctx, instr)) {
8780 if (instr->dest.ssa.bit_size == 1) {
8789 switch (instr->intrinsic) {
8804 unsigned bit_size = instr->src[0].ssa->bit_size;
8811 switch (instr->intrinsic) {
8829 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8831 if (!nir_dest_is_divergent(instr->dest)) {
8832 emit_uniform_subgroup(ctx, instr, src);
8839 bool bool_use_valu = instr->dest.ssa.bit_size == 1;
8843 switch (instr->intrinsic) {
8847 case nir_intrinsic_quad_swizzle_amd: dpp_ctrl = nir_intrinsic_swizzle_mask(instr); break;
8849 lane = nir_src_as_const_value(instr->src[1])->u32;
8856 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8863 else if (instr->dest.ssa.bit_size != 1)
8872 if (instr->dest.ssa.bit_size == 1 && instr->intrinsic == nir_intrinsic_quad_broadcast) {
8886 } else if (instr->dest.ssa.bit_size <= 32 || bool_use_valu) {
8887 unsigned excess_bytes = bool_use_valu ? 0 : 4 - instr->dest.ssa.bit_size / 8;
8898 } else if (instr->dest.ssa.bit_size == 64) {
8913 isel_err(&instr->instr, "Unimplemented NIR quad group instruction bit size.");
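/* [editor's note] All quad-group cases collapse into one DPP control value:
 * dpp_quad_perm packs four 2-bit lane selects, so quad_broadcast of lane n is
 * that select repeated four times (n * 0x55). Sketch using ACO's encoding
 * helper, continuing the switch above: */
case nir_intrinsic_quad_broadcast:
   dpp_ctrl = dpp_quad_perm(lane, lane, lane, lane); /* == lane * 0x55 */
   break;
case nir_intrinsic_quad_swap_horizontal:
   dpp_ctrl = dpp_quad_perm(1, 0, 3, 2); /* swap neighbours in x */
   break;
case nir_intrinsic_quad_swap_vertical:
   dpp_ctrl = dpp_quad_perm(2, 3, 0, 1); /* swap neighbours in y */
   break;
case nir_intrinsic_quad_swap_diagonal:
   dpp_ctrl = dpp_quad_perm(3, 2, 1, 0);
   break;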
8927 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8928 if (!nir_dest_is_divergent(instr->dest)) {
8929 emit_uniform_subgroup(ctx, instr, src);
8932 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8933 uint32_t mask = nir_intrinsic_swizzle_mask(instr);
8935 if (instr->dest.ssa.bit_size != 1)
8938 if (instr->dest.ssa.bit_size == 1) {
8961 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
8966 Temp src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
8967 Temp val = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
8968 Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa));
8969 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8983 isel_err(&instr->instr, "Unimplemented NIR instr bit size");
8988 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8989 Temp add_src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
8990 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8998 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9001 bld.vop3(aco_opcode::v_perm_b32, Definition(dst), get_ssa_temp(ctx, instr->src[0].ssa),
9002 as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)),
9003 as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)));
9007 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9008 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9015 bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)),
9016 bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa)));
9018 isel_err(&instr->instr, "Unimplemented lane_permute_16_amd");
9026 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9041 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9058 if (instr->intrinsic == nir_intrinsic_discard_if ||
9059 instr->intrinsic == nir_intrinsic_terminate_if) {
9060 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9076 get_ssa_temp(ctx, &instr->dest.ssa));
9083 emit_wqm(bld, last, get_ssa_temp(ctx, &instr->dest.ssa));
9092 emit_wqm(bld, elected, get_ssa_temp(ctx, &instr->dest.ssa));
9097 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9098 if (nir_intrinsic_memory_scope(instr) == NIR_SCOPE_SUBGROUP &&
9104 aco_opcode opcode = nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE
9113 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9118 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9123 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9128 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9133 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9138 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9156 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9185 visit_emit_vertex_with_counter(ctx, instr);
9190 unsigned stream = nir_intrinsic_stream_id(instr);
9204 unsigned i = instr->intrinsic == nir_intrinsic_has_input_vertex_amd ? 0 : 1;
9205 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), merged_wave_info_to_mask(ctx, i));
9214 Temp prim_ch1 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
9220 Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
9221 Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa);
9226 Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa);
9227 Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa);
9228 Temp m0_val = get_ssa_temp(ctx, instr->src[2].ssa);
9235 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9241 case nir_intrinsic_bvh64_intersect_ray_amd: visit_bvh64_intersect_ray_amd(ctx, instr); break;
9243 ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
9244 ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
9248 ctx->arg_temps[ctx->args->ac.tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
9249 ctx->arg_temps[ctx->args->ac.tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
9251 get_ssa_temp(ctx, instr->src[2].ssa);
9252 ctx->arg_temps[ctx->args->ac.tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
9256 bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
9262 assert(nir_intrinsic_base(instr) < ctx->args->ac.arg_count);
9263 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9264 Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
9266 assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr : RegType::vgpr));
9272 isel_err(&instr->instr, "Unimplemented intrinsic instr");
9409 visit_tex(isel_context* ctx, nir_tex_instr* instr)
9411 assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical);
9424 for (unsigned i = 0; i < instr->num_srcs; i++) {
9425 switch (instr->src[i].src_type) {
9427 resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
9430 sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
9436 bool tg4_integer_workarounds = ctx->options->gfx_level <= GFX8 && instr->op == nir_texop_tg4 &&
9437 (instr->dest_type & (nir_type_int | nir_type_uint));
9439 tg4_integer_workarounds && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
9443 int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord);
9445 a16 = instr->src[coord_idx].src.ssa->bit_size == 16;
9447 int ddx_idx = nir_tex_instr_src_index(instr, nir_tex_src_ddx);
9449 g16 = instr->src[ddx_idx].src.ssa->bit_size == 16;
9451 for (unsigned i = 0; i < instr->num_srcs; i++) {
9452 switch (instr->src[i].src_type) {
9454 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9455 coord = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9459 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9461 bias = get_ssa_temp(ctx, instr->src[i].src.ssa);
9465 if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) {
9468 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9469 lod = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9475 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9476 clamped_lod = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9480 if (instr->is_shadow) {
9481 assert(instr->src[i].src.ssa->bit_size == 32);
9482 compare = get_ssa_temp(ctx, instr->src[i].src.ssa);
9487 assert(instr->src[i].src.ssa->bit_size == 32);
9488 offset = get_ssa_temp(ctx, instr->src[i].src.ssa);
9489 get_const_vec(instr->src[i].src.ssa, const_offset);
9493 assert(instr->src[i].src.ssa->bit_size == (g16 ? 16 : 32));
9494 ddx = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, g16);
9498 assert(instr->src[i].src.ssa->bit_size == (g16 ? 16 : 32));
9499 ddy = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, g16);
9503 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9504 sample_index = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9513 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
9514 return get_buffer_size(ctx, resource, get_ssa_temp(ctx, &instr->dest.ssa));
9516 if (instr->op == nir_texop_texture_samples) {
9517 get_image_samples(ctx, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), resource);
9522 assert(instr->op != nir_texop_txf);
9590 if (ctx->options->gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
9591 instr->op != nir_texop_lod && instr->coord_components) {
9601 coord2d = instr->op == nir_texop_txf ? Operand::c16(0) : Operand::c16(0x3800);
9603 coord2d = instr->op == nir_texop_txf ? Operand::c32(0) : Operand::c32(0x3f000000);
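/* [editor's note] GFX9 lacks true 1D images, so a second coordinate is
 * appended: integer 0 for txf (texel fetches), 0.5 otherwise so filtering
 * stays in the middle of the single row. The magic numbers are just that
 * constant in the coordinate encoding: */
static_assert(0x3800 == 14u << 10, "0.5 in binary16: exp field 14 (bias 15), mantissa 0");
static_assert(0x3f000000 == 126u << 23, "0.5 in binary32: exp field 126 (bias 127), mantissa 0");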
9620 assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE || !a16);
9621 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->coord_components)
9622 prepare_cube_coords(ctx, coords, &ddx, &ddy, instr->op == nir_texop_txd,
9623 instr->is_array && instr->op != nir_texop_lod);
9634 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && ctx->options->gfx_level == GFX9) {
9645 bool da = should_declare_array(ctx, instr->sampler_dim, instr->is_array);
9648 unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa) & 0xf;
9649 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
9651 if (instr->is_sparse)
9654 ctx->options->gfx_level >= GFX10 && instr->sampler_dim != GLSL_SAMPLER_DIM_BUF
9655 ? ac_get_sampler_dim(ctx->options->gfx_level, instr->sampler_dim, instr->is_array)
9657 bool d16 = instr->dest.ssa.bit_size == 16;
9658 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9662 if (instr->op == nir_texop_tg4) {
9663 assert(instr->dest.ssa.num_components == (4 + instr->is_sparse));
9664 if (instr->is_shadow)
9667 dmask = 1 << instr->component;
9669 tmp_dst = bld.tmp(instr->is_sparse ? v5 : (d16 ? v2 : v4));
9670 } else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
9672 } else if (util_bitcount(dmask) != instr->dest.ssa.num_components ||
9674 unsigned bytes = util_bitcount(dmask) * instr->dest.ssa.bit_size / 8;
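/* [editor's note] For tg4 (textureGather) the four results are the same
 * channel fetched from four texels, so dmask stops being a component mask and
 * instead selects the gathered channel: exactly one bit, 1 << instr->component,
 * forced to the red channel for shadow gathers where the comparison result is
 * what comes back. Sparse variants append a residency dword, hence the v5
 * temporary and the "+ instr->is_sparse" in the assert above. */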
9678 if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels) {
9684 if (ctx->options->gfx_level == GFX9 && instr->op == nir_texop_txs &&
9685 instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
9687 } else if (instr->op == nir_texop_query_levels) {
9695 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
9720 if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
9760 if (instr->dest_type & nir_type_uint) {
9796 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
9821 create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3 + instr->is_sparse, 1)};
9827 mubuf->tfe = instr->is_sparse;
9832 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
9853 if (instr->op == nir_texop_txf || instr->op == nir_texop_fragment_fetch_amd ||
9854 instr->op == nir_texop_fragment_mask_fetch_amd) {
9855 aco_opcode op = level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
9856 instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS
9859 Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
9862 if (instr->op == nir_texop_fragment_mask_fetch_amd)
9869 tex->tfe = instr->is_sparse;
9873 if (instr->op == nir_texop_fragment_mask_fetch_amd) {
9892 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, dmask);
9991 if (instr->op == nir_texop_tg4) {
10021 } else if (instr->op == nir_texop_lod) {
10026 !level_zero && instr->sampler_dim != GLSL_SAMPLER_DIM_MS &&
10027 instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS;
10029 Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
10035 tex->tfe = instr->is_sparse;
10048 if (instr->dest_type & nir_type_uint)
10057 if (instr->is_sparse)
10064 unsigned mask = instr->op == nir_texop_tg4 ? (instr->is_sparse ? 0x1F : 0xF) : dmask;
10065 expand_vector(ctx, tmp_dst, dst, instr->dest.ssa.num_components, mask);
10088 visit_phi(isel_context* ctx, nir_phi_instr* instr)
10091 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
10092 assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
10094 bool logical = !dst.is_linear() || nir_dest_is_divergent(instr->dest);
10100 nir_foreach_phi_src (src, instr)
10106 (std::max(exec_list_length(&instr->srcs), (unsigned)preds.size()) + 1) * sizeof(Operand));
10143 nir_loop* loop = nir_cf_node_as_loop(instr->instr.block->cf_node.parent);
10145 if (last->successors[0] != instr->instr.block)
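/* [editor's note] visit_phi picks between the two CFGs ACO maintains:
 * divergent values become a logical p_phi over logical predecessors, uniform
 * values a p_linear_phi over linear ones. The operand array is sized for
 * max(NIR sources, predecessor count) + 1 because loop-header phis may later
 * receive an extra continue-edge operand (see the loop-header fixup below). */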
10184 visit_undef(isel_context* ctx, nir_ssa_undef_instr* instr)
10186 Temp dst = get_ssa_temp(ctx, &instr->def);
10290 for (auto&& instr : loop_entry->instructions) {
10291 if (instr->opcode == aco_opcode::p_linear_phi) {
10293 new_phi->definitions[0] = instr->definitions[0];
10295 new_phi->operands[i] = instr->operands[i];
10297 for (unsigned i = new_phi->operands.size(); i < instr->operands.size(); i++)
10298 assert(instr->operands[i].tempId() == instr->operands.back().tempId());
10299 instr.swap(new_phi);
10300 } else if (instr->opcode == aco_opcode::p_phi) {
10396 visit_jump(isel_context* ctx, nir_jump_instr* instr)
10399 ctx->cf_info.nir_to_aco[instr->instr.block->index] = ctx->block->index;
10401 switch (instr->type) {
10404 default: isel_err(&instr->instr, "Unknown NIR jump instr"); abort();
10411 nir_foreach_instr (instr, block) {
10412 switch (instr->type) {
10413 case nir_instr_type_alu: visit_alu_instr(ctx, nir_instr_as_alu(instr)); break;
10414 case nir_instr_type_load_const: visit_load_const(ctx, nir_instr_as_load_const(instr)); break;
10415 case nir_instr_type_intrinsic: visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); break;
10416 case nir_instr_type_tex: visit_tex(ctx, nir_instr_as_tex(instr)); break;
10417 case nir_instr_type_phi: visit_phi(ctx, nir_instr_as_phi(instr)); break;
10418 case nir_instr_type_ssa_undef: visit_undef(ctx, nir_instr_as_ssa_undef(instr)); break;
10420 case nir_instr_type_jump: visit_jump(ctx, nir_instr_as_jump(instr)); break;
10421 default: isel_err(instr, "Unknown NIR instr type");
10509 for (aco_ptr<Instruction>& instr : ctx->program->blocks[loop_header_idx].instructions) {
10510 if ((logical && instr->opcode == aco_opcode::p_phi) ||
10511 (linear && instr->opcode == aco_opcode::p_linear_phi)) {
10513 instr->operands.pop_back();
10514 } else if (!is_phi(instr)) {
10526 for (aco_ptr<Instruction>& instr : ctx->program->blocks[loop_header_idx].instructions) {
10527 if (instr->opcode == aco_opcode::p_linear_phi) {
10529 instr->operands.pop_back();
10531 instr->operands.back() =
10532 create_continue_phis(ctx, loop_header_idx, ctx->block->index, instr, vals);
10533 } else if (!is_phi(instr)) {
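/* [editor's note] Loop-header phis are finalized only after the loop body is
 * translated: the scans above walk the header block and either drop the
 * reserved operand again (pop_back) when the expected edge never
 * materialized, or fill it via create_continue_phis, which threads the value
 * through every continue block. They stop at the first non-phi instruction,
 * since phis are always grouped at the top of a block. */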
12265 Instruction* instr;
12267 instr = bld.vop2_sdwa(aco_opcode::v_add_u32, fetch_index_def, div_info, fetch_index).instr;
12269 instr = bld.vop2_sdwa(aco_opcode::v_add_co_u32, fetch_index_def, Definition(vcc, bld.lm),
12271 .instr;
12272 instr->sdwa().sel[0] = SubdwordSel::ubyte1;
12277 instr =
12278 bld.vop2_sdwa(aco_opcode::v_lshrrev_b32, fetch_index_def, div_info, fetch_index).instr;
12279 instr->sdwa().sel[0] = SubdwordSel::ubyte2;
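/* [editor's note, hedged] This tail uses SDWA so the VALU op itself extracts
 * a byte of div_info instead of needing a separate v_bfe: sel[0] =
 * SubdwordSel::ubyte1/ubyte2 makes operand 0 read only byte 1 or 2,
 * zero-extended. What the ubyte2 select feeds the ALU, as plain C: */
static uint32_t read_ubyte2(uint32_t div_info)
{
   return (div_info >> 16) & 0xff;
}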