Lines matching refs:src (references to the symbol `src` in ACO's NIR instruction selection; the number leading each match is its line number in the original source file)

170 emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1), bool program_needs_wqm = false)
174 return src;
176 return bld.copy(Definition(dst), src);
178 dst = bld.tmp(src.regClass());
181 assert(src.size() == dst.size());
182 bld.pseudo(aco_opcode::p_wqm, Definition(dst), src);
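
Taken together, the emit_wqm matches above outline the whole helper. A minimal reconstruction, assuming the two guard conditions (everything else comes from the matched lines):

   Temp emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1),
                 bool program_needs_wqm = false)
   {
      if (!program_needs_wqm) {           /* assumed guard: WQM not required */
         if (!dst.id())
            return src;                   /* no explicit dst: pass src through */
         return bld.copy(Definition(dst), src);
      }
      if (!dst.id())
         dst = bld.tmp(src.regClass());   /* materialize a matching dst */
      assert(src.size() == dst.size());
      bld.pseudo(aco_opcode::p_wqm, Definition(dst), src); /* real WQM copy */
      return dst;
   }
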
234 emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
257 Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
265 return bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
268 return bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, mask, 0, false);
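
The three emit_masked_swizzle returns above are alternative encodings of one lane swizzle. A sketch of the selection order; the predicates are my assumption, the builder calls are the matched ones:

   /* Prefer register-only DPP forms when the mask can be encoded as one;
    * ds_swizzle_b32 through the LDS crossbar is the generic fallback. */
   if (can_encode_as_dpp8)   /* assumed predicate, newer gfx levels */
      return bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
   if (can_encode_as_dpp)    /* assumed predicate, gfx8+ */
      return bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
   return bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, mask, 0, false);
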
340 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst)
343 bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::c32(idx));
347 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, RegClass dst_rc)
350 if (src.regClass() == dst_rc) {
352 return src;
355 assert(src.bytes() > (idx * dst_rc.bytes()));
357 auto it = ctx->allocated_vec.find(src.id());
369 src = as_vgpr(ctx, src);
371 if (src.bytes() == dst_rc.bytes()) {
373 return bld.copy(bld.def(dst_rc), src);
376 emit_extract_vector(ctx, src, idx, dst);
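
The emit_extract_vector matches show a two-level helper: the RegClass overload handles the cheap cases and delegates to the Temp overload, which emits the pseudo instruction. A condensed sketch; the allocated_vec cache handling is elided and the guards around it are assumptions:

   Temp emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx,
                            RegClass dst_rc)
   {
      if (src.regClass() == dst_rc)
         return src;                            /* already the right shape */
      assert(src.bytes() > (idx * dst_rc.bytes()));
      Builder bld(ctx->program, ctx->block);    /* assumed construction */
      /* ctx->allocated_vec.find(src.id()) can return a cached element here */
      src = as_vgpr(ctx, src);                  /* assumed: subdword path */
      if (src.bytes() == dst_rc.bytes())
         return bld.copy(bld.def(dst_rc), src); /* idx must be 0 here */
      Temp dst = bld.tmp(dst_rc);               /* assumed temp creation */
      emit_extract_vector(ctx, src, idx, dst);  /* emits p_extract_vector */
      return dst;
   }
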
458 Temp src = emit_extract_vector(ctx, vec_src, k++, src_rc);
460 src = bld.as_uniform(src);
461 vec->operands[i] = Operand(src);
462 elems[i] = src;
571 /* if dst is vgpr - split the src and create a shrunk version according to the mask. */
581 /* if dst is sgpr - split the src, but move the original to sgpr. */
642 convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits,
649 if (dst_bits % 32 == 0 || src.type() == RegType::sgpr)
650 dst = bld.tmp(src.type(), DIV_ROUND_UP(dst_bits, 32u));
655 assert(src.type() == RegType::sgpr || src_bits == src.bytes() * 8);
658 if (dst.bytes() == src.bytes() && dst_bits < src_bits) {
661 return bld.copy(Definition(dst), src);
662 } else if (dst.bytes() < src.bytes()) {
663 return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::zero());
668 tmp = src_bits == 32 ? src : bld.tmp(src.type(), 1);
670 if (tmp == src) {
671 } else if (src.regClass() == s1) {
673 bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(),
677 bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
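
convert_int's matches sketch the whole sign/zero-extension strategy: a same-size narrowing is a plain copy, a genuinely smaller destination is a p_extract_vector of the low element, and a widening goes through p_extract with the source width as the extract size. Reconstructed, with branch guards and the trailing sign_extend operand partly assumed:

   if (dst.bytes() == src.bytes() && dst_bits < src_bits)
      return bld.copy(Definition(dst), src);    /* reinterpret in place */
   else if (dst.bytes() < src.bytes())
      return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src,
                        Operand::zero());       /* take the low element */
   /* widening: extract src_bits from offset 0, sign- or zero-extending */
   Temp tmp = src_bits == 32 ? src : bld.tmp(src.type(), 1);
   if (tmp != src) {
      if (src.regClass() == s1)                 /* scalar form also writes scc */
         bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc),
                    src, Operand::zero(), Operand::c32(src_bits),
                    Operand::c32(sign_extend)); /* final operand assumed */
      else
         bld.pseudo(aco_opcode::p_extract, Definition(tmp), src,
                    Operand::zero(), Operand::c32(src_bits),
                    Operand::c32(sign_extend)); /* final operand assumed */
   }
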
704 extract_8_16_bit_sgpr_element(isel_context* ctx, Temp dst, nir_alu_src* src, sgpr_extract_mode mode)
706 Temp vec = get_ssa_temp(ctx, src->src.ssa);
707 unsigned src_size = src->src.ssa->bit_size;
708 unsigned swizzle = src->swizzle[0];
733 get_alu_src(struct isel_context* ctx, nir_alu_src src, unsigned size = 1)
735 if (src.src.ssa->num_components == 1 && size == 1)
736 return get_ssa_temp(ctx, src.src.ssa);
738 Temp vec = get_ssa_temp(ctx, src.src.ssa);
739 unsigned elem_size = src.src.ssa->bit_size / 8u;
743 if (src.swizzle[i] != i)
753 assert(src.src.ssa->bit_size == 8 || src.src.ssa->bit_size == 16);
754 return extract_8_16_bit_sgpr_element(ctx, ctx->program->allocateTmp(s1), &src,
765 return emit_extract_vector(ctx, vec, src.swizzle[0], elem_rc);
772 elems[i] = emit_extract_vector(ctx, vec, src.swizzle[i], elem_rc);
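
get_alu_src's matches cover its main paths: a single-component SSA value passes straight through, 8/16-bit values living in SGPRs go through extract_8_16_bit_sgpr_element, and anything swizzled is rebuilt from extracted elements. A loose sketch; the flag name, the SGPR predicate, and the extract mode are assumptions:

   if (src.src.ssa->num_components == 1 && size == 1)
      return get_ssa_temp(ctx, src.src.ssa);     /* trivial: no swizzle */

   Temp vec = get_ssa_temp(ctx, src.src.ssa);
   unsigned elem_size = src.src.ssa->bit_size / 8u;
   bool identity = true;                         /* assumed flag */
   for (unsigned i = 0; i < size; i++)
      identity &= src.swizzle[i] == i;
   /* 8/16-bit scalars held in SGPRs need a bitfield extract, not a split */
   if (vec.type() == RegType::sgpr && elem_size < 4)  /* assumed predicate */
      return extract_8_16_bit_sgpr_element(ctx, ctx->program->allocateTmp(s1),
                                           &src, mode /* assumed */);
   if (size == 1)
      return emit_extract_vector(ctx, vec, src.swizzle[0], elem_rc);
   for (unsigned i = 0; i < size; i++)           /* rebuild swizzled vector */
      elems[i] = emit_extract_vector(ctx, vec, src.swizzle[i], elem_rc);
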
784 get_alu_src_vop3p(struct isel_context* ctx, nir_alu_src src)
790 assert(src.src.ssa->bit_size == 16);
791 assert(src.swizzle[0] >> 1 == src.swizzle[1] >> 1);
793 Temp tmp = get_ssa_temp(ctx, src.src.ssa);
798 unsigned dword = src.swizzle[0] >> 1;
814 assert(((src.swizzle[0] | src.swizzle[1]) & 1) == 0);
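
For packed 16-bit VOP3P sources, get_alu_src_vop3p asserts that both swizzle components address the same 32-bit dword (swizzle[0] >> 1 == swizzle[1] >> 1), extracts that dword once, and encodes which half each operand reads via the opsel bits rather than by moving data. The split, in isolation:

   /* A 16-bit swizzle index splits into a dword index (which 32-bit
    * register holds the value) and a half index (which 16 bits of it). */
   unsigned dword = swizzle >> 1;   /* register selection */
   unsigned half  = swizzle & 1;    /* opsel bit fed to the VOP3P encoding */
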
824 nir_ssa_scalar{instr->src[src_idx].src.ssa, instr->src[src_idx].swizzle[0]};
846 sop2->operands[0] = Operand(get_alu_src(ctx, instr->src[0]));
847 sop2->operands[1] = Operand(get_alu_src(ctx, instr->src[1]));
875 Temp src0 = get_alu_src(ctx, instr->src[swap_srcs ? 1 : 0]);
876 Temp src1 = get_alu_src(ctx, instr->src[swap_srcs ? 0 : 1]);
918 Temp src0 = get_alu_src(ctx, instr->src[0]);
919 Temp src1 = get_alu_src(ctx, instr->src[1]);
942 Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
945 src[i] = get_alu_src(ctx, instr->src[swap_srcs ? 1 - i : i]);
947 src[i] = as_vgpr(ctx, src[i]);
949 has_sgpr = src[i].type() == RegType::sgpr;
957 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1], src[2]);
959 tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]);
965 bld.vop3(op, Definition(dst), src[0], src[1], src[2]);
967 bld.vop3(op, Definition(dst), src[0], src[1]);
975 Temp src0 = get_alu_src_vop3p(ctx, instr->src[swap_srcs]);
976 Temp src1 = get_alu_src_vop3p(ctx, instr->src[!swap_srcs]);
983 (instr->src[!swap_srcs].swizzle[0] & 1) << 1 | (instr->src[swap_srcs].swizzle[0] & 1);
985 (instr->src[!swap_srcs].swizzle[1] & 1) << 1 | (instr->src[swap_srcs].swizzle[1] & 1);
996 Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
999 src[i] = get_alu_src(ctx, instr->src[i]);
1001 src[i] = as_vgpr(ctx, src[i]);
1003 has_sgpr = src[i].type() == RegType::sgpr;
1008 bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p().clamp = clamp;
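
The repeated pattern in the VOP3/VOP3P matches above (as_vgpr paired with a has_sgpr flag) enforces the VALU constraint that at most one source may come from an SGPR. Reconstructed loop, shape inferred from the matched lines:

   bool has_sgpr = false;
   for (unsigned i = 0; i < num_sources; i++) {  /* num_sources: 2 or 3 */
      src[i] = get_alu_src(ctx, instr->src[i]);
      if (has_sgpr)
         src[i] = as_vgpr(ctx, src[i]);   /* a second SGPR must be copied */
      else
         has_sgpr = src[i].type() == RegType::sgpr;
   }
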
1018 bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0])));
1020 bld.vop1(op, Definition(dst), get_alu_src(ctx, instr->src[0]));
1026 Temp src0 = get_alu_src(ctx, instr->src[0]);
1027 Temp src1 = get_alu_src(ctx, instr->src[1]);
1070 Temp src0 = get_alu_src(ctx, instr->src[0]);
1071 Temp src1 = get_alu_src(ctx, instr->src[1]);
1090 aco_opcode s_op = instr->src[0].src.ssa->bit_size == 64 ? s64_op
1091 : instr->src[0].src.ssa->bit_size == 32 ? s32_op
1093 aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op
1094 : instr->src[0].src.ssa->bit_size == 32 ? v32_op
1097 get_ssa_temp(ctx, instr->src[0].src.ssa).type() == RegType::vgpr ||
1098 get_ssa_temp(ctx, instr->src[1].src.ssa).type() == RegType::vgpr;
1114 Temp src0 = get_alu_src(ctx, instr->src[0]);
1115 Temp src1 = get_alu_src(ctx, instr->src[1]);
1128 Temp cond = get_alu_src(ctx, instr->src[0]);
1129 Temp then = get_alu_src(ctx, instr->src[1]);
1130 Temp els = get_alu_src(ctx, instr->src[2]);
1163 if (!nir_src_is_divergent(instr->src[0].src)) { /* uniform condition and values in sgpr */
1395 elems[i] = get_alu_src(ctx, instr->src[i]);
1419 if (nir_src_is_const(instr->src[i].src)) {
1420 const_vals[idx] |= nir_src_as_uint(instr->src[i].src) << offset;
1479 Temp src = get_alu_src(ctx, instr->src[0]);
1480 if (src.type() == RegType::vgpr && dst.type() == RegType::sgpr) {
1482 assert(src.size() == dst.size() && "wrong src or dst register class for nir_op_mov");
1483 bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
1485 assert(src.bytes() == dst.bytes() && "wrong src or dst register class for nir_op_mov");
1486 bld.copy(Definition(dst), src);
1491 Temp src = get_alu_src(ctx, instr->src[0]);
1496 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
1502 bld.sop1(opcode, Definition(dst), bld.def(s1, scc), src);
1510 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
1512 unsigned opsel_lo = (instr->src[0].swizzle[0] & 1) << 1;
1513 unsigned opsel_hi = ((instr->src[0].swizzle[1] & 1) << 1) | 1;
1516 src, opsel_lo, opsel_hi);
1517 bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
1520 Temp src = get_alu_src(ctx, instr->src[0]);
1522 bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src);
1524 bld.vop2(aco_opcode::v_max_i32, Definition(dst), src,
1525 bld.vsub32(bld.def(v1), Operand::zero(), src));
1528 aco_opcode::v_max_i16_e64, Definition(dst), src,
1529 bld.vop3(aco_opcode::v_sub_u16_e64, Definition(bld.tmp(v2b)), Operand::zero(2), src));
1531 src = as_vgpr(ctx, src);
1532 bld.vop2(aco_opcode::v_max_i16, Definition(dst), src,
1533 bld.vop2(aco_opcode::v_sub_u16, Definition(bld.tmp(v2b)), Operand::zero(2), src));
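
The iabs lowering above uses the identity |x| = max(x, 0 - x): s_abs_i32 directly on SGPRs, otherwise v_max over a subtracted copy. A runnable scalar check of the identity (INT_MIN wraps back to itself, matching the hardware subtract):

   #include <algorithm>
   #include <cstdint>

   int32_t iabs_via_max(int32_t x)
   {
      /* compute 0 - x with wrap-around, as the VALU sub does */
      int32_t neg = (int32_t)(0u - (uint32_t)x);
      return std::max(x, neg);
   }
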
1540 Temp src = get_alu_src(ctx, instr->src[0]);
1543 bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(-1));
1547 bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand::c32(63u));
1550 neqz = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), src, Operand::zero());
1553 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), src, Operand::zero())
1559 bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand::c32(-1), src, Operand::c32(1u));
1561 bld.vop3(aco_opcode::v_med3_i16, Definition(dst), Operand::c16(-1), src, Operand::c16(1u));
1563 src = as_vgpr(ctx, src);
1565 bld.vop2(aco_opcode::v_min_i16, Definition(bld.tmp(v1)), Operand::c16(1u), src));
1567 Temp upper = emit_extract_vector(ctx, src, 1, v1);
1569 Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i64, bld.def(bld.lm), Operand::zero(), src);
1700 bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1701 get_alu_src(ctx, instr->src[0]));
1724 bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1725 get_alu_src(ctx, instr->src[0]));
1747 bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst), get_alu_src(ctx, instr->src[1]),
1748 get_alu_src(ctx, instr->src[0]));
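
The three 64-bit shift matches read the NIR operands in swapped order because the *rev VALU opcodes take the shift amount as their first source. The matched call, annotated:

   bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst),
            get_alu_src(ctx, instr->src[1]),   /* shift amount, first */
            get_alu_src(ctx, instr->src[0]));  /* value being shifted */
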
1761 Temp src = get_alu_src(ctx, instr->src[0]);
1762 if (src.regClass() == s1) {
1763 bld.sop1(aco_opcode::s_ff1_i32_b32, Definition(dst), src);
1764 } else if (src.regClass() == v1) {
1766 } else if (src.regClass() == s2) {
1767 bld.sop1(aco_opcode::s_ff1_i32_b64, Definition(dst), src);
1775 Temp src = get_alu_src(ctx, instr->src[0]);
1776 if (src.regClass() == s1 || src.regClass() == s2) {
1777 aco_opcode op = src.regClass() == s2
1782 Temp msb_rev = bld.sop1(op, bld.def(s1), src);
1785 Operand::c32(src.size() * 32u - 1u), msb_rev);
1791 } else if (src.regClass() == v1) {
1800 } else if (src.regClass() == v2) {
1805 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
1824 Temp src = get_alu_src(ctx, instr->src[0]);
1825 if (src.regClass() == s1) {
1826 Temp msb_rev = bld.sop1(aco_opcode::s_flbit_i32_b32, bld.def(s1), src);
1828 } else if (src.regClass() == v1) {
1829 Temp msb_rev = bld.vop1(aco_opcode::v_ffbh_u32, bld.def(v1), src);
1838 bld.sop1(aco_opcode::s_brev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1840 bld.vop1(aco_opcode::v_bfrev_b32, Definition(dst), get_alu_src(ctx, instr->src[0]));
1861 Temp src0 = get_alu_src(ctx, instr->src[0]);
1862 Temp src1 = get_alu_src(ctx, instr->src[1]);
1900 Temp src0 = get_alu_src(ctx, instr->src[0]);
1901 Temp src1 = get_alu_src(ctx, instr->src[1]);
1983 Temp src0 = get_alu_src(ctx, instr->src[0]);
1984 Temp src1 = get_alu_src(ctx, instr->src[1]);
2012 Temp src0 = get_alu_src(ctx, instr->src[0]);
2013 Temp src1 = get_alu_src(ctx, instr->src[1]);
2059 Temp src0 = get_alu_src(ctx, instr->src[0]);
2060 Temp src1 = get_alu_src(ctx, instr->src[1]);
2100 Temp src0 = get_alu_src(ctx, instr->src[0]);
2101 Temp src1 = get_alu_src(ctx, instr->src[1]);
2143 Temp src0 = get_alu_src(ctx, instr->src[0]);
2144 Temp src1 = get_alu_src(ctx, instr->src[1]);
2225 Temp src0 = get_alu_src(ctx, instr->src[0]);
2226 Temp src1 = get_alu_src(ctx, instr->src[1]);
2268 } else if (nir_src_is_const(instr->src[0].src)) {
2269 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[1]),
2270 nir_src_as_uint(instr->src[0].src), false);
2271 } else if (nir_src_is_const(instr->src[1].src)) {
2272 bld.v_mul_imm(Definition(dst), get_alu_src(ctx, instr->src[0]),
2273 nir_src_as_uint(instr->src[1].src), false);
2311 Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]),
2312 as_vgpr(ctx, get_alu_src(ctx, instr->src[1])));
2364 Temp src0 = get_alu_src(ctx, instr->src[0]);
2365 Temp src1 = get_alu_src(ctx, instr->src[1]);
2391 Temp src0 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[0]));
2392 Temp src1 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[1]));
2393 Temp src2 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[2]));
2398 opsel_lo |= (instr->src[i].swizzle[0] & 1) << i;
2399 opsel_hi |= (instr->src[i].swizzle[1] & 1) << i;
2489 Temp in = get_alu_src(ctx, instr->src[0], 3);
2490 Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
2492 Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]);
2494 Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
2495 Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
2504 Temp in = get_alu_src(ctx, instr->src[0], 3);
2505 Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
2507 bld.vop3(aco_opcode::v_cubeid_f32, Definition(dst), src[0], src[1], src[2]);
2518 Temp src = get_alu_src(ctx, instr->src[0]);
2519 emit_rsq(ctx, bld, Definition(dst), src);
2530 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2532 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
2533 instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
2538 Temp src = get_alu_src(ctx, instr->src[0]);
2540 bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0xbc00u), as_vgpr(ctx, src));
2543 as_vgpr(ctx, src));
2546 src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
2547 as_vgpr(ctx, src));
2549 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2559 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2561 bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
2562 instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
2568 Temp src = get_alu_src(ctx, instr->src[0]);
2571 Operand::c16(0x3c00), as_vgpr(ctx, src))
2576 Operand::c32(0x3f800000u), as_vgpr(ctx, src))
2581 src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
2582 as_vgpr(ctx, src));
2584 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2594 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2596 bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
2597 instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
2601 Temp src = get_alu_src(ctx, instr->src[0]);
2604 src);
2607 Operand::c32(0x3f800000u), src);
2612 Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand::zero());
2623 Temp src = get_alu_src(ctx, instr->src[0]);
2624 emit_log2(ctx, bld, Definition(dst), src);
2634 Temp src = get_alu_src(ctx, instr->src[0]);
2635 emit_rcp(ctx, bld, Definition(dst), src);
2658 Temp src = get_alu_src(ctx, instr->src[0]);
2659 emit_sqrt(ctx, bld, Definition(dst), src);
2686 Temp src = get_alu_src(ctx, instr->src[0]);
2687 emit_floor_f64(ctx, bld, Definition(dst), src);
2707 Temp src0 = get_alu_src(ctx, instr->src[0]);
2731 Temp src = get_alu_src(ctx, instr->src[0]);
2732 emit_trunc_f64(ctx, bld, Definition(dst), src);
2749 Temp src0 = get_alu_src(ctx, instr->src[0]);
2788 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2793 bld.vop1(opcode, Definition(dst), src);
2797 src = bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), src);
2801 bld.vop1(opcode, Definition(dst), src);
2832 if (instr->src[0].src.ssa->bit_size == 16) {
2833 Temp src = get_alu_src(ctx, instr->src[0]);
2834 Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src);
2837 } else if (instr->src[0].src.ssa->bit_size == 32) {
2839 } else if (instr->src[0].src.ssa->bit_size == 64) {
2847 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2851 src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
2852 src =
2853 bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u));
2854 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
2856 src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
2857 src =
2858 bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u));
2859 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
2861 Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
2864 emit_extract_vector(ctx, src, 1, v1), cond);
2866 cond = bld.vopc(aco_opcode::v_cmp_le_f64, bld.def(bld.lm), Operand::zero(), src);
2878 Temp src = get_alu_src(ctx, instr->src[0]);
2879 if (instr->src[0].src.ssa->bit_size == 64)
2880 src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
2885 bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
2887 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2891 Temp src = get_alu_src(ctx, instr->src[0]);
2892 if (instr->src[0].src.ssa->bit_size == 64)
2893 src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
2895 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2897 bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand::zero());
2899 bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, as_vgpr(ctx, src));
2903 if (instr->src[0].src.ssa->bit_size == 16) {
2905 } else if (instr->src[0].src.ssa->bit_size == 64) {
2913 Temp src = get_alu_src(ctx, instr->src[0]);
2914 if (instr->src[0].src.ssa->bit_size == 16)
2915 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
2916 bld.vop1(aco_opcode::v_cvt_f64_f32, Definition(dst), src);
2921 Temp src = get_alu_src(ctx, instr->src[0]);
2922 const unsigned input_size = instr->src[0].src.ssa->bit_size;
2927 src = convert_int(ctx, bld, src, input_size, target_size, true);
2935 src = convert_int(ctx, bld, src, 64, 32, false);
2939 bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
2949 src = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), src);
2950 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
2956 Temp src = get_alu_src(ctx, instr->src[0]);
2957 const unsigned input_size = instr->src[0].src.ssa->bit_size;
2961 src = convert_int(ctx, bld, src, input_size, 32, true);
2963 bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
2966 RegClass rc = RegClass(src.type(), 1);
2968 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
2979 if (instr->src[0].src.ssa->bit_size <= 32) {
2980 Temp src = get_alu_src(ctx, instr->src[0]);
2981 if (instr->src[0].src.ssa->bit_size <= 16)
2982 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, true);
2983 bld.vop1(aco_opcode::v_cvt_f64_i32, Definition(dst), src);
2984 } else if (instr->src[0].src.ssa->bit_size == 64) {
2985 Temp src = get_alu_src(ctx, instr->src[0]);
2986 RegClass rc = RegClass(src.type(), 1);
2988 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
3001 Temp src = get_alu_src(ctx, instr->src[0]);
3002 const unsigned input_size = instr->src[0].src.ssa->bit_size;
3007 src = convert_int(ctx, bld, src, input_size, target_size, false);
3015 src = convert_int(ctx, bld, src, 64, 32, false);
3021 bld.vop1(aco_opcode::v_cvt_f16_u16, Definition(dst), src);
3024 src = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), src);
3025 bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
3031 Temp src = get_alu_src(ctx, instr->src[0]);
3032 const unsigned input_size = instr->src[0].src.ssa->bit_size;
3034 bld.vop1(aco_opcode::v_cvt_f32_ubyte0, Definition(dst), src);
3037 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false);
3038 bld.vop1(aco_opcode::v_cvt_f32_u32, Definition(dst), src);
3041 RegClass rc = RegClass(src.type(), 1);
3043 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
3053 if (instr->src[0].src.ssa->bit_size <= 32) {
3054 Temp src = get_alu_src(ctx, instr->src[0]);
3055 if (instr->src[0].src.ssa->bit_size <= 16)
3056 src = convert_int(ctx, bld, src, instr->src[0].src.ssa->bit_size, 32, false);
3057 bld.vop1(aco_opcode::v_cvt_f64_u32, Definition(dst), src);
3058 } else if (instr->src[0].src.ssa->bit_size == 64) {
3059 Temp src = get_alu_src(ctx, instr->src[0]);
3060 RegClass rc = RegClass(src.type(), 1);
3062 bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src);
3074 if (instr->src[0].src.ssa->bit_size == 16) {
3088 } else if (instr->src[0].src.ssa->bit_size == 32) {
3097 if (instr->src[0].src.ssa->bit_size == 16) {
3111 } else if (instr->src[0].src.ssa->bit_size == 32) {
3119 Temp src = get_alu_src(ctx, instr->src[0]);
3120 if (instr->src[0].src.ssa->bit_size == 16) {
3121 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3128 } else if (instr->src[0].src.ssa->bit_size == 32) {
3130 } else if (instr->src[0].src.ssa->bit_size == 64) {
3138 Temp src = get_alu_src(ctx, instr->src[0]);
3139 if (instr->src[0].src.ssa->bit_size == 16) {
3140 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3147 } else if (instr->src[0].src.ssa->bit_size == 32) {
3149 } else if (instr->src[0].src.ssa->bit_size == 64) {
3157 Temp src = get_alu_src(ctx, instr->src[0]);
3158 if (instr->src[0].src.ssa->bit_size == 16)
3159 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3161 if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) {
3162 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3165 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3166 Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), src);
3190 } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) {
3191 if (src.type() == RegType::vgpr)
3192 src = bld.as_uniform(src);
3193 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3202 Operand::c32(0x7fffffu), src);
3204 bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(31u));
3229 } else if (instr->src[0].src.ssa->bit_size == 64) {
3232 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3252 Temp src = get_alu_src(ctx, instr->src[0]);
3253 if (instr->src[0].src.ssa->bit_size == 16)
3254 src = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3256 if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) {
3257 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3261 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3284 } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) {
3285 if (src.type() == RegType::vgpr)
3286 src = bld.as_uniform(src);
3287 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3294 Operand::c32(0x7fffffu), src);
3319 } else if (instr->src[0].src.ssa->bit_size == 64) {
3322 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3342 Temp src = get_alu_src(ctx, instr->src[0]);
3343 assert(src.regClass() == bld.lm);
3346 src = bool_to_scalar_condition(ctx, src);
3347 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3c00u), src);
3350 bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), one, src);
3357 Temp src = get_alu_src(ctx, instr->src[0]);
3358 assert(src.regClass() == bld.lm);
3361 src = bool_to_scalar_condition(ctx, src);
3362 bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3f800000u), src);
3365 Operand::c32(0x3f800000u), src);
3372 Temp src = get_alu_src(ctx, instr->src[0]);
3373 assert(src.regClass() == bld.lm);
3376 src = bool_to_scalar_condition(ctx, src);
3378 Operand::zero(), bld.scc(src));
3382 bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), one, src);
3393 if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
3395 sgpr_extract_mode mode = instr->dest.dest.ssa.bit_size > instr->src[0].src.ssa->bit_size
3398 extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
3400 const unsigned input_bitsize = instr->src[0].src.ssa->bit_size;
3402 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), input_bitsize, output_bitsize,
3411 if (dst.type() == RegType::sgpr && instr->src[0].src.ssa->bit_size < 32) {
3413 sgpr_extract_mode mode = instr->dest.dest.ssa.bit_size > instr->src[0].src.ssa->bit_size
3416 extract_8_16_bit_sgpr_element(ctx, dst, &instr->src[0], mode);
3418 convert_int(ctx, bld, get_alu_src(ctx, instr->src[0]), instr->src[0].src.ssa->bit_size,
3428 Temp src = get_alu_src(ctx, instr->src[0]);
3429 assert(src.regClass() == bld.lm);
3433 bool_to_scalar_condition(ctx, src, tmp);
3436 src);
3447 Temp src = get_alu_src(ctx, instr->src[0]);
3450 if (src.type() == RegType::vgpr) {
3451 assert(src.regClass() == v1 || src.regClass() == v2);
3453 bld.vopc(src.size() == 2 ? aco_opcode::v_cmp_lg_u64 : aco_opcode::v_cmp_lg_u32,
3454 Definition(dst), Operand::zero(), src);
3456 assert(src.regClass() == s1 || src.regClass() == s2);
3458 if (src.regClass() == s2 && ctx->program->gfx_level <= GFX7) {
3460 bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), Operand::zero(), src)
3464 tmp = bld.sopc(src.size() == 2 ? aco_opcode::s_cmp_lg_u64 : aco_opcode::s_cmp_lg_u32,
3465 bld.scc(bld.def(s1)), Operand::zero(), src);
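
The matches above lower an integer-to-boolean conversion by comparing against zero into the lane-mask register class (bld.lm): VGPR sources use v_cmp_lg_u32/u64 directly, SGPR sources go through s_cmp into scc, and 64-bit SGPRs on GFX7 and older take an s_or_b64-based path since s_cmp_lg_u64 is unavailable there. Condensed from the matched lines:

   if (src.type() == RegType::vgpr) {
      bld.vopc(src.size() == 2 ? aco_opcode::v_cmp_lg_u64
                               : aco_opcode::v_cmp_lg_u32,
               Definition(dst), Operand::zero(), src);
   } else if (src.regClass() == s2 && ctx->program->gfx_level <= GFX7) {
      /* no s_cmp_lg_u64 here: OR the halves, the instruction sets scc */
      bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc),
               Operand::zero(), src);
   } else {
      Temp tmp = bld.sopc(src.size() == 2 ? aco_opcode::s_cmp_lg_u64
                                          : aco_opcode::s_cmp_lg_u32,
                          bld.scc(bld.def(s1)), Operand::zero(), src);
      /* tmp (scc) is then expanded into the destination mask (elided) */
   }
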
3474 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3478 Temp src0 = get_alu_src(ctx, instr->src[0]);
3479 Temp src1 = get_alu_src(ctx, instr->src[1]);
3486 get_alu_src(ctx, instr->src[0]));
3490 get_alu_src(ctx, instr->src[0]));
3495 get_alu_src(ctx, instr->src[0]));
3497 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3503 get_alu_src(ctx, instr->src[0]));
3506 get_alu_src(ctx, instr->src[0]), Operand::c32(1u), Operand::c32(16u),
3511 Temp src0 = get_alu_src(ctx, instr->src[0]);
3512 Temp src1 = get_alu_src(ctx, instr->src[1]);
3526 case nir_op_pack_32_4x8: bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0], 4)); break;
3540 Temp src = get_alu_src(ctx, instr->src[0], 2);
3541 Temp src0 = emit_extract_vector(ctx, src, 0, v1);
3542 Temp src1 = emit_extract_vector(ctx, src, 1, v1);
3550 Temp src = get_alu_src(ctx, instr->src[0], 2);
3551 Temp src0 = emit_extract_vector(ctx, src, 0, v1);
3552 Temp src1 = emit_extract_vector(ctx, src, 1, v1);
3560 Temp src = get_alu_src(ctx, instr->src[0]);
3561 if (src.regClass() == v1)
3562 src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src);
3566 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
3574 Temp src = get_alu_src(ctx, instr->src[0]);
3575 if (src.regClass() == s1)
3576 src = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), src,
3579 src =
3580 bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp();
3584 bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
3596 Temp src = get_alu_src(ctx, instr->src[0]);
3597 Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
3620 bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::zero(), as_vgpr(ctx, src));
3628 Temp bits = get_alu_src(ctx, instr->src[0]);
3629 Temp offset = get_alu_src(ctx, instr->src[1]);
3644 Temp bitmask = get_alu_src(ctx, instr->src[0]);
3645 Temp insert = get_alu_src(ctx, instr->src[1]);
3646 Temp base = get_alu_src(ctx, instr->src[2]);
3648 nir_const_value* const_bitmask = nir_src_as_const_value(instr->src[0].src);
3649 nir_const_value* const_insert = nir_src_as_const_value(instr->src[1].src);
3660 nir_const_value* const_base = nir_src_as_const_value(instr->src[2].src);
3683 Temp base = get_alu_src(ctx, instr->src[0]);
3685 nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
3686 nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
3695 Temp offset = get_alu_src(ctx, instr->src[1]);
3696 Temp bits = get_alu_src(ctx, instr->src[2]);
3730 unsigned index = nir_src_as_uint(instr->src[1].src);
3733 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3735 Temp vec = get_ssa_temp(ctx, instr->src[0].src.ssa);
3736 unsigned swizzle = instr->src[0].swizzle[0];
3745 Temp src = get_alu_src(ctx, instr->src[0]);
3748 src = emit_extract_vector(ctx, src, index / comp, RegClass(src.type(), 1));
3750 def = bld.def(src.type(), 1);
3754 bld.pseudo(aco_opcode::p_extract, def, bld.def(s1, scc), Operand(src),
3757 src = emit_extract_vector(ctx, src, 0, def.regClass());
3758 bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index),
3771 unsigned index = nir_src_as_uint(instr->src[1].src);
3774 bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
3776 Temp src = get_alu_src(ctx, instr->src[0]);
3780 src = emit_extract_vector(ctx, src, 0u, RegClass(src.type(), 1));
3783 def = bld.def(src.type(), 1);
3786 bld.pseudo(aco_opcode::p_insert, def, bld.def(s1, scc), Operand(src),
3789 src = emit_extract_vector(ctx, src, 0, def.regClass());
3790 bld.pseudo(aco_opcode::p_insert, def, Operand(src), Operand::c32(index),
3803 Temp src = get_alu_src(ctx, instr->src[0]);
3804 if (src.regClass() == s1) {
3805 bld.sop1(aco_opcode::s_bcnt1_i32_b32, Definition(dst), bld.def(s1, scc), src);
3806 } else if (src.regClass() == v1) {
3807 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), src, Operand::zero());
3808 } else if (src.regClass() == v2) {
3809 bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), emit_extract_vector(ctx, src, 1, v1),
3811 emit_extract_vector(ctx, src, 0, v1), Operand::zero()));
3812 } else if (src.regClass() == s2) {
3813 bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src);
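
bit_count maps to s_bcnt1/v_bcnt per register class; the v2 case chains two 32-bit counts because v_bcnt_u32_b32 adds its result to a third accumulator operand. A runnable scalar equivalent of that chaining:

   #include <bit>
   #include <cstdint>

   uint32_t popcount64_chained(uint64_t x)
   {
      uint32_t lo = std::popcount((uint32_t)x);        /* v_bcnt(lo, 0)  */
      return std::popcount((uint32_t)(x >> 32)) + lo;  /* v_bcnt(hi, lo) */
   }
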
3850 if (instr->src[0].src.ssa->bit_size == 1)
3860 if (instr->src[0].src.ssa->bit_size == 1)
3885 if (!nir_src_is_divergent(instr->src[0].src)) {
3893 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
3911 Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
3912 tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), src, tl, dpp_ctrl2);
3914 Temp tl = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl1);
3915 Temp tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2);
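
The derivative matches compute ddx/ddy by subtracting a quad-permuted copy of the value from the lane's own value: with DPP the permute rides on v_mov/v_sub directly, while older parts route it through ds_swizzle_b32 (that the (1 << 15) bit selects its quad-perm encoding is my reading of the fragments). The DPP path, annotated:

   /* tl = value of the quad's reference lane; tmp = this lane minus it */
   Temp tl  = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
   Temp tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), src, tl,
                           dpp_ctrl2);
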
4709 Temp src)
4719 dst[0] = bld.as_uniform(src);
4721 dst[0] = as_vgpr(ctx, src);
4737 auto it = ctx->allocated_vec.find(src.id());
4742 assert(src.bytes() % elem_size == 0);
4744 for (unsigned i = 0; i < src.bytes() / elem_size; i++) {
4751 temps.insert(temps.end(), it->second.begin(), it->second.begin() + src.bytes() / elem_size);
4756 /* split src if necessary */
4758 if (is_subdword && src.type() == RegType::sgpr)
4759 src = as_vgpr(ctx, src);
4761 src = bld.as_uniform(src);
4763 unsigned num_elems = src.bytes() / elem_size_bytes;
4766 split->operands[0] = Operand(src);
5096 store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset,
5109 split_buffer_store(ctx, NULL, false, RegType::vgpr, src, write_mask, allow_combining ? 16 : 4,
5181 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
5183 if (instr->src[0].ssa->bit_size == 64)
5186 RegClass rc = instr->src[0].ssa->bit_size == 16 ? v2b : v1;
5191 ctx->outputs.temps[idx] = emit_extract_vector(ctx, src, i, rc);
5233 Temp* src = &ctx->inputs.temps[idx];
5234 create_vec_from_array(ctx, src, dst.size(), dst.regClass().type(), 4u, 0, dst);
5250 isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
5259 emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
5262 Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
5263 Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
5359 Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
5364 assert(nir_src_is_const(instr->src[1]) && !nir_src_as_uint(instr->src[1]));
5704 nir_const_value* src0 = nir_src_as_const_value(instr->src[0]);
5824 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5827 load_buffer(ctx, instr->num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
5838 nir_const_value* index_cv = nir_src_as_const_value(instr->src[0]);
5864 Temp index = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5941 Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
6085 Temp resource = get_ssa_temp(ctx, instr->src[0].ssa);
6086 Temp node = get_ssa_temp(ctx, instr->src[1].ssa);
6087 Temp tmax = get_ssa_temp(ctx, instr->src[2].ssa);
6088 Temp origin = get_ssa_temp(ctx, instr->src[3].ssa);
6089 Temp dir = get_ssa_temp(ctx, instr->src[4].ssa);
6090 Temp inv_dir = get_ssa_temp(ctx, instr->src[5].ssa);
6118 Temp src0 = get_ssa_temp(ctx, instr->src[1].ssa);
6131 coords[--count] = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[2].ssa), 0, v1);
6151 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6165 nir_src_is_const(instr->src[lod_index]) && nir_src_as_uint(instr->src[lod_index]) == 0;
6168 coords.emplace_back(get_ssa_temp(ctx, instr->src[lod_index].ssa));
6249 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6252 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6290 bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
6326 Temp data = get_ssa_temp(ctx, instr->src[3].ssa);
6327 bool d16 = instr->src[3].ssa->bit_size == 16;
6330 if (instr->src[3].ssa->bit_size == 64 && data.bytes() > 8)
6334 uint32_t num_components = d16 ? instr->src[3].ssa->num_components : data.size();
6343 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6344 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6381 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6383 bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
6390 if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) {
6392 nir_ssa_scalar comp = nir_ssa_scalar_resolved(instr->src[3].ssa, i);
6444 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
6451 get_ssa_temp(ctx, instr->src[4].ssa), data);
6524 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6525 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6552 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6612 Temp desc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6617 assert(nir_src_as_uint(instr->src[1]) == 0);
6621 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6673 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6684 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6692 load_buffer(ctx, num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
6701 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
6702 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
6704 Temp offset = get_ssa_temp(ctx, instr->src[2].ssa);
6706 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
6750 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
6755 get_ssa_temp(ctx, instr->src[3].ssa), data);
6757 Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
6758 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6844 *address = get_ssa_temp(ctx, intrin->src[is_store ? 1 : 0].ssa);
6849 nir_src offset_src = intrin->src[num_src - 1];
6908 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
6911 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7003 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7008 get_ssa_temp(ctx, instr->src[2].ssa), data);
7207 Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[0].ssa));
7208 Temp v_offset = as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[1].ssa));
7209 Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
7230 Temp store_src = get_ssa_temp(ctx, intrin->src[0].ssa);
7231 Temp descriptor = get_ssa_temp(ctx, intrin->src[1].ssa);
7232 Temp v_offset = get_ssa_temp(ctx, intrin->src[2].ssa);
7233 Temp s_offset = get_ssa_temp(ctx, intrin->src[3].ssa);
7240 unsigned elem_size_bytes = intrin->src[0].ssa->bit_size / 8u;
7254 Temp base = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
7255 Temp offset = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
7361 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7374 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7375 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7376 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
7388 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7389 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7498 Temp data2 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
7517 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
7522 bool is64bit = (is_store ? instr->src[0].ssa->bit_size : instr->dest.ssa.bit_size) == 64;
7533 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7616 if (nir_src_is_const(instr->src[0])) {
7619 bld.copy(bld.def(s1), Operand::c32(ROUND_DOWN_TO(nir_src_as_uint(instr->src[0]), max)));
7620 info.const_offset = nir_src_as_uint(instr->src[0]) % max;
7622 info.offset = Operand(get_ssa_temp(ctx, instr->src[0].ssa));
7629 info.offset = Operand(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)));
7639 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7640 Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
7642 unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8;
7654 offset = nir_src_is_const(instr->src[1]) ? Temp(0, s1) : offset;
7656 nir_src_is_const(instr->src[1]) ? nir_src_as_uint(instr->src[1]) : 0;
7699 Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7701 nir_const_value* next_vertex_cv = nir_src_as_const_value(instr->src[0]);
7791 emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src)
7796 return src;
7801 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
7808 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)));
7812 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
7820 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm))
7827 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7850 tmp = bld.sop2(Builder::s_orn2, bld.def(bld.lm), bld.def(s1, scc), src,
7854 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7884 emit_boolean_exclusive_scan(isel_context* ctx, nir_op op, Temp src)
7887 assert(src.regClass() == bld.lm);
7896 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
7898 tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
7915 emit_boolean_inclusive_scan(isel_context* ctx, nir_op op, Temp src)
7923 Temp tmp = emit_boolean_exclusive_scan(ctx, op, src);
7925 return bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7927 return bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
7929 return bld.sop2(Builder::s_xor, bld.def(bld.lm), bld.def(s1, scc), tmp, src);
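
emit_boolean_inclusive_scan builds on the exclusive scan: combine the lanes-before-me result with the lane's own bit using the same boolean op. Reconstructed dispatch; the switch framing is assumed, the three sop2 combinations are the matched lines:

   Temp tmp = emit_boolean_exclusive_scan(ctx, op, src);
   switch (op) {   /* assumed framing */
   case nir_op_iand:
      return bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc),
                      tmp, src);
   case nir_op_ior:
      return bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc),
                      tmp, src);
   case nir_op_ixor:
      return bld.sop2(Builder::s_xor, bld.def(bld.lm), bld.def(s1, scc),
                      tmp, src);
   }
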
7967 emit_uniform_subgroup(isel_context* ctx, nir_intrinsic_instr* instr, Temp src)
7972 if (src.regClass().type() == RegType::vgpr)
7973 bld.pseudo(aco_opcode::p_as_uniform, dst, src);
7975 bld.copy(dst, src);
7979 emit_addition_uniform_reduce(isel_context* ctx, nir_op op, Definition dst, nir_src src, Temp count)
7982 Temp src_tmp = get_ssa_temp(ctx, src.ssa);
7986 Temp tmp = dst.regClass() == s1 ? bld.tmp(RegClass::get(RegType::vgpr, src.ssa->bit_size / 8))
7989 if (src.ssa->bit_size == 16) {
7993 assert(src.ssa->bit_size == 32);
8015 if (nir_src_is_const(src)) {
8016 if (nir_src_as_uint(src) == 1 && dst.bytes() <= 2)
8018 else if (nir_src_as_uint(src) == 1)
8020 else if (nir_src_as_uint(src) == 0)
8023 bld.v_mul_imm(dst, count, nir_src_as_uint(src));
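
emit_addition_uniform_reduce exploits uniformity: summing one value over n active lanes is value * n, so a constant 1 reduces to the lane count itself, a constant 0 to zero, and any other constant to v_mul_imm against the count. Condensed from the matched lines; the dst.bytes() <= 2 special case packs a 16-bit result and is elided:

   if (nir_src_is_const(src)) {
      if (nir_src_as_uint(src) == 1)
         bld.copy(dst, count);           /* the sum of n ones is n */
      else if (nir_src_as_uint(src) == 0)
         bld.copy(dst, Operand::zero(dst.bytes())); /* assumed zero form */
      else
         bld.v_mul_imm(dst, count, nir_src_as_uint(src));
   }
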
8047 unsigned bit_size = instr->src[0].ssa->bit_size;
8054 emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
8056 emit_uniform_subgroup(ctx, instr, get_ssa_temp(ctx, instr->src[0].ssa));
8074 if (instr->src[0].ssa->bit_size > 32)
8083 emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], packed_tid);
8091 emit_uniform_subgroup(ctx, instr, get_ssa_temp(ctx, instr->src[0].ssa));
8097 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8098 ReduceOp reduce_op = get_reduce_op(op, instr->src[0].ssa->bit_size);
8101 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8113 as_vgpr(ctx, src));
8121 Definition dst, Temp src)
8123 assert(src.bytes() <= 8);
8124 assert(src.type() == RegType::vgpr);
8162 reduce->operands[0] = Operand(src);
8288 Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
8289 nir_const_value* const_addr = nir_src_as_const_value(instr->src[0]);
8375 Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
8621 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8624 if (instr->src[0].ssa->bit_size == 1) {
8625 assert(src.regClass() == bld.lm);
8626 } else if (instr->src[0].ssa->bit_size == 32 && src.regClass() == v1) {
8627 src = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8628 } else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) {
8629 src = bld.vopc(aco_opcode::v_cmp_lg_u64, bld.def(bld.lm), Operand::zero(), src);
8636 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
8639 src =
8640 bld.pseudo(aco_opcode::p_create_vector, bld.def(dst.regClass()), src, Operand::zero());
8643 emit_wqm(bld, src, dst);
8648 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8649 if (!nir_src_is_divergent(instr->src[0])) {
8650 emit_uniform_subgroup(ctx, instr, src);
8652 Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
8654 !nir_src_is_divergent(instr->src[1]))
8659 src = as_vgpr(ctx, src);
8661 if (src.regClass() == v1b || src.regClass() == v2b) {
8663 tmp = emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), tmp);
8666 bld.def(src.regClass() == v1b ? v3b : v2b), tmp);
8669 } else if (src.regClass() == v1) {
8670 emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), dst);
8671 } else if (src.regClass() == v2) {
8673 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8679 assert(src.regClass() == bld.lm);
8680 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, tid);
8683 assert(src.regClass() == bld.lm);
8686 tmp = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), src, tid);
8688 tmp = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), tid, src);
8690 tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), tid, src);
8707 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8709 if (src.regClass() == v1b || src.regClass() == v2b || src.regClass() == v1) {
8710 emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), src), dst);
8711 } else if (src.regClass() == v2) {
8713 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8719 assert(src.regClass() == bld.lm);
8720 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
8724 bld.copy(Definition(dst), src);
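
read_first_invocation splits by register class: 32-bit and subdword VGPRs use v_readfirstlane_b32 under emit_wqm, 64-bit values are split into halves first, booleans test the first active lane's bit with s_bitcmp1, and SGPR sources are already uniform and just copied. A sketch from the matched lines; the 64-bit recombination and the bit-index operand are elided or assumed:

   if (src.regClass() == v1b || src.regClass() == v2b || src.regClass() == v1) {
      emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), src),
               dst);
   } else if (src.regClass() == v2) {
      Temp lo = bld.tmp(v1), hi = bld.tmp(v1);   /* assumed temps */
      bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
      /* readfirstlane each half, then p_create_vector them back (elided) */
   } else if (src.regClass() == bld.lm) {
      Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
                          first_active_lane /* assumed operand */);
   } else {
      bld.copy(Definition(dst), src);            /* already uniform */
   }
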
8729 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8731 assert(src.regClass() == bld.lm);
8735 bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
8743 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8745 assert(src.regClass() == bld.lm);
8748 Temp tmp = bool_to_scalar_condition(ctx, src);
8755 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8763 if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size &&
8791 emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
8794 emit_wqm(bld, emit_boolean_exclusive_scan(ctx, op, src), dst);
8797 emit_wqm(bld, emit_boolean_inclusive_scan(ctx, op, src), dst);
8802 bld.copy(Definition(dst), src);
8804 unsigned bit_size = instr->src[0].ssa->bit_size;
8806 src = emit_extract_vector(ctx, src, 0, RegClass::get(RegType::vgpr, bit_size / 8));
8819 bld.def(dst.regClass()), src);
8829 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8832 emit_uniform_subgroup(ctx, instr, src);
8849 lane = nir_src_as_const_value(instr->src[1])->u32;
8861 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
8862 Operand::c32(-1), src);
8864 src = as_vgpr(ctx, src);
8874 assert(src.regClass() == bld.lm && tmp.regClass() == bld.lm);
8882 src =
8883 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
8884 src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, src);
8885 bld.sop1(Builder::s_wqm, Definition(tmp), src);
8891 bld.vop1_dpp(aco_opcode::v_mov_b32, def, src, dpp_ctrl);
8893 bld.ds(aco_opcode::ds_swizzle_b32, def, src, (1 << 15) | dpp_ctrl);
8900 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8927 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8929 emit_uniform_subgroup(ctx, instr, src);
8936 src = as_vgpr(ctx, src);
8939 assert(src.regClass() == bld.lm);
8940 src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(),
8941 Operand::c32(-1), src);
8942 src = emit_masked_swizzle(ctx, bld, src, mask);
8943 Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8946 Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8949 Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8952 emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask), dst);
8955 bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
8966 Temp src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
8967 Temp val = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
8968 Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa));
8972 emit_wqm(bld, bld.writelane(bld.def(v1), val, lane, src), dst);
8976 bld.pseudo(aco_opcode::p_split_vector, Definition(src_lo), Definition(src_hi), src);
8988 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8989 Temp add_src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
8992 src = emit_extract_vector(ctx, src, 0, RegClass(src.type(), bld.lm.size()));
8993 Temp wqm_tmp = emit_mbcnt(ctx, bld.tmp(v1), Operand(src), Operand(add_src));
9001 bld.vop3(aco_opcode::v_perm_b32, Definition(dst), get_ssa_temp(ctx, instr->src[0].ssa),
9002 as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)),
9003 as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)));
9007 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9011 if (src.regClass() == s1) {
9012 bld.copy(Definition(dst), src);
9013 } else if (dst.regClass() == v1 && src.regClass() == v1) {
9014 bld.vop3(aco_opcode::v_permlane16_b32, Definition(dst), src,
9015 bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)),
9016 bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa)));
9041 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9042 assert(src.regClass() == bld.lm);
9044 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
9060 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9061 assert(src.regClass() == bld.lm);
9063 bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
9214 Temp prim_ch1 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
9220 Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
9221 Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa);
9226 Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa);
9227 Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa);
9228 Temp m0_val = get_ssa_temp(ctx, instr->src[2].ssa);
9243 ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
9244 ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
9248 ctx->arg_temps[ctx->args->ac.tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
9249 ctx->arg_temps[ctx->args->ac.tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
9251 get_ssa_temp(ctx, instr->src[2].ssa);
9252 ctx->arg_temps[ctx->args->ac.tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
9264 Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
9265 assert(src.id());
9266 assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr : RegType::vgpr));
9267 bld.copy(Definition(dst), src);
9404 vec_instr->src[i].swizzle[0] == 0 ? nir_src_as_const_value(vec_instr->src[i].src) : NULL;
9425 switch (instr->src[i].src_type) {
9427 resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
9430 sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
9445 a16 = instr->src[coord_idx].src.ssa->bit_size == 16;
9449 g16 = instr->src[ddx_idx].src.ssa->bit_size == 16;
9452 switch (instr->src[i].src_type) {
9454 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9455 coord = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9459 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9461 bias = get_ssa_temp(ctx, instr->src[i].src.ssa);
9465 if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0) {
9468 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9469 lod = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9475 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9476 clamped_lod = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
9481 assert(instr->src[i].src.ssa->bit_size == 32);
9482 compare = get_ssa_temp(ctx, instr->src[i].src.ssa);
9487 assert(instr->src[i].src.ssa->bit_size == 32);
9488 offset = get_ssa_temp(ctx, instr->src[i].src.ssa);
9489 get_const_vec(instr->src[i].src.ssa, const_offset);
9493 assert(instr->src[i].src.ssa->bit_size == (g16 ? 16 : 32));
9494 ddx = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, g16);
9498 assert(instr->src[i].src.ssa->bit_size == (g16 ? 16 : 32));
9499 ddy = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, g16);
9503 assert(instr->src[i].src.ssa->bit_size == (a16 ? 16 : 32));
9504 sample_index = get_ssa_temp_tex(ctx, instr->src[i].src.ssa, a16);
10100 nir_foreach_phi_src (src, instr)
10101 phi_src[src->pred->index] = src->src.ssa;
10109 for (std::pair<unsigned, nir_ssa_def*> src : phi_src) {
10113 unsigned block = ctx->cf_info.nir_to_aco[src.first];
10130 Operand op = get_phi_operand(ctx, src.second, dst.regClass(), logical);
10149 /* we can use a linear phi in some cases if one src is undef */