Lines Matching refs:Temp
70 Temp cond;
128 Temp
132 return Temp(id, ctx->program->temp_rc[id]);
135 Temp
136 emit_mbcnt(isel_context* ctx, Temp dst, Operand mask = Operand(), Operand base = Operand::zero())
161 Temp mbcnt_lo = bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, bld.def(v1), mask_lo, base);
169 Temp
170 emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1), bool program_needs_wqm = false)
187 static Temp
188 emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
205 Temp index_is_lo =
209 Temp index_is_lo_n1 = bld.sop1(aco_opcode::s_not_b32, bld.def(s1), bld.def(s1, scc),
228 Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
233 static Temp
234 emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
271 Temp
272 as_vgpr(Builder& bld, Temp val)
280 Temp
281 as_vgpr(isel_context* ctx, Temp val)
289 emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
313 Temp pre_shift_dst = a;
320 Temp increment_dst = pre_shift_dst;
326 Temp multiply_dst = increment_dst;
340 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst)
346 Temp
347 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, RegClass dst_rc)
375 Temp dst = bld.tmp(dst_rc);
382 emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components)
403 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
415 expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components, unsigned mask,
422 Temp tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, 2 * num_components));
446 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
448 Temp padding = Temp(0, dst_rc);
458 Temp src = emit_extract_vector(ctx, vec_src, k++, src_rc);
474 byte_align_scalar(isel_context* ctx, Temp vec, Operand offset, Temp dst)
478 Temp select = Temp();
484 Temp tmp =
494 Temp tmp = dst.size() == 2 ? dst : bld.tmp(s2);
501 Temp lo = bld.tmp(s2), hi;
511 if (select != Temp())
515 Temp mid = bld.tmp(s1);
525 byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigned component_size)
529 Temp tmp[4] = {vec, vec, vec, vec};
562 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
592 Temp
596 Temp tmp = get_ssa_temp(ctx, def);
603 Temp
604 bool_to_vector_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s2))
617 Temp
618 bool_to_scalar_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s1))
633 * Copies the first src_bits of the input to the output Temp. Input bits at positions larger than
641 Temp
642 convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits,
643 bool sign_extend, Temp dst = Temp())
666 Temp tmp = dst;
683 Temp high =
687 Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), tmp);
703 Temp
704 extract_8_16_bit_sgpr_element(isel_context* ctx, Temp dst, nir_alu_src* src, sgpr_extract_mode mode)
706 Temp vec = get_ssa_temp(ctx, src->src.ssa);
717 Temp tmp = dst.regClass() == s2 ? bld.tmp(s1) : dst;
732 Temp
738 Temp vec = get_ssa_temp(ctx, src.src.ssa);
768 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
775 Temp dst = ctx->program->allocateTmp(RegClass(vec.type(), elem_size * size / 4));
783 Temp
793 Temp tmp = get_ssa_temp(ctx, src.src.ssa);
828 Temp
829 convert_pointer_to_64_bit(isel_context* ctx, Temp ptr, bool non_uniform = false)
841 emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
868 emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode opc, Temp dst,
875 Temp src0 = get_alu_src(ctx, instr->src[swap_srcs ? 1 : 0]);
876 Temp src1 = get_alu_src(ctx, instr->src[swap_srcs ? 0 : 1]);
879 Temp t = src0;
901 Temp tmp = bld.vop2(opc, bld.def(v1), op[0], op[1]);
913 emit_vop2_instruction_logic64(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
918 Temp src0 = get_alu_src(ctx, instr->src[0]);
919 Temp src1 = get_alu_src(ctx, instr->src[1]);
926 Temp src00 = bld.tmp(src0.type(), 1);
927 Temp src01 = bld.tmp(src0.type(), 1);
929 Temp src10 = bld.tmp(v1);
930 Temp src11 = bld.tmp(v1);
932 Temp lo = bld.vop2(op, bld.def(v1), src00, src10);
933 Temp hi = bld.vop2(op, bld.def(v1), src01, src11);
938 emit_vop3a_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
942 Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
955 Temp tmp;
972 emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
975 Temp src0 = get_alu_src_vop3p(ctx, instr->src[swap_srcs]);
976 Temp src1 = get_alu_src_vop3p(ctx, instr->src[!swap_srcs]);
994 emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
996 Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
1012 emit_vop1_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1024 emit_vopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1026 Temp src0 = get_alu_src(ctx, instr->src[0]);
1027 Temp src1 = get_alu_src(ctx, instr->src[1]);
1055 Temp t = src0;
1068 emit_sopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1070 Temp src0 = get_alu_src(ctx, instr->src[0]);
1071 Temp src1 = get_alu_src(ctx, instr->src[1]);
1080 Temp cmp = bld.sopc(op, bld.scc(bld.def(s1)), src0, src1);
1086 emit_comparison(isel_context* ctx, nir_alu_instr* instr, Temp dst, aco_opcode v16_op,
1111 Temp dst)
1114 Temp src0 = get_alu_src(ctx, instr->src[0]);
1115 Temp src1 = get_alu_src(ctx, instr->src[1]);
1125 emit_bcsel(isel_context* ctx, nir_alu_instr* instr, Temp dst)
1128 Temp cond = get_alu_src(ctx, instr->src[0]);
1129 Temp then = get_alu_src(ctx, instr->src[1]);
1130 Temp els = get_alu_src(ctx, instr->src[2]);
1142 Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
1144 Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
1147 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, cond);
1148 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, cond);
1193 emit_scaled_op(isel_context* ctx, Builder& bld, Definition dst, Temp val, aco_opcode op,
1197 Temp is_denormal = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), as_vgpr(ctx, val),
1199 Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x4b800000u), val);
1203 Temp not_scaled = bld.vop1(op, bld.def(v1), val);
1209 emit_rcp(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1220 emit_rsq(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1231 emit_sqrt(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1242 emit_log2(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1252 Temp
1253 emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1264 Temp val_lo = bld.tmp(v1), val_hi = bld.tmp(v1);
1268 Temp exponent =
1273 Temp fract_mask = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
1277 Temp fract_mask_lo = bld.tmp(v1), fract_mask_hi = bld.tmp(v1);
1281 Temp fract_lo = bld.tmp(v1), fract_hi = bld.tmp(v1);
1282 Temp tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_lo);
1288 Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x80000000u), val_hi);
1291 Temp exp_lt0 =
1293 Temp dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_lo,
1295 Temp dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_hi, sign, exp_lt0);
1296 Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand::c32(51u));
1303 Temp
1304 emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1311 Temp src0 = as_vgpr(ctx, val);
1313 Temp mask = bld.copy(bld.def(s1), Operand::c32(3u)); /* isnan */
1314 Temp min_val = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::c32(-1u),
1317 Temp isnan = bld.vopc_e64(aco_opcode::v_cmp_class_f64, bld.def(bld.lm), src0, mask);
1318 Temp fract = bld.vop1(aco_opcode::v_fract_f64, bld.def(v2), src0);
1319 Temp min = bld.vop3(aco_opcode::v_min_f64, bld.def(v2), fract, min_val);
1321 Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
1323 Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
1326 Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, isnan);
1327 Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, isnan);
1329 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
1337 Temp
1338 uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1356 Temp
1357 usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1384 Temp dst = get_ssa_temp(ctx, &instr->dest.dest.ssa);
1392 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
1411 Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->dest.dest.ssa.bit_size) - 1));
1413 std::array<Temp, NIR_MAX_VEC_COMPONENTS> packed;
1479 Temp src = get_alu_src(ctx, instr->src[0]);
1491 Temp src = get_alu_src(ctx, instr->src[0]);
1495 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
1510 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
1515 Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
1520 Temp src = get_alu_src(ctx, instr->src[0]);
1540 Temp src = get_alu_src(ctx, instr->src[0]);
1542 Temp tmp =
1546 Temp neg =
1548 Temp neqz;
1567 Temp upper = emit_extract_vector(ctx, src, 1, v1);
1568 Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), upper);
1569 Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i64, bld.def(bld.lm), Operand::zero(), src);
1570 Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(1u), neg, gtz);
1761 Temp src = get_alu_src(ctx, instr->src[0]);
1775 Temp src = get_alu_src(ctx, instr->src[0]);
1782 Temp msb_rev = bld.sop1(op, bld.def(s1), src);
1786 Temp msb = sub.def(0).getTemp();
1787 Temp carry = sub.def(1).getTemp();
1794 Temp msb_rev = bld.tmp(v1);
1796 Temp msb = bld.tmp(v1);
1797 Temp carry =
1804 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
1810 Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi);
1812 Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi);
1814 Temp msb = bld.tmp(v1);
1815 Temp carry =
1824 Temp src = get_alu_src(ctx, instr->src[0]);
1826 Temp msb_rev = bld.sop1(aco_opcode::s_flbit_i32_b32, bld.def(s1), src);
1829 Temp msb_rev = bld.vop1(aco_opcode::v_ffbh_u32, bld.def(v1), src);
1861 Temp src0 = get_alu_src(ctx, instr->src[0]);
1862 Temp src1 = get_alu_src(ctx, instr->src[1]);
1869 Temp src00 = bld.tmp(src0.type(), 1);
1870 Temp src01 = bld.tmp(dst.type(), 1);
1872 Temp src10 = bld.tmp(src1.type(), 1);
1873 Temp src11 = bld.tmp(dst.type(), 1);
1877 Temp carry = bld.tmp(s1);
1878 Temp dst0 =
1880 Temp dst1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
1884 Temp dst0 = bld.tmp(v1);
1885 Temp carry = bld.vadd32(Definition(dst0), src00, src10, true).def(1).getTemp();
1886 Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry);
1900 Temp src0 = get_alu_src(ctx, instr->src[0]);
1901 Temp src1 = get_alu_src(ctx, instr->src[1]);
1903 Temp tmp = bld.tmp(s1), carry = bld.tmp(s1);
1927 Temp src00 = bld.tmp(src0.type(), 1);
1928 Temp src01 = bld.tmp(src0.type(), 1);
1930 Temp src10 = bld.tmp(src1.type(), 1);
1931 Temp src11 = bld.tmp(src1.type(), 1);
1935 Temp carry0 = bld.tmp(s1);
1936 Temp carry1 = bld.tmp(s1);
1938 Temp no_sat0 =
1940 Temp no_sat1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(Definition(carry1)),
1943 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
1948 Temp no_sat0 = bld.tmp(v1);
1949 Temp dst0 = bld.tmp(v1);
1950 Temp dst1 = bld.tmp(v1);
1952 Temp carry0 = bld.vadd32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
1953 Temp carry1;
1962 Temp no_sat1 = bld.tmp(v1);
1983 Temp src0 = get_alu_src(ctx, instr->src[0]);
1984 Temp src1 = get_alu_src(ctx, instr->src[1]);
1986 Temp cond = bld.sopc(aco_opcode::s_cmp_lt_i32, bld.def(s1, scc), src1, Operand::zero());
1987 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
1989 Temp overflow = bld.tmp(s1);
1990 Temp add =
2012 Temp src0 = get_alu_src(ctx, instr->src[0]);
2013 Temp src1 = get_alu_src(ctx, instr->src[1]);
2019 Temp carry = bld.vadd32(bld.def(v1), src0, src1, true).def(1).getTemp();
2025 Temp src00 = bld.tmp(src0.type(), 1);
2026 Temp src01 = bld.tmp(dst.type(), 1);
2028 Temp src10 = bld.tmp(src1.type(), 1);
2029 Temp src11 = bld.tmp(dst.type(), 1);
2032 Temp carry = bld.tmp(s1);
2040 Temp carry = bld.vadd32(bld.def(v1), src00, src10, true).def(1).getTemp();
2059 Temp src0 = get_alu_src(ctx, instr->src[0]);
2060 Temp src1 = get_alu_src(ctx, instr->src[1]);
2076 Temp src00 = bld.tmp(src0.type(), 1);
2077 Temp src01 = bld.tmp(dst.type(), 1);
2079 Temp src10 = bld.tmp(src1.type(), 1);
2080 Temp src11 = bld.tmp(dst.type(), 1);
2083 Temp borrow = bld.tmp(s1);
2084 Temp dst0 =
2086 Temp dst1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
2090 Temp lower = bld.tmp(v1);
2091 Temp borrow = bld.vsub32(Definition(lower), src00, src10, true).def(1).getTemp();
2092 Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow);
2100 Temp src0 = get_alu_src(ctx, instr->src[0]);
2101 Temp src1 = get_alu_src(ctx, instr->src[1]);
2106 Temp borrow = bld.vsub32(bld.def(v1), src0, src1, true).def(1).getTemp();
2112 Temp src00 = bld.tmp(src0.type(), 1);
2113 Temp src01 = bld.tmp(dst.type(), 1);
2115 Temp src10 = bld.tmp(src1.type(), 1);
2116 Temp src11 = bld.tmp(dst.type(), 1);
2119 Temp borrow = bld.tmp(s1);
2127 Temp borrow = bld.vsub32(bld.def(v1), src00, src10, true).def(1).getTemp();
2143 Temp src0 = get_alu_src(ctx, instr->src[0]);
2144 Temp src1 = get_alu_src(ctx, instr->src[1]);
2146 Temp tmp = bld.tmp(s1), carry = bld.tmp(s1);
2170 Temp src00 = bld.tmp(src0.type(), 1);
2171 Temp src01 = bld.tmp(src0.type(), 1);
2173 Temp src10 = bld.tmp(src1.type(), 1);
2174 Temp src11 = bld.tmp(src1.type(), 1);
2178 Temp carry0 = bld.tmp(s1);
2179 Temp carry1 = bld.tmp(s1);
2181 Temp no_sat0 =
2183 Temp no_sat1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(Definition(carry1)),
2186 Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
2191 Temp no_sat0 = bld.tmp(v1);
2192 Temp dst0 = bld.tmp(v1);
2193 Temp dst1 = bld.tmp(v1);
2195 Temp carry0 = bld.vsub32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
2196 Temp carry1;
2205 Temp no_sat1 = bld.tmp(v1);
2225 Temp src0 = get_alu_src(ctx, instr->src[0]);
2226 Temp src1 = get_alu_src(ctx, instr->src[1]);
2228 Temp cond = bld.sopc(aco_opcode::s_cmp_gt_i32, bld.def(s1, scc), src1, Operand::zero());
2229 Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
2231 Temp overflow = bld.tmp(s1);
2232 Temp sub =
2291 Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst;
2311 Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]),
2364 Temp src0 = get_alu_src(ctx, instr->src[0]);
2365 Temp src1 = get_alu_src(ctx, instr->src[1]);
2391 Temp src0 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[0]));
2392 Temp src1 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[1]));
2393 Temp src2 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[2]));
2489 Temp in = get_alu_src(ctx, instr->src[0], 3);
2490 Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
2492 Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]);
2494 Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
2495 Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
2504 Temp in = get_alu_src(ctx, instr->src[0], 3);
2505 Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
2518 Temp src = get_alu_src(ctx, instr->src[0]);
2530 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2538 Temp src = get_alu_src(ctx, instr->src[0]);
2548 Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
2559 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2568 Temp src = get_alu_src(ctx, instr->src[0]);
2583 Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
2594 Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2601 Temp src = get_alu_src(ctx, instr->src[0]);
2623 Temp src = get_alu_src(ctx, instr->src[0]);
2634 Temp src = get_alu_src(ctx, instr->src[0]);
2658 Temp src = get_alu_src(ctx, instr->src[0]);
2686 Temp src = get_alu_src(ctx, instr->src[0]);
2707 Temp src0 = get_alu_src(ctx, instr->src[0]);
2708 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src0);
2709 Temp tmp0 =
2711 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f64, bld.def(bld.lm), src0, trunc);
2712 Temp cond = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc), tmp0, tmp1);
2713 Temp add = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
2731 Temp src = get_alu_src(ctx, instr->src[0]);
2748 Temp src0_lo = bld.tmp(v1), src0_hi = bld.tmp(v1);
2749 Temp src0 = get_alu_src(ctx, instr->src[0]);
2752 Temp bitmask = bld.sop1(aco_opcode::s_brev_b32, bld.def(s1),
2754 Temp bfi =
2757 Temp tmp =
2766 Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
2770 Temp cond = vop3->definitions[0].getTemp();
2772 Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
2774 Temp dst0 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_lo,
2776 Temp dst1 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_hi,
2788 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2833 Temp src = get_alu_src(ctx, instr->src[0]);
2834 Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src);
2847 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2861 Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
2862 Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
2863 Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
2878 Temp src = get_alu_src(ctx, instr->src[0]);
2891 Temp src = get_alu_src(ctx, instr->src[0]);
2913 Temp src = get_alu_src(ctx, instr->src[0]);
2921 Temp src = get_alu_src(ctx, instr->src[0]);
2956 Temp src = get_alu_src(ctx, instr->src[0]);
2967 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
2980 Temp src = get_alu_src(ctx, instr->src[0]);
2985 Temp src = get_alu_src(ctx, instr->src[0]);
2987 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3001 Temp src = get_alu_src(ctx, instr->src[0]);
3031 Temp src = get_alu_src(ctx, instr->src[0]);
3042 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3054 Temp src = get_alu_src(ctx, instr->src[0]);
3059 Temp src = get_alu_src(ctx, instr->src[0]);
3061 Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3079 Temp tmp = bld.tmp(v1);
3083 (dst.type() == RegType::sgpr) ? Temp() : dst);
3102 Temp tmp = bld.tmp(v1);
3106 (dst.type() == RegType::sgpr) ? Temp() : dst);
3119 Temp src = get_alu_src(ctx, instr->src[0]);
3121 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3138 Temp src = get_alu_src(ctx, instr->src[0]);
3140 Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3157 Temp src = get_alu_src(ctx, instr->src[0]);
3162 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3165 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3166 Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), src);
3170 Temp new_exponent = bld.tmp(v1);
3171 Temp borrow =
3177 Temp saturate = bld.vop1(aco_opcode::v_bfrev_b32, bld.def(v1), Operand::c32(0xfffffffeu));
3178 Temp lower = bld.tmp(v1), upper = bld.tmp(v1);
3185 Temp new_lower = bld.tmp(v1);
3187 Temp new_upper = bld.vsub32(bld.def(v1), upper, sign, false, borrow);
3193 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3201 Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3203 Temp sign =
3214 Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), exponent,
3216 Temp saturate = bld.sop1(aco_opcode::s_brev_b64, bld.def(s2), Operand::c32(0xfffffffeu));
3218 Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
3222 Temp borrow = bld.tmp(s1);
3230 Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3232 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3233 Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3236 Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3237 Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3238 Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3239 Temp upper = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), floor);
3252 Temp src = get_alu_src(ctx, instr->src[0]);
3257 Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3258 Temp exponent_in_range =
3261 Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3263 Temp exponent_small = bld.vsub32(bld.def(v1), Operand::c32(24u), exponent);
3264 Temp small = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), exponent_small, mantissa);
3266 Temp new_exponent = bld.tmp(v1);
3267 Temp cond_small =
3273 Temp lower = bld.tmp(v1), upper = bld.tmp(v1);
3287 Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3293 Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3297 Temp exponent_small = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3299 Temp small = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), mantissa,
3302 Temp exponent_large = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3306 Temp cond =
3310 Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
3312 Temp cond_small =
3320 Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3322 Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3323 Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3326 Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3327 Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3328 Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3329 Temp upper = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), floor);
3342 Temp src = get_alu_src(ctx, instr->src[0]);
3349 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3c00u));
3357 Temp src = get_alu_src(ctx, instr->src[0]);
3372 Temp src = get_alu_src(ctx, instr->src[0]);
3380 Temp one = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
3381 Temp upper =
3428 Temp src = get_alu_src(ctx, instr->src[0]);
3431 Temp tmp = dst.bytes() == 8 ? bld.tmp(RegClass::get(dst.type(), 4)) : dst;
3447 Temp src = get_alu_src(ctx, instr->src[0]);
3457 Temp tmp;
3478 Temp src0 = get_alu_src(ctx, instr->src[0]);
3479 Temp src1 = get_alu_src(ctx, instr->src[1]);
3511 Temp src0 = get_alu_src(ctx, instr->src[0]);
3512 Temp src1 = get_alu_src(ctx, instr->src[1]);
3540 Temp src = get_alu_src(ctx, instr->src[0], 2);
3541 Temp src0 = emit_extract_vector(ctx, src, 0, v1);
3542 Temp src1 = emit_extract_vector(ctx, src, 1, v1);
3550 Temp src = get_alu_src(ctx, instr->src[0], 2);
3551 Temp src0 = emit_extract_vector(ctx, src, 0, v1);
3552 Temp src1 = emit_extract_vector(ctx, src, 1, v1);
3560 Temp src = get_alu_src(ctx, instr->src[0]);
3574 Temp src = get_alu_src(ctx, instr->src[0]);
3596 Temp src = get_alu_src(ctx, instr->src[0]);
3597 Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
3598 Temp f32, cmp_res;
3601 Temp mask = bld.copy(
3610 Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u));
3613 Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f32, bld.def(bld.lm), Operand::zero(), f32);
3619 Temp copysign_0 =
3628 Temp bits = get_alu_src(ctx, instr->src[0]);
3629 Temp offset = get_alu_src(ctx, instr->src[1]);
3644 Temp bitmask = get_alu_src(ctx, instr->src[0]);
3645 Temp insert = get_alu_src(ctx, instr->src[1]);
3646 Temp base = get_alu_src(ctx, instr->src[2]);
3683 Temp base = get_alu_src(ctx, instr->src[0]);
3695 Temp offset = get_alu_src(ctx, instr->src[1]);
3696 Temp bits = get_alu_src(ctx, instr->src[2]);
3698 Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
3699 Temp masked =
3711 Temp extract =
3735 Temp vec = get_ssa_temp(ctx, instr->src[0].src.ssa);
3745 Temp src = get_alu_src(ctx, instr->src[0]);
3776 Temp src = get_alu_src(ctx, instr->src[0]);
3803 Temp src = get_alu_src(ctx, instr->src[0]);
3893 Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
3909 Temp tmp;
3911 Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
3914 Temp tl = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl1);
3915 Temp tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2);
3928 Temp dst = get_ssa_temp(ctx, &instr->def);
3985 Temp dst;
3988 Temp resource = Temp(0, s1); /* buffer resource or base 64-bit address */
3998 Temp soffset = Temp(0, s1);
4002 using Callback = Temp (*)(Builder& bld, const LoadEmitInfo& info, Temp offset,
4004 Temp dst_hint);
4020 Temp* const vals = (Temp*)alloca(info.dst.bytes() * sizeof(Temp));
4074 Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp();
4083 Temp lo = bld.tmp(offset_tmp.type(), 1);
4084 Temp hi = bld.tmp(offset_tmp.type(), 1);
4088 Temp carry = bld.tmp(s1);
4094 Temp new_lo = bld.tmp(v1);
4095 Temp carry =
4108 Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp();
4121 Temp hi = bld.tmp(v1), lo = bld.tmp(v1);
4127 Temp aligned_offset_tmp =
4130 Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
4131 reduced_const_offset, byte_align ? Temp() : info.dst);
4176 std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
4179 Temp* const tmp = (Temp*)alloca(num_vals * sizeof(Temp));
4219 Temp component = bld.tmp(elem_rc);
4250 Temp tmp = bld.tmp(RegType::vgpr, info.dst.size());
4267 return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0xffffffffu)));
4270 Temp
4271 lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4272 unsigned align, unsigned const_offset, Temp dst_hint)
4324 Temp val = rc == info.dst.regClass() && dst_hint.id() ? dst_hint : bld.tmp(rc);
4340 Temp
4341 smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4342 unsigned align, unsigned const_offset, Temp dst_hint)
4347 Temp addr = info.resource;
4350 offset = Temp();
4391 Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
4402 Temp
4403 mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4404 unsigned align_, unsigned const_offset, Temp dst_hint)
4449 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4458 Temp
4459 scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4460 unsigned align_, unsigned const_offset, Temp dst_hint)
4484 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4499 Temp
4500 get_gfx6_global_rsrc(Builder& bld, Temp addr)
4512 Temp
4513 add64_32(Builder& bld, Temp src0, Temp src1)
4515 Temp src00 = bld.tmp(src0.type(), 1);
4516 Temp src01 = bld.tmp(src0.type(), 1);
4520 Temp dst0 = bld.tmp(v1);
4521 Temp carry = bld.vadd32(Definition(dst0), src00, src1, true).def(1).getTemp();
4522 Temp dst1 = bld.vadd32(bld.def(v1), src01, Operand::zero(), false, carry);
4525 Temp carry = bld.tmp(s1);
4526 Temp dst0 =
4528 Temp dst1 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), src01, carry);
4534 lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
4535 uint32_t* const_offset_inout, Temp* offset_inout)
4537 Temp address = *address_inout;
4539 Temp offset = *offset_inout;
4575 offset = Temp();
4582 offset = Temp();
4589 offset = Temp();
4602 Temp
4603 global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4604 unsigned align_, unsigned const_offset, Temp dst_hint)
4606 Temp addr = info.resource;
4609 offset = Temp();
4647 Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4689 Temp
4690 load_lds(isel_context* ctx, unsigned elem_size_bytes, unsigned num_components, Temp dst,
4691 Temp address, unsigned base_offset, unsigned align)
4708 split_store_data(isel_context* ctx, RegType dst_type, unsigned count, Temp* dst, unsigned* bytes,
4709 Temp src)
4735 std::vector<Temp> temps;
4788 Temp tmp = temps[idx++];
4821 store_lds(isel_context* ctx, unsigned elem_size_bytes, Temp data, uint32_t wrmask, Temp address,
4832 Temp write_datas[32];
4896 Temp split_data = write_datas[i];
4914 Temp address_offset = address;
4925 Temp second_data = write_datas[second];
4956 Temp data, unsigned writemask, int swizzle_element_size, unsigned* write_count,
4957 Temp* write_datas, unsigned* offsets)
5011 Temp
5012 create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_type,
5013 unsigned elem_size_bytes, unsigned split_cnt = 0u, Temp dst = Temp())
5021 std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
5032 Temp zero = bld.copy(bld.def(RegClass(reg_type, dword_size)),
5050 resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_offset)
5071 emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp vdata,
5096 store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset,
5107 Temp write_datas[32];
5120 load_vmem_mubuf(isel_context* ctx, Temp dst, Temp descriptor, Temp voffset, Temp soffset,
5144 Temp
5152 Temp
5158 Temp tid_in_wave = emit_mbcnt(ctx, bld.tmp(v1));
5163 Temp wave_id_in_tg = wave_id_in_threadgroup(ctx);
5164 Temp num_pre_threads =
5181 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
5212 load_input_from_temps(isel_context* ctx, nir_intrinsic_instr* instr, Temp dst)
5233 Temp* src = &ctx->inputs.temps[idx];
5259 emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
5260 Temp prim_mask)
5262 Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
5263 Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
5301 emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
5329 emit_load_frag_shading_rate(isel_context* ctx, Temp dst)
5332 Temp cond;
5337 Temp x_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5339 Temp y_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5358 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5359 Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
5362 Temp prim_mask = get_arg(ctx, ctx->args->ac.prim_mask);
5372 Temp tmp = ctx->program->allocateTmp(instr->dest.ssa.bit_size == 16 ? v2b : v1);
5452 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5465 Temp input = get_arg(ctx, ctx->args->vs_inputs[location]);
5469 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
5490 Temp vertex_buffers =
5514 Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
5516 Temp index;
5519 Temp start_instance = get_arg(ctx, ctx->args->ac.start_instance);
5521 Temp instance_id = get_arg(ctx, ctx->args->ac.instance_id);
5523 Temp divided = bld.tmp(v1);
5537 Temp* const channels = (Temp*)alloca(num_channels * sizeof(Temp));
5545 channels[i] = Temp(0, s1);
5572 Temp fetch_index = index;
5626 Temp fetch_dst;
5667 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
5672 Temp channel = channels[idx];
5697 Temp prim_mask = get_arg(ctx, ctx->args->ac.prim_mask);
5750 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5773 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5780 Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tes_u, tes_v);
5785 Temp tess_coord = bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tes_u, tes_v, tes_w);
5790 load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size, Temp dst,
5791 Temp rsrc, Temp offset, unsigned align_mul, unsigned align_offset, bool glc = false,
5822 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5824 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5835 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5848 std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
5864 Temp index = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5868 Temp ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.push_constants));
5869 Temp vec = dst;
5922 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5941 Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
5948 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
5959 * The byte count of each input Temp must be a multiple of 2.
5961 static std::vector<Temp>
5962 emit_pack_v1(isel_context* ctx, const std::vector<Temp>& unpacked)
5965 std::vector<Temp> packed;
5966 Temp low = Temp();
5967 for (Temp tmp : unpacked) {
5971 if (low != Temp()) {
5972 Temp high = emit_extract_vector(ctx, tmp, byte_idx / 2, v2b);
5973 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, high);
5974 low = Temp();
5986 if (low != Temp()) {
5987 Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, Operand(v2b));
6022 emit_mimg(Builder& bld, aco_opcode op, Definition dst, Temp rsrc, Operand samp,
6023 std::vector<Temp> coords, unsigned wqm_mask = 0, Operand vdata = Operand(v1))
6030 Temp coord = coords[0];
6059 for (Temp& coord : coords) {
6084 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6085 Temp resource = get_ssa_temp(ctx, instr->src[0].ssa);
6086 Temp node = get_ssa_temp(ctx, instr->src[1].ssa);
6087 Temp tmax = get_ssa_temp(ctx, instr->src[2].ssa);
6088 Temp origin = get_ssa_temp(ctx, instr->src[3].ssa);
6089 Temp dir = get_ssa_temp(ctx, instr->src[4].ssa);
6090 Temp inv_dir = get_ssa_temp(ctx, instr->src[5].ssa);
6092 std::vector<Temp> args;
6114 static std::vector<Temp>
6118 Temp src0 = get_ssa_temp(ctx, instr->src[1].ssa);
6127 std::vector<Temp> coords(count);
6151 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6152 Temp rsrc_word5 = emit_extract_vector(ctx, rsrc, 5, v1);
6154 Temp first_layer =
6192 emit_tfe_init(Builder& bld, Temp dst)
6194 Temp tmp = bld.tmp(dst.regClass());
6218 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6243 Temp tmp;
6249 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6252 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6288 std::vector<Temp> coords = get_image_coords(ctx, instr);
6326 Temp data = get_ssa_temp(ctx, instr->src[3].ssa);
6343 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6344 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6380 std::vector<Temp> coords = get_image_coords(ctx, instr);
6381 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6444 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
6520 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6524 Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6525 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6551 std::vector<Temp> coords = get_image_coords(ctx, instr);
6552 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6572 get_buffer_size(isel_context* ctx, Temp desc, Temp dst)
6578 Temp size = emit_extract_vector(ctx, desc, 2, s1);
6580 Temp size_div3 = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1),
6585 Temp stride = emit_extract_vector(ctx, desc, 1, s1);
6589 Temp is12 = bld.sopc(aco_opcode::s_cmp_eq_i32, bld.def(s1, scc), stride, Operand::c32(12u));
6592 Temp shr_dst = dst.type() == RegType::vgpr ? bld.tmp(s1) : dst;
6612 Temp desc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6618 std::vector<Temp> lod{bld.copy(bld.def(v1), Operand::zero())};
6621 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6623 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6641 get_image_samples(isel_context* ctx, Definition dst, Temp resource)
6645 Temp dword3 = emit_extract_vector(ctx, resource, 3, s1);
6646 Temp samples_log2 = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6648 Temp samples = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand::c32(1u),
6650 Temp type = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6658 Temp dword1 = emit_extract_vector(ctx, resource, 1, s1);
6659 Temp is_non_null_descriptor =
6664 Temp is_msaa = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), type, Operand::c32(14u));
6672 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6673 Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6683 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6684 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6701 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
6704 Temp offset = get_ssa_temp(ctx, instr->src[2].ssa);
6706 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
6714 Temp write_datas[32];
6750 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
6757 Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
6758 Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6760 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6840 parse_global(isel_context* ctx, nir_intrinsic_instr* intrin, Temp* address, uint32_t* const_offset,
6841 Temp* offset)
6853 *offset = Temp();
6863 Temp addr, offset;
6911 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
6918 Temp write_datas[32];
6923 Temp addr, offset;
6928 Temp write_address = addr;
6930 Temp write_offset = offset;
6977 Temp rsrc = get_gfx6_global_rsrc(bld, write_address);
7003 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7010 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7014 Temp addr, offset;
7155 Temp rsrc = get_gfx6_global_rsrc(bld, addr);
7206 Temp dst = get_ssa_temp(ctx, &intrin->dest.ssa);
7207 Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[0].ssa));
7208 Temp v_offset = as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[1].ssa));
7209 Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
7230 Temp store_src = get_ssa_temp(ctx, intrin->src[0].ssa);
7231 Temp descriptor = get_ssa_temp(ctx, intrin->src[1].ssa);
7232 Temp v_offset = get_ssa_temp(ctx, intrin->src[2].ssa);
7233 Temp s_offset = get_ssa_temp(ctx, intrin->src[3].ssa);
7253 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7254 Temp base = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
7255 Temp offset = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
7360 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7361 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7374 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7375 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7388 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7389 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7498 Temp data2 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
7517 Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
7533 Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7535 Temp data0 = emit_extract_vector(ctx, data, 0, comp_rc);
7536 Temp data1 = emit_extract_vector(ctx, data, 1, comp_rc);
7539 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7550 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7553 Temp comp[4];
7558 Temp comp0 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[0], comp[1]);
7559 Temp comp1 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[2], comp[3]);
7573 Temp
7577 Temp scratch_addr = ctx->program->private_segment_buffer;
7607 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7639 Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7640 Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
7646 Temp write_datas[32];
7654 offset = nir_src_is_const(instr->src[1]) ? Temp(0, s1) : offset;
7682 Temp rsrc = get_scratch_resource(ctx);
7699 Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7704 Temp gsvs_ring =
7721 Temp gsvs_dwords[4];
7728 Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset));
7730 Temp carry = bld.tmp(s1);
7790 Temp
7791 emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src)
7800 Temp tmp =
7811 Temp tmp =
7815 Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
7819 Temp tmp =
7826 Temp tmp =
7844 Temp lane_id = emit_mbcnt(ctx, bld.tmp(v1));
7845 Temp cluster_offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1),
7848 Temp tmp;
7879 return Temp();
7883 Temp
7884 emit_boolean_exclusive_scan(isel_context* ctx, nir_op op, Temp src)
7893 Temp tmp;
7900 Temp mbcnt = emit_mbcnt(ctx, bld.tmp(v1), Operand(tmp));
7911 return Temp();
7914 Temp
7915 emit_boolean_inclusive_scan(isel_context* ctx, nir_op op, Temp src)
7923 Temp tmp = emit_boolean_exclusive_scan(ctx, op, src);
7932 return Temp();
7967 emit_uniform_subgroup(isel_context* ctx, nir_intrinsic_instr* instr, Temp src)
7979 emit_addition_uniform_reduce(isel_context* ctx, nir_op op, Definition dst, nir_src src, Temp count)
7982 Temp src_tmp = get_ssa_temp(ctx, src.ssa);
7986 Temp tmp = dst.regClass() == s1 ? bld.tmp(RegClass::get(RegType::vgpr, src.ssa->bit_size / 8))
8051 Temp thread_count =
8077 Temp packed_tid;
8096 Temp lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
8097 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8100 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8119 Temp
8121 Definition dst, Temp src)
8176 emit_interp_center(isel_context* ctx, Temp dst, Temp bary, Temp pos1, Temp pos2)
8179 Temp p1 = emit_extract_vector(ctx, bary, 0, v1);
8180 Temp p2 = emit_extract_vector(ctx, bary, 1, v1);
8182 Temp ddx_1, ddx_2, ddy_1, ddy_2;
8189 Temp tl_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p1, dpp_ctrl0);
8192 Temp tl_2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p2, dpp_ctrl0);
8196 Temp tl_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl0);
8202 Temp tl_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl0);
8212 Temp tmp1 = bld.vop3(mad, bld.def(v1), ddx_1, pos1, p1);
8213 Temp tmp2 = bld.vop3(mad, bld.def(v1), ddx_2, pos1, p2);
8216 Temp wqm1 = bld.tmp(v1);
8218 Temp wqm2 = bld.tmp(v1);
8224 Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
8225 void ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt);
8226 static void create_primitive_exports(isel_context *ctx, Temp prim_ch1);
8229 Temp
8255 Temp bary = get_interp_param(ctx, instr->intrinsic, mode);
8257 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8263 Temp model = get_arg(ctx, ctx->args->ac.pull_model);
8265 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8271 Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
8272 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8287 Temp sample_pos;
8288 Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
8290 Temp private_segment_buffer = ctx->program->private_segment_buffer;
8317 Temp tmp0 = bld.tmp(s1);
8318 Temp tmp1 = bld.tmp(s1);
8327 Temp pck0 = bld.tmp(v1);
8328 Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp();
8330 Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), bld.def(bld.lm), tmp1,
8341 Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
8365 Temp pos1 = bld.tmp(RegClass(sample_pos.type(), 1));
8366 Temp pos2 = bld.tmp(RegClass(sample_pos.type(), 1));
8375 Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
8377 Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
8379 Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
8389 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8401 Temp posx = get_arg(ctx, ctx->args->ac.frag_pos[0]);
8402 Temp posy = get_arg(ctx, ctx->args->ac.frag_pos[1]);
8487 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8491 Temp addr = get_arg(ctx, ctx->args->ac.num_work_groups);
8501 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8502 Temp addr = get_arg(ctx, ctx->args->ac.ray_launch_size_addr);
8508 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8510 Temp local_ids[3];
8528 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8545 Temp wave_id =
8549 Temp temp = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), wave_id,
8551 Temp thread_id = emit_mbcnt(ctx, bld.tmp(v1));
8567 Temp id = emit_mbcnt(ctx, bld.tmp(v1));
8575 Temp tg_num = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
8581 Temp tg_num =
8621 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8622 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8648 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8652 Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
8656 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8662 Temp tmp = bld.tmp(v1);
8672 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8680 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, tid);
8684 Temp tmp;
8707 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8708 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8712 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8720 Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
8729 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8730 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8734 Temp tmp =
8738 Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
8743 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8744 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8748 Temp tmp = bool_to_scalar_condition(ctx, src);
8755 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8756 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8818 Temp tmp_dst = emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size,
8829 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8856 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8857 Temp tmp(dst);
8899 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8927 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8932 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8943 Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8946 Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8949 Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8954 Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8966 Temp src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
8967 Temp val = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
8968 Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa));
8969 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8974 Temp src_lo = bld.tmp(v1), src_hi = bld.tmp(v1);
8975 Temp val_lo = bld.tmp(s1), val_hi = bld.tmp(s1);
8978 Temp lo = emit_wqm(bld, bld.writelane(bld.def(v1), val_lo, lane, src_hi));
8979 Temp hi = emit_wqm(bld, bld.writelane(bld.def(v1), val_hi, lane, src_hi));
8988 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8989 Temp add_src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
8990 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8993 Temp wqm_tmp = emit_mbcnt(ctx, bld.tmp(v1), Operand(src), Operand(add_src));
8998 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9007 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9008 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9026 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9041 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9043 Temp cond =
9060 Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9080 Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm));
9081 Temp last = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc),
9091 Temp elected = bld.pseudo(aco_opcode::p_elect, bld.def(bld.lm), Operand(exec, bld.lm));
9097 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9101 Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);
9113 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9118 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9123 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9128 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9133 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9138 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9156 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9214 Temp prim_ch1 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
9220 Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
9221 Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa);
9226 Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa);
9227 Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa);
9228 Temp m0_val = get_ssa_temp(ctx, instr->src[2].ssa);
9229 Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), bld.as_uniform(m0_val)));
9235 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9236 Temp addr = get_arg(ctx, ctx->args->ac.sbt_descriptors);
9263 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9264 Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
9280 build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
9281 Temp* out_tc)
9285 Temp deriv_x = emit_extract_vector(ctx, deriv, 0, v1);
9286 Temp deriv_y = emit_extract_vector(ctx, deriv, 1, v1);
9287 Temp deriv_z = emit_extract_vector(ctx, deriv, 2, v1);
9294 Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma);
9295 Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive);
9296 Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma);
9298 Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id);
9299 Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id);
9301 Temp is_not_ma_x =
9305 Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x);
9306 Temp sgn = bld.vop2_e64(
9325 prepare_cube_coords(isel_context* ctx, std::vector<Temp>& coords, Temp* ddx, Temp* ddy,
9329 Temp ma, tc, sc, id;
9345 Temp invma = bld.tmp(v1);
9365 Temp deriv_ma;
9366 Temp deriv_sc, deriv_tc;
9371 Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9374 Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9417 Temp resource, sampler, bias = Temp(), compare = Temp(), sample_index = Temp(), lod = Temp(),
9418 offset = Temp(), ddx = Temp(), ddy = Temp(), clamped_lod = Temp(),
9419 coord = Temp();
9420 std::vector<Temp> coords;
9421 std::vector<Temp> derivs;
9525 Temp acc, pack = Temp();
9548 if (pack == Temp()) {
9555 if (pack_const && pack != Temp())
9570 if (pack == Temp()) {
9577 if (pack_const && pack != Temp())
9580 if (pack_const && pack == Temp())
9582 else if (pack == Temp())
9589 std::vector<Temp> unpacked_coord;
9606 } else if (coord != Temp()) {
9629 std::array<Temp, 2> ddxddy = {ddx, ddy};
9630 for (Temp tmp : ddxddy) {
9631 if (tmp == Temp())
9633 std::vector<Temp> unpacked = {tmp};
9636 Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes()));
9639 for (Temp derv : emit_pack_v1(ctx, unpacked))
9658 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9659 Temp tmp_dst = dst;
9683 resource, Operand(s4), std::vector<Temp>{lod});
9699 Temp tg4_compare_cube_wa64 = Temp();
9702 Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
9703 Temp size = bld.tmp(v2);
9705 resource, Operand(s4), std::vector<Temp>{tg4_lod});
9711 Temp half_texel[2];
9729 Temp not_needed =
9739 Temp new_coords[2] = {bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]),
9744 Temp* const desc = (Temp*)alloca(resource.size() * sizeof(Temp));
9754 Temp dfmt = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), desc[1],
9756 Temp compare_cube_wa = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), dfmt,
9759 Temp nfmt;
9837 std::vector<Temp> args;
9879 Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
9885 Temp is_not_null = bld.tmp(bld.lm);
10044 Temp val[4];
10047 Temp cvt_val;
10056 Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass());
10071 Temp tmp = get_ssa_temp(ctx, ssa);
10091 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
10186 Temp dst = get_ssa_temp(ctx, &instr->def);
10475 static void begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond);
10490 Temp cond = bld.copy(bld.def(s1, scc), Operand::zero());
10543 begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond)
10701 begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
10797 Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa);
10949 Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u),
11020 create_primitive_exports(isel_context *ctx, Temp prim_ch1)
11038 Temp ch2 = bld.copy(bld.def(v1), Operand::c32(0));
11043 Temp tmp = ctx->outputs.temps[VARYING_SLOT_LAYER * 4u];
11048 Temp tmp = ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u];
11053 Temp tmp = ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u];
11194 Temp isnan = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), values[i],
11389 Temp tmp = convert_int(ctx, bld, chan.getTemp(), 16, 32, sign_ext);
11398 Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ps_epilog_pc));
11453 emit_stream_output(isel_context* ctx, Temp const* so_buffers, Temp const* so_write_offset,
11473 Temp write_data = ctx->program->allocateTmp(RegClass(RegType::vgpr, count));
11509 Temp so_vtx_count =
11513 Temp tid = emit_mbcnt(ctx, bld.tmp(v1));
11515 Temp can_emit = bld.vopc(aco_opcode::v_cmp_gt_i32, bld.def(bld.lm), so_vtx_count, tid);
11522 Temp so_write_index =
11525 Temp so_buffers[4];
11526 Temp so_write_offset[4];
11527 Temp buf_ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->streamout_buffers));
11538 Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
11541 Temp new_offset = bld.vadd32(bld.def(v1), offset, tid);
11546 Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u);
11547 Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(4u),
11592 Temp elems[16];
11599 Temp dst = ctx->program->allocateTmp(type);
11647 Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
11651 Temp instance_id =
11654 Temp vs_rel_patch_id =
11657 Temp vertex_id =
11697 Temp sel = bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm),
11701 Temp new_coord[2];
11703 Temp persp_centroid =
11705 Temp persp_center =
11717 Temp new_coord[2];
11719 Temp linear_centroid =
11721 Temp linear_center =
11801 Temp
11802 lanecount_to_mask(isel_context* ctx, Temp count, bool allow64 = true)
11807 Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand::zero());
11808 Temp cond;
11816 Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count,
11829 Temp
11835 Temp count = i == 0
11844 ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt)
11849 Temp prm_cnt_0;
11864 Temp tmp =
11878 Temp first_lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
11879 Temp cond = bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc),
11890 Temp zero = bld.copy(bld.def(v1), Operand::zero());
11892 Temp nan_coord = bld.copy(bld.def(v1), Operand::c32(-1u));
11962 Temp cond = merged_wave_info_to_mask(&ctx, i);
12053 Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4),
12061 Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u),
12077 Temp cond =
12095 Temp val = bld.tmp(v1);
12097 load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1, 0u, true,
12559 Temp inputs = get_arg(&ctx, ctx.args->ps_epilog_inputs[i]);