aco_instruction_selection.cpp - OpenGrok cross reference for /third_party/mesa3d/src/amd/compiler/aco_instruction

Lines Matching refs:Temp
70    Temp cond;
128 Temp
132    return Temp(id, ctx->program->temp_rc[id]);
135 Temp
136 emit_mbcnt(isel_context* ctx, Temp dst, Operand mask = Operand(), Operand base = Operand::zero())
161    Temp mbcnt_lo = bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, bld.def(v1), mask_lo, base);
169 Temp
170 emit_wqm(Builder& bld, Temp src, Temp dst = Temp(0, s1), bool program_needs_wqm = false)
187 static Temp
188 emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
205       Temp index_is_lo =
209       Temp index_is_lo_n1 = bld.sop1(aco_opcode::s_not_b32, bld.def(s1), bld.def(s1, scc),
228       Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
233 static Temp
234 emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
271 Temp
272 as_vgpr(Builder& bld, Temp val)
280 Temp
281 as_vgpr(isel_context* ctx, Temp val)
289 emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b)
313    Temp pre_shift_dst = a;
320    Temp increment_dst = pre_shift_dst;
326    Temp multiply_dst = increment_dst;
340 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst)
346 Temp
347 emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, RegClass dst_rc)
375       Temp dst = bld.tmp(dst_rc);
382 emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components)
403    std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
415 expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components, unsigned mask,
422       Temp tmp_dst = bld.tmp(RegClass::get(RegType::vgpr, 2 * num_components));
446    std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
448    Temp padding = Temp(0, dst_rc);
458          Temp src = emit_extract_vector(ctx, vec_src, k++, src_rc);
474 byte_align_scalar(isel_context* ctx, Temp vec, Operand offset, Temp dst)
478    Temp select = Temp();
484       Temp tmp =
494       Temp tmp = dst.size() == 2 ? dst : bld.tmp(s2);
501       Temp lo = bld.tmp(s2), hi;
511       if (select != Temp())
515       Temp mid = bld.tmp(s1);
525 byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigned component_size)
529       Temp tmp[4] = {vec, vec, vec, vec};
562    std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
592 Temp
596    Temp tmp = get_ssa_temp(ctx, def);
603 Temp
604 bool_to_vector_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s2))
617 Temp
618 bool_to_scalar_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s1))
633  * Copies the first src_bits of the input to the output Temp. Input bits at positions larger than
641 Temp
642 convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits,
643             bool sign_extend, Temp dst = Temp())
666    Temp tmp = dst;
683          Temp high =
687          Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), tmp);
703 Temp
704 extract_8_16_bit_sgpr_element(isel_context* ctx, Temp dst, nir_alu_src* src, sgpr_extract_mode mode)
706    Temp vec = get_ssa_temp(ctx, src->src.ssa);
717    Temp tmp = dst.regClass() == s2 ? bld.tmp(s1) : dst;
732 Temp
738    Temp vec = get_ssa_temp(ctx, src.src.ssa);
768       std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
775       Temp dst = ctx->program->allocateTmp(RegClass(vec.type(), elem_size * size / 4));
783 Temp
793    Temp tmp = get_ssa_temp(ctx, src.src.ssa);
828 Temp
829 convert_pointer_to_64_bit(isel_context* ctx, Temp ptr, bool non_uniform = false)
841 emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
868 emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode opc, Temp dst,
875    Temp src0 = get_alu_src(ctx, instr->src[swap_srcs ? 1 : 0]);
876    Temp src1 = get_alu_src(ctx, instr->src[swap_srcs ? 0 : 1]);
879          Temp t = src0;
901       Temp tmp = bld.vop2(opc, bld.def(v1), op[0], op[1]);
913 emit_vop2_instruction_logic64(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
918    Temp src0 = get_alu_src(ctx, instr->src[0]);
919    Temp src1 = get_alu_src(ctx, instr->src[1]);
926    Temp src00 = bld.tmp(src0.type(), 1);
927    Temp src01 = bld.tmp(src0.type(), 1);
929    Temp src10 = bld.tmp(v1);
930    Temp src11 = bld.tmp(v1);
932    Temp lo = bld.vop2(op, bld.def(v1), src00, src10);
933    Temp hi = bld.vop2(op, bld.def(v1), src01, src11);
938 emit_vop3a_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
942    Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
955       Temp tmp;
972 emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst,
975    Temp src0 = get_alu_src_vop3p(ctx, instr->src[swap_srcs]);
976    Temp src1 = get_alu_src_vop3p(ctx, instr->src[!swap_srcs]);
994 emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
996    Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
1012 emit_vop1_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1024 emit_vopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1026    Temp src0 = get_alu_src(ctx, instr->src[0]);
1027    Temp src1 = get_alu_src(ctx, instr->src[1]);
1055          Temp t = src0;
1068 emit_sopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
1070    Temp src0 = get_alu_src(ctx, instr->src[0]);
1071    Temp src1 = get_alu_src(ctx, instr->src[1]);
1080    Temp cmp = bld.sopc(op, bld.scc(bld.def(s1)), src0, src1);
1086 emit_comparison(isel_context* ctx, nir_alu_instr* instr, Temp dst, aco_opcode v16_op,
1111                    Temp dst)
1114    Temp src0 = get_alu_src(ctx, instr->src[0]);
1115    Temp src1 = get_alu_src(ctx, instr->src[1]);
1125 emit_bcsel(isel_context* ctx, nir_alu_instr* instr, Temp dst)
1128    Temp cond = get_alu_src(ctx, instr->src[0]);
1129    Temp then = get_alu_src(ctx, instr->src[1]);
1130    Temp els = get_alu_src(ctx, instr->src[2]);
1142          Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
1144          Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
1147          Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, cond);
1148          Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, cond);
1193 emit_scaled_op(isel_context* ctx, Builder& bld, Definition dst, Temp val, aco_opcode op,
1197    Temp is_denormal = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), as_vgpr(ctx, val),
1199    Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x4b800000u), val);
1203    Temp not_scaled = bld.vop1(op, bld.def(v1), val);
1209 emit_rcp(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1220 emit_rsq(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1231 emit_sqrt(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1242 emit_log2(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1252 Temp
1253 emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1264    Temp val_lo = bld.tmp(v1), val_hi = bld.tmp(v1);
1268    Temp exponent =
1273    Temp fract_mask = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
1277    Temp fract_mask_lo = bld.tmp(v1), fract_mask_hi = bld.tmp(v1);
1281    Temp fract_lo = bld.tmp(v1), fract_hi = bld.tmp(v1);
1282    Temp tmp = bld.vop1(aco_opcode::v_not_b32, bld.def(v1), fract_mask_lo);
1288    Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x80000000u), val_hi);
1291    Temp exp_lt0 =
1293    Temp dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_lo,
1295    Temp dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_hi, sign, exp_lt0);
1296    Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand::c32(51u));
1303 Temp
1304 emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
1311    Temp src0 = as_vgpr(ctx, val);
1313    Temp mask = bld.copy(bld.def(s1), Operand::c32(3u)); /* isnan */
1314    Temp min_val = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::c32(-1u),
1317    Temp isnan = bld.vopc_e64(aco_opcode::v_cmp_class_f64, bld.def(bld.lm), src0, mask);
1318    Temp fract = bld.vop1(aco_opcode::v_fract_f64, bld.def(v2), src0);
1319    Temp min = bld.vop3(aco_opcode::v_min_f64, bld.def(v2), fract, min_val);
1321    Temp then_lo = bld.tmp(v1), then_hi = bld.tmp(v1);
1323    Temp else_lo = bld.tmp(v1), else_hi = bld.tmp(v1);
1326    Temp dst0 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_lo, then_lo, isnan);
1327    Temp dst1 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), else_hi, then_hi, isnan);
1329    Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
1337 Temp
1338 uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1356 Temp
1357 usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
1384    Temp dst = get_ssa_temp(ctx, &instr->dest.dest.ssa);
1392       std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
1411          Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->dest.dest.ssa.bit_size) - 1));
1413          std::array<Temp, NIR_MAX_VEC_COMPONENTS> packed;
1479       Temp src = get_alu_src(ctx, instr->src[0]);
1491       Temp src = get_alu_src(ctx, instr->src[0]);
1495          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
1510          Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
1515          Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
1520       Temp src = get_alu_src(ctx, instr->src[0]);
1540       Temp src = get_alu_src(ctx, instr->src[0]);
1542          Temp tmp =
1546          Temp neg =
1548          Temp neqz;
1567          Temp upper = emit_extract_vector(ctx, src, 1, v1);
1568          Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), upper);
1569          Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i64, bld.def(bld.lm), Operand::zero(), src);
1570          Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(1u), neg, gtz);
1761       Temp src = get_alu_src(ctx, instr->src[0]);
1775       Temp src = get_alu_src(ctx, instr->src[0]);
1782          Temp msb_rev = bld.sop1(op, bld.def(s1), src);
1786          Temp msb = sub.def(0).getTemp();
1787          Temp carry = sub.def(1).getTemp();
1794          Temp msb_rev = bld.tmp(v1);
1796          Temp msb = bld.tmp(v1);
1797          Temp carry =
1804          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
1810          Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi);
1812          Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi);
1814          Temp msb = bld.tmp(v1);
1815          Temp carry =
1824       Temp src = get_alu_src(ctx, instr->src[0]);
1826          Temp msb_rev = bld.sop1(aco_opcode::s_flbit_i32_b32, bld.def(s1), src);
1829          Temp msb_rev = bld.vop1(aco_opcode::v_ffbh_u32, bld.def(v1), src);
1861       Temp src0 = get_alu_src(ctx, instr->src[0]);
1862       Temp src1 = get_alu_src(ctx, instr->src[1]);
1869       Temp src00 = bld.tmp(src0.type(), 1);
1870       Temp src01 = bld.tmp(dst.type(), 1);
1872       Temp src10 = bld.tmp(src1.type(), 1);
1873       Temp src11 = bld.tmp(dst.type(), 1);
1877          Temp carry = bld.tmp(s1);
1878          Temp dst0 =
1880          Temp dst1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
1884          Temp dst0 = bld.tmp(v1);
1885          Temp carry = bld.vadd32(Definition(dst0), src00, src10, true).def(1).getTemp();
1886          Temp dst1 = bld.vadd32(bld.def(v1), src01, src11, false, carry);
1900       Temp src0 = get_alu_src(ctx, instr->src[0]);
1901       Temp src1 = get_alu_src(ctx, instr->src[1]);
1903          Temp tmp = bld.tmp(s1), carry = bld.tmp(s1);
1927       Temp src00 = bld.tmp(src0.type(), 1);
1928       Temp src01 = bld.tmp(src0.type(), 1);
1930       Temp src10 = bld.tmp(src1.type(), 1);
1931       Temp src11 = bld.tmp(src1.type(), 1);
1935          Temp carry0 = bld.tmp(s1);
1936          Temp carry1 = bld.tmp(s1);
1938          Temp no_sat0 =
1940          Temp no_sat1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(Definition(carry1)),
1943          Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
1948          Temp no_sat0 = bld.tmp(v1);
1949          Temp dst0 = bld.tmp(v1);
1950          Temp dst1 = bld.tmp(v1);
1952          Temp carry0 = bld.vadd32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
1953          Temp carry1;
1962             Temp no_sat1 = bld.tmp(v1);
1983       Temp src0 = get_alu_src(ctx, instr->src[0]);
1984       Temp src1 = get_alu_src(ctx, instr->src[1]);
1986          Temp cond = bld.sopc(aco_opcode::s_cmp_lt_i32, bld.def(s1, scc), src1, Operand::zero());
1987          Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
1989          Temp overflow = bld.tmp(s1);
1990          Temp add =
2012       Temp src0 = get_alu_src(ctx, instr->src[0]);
2013       Temp src1 = get_alu_src(ctx, instr->src[1]);
2019          Temp carry = bld.vadd32(bld.def(v1), src0, src1, true).def(1).getTemp();
2025       Temp src00 = bld.tmp(src0.type(), 1);
2026       Temp src01 = bld.tmp(dst.type(), 1);
2028       Temp src10 = bld.tmp(src1.type(), 1);
2029       Temp src11 = bld.tmp(dst.type(), 1);
2032          Temp carry = bld.tmp(s1);
2040          Temp carry = bld.vadd32(bld.def(v1), src00, src10, true).def(1).getTemp();
2059       Temp src0 = get_alu_src(ctx, instr->src[0]);
2060       Temp src1 = get_alu_src(ctx, instr->src[1]);
2076       Temp src00 = bld.tmp(src0.type(), 1);
2077       Temp src01 = bld.tmp(dst.type(), 1);
2079       Temp src10 = bld.tmp(src1.type(), 1);
2080       Temp src11 = bld.tmp(dst.type(), 1);
2083          Temp borrow = bld.tmp(s1);
2084          Temp dst0 =
2086          Temp dst1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.def(s1, scc), src01, src11,
2090          Temp lower = bld.tmp(v1);
2091          Temp borrow = bld.vsub32(Definition(lower), src00, src10, true).def(1).getTemp();
2092          Temp upper = bld.vsub32(bld.def(v1), src01, src11, false, borrow);
2100       Temp src0 = get_alu_src(ctx, instr->src[0]);
2101       Temp src1 = get_alu_src(ctx, instr->src[1]);
2106          Temp borrow = bld.vsub32(bld.def(v1), src0, src1, true).def(1).getTemp();
2112       Temp src00 = bld.tmp(src0.type(), 1);
2113       Temp src01 = bld.tmp(dst.type(), 1);
2115       Temp src10 = bld.tmp(src1.type(), 1);
2116       Temp src11 = bld.tmp(dst.type(), 1);
2119          Temp borrow = bld.tmp(s1);
2127          Temp borrow = bld.vsub32(bld.def(v1), src00, src10, true).def(1).getTemp();
2143       Temp src0 = get_alu_src(ctx, instr->src[0]);
2144       Temp src1 = get_alu_src(ctx, instr->src[1]);
2146          Temp tmp = bld.tmp(s1), carry = bld.tmp(s1);
2170       Temp src00 = bld.tmp(src0.type(), 1);
2171       Temp src01 = bld.tmp(src0.type(), 1);
2173       Temp src10 = bld.tmp(src1.type(), 1);
2174       Temp src11 = bld.tmp(src1.type(), 1);
2178          Temp carry0 = bld.tmp(s1);
2179          Temp carry1 = bld.tmp(s1);
2181          Temp no_sat0 =
2183          Temp no_sat1 = bld.sop2(aco_opcode::s_subb_u32, bld.def(s1), bld.scc(Definition(carry1)),
2186          Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
2191          Temp no_sat0 = bld.tmp(v1);
2192          Temp dst0 = bld.tmp(v1);
2193          Temp dst1 = bld.tmp(v1);
2195          Temp carry0 = bld.vsub32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
2196          Temp carry1;
2205             Temp no_sat1 = bld.tmp(v1);
2225       Temp src0 = get_alu_src(ctx, instr->src[0]);
2226       Temp src1 = get_alu_src(ctx, instr->src[1]);
2228          Temp cond = bld.sopc(aco_opcode::s_cmp_gt_i32, bld.def(s1, scc), src1, Operand::zero());
2229          Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)),
2231          Temp overflow = bld.tmp(s1);
2232          Temp sub =
2291          Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst;
2311          Temp tmp = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), get_alu_src(ctx, instr->src[0]),
2364       Temp src0 = get_alu_src(ctx, instr->src[0]);
2365       Temp src1 = get_alu_src(ctx, instr->src[1]);
2391          Temp src0 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[0]));
2392          Temp src1 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[1]));
2393          Temp src2 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[2]));
2489       Temp in = get_alu_src(ctx, instr->src[0], 3);
2490       Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
2492       Temp ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), src[0], src[1], src[2]);
2494       Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
2495       Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
2504       Temp in = get_alu_src(ctx, instr->src[0], 3);
2505       Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
2518          Temp src = get_alu_src(ctx, instr->src[0]);
2530          Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2538       Temp src = get_alu_src(ctx, instr->src[0]);
2548          Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
2559          Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2568       Temp src = get_alu_src(ctx, instr->src[0]);
2583          Temp upper = bld.tmp(v1), lower = bld.tmp(v1);
2594          Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
2601       Temp src = get_alu_src(ctx, instr->src[0]);
2623          Temp src = get_alu_src(ctx, instr->src[0]);
2634          Temp src = get_alu_src(ctx, instr->src[0]);
2658          Temp src = get_alu_src(ctx, instr->src[0]);
2686          Temp src = get_alu_src(ctx, instr->src[0]);
2707             Temp src0 = get_alu_src(ctx, instr->src[0]);
2708             Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src0);
2709             Temp tmp0 =
2711             Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f64, bld.def(bld.lm), src0, trunc);
2712             Temp cond = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc), tmp0, tmp1);
2713             Temp add = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
2731          Temp src = get_alu_src(ctx, instr->src[0]);
2748             Temp src0_lo = bld.tmp(v1), src0_hi = bld.tmp(v1);
2749             Temp src0 = get_alu_src(ctx, instr->src[0]);
2752             Temp bitmask = bld.sop1(aco_opcode::s_brev_b32, bld.def(s1),
2754             Temp bfi =
2757             Temp tmp =
2766             Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
2770             Temp cond = vop3->definitions[0].getTemp();
2772             Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
2774             Temp dst0 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_lo,
2776             Temp dst1 = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp_hi,
2788       Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2833          Temp src = get_alu_src(ctx, instr->src[0]);
2834          Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src);
2847       Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
2861          Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
2862          Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
2863          Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
2878       Temp src = get_alu_src(ctx, instr->src[0]);
2891       Temp src = get_alu_src(ctx, instr->src[0]);
2913       Temp src = get_alu_src(ctx, instr->src[0]);
2921       Temp src = get_alu_src(ctx, instr->src[0]);
2956       Temp src = get_alu_src(ctx, instr->src[0]);
2967          Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
2980          Temp src = get_alu_src(ctx, instr->src[0]);
2985          Temp src = get_alu_src(ctx, instr->src[0]);
2987          Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3001       Temp src = get_alu_src(ctx, instr->src[0]);
3031       Temp src = get_alu_src(ctx, instr->src[0]);
3042          Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3054          Temp src = get_alu_src(ctx, instr->src[0]);
3059          Temp src = get_alu_src(ctx, instr->src[0]);
3061          Temp lower = bld.tmp(rc), upper = bld.tmp(rc);
3079             Temp tmp = bld.tmp(v1);
3083                               (dst.type() == RegType::sgpr) ? Temp() : dst);
3102             Temp tmp = bld.tmp(v1);
3106                               (dst.type() == RegType::sgpr) ? Temp() : dst);
3119       Temp src = get_alu_src(ctx, instr->src[0]);
3121          Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3138       Temp src = get_alu_src(ctx, instr->src[0]);
3140          Temp tmp = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src);
3157       Temp src = get_alu_src(ctx, instr->src[0]);
3162          Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3165          Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3166          Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), src);
3170          Temp new_exponent = bld.tmp(v1);
3171          Temp borrow =
3177          Temp saturate = bld.vop1(aco_opcode::v_bfrev_b32, bld.def(v1), Operand::c32(0xfffffffeu));
3178          Temp lower = bld.tmp(v1), upper = bld.tmp(v1);
3185          Temp new_lower = bld.tmp(v1);
3187          Temp new_upper = bld.vsub32(bld.def(v1), upper, sign, false, borrow);
3193          Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3201          Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3203          Temp sign =
3214          Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), exponent,
3216          Temp saturate = bld.sop1(aco_opcode::s_brev_b64, bld.def(s2), Operand::c32(0xfffffffeu));
3218          Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
3222          Temp borrow = bld.tmp(s1);
3230          Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3232          Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3233          Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3236          Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3237          Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3238          Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3239          Temp upper = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), floor);
3252       Temp src = get_alu_src(ctx, instr->src[0]);
3257          Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src);
3258          Temp exponent_in_range =
3261          Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src);
3263          Temp exponent_small = bld.vsub32(bld.def(v1), Operand::c32(24u), exponent);
3264          Temp small = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), exponent_small, mantissa);
3266          Temp new_exponent = bld.tmp(v1);
3267          Temp cond_small =
3273          Temp lower = bld.tmp(v1), upper = bld.tmp(v1);
3287          Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src,
3293          Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
3297          Temp exponent_small = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3299          Temp small = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), mantissa,
3302          Temp exponent_large = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc),
3306          Temp cond =
3310          Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
3312          Temp cond_small =
3320          Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(),
3322          Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src);
3323          Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec);
3326          Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul);
3327          Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc);
3328          Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma);
3329          Temp upper = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), floor);
3342       Temp src = get_alu_src(ctx, instr->src[0]);
3349          Temp one = bld.copy(bld.def(v1), Operand::c32(0x3c00u));
3357       Temp src = get_alu_src(ctx, instr->src[0]);
3372       Temp src = get_alu_src(ctx, instr->src[0]);
3380          Temp one = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
3381          Temp upper =
3428       Temp src = get_alu_src(ctx, instr->src[0]);
3431       Temp tmp = dst.bytes() == 8 ? bld.tmp(RegClass::get(dst.type(), 4)) : dst;
3447       Temp src = get_alu_src(ctx, instr->src[0]);
3457          Temp tmp;
3478       Temp src0 = get_alu_src(ctx, instr->src[0]);
3479       Temp src1 = get_alu_src(ctx, instr->src[1]);
3511       Temp src0 = get_alu_src(ctx, instr->src[0]);
3512       Temp src1 = get_alu_src(ctx, instr->src[1]);
3540       Temp src = get_alu_src(ctx, instr->src[0], 2);
3541       Temp src0 = emit_extract_vector(ctx, src, 0, v1);
3542       Temp src1 = emit_extract_vector(ctx, src, 1, v1);
3550       Temp src = get_alu_src(ctx, instr->src[0], 2);
3551       Temp src0 = emit_extract_vector(ctx, src, 0, v1);
3552       Temp src1 = emit_extract_vector(ctx, src, 1, v1);
3560       Temp src = get_alu_src(ctx, instr->src[0]);
3574       Temp src = get_alu_src(ctx, instr->src[0]);
3596       Temp src = get_alu_src(ctx, instr->src[0]);
3597       Temp f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
3598       Temp f32, cmp_res;
3601          Temp mask = bld.copy(
3610          Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u));
3613          Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f32, bld.def(bld.lm), Operand::zero(), f32);
3619          Temp copysign_0 =
3628       Temp bits = get_alu_src(ctx, instr->src[0]);
3629       Temp offset = get_alu_src(ctx, instr->src[1]);
3644          Temp bitmask = get_alu_src(ctx, instr->src[0]);
3645          Temp insert = get_alu_src(ctx, instr->src[1]);
3646          Temp base = get_alu_src(ctx, instr->src[2]);
3683          Temp base = get_alu_src(ctx, instr->src[0]);
3695          Temp offset = get_alu_src(ctx, instr->src[1]);
3696          Temp bits = get_alu_src(ctx, instr->src[2]);
3698             Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
3699             Temp masked =
3711             Temp extract =
3735          Temp vec = get_ssa_temp(ctx, instr->src[0].src.ssa);
3745          Temp src = get_alu_src(ctx, instr->src[0]);
3776          Temp src = get_alu_src(ctx, instr->src[0]);
3803       Temp src = get_alu_src(ctx, instr->src[0]);
3893       Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
3909       Temp tmp;
3911          Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl1);
3914          Temp tl = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl1);
3915          Temp tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2);
3928    Temp dst = get_ssa_temp(ctx, &instr->def);
3985    Temp dst;
3988    Temp resource = Temp(0, s1); /* buffer resource or base 64-bit address */
3998    Temp soffset = Temp(0, s1);
4002    using Callback = Temp (*)(Builder& bld, const LoadEmitInfo& info, Temp offset,
4004                              Temp dst_hint);
4020    Temp* const vals = (Temp*)alloca(info.dst.bytes() * sizeof(Temp));
4074          Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp();
4083             Temp lo = bld.tmp(offset_tmp.type(), 1);
4084             Temp hi = bld.tmp(offset_tmp.type(), 1);
4088                Temp carry = bld.tmp(s1);
4094                Temp new_lo = bld.tmp(v1);
4095                Temp carry =
4108          Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp();
4121             Temp hi = bld.tmp(v1), lo = bld.tmp(v1);
4127       Temp aligned_offset_tmp =
4130       Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
4131                                  reduced_const_offset, byte_align ? Temp() : info.dst);
4176    std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
4179       Temp* const tmp = (Temp*)alloca(num_vals * sizeof(Temp));
4219             Temp component = bld.tmp(elem_rc);
4250       Temp tmp = bld.tmp(RegType::vgpr, info.dst.size());
4267    return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0xffffffffu)));
4270 Temp
4271 lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4272                   unsigned align, unsigned const_offset, Temp dst_hint)
4324    Temp val = rc == info.dst.regClass() && dst_hint.id() ? dst_hint : bld.tmp(rc);
4340 Temp
4341 smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4342                    unsigned align, unsigned const_offset, Temp dst_hint)
4347    Temp addr = info.resource;
4350       offset = Temp();
4391    Temp val = dst_hint.id() && dst_hint.regClass() == rc ? dst_hint : bld.tmp(rc);
4402 Temp
4403 mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4404                     unsigned align_, unsigned const_offset, Temp dst_hint)
4449    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4458 Temp
4459 scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4460                       unsigned align_, unsigned const_offset, Temp dst_hint)
4484    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4499 Temp
4500 get_gfx6_global_rsrc(Builder& bld, Temp addr)
4512 Temp
4513 add64_32(Builder& bld, Temp src0, Temp src1)
4515    Temp src00 = bld.tmp(src0.type(), 1);
4516    Temp src01 = bld.tmp(src0.type(), 1);
4520       Temp dst0 = bld.tmp(v1);
4521       Temp carry = bld.vadd32(Definition(dst0), src00, src1, true).def(1).getTemp();
4522       Temp dst1 = bld.vadd32(bld.def(v1), src01, Operand::zero(), false, carry);
4525       Temp carry = bld.tmp(s1);
4526       Temp dst0 =
4528       Temp dst1 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), src01, carry);
4534 lower_global_address(Builder& bld, uint32_t offset_in, Temp* address_inout,
4535                      uint32_t* const_offset_inout, Temp* offset_inout)
4537    Temp address = *address_inout;
4539    Temp offset = *offset_inout;
4575          offset = Temp();
4582          offset = Temp();
4589          offset = Temp();
4602 Temp
4603 global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
4604                      unsigned align_, unsigned const_offset, Temp dst_hint)
4606    Temp addr = info.resource;
4609       offset = Temp();
4647    Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
4689 Temp
4690 load_lds(isel_context* ctx, unsigned elem_size_bytes, unsigned num_components, Temp dst,
4691          Temp address, unsigned base_offset, unsigned align)
4708 split_store_data(isel_context* ctx, RegType dst_type, unsigned count, Temp* dst, unsigned* bytes,
4709                  Temp src)
4735    std::vector<Temp> temps;
4788          Temp tmp = temps[idx++];
4821 store_lds(isel_context* ctx, unsigned elem_size_bytes, Temp data, uint32_t wrmask, Temp address,
4832    Temp write_datas[32];
4896       Temp split_data = write_datas[i];
4914       Temp address_offset = address;
4925          Temp second_data = write_datas[second];
4956                    Temp data, unsigned writemask, int swizzle_element_size, unsigned* write_count,
4957                    Temp* write_datas, unsigned* offsets)
5011 Temp
5012 create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_type,
5013                       unsigned elem_size_bytes, unsigned split_cnt = 0u, Temp dst = Temp())
5021    std::array<Temp, NIR_MAX_VEC_COMPONENTS> allocated_vec;
5032          Temp zero = bld.copy(bld.def(RegClass(reg_type, dword_size)),
5050 resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_offset)
5071 emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp vdata,
5096 store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset,
5107    Temp write_datas[32];
5120 load_vmem_mubuf(isel_context* ctx, Temp dst, Temp descriptor, Temp voffset, Temp soffset,
5144 Temp
5152 Temp
5158    Temp tid_in_wave = emit_mbcnt(ctx, bld.tmp(v1));
5163    Temp wave_id_in_tg = wave_id_in_threadgroup(ctx);
5164    Temp num_pre_threads =
5181    Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
5212 load_input_from_temps(isel_context* ctx, nir_intrinsic_instr* instr, Temp dst)
5233    Temp* src = &ctx->inputs.temps[idx];
5259 emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
5260                   Temp prim_mask)
5262    Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
5263    Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
5301 emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
5329 emit_load_frag_shading_rate(isel_context* ctx, Temp dst)
5332    Temp cond;
5337    Temp x_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5339    Temp y_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
5358    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5359    Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
5362    Temp prim_mask = get_arg(ctx, ctx->args->ac.prim_mask);
5372          Temp tmp = ctx->program->allocateTmp(instr->dest.ssa.bit_size == 16 ? v2b : v1);
5452    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5465       Temp input = get_arg(ctx, ctx->args->vs_inputs[location]);
5469       std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
5490       Temp vertex_buffers =
5514       Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
5516       Temp index;
5519          Temp start_instance = get_arg(ctx, ctx->args->ac.start_instance);
5521             Temp instance_id = get_arg(ctx, ctx->args->ac.instance_id);
5523                Temp divided = bld.tmp(v1);
5537       Temp* const channels = (Temp*)alloca(num_channels * sizeof(Temp));
5545             channels[i] = Temp(0, s1);
5572          Temp fetch_index = index;
5626          Temp fetch_dst;
5667          std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
5672                Temp channel = channels[idx];
5697       Temp prim_mask = get_arg(ctx, ctx->args->ac.prim_mask);
5750    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5773    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5780       Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tes_u, tes_v);
5785    Temp tess_coord = bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tes_u, tes_v, tes_w);
5790 load_buffer(isel_context* ctx, unsigned num_components, unsigned component_size, Temp dst,
5791             Temp rsrc, Temp offset, unsigned align_mul, unsigned align_offset, bool glc = false,
5822    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5824    Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5835    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5848          std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
5864    Temp index = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
5868    Temp ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.push_constants));
5869    Temp vec = dst;
5922    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
5941    Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
5948    Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
5959  * The byte count of each input Temp must be a multiple of 2.
5961 static std::vector<Temp>
5962 emit_pack_v1(isel_context* ctx, const std::vector<Temp>& unpacked)
5965    std::vector<Temp> packed;
5966    Temp low = Temp();
5967    for (Temp tmp : unpacked) {
5971          if (low != Temp()) {
5972             Temp high = emit_extract_vector(ctx, tmp, byte_idx / 2, v2b);
5973             Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, high);
5974             low = Temp();
5986    if (low != Temp()) {
5987       Temp dword = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), low, Operand(v2b));
6022 emit_mimg(Builder& bld, aco_opcode op, Definition dst, Temp rsrc, Operand samp,
6023           std::vector<Temp> coords, unsigned wqm_mask = 0, Operand vdata = Operand(v1))
6030       Temp coord = coords[0];
6059       for (Temp& coord : coords) {
6084    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6085    Temp resource = get_ssa_temp(ctx, instr->src[0].ssa);
6086    Temp node = get_ssa_temp(ctx, instr->src[1].ssa);
6087    Temp tmax = get_ssa_temp(ctx, instr->src[2].ssa);
6088    Temp origin = get_ssa_temp(ctx, instr->src[3].ssa);
6089    Temp dir = get_ssa_temp(ctx, instr->src[4].ssa);
6090    Temp inv_dir = get_ssa_temp(ctx, instr->src[5].ssa);
6092    std::vector<Temp> args;
6114 static std::vector<Temp>
6118    Temp src0 = get_ssa_temp(ctx, instr->src[1].ssa);
6127    std::vector<Temp> coords(count);
6151       Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6152       Temp rsrc_word5 = emit_extract_vector(ctx, rsrc, 5, v1);
6154       Temp first_layer =
6192 emit_tfe_init(Builder& bld, Temp dst)
6194    Temp tmp = bld.tmp(dst.regClass());
6218    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6243    Temp tmp;
6249    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6252       Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6288       std::vector<Temp> coords = get_image_coords(ctx, instr);
6326    Temp data = get_ssa_temp(ctx, instr->src[3].ssa);
6343       Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6344       Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6380    std::vector<Temp> coords = get_image_coords(ctx, instr);
6381    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6444    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
6520    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6524       Temp vindex = emit_extract_vector(ctx, get_ssa_temp(ctx, instr->src[1].ssa), 0, v1);
6525       Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6551    std::vector<Temp> coords = get_image_coords(ctx, instr);
6552    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6572 get_buffer_size(isel_context* ctx, Temp desc, Temp dst)
6578       Temp size = emit_extract_vector(ctx, desc, 2, s1);
6580       Temp size_div3 = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1),
6585       Temp stride = emit_extract_vector(ctx, desc, 1, s1);
6589       Temp is12 = bld.sopc(aco_opcode::s_cmp_eq_i32, bld.def(s1, scc), stride, Operand::c32(12u));
6592       Temp shr_dst = dst.type() == RegType::vgpr ? bld.tmp(s1) : dst;
6612       Temp desc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6618    std::vector<Temp> lod{bld.copy(bld.def(v1), Operand::zero())};
6621    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6623    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6641 get_image_samples(isel_context* ctx, Definition dst, Temp resource)
6645    Temp dword3 = emit_extract_vector(ctx, resource, 3, s1);
6646    Temp samples_log2 = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6648    Temp samples = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand::c32(1u),
6650    Temp type = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3,
6658       Temp dword1 = emit_extract_vector(ctx, resource, 1, s1);
6659       Temp is_non_null_descriptor =
6664    Temp is_msaa = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), type, Operand::c32(14u));
6672    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6673    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6683    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6684    Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6701    Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
6704    Temp offset = get_ssa_temp(ctx, instr->src[2].ssa);
6706    Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
6714    Temp write_datas[32];
6750    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
6757    Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
6758    Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
6760    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
6840 parse_global(isel_context* ctx, nir_intrinsic_instr* intrin, Temp* address, uint32_t* const_offset,
6841              Temp* offset)
6853       *offset = Temp();
6863    Temp addr, offset;
6911    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
6918    Temp write_datas[32];
6923    Temp addr, offset;
6928       Temp write_address = addr;
6930       Temp write_offset = offset;
6977          Temp rsrc = get_gfx6_global_rsrc(bld, write_address);
7003    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7010    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7014    Temp addr, offset;
7155       Temp rsrc = get_gfx6_global_rsrc(bld, addr);
7206    Temp dst = get_ssa_temp(ctx, &intrin->dest.ssa);
7207    Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[0].ssa));
7208    Temp v_offset = as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[1].ssa));
7209    Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
7230    Temp store_src = get_ssa_temp(ctx, intrin->src[0].ssa);
7231    Temp descriptor = get_ssa_temp(ctx, intrin->src[1].ssa);
7232    Temp v_offset = get_ssa_temp(ctx, intrin->src[2].ssa);
7233    Temp s_offset = get_ssa_temp(ctx, intrin->src[3].ssa);
7253    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7254    Temp base = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
7255    Temp offset = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
7360    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7361    Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7374    Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7375    Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7388    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
7389    Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7498       Temp data2 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
7517    Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
7533       Temp data = get_ssa_temp(ctx, instr->src[0].ssa);
7535       Temp data0 = emit_extract_vector(ctx, data, 0, comp_rc);
7536       Temp data1 = emit_extract_vector(ctx, data, 1, comp_rc);
7539       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7550       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7553          Temp comp[4];
7558             Temp comp0 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[0], comp[1]);
7559             Temp comp1 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[2], comp[3]);
7573 Temp
7577    Temp scratch_addr = ctx->program->private_segment_buffer;
7607    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
7639    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7640    Temp offset = get_ssa_temp(ctx, instr->src[1].ssa);
7646    Temp write_datas[32];
7654       offset = nir_src_is_const(instr->src[1]) ? Temp(0, s1) : offset;
7682       Temp rsrc = get_scratch_resource(ctx);
7699    Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
7704    Temp gsvs_ring =
7721    Temp gsvs_dwords[4];
7728       Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset));
7730       Temp carry = bld.tmp(s1);
7790 Temp
7791 emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src)
7800       Temp tmp =
7811       Temp tmp =
7815       Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
7819       Temp tmp =
7826       Temp tmp =
7844       Temp lane_id = emit_mbcnt(ctx, bld.tmp(v1));
7845       Temp cluster_offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1),
7848       Temp tmp;
7879       return Temp();
7883 Temp
7884 emit_boolean_exclusive_scan(isel_context* ctx, nir_op op, Temp src)
7893    Temp tmp;
7900    Temp mbcnt = emit_mbcnt(ctx, bld.tmp(v1), Operand(tmp));
7911    return Temp();
7914 Temp
7915 emit_boolean_inclusive_scan(isel_context* ctx, nir_op op, Temp src)
7923    Temp tmp = emit_boolean_exclusive_scan(ctx, op, src);
7932    return Temp();
7967 emit_uniform_subgroup(isel_context* ctx, nir_intrinsic_instr* instr, Temp src)
7979 emit_addition_uniform_reduce(isel_context* ctx, nir_op op, Definition dst, nir_src src, Temp count)
7982    Temp src_tmp = get_ssa_temp(ctx, src.ssa);
7986       Temp tmp = dst.regClass() == s1 ? bld.tmp(RegClass::get(RegType::vgpr, src.ssa->bit_size / 8))
8051       Temp thread_count =
8077       Temp packed_tid;
8096    Temp lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
8097    Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8100       Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8119 Temp
8121                      Definition dst, Temp src)
8176 emit_interp_center(isel_context* ctx, Temp dst, Temp bary, Temp pos1, Temp pos2)
8179    Temp p1 = emit_extract_vector(ctx, bary, 0, v1);
8180    Temp p2 = emit_extract_vector(ctx, bary, 1, v1);
8182    Temp ddx_1, ddx_2, ddy_1, ddy_2;
8189       Temp tl_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p1, dpp_ctrl0);
8192       Temp tl_2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), p2, dpp_ctrl0);
8196       Temp tl_1 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p1, (1 << 15) | dpp_ctrl0);
8202       Temp tl_2 = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), p2, (1 << 15) | dpp_ctrl0);
8212    Temp tmp1 = bld.vop3(mad, bld.def(v1), ddx_1, pos1, p1);
8213    Temp tmp2 = bld.vop3(mad, bld.def(v1), ddx_2, pos1, p2);
8216    Temp wqm1 = bld.tmp(v1);
8218    Temp wqm2 = bld.tmp(v1);
8224 Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
8225 void ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt);
8226 static void create_primitive_exports(isel_context *ctx, Temp prim_ch1);
8229 Temp
8255       Temp bary = get_interp_param(ctx, instr->intrinsic, mode);
8257       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8263       Temp model = get_arg(ctx, ctx->args->ac.pull_model);
8265       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8271       Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
8272       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8287       Temp sample_pos;
8288       Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
8290       Temp private_segment_buffer = ctx->program->private_segment_buffer;
8317          Temp tmp0 = bld.tmp(s1);
8318          Temp tmp1 = bld.tmp(s1);
8327          Temp pck0 = bld.tmp(v1);
8328          Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp();
8330          Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), bld.def(bld.lm), tmp1,
8341          Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
8365       Temp pos1 = bld.tmp(RegClass(sample_pos.type(), 1));
8366       Temp pos2 = bld.tmp(RegClass(sample_pos.type(), 1));
8375       Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
8377       Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
8379       Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
8389       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8401       Temp posx = get_arg(ctx, ctx->args->ac.frag_pos[0]);
8402       Temp posy = get_arg(ctx, ctx->args->ac.frag_pos[1]);
8487       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8491          Temp addr = get_arg(ctx, ctx->args->ac.num_work_groups);
8501       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8502       Temp addr = get_arg(ctx, ctx->args->ac.ray_launch_size_addr);
8508       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8510          Temp local_ids[3];
8528       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8545             Temp wave_id =
8549             Temp temp = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), wave_id,
8551             Temp thread_id = emit_mbcnt(ctx, bld.tmp(v1));
8567       Temp id = emit_mbcnt(ctx, bld.tmp(v1));
8575          Temp tg_num = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
8581          Temp tg_num =
8621       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8622       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8648       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8652          Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
8656          Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8662             Temp tmp = bld.tmp(v1);
8672             Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8680             Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, tid);
8684             Temp tmp;
8707       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8708       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8712          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8720          Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
8729       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8730       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8734       Temp tmp =
8738       Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
8743       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8744       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8748       Temp tmp = bool_to_scalar_condition(ctx, src);
8755       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8756       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8818          Temp tmp_dst = emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size,
8829       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8856       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8857       Temp tmp(dst);
8899          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8927       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8932       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8943          Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src);
8946          Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8949          Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
8954          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
8966       Temp src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
8967       Temp val = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
8968       Temp lane = bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa));
8969       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8974          Temp src_lo = bld.tmp(v1), src_hi = bld.tmp(v1);
8975          Temp val_lo = bld.tmp(s1), val_hi = bld.tmp(s1);
8978          Temp lo = emit_wqm(bld, bld.writelane(bld.def(v1), val_lo, lane, src_hi));
8979          Temp hi = emit_wqm(bld, bld.writelane(bld.def(v1), val_hi, lane, src_hi));
8988       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
8989       Temp add_src = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
8990       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
8993       Temp wqm_tmp = emit_mbcnt(ctx, bld.tmp(v1), Operand(src), Operand(add_src));
8998       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9007       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9008       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9026       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9041       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9043       Temp cond =
9060          Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
9080       Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm));
9081       Temp last = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc),
9091       Temp elected = bld.pseudo(aco_opcode::p_elect, bld.def(bld.lm), Operand(exec, bld.lm));
9097       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9101          Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);
9113       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9118       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9123       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9128       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9133       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9138       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9156       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9214       Temp prim_ch1 = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
9220       Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
9221       Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa);
9226       Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa);
9227       Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa);
9228       Temp m0_val = get_ssa_temp(ctx, instr->src[2].ssa);
9229       Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), bld.as_uniform(m0_val)));
9235       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9236       Temp addr = get_arg(ctx, ctx->args->ac.sbt_descriptors);
9263       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9264       Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
9280 build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
9281                   Temp* out_tc)
9285    Temp deriv_x = emit_extract_vector(ctx, deriv, 0, v1);
9286    Temp deriv_y = emit_extract_vector(ctx, deriv, 1, v1);
9287    Temp deriv_z = emit_extract_vector(ctx, deriv, 2, v1);
9294    Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma);
9295    Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive);
9296    Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma);
9298    Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id);
9299    Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id);
9301    Temp is_not_ma_x =
9305    Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x);
9306    Temp sgn = bld.vop2_e64(
9325 prepare_cube_coords(isel_context* ctx, std::vector<Temp>& coords, Temp* ddx, Temp* ddy,
9329    Temp ma, tc, sc, id;
9345    Temp invma = bld.tmp(v1);
9365          Temp deriv_ma;
9366          Temp deriv_sc, deriv_tc;
9371          Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9374          Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
9417    Temp resource, sampler, bias = Temp(), compare = Temp(), sample_index = Temp(), lod = Temp(),
9418                            offset = Temp(), ddx = Temp(), ddy = Temp(), clamped_lod = Temp(),
9419                            coord = Temp();
9420    std::vector<Temp> coords;
9421    std::vector<Temp> derivs;
9525       Temp acc, pack = Temp();
9548             if (pack == Temp()) {
9555          if (pack_const && pack != Temp())
9570             if (pack == Temp()) {
9577          if (pack_const && pack != Temp())
9580       if (pack_const && pack == Temp())
9582       else if (pack == Temp())
9589    std::vector<Temp> unpacked_coord;
9606    } else if (coord != Temp()) {
9629       std::array<Temp, 2> ddxddy = {ddx, ddy};
9630       for (Temp tmp : ddxddy) {
9631          if (tmp == Temp())
9633          std::vector<Temp> unpacked = {tmp};
9636             Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes()));
9639          for (Temp derv : emit_pack_v1(ctx, unpacked))
9658    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
9659    Temp tmp_dst = dst;
9683                                         resource, Operand(s4), std::vector<Temp>{lod});
9699    Temp tg4_compare_cube_wa64 = Temp();
9702       Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
9703       Temp size = bld.tmp(v2);
9705                                         resource, Operand(s4), std::vector<Temp>{tg4_lod});
9711       Temp half_texel[2];
9729          Temp not_needed =
9739       Temp new_coords[2] = {bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]),
9744          Temp* const desc = (Temp*)alloca(resource.size() * sizeof(Temp));
9754          Temp dfmt = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), desc[1],
9756          Temp compare_cube_wa = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), dfmt,
9759          Temp nfmt;
9837    std::vector<Temp> args;
9879             Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
9885             Temp is_not_null = bld.tmp(bld.lm);
10044       Temp val[4];
10047          Temp cvt_val;
10056       Temp tmp = dst.regClass() == tmp_dst.regClass() ? dst : bld.tmp(tmp_dst.regClass());
10071    Temp tmp = get_ssa_temp(ctx, ssa);
10091    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
10186    Temp dst = get_ssa_temp(ctx, &instr->def);
10475 static void begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond);
10490       Temp cond = bld.copy(bld.def(s1, scc), Operand::zero());
10543 begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond)
10701 begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
10797    Temp cond = get_ssa_temp(ctx, if_stmt->condition.ssa);
10949          Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u),
11020 create_primitive_exports(isel_context *ctx, Temp prim_ch1)
11038    Temp ch2 = bld.copy(bld.def(v1), Operand::c32(0));
11043       Temp tmp = ctx->outputs.temps[VARYING_SLOT_LAYER * 4u];
11048       Temp tmp = ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u];
11053       Temp tmp = ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u];
11194          Temp isnan = bld.vopc(aco_opcode::v_cmp_class_f32, bld.def(bld.lm), values[i],
11389             Temp tmp = convert_int(ctx, bld, chan.getTemp(), 16, 32, sign_ext);
11398    Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ps_epilog_pc));
11453 emit_stream_output(isel_context* ctx, Temp const* so_buffers, Temp const* so_write_offset,
11473       Temp write_data = ctx->program->allocateTmp(RegClass(RegType::vgpr, count));
11509    Temp so_vtx_count =
11513    Temp tid = emit_mbcnt(ctx, bld.tmp(v1));
11515    Temp can_emit = bld.vopc(aco_opcode::v_cmp_gt_i32, bld.def(bld.lm), so_vtx_count, tid);
11522    Temp so_write_index =
11525    Temp so_buffers[4];
11526    Temp so_write_offset[4];
11527    Temp buf_ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->streamout_buffers));
11538          Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
11541          Temp new_offset = bld.vadd32(bld.def(v1), offset, tid);
11546          Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u);
11547          Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(4u),
11592          Temp elems[16];
11599          Temp dst = ctx->program->allocateTmp(type);
11647    Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
11651    Temp instance_id =
11654    Temp vs_rel_patch_id =
11657    Temp vertex_id =
11697       Temp sel = bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.def(bld.lm),
11701          Temp new_coord[2];
11703             Temp persp_centroid =
11705             Temp persp_center =
11717          Temp new_coord[2];
11719             Temp linear_centroid =
11721             Temp linear_center =
11801 Temp
11802 lanecount_to_mask(isel_context* ctx, Temp count, bool allow64 = true)
11807    Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand::zero());
11808    Temp cond;
11816       Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count,
11829 Temp
11835    Temp count = i == 0
11844 ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt)
11849    Temp prm_cnt_0;
11864    Temp tmp =
11878       Temp first_lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
11879       Temp cond = bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc),
11890       Temp zero = bld.copy(bld.def(v1), Operand::zero());
11892       Temp nan_coord = bld.copy(bld.def(v1), Operand::c32(-1u));
11962          Temp cond = merged_wave_info_to_mask(&ctx, i);
12053    Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4),
12061    Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u),
12077          Temp cond =
12095             Temp val = bld.tmp(v1);
12097             load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1, 0u, true,
12559       Temp inputs = get_arg(&ctx, ctx.args->ps_epilog_inputs[i]);