Lines Matching refs:ctx

511 can_use_VOP3(opt_ctx& ctx, const aco_ptr<Instruction>& instr)
519 if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->gfx_level < GFX10)
535 pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsigned index)
550 ctx.program->gfx_level >= GFX9 ||
599 can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
601 if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
613 to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
626 ssa_info& info = ctx.info[instr->definitions[i].tempId()];
644 to_SDWA(opt_ctx& ctx, aco_ptr<Instruction>& instr)
646 aco_ptr<Instruction> tmp = convert_to_SDWA(ctx.program->gfx_level, instr);
651 ssa_info& info = ctx.info[instr->definitions[i].tempId()];
697 check_vop3_operands(opt_ctx& ctx, unsigned num_operands, Operand* operands)
699 int limit = ctx.program->gfx_level >= GFX10 ? 2 : 1;
718 if (ctx.program->gfx_level < GFX10)
745 parse_base_offset(opt_ctx& ctx, Instruction* instr, unsigned op_index, Temp* base, uint32_t* offset,
753 if (!ctx.info[tmp.id()].is_add_sub())
756 Instruction* add_instr = ctx.info[tmp.id()].instr;
793 ctx.info[add_instr->operands[i].tempId()].is_constant_or_literal(32)) {
794 *offset = ctx.info[add_instr->operands[i].tempId()].val * (uint32_t)(is_sub ? -1 : 1);
802 if (parse_base_offset(ctx, add_instr, !i, base, &offset2, prevent_overflow)) {
814 skip_smem_offset_align(opt_ctx& ctx, SMEM_instruction* smem)
824 if (!op.isTemp() || !ctx.info[op.tempId()].is_bitwise())
827 Instruction* bitwise_instr = ctx.info[op.tempId()].instr;
840 smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
844 skip_smem_offset_align(ctx, &instr->smem());
849 ssa_info info = ctx.info[instr->operands[1].tempId()];
855 ((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) ||
856 (ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) ||
857 (ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) {
859 } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, prevent_overflow) &&
860 base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 &&
864 if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) &&
865 ctx.info[smem.operands.back().tempId()].val == 0) {
891 skip_smem_offset_align(ctx, &instr->smem());
912 get_constant_op(opt_ctx& ctx, ssa_info info, uint32_t bits)
916 return Operand::get_const(ctx.program->gfx_level, info.val, bits / 8u);
920 propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info, unsigned i)
928 instr->operands[i] = get_constant_op(ctx, info, bits);
952 Operand op = Operand::get_const(ctx.program->gfx_level, val, bits / 8u);
954 op = Operand::get_const(ctx.program->gfx_level, val | 0xffff0000, 4);
956 op = Operand::get_const(ctx.program->gfx_level, val << 16, 4);
1063 can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
1077 } else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
1078 (tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
1083 can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
1107 apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
1116 ctx.info[tmp.id()].label &= ~label_insert;
1133 } else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
1134 (tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
1135 to_SDWA(ctx, instr);
1158 ctx.info[def.tempId()].label &= (label_vopc | label_f2f32 | instr_mod_labels);
1162 check_sdwa_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr)
1168 ssa_info& info = ctx.info[op.tempId()];
1171 if (!can_apply_extract(ctx, instr, i, info))
1178 does_fp_op_flush_denorms(opt_ctx& ctx, aco_opcode op)
1180 if (ctx.program->gfx_level <= GFX8) {
1196 can_eliminate_fcanonicalize(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp tmp)
1198 float_mode* fp = &ctx.fp_mode;
1199 if (ctx.info[tmp.id()].is_canonicalized() ||
1204 return instr_info.can_use_input_modifiers[(int)op] && does_fp_op_flush_denorms(ctx, op);
1208 can_eliminate_and_exec(opt_ctx& ctx, Temp tmp, unsigned pass_flags)
1210 if (ctx.info[tmp.id()].is_vopc()) {
1211 Instruction* vopc_instr = ctx.info[tmp.id()].instr;
1216 if (ctx.info[tmp.id()].is_bitwise()) {
1217 Instruction* instr = ctx.info[tmp.id()].instr;
1222 return can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), pass_flags) &&
1223 can_eliminate_and_exec(ctx, instr->operands[1].getTemp(), pass_flags);
1229 is_copy_label(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info)
1232 (info.is_fcanonicalize() && can_eliminate_fcanonicalize(ctx, instr, info.temp));
1236 is_op_canonicalized(opt_ctx& ctx, Operand op)
1238 float_mode* fp = &ctx.fp_mode;
1239 if ((op.isTemp() && ctx.info[op.tempId()].is_canonicalized()) ||
1243 if (op.isConstant() || (op.isTemp() && ctx.info[op.tempId()].is_constant_or_literal(32))) {
1244 uint32_t val = op.isTemp() ? ctx.info[op.tempId()].val : op.constantValue();
1254 is_scratch_offset_valid(opt_ctx& ctx, Instruction* instr, int32_t offset)
1256 bool negative_unaligned_scratch_offset_bug = ctx.program->gfx_level == GFX10;
1257 int32_t min = ctx.program->dev.scratch_global_offset_min;
1258 int32_t max = ctx.program->dev.scratch_global_offset_max;
1268 label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
1274 all_const && (!op.isTemp() || ctx.info[op.tempId()].is_constant_or_literal(32));
1275 perfwarn(ctx.program, all_const, "All instruction operands are constant", instr.get());
1280 perfwarn(ctx.program, is_copy && !instr->usesModifiers(), "Use p_parallelcopy instead",
1285 smem_combine(ctx, instr);
1291 ssa_info info = ctx.info[instr->operands[i].tempId()];
1297 instr->operands[i].setTemp(ctx.info[instr->operands[i].tempId()].temp);
1298 info = ctx.info[info.temp.id()];
1304 pseudo_propagate_temp(ctx, instr, info.temp, i);
1305 info = ctx.info[info.temp.id()];
1314 instr->operands[i] = get_constant_op(ctx, info, bits);
1321 if (is_copy_label(ctx, instr, info) && info.temp.type() == RegType::vgpr &&
1324 info = ctx.info[info.temp.id()];
1327 if (info.is_temp() && info.temp.type() == RegType::sgpr && can_apply_sgprs(ctx, instr) &&
1331 info = ctx.info[info.temp.id()];
1343 can_use_mod = can_use_mod && (instr->isDPP16() || can_use_VOP3(ctx, instr));
1355 can_eliminate_fcanonicalize(ctx, instr, info.temp)) {
1357 to_VOP3(ctx, instr);
1367 can_eliminate_fcanonicalize(ctx, instr, info.temp)) {
1369 to_VOP3(ctx, instr);
1381 propagate_constants_vop3p(ctx, instr, info, i);
1386 (!instr->isSDWA() || ctx.program->gfx_level >= GFX9)) {
1387 Operand op = get_constant_op(ctx, info, bits);
1388 perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2,
1399 } else if (can_use_VOP3(ctx, instr)) {
1400 to_VOP3(ctx, instr);
1413 info = ctx.info[info.temp.id()];
1421 bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->gfx_level < GFX9;
1435 parse_base_offset(ctx, instr.get(), i, &base, &offset,
1442 } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
1456 info = ctx.info[info.temp.id()];
1458 if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
1460 is_scratch_offset_valid(ctx, instr.get(), scratch.offset + (int32_t)offset)) {
1465 ctx.program->gfx_level >= GFX10_3 &&
1466 is_scratch_offset_valid(ctx, NULL, scratch.offset + (int32_t)info.val)) {
1480 bool has_usable_ds_offset = ctx.program->gfx_level >= GFX7;
1482 parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
1520 if (ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
1524 instr->operands[0].setTemp(ctx.info[instr->operands[0].tempId()].temp);
1531 check_sdwa_extract(ctx, instr);
1539 if (!does_fp_op_flush_denorms(ctx, instr->opcode)) {
1542 canonicalized = is_op_canonicalized(ctx, instr->operands[i]);
1545 ctx.info[instr->definitions[0].tempId()].set_canonicalized();
1549 ctx.info[instr->definitions[0].tempId()].set_vopc(instr.get());
1550 check_sdwa_extract(ctx, instr);
1554 ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
1564 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1575 if (aligned && op.isTemp() && ctx.info[op.tempId()].is_vec()) {
1576 Instruction* vec = ctx.info[op.tempId()].instr;
1591 if (ops[i].isTemp() && ctx.info[ops[i].tempId()].is_temp() &&
1592 ops[i].regClass() == ctx.info[ops[i].tempId()].temp.regClass())
1593 ops[i].setTemp(ctx.info[ops[i].tempId()].temp);
1602 ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
1606 if (instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_split()) {
1607 Instruction* split = ctx.info[instr->operands[1].tempId()].instr;
1610 ctx.info[instr->definitions[0].tempId()].set_temp(split->operands[0].getTemp());
1616 ssa_info& info = ctx.info[instr->operands[0].tempId()];
1622 ctx.info[def.tempId()].set_constant(ctx.program->gfx_level, val & mask);
1629 ctx.info[instr->definitions[1].tempId()].set_split(instr.get());
1632 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1633 ctx.info[instr->definitions[1].tempId()].set_extract(instr.get());
1639 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
1654 ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->gfx_level,
1657 ctx.info[instr->definitions[i].tempId()].set_undefined();
1660 ctx.info[instr->definitions[i].tempId()].set_temp(vec_op.getTemp());
1666 ssa_info& info = ctx.info[instr->operands[0].tempId()];
1690 Operand::get_const(ctx.program->gfx_level, val, instr->definitions[0].bytes());
1699 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1701 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
1711 if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_vec() &&
1716 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
1725 if (op.isTemp() && ctx.info[op.tempId()].is_temp() &&
1726 ctx.info[op.tempId()].temp.type() == instr->definitions[0].regClass().type())
1727 op.setTemp(ctx.info[op.tempId()].temp);
1729 ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
1739 ctx.info[instr->definitions[0].tempId()].set_constant(
1740 ctx.program->gfx_level, instr->operands[0].constantValue64());
1742 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1743 if (ctx.info[instr->operands[0].tempId()].is_canonicalized())
1744 ctx.info[instr->definitions[0].tempId()].set_canonicalized();
1753 ctx.info[instr->definitions[0].tempId()].set_dpp16(instr.get());
1755 ctx.info[instr->definitions[0].tempId()].set_dpp8(instr.get());
1759 if (!ctx.program->needs_wqm)
1760 ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
1762 case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break;
1766 ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
1788 ctx.info[instr->definitions[0].tempId()].set_neg_abs(other);
1790 ctx.info[instr->definitions[0].tempId()].set_abs(other);
1792 ctx.info[instr->definitions[0].tempId()].set_neg(other);
1794 ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(other);
1799 ctx.info[instr->operands[i].tempId()].set_omod2(instr.get());
1802 ctx.info[instr->operands[i].tempId()].set_omod4(instr.get());
1805 ctx.info[instr->operands[i].tempId()].set_omod5(instr.get());
1807 (!(fp16 ? ctx.fp_mode.preserve_signed_zero_inf_nan16_64
1808 : ctx.fp_mode.preserve_signed_zero_inf_nan32) ||
1810 ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
1822 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
1843 ctx.info[instr->operands[idx].tempId()].set_clamp(instr.get());
1848 ctx.info[instr->definitions[0].tempId()].set_vcc(instr->operands[2].getTemp());
1851 ctx.info[instr->definitions[0].tempId()].set_b2f(instr->operands[2].getTemp());
1853 ctx.info[instr->definitions[0].tempId()].set_b2i(instr->operands[2].getTemp());
1859 ctx.info[instr->operands[1].tempId()].is_vcc())
1860 ctx.info[instr->definitions[0].tempId()].set_temp(
1861 ctx.info[instr->operands[1].tempId()].temp);
1875 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1883 ctx.info[instr->definitions[0].tempId()].set_undefined();
1902 ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get());
1906 if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
1907 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1908 ctx.info[instr->definitions[1].tempId()].set_scc_invert(
1909 ctx.info[instr->operands[0].tempId()].temp);
1910 } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) {
1911 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1912 ctx.info[instr->definitions[1].tempId()].set_scc_invert(
1913 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1915 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
1920 if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
1923 ctx.info[instr->definitions[1].tempId()].set_temp(
1924 ctx.info[instr->operands[0].tempId()].temp);
1925 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(
1926 ctx.info[instr->operands[0].tempId()].temp);
1928 } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) {
1931 ctx.info[instr->definitions[1].tempId()].set_temp(
1932 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1933 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(
1934 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1936 } else if ((ctx.program->stage.num_sw_stages() > 1 ||
1937 ctx.program->stage.hw == HWStage::NGG) &&
1941 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1943 } else if (can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
1944 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1954 [&ctx](const Operand& op)
1956 return op.isTemp() && (ctx.info[op.tempId()].is_uniform_bool() ||
1957 ctx.info[op.tempId()].is_uniform_bitwise());
1959 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1961 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
1969 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
1987 ctx.info[instr->definitions[0].tempId()].set_minmax(instr.get());
1993 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(instr->operands[2].getTemp());
1995 if (instr->operands[2].isTemp() && ctx.info[instr->operands[2].tempId()].is_scc_invert()) {
1998 instr->operands[2].setTemp(ctx.info[instr->operands[2].tempId()].temp);
2002 if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
2003 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
2010 ctx.info[instr->definitions[0].tempId()].set_canonicalized();
2014 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
2016 ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
2023 ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
2025 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
2026 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
2034 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
2039 ctx.info[instr->operands[0].tempId()].set_f2f16(instr.get());
2044 ctx.info[instr->definitions[0].tempId()].set_f2f32(instr.get());
2052 if (!(ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)))
2053 check_sdwa_extract(ctx, instr);
2057 original_temp_id(opt_ctx& ctx, Temp tmp)
2059 if (ctx.info[tmp.id()].is_temp())
2060 return ctx.info[tmp.id()].temp.id();
2066 decrease_uses(opt_ctx& ctx, Instruction* instr)
2068 if (!--ctx.uses[instr->definitions[0].tempId()]) {
2071 ctx.uses[op.tempId()]--;
2077 follow_operand(opt_ctx& ctx, Operand op, bool ignore_uses = false)
2079 if (!op.isTemp() || !(ctx.info[op.tempId()].label & instr_usedef_labels))
2081 if (!ignore_uses && ctx.uses[op.tempId()] > 1)
2084 Instruction* instr = ctx.info[op.tempId()].instr;
2088 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2098 combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2100 if (instr->definitions[0].regClass() != ctx.program->lane_mask)
2102 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2115 op_instr[i] = follow_operand(ctx, instr->operands[i], true);
2143 if (original_temp_id(ctx, op0) != original_temp_id(ctx, op1))
2153 if (num_sgprs > (ctx.program->gfx_level >= GFX10 ? 2 : 1))
2156 ctx.uses[op[0].id()]++;
2157 ctx.uses[op[1].id()]++;
2158 decrease_uses(ctx, op_instr[0]);
2159 decrease_uses(ctx, op_instr[1]);
2184 ctx.info[instr->definitions[0].tempId()].label = 0;
2185 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2195 combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2197 if (instr->definitions[0].regClass() != ctx.program->lane_mask)
2199 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2205 Instruction* nan_test = follow_operand(ctx, instr->operands[0], true);
2206 Instruction* cmp = follow_operand(ctx, instr->operands[1], true);
2225 unsigned prop_cmp0 = original_temp_id(ctx, cmp->operands[0].getTemp());
2226 unsigned prop_cmp1 = original_temp_id(ctx, cmp->operands[1].getTemp());
2227 unsigned prop_nan0 = original_temp_id(ctx, nan_test->operands[0].getTemp());
2228 unsigned prop_nan1 = original_temp_id(ctx, nan_test->operands[1].getTemp());
2234 ctx.uses[cmp->operands[0].tempId()]++;
2235 ctx.uses[cmp->operands[1].tempId()]++;
2236 decrease_uses(ctx, nan_test);
2237 decrease_uses(ctx, cmp);
2258 ctx.info[instr->definitions[0].tempId()].label = 0;
2259 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2267 is_operand_constant(opt_ctx& ctx, Operand op, unsigned bit_size, uint64_t* value)
2273 unsigned id = original_temp_id(ctx, op.getTemp());
2274 if (!ctx.info[id].is_constant_or_literal(bit_size))
2276 *value = get_constant_op(ctx, ctx.info[id], bit_size).constantValue64();
2296 combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2298 if (instr->definitions[0].regClass() != ctx.program->lane_mask)
2300 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2305 Instruction* nan_test = follow_operand(ctx, instr->operands[0], true);
2306 Instruction* cmp = follow_operand(ctx, instr->operands[1], true);
2328 unsigned prop_nan0 = original_temp_id(ctx, nan_test->operands[0].getTemp());
2329 unsigned prop_nan1 = original_temp_id(ctx, nan_test->operands[1].getTemp());
2343 original_temp_id(ctx, cmp->operands[i].getTemp()) == prop_nan0) {
2352 if (!is_operand_constant(ctx, cmp->operands[constant_operand], bit_size, &constant_value))
2358 ctx.uses[cmp->operands[0].tempId()]++;
2360 ctx.uses[cmp->operands[1].tempId()]++;
2361 decrease_uses(ctx, nan_test);
2362 decrease_uses(ctx, cmp);
2383 ctx.info[instr->definitions[0].tempId()].label = 0;
2384 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2393 combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2397 if (ctx.uses[instr->definitions[1].tempId()])
2400 Instruction* cmp = follow_operand(ctx, instr->operands[1]);
2409 ctx.uses[cmp->operands[0].tempId()]++;
2411 ctx.uses[cmp->operands[1].tempId()]++;
2412 decrease_uses(ctx, cmp);
2462 ctx.info[instr->definitions[0].tempId()].label = 0;
2463 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2473 match_op3_for_vop3(opt_ctx& ctx, aco_opcode op1, aco_opcode op2, Instruction* op1_instr, bool swap,
2482 Instruction* op2_instr = follow_operand(ctx, op1_instr->operands[swap]);
2541 if (!check_vop3_operands(ctx, 3, operands))
2548 create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
2562 ctx.info[instr->definitions[0].tempId()].label = 0;
2568 combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode op2, aco_opcode new_op,
2578 if (match_op3_for_vop3(ctx, instr->opcode, op2, instr.get(), swap, shuffle, operands, neg,
2580 ctx.uses[instr->operands[swap].tempId()]--;
2581 create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
2590 combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2595 if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::s_and_b32, aco_opcode::v_and_or_b32,
2598 if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::v_and_b32, aco_opcode::v_and_or_b32,
2601 if (combine_three_valu_op(ctx, instr, aco_opcode::s_lshl_b32, new_op_lshl, "120", 1 | 2))
2603 if (combine_three_valu_op(ctx, instr, aco_opcode::v_lshlrev_b32, new_op_lshl, "210", 1 | 2))
2615 Instruction* extins = follow_operand(ctx, instr->operands[i]);
2641 if (!check_vop3_operands(ctx, 3, operands))
2650 ctx.uses[instr->operands[i].tempId()]--;
2651 create_vop3_for_op3(ctx, op, instr, operands, neg, abs, opsel, clamp, omod);
2659 combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposite, aco_opcode minmax3)
2662 if (combine_three_valu_op(ctx, instr, instr->opcode, minmax3, "012", 1 | 2))
2672 if (match_op3_for_vop3(ctx, instr->opcode, opposite, instr.get(), swap, "012", operands, neg,
2675 ctx.uses[instr->operands[swap].tempId()]--;
2678 create_vop3_for_op3(ctx, minmax3, instr, operands, neg, abs, opsel, clamp, omod);
2692 combine_salu_not_bitwise(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2697 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2700 Instruction* op2_instr = follow_operand(ctx, instr->operands[0]);
2716 ctx.uses[instr->operands[0].tempId()]--;
2717 ctx.info[op2_instr->definitions[0].tempId()].label = 0;
2737 combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2739 if (instr->definitions[0].isTemp() && ctx.info[instr->definitions[0].tempId()].is_uniform_bool())
2743 Instruction* op2_instr = follow_operand(ctx, instr->operands[i]);
2747 if (ctx.uses[op2_instr->definitions[1].tempId()] || fixed_to_exec(op2_instr->operands[0]))
2754 ctx.uses[instr->operands[i].tempId()]--;
2757 ctx.info[instr->definitions[0].tempId()].label = 0;
2774 combine_salu_lshl_add(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2776 if (instr->opcode == aco_opcode::s_add_i32 && ctx.uses[instr->definitions[1].tempId()])
2780 Instruction* op2_instr = follow_operand(ctx, instr->operands[i], true);
2782 ctx.uses[op2_instr->definitions[1].tempId()])
2795 ctx.uses[instr->operands[i].tempId()]--;
2798 ctx.info[instr->definitions[0].tempId()].label = 0;
2810 combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op, uint8_t ops)
2818 if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2i() &&
2819 ctx.uses[instr->operands[i].tempId()] == 1) {
2825 } else if (ctx.program->gfx_level >= GFX10 ||
2832 ctx.uses[instr->operands[i].tempId()]--;
2838 Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
2842 ctx.uses.push_back(0);
2846 new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
2848 ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get());
2857 combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2863 Instruction* op_instr = follow_operand(ctx, instr->operands[i]);
2870 ctx.uses[instr->operands[i].tempId()]--;
2875 ctx.info[instr->definitions[0].tempId()].label = 0;
2928 combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode min, aco_opcode max,
2946 if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap, "012", operands, neg,
2963 ctx.info[operands[i].tempId()].is_constant_or_literal(32)) {
2964 val = ctx.info[operands[i].tempId()].val >> (hi16 ? 16 : 0);
3038 ctx.uses[instr->operands[swap].tempId()]--;
3039 create_vop3_for_op3(ctx, med, instr, operands, neg, abs, opsel, clamp, omod);
3049 apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3068 ssa_info& info = ctx.info[instr->operands[i].tempId()];
3069 if (is_copy_label(ctx, instr, info) && info.temp.type() == RegType::sgpr)
3075 if (ctx.program->gfx_level >= GFX10 && !is_shift64)
3090 uint16_t uses = ctx.uses[instr->operands[i].tempId()];
3091 if (sgpr_info_id == 0 || uses < ctx.uses[sgpr_info_id]) {
3098 ssa_info& info = ctx.info[sgpr_info_id];
3103 if (!info.is_extract() && num_sgprs && ctx.uses[sgpr_info_id] > 1 && !instr->isVOP3() &&
3118 if (info.is_extract() && can_apply_extract(ctx, instr, sgpr_idx, info))
3119 apply_extract(ctx, instr, sgpr_idx, info);
3129 } else if (can_use_VOP3(ctx, instr) && !info.is_extract()) {
3130 to_VOP3(ctx, instr);
3138 ctx.uses[sgpr_info_id]--;
3139 ctx.uses[sgpr.id()]++;
3142 if ((ctx.info[sgpr.id()].label & (label_extract | label_temp)) &&
3143 ctx.info[sgpr.id()].temp.type() == RegType::sgpr)
3150 apply_omod_clamp_helper(opt_ctx& ctx, T* instr, ssa_info& def_info)
3169 apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3171 if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1 ||
3175 bool can_vop3 = can_use_VOP3(ctx, instr);
3182 bool can_use_omod = (can_vop3 || ctx.program->gfx_level >= GFX9) && !instr->isVOP3P();
3185 can_use_omod && ctx.fp_mode.denorm32 == 0 && !ctx.fp_mode.preserve_signed_zero_inf_nan32;
3187 can_use_omod = can_use_omod && ctx.fp_mode.denorm16_64 == 0 &&
3188 !ctx.fp_mode.preserve_signed_zero_inf_nan16_64;
3190 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3197 if (!ctx.uses[def_info.instr->definitions[0].tempId()])
3204 assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
3207 if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info))
3213 to_VOP3(ctx, instr);
3214 if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))
3219 ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_insert | label_f2f16;
3220 ctx.uses[def_info.instr->definitions[0].tempId()]--;
3229 apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3231 if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1)
3234 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3239 if (!ctx.uses[def_info.instr->definitions[0].tempId()])
3243 assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
3248 if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
3251 to_SDWA(ctx, instr);
3257 ctx.info[instr->definitions[0].tempId()].label = 0;
3258 ctx.uses[def_info.instr->definitions[0].tempId()]--;
3267 apply_ds_extract(opt_ctx& ctx, aco_ptr<Instruction>& extract)
3270 if (!ctx.info[extract->operands[0].tempId()].is_usedef() ||
3271 ctx.uses[extract->operands[0].tempId()] > 1)
3275 Instruction* ds = ctx.info[extract->operands[0].tempId()].instr;
3309 ctx.uses[extract->definitions[0].tempId()] = 0;
3310 ctx.info[ds->definitions[0].tempId()].label = 0;
3316 combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3322 Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
3332 } else if (ctx.program->gfx_level >= GFX10 ||
3340 ctx.uses[instr->operands[i].tempId()]--;
3341 if (ctx.uses[instr->operands[i].tempId()])
3342 ctx.uses[op_instr->operands[2].tempId()]++;
3349 ctx.info[instr->definitions[0].tempId()].label = 0;
3363 combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
3379 Instruction* op_instr = follow_operand(ctx, instr->operands[i]);
3403 if (!check_vop3_operands(ctx, 3, ops))
3406 ctx.uses[instr->operands[i].tempId()]--;
3415 ctx.info[instr->definitions[0].tempId()].label = 0;
3447 combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3453 vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1 &&
3456 ssa_info& info = ctx.info[instr->operands[0].tempId()];
3458 VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
3462 ctx.info[candidate->definitions[0].tempId()].instr = candidate;
3463 ctx.uses[instr->definitions[0].tempId()]--;
3475 ssa_info& info = ctx.info[op.tempId()];
3488 if (!check_vop3_operands(ctx, instr->operands.size(), ops))
3508 if (--ctx.uses[fneg->definitions[0].tempId()])
3509 ctx.uses[fneg->operands[0].tempId()]++;
3526 if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_vop3p())
3528 ssa_info& info = ctx.info[instr->operands[i].tempId()];
3539 if (ctx.uses[instr->operands[i].tempId()] >= uses || !check_vop3_operands(ctx, 3, op))
3550 uses = ctx.uses[instr->operands[i].tempId()];
3558 ctx.uses[mul_instr->definitions[0].tempId()]--;
3559 if (ctx.uses[mul_instr->definitions[0].tempId()]) {
3561 ctx.uses[op[0].tempId()]++;
3563 ctx.uses[op[1].tempId()]++;
3590 ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
3596 can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3598 if (ctx.program->gfx_level < GFX9)
3602 if (ctx.program->gfx_level == GFX9 && ctx.fp_mode.denorm16_64)
3616 if (instr->opcode == aco_opcode::v_fma_f32 && !ctx.program->dev.fused_mad_mix &&
3627 to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3660 ctx.info[instr->definitions[0].tempId()].label &= label_f2f16 | label_clamp | label_mul;
3661 if (ctx.info[instr->definitions[0].tempId()].label & label_mul)
3662 ctx.info[instr->definitions[0].tempId()].instr = instr.get();
3666 combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3668 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3673 if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1)
3676 if (!ctx.uses[conv->definitions[0].tempId()])
3683 to_mad_mix(ctx, instr);
3689 ctx.info[instr->definitions[0].tempId()].label &= label_clamp;
3690 ctx.uses[conv->definitions[0].tempId()]--;
3696 combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3698 if (!can_use_mad_mix(ctx, instr))
3705 if (!ctx.info[tmp.id()].is_f2f32())
3708 Instruction* conv = ctx.info[tmp.id()].instr;
3727 if (!check_vop3_operands(ctx, instr->operands.size(), op))
3733 to_mad_mix(ctx, instr);
3737 if (--ctx.uses[tmp.id()])
3738 ctx.uses[conv->operands[0].tempId()]++;
3759 is_pow_of_two(opt_ctx& ctx, Operand op)
3761 if (op.isTemp() && ctx.info[op.tempId()].is_constant_or_literal(op.bytes() * 8))
3762 return is_pow_of_two(ctx, get_constant_op(ctx, ctx.info[op.tempId()], op.bytes() * 8));
3785 combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3787 if (instr->definitions.empty() || is_dead(ctx.uses, instr.get()))
3797 ssa_info& info = ctx.info[op.tempId()];
3802 if (ctx.uses[op.tempId()] > 4) {
3809 can_apply_extract(ctx, instr, i, info)) {
3811 apply_extract(ctx, instr, i, info);
3812 if (--ctx.uses[instr->operands[i].tempId()])
3813 ctx.uses[info.instr->operands[0].tempId()]++;
3818 if (can_apply_sgprs(ctx, instr))
3819 apply_sgprs(ctx, instr);
3820 combine_mad_mix(ctx, instr);
3821 while (apply_omod_clamp(ctx, instr) | combine_output_conversion(ctx, instr))
3823 apply_insert(ctx, instr);
3828 return combine_vop3p(ctx, instr);
3834 ssa_info& info = ctx.info[instr->operands[0].tempId()];
3835 if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
3836 apply_extract(ctx, instr, 0, info);
3837 if (--ctx.uses[instr->operands[0].tempId()])
3838 ctx.uses[info.instr->operands[0].tempId()]++;
3842 apply_ds_extract(ctx, instr);
3856 if ((ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)) &&
3857 ctx.uses[instr->operands[1].tempId()] == 1) {
3858 Temp val = ctx.info[instr->definitions[0].tempId()].temp;
3860 if (!ctx.info[val.id()].is_mul())
3863 Instruction* mul_instr = ctx.info[val.id()].instr;
3872 ctx.fp_mode.preserve_signed_zero_inf_nan32)
3878 ctx.uses[mul_instr->definitions[0].tempId()]--;
3880 bool is_neg = ctx.info[instr->definitions[0].tempId()].is_neg();
3881 bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
3903 ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
3927 if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_mul())
3929 ssa_info& info = ctx.info[instr->operands[i].tempId()];
3952 bool is_fma_precise = is_pow_of_two(ctx, info.instr->operands[0]) ||
3953 is_pow_of_two(ctx, info.instr->operands[1]);
3955 bool has_fma = mad16 || mad64 || (legacy && ctx.program->gfx_level >= GFX10_3) ||
3956 (mad32 && !legacy && !mad_mix && ctx.program->dev.has_fast_fma32) ||
3957 (mad_mix && ctx.program->dev.fused_mad_mix);
3958 bool has_mad = mad_mix ? !ctx.program->dev.fused_mad_mix
3959 : ((mad32 && ctx.program->gfx_level < GFX10_3) ||
3960 (mad16 && ctx.program->gfx_level <= GFX9));
3966 has_mad && (mad_mix || mad32 ? ctx.fp_mode.denorm32 : ctx.fp_mode.denorm16_64) == 0;
3975 if (info.instr->isSDWA() || info.instr->isDPP() || !check_vop3_operands(ctx, 3, op) ||
3976 ctx.uses[instr->operands[i].tempId()] > uses)
3979 if (ctx.uses[instr->operands[i].tempId()] == uses) {
3988 uses = ctx.uses[instr->operands[i].tempId()];
3996 ctx.uses[mul_instr->definitions[0].tempId()]--;
3997 if (ctx.uses[mul_instr->definitions[0].tempId()]) {
3999 ctx.uses[op[0].tempId()]++;
4001 ctx.uses[op[1].tempId()]++;
4087 assert(emit_fma == (ctx.program->gfx_level >= GFX10_3));
4090 mad_op = emit_fma ? (ctx.program->gfx_level == GFX8 ? aco_opcode::v_fma_legacy_f16
4092 : (ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_f16
4113 ctx.mad_infos.emplace_back(std::move(add_instr), mul_instr->definitions[0].tempId());
4114 ctx.info[instr->definitions[0].tempId()].set_mad(instr.get(), ctx.mad_infos.size() - 1);
4120 !ctx.fp_mode.preserve_signed_zero_inf_nan32) ||
4122 !instr->usesModifiers() && !ctx.fp_mode.must_flush_denorms32) {
4124 if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2f() &&
4125 ctx.uses[instr->operands[i].tempId()] == 1 && instr->operands[!i].isTemp() &&
4127 ctx.uses[instr->operands[i].tempId()]--;
4128 ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
4134 new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
4137 ctx.info[instr->definitions[0].tempId()].label = 0;
4141 } else if (instr->opcode == aco_opcode::v_or_b32 && ctx.program->gfx_level >= GFX9) {
4142 if (combine_three_valu_op(ctx, instr, aco_opcode::s_or_b32, aco_opcode::v_or3_b32, "012",
4144 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32,
4146 } else if (combine_add_or_then_and_lshl(ctx, instr)) {
4148 } else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) {
4149 if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012",
4151 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32,
4156 ctx, instr, aco_opcode::v_mul_lo_u16,
4157 ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_u16 : aco_opcode::v_mad_u16,
4160 combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120",
4163 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
4164 } else if (combine_add_bcnt(ctx, instr)) {
4165 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
4167 } else if (ctx.program->gfx_level >= GFX9 && !instr->usesModifiers()) {
4168 if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120",
4170 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32,
4172 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32,
4174 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_u32, aco_opcode::v_add3_u32,
4176 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add3_u32,
4178 } else if (combine_add_or_then_and_lshl(ctx, instr)) {
4183 bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0;
4184 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
4185 } else if (!carry_out && combine_add_bcnt(ctx, instr)) {
4186 } else if (!carry_out && combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
4188 } else if (!carry_out && combine_add_lshl(ctx, instr, false)) {
4193 instr->opcode != aco_opcode::v_sub_u32 && ctx.uses[instr->definitions[1].tempId()] > 0;
4194 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 2)) {
4195 } else if (!carry_out && combine_add_lshl(ctx, instr, true)) {
4200 combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
4201 } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
4202 combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
4205 ctx.program->gfx_level >= GFX9) {
4206 combine_salu_lshl_add(ctx, instr);
4208 combine_salu_not_bitwise(ctx, instr);
4211 if (combine_ordering_test(ctx, instr)) {
4212 } else if (combine_comparison_ordering(ctx, instr)) {
4213 } else if (combine_constant_comparison_ordering(ctx, instr)) {
4214 } else if (combine_salu_n2(ctx, instr)) {
4217 combine_and_subbrev(ctx, instr);
4220 * since ctx.uses[mad_info::mul_temp_id] is always 0, we don't have to worry about
4223 ctx.mad_infos.emplace_back(nullptr, 0);
4224 ctx.info[instr->definitions[0].tempId()].set_mad(instr.get(), ctx.mad_infos.size() - 1);
4229 (!some_gfx9_only || ctx.program->gfx_level >= GFX9)) {
4230 if (combine_minmax(ctx, instr, instr->opcode == min ? max : min,
4233 combine_clamp(ctx, instr, min, max, med3);
4240 combine_inverse_comparison(ctx, instr);
4244 to_uniform_bool_instr(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4250 if (!ctx.info[op.tempId()].is_uniform_bool() && !ctx.info[op.tempId()].is_uniform_bitwise())
4267 ctx.uses[op.tempId()]--;
4269 if (ctx.info[op.tempId()].is_uniform_bool()) {
4271 op.setTemp(ctx.info[op.tempId()].temp);
4272 } else if (ctx.info[op.tempId()].is_uniform_bitwise()) {
4278 Instruction* pred_instr = ctx.info[op.tempId()].instr;
4287 ctx.uses[op.tempId()]++;
4297 select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4301 if (is_dead(ctx.uses, instr.get())) {
4313 if (ctx.uses[instr->definitions[i].tempId()]) {
4320 if (num_used == 1 && ctx.info[instr->operands[0].tempId()].is_vec() &&
4321 ctx.uses[instr->operands[0].tempId()] == 1) {
4322 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
4334 ctx.uses[instr->operands[0].tempId()]--;
4337 ctx.uses[vec_op.tempId()]--;
4340 ctx.uses[op.tempId()]++;
4366 if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) {
4367 mad_info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags];
4369 if (ctx.uses[mad_info->mul_temp_id] && mad_info->add_instr) {
4370 ctx.uses[mad_info->mul_temp_id]++;
4372 ctx.uses[instr->operands[0].tempId()]--;
4374 ctx.uses[instr->operands[1].tempId()]--;
4385 ctx.program->gfx_level < GFX10)
4398 ctx.info[instr->operands[2].tempId()].is_literal(get_operand_size(instr, 2))) {
4410 if ((!has_sgpr || ctx.program->gfx_level >= GFX10) && has_vgpr) {
4412 literal_uses = ctx.uses[instr->operands[2].tempId()];
4424 if (ctx.program->gfx_level < GFX10 && instr->operands[!i].isTemp() &&
4428 if (ctx.info[instr->operands[i].tempId()].is_literal(get_operand_size(instr, i)) &&
4429 ctx.uses[instr->operands[i].tempId()] < literal_uses) {
4431 literal_uses = ctx.uses[instr->operands[i].tempId()];
4444 ctx.uses[instr->operands[literal_idx].tempId()]--;
4456 ctx.info[instr->operands[0].tempId()].set_scc_needed();
4461 ctx.info[instr->operands[2].tempId()].set_scc_needed();
4463 ctx.info[instr->definitions[0].tempId()].is_scc_needed()) {
4465 ctx.info[instr->operands[0].tempId()].set_scc_needed();
4476 if (instr->definitions.size() && ctx.uses[instr->definitions[0].tempId()] == 0 &&
4477 ctx.info[instr->definitions[0].tempId()].is_uniform_bitwise()) {
4478 bool transform_done = to_uniform_bool_instr(ctx, instr);
4480 if (transform_done && !ctx.info[instr->definitions[1].tempId()].is_scc_needed()) {
4497 ssa_info info = ctx.info[instr->operands[i].tempId()];
4526 if (--ctx.uses[info.instr->definitions[0].tempId()])
4527 ctx.uses[info.instr->operands[0].tempId()]++;
4534 if (instr->isSDWA() || (instr->isVOP3() && ctx.program->gfx_level < GFX10) ||
4535 (instr->isVOP3P() && ctx.program->gfx_level < GFX10))
4546 (ctx.program->gfx_level >= GFX10 && (can_use_VOP3(ctx, instr) || instr->isVOP3P())))
4568 } else if (!op.isTemp() || !ctx.info[op.tempId()].is_literal(bits)) {
4575 if (ctx.uses[op.tempId()] < literal_uses) {
4578 literal = Operand::c32(ctx.info[op.tempId()].val);
4579 literal_uses = ctx.uses[op.tempId()];
4591 if (ctx.program->gfx_level >= GFX10 && !is_shift64)
4606 ctx.uses[instr->operands[i].tempId()]--;
4719 apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4726 if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) {
4727 mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags];
4729 (ctx.uses[instr->operands[info->literal_idx].tempId()] == 0 || info->literal_idx == 2)) {
4754 Operand::c32(ctx.info[instr->operands[info->literal_idx].tempId()].val);
4756 ctx.instructions.emplace_back(std::move(new_mad));
4766 if (op.isTemp() && ctx.info[op.tempId()].is_literal(bits) && ctx.uses[op.tempId()] == 0) {
4767 Operand literal = Operand::literal32(ctx.info[op.tempId()].val);
4770 to_VOP3(ctx, instr);
4780 if (instr->opcode == aco_opcode::s_add_u32 && ctx.uses[instr->definitions[1].tempId()] == 0 &&
4784 ctx.instructions.emplace_back(std::move(instr));
4790 opt_ctx ctx;
4791 ctx.program = program;
4793 ctx.info = info.data();
4797 ctx.fp_mode = block.fp_mode;
4799 label_instruction(ctx, instr);
4802 ctx.uses = dead_code_analysis(program);
4806 ctx.fp_mode = block.fp_mode;
4808 combine_instruction(ctx, instr);
4815 ctx.fp_mode = block->fp_mode;
4818 select_instruction(ctx, *instr_rit);
4823 ctx.instructions.clear();
4824 ctx.fp_mode = block.fp_mode;
4826 apply_literals(ctx, instr);
4827 block.instructions.swap(ctx.instructions);