Lines Matching refs:instr
39 perfwarn(Program* program, bool cond, const char* msg, Instruction* instr)
49 aco_print_instr(instr, memf);
81 mad_info(aco_ptr<Instruction> instr, uint32_t id)
82 : add_instr(std::move(instr)), mul_temp_id(id), literal_idx(0), check_literal(false)
152 Instruction* instr;
159 /* Since all the instr_usedef_labels use instr for the same thing
161 * clear any other instr labels. */
163 label &= ~(instr_mod_labels | temp_labels | val_labels); /* instr, temp and val alias */
167 label &= ~(temp_labels | val_labels); /* instr, temp and val alias */
172 label &= ~(instr_labels | val_labels); /* instr, temp and val alias */
179 label &= ~(instr_labels | temp_labels); /* instr, temp and val alias */
182 label &= ~(instr_labels | temp_labels); /* instr, temp and val alias */
191 instr = vec;
277 instr = mul;
294 instr = mad;
302 instr = mul;
310 instr = mul;
318 instr = mul;
326 instr = med3;
334 instr = conv;
362 instr = add_sub_instr;
370 instr = bitwise_instr;
382 instr = minmax_instr;
390 instr = vopc_instr;
426 instr = label_instr;
434 instr = vop3p_instr;
454 instr = cvt;
462 instr = extract;
470 instr = insert;
478 instr = mov;
484 instr = mov;
494 instr = split;
511 can_use_VOP3(opt_ctx& ctx, const aco_ptr<Instruction>& instr)
513 if (instr->isVOP3())
516 if (instr->isVOP3P())
519 if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->gfx_level < GFX10)
522 if (instr->isDPP() || instr->isSDWA())
525 return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
526 instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
527 instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 &&
528 instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 &&
529 instr->opcode != aco_opcode::v_readlane_b32 &&
530 instr->opcode != aco_opcode::v_writelane_b32 &&
531 instr->opcode != aco_opcode::v_readfirstlane_b32;
535 pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsigned index)
537 if (instr->definitions.empty())
541 instr->opcode == aco_opcode::p_as_uniform ||
542 std::all_of(instr->definitions.begin(), instr->definitions.end(),
551 std::none_of(instr->definitions.begin(), instr->definitions.end(),
554 switch (instr->opcode) {
559 if (temp.bytes() != instr->operands[index].bytes())
571 if (temp.bytes() > instr->operands[index].bytes())
578 int decrease = instr->operands[index].bytes() - temp.bytes();
580 decrease -= instr->definitions.back().bytes();
581 instr->definitions.pop_back();
587 if (temp.regClass() == instr->definitions[0].regClass())
588 instr->opcode = aco_opcode::p_parallelcopy;
593 instr->operands[index].setTemp(temp);
599 can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
601 if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
603 return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
604 instr->opcode != aco_opcode::v_readlane_b32 &&
605 instr->opcode != aco_opcode::v_readlane_b32_e64 &&
606 instr->opcode != aco_opcode::v_writelane_b32 &&
607 instr->opcode != aco_opcode::v_writelane_b32_e64 &&
608 instr->opcode != aco_opcode::v_permlane16_b32 &&
609 instr->opcode != aco_opcode::v_permlanex16_b32;
613 to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
615 if (instr->isVOP3())
618 aco_ptr<Instruction> tmp = std::move(instr);
620 instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(),
622 std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
623 for (unsigned i = 0; i < instr->definitions.size(); i++) {
624 instr->definitions[i] = tmp->definitions[i];
625 if (instr->definitions[i].isTemp()) {
626 ssa_info& info = ctx.info[instr->definitions[i].tempId()];
627 if (info.label & instr_usedef_labels && info.instr == tmp.get())
628 info.instr = instr.get();
634 instr->pass_flags = tmp->pass_flags;
644 to_SDWA(opt_ctx& ctx, aco_ptr<Instruction>& instr)
646 aco_ptr<Instruction> tmp = convert_to_SDWA(ctx.program->gfx_level, instr);
650 for (unsigned i = 0; i < instr->definitions.size(); i++) {
651 ssa_info& info = ctx.info[instr->definitions[i].tempId()];
652 if (info.label & instr_labels && info.instr == tmp.get())
653 info.instr = instr.get();
682 valu_can_accept_vgpr(aco_ptr<Instruction>& instr, unsigned operand)
684 if (instr->opcode == aco_opcode::v_readlane_b32 ||
685 instr->opcode == aco_opcode::v_readlane_b32_e64 ||
686 instr->opcode == aco_opcode::v_writelane_b32 ||
687 instr->opcode == aco_opcode::v_writelane_b32_e64)
689 if (instr->opcode == aco_opcode::v_permlane16_b32 ||
690 instr->opcode == aco_opcode::v_permlanex16_b32)
745 parse_base_offset(opt_ctx& ctx, Instruction* instr, unsigned op_index, Temp* base, uint32_t* offset,
748 Operand op = instr->operands[op_index];
756 Instruction* add_instr = ctx.info[tmp.id()].instr;
827 Instruction* bitwise_instr = ctx.info[op.tempId()].instr;
840 smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
843 if (!instr->operands.empty())
844 skip_smem_offset_align(ctx, &instr->smem());
847 if (!instr->operands.empty() && instr->operands[1].isTemp()) {
848 SMEM_instruction& smem = instr->smem();
849 ssa_info info = ctx.info[instr->operands[1].tempId()];
858 instr->operands[1] = Operand::c32(info.val);
859 } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, prevent_overflow) &&
884 instr.reset(new_instr);
890 if (!instr->operands.empty())
891 skip_smem_offset_align(ctx, &instr->smem());
895 get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
897 if (instr->isPseudo())
898 return instr->operands[index].bytes() * 8u;
899 else if (instr->opcode == aco_opcode::v_mad_u64_u32 ||
900 instr->opcode == aco_opcode::v_mad_i64_i32)
902 else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
903 instr->opcode == aco_opcode::v_fma_mixlo_f16)
904 return instr->vop3p().opsel_hi & (1u << index) ? 16 : 32;
905 else if (instr->isVALU() || instr->isSALU())
906 return instr_info.operand_size[(int)instr->opcode];
920 propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info, unsigned i)
925 assert(instr->operands[i].isTemp());
926 unsigned bits = get_operand_size(instr, i);
928 instr->operands[i] = get_constant_op(ctx, info, bits);
934 (instr->opcode == aco_opcode::v_dot4_i32_i8 || instr->opcode == aco_opcode::v_dot2_i32_i16 ||
935 instr->opcode == aco_opcode::v_dot4_u32_u8 || instr->opcode == aco_opcode::v_dot2_u32_u16) &&
941 VOP3P_instruction* vop3p = &instr->vop3p();
971 instr->operands[i] = opsel_lo ? const_hi : const_lo;
978 instr->operands[i] = const_lo;
985 instr->operands[i] = const_hi;
992 instr->operands[i] = const_lo;
1001 if (!instr_info.can_use_input_modifiers[(int)instr->opcode])
1004 instr->operands[i] = Operand::c16(const_lo.constantValue() & 0x7FFF);
1025 parse_extract(Instruction* instr)
1027 if (instr->opcode == aco_opcode::p_extract) {
1028 unsigned size = instr->operands[2].constantValue() / 8;
1029 unsigned offset = instr->operands[1].constantValue() * size;
1030 bool sext = instr->operands[3].constantEquals(1);
1032 } else if (instr->opcode == aco_opcode::p_insert && instr->operands[1].constantEquals(0)) {
1033 return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
1034 } else if (instr->opcode == aco_opcode::p_extract_vector) {
1035 unsigned size = instr->definitions[0].bytes();
1036 unsigned offset = instr->operands[1].constantValue() * size;
1039 } else if (instr->opcode == aco_opcode::p_split_vector) {
1040 assert(instr->operands[0].bytes() == 4 && instr->definitions[1].bytes() == 2);
1048 parse_insert(Instruction* instr)
1050 if (instr->opcode == aco_opcode::p_extract && instr->operands[3].constantEquals(0) &&
1051 instr->operands[1].constantEquals(0)) {
1052 return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
1053 } else if (instr->opcode == aco_opcode::p_insert) {
1054 unsigned size = instr->operands[2].constantValue() / 8;
1055 unsigned offset = instr->operands[1].constantValue() * size;
1063 can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
1068 Temp tmp = info.instr->operands[0].getTemp();
1069 SubdwordSel sel = parse_extract(info.instr);
1075 } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
1077 } else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
1079 if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
1082 } else if (instr->isVOP3() && sel.size() == 2 &&
1083 can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
1084 !(instr->vop3().opsel & (1 << idx))) {
1086 } else if (instr->opcode == aco_opcode::p_extract) {
1087 SubdwordSel instrSel = parse_extract(instr.get());
1103 /* Combine an p_extract (or p_insert, in some cases) instruction with instr.
1104 * instr(p_extract(...)) -> instr()
1107 apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
1109 Temp tmp = info.instr->operands[0].getTemp();
1110 SubdwordSel sel = parse_extract(info.instr);
1113 instr->operands[idx].set16bit(false);
1114 instr->operands[idx].set24bit(false);
1120 } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
1122 case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
1123 case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
1124 case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
1125 case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
1127 } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && instr->operands[0].isConstant() &&
1129 ((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) ||
1130 (sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
1133 } else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
1135 to_SDWA(ctx, instr);
1136 static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
1137 } else if (instr->isVOP3()) {
1139 instr->vop3().opsel |= 1 << idx;
1140 } else if (instr->opcode == aco_opcode::p_extract) {
1141 SubdwordSel instrSel = parse_extract(instr.get());
1148 instr->operands[1] = Operand::c32(offset / size);
1149 instr->operands[2] = Operand::c32(size * 8u);
1150 instr->operands[3] = Operand::c32(sign_extend);
1157 for (Definition& def : instr->definitions)
1162 check_sdwa_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr)
1164 for (unsigned i = 0; i < instr->operands.size(); i++) {
1165 Operand op = instr->operands[i];
1169 if (info.is_extract() && (info.instr->operands[0].getTemp().type() == RegType::vgpr ||
1171 if (!can_apply_extract(ctx, instr, i, info))
1196 can_eliminate_fcanonicalize(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp tmp)
1203 aco_opcode op = instr->opcode;
1211 Instruction* vopc_instr = ctx.info[tmp.id()].instr;
1217 Instruction* instr = ctx.info[tmp.id()].instr;
1218 if (instr->operands.size() != 2 || instr->pass_flags != pass_flags)
1220 if (!(instr->operands[0].isTemp() && instr->operands[1].isTemp()))
1222 return can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), pass_flags) &&
1223 can_eliminate_and_exec(ctx, instr->operands[1].getTemp(), pass_flags);
1229 is_copy_label(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info)
1232 (info.is_fcanonicalize() && can_eliminate_fcanonicalize(ctx, instr, info.temp));
1254 is_scratch_offset_valid(opt_ctx& ctx, Instruction* instr, int32_t offset)
1260 bool has_vgpr_offset = instr && !instr->operands[0].isUndefined();
1268 label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
1270 if (instr->isSALU() || instr->isVALU() || instr->isPseudo()) {
1272 for (Operand& op : instr->operands)
1275 perfwarn(ctx.program, all_const, "All instruction operands are constant", instr.get());
1277 ASSERTED bool is_copy = instr->opcode == aco_opcode::s_mov_b32 ||
1278 instr->opcode == aco_opcode::s_mov_b64 ||
1279 instr->opcode == aco_opcode::v_mov_b32;
1280 perfwarn(ctx.program, is_copy && !instr->usesModifiers(), "Use p_parallelcopy instead",
1281 instr.get());
1284 if (instr->isSMEM())
1285 smem_combine(ctx, instr);
1287 for (unsigned i = 0; i < instr->operands.size(); i++) {
1288 if (!instr->operands[i].isTemp())
1291 ssa_info info = ctx.info[instr->operands[i].tempId()];
1293 if (info.is_undefined() && is_phi(instr))
1294 instr->operands[i] = Operand(instr->operands[i].regClass());
1296 while (info.is_temp() && info.temp.regClass() == instr->operands[i].getTemp().regClass()) {
1297 instr->operands[i].setTemp(ctx.info[instr->operands[i].tempId()].temp);
1302 if (instr->isPseudo()) {
1304 pseudo_propagate_temp(ctx, instr, info.temp, i);
1310 if (instr->isSALU() || instr->isPseudo()) {
1311 unsigned bits = get_operand_size(instr, i);
1312 if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) &&
1313 !instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) {
1314 instr->operands[i] = get_constant_op(ctx, info, bits);
1320 else if (instr->isVALU()) {
1321 if (is_copy_label(ctx, instr, info) && info.temp.type() == RegType::vgpr &&
1322 valu_can_accept_vgpr(instr, i)) {
1323 instr->operands[i].setTemp(info.temp);
1327 if (info.is_temp() && info.temp.type() == RegType::sgpr && can_apply_sgprs(ctx, instr) &&
1328 instr->operands.size() == 1) {
1329 instr->format = withoutDPP(instr->format);
1330 instr->operands[i].setTemp(info.temp);
1337 instr->opcode != aco_opcode::v_cndmask_b32 || instr->operands[i].getTemp().bytes() == 4;
1338 can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
1340 if (instr->isSDWA())
1341 can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4;
1343 can_use_mod = can_use_mod && (instr->isDPP16() || can_use_VOP3(ctx, instr));
1345 unsigned bits = get_operand_size(instr, i);
1346 bool mod_bitsize_compat = instr->operands[i].bytes() * 8 == bits;
1348 if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32 && mod_bitsize_compat) {
1349 instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
1350 instr->operands[i].setTemp(info.temp);
1351 } else if (info.is_neg() && instr->opcode == aco_opcode::v_add_f16 && mod_bitsize_compat) {
1352 instr->opcode = i ? aco_opcode::v_sub_f16 : aco_opcode::v_subrev_f16;
1353 instr->operands[i].setTemp(info.temp);
1355 can_eliminate_fcanonicalize(ctx, instr, info.temp)) {
1356 if (!instr->isDPP() && !instr->isSDWA())
1357 to_VOP3(ctx, instr);
1358 instr->operands[i].setTemp(info.temp);
1359 if (instr->isDPP16() && !instr->dpp16().abs[i])
1360 instr->dpp16().neg[i] = true;
1361 else if (instr->isSDWA() && !instr->sdwa().abs[i])
1362 instr->sdwa().neg[i] = true;
1363 else if (instr->isVOP3() && !instr->vop3().abs[i])
1364 instr->vop3().neg[i] = true;
1367 can_eliminate_fcanonicalize(ctx, instr, info.temp)) {
1368 if (!instr->isDPP() && !instr->isSDWA())
1369 to_VOP3(ctx, instr);
1370 instr->operands[i] = Operand(info.temp);
1371 if (instr->isDPP16())
1372 instr->dpp16().abs[i] = true;
1373 else if (instr->isSDWA())
1374 instr->sdwa().abs[i] = true;
1376 instr->vop3().abs[i] = true;
1380 if (instr->isVOP3P()) {
1381 propagate_constants_vop3p(ctx, instr, info, i);
1385 if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i) &&
1386 (!instr->isSDWA() || ctx.program->gfx_level >= GFX9)) {
1388 perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2,
1389 "v_cndmask_b32 with a constant selector", instr.get());
1390 if (i == 0 || instr->isSDWA() || instr->opcode == aco_opcode::v_readlane_b32 ||
1391 instr->opcode == aco_opcode::v_writelane_b32) {
1392 instr->format = withoutDPP(instr->format);
1393 instr->operands[i] = op;
1395 } else if (!instr->isVOP3() && can_swap_operands(instr, &instr->opcode)) {
1396 instr->operands[i] = instr->operands[0];
1397 instr->operands[0] = op;
1399 } else if (can_use_VOP3(ctx, instr)) {
1400 to_VOP3(ctx, instr);
1401 instr->operands[i] = op;
1408 else if (instr->isMUBUF()) {
1409 MUBUF_instruction& mubuf = instr->mubuf();
1426 instr->operands[1] = Operand(v1);
1431 instr->operands[2] = Operand::c32(0);
1435 parse_base_offset(ctx, instr.get(), i, &base, &offset,
1439 instr->operands[1].setTemp(base);
1442 } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
1444 instr->operands[i].setTemp(base);
1451 else if (instr->isScratch()) {
1452 FLAT_instruction& scratch = instr->scratch();
1458 if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
1459 base.regClass() == instr->operands[i].regClass() &&
1460 is_scratch_offset_valid(ctx, instr.get(), scratch.offset + (int32_t)offset)) {
1461 instr->operands[i].setTemp(base);
1468 instr->operands[i] = Operand(instr->operands[i].regClass());
1475 else if (instr->isDS()) {
1477 DS_instruction& ds = instr->ds();
1482 parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
1483 base.regClass() == instr->operands[i].regClass() &&
1484 instr->opcode != aco_opcode::ds_swizzle_b32) {
1485 if (instr->opcode == aco_opcode::ds_write2_b32 ||
1486 instr->opcode == aco_opcode::ds_read2_b32 ||
1487 instr->opcode == aco_opcode::ds_write2_b64 ||
1488 instr->opcode == aco_opcode::ds_read2_b64 ||
1489 instr->opcode == aco_opcode::ds_write2st64_b32 ||
1490 instr->opcode == aco_opcode::ds_read2st64_b32 ||
1491 instr->opcode == aco_opcode::ds_write2st64_b64 ||
1492 instr->opcode == aco_opcode::ds_read2st64_b64) {
1493 bool is64bit = instr->opcode == aco_opcode::ds_write2_b64 ||
1494 instr->opcode == aco_opcode::ds_read2_b64 ||
1495 instr->opcode == aco_opcode::ds_write2st64_b64 ||
1496 instr->opcode == aco_opcode::ds_read2st64_b64;
1497 bool st64 = instr->opcode == aco_opcode::ds_write2st64_b32 ||
1498 instr->opcode == aco_opcode::ds_read2st64_b32 ||
1499 instr->opcode == aco_opcode::ds_write2st64_b64 ||
1500 instr->opcode == aco_opcode::ds_read2st64_b64;
1506 instr->operands[i].setTemp(base);
1512 instr->operands[i].setTemp(base);
1519 else if (instr->isBranch()) {
1520 if (ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
1522 instr->opcode = instr->opcode == aco_opcode::p_cbranch_z ? aco_opcode::p_cbranch_nz
1524 instr->operands[0].setTemp(ctx.info[instr->operands[0].tempId()].temp);
1530 if (instr->definitions.empty()) {
1531 check_sdwa_extract(ctx, instr);
1535 if (instr->isVALU() || instr->isVINTRP()) {
1536 if (instr_info.can_use_output_modifiers[(int)instr->opcode] || instr->isVINTRP() ||
1537 instr->opcode == aco_opcode::v_cndmask_b32) {
1539 if (!does_fp_op_flush_denorms(ctx, instr->opcode)) {
1540 unsigned ops = instr->opcode == aco_opcode::v_cndmask_b32 ? 2 : instr->operands.size();
1542 canonicalized = is_op_canonicalized(ctx, instr->operands[i]);
1545 ctx.info[instr->definitions[0].tempId()].set_canonicalized();
1548 if (instr->isVOPC()) {
1549 ctx.info[instr->definitions[0].tempId()].set_vopc(instr.get());
1550 check_sdwa_extract(ctx, instr);
1553 if (instr->isVOP3P()) {
1554 ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
1559 switch (instr->opcode) {
1561 bool copy_prop = instr->operands.size() == 1 && instr->operands[0].isTemp() &&
1562 instr->operands[0].regClass() == instr->definitions[0].regClass();
1564 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1571 for (const Operand& op : instr->operands) {
1576 Instruction* vec = ctx.info[op.tempId()].instr;
1585 if (ops.size() != instr->operands.size()) {
1586 assert(ops.size() > instr->operands.size());
1587 Definition def = instr->definitions[0];
1588 instr.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector,
1594 instr->operands[i] = ops[i];
1596 instr->definitions[0] = def;
1599 assert(instr->operands[i] == ops[i]);
1602 ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
1604 if (instr->operands.size() == 2) {
1606 if (instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_split()) {
1607 Instruction* split = ctx.info[instr->operands[1].tempId()].instr;
1608 if (instr->operands[0].isTemp() &&
1609 instr->operands[0].getTemp() == split->definitions[0].getTemp())
1610 ctx.info[instr->definitions[0].tempId()].set_temp(split->operands[0].getTemp());
1616 ssa_info& info = ctx.info[instr->operands[0].tempId()];
1620 for (Definition def : instr->definitions) {
1627 if (instr->definitions.size() == 2 && instr->operands[0].isTemp() &&
1628 instr->definitions[0].bytes() == instr->definitions[1].bytes()) {
1629 ctx.info[instr->definitions[1].tempId()].set_split(instr.get());
1630 if (instr->operands[0].bytes() == 4) {
1632 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1633 ctx.info[instr->definitions[1].tempId()].set_extract(instr.get());
1639 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
1643 for (unsigned i = 0; i < instr->definitions.size();
1644 split_offset += instr->definitions[i++].bytes()) {
1649 vec->operands[vec_index].bytes() != instr->definitions[i].bytes())
1654 ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->gfx_level,
1657 ctx.info[instr->definitions[i].tempId()].set_undefined();
1660 ctx.info[instr->definitions[i].tempId()].set_temp(vec_op.getTemp());
1666 ssa_info& info = ctx.info[instr->operands[0].tempId()];
1667 const unsigned index = instr->operands[1].constantValue();
1668 const unsigned dst_offset = index * instr->definitions[0].bytes();
1672 Instruction* vec = info.instr;
1679 } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) {
1682 instr->operands[0] = op;
1687 uint32_t mask = u_bit_consecutive(0, instr->definitions[0].bytes() * 8u);
1689 instr->operands[0] =
1690 Operand::get_const(ctx.program->gfx_level, val, instr->definitions[0].bytes());
1694 if (instr->operands[0].bytes() != instr->definitions[0].bytes()) {
1695 if (instr->operands[0].size() != 1)
1699 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1701 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
1706 instr->opcode = aco_opcode::p_parallelcopy;
1707 instr->operands.pop_back();
1711 if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_vec() &&
1712 instr->operands[0].regClass() != instr->definitions[0].regClass()) {
1716 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
1717 aco_ptr<Instruction> old_copy = std::move(instr);
1719 instr.reset(create_instruction<Pseudo_instruction>(
1721 instr->definitions[0] = old_copy->definitions[0];
1722 std::copy(vec->operands.begin(), vec->operands.end(), instr->operands.begin());
1724 Operand& op = instr->operands[i];
1726 ctx.info[op.tempId()].temp.type() == instr->definitions[0].regClass().type())
1729 ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
1734 if (instr->definitions[0].isFixed()) {
1736 } else if (instr->usesModifiers()) {
1738 } else if (instr->operands[0].isConstant()) {
1739 ctx.info[instr->definitions[0].tempId()].set_constant(
1740 ctx.program->gfx_level, instr->operands[0].constantValue64());
1741 } else if (instr->operands[0].isTemp()) {
1742 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1743 if (ctx.info[instr->operands[0].tempId()].is_canonicalized())
1744 ctx.info[instr->definitions[0].tempId()].set_canonicalized();
1746 assert(instr->operands[0].isFixed());
1750 if (instr->isDPP16()) {
1752 assert(instr->dpp16().row_mask == 0xf && instr->dpp16().bank_mask == 0xf);
1753 ctx.info[instr->definitions[0].tempId()].set_dpp16(instr.get());
1754 } else if (instr->isDPP8()) {
1755 ctx.info[instr->definitions[0].tempId()].set_dpp8(instr.get());
1760 ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
1762 case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break;
1766 ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
1769 bool uses_mods = instr->usesModifiers();
1770 bool fp16 = instr->opcode == aco_opcode::v_mul_f16;
1773 if (instr->operands[!i].isConstant() && instr->operands[i].isTemp()) {
1774 if (!instr->isDPP() && !instr->isSDWA() &&
1775 (instr->operands[!i].constantEquals(fp16 ? 0x3c00 : 0x3f800000) || /* 1.0 */
1776 instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u))) { /* -1.0 */
1777 bool neg1 = instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u);
1779 VOP3_instruction* vop3 = instr->isVOP3() ? &instr->vop3() : NULL;
1786 Temp other = instr->operands[i].getTemp();
1788 ctx.info[instr->definitions[0].tempId()].set_neg_abs(other);
1790 ctx.info[instr->definitions[0].tempId()].set_abs(other);
1792 ctx.info[instr->definitions[0].tempId()].set_neg(other);
1794 ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(other);
1797 } else if (instr->operands[!i].constantValue() ==
1799 ctx.info[instr->operands[i].tempId()].set_omod2(instr.get());
1800 } else if (instr->operands[!i].constantValue() ==
1802 ctx.info[instr->operands[i].tempId()].set_omod4(instr.get());
1803 } else if (instr->operands[!i].constantValue() ==
1805 ctx.info[instr->operands[i].tempId()].set_omod5(instr.get());
1806 } else if (instr->operands[!i].constantValue() == 0u &&
1809 instr->opcode == aco_opcode::v_mul_legacy_f32)) { /* 0.0 */
1810 ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
1822 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
1826 VOP3_instruction& vop3 = instr->vop3();
1833 bool is_fp16 = instr->opcode == aco_opcode::v_med3_f16;
1835 if (instr->operands[i].constantEquals(0))
1837 else if (instr->operands[i].constantEquals(is_fp16 ? 0x3c00 : 0x3f800000)) /* 1.0 */
1842 if (found_zero && found_one && instr->operands[idx].isTemp())
1843 ctx.info[instr->operands[idx].tempId()].set_clamp(instr.get());
1847 if (instr->operands[0].constantEquals(0) && instr->operands[1].constantEquals(0xFFFFFFFF))
1848 ctx.info[instr->definitions[0].tempId()].set_vcc(instr->operands[2].getTemp());
1849 else if (instr->operands[0].constantEquals(0) &&
1850 instr->operands[1].constantEquals(0x3f800000u))
1851 ctx.info[instr->definitions[0].tempId()].set_b2f(instr->operands[2].getTemp());
1852 else if (instr->operands[0].constantEquals(0) && instr->operands[1].constantEquals(1))
1853 ctx.info[instr->definitions[0].tempId()].set_b2i(instr->operands[2].getTemp());
1857 if (instr->format == Format::VOPC && /* don't optimize VOP3 / SDWA / DPP */
1858 instr->operands[0].constantEquals(0) && instr->operands[1].isTemp() &&
1859 ctx.info[instr->operands[1].tempId()].is_vcc())
1860 ctx.info[instr->definitions[0].tempId()].set_temp(
1861 ctx.info[instr->operands[1].tempId()].temp);
1865 bool all_same_temp = instr->operands[0].isTemp();
1868 all_same_temp = instr->definitions[0].regClass() == instr->operands[0].regClass();
1869 for (unsigned i = 1; all_same_temp && (i < instr->operands.size()); i++) {
1870 if (!instr->operands[i].isTemp() ||
1871 instr->operands[i].tempId() != instr->operands[0].tempId())
1875 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1877 bool all_undef = instr->operands[0].isUndefined();
1878 for (unsigned i = 1; all_undef && (i < instr->operands.size()); i++) {
1879 if (!instr->operands[i].isUndefined())
1883 ctx.info[instr->definitions[0].tempId()].set_undefined();
1902 ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get());
1906 if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
1907 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1908 ctx.info[instr->definitions[1].tempId()].set_scc_invert(
1909 ctx.info[instr->operands[0].tempId()].temp);
1910 } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) {
1911 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1912 ctx.info[instr->definitions[1].tempId()].set_scc_invert(
1913 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1915 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
1919 if (fixed_to_exec(instr->operands[1]) && instr->operands[0].isTemp()) {
1920 if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
1923 ctx.info[instr->definitions[1].tempId()].set_temp(
1924 ctx.info[instr->operands[0].tempId()].temp);
1925 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(
1926 ctx.info[instr->operands[0].tempId()].temp);
1928 } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) {
1931 ctx.info[instr->definitions[1].tempId()].set_temp(
1932 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1933 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(
1934 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1938 instr->pass_flags == 1) {
1941 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1943 } else if (can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
1944 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1953 if (std::all_of(instr->operands.begin(), instr->operands.end(),
1959 ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1961 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
1969 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
1987 ctx.info[instr->definitions[0].tempId()].set_minmax(instr.get());
1991 if (instr->operands[0].constantEquals((unsigned)-1) && instr->operands[1].constantEquals(0)) {
1993 ctx.info[instr->definitions[0].tempId()].set_uniform_bool(instr->operands[2].getTemp());
1995 if (instr->operands[2].isTemp() && ctx.info[instr->operands[2].tempId()].is_scc_invert()) {
1997 std::swap(instr->operands[0], instr->operands[1]);
1998 instr->operands[2].setTemp(ctx.info[instr->operands[2].tempId()].temp);
2002 if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
2003 ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
2009 if (instr->operands[0].constantEquals(0x3f800000u))
2010 ctx.info[instr->definitions[0].tempId()].set_canonicalized();
2013 if (instr->definitions[0].bytes() == 4) {
2014 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
2015 if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()))
2016 ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
2021 if (instr->operands[0].bytes() == 4) {
2022 if (instr->operands[0].regClass() == v1)
2023 ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
2024 if (parse_extract(instr.get()))
2025 ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
2026 ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
2034 ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
2038 if (instr->operands[0].isTemp())
2039 ctx.info[instr->operands[0].tempId()].set_f2f16(instr.get());
2043 if (instr->operands[0].isTemp())
2044 ctx.info[instr->definitions[0].tempId()].set_f2f32(instr.get());
2052 if (!(ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)))
2053 check_sdwa_extract(ctx, instr);
2066 decrease_uses(opt_ctx& ctx, Instruction* instr)
2068 if (!--ctx.uses[instr->definitions[0].tempId()]) {
2069 for (const Operand& op : instr->operands) {
2084 Instruction* instr = ctx.info[op.tempId()].instr;
2086 if (instr->definitions.size() == 2) {
2087 assert(instr->definitions[0].isTemp() && instr->definitions[0].tempId() == op.tempId());
2088 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2092 return instr;
2098 combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2100 if (instr->definitions[0].regClass() != ctx.program->lane_mask)
2102 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2105 bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32;
2115 op_instr[i] = follow_operand(ctx, instr->operands[i], true);
2182 new_instr->definitions[0] = instr->definitions[0];
2184 ctx.info[instr->definitions[0].tempId()].label = 0;
2185 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2187 instr.reset(new_instr);
2195 combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2197 if (instr->definitions[0].regClass() != ctx.program->lane_mask)
2199 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2202 bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32;
2205 Instruction* nan_test = follow_operand(ctx, instr->operands[0], true);
2206 Instruction* cmp = follow_operand(ctx, instr->operands[1], true);
2256 new_instr->definitions[0] = instr->definitions[0];
2258 ctx.info[instr->definitions[0].tempId()].label = 0;
2259 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2261 instr.reset(new_instr);
2296 combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2298 if (instr->definitions[0].regClass() != ctx.program->lane_mask)
2300 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2303 bool is_or = instr->opcode == aco_opcode::s_or_b64 || instr->opcode == aco_opcode::s_or_b32;
2305 Instruction* nan_test = follow_operand(ctx, instr->operands[0], true);
2306 Instruction* cmp = follow_operand(ctx, instr->operands[1], true);
2381 new_instr->definitions[0] = instr->definitions[0];
2383 ctx.info[instr->definitions[0].tempId()].label = 0;
2384 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2386 instr.reset(new_instr);
2393 combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2395 if (!instr->operands[0].isFixed() || instr->operands[0].physReg() != exec)
2397 if (ctx.uses[instr->definitions[1].tempId()])
2400 Instruction* cmp = follow_operand(ctx, instr->operands[1]);
2460 new_instr->definitions[0] = instr->definitions[0];
2462 ctx.info[instr->definitions[0].tempId()].label = 0;
2463 ctx.info[instr->definitions[0].tempId()].set_vopc(new_instr);
2465 instr.reset(new_instr);
2548 create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
2561 new_instr->definitions[0] = instr->definitions[0];
2562 ctx.info[instr->definitions[0].tempId()].label = 0;
2564 instr.reset(new_instr);
2568 combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode op2, aco_opcode new_op,
2578 if (match_op3_for_vop3(ctx, instr->opcode, op2, instr.get(), swap, shuffle, operands, neg,
2580 ctx.uses[instr->operands[swap].tempId()]--;
2581 create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
2590 combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2592 bool is_or = instr->opcode == aco_opcode::v_or_b32;
2595 if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::s_and_b32, aco_opcode::v_and_or_b32,
2598 if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::v_and_b32, aco_opcode::v_and_or_b32,
2601 if (combine_three_valu_op(ctx, instr, aco_opcode::s_lshl_b32, new_op_lshl, "120", 1 | 2))
2603 if (combine_three_valu_op(ctx, instr, aco_opcode::v_lshlrev_b32, new_op_lshl, "210", 1 | 2))
2606 if (instr->isSDWA() || instr->isDPP())
2615 Instruction* extins = follow_operand(ctx, instr->operands[i]);
2639 operands[2] = instr->operands[!i];
2647 if (instr->isVOP3())
2648 clamp = instr->vop3().clamp;
2650 ctx.uses[instr->operands[i].tempId()]--;
2651 create_vop3_for_op3(ctx, op, instr, operands, neg, abs, opsel, clamp, omod);
2659 combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposite, aco_opcode minmax3)
2662 if (combine_three_valu_op(ctx, instr, instr->opcode, minmax3, "012", 1 | 2))
2672 if (match_op3_for_vop3(ctx, instr->opcode, opposite, instr.get(), swap, "012", operands, neg,
2675 ctx.uses[instr->operands[swap].tempId()]--;
2678 create_vop3_for_op3(ctx, minmax3, instr, operands, neg, abs, opsel, clamp, omod);
2692 combine_salu_not_bitwise(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2695 if (!instr->operands[0].isTemp())
2697 if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2700 Instruction* op2_instr = follow_operand(ctx, instr->operands[0]);
2714 std::swap(instr->definitions[0], op2_instr->definitions[0]);
2715 std::swap(instr->definitions[1], op2_instr->definitions[1]);
2716 ctx.uses[instr->operands[0].tempId()]--;
2737 combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2739 if (instr->definitions[0].isTemp() && ctx.info[instr->definitions[0].tempId()].is_uniform_bool())
2743 Instruction* op2_instr = follow_operand(ctx, instr->operands[i]);
2750 if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
2751 instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
2754 ctx.uses[instr->operands[i].tempId()]--;
2755 instr->operands[0] = instr->operands[!i];
2756 instr->operands[1] = op2_instr->operands[0];
2757 ctx.info[instr->definitions[0].tempId()].label = 0;
2759 switch (instr->opcode) {
2760 case aco_opcode::s_and_b32: instr->opcode = aco_opcode::s_andn2_b32; break;
2761 case aco_opcode::s_or_b32: instr->opcode = aco_opcode::s_orn2_b32; break;
2762 case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_andn2_b64; break;
2763 case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_orn2_b64; break;
2774 combine_salu_lshl_add(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2776 if (instr->opcode == aco_opcode::s_add_i32 && ctx.uses[instr->definitions[1].tempId()])
2780 Instruction* op2_instr = follow_operand(ctx, instr->operands[i], true);
2791 if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
2792 instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
2795 ctx.uses[instr->operands[i].tempId()]--;
2796 instr->operands[1] = instr->operands[!i];
2797 instr->operands[0] = op2_instr->operands[0];
2798 ctx.info[instr->definitions[0].tempId()].label = 0;
2800 instr->opcode = std::array<aco_opcode, 4>{
2810 combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op, uint8_t ops)
2812 if (instr->usesModifiers())
2818 if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2i() &&
2819 ctx.uses[instr->operands[i].tempId()] == 1) {
2822 if (instr->operands[!i].isTemp() &&
2823 instr->operands[!i].getTemp().type() == RegType::vgpr) {
2826 (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
2832 ctx.uses[instr->operands[i].tempId()]--;
2833 new_instr->definitions[0] = instr->definitions[0];
2834 if (instr->definitions.size() == 2) {
2835 new_instr->definitions[1] = instr->definitions[1];
2845 new_instr->operands[1] = instr->operands[!i];
2846 new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
2847 instr = std::move(new_instr);
2848 ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get());
2857 combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2859 if (instr->usesModifiers())
2863 Instruction* op_instr = follow_operand(ctx, instr->operands[i]);
2870 ctx.uses[instr->operands[i].tempId()]--;
2872 new_instr->operands[1] = instr->operands[!i];
2873 new_instr->definitions[0] = instr->definitions[0];
2874 instr = std::move(new_instr);
2875 ctx.info[instr->definitions[0].tempId()].label = 0;
2928 combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode min, aco_opcode max,
2935 if (instr->opcode == min)
2937 else if (instr->opcode == max)
2946 if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap, "012", operands, neg,
2951 if (precise && instr->opcode != min &&
3030 if (instr->opcode == min) {
3038 ctx.uses[instr->operands[swap].tempId()]--;
3039 create_vop3_for_op3(ctx, med, instr, operands, neg, abs, opsel, clamp, omod);
3049 apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3051 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
3052 instr->opcode == aco_opcode::v_lshrrev_b64 ||
3053 instr->opcode == aco_opcode::v_ashrrev_i64;
3059 for (unsigned i = 0; i < instr->operands.size(); i++) {
3060 if (instr->operands[i].isLiteral())
3062 if (!instr->operands[i].isTemp())
3064 if (instr->operands[i].getTemp().type() == RegType::sgpr) {
3065 if (instr->operands[i].tempId() != sgpr_ids[0])
3066 sgpr_ids[!!sgpr_ids[0]] = instr->operands[i].tempId();
3068 ssa_info& info = ctx.info[instr->operands[i].tempId()];
3069 if (is_copy_label(ctx, instr, info) && info.temp.type() == RegType::sgpr)
3071 if (info.is_extract() && info.instr->operands[0].getTemp().type() == RegType::sgpr)
3090 uint16_t uses = ctx.uses[instr->operands[i].tempId()];
3093 sgpr_info_id = instr->operands[i].tempId();
3103 if (!info.is_extract() && num_sgprs && ctx.uses[sgpr_info_id] > 1 && !instr->isVOP3() &&
3104 !instr->isSDWA() && instr->format != Format::VOP3P)
3107 Temp sgpr = info.is_extract() ? info.instr->operands[0].getTemp() : info.temp;
3113 instr->format = withoutDPP(instr->format);
3115 if (sgpr_idx == 0 || instr->isVOP3() || instr->isSDWA() || instr->isVOP3P() ||
3118 if (info.is_extract() && can_apply_extract(ctx, instr, sgpr_idx, info))
3119 apply_extract(ctx, instr, sgpr_idx, info);
3122 instr->operands[sgpr_idx] = Operand(sgpr);
3123 } else if (can_swap_operands(instr, &instr->opcode)) {
3124 instr->operands[sgpr_idx] = instr->operands[0];
3125 instr->operands[0] = Operand(sgpr);
3129 } else if (can_use_VOP3(ctx, instr) && !info.is_extract()) {
3130 to_VOP3(ctx, instr);
3131 instr->operands[sgpr_idx] = Operand(sgpr);
3150 apply_omod_clamp_helper(opt_ctx& ctx, T* instr, ssa_info& def_info)
3152 if (!def_info.is_clamp() && (instr->clamp || instr->omod))
3156 instr->omod = 1;
3158 instr->omod = 2;
3160 instr->omod = 3;
3162 instr->clamp = true;
3169 apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3171 if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1 ||
3172 !instr_info.can_use_output_modifiers[(int)instr->opcode])
3175 bool can_vop3 = can_use_VOP3(ctx, instr);
3177 instr->opcode == aco_opcode::v_fma_mix_f32 || instr->opcode == aco_opcode::v_fma_mixlo_f16;
3178 if (!instr->isSDWA() && !is_mad_mix && !can_vop3)
3182 bool can_use_omod = (can_vop3 || ctx.program->gfx_level >= GFX9) && !instr->isVOP3P();
3183 if (instr->definitions[0].bytes() == 4)
3190 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3197 if (!ctx.uses[def_info.instr->definitions[0].tempId()])
3200 if (def_info.instr->definitions[0].bytes() != instr->definitions[0].bytes())
3204 assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
3206 if (instr->isSDWA()) {
3207 if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info))
3209 } else if (instr->isVOP3P()) {
3211 instr->vop3p().clamp = true;
3213 to_VOP3(ctx, instr);
3214 if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))
3218 instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
3219 ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_insert | label_f2f16;
3220 ctx.uses[def_info.instr->definitions[0].tempId()]--;
3225 /* Combine an p_insert (or p_extract, in some cases) instruction with instr.
3226 * p_insert(instr(...)) -> instr_insert().
3229 apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3231 if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1)
3234 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3239 if (!ctx.uses[def_info.instr->definitions[0].tempId()])
3243 assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
3245 SubdwordSel sel = parse_insert(def_info.instr);
3248 if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
3251 to_SDWA(ctx, instr);
3252 if (instr->sdwa().dst_sel.size() != 4)
3254 static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
3256 instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
3257 ctx.info[instr->definitions[0].tempId()].label = 0;
3258 ctx.uses[def_info.instr->definitions[0].tempId()]--;
3275 Instruction* ds = ctx.info[extract->operands[0].tempId()].instr;
3316 combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3318 if (instr->usesModifiers())
3322 Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
3328 if (instr->operands[!i].isTemp() &&
3329 instr->operands[!i].getTemp().type() == RegType::vgpr) {
3333 (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
3340 ctx.uses[instr->operands[i].tempId()]--;
3341 if (ctx.uses[instr->operands[i].tempId()])
3345 new_instr->operands[1] = instr->operands[!i];
3347 new_instr->definitions[0] = instr->definitions[0];
3348 instr = std::move(new_instr);
3349 ctx.info[instr->definitions[0].tempId()].label = 0;
3363 combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
3365 if (instr->usesModifiers())
3379 Instruction* op_instr = follow_operand(ctx, instr->operands[i]);
3401 instr->operands[!i],
3406 ctx.uses[instr->operands[i].tempId()]--;
3413 new_instr->definitions[0] = instr->definitions[0];
3414 instr = std::move(new_instr);
3415 ctx.info[instr->definitions[0].tempId()].label = 0;
3424 propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
3430 uint8_t tmp_lo = instr->opsel_lo;
3431 uint8_t tmp_hi = instr->opsel_hi;
3432 bool neg_lo[3] = {instr->neg_lo[0], instr->neg_lo[1], instr->neg_lo[2]};
3433 bool neg_hi[3] = {instr->neg_hi[0], instr->neg_hi[1], instr->neg_hi[2]};
3435 instr->opsel_lo = tmp_hi;
3437 instr->neg_lo[i] = neg_hi[i];
3440 instr->opsel_hi = tmp_lo;
3442 instr->neg_hi[i] = neg_lo[i];
3447 combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3449 VOP3P_instruction* vop3p = &instr->vop3p();
3452 if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) &&
3453 vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1 &&
3456 ssa_info& info = ctx.info[instr->operands[0].tempId()];
3457 if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
3458 VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
3461 instr->definitions[0].swapTemp(candidate->definitions[0]);
3462 ctx.info[candidate->definitions[0].tempId()].instr = candidate;
3463 ctx.uses[instr->definitions[0].tempId()]--;
3469 if (instr_info.can_use_input_modifiers[(int)instr->opcode]) {
3470 for (unsigned i = 0; i < instr->operands.size(); i++) {
3471 Operand& op = instr->operands[i];
3476 if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 &&
3477 info.instr->operands[1].constantEquals(0x3C00)) {
3479 VOP3P_instruction* fneg = &info.instr->vop3p();
3485 for (unsigned j = 0; j < instr->operands.size(); j++)
3486 ops[j] = instr->operands[j];
3487 ops[i] = info.instr->operands[0];
3488 if (!check_vop3_operands(ctx, instr->operands.size(), ops))
3493 instr->operands[i] = fneg->operands[0];
3514 if (instr->opcode == aco_opcode::v_pk_add_f16 || instr->opcode == aco_opcode::v_pk_add_u16) {
3515 bool fadd = instr->opcode == aco_opcode::v_pk_add_f16;
3516 if (fadd && instr->definitions[0].isPrecise())
3526 if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_vop3p())
3528 ssa_info& info = ctx.info[instr->operands[i].tempId()];
3530 if (info.instr->opcode != aco_opcode::v_pk_mul_f16 ||
3531 info.instr->definitions[0].isPrecise())
3534 if (info.instr->opcode != aco_opcode::v_pk_mul_lo_u16)
3538 Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
3539 if (ctx.uses[instr->operands[i].tempId()] >= uses || !check_vop3_operands(ctx, 3, op))
3543 if (info.instr->vop3p().clamp)
3546 mul_instr = info.instr;
3550 uses = ctx.uses[instr->operands[i].tempId()];
3557 Operand op[3] = {mul_instr->operands[0], mul_instr->operands[1], instr->operands[add_op_idx]};
3588 fma->definitions[0] = instr->definitions[0];
3589 instr = std::move(fma);
3590 ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
3596 can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3605 switch (instr->opcode) {
3616 if (instr->opcode == aco_opcode::v_fma_f32 && !ctx.program->dev.fused_mad_mix &&
3617 instr->definitions[0].isPrecise())
3620 if (instr->isVOP3())
3621 return !instr->vop3().omod && !(instr->vop3().opsel & 0x8);
3623 return instr->format == Format::VOP2;
3627 to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3629 bool is_add = instr->opcode != aco_opcode::v_mul_f32 && instr->opcode != aco_opcode::v_fma_f32;
3634 vop3p->opsel_lo = instr->isVOP3() ? ((instr->vop3().opsel & 0x7) << (is_add ? 1 : 0)) : 0x0;
3636 for (unsigned i = 0; i < instr->operands.size(); i++) {
3637 vop3p->operands[is_add + i] = instr->operands[i];
3638 vop3p->neg_lo[is_add + i] = instr->isVOP3() && instr->vop3().neg[i];
3639 vop3p->neg_lo[is_add + i] |= instr->isSDWA() && instr->sdwa().neg[i];
3640 vop3p->neg_hi[is_add + i] = instr->isVOP3() && instr->vop3().abs[i];
3641 vop3p->neg_hi[is_add + i] |= instr->isSDWA() && instr->sdwa().abs[i];
3642 vop3p->opsel_lo |= (instr->isSDWA() && instr->sdwa().sel[i].offset()) << (is_add + i);
3644 if (instr->opcode == aco_opcode::v_mul_f32) {
3651 if (instr->opcode == aco_opcode::v_sub_f32)
3653 else if (instr->opcode == aco_opcode::v_subrev_f32)
3656 vop3p->definitions[0] = instr->definitions[0];
3657 vop3p->clamp = instr->isVOP3() && instr->vop3().clamp;
3658 instr = std::move(vop3p);
3660 ctx.info[instr->definitions[0].tempId()].label &= label_f2f16 | label_clamp | label_mul;
3661 if (ctx.info[instr->definitions[0].tempId()].label & label_mul)
3662 ctx.info[instr->definitions[0].tempId()].instr = instr.get();
3666 combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3668 ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3671 Instruction* conv = def_info.instr;
3673 if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1)
3682 if (!instr->isVOP3P())
3683 to_mad_mix(ctx, instr);
3685 instr->opcode = aco_opcode::v_fma_mixlo_f16;
3686 instr->definitions[0].swapTemp(conv->definitions[0]);
3688 instr->definitions[0].setPrecise(true);
3689 ctx.info[instr->definitions[0].tempId()].label &= label_clamp;
3696 combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3698 if (!can_use_mad_mix(ctx, instr))
3701 for (unsigned i = 0; i < instr->operands.size(); i++) {
3702 if (!instr->operands[i].isTemp())
3704 Temp tmp = instr->operands[i].getTemp();
3708 Instruction* conv = ctx.info[tmp.id()].instr;
3718 if (get_operand_size(instr, i) != 32)
3724 for (unsigned j = 0; j < instr->operands.size(); j++)
3725 op[j] = instr->operands[j];
3727 if (!check_vop3_operands(ctx, instr->operands.size(), op))
3730 if (!instr->isVOP3P()) {
3732 instr->opcode != aco_opcode::v_mul_f32 && instr->opcode != aco_opcode::v_fma_f32;
3733 to_mad_mix(ctx, instr);
3739 instr->operands[i].setTemp(conv->operands[0].getTemp());
3741 instr->definitions[0].setPrecise(true);
3742 instr->vop3p().opsel_hi ^= 1u << i;
3744 instr->vop3p().opsel_lo |= 1u << i;
3747 if (!instr->vop3p().neg_hi[i]) {
3748 instr->vop3p().neg_lo[i] ^= neg;
3749 instr->vop3p().neg_hi[i] = abs;
3785 combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3787 if (instr->definitions.empty() || is_dead(ctx.uses, instr.get()))
3790 if (instr->isVALU()) {
3793 for (unsigned i = 0; i < instr->operands.size(); i++) {
3794 Operand& op = instr->operands[i];
3807 (info.instr->operands[0].getTemp().type() == RegType::vgpr ||
3808 instr->operands[i].getTemp().type() == RegType::sgpr) &&
3809 can_apply_extract(ctx, instr, i, info)) {
3811 apply_extract(ctx, instr, i, info);
3812 if (--ctx.uses[instr->operands[i].tempId()])
3813 ctx.uses[info.instr->operands[0].tempId()]++;
3814 instr->operands[i].setTemp(info.instr->operands[0].getTemp());
3818 if (can_apply_sgprs(ctx, instr))
3819 apply_sgprs(ctx, instr);
3820 combine_mad_mix(ctx, instr);
3821 while (apply_omod_clamp(ctx, instr) | combine_output_conversion(ctx, instr))
3823 apply_insert(ctx, instr);
3826 if (instr->isVOP3P() && instr->opcode != aco_opcode::v_fma_mix_f32 &&
3827 instr->opcode != aco_opcode::v_fma_mixlo_f16)
3828 return combine_vop3p(ctx, instr);
3830 if (instr->isSDWA() || instr->isDPP())
3833 if (instr->opcode == aco_opcode::p_extract) {
3834 ssa_info& info = ctx.info[instr->operands[0].tempId()];
3835 if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
3836 apply_extract(ctx, instr, 0, info);
3837 if (--ctx.uses[instr->operands[0].tempId()])
3838 ctx.uses[info.instr->operands[0].tempId()]++;
3839 instr->operands[0].setTemp(info.instr->operands[0].getTemp());
3842 apply_ds_extract(ctx, instr);
3856 if ((ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)) &&
3857 ctx.uses[instr->operands[1].tempId()] == 1) {
3858 Temp val = ctx.info[instr->definitions[0].tempId()].temp;
3863 Instruction* mul_instr = ctx.info[val.id()].instr;
3874 if (mul_instr->definitions[0].bytes() != instr->definitions[0].bytes())
3879 Definition def = instr->definitions[0];
3880 bool is_neg = ctx.info[instr->definitions[0].tempId()].is_neg();
3881 bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
3882 instr.reset(
3884 instr->operands[0] = mul_instr->operands[0];
3885 instr->operands[1] = mul_instr->operands[1];
3886 instr->definitions[0] = def;
3887 VOP3_instruction& new_mul = instr->vop3();
3903 ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
3909 (instr->opcode == aco_opcode::v_fma_mix_f32 ||
3910 instr->opcode == aco_opcode::v_fma_mixlo_f16) &&
3911 !instr->vop3p().neg_lo[0] &&
3912 ((instr->operands[0].constantEquals(0x3f800000) && (instr->vop3p().opsel_hi & 0x1) == 0) ||
3913 (instr->operands[0].constantEquals(0x3C00) && (instr->vop3p().opsel_hi & 0x1) &&
3914 !(instr->vop3p().opsel_lo & 0x1)));
3915 bool mad32 = instr->opcode == aco_opcode::v_add_f32 || instr->opcode == aco_opcode::v_sub_f32 ||
3916 instr->opcode == aco_opcode::v_subrev_f32;
3917 bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 ||
3918 instr->opcode == aco_opcode::v_subrev_f16;
3919 bool mad64 = instr->opcode == aco_opcode::v_add_f64;
3926 for (unsigned i = is_add_mix ? 1 : 0; i < instr->operands.size(); i++) {
3927 if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_mul())
3929 ssa_info& info = ctx.info[instr->operands[i].tempId()];
3932 if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
3934 if (info.instr->isVOP3P() && info.instr->vop3p().clamp)
3937 if (info.instr->isVOP3P() && instr->isVOP3() && instr->vop3().omod)
3940 if (is_add_mix && info.instr->definitions[0].bytes() == 2)
3943 if (get_operand_size(instr, i) != info.instr->definitions[0].bytes() * 8)
3946 bool legacy = info.instr->opcode == aco_opcode::v_mul_legacy_f32;
3947 bool mad_mix = is_add_mix || info.instr->isVOP3P();
3952 bool is_fma_precise = is_pow_of_two(ctx, info.instr->operands[0]) ||
3953 is_pow_of_two(ctx, info.instr->operands[1]);
3963 (!(info.instr->definitions[0].isPrecise() || instr->definitions[0].isPrecise()) ||
3973 Operand op[3] = {info.instr->operands[0], info.instr->operands[1],
3974 instr->operands[candidate_add_op_idx]};
3975 if (info.instr->isSDWA() || info.instr->isDPP() || !check_vop3_operands(ctx, 3, op) ||
3976 ctx.uses[instr->operands[i].tempId()] > uses)
3979 if (ctx.uses[instr->operands[i].tempId()] == uses) {
3981 unsigned new_idx = info.instr->definitions[0].tempId();
3986 mul_instr = info.instr;
3988 uses = ctx.uses[instr->operands[i].tempId()];
3995 instr->operands[add_op_idx]};
4027 if (instr->isVOP3()) {
4028 VOP3_instruction& vop3 = instr->vop3();
4042 } else if (instr->isVOP3P()) {
4043 VOP3P_instruction& vop3p = instr->vop3p();
4060 if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16)
4062 else if (instr->opcode == aco_opcode::v_subrev_f32 ||
4063 instr->opcode == aco_opcode::v_subrev_f16)
4066 aco_ptr<Instruction> add_instr = std::move(instr);
4083 instr = std::move(mad);
4108 instr = std::move(mad);
4110 instr->definitions[0] = add_instr->definitions[0];
4114 ctx.info[instr->definitions[0].tempId()].set_mad(instr.get(), ctx.mad_infos.size() - 1);
4119 else if (((instr->opcode == aco_opcode::v_mul_f32 &&
4121 instr->opcode == aco_opcode::v_mul_legacy_f32) &&
4122 !instr->usesModifiers() && !ctx.fp_mode.must_flush_denorms32) {
4124 if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2f() &&
4125 ctx.uses[instr->operands[i].tempId()] == 1 && instr->operands[!i].isTemp() &&
4126 instr->operands[!i].getTemp().type() == RegType::vgpr) {
4127 ctx.uses[instr->operands[i].tempId()]--;
4128 ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
4133 new_instr->operands[1] = instr->operands[!i];
4134 new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
4135 new_instr->definitions[0] = instr->definitions[0];
4136 instr = std::move(new_instr);
4137 ctx.info[instr->definitions[0].tempId()].label = 0;
4141 } else if (instr->opcode == aco_opcode::v_or_b32 && ctx.program->gfx_level >= GFX9) {
4142 if (combine_three_valu_op(ctx, instr, aco_opcode::s_or_b32, aco_opcode::v_or3_b32, "012",
4144 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32,
4146 } else if (combine_add_or_then_and_lshl(ctx, instr)) {
4148 } else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) {
4149 if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012",
4151 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32,
4154 } else if (instr->opcode == aco_opcode::v_add_u16) {
4156 ctx, instr, aco_opcode::v_mul_lo_u16,
4159 } else if (instr->opcode == aco_opcode::v_add_u16_e64) {
4160 combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120",
4162 } else if (instr->opcode == aco_opcode::v_add_u32) {
4163 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
4164 } else if (combine_add_bcnt(ctx, instr)) {
4165 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
4167 } else if (ctx.program->gfx_level >= GFX9 && !instr->usesModifiers()) {
4168 if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120",
4170 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32,
4172 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32,
4174 } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_u32, aco_opcode::v_add3_u32,
4176 } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add3_u32,
4178 } else if (combine_add_or_then_and_lshl(ctx, instr)) {
4181 } else if (instr->opcode == aco_opcode::v_add_co_u32 ||
4182 instr->opcode == aco_opcode::v_add_co_u32_e64) {
4183 bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0;
4184 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
4185 } else if (!carry_out && combine_add_bcnt(ctx, instr)) {
4186 } else if (!carry_out && combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
4188 } else if (!carry_out && combine_add_lshl(ctx, instr, false)) {
4190 } else if (instr->opcode == aco_opcode::v_sub_u32 || instr->opcode == aco_opcode::v_sub_co_u32 ||
4191 instr->opcode == aco_opcode::v_sub_co_u32_e64) {
4193 instr->opcode != aco_opcode::v_sub_u32 && ctx.uses[instr->definitions[1].tempId()] > 0;
4194 if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 2)) {
4195 } else if (!carry_out && combine_add_lshl(ctx, instr, true)) {
4197 } else if (instr->opcode == aco_opcode::v_subrev_u32 ||
4198 instr->opcode == aco_opcode::v_subrev_co_u32 ||
4199 instr->opcode == aco_opcode::v_subrev_co_u32_e64) {
4200 combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
4201 } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
4202 combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
4204 } else if ((instr->opcode == aco_opcode::s_add_u32 || instr->opcode == aco_opcode::s_add_i32) &&
4206 combine_salu_lshl_add(ctx, instr);
4207 } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
4208 combine_salu_not_bitwise(ctx, instr);
4209 } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 ||
4210 instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
4211 if (combine_ordering_test(ctx, instr)) {
4212 } else if (combine_comparison_ordering(ctx, instr)) {
4213 } else if (combine_constant_comparison_ordering(ctx, instr)) {
4214 } else if (combine_salu_n2(ctx, instr)) {
4216 } else if (instr->opcode == aco_opcode::v_and_b32) {
4217 combine_and_subbrev(ctx, instr);
4218 } else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) {
4224 ctx.info[instr->definitions[0].tempId()].set_mad(instr.get(), ctx.mad_infos.size() - 1);
4228 if (get_minmax_info(instr->opcode, &min, &max, &min3, &max3, &med3, &some_gfx9_only) &&
4230 if (combine_minmax(ctx, instr, instr->opcode == min ? max : min,
4231 instr->opcode == min ? min3 : max3)) {
4233 combine_clamp(ctx, instr, min, max, med3);
4239 if (instr->opcode == aco_opcode::s_andn2_b32 || instr->opcode == aco_opcode::s_andn2_b64)
4240 combine_inverse_comparison(ctx, instr);
4244 to_uniform_bool_instr(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4247 for (Operand& op : instr->operands) {
4254 switch (instr->opcode) {
4256 case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_and_b32; break;
4258 case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_or_b32; break;
4260 case aco_opcode::s_xor_b64: instr->opcode = aco_opcode::s_absdiff_i32; break;
4266 for (Operand& op : instr->operands) {
4278 Instruction* pred_instr = ctx.info[op.tempId()].instr;
4290 instr->definitions[0].setTemp(Temp(instr->definitions[0].tempId(), s1));
4291 assert(instr->operands[0].regClass() == s1);
4292 assert(instr->operands[1].regClass() == s1);
4297 select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4301 if (is_dead(ctx.uses, instr.get())) {
4302 instr.reset();
4307 if (instr->opcode == aco_opcode::p_split_vector) {
4311 for (unsigned i = 0, offset = 0; i < instr->definitions.size();
4312 offset += instr->definitions[i++].bytes()) {
4313 if (ctx.uses[instr->definitions[i].tempId()]) {
4320 if (num_used == 1 && ctx.info[instr->operands[0].tempId()].is_vec() &&
4321 ctx.uses[instr->operands[0].tempId()] == 1) {
4322 Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
4333 if (off != instr->operands[0].bytes() && op.bytes() == instr->definitions[idx].bytes()) {
4334 ctx.uses[instr->operands[0].tempId()]--;
4345 extract->definitions[0] = instr->definitions[idx];
4346 instr = std::move(extract);
4353 instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 &&
4354 split_offset % instr->definitions[idx].bytes() == 0) {
4357 extract->operands[0] = instr->operands[0];
4359 Operand::c32((uint32_t)split_offset / instr->definitions[idx].bytes());
4360 extract->definitions[0] = instr->definitions[idx];
4361 instr = std::move(extract);
4366 if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) {
4367 mad_info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags];
4371 if (instr->operands[0].isTemp())
4372 ctx.uses[instr->operands[0].tempId()]--;
4373 if (instr->operands[1].isTemp())
4374 ctx.uses[instr->operands[1].tempId()]--;
4375 instr.swap(mad_info->add_instr);
4379 else if (!instr->usesModifiers() && !instr->isVOP3P() &&
4380 instr->opcode != aco_opcode::v_fma_f64 &&
4381 instr->opcode != aco_opcode::v_mad_legacy_f32 &&
4382 instr->opcode != aco_opcode::v_fma_legacy_f32) {
4384 if ((instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) &&
4390 if (instr->opcode == aco_opcode::v_fma_legacy_f16)
4397 if (instr->operands[2].isTemp() &&
4398 ctx.info[instr->operands[2].tempId()].is_literal(get_operand_size(instr, 2))) {
4402 if (!instr->operands[i].isTemp())
4404 has_sgpr |= instr->operands[i].getTemp().type() == RegType::sgpr;
4405 has_vgpr |= instr->operands[i].getTemp().type() == RegType::vgpr;
4412 literal_uses = ctx.uses[instr->operands[2].tempId()];
4418 if (instr->operands[2].isTemp() && instr->operands[2].getTemp().type() == RegType::vgpr) {
4420 if (!instr->operands[i].isTemp())
4424 if (ctx.program->gfx_level < GFX10 && instr->operands[!i].isTemp() &&
4425 instr->operands[!i].getTemp().type() == RegType::sgpr)
4428 if (ctx.info[instr->operands[i].tempId()].is_literal(get_operand_size(instr, i)) &&
4429 ctx.uses[instr->operands[i].tempId()] < literal_uses) {
4431 literal_uses = ctx.uses[instr->operands[i].tempId()];
4444 ctx.uses[instr->operands[literal_idx].tempId()]--;
4454 if (instr->isBranch() && instr->operands.size() && instr->operands[0].isTemp() &&
4455 instr->operands[0].isFixed() && instr->operands[0].physReg() == scc) {
4456 ctx.info[instr->operands[0].tempId()].set_scc_needed();
4458 } else if ((instr->opcode == aco_opcode::s_cselect_b64 ||
4459 instr->opcode == aco_opcode::s_cselect_b32) &&
4460 instr->operands[2].isTemp()) {
4461 ctx.info[instr->operands[2].tempId()].set_scc_needed();
4462 } else if (instr->opcode == aco_opcode::p_wqm && instr->operands[0].isTemp() &&
4463 ctx.info[instr->definitions[0].tempId()].is_scc_needed()) {
4465 ctx.info[instr->operands[0].tempId()].set_scc_needed();
4468 instr->definitions[0].setFixed(scc);
4472 if (!instr->isSALU() && !instr->isVALU())
4476 if (instr->definitions.size() && ctx.uses[instr->definitions[0].tempId()] == 0 &&
4477 ctx.info[instr->definitions[0].tempId()].is_uniform_bitwise()) {
4478 bool transform_done = to_uniform_bool_instr(ctx, instr);
4480 if (transform_done && !ctx.info[instr->definitions[1].tempId()].is_scc_needed()) {
4483 uint32_t def0_id = instr->definitions[0].getTemp().id();
4484 uint32_t def1_id = instr->definitions[1].getTemp().id();
4485 instr->definitions[0].setTemp(Temp(def1_id, s1));
4486 instr->definitions[1].setTemp(Temp(def0_id, s1));
4493 if (instr->isVALU()) {
4494 for (unsigned i = 0; i < instr->operands.size(); i++) {
4495 if (!instr->operands[i].isTemp())
4497 ssa_info info = ctx.info[instr->operands[i].tempId()];
4500 if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
4501 (i == 0 || can_swap_operands(instr, &swapped_op)) &&
4502 can_use_DPP(instr, true, info.is_dpp8()) && !instr->isDPP()) {
4504 convert_to_DPP(instr, dpp8);
4506 DPP8_instruction* dpp = &instr->dpp8();
4508 dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
4510 instr->opcode = swapped_op;
4511 std::swap(instr->operands[0], instr->operands[1]);
4514 DPP16_instruction* dpp = &instr->dpp16();
4516 instr->opcode = swapped_op;
4517 std::swap(instr->operands[0], instr->operands[1]);
4521 dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
4522 dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
4523 dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
4524 dpp->abs[0] |= info.instr->dpp16().abs[0];
4526 if (--ctx.uses[info.instr->definitions[0].tempId()])
4527 ctx.uses[info.instr->operands[0].tempId()]++;
4528 instr->operands[0].setTemp(info.instr->operands[0].getTemp());
4534 if (instr->isSDWA() || (instr->isVOP3() && ctx.program->gfx_level < GFX10) ||
4535 (instr->isVOP3P() && ctx.program->gfx_level < GFX10))
4545 if (instr->isSALU() ||
4546 (ctx.program->gfx_level >= GFX10 && (can_use_VOP3(ctx, instr) || instr->isVOP3P())))
4547 num_operands = instr->operands.size();
4549 else if (instr->isVALU() && instr->operands.size() >= 3)
4558 Operand op = instr->operands[i];
4559 unsigned bits = get_operand_size(instr, i);
4561 if (instr->isVALU() && op.isTemp() && op.getTemp().type() == RegType::sgpr &&
4572 if (!alu_can_accept_constant(instr->opcode, i))
4587 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
4588 instr->opcode == aco_opcode::v_lshrrev_b64 ||
4589 instr->opcode == aco_opcode::v_ashrrev_i64;
4590 unsigned const_bus_limit = instr->isVALU() ? 1 : UINT32_MAX;
4605 if (instr->operands[i].isTemp() && instr->operands[i].tempId() == literal_id)
4606 ctx.uses[instr->operands[i].tempId()]--;
4666 try_convert_sopc_to_sopk(aco_ptr<Instruction>& instr)
4668 if (sopk_opcode_for_sopc(instr->opcode) == aco_opcode::num_opcodes)
4671 if (instr->operands[0].isLiteral()) {
4672 std::swap(instr->operands[0], instr->operands[1]);
4673 instr->opcode = sopc_32_swapped(instr->opcode);
4676 if (!instr->operands[1].isLiteral())
4679 if (instr->operands[0].isFixed() && instr->operands[0].physReg() >= 128)
4682 uint32_t value = instr->operands[1].constantValue();
4692 if (!value_is_i16 && sopc_is_signed(instr->opcode)) {
4693 if (instr->opcode == aco_opcode::s_cmp_lg_i32)
4694 instr->opcode = aco_opcode::s_cmp_lg_u32;
4695 else if (instr->opcode == aco_opcode::s_cmp_eq_i32)
4696 instr->opcode = aco_opcode::s_cmp_eq_u32;
4699 } else if (!value_is_u16 && !sopc_is_signed(instr->opcode)) {
4700 if (instr->opcode == aco_opcode::s_cmp_lg_u32)
4701 instr->opcode = aco_opcode::s_cmp_lg_i32;
4702 else if (instr->opcode == aco_opcode::s_cmp_eq_u32)
4703 instr->opcode = aco_opcode::s_cmp_eq_i32;
4710 instr->format = Format::SOPK;
4711 SOPK_instruction* instr_sopk = &instr->sopk();
4719 apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4722 if (!instr)
4726 if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) {
4727 mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].instr->pass_flags];
4729 (ctx.uses[instr->operands[info->literal_idx].tempId()] == 0 || info->literal_idx == 2)) {
4734 if (instr->opcode == aco_opcode::v_fma_f32)
4736 else if (instr->opcode == aco_opcode::v_mad_f16 ||
4737 instr->opcode == aco_opcode::v_mad_legacy_f16)
4739 else if (instr->opcode == aco_opcode::v_fma_f16)
4744 new_mad->operands[0] = instr->operands[0];
4745 new_mad->operands[1] = instr->operands[1];
4750 new_mad->operands[0] = instr->operands[1 - info->literal_idx];
4751 new_mad->operands[1] = instr->operands[2];
4754 Operand::c32(ctx.info[instr->operands[info->literal_idx].tempId()].val);
4755 new_mad->definitions[0] = instr->definitions[0];
4762 if (instr->isSALU() || instr->isVALU()) {
4763 for (unsigned i = 0; i < instr->operands.size(); i++) {
4764 Operand op = instr->operands[i];
4765 unsigned bits = get_operand_size(instr, i);
4768 instr->format = withoutDPP(instr->format);
4769 if (instr->isVALU() && i > 0 && instr->format != Format::VOP3P)
4770 to_VOP3(ctx, instr);
4771 instr->operands[i] = literal;
4776 if (instr->isSOPC())
4777 try_convert_sopc_to_sopk(instr);
4780 if (instr->opcode == aco_opcode::s_add_u32 && ctx.uses[instr->definitions[1].tempId()] == 0 &&
4781 (instr->operands[0].isLiteral() || instr->operands[1].isLiteral()))
4782 instr->opcode = aco_opcode::s_add_i32;
4784 ctx.instructions.emplace_back(std::move(instr));
4798 for (aco_ptr<Instruction>& instr : block.instructions)
4799 label_instruction(ctx, instr);
4807 for (aco_ptr<Instruction>& instr : block.instructions)
4808 combine_instruction(ctx, instr);
4825 for (aco_ptr<Instruction>& instr : block.instructions)
4826 apply_literals(ctx, instr);