Lines Matching defs:instr
40 unsigned get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
42 void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte,
45 get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc);
46 void add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg);
208 DefInfo(ra_ctx& ctx, aco_ptr<Instruction>& instr, RegClass rc_, int operand) : rc(rc_)
217 stride = get_subdword_operand_stride(ctx.program->gfx_level, instr, operand, rc);
219 std::pair<unsigned, unsigned> info = get_subdword_definition_info(ctx.program, instr, rc);
232 } else if (instr->isMIMG() && instr->mimg().d16 && ctx.program->gfx_level <= GFX9) {
241 bool imageGather4D16Bug = operand == -1 && rc == v2 && instr->mimg().dmask != 0xF;
493 get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
496 if (instr->isPseudo()) {
498 if (instr->opcode == aco_opcode::p_as_uniform)
507 if (instr->isVALU()) {
508 if (can_use_SDWA(gfx_level, instr, false))
510 if (can_use_opsel(gfx_level, instr->opcode, idx))
512 if (instr->format == Format::VOP3P)
516 switch (instr->opcode) {
534 add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte,
538 if (instr->isPseudo() || byte == 0)
542 if (instr->isVALU()) {
544 if (instr->format == Format::VOP3) {
546 instr->vop3().opsel |= 1 << idx;
549 if (instr->isVOP3P()) {
550 assert(byte == 2 && !(instr->vop3p().opsel_lo & (1 << idx)));
551 instr->vop3p().opsel_lo |= 1 << idx;
552 instr->vop3p().opsel_hi |= 1 << idx;
555 if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
557 case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
558 case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
559 case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
560 case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
566 assert(can_use_SDWA(gfx_level, instr, false));
567 convert_to_SDWA(gfx_level, instr);
572 if (instr->opcode == aco_opcode::ds_write_b8)
573 instr->opcode = aco_opcode::ds_write_b8_d16_hi;
574 else if (instr->opcode == aco_opcode::ds_write_b16)
575 instr->opcode = aco_opcode::ds_write_b16_d16_hi;
576 else if (instr->opcode == aco_opcode::buffer_store_byte)
577 instr->opcode = aco_opcode::buffer_store_byte_d16_hi;
578 else if (instr->opcode == aco_opcode::buffer_store_short)
579 instr->opcode = aco_opcode::buffer_store_short_d16_hi;
580 else if (instr->opcode == aco_opcode::buffer_store_format_d16_x)
581 instr->opcode = aco_opcode::buffer_store_format_d16_hi_x;
582 else if (instr->opcode == aco_opcode::flat_store_byte)
583 instr->opcode = aco_opcode::flat_store_byte_d16_hi;
584 else if (instr->opcode == aco_opcode::flat_store_short)
585 instr->opcode = aco_opcode::flat_store_short_d16_hi;
586 else if (instr->opcode == aco_opcode::scratch_store_byte)
587 instr->opcode = aco_opcode::scratch_store_byte_d16_hi;
588 else if (instr->opcode == aco_opcode::scratch_store_short)
589 instr->opcode = aco_opcode::scratch_store_short_d16_hi;
590 else if (instr->opcode == aco_opcode::global_store_byte)
591 instr->opcode = aco_opcode::global_store_byte_d16_hi;
592 else if (instr->opcode == aco_opcode::global_store_short)
593 instr->opcode = aco_opcode::global_store_short_d16_hi;
601 get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc)
605 if (instr->isPseudo()) {
612 if (instr->isVALU() || instr->isVINTRP()) {
615 if (can_use_SDWA(gfx_level, instr, false))
619 if (instr_is_16bit(gfx_level, instr->opcode))
623 if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
624 can_use_opsel(gfx_level, instr->opcode, -1))
630 switch (instr->opcode) {
666 if (instr->isMIMG() && instr->mimg().d16 && !program->dev.sram_ecc_enabled) {
675 add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg)
677 if (instr->isPseudo())
680 if (instr->isVALU()) {
682 assert(instr->definitions[0].bytes() <= 2);
684 if (reg.byte() == 0 && instr_is_16bit(gfx_level, instr->opcode))
688 if (instr->format == Format::VOP3) {
690 assert(can_use_opsel(gfx_level, instr->opcode, -1));
691 instr->vop3().opsel |= (1 << 3); /* dst in high half */
695 if (instr->opcode == aco_opcode::v_fma_mixlo_f16) {
696 instr->opcode = aco_opcode::v_fma_mixhi_f16;
701 assert(can_use_SDWA(gfx_level, instr, false));
702 convert_to_SDWA(gfx_level, instr);
708 else if (instr->opcode == aco_opcode::buffer_load_ubyte_d16)
709 instr->opcode = aco_opcode::buffer_load_ubyte_d16_hi;
710 else if (instr->opcode == aco_opcode::buffer_load_sbyte_d16)
711 instr->opcode = aco_opcode::buffer_load_sbyte_d16_hi;
712 else if (instr->opcode == aco_opcode::buffer_load_short_d16)
713 instr->opcode = aco_opcode::buffer_load_short_d16_hi;
714 else if (instr->opcode == aco_opcode::buffer_load_format_d16_x)
715 instr->opcode = aco_opcode::buffer_load_format_d16_hi_x;
716 else if (instr->opcode == aco_opcode::flat_load_ubyte_d16)
717 instr->opcode = aco_opcode::flat_load_ubyte_d16_hi;
718 else if (instr->opcode == aco_opcode::flat_load_sbyte_d16)
719 instr->opcode = aco_opcode::flat_load_sbyte_d16_hi;
720 else if (instr->opcode == aco_opcode::flat_load_short_d16)
721 instr->opcode = aco_opcode::flat_load_short_d16_hi;
722 else if (instr->opcode == aco_opcode::scratch_load_ubyte_d16)
723 instr->opcode = aco_opcode::scratch_load_ubyte_d16_hi;
724 else if (instr->opcode == aco_opcode::scratch_load_sbyte_d16)
725 instr->opcode = aco_opcode::scratch_load_sbyte_d16_hi;
726 else if (instr->opcode == aco_opcode::scratch_load_short_d16)
727 instr->opcode = aco_opcode::scratch_load_short_d16_hi;
728 else if (instr->opcode == aco_opcode::global_load_ubyte_d16)
729 instr->opcode = aco_opcode::global_load_ubyte_d16_hi;
730 else if (instr->opcode == aco_opcode::global_load_sbyte_d16)
731 instr->opcode = aco_opcode::global_load_sbyte_d16_hi;
732 else if (instr->opcode == aco_opcode::global_load_short_d16)
733 instr->opcode = aco_opcode::global_load_short_d16_hi;
734 else if (instr->opcode == aco_opcode::ds_read_u8_d16)
735 instr->opcode = aco_opcode::ds_read_u8_d16_hi;
736 else if (instr->opcode == aco_opcode::ds_read_i8_d16)
737 instr->opcode = aco_opcode::ds_read_i8_d16_hi;
738 else if (instr->opcode == aco_opcode::ds_read_u16_d16)
739 instr->opcode = aco_opcode::ds_read_u16_d16_hi;
768 aco_ptr<Instruction>& instr, UpdateRenames flags)
788 for (Definition& def : instr->definitions) {
813 for (Operand& op : instr->operands) {
836 for (unsigned i = 0; i < instr->operands.size(); i++) {
837 Operand& op = instr->operands[i];
1042 aco_ptr<Instruction>& instr, const PhysRegInterval def_reg,
1047 for (unsigned i = 0; i < instr->operands.size(); i++) {
1048 if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id &&
1049 instr->operands[i].isKillBeforeDef()) {
1050 assert(!reg_file.test(reg, instr->operands[i].bytes()));
1053 reg.reg_b += instr->operands[i].bytes();
1064 for (unsigned i = 0; i < instr->operands.size(); i++) {
1066 reg.reg_b += instr->operands[i].bytes();
1071 if (instr->operands[i].isTemp() && instr->operands[i].isFirstKill() &&
1072 instr->operands[i].regClass() == info.rc) {
1073 assignment& op = ctx.assignments[instr->operands[i].tempId()];
1076 reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
1078 parallelcopies.emplace_back(instr->operands[i], pc_def);
1092 aco_ptr<Instruction>& instr, const PhysRegInterval def_reg)
1104 if (instr->opcode == aco_opcode::p_create_vector) {
1106 get_reg_for_create_vector_copy(ctx, reg_file, parallelcopies, instr, def_reg, info, id);
1108 for (unsigned i = 0; !is_phi(instr) && i < instr->operands.size(); i++) {
1109 if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id) {
1110 info = DefInfo(ctx, instr, var.rc, i);
1111 if (instr->operands[i].isKillBeforeDef()) {
1181 for (const Operand& op : instr->operands) {
1221 if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, bounds, instr, def_reg))
1238 aco_ptr<Instruction>& instr)
1251 for (unsigned j = 0; !is_phi(instr) && j < instr->operands.size(); j++) {
1252 Operand& op = instr->operands[j];
1356 if (instr->opcode == aco_opcode::p_create_vector) {
1357 for (Operand& op : instr->operands) {
1366 if (!is_phi(instr) && instr->opcode != aco_opcode::p_create_vector) {
1367 for (Operand& op : instr->operands) {
1374 if (!get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, best_win))
1384 get_reg_specified(ra_ctx& ctx, RegisterFile& reg_file, RegClass rc, aco_ptr<Instruction>& instr,
1393 sdw_def_info = get_subdword_definition_info(ctx.program, instr, rc);
1520 is_mimg_vaddr_intact(ra_ctx& ctx, RegisterFile& reg_file, Instruction* instr)
1523 for (unsigned i = 0; i < instr->operands.size() - 3u; i++) {
1524 Operand op = instr->operands[i + 3];
1532 PhysRegInterval vec = PhysRegInterval{first, instr->operands.size() - 3u};
1552 get_reg_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp, aco_ptr<Instruction>& instr)
1577 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg))
1597 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg))
1606 std::vector<std::pair<Operand, Definition>>& parallelcopies, aco_ptr<Instruction>& instr,
1618 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg))
1629 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, affinity.reg))
1634 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc))
1641 res = get_reg_vector(ctx, reg_file, temp, instr);
1646 DefInfo info(ctx, instr, temp.regClass(), operand_index);
1657 res = get_reg_impl(ctx, reg_file, parallelcopies, info, instr);
1671 for (Definition def : instr->definitions) {
1677 for (Operand op : instr->operands) {
1694 for (Operand op : instr->operands) {
1702 for (Definition def : instr->definitions) {
1710 return get_reg(ctx, reg_file, temp, parallelcopies, instr, operand_index);
1716 aco_ptr<Instruction>& instr)
1733 for (unsigned i = 0, offset = 0; i < instr->operands.size();
1734 offset += instr->operands[i].bytes(), i++) {
1736 if (!instr->operands[i].isTemp() || !instr->operands[i].isKillBeforeDef() ||
1737 instr->operands[i].getTemp().type() != rc.type())
1740 if (offset > instr->operands[i].physReg().reg_b)
1743 unsigned reg_lower = instr->operands[i].physReg().reg_b - offset;
1796 for (unsigned j = 0, offset2 = 0; j < instr->operands.size();
1797 offset2 += instr->operands[j].bytes(), j++) {
1798 Operand& op = instr->operands[j];
1816 return get_reg(ctx, reg_file, temp, parallelcopies, instr);
1818 DefInfo info(ctx, instr, rc, -1);
1826 for (Operand& op : instr->operands) {
1830 for (unsigned i = 0; i < instr->operands.size(); i++) {
1831 if ((correct_pos_mask >> i) & 1u && instr->operands[i].isKill())
1832 tmp_file.clear(instr->operands[i]);
1841 get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, PhysRegInterval{best_pos, size});
1846 return get_reg(ctx, reg_file, temp, parallelcopies, instr);
1848 return get_reg_create_vector(ctx, reg_file, temp, parallelcopies, instr);
1858 handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
1860 if (instr->format != Format::PSEUDO)
1864 switch (instr->opcode) {
1875 for (Definition& def : instr->definitions) {
1882 for (Operand& op : instr->operands) {
1893 instr->pseudo().tmp_in_scc = reg_file[scc];
1909 instr->pseudo().scratch_sgpr = PhysReg{(unsigned)reg};
1913 operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
1916 if (instr->operands[idx].isFixed())
1917 return instr->operands[idx].physReg() == reg;
1919 bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
1920 instr->opcode == aco_opcode::v_writelane_b32_e64;
1924 instr->operands[!idx].isTemp() &&
1925 (!instr->operands[!idx].isFixed() || instr->operands[!idx].physReg() != m0);
1926 if (is_other_sgpr && instr->operands[!idx].tempId() != instr->operands[idx].tempId()) {
1927 instr->operands[idx].setFixed(m0);
1933 unsigned stride = get_subdword_operand_stride(gfx_level, instr, idx, rc);
1938 switch (instr->format) {
1942 (reg != vcc || (instr->definitions.empty() && idx == 2) ||
1953 aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
1972 DefInfo info(ctx, instr, operand.regClass(), -1);
1973 get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, info.bounds, instr,
1981 dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
1988 update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops | fill_killed_ops);
2269 for (aco_ptr<Instruction>& instr : current.instructions) {
2271 if (idx == loop_header_idx && is_phi(instr))
2274 for (Operand& op : instr->operands) {
2311 for (aco_ptr<Instruction>& instr : block.instructions) {
2312 if (!is_phi(instr))
2314 Operand& operand = instr->operands[0];
2331 for (aco_ptr<Instruction>& instr : block.instructions) {
2332 if (!is_phi(instr))
2335 instr->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
2337 for (unsigned i = 0; i < instr->operands.size(); i++) {
2338 Operand& operand = instr->operands[i];
2378 aco_ptr<Instruction>& instr = *rit;
2379 if (is_phi(instr))
2383 if (instr->opcode == aco_opcode::p_create_vector) {
2384 for (const Operand& op : instr->operands) {
2386 op.getTemp().type() == instr->definitions[0].getTemp().type())
2387 ctx.vectors[op.tempId()] = instr.get();
2389 } else if (instr->format == Format::MIMG && instr->operands.size() > 4) {
2390 for (unsigned i = 3; i < instr->operands.size(); i++)
2391 ctx.vectors[instr->operands[i].tempId()] = instr.get();
2392 } else if (instr->opcode == aco_opcode::p_split_vector &&
2393 instr->operands[0].isFirstKillBeforeDef()) {
2394 ctx.split_vectors[instr->operands[0].tempId()] = instr.get();
2395 } else if (instr->isVOPC() && !instr->isVOP3()) {
2396 if (!instr->isSDWA() || ctx.program->gfx_level == GFX8)
2397 ctx.assignments[instr->definitions[0].tempId()].vcc = true;
2398 } else if (instr->isVOP2() && !instr->isVOP3()) {
2399 if (instr->operands.size() == 3 && instr->operands[2].isTemp() &&
2400 instr->operands[2].regClass().type() == RegType::sgpr)
2401 ctx.assignments[instr->operands[2].tempId()].vcc = true;
2402 if (instr->definitions.size() == 2)
2403 ctx.assignments[instr->definitions[1].tempId()].vcc = true;
2404 } else if (instr->opcode == aco_opcode::s_and_b32 ||
2405 instr->opcode == aco_opcode::s_and_b64) {
2409 if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() &&
2410 instr->operands[1].isFixed() && instr->operands[1].physReg() == exec)
2411 ctx.assignments[instr->operands[0].tempId()].vcc = true;
2415 for (const Operand& op : instr->operands) {
2421 for (unsigned i = 0; i < instr->definitions.size(); i++) {
2422 const Definition& def = instr->definitions[i];
2434 switch (instr->opcode) {
2435 case aco_opcode::p_parallelcopy: op = instr->operands[i]; break;
2439 case aco_opcode::v_writelane_b32_e64: op = instr->operands[2]; break;
2449 if (instr->usesModifiers())
2451 op = instr->operands[2];
2456 if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32)
2458 op = instr->operands[2];
2474 aco_ptr<Instruction>& instr = *rit;
2475 assert(is_phi(instr));
2477 live.erase(instr->definitions[0].tempId());
2478 if (instr->definitions[0].isKill() || instr->definitions[0].isFixed())
2481 assert(instr->definitions[0].isTemp());
2483 temp_to_phi_ressources.find(instr->definitions[0].tempId());
2488 phi_ressources[index][0] = instr->definitions[0].getTemp();
2491 phi_ressources.emplace_back(std::vector<Temp>{instr->definitions[0].getTemp()});
2495 for (const Operand& op : instr->operands) {
2496 if (op.isTemp() && op.isKill() && op.regClass() == instr->definitions[0].regClass()) {
2546 aco_ptr<Instruction>& instr)
2549 if ((instr->opcode != aco_opcode::v_mad_f32 &&
2550 (instr->opcode != aco_opcode::v_fma_f32 || program->gfx_level < GFX10) &&
2551 instr->opcode != aco_opcode::v_mad_f16 && instr->opcode != aco_opcode::v_mad_legacy_f16 &&
2552 (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) &&
2553 (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) &&
2554 (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) &&
2555 (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) &&
2556 (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) ||
2557 !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() ||
2558 instr->operands[2].getTemp().type() != RegType::vgpr ||
2559 ((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) &&
2560 (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) ||
2561 instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 ||
2562 instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0)
2565 if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)
2566 std::swap(instr->operands[0], instr->operands[1]);
2568 unsigned def_id = instr->definitions[0].tempId();
2571 if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
2572 !register_file.test(affinity.reg, instr->operands[2].bytes()))
2580 instr->format = Format::VOP2;
2581 switch (instr->opcode) {
2582 case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
2583 case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
2585 case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
2586 case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
2587 case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
2588 case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
2589 case aco_opcode::v_mad_legacy_f32: instr->opcode = aco_opcode::v_mac_legacy_f32; break;
2590 case aco_opcode::v_fma_legacy_f32: instr->opcode = aco_opcode::v_fmac_legacy_f32; break;
2597 aco_ptr<Instruction>& instr)
2600 if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_mul_i32 &&
2601 instr->opcode != aco_opcode::s_cselect_b32)
2606 if (instr->opcode != aco_opcode::s_cselect_b32 && instr->operands[1].isLiteral())
2609 if (!instr->operands[!literal_idx].isTemp() ||
2610 !instr->operands[!literal_idx].isKillBeforeDef() ||
2611 instr->operands[!literal_idx].getTemp().type() != RegType::sgpr ||
2612 instr->operands[!literal_idx].physReg() >= 128)
2615 if (!instr->operands[literal_idx].isLiteral())
2619 uint32_t value = instr->operands[literal_idx].constantValue();
2623 unsigned def_id = instr->definitions[0].tempId();
2626 if (affinity.assigned && affinity.reg != instr->operands[!literal_idx].physReg() &&
2627 !register_file.test(affinity.reg, instr->operands[!literal_idx].bytes()))
2633 instr->format = Format::SOPK;
2634 SOPK_instruction* instr_sopk = &instr->sopk();
2653 aco_ptr<Instruction>& instr)
2655 if (instr->isVALU())
2656 optimize_encoding_vop2(program, ctx, register_file, instr);
2657 if (instr->isSALU())
2658 optimize_encoding_sopk(program, ctx, register_file, instr);
2703 auto NonPhi = [](aco_ptr<Instruction>& instr) -> bool { return instr && !is_phi(instr); };
2707 aco_ptr<Instruction>& instr = *instr_it;
2711 if (instr->opcode == aco_opcode::p_logical_end) {
2714 instructions.emplace_back(std::move(instr));
2738 instructions.emplace_back(std::move(instr));
2743 if (instr->opcode == aco_opcode::p_branch) {
2745 instructions.emplace_back(std::move(instr));
2751 assert(!is_phi(instr));
2756 for (unsigned i = 0; i < instr->operands.size(); ++i) {
2757 auto& operand = instr->operands[i];
2766 if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass()))
2769 get_reg_for_operand(ctx, register_file, parallelcopy, instr, operand, i);
2771 if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->gfx_level == GFX6) ||
2772 (instr->isDS() && instr->ds().gds)) {
2779 for (const Operand& op : instr->operands) {
2784 optimize_encoding(program, ctx, register_file, instr);
2792 if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
2793 instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 ||
2794 instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
2795 instr->opcode == aco_opcode::v_mac_legacy_f32 ||
2796 instr->opcode == aco_opcode::v_fmac_legacy_f32 ||
2797 instr->opcode == aco_opcode::v_pk_fmac_f16 ||
2798 instr->opcode == aco_opcode::v_writelane_b32 ||
2799 instr->opcode == aco_opcode::v_writelane_b32_e64 ||
2800 instr->opcode == aco_opcode::v_dot4c_i32_i8) {
2801 assert(instr->definitions[0].bytes() == instr->operands[2].bytes() ||
2802 instr->operands[2].regClass() == v1);
2803 instr->definitions[0].setFixed(instr->operands[2].physReg());
2804 } else if (instr->opcode == aco_opcode::s_addk_i32 ||
2805 instr->opcode == aco_opcode::s_mulk_i32 ||
2806 instr->opcode == aco_opcode::s_cmovk_i32) {
2807 assert(instr->definitions[0].bytes() == instr->operands[0].bytes());
2808 instr->definitions[0].setFixed(instr->operands[0].physReg());
2809 } else if (instr->isMUBUF() && instr->definitions.size() == 1 &&
2810 instr->operands.size() == 4) {
2811 assert(instr->definitions[0].bytes() == instr->operands[3].bytes());
2812 instr->definitions[0].setFixed(instr->operands[3].physReg());
2813 } else if (instr->isMIMG() && instr->definitions.size() == 1 &&
2814 !instr->operands[2].isUndefined()) {
2815 assert(instr->definitions[0].bytes() == instr->operands[2].bytes());
2816 instr->definitions[0].setFixed(instr->operands[2].physReg());
2822 for (unsigned i = 0; i < instr->definitions.size(); ++i) {
2823 auto& definition = instr->definitions[i];
2837 for (const Operand& op : instr->operands) {
2843 DefInfo info(ctx, instr, definition.regClass(), -1);
2844 success = get_regs_for_copies(ctx, tmp_file, parallelcopy, vars, info.bounds, instr,
2848 update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0);
2860 for (unsigned i = 0; i < instr->definitions.size(); ++i) {
2861 Definition* definition = &instr->definitions[i];
2867 if (instr->opcode == aco_opcode::p_split_vector) {
2868 PhysReg reg = instr->operands[0].physReg();
2871 reg.reg_b += instr->definitions[j].bytes();
2872 if (get_reg_specified(ctx, register_file, rc, instr, reg)) {
2875 RegClass vec_rc = RegClass::get(rc.type(), instr->operands[0].bytes());
2879 if (res.second && get_reg_specified(ctx, register_file, rc, instr, reg))
2881 } else if (instr->definitions[i - 1].isFixed()) {
2882 reg = instr->definitions[i - 1].physReg();
2883 reg.reg_b += instr->definitions[i - 1].bytes();
2884 if (get_reg_specified(ctx, register_file, rc, instr, reg))
2887 } else if (instr->opcode == aco_opcode::p_wqm ||
2888 instr->opcode == aco_opcode::p_parallelcopy) {
2889 PhysReg reg = instr->operands[i].physReg();
2890 if (instr->operands[i].isTemp() &&
2891 instr->operands[i].getTemp().type() == definition->getTemp().type() &&
2894 } else if (instr->opcode == aco_opcode::p_extract_vector) {
2895 PhysReg reg = instr->operands[0].physReg();
2896 reg.reg_b += definition->bytes() * instr->operands[1].constantValue();
2897 if (get_reg_specified(ctx, register_file, definition->regClass(), instr, reg))
2899 } else if (instr->opcode == aco_opcode::p_create_vector) {
2901 parallelcopy, instr);
2902 update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0);
2909 PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, instr);
2912 add_subdword_definition(program, instr, reg);
2913 definition = &instr->definitions[i]; /* add_subdword_definition can invalidate
2917 definition->setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr));
2919 update_renames(ctx, register_file, parallelcopy, instr,
2920 instr->opcode != aco_opcode::p_create_vector ? rename_not_killed_ops
2933 handle_pseudo(ctx, register_file, instr.get());
2937 for (const Definition& def : instr->definitions) {
2941 for (unsigned i = 0; i < instr->operands.size(); i++) {
2942 const Operand& op = instr->operands[i];
2946 add_subdword_operand(ctx, instr, i, op.physReg().byte(), op.regClass());
2991 for (const Definition& def : instr->definitions) {
2995 for (const Operand& op : instr->operands) {
3010 !instr->isVOP3() &&
3011 ((instr->format == Format::VOPC && !(instr->definitions[0].physReg() == vcc)) ||
3012 (instr->opcode == aco_opcode::v_cndmask_b32 &&
3013 !(instr->operands[2].physReg() == vcc)) ||
3014 ((instr->opcode == aco_opcode::v_add_co_u32 ||
3015 instr->opcode == aco_opcode::v_addc_co_u32 ||
3016 instr->opcode == aco_opcode::v_sub_co_u32 ||
3017 instr->opcode == aco_opcode::v_subb_co_u32 ||
3018 instr->opcode == aco_opcode::v_subrev_co_u32 ||
3019 instr->opcode == aco_opcode::v_subbrev_co_u32) &&
3020 !(instr->definitions[1].physReg() == vcc)) ||
3021 ((instr->opcode == aco_opcode::v_addc_co_u32 ||
3022 instr->opcode == aco_opcode::v_subb_co_u32 ||
3023 instr->opcode == aco_opcode::v_subbrev_co_u32) &&
3024 !(instr->operands[2].physReg() == vcc)));
3028 if (instr->operands.size() && instr->operands[0].isLiteral() &&
3032 for (const Operand& op : instr->operands) {
3040 for (const Definition& def : instr->definitions)
3042 for (const Operand& op : instr->operands) {
3048 PhysReg reg = get_reg(ctx, tmp_file, tmp, parallelcopy, instr);
3049 update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops);
3058 mov->operands[0] = instr->operands[0];
3062 instr->operands[0] = Operand(tmp);
3063 instr->operands[0].setFixed(reg);
3064 instr->operands[0].setFirstKill(true);
3070 aco_ptr<Instruction> tmp = std::move(instr);
3072 instr.reset(create_instruction<VOP3_instruction>(
3074 std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin());
3075 std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin());