Lines Matching refs:ctx
42 void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte,
208 DefInfo(ra_ctx& ctx, aco_ptr<Instruction>& instr, RegClass rc_, int operand) : rc(rc_)
213 bounds = get_reg_bounds(ctx.program, rc.type());
217 stride = get_subdword_operand_stride(ctx.program->gfx_level, instr, operand, rc);
219 std::pair<unsigned, unsigned> info = get_subdword_definition_info(ctx.program, instr, rc);
232 } else if (instr->isMIMG() && instr->mimg().d16 && ctx.program->gfx_level <= GFX9) {
242 assert(ctx.program->gfx_level == GFX9 && "Image D16 on GFX8 not supported.");
375 std::vector<unsigned> find_vars(ra_ctx& ctx, RegisterFile& reg_file,
418 print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
420 PhysRegInterval regs = get_reg_bounds(ctx.program, vgprs ? RegType::vgpr : RegType::sgpr);
462 for (unsigned id : find_vars(ctx, reg_file, regs)) {
463 const assignment& var = ctx.assignments[id];
474 if (ctx.orig_names.count(size_id.second) &&
475 ctx.orig_names[size_id.second].id() != size_id.second) {
476 printf("(was %%%d) ", ctx.orig_names[size_id.second].id());
534 add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte,
537 amd_gfx_level gfx_level = ctx.program->gfx_level;
745 adjust_max_used_regs(ra_ctx& ctx, RegClass rc, unsigned reg)
747 uint16_t max_addressible_sgpr = ctx.sgpr_limit;
752 ctx.max_used_vgpr = std::max(ctx.max_used_vgpr, hi);
755 ctx.max_used_sgpr = std::max(ctx.max_used_sgpr, std::min(hi, max_addressible_sgpr));
766 update_renames(ra_ctx& ctx, RegisterFile& reg_file,
793 ctx.assignments[def.tempId()].reg = def.physReg();
808 ctx.assignments[other.second.tempId()].reg = other.second.physReg();
829 copy.second.setTemp(ctx.program->allocateTmp(copy.second.regClass()));
830 ctx.assignments.emplace_back(copy.second.physReg(), copy.second.regClass());
831 assert(ctx.assignments.size() == ctx.program->peekAllocationId());
871 get_reg_simple(ra_ctx& ctx, RegisterFile& reg_file, DefInfo info)
884 std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, new_info);
890 { return reg_file[reg_index] == 0 && !ctx.war_hint[reg_index]; };
896 (rc.type() == RegType::vgpr) ? (256 + ctx.max_used_vgpr) : ctx.max_used_sgpr;
918 adjust_max_used_regs(ctx, rc, gap.lo());
943 adjust_max_used_regs(ctx, rc, best_gap.lo());
955 adjust_max_used_regs(ctx, rc, reg_win.lo());
982 adjust_max_used_regs(ctx, rc, entry.first);
994 find_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval)
1020 collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval)
1022 std::vector<unsigned> ids = find_vars(ctx, reg_file, reg_interval);
1026 assignment& var_a = ctx.assignments[a];
1027 assignment& var_b = ctx.assignments[b];
1033 assignment& var = ctx.assignments[id];
1040 get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
1056 if (ctx.program->gfx_level <= GFX8)
1060 assignment& var = ctx.assignments[id];
1073 assignment& op = ctx.assignments[instr->operands[i].tempId()];
1089 get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
1096 assignment& var = ctx.assignments[id];
1097 DefInfo info = DefInfo(ctx, ctx.pseudo_dummy, var.rc, -1);
1106 get_reg_for_create_vector_copy(ctx, reg_file, parallelcopies, instr, def_reg, info, id);
1110 info = DefInfo(ctx, instr, var.rc, i);
1113 res = get_reg_simple(ctx, reg_file, info);
1123 res = get_reg_simple(ctx, reg_file, info);
1127 res = get_reg_simple(ctx, reg_file, info);
1175 if (!(ctx.block->kind & block_kind_top_level) &&
1176 ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
1187 if (!is_kill && ctx.assignments[reg_file[j]].rc.size() >= size) {
1192 k += ctx.assignments[reg_file[j]].rc.size();
1215 std::vector<unsigned> new_vars = collect_vars(ctx, reg_file, reg_win);
1219 adjust_max_used_regs(ctx, var.rc, reg_win.lo());
1221 if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, bounds, instr, def_reg))
1236 get_reg_impl(ra_ctx& ctx, RegisterFile& reg_file,
1317 if (ctx.assignments[reg_file[j]].rc.size() >= size) {
1324 if (!(ctx.block->kind & block_kind_top_level) &&
1325 ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
1330 k += ctx.assignments[reg_file[j]].rc.size();
1363 std::vector<unsigned> vars = collect_vars(ctx, tmp_file, best_win);
1374 if (!get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, best_win))
1379 adjust_max_used_regs(ctx, rc, best_win.lo());
1384 get_reg_specified(ra_ctx& ctx, RegisterFile& reg_file, RegClass rc, aco_ptr<Instruction>& instr,
1393 sdw_def_info = get_subdword_definition_info(ctx.program, instr, rc);
1404 PhysRegInterval bounds = get_reg_bounds(ctx.program, rc.type());
1407 bool is_vcc = rc.type() == RegType::sgpr && vcc_win.contains(reg_win) && ctx.program->needs_vcc;
1422 adjust_max_used_regs(ctx, rc, reg_win.lo());
1427 increase_register_file(ra_ctx& ctx, RegType type)
1429 if (type == RegType::vgpr && ctx.program->max_reg_demand.vgpr < ctx.vgpr_limit) {
1430 update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr + 1,
1431 ctx.program->max_reg_demand.sgpr));
1432 } else if (type == RegType::sgpr && ctx.program->max_reg_demand.sgpr < ctx.sgpr_limit) {
1433 update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr,
1434 ctx.program->max_reg_demand.sgpr + 1));
1460 compact_relocate_vars(ra_ctx& ctx, const std::vector<IDAndRegClass>& vars,
1468 DefInfo info(ctx, ctx.pseudo_dummy, var.rc, -1);
1474 [&ctx](const IDAndInfo& a, const IDAndInfo& b)
1485 return ctx.assignments[a.id].reg < ctx.assignments[b.id].reg;
1498 if (next_reg != ctx.assignments[var.id].reg) {
1499 RegClass rc = ctx.assignments[var.id].rc;
1503 pc_op.setFixed(ctx.assignments[var.id].reg);
1511 adjust_max_used_regs(ctx, var.info.rc, next_reg);
1520 is_mimg_vaddr_intact(ra_ctx& ctx, RegisterFile& reg_file, Instruction* instr)
1526 if (ctx.assignments[op.tempId()].assigned) {
1527 PhysReg reg = ctx.assignments[op.tempId()].reg;
1530 PhysRegInterval bounds = get_reg_bounds(ctx.program, RegType::vgpr);
1552 get_reg_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp, aco_ptr<Instruction>& instr)
1554 Instruction* vec = ctx.vectors[temp.id()];
1565 if (vec->format != Format::MIMG || is_mimg_vaddr_intact(ctx, reg_file, vec)) {
1574 ctx.assignments[op.tempId()].assigned) {
1575 PhysReg reg = ctx.assignments[op.tempId()].reg;
1577 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg))
1591 DefInfo info(ctx, ctx.pseudo_dummy, vec_rc, -1);
1592 std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, info);
1597 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg))
1605 get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
1609 auto split_vec = ctx.split_vectors.find(temp.id());
1610 if (split_vec != ctx.split_vectors.end()) {
1613 if (ctx.assignments[def.tempId()].affinity) {
1614 assignment& affinity = ctx.assignments[ctx.assignments[def.tempId()].affinity];
1618 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, reg))
1626 if (ctx.assignments[temp.id()].affinity) {
1627 assignment& affinity = ctx.assignments[ctx.assignments[temp.id()].affinity];
1629 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, affinity.reg))
1633 if (ctx.assignments[temp.id()].vcc) {
1634 if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc))
1640 if (ctx.vectors.find(temp.id()) != ctx.vectors.end()) {
1641 res = get_reg_vector(ctx, reg_file, temp, instr);
1646 DefInfo info(ctx, instr, temp.regClass(), operand_index);
1648 if (!ctx.policy.skip_optimistic_path) {
1650 res = get_reg_simple(ctx, reg_file, info);
1657 res = get_reg_impl(ctx, reg_file, parallelcopies, info, instr);
1668 if (!increase_register_file(ctx, info.rc.type())) {
1672 if (ctx.assignments[def.tempId()].assigned && def.regClass().type() == info.rc.type())
1682 const PhysRegInterval regs = get_reg_bounds(ctx.program, info.rc.type());
1686 for (unsigned id : find_vars(ctx, reg_file, regs))
1687 vars.emplace_back(id, ctx.assignments[id].rc);
1690 PhysReg space = compact_relocate_vars(ctx, vars, parallelcopies, regs.lo());
1698 compact_relocate_vars(ctx, killed_op_vars, parallelcopies, space);
1703 if (ctx.assignments[def.tempId()].assigned && def.regClass().type() == info.rc.type())
1707 return compact_relocate_vars(ctx, def_vars, parallelcopies, space);
1710 return get_reg(ctx, reg_file, temp, parallelcopies, instr, operand_index);
1714 get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
1723 PhysRegInterval bounds = get_reg_bounds(ctx.program, rc.type());
1777 linear_vgpr |= ctx.assignments[reg_file[j]].rc.is_linear_vgpr();
1780 avoid |= ctx.war_hint[j];
1785 if (ctx.block->kind & block_kind_top_level)
1816 return get_reg(ctx, reg_file, temp, parallelcopies, instr);
1818 DefInfo info(ctx, instr, rc, -1);
1819 std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, info);
1836 std::vector<unsigned> vars = collect_vars(ctx, tmp_file, PhysRegInterval{best_pos, size});
1841 get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, PhysRegInterval{best_pos, size});
1844 if (!increase_register_file(ctx, temp.type())) {
1846 return get_reg(ctx, reg_file, temp, parallelcopies, instr);
1848 return get_reg_create_vector(ctx, reg_file, temp, parallelcopies, instr);
1852 adjust_max_used_regs(ctx, rc, best_pos);
1858 handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
1889 (ctx.program->gfx_level <= GFX7 && reads_subdword);
1895 int reg = ctx.max_used_sgpr;
1899 reg = ctx.max_used_sgpr + 1;
1900 for (; reg < ctx.program->max_reg_demand.sgpr && reg_file[PhysReg{(unsigned)reg}]; reg++)
1902 if (reg == ctx.program->max_reg_demand.sgpr) {
1908 adjust_max_used_regs(ctx, s1, reg);
1951 get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
1956 PhysReg src = ctx.assignments[operand.tempId()].reg;
1967 std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, target);
1972 DefInfo info(ctx, instr, operand.regClass(), -1);
1973 get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, info.bounds, instr,
1981 dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
1988 update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops | fill_killed_ops);
1992 get_reg_phi(ra_ctx& ctx, IDSet& live_in, RegisterFile& register_file,
1997 PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, phi);
1998 update_renames(ctx, register_file, parallelcopy, phi, rename_not_killed_ops);
2016 ctx.assignments[prev_phi->definitions[0].tempId()] = {pc.second.physReg(),
2022 std::unordered_map<unsigned, Temp>::iterator orig_it = ctx.orig_names.find(pc.first.tempId());
2024 if (orig_it != ctx.orig_names.end())
2027 ctx.orig_names[pc.second.tempId()] = orig;
2028 ctx.renames[block.index][orig.id()] = pc.second.getTemp();
2053 get_regs_for_phis(ra_ctx& ctx, Block& block, RegisterFile& register_file,
2080 if (!get_reg_specified(ctx, register_file, definition.regClass(), phi, reg))
2085 ctx.assignments[definition.tempId()].set(definition);
2095 if (ctx.assignments[definition.tempId()].affinity &&
2096 ctx.assignments[ctx.assignments[definition.tempId()].affinity].assigned) {
2097 assignment& affinity = ctx.assignments[ctx.assignments[definition.tempId()].affinity];
2099 if (get_reg_specified(ctx, register_file, definition.regClass(), phi, affinity.reg)) {
2102 ctx.assignments[definition.tempId()].set(definition);
2114 if (get_reg_specified(ctx, register_file, definition.regClass(), phi, reg)) {
2117 ctx.assignments[definition.tempId()].set(definition);
2133 get_reg_phi(ctx, live_in, register_file, instructions, block, phi, definition.getTemp()));
2136 ctx.assignments[definition.tempId()].set(definition);
2141 read_variable(ra_ctx& ctx, Temp val, unsigned block_idx)
2143 std::unordered_map<unsigned, Temp>::iterator it = ctx.renames[block_idx].find(val.id());
2144 if (it == ctx.renames[block_idx].end())
2151 handle_live_in(ra_ctx& ctx, Temp val, Block* block)
2159 return read_variable(ctx, val, preds[0]);
2169 ops[i] = read_variable(ctx, val, preds[i]);
2183 new_val = ctx.program->allocateTmp(val.regClass());
2185 ctx.assignments.emplace_back();
2186 assert(ctx.assignments.size() == ctx.program->peekAllocationId());
2190 assert(ctx.assignments[ops[i].id()].assigned);
2192 phi->operands[i].setFixed(ctx.assignments[ops[i].id()].reg);
2201 handle_loop_phis(ra_ctx& ctx, const IDSet& live_in, uint32_t loop_header_idx,
2204 Block& loop_header = ctx.program->blocks[loop_header_idx];
2209 Temp val = Temp(t, ctx.program->temp_rc[t]);
2210 Temp prev = read_variable(ctx, val, loop_header_idx - 1);
2211 Temp renamed = handle_live_in(ctx, val, &loop_header);
2217 ctx.orig_names[renamed.id()] = val;
2219 auto it = ctx.renames[idx].emplace(val.id(), renamed);
2233 assignment& var = ctx.assignments[prev.id()];
2234 ctx.assignments[renamed.id()] = var;
2253 std::unordered_map<unsigned, Temp>::iterator it = ctx.orig_names.find(op.tempId());
2254 Temp orig = it != ctx.orig_names.end() ? it->second : op.getTemp();
2256 op.setTemp(read_variable(ctx, orig, preds[j]));
2257 op.setFixed(ctx.assignments[op.tempId()].reg);
2267 Block& current = ctx.program->blocks[idx];
2296 init_reg_file(ra_ctx& ctx, const std::vector<IDSet>& live_out_per_block, Block& block)
2299 uint32_t header = ctx.loop_header.back();
2300 ctx.loop_header.pop_back();
2301 handle_loop_phis(ctx, live_out_per_block[header], header, block.index);
2309 ctx.loop_header.emplace_back(block.index);
2316 operand.setTemp(read_variable(ctx, operand.getTemp(), block.index - 1));
2317 operand.setFixed(ctx.assignments[operand.tempId()].reg);
2321 Temp val = Temp(t, ctx.program->temp_rc[t]);
2322 Temp renamed = read_variable(ctx, val, block.index - 1);
2324 ctx.renames[block.index][val.id()] = renamed;
2325 assignment& var = ctx.assignments[renamed.id()];
2341 operand.setTemp(read_variable(ctx, operand.getTemp(), preds[i]));
2342 operand.setFixed(ctx.assignments[operand.tempId()].reg);
2346 Temp val = Temp(t, ctx.program->temp_rc[t]);
2347 Temp renamed = handle_live_in(ctx, val, &block);
2348 assignment& var = ctx.assignments[renamed.id()];
2354 ctx.renames[block.index].emplace(t, renamed);
2355 ctx.orig_names[renamed.id()] = val;
2364 get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
2369 for (auto block_rit = ctx.program->blocks.rbegin(); block_rit != ctx.program->blocks.rend();
2387 ctx.vectors[op.tempId()] = instr.get();
2391 ctx.vectors[instr->operands[i].tempId()] = instr.get();
2394 ctx.split_vectors[instr->operands[0].tempId()] = instr.get();
2396 if (!instr->isSDWA() || ctx.program->gfx_level == GFX8)
2397 ctx.assignments[instr->definitions[0].tempId()].vcc = true;
2401 ctx.assignments[instr->operands[2].tempId()].vcc = true;
2403 ctx.assignments[instr->definitions[1].tempId()].vcc = true;
2411 ctx.assignments[instr->operands[0].tempId()].vcc = true;
2444 if (ctx.program->gfx_level < GFX10)
2456 if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32)
2540 ctx.assignments[vec[i].id()].affinity = vec[0].id();
2545 optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
2569 if (ctx.assignments[def_id].affinity) {
2570 assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
2596 optimize_encoding_sopk(Program* program, ra_ctx& ctx, RegisterFile& register_file,
2624 if (ctx.assignments[def_id].affinity) {
2625 assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
2652 optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
2656 optimize_encoding_vop2(program, ctx, register_file, instr);
2658 optimize_encoding_sopk(program, ctx, register_file, instr);
2666 ra_ctx ctx(program, policy);
2667 get_affinities(ctx, live_out_per_block);
2670 ctx.block = &block;
2673 RegisterFile register_file = init_reg_file(ctx, live_out_per_block, block);
2674 ctx.war_hint.reset();
2680 get_regs_for_phis(ctx, block, register_file, instructions, live_out_per_block[block.index]);
2689 PhysReg br_reg = get_reg_phi(ctx, live_out_per_block[block.index], register_file,
2690 instructions, block, ctx.phi_dummy, Temp(0, s2));
2730 read_variable(ctx, phi->operands[idx].getTemp(), block.index));
2731 phi_op.setFixed(ctx.assignments[phi_op.tempId()].reg);
2762 operand.setTemp(read_variable(ctx, operand.getTemp(), block.index));
2763 assert(ctx.assignments[operand.tempId()].assigned);
2765 PhysReg reg = ctx.assignments[operand.tempId()].reg;
2769 get_reg_for_operand(ctx, register_file, parallelcopy, instr, operand, i);
2771 if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->gfx_level == GFX6) ||
2774 ctx.war_hint.set(operand.physReg().reg() + j);
2784 optimize_encoding(program, ctx, register_file, instr);
2819 ctx.defs_done.reset();
2827 adjust_max_used_regs(ctx, definition.regClass(), definition.physReg());
2833 std::vector<unsigned> vars = collect_vars(ctx, register_file, def_regs);
2843 DefInfo info(ctx, instr, definition.regClass(), -1);
2844 success = get_regs_for_copies(ctx, tmp_file, parallelcopy, vars, info.bounds, instr,
2848 update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0);
2850 ctx.defs_done.set(i);
2855 ctx.assignments[definition.tempId()].set(definition);
2872 if (get_reg_specified(ctx, register_file, rc, instr, reg)) {
2876 DefInfo info(ctx, ctx.pseudo_dummy, vec_rc, -1);
2877 std::pair<PhysReg, bool> res = get_reg_simple(ctx, register_file, info);
2879 if (res.second && get_reg_specified(ctx, register_file, rc, instr, reg))
2884 if (get_reg_specified(ctx, register_file, rc, instr, reg))
2897 if (get_reg_specified(ctx, register_file, definition->regClass(), instr, reg))
2900 PhysReg reg = get_reg_create_vector(ctx, register_file, definition->getTemp(),
2902 update_renames(ctx, register_file, parallelcopy, instr, (UpdateRenames)0);
2909 PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, instr);
2917 definition->setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr));
2919 update_renames(ctx, register_file, parallelcopy, instr,
2928 ctx.defs_done.set(i);
2929 ctx.assignments[definition->tempId()].set(*definition);
2933 handle_pseudo(ctx, register_file, instr.get());
2946 add_subdword_operand(ctx, instr, i, op.physReg().byte(), op.regClass());
2982 ctx.orig_names.find(pc->operands[i].tempId());
2983 Temp orig = it != ctx.orig_names.end() ? it->second : pc->operands[i].getTemp();
2984 ctx.orig_names[pc->definitions[i].tempId()] = orig;
2985 ctx.renames[block.index][orig.id()] = pc->definitions[i].getTemp();
3000 handle_pseudo(ctx, tmp_file, pc.get());
3047 ctx.assignments.emplace_back();
3048 PhysReg reg = get_reg(ctx, tmp_file, tmp, parallelcopy, instr);
3049 update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops);
3086 program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1);
3087 program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1);