Lines Matching defs:program
45 get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc);
46 void add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg);
71 Program* program;
91 : program(program_), assignments(program->peekAllocationId()),
92 renames(program->blocks.size()), policy(policy_)
98 sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
99 vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
193 get_reg_bounds(Program* program, RegType type)
196 return {PhysReg{256}, (unsigned)program->max_reg_demand.vgpr};
198 return {PhysReg{0}, (unsigned)program->max_reg_demand.sgpr};
213 bounds = get_reg_bounds(ctx.program, rc.type());
217 stride = get_subdword_operand_stride(ctx.program->gfx_level, instr, operand, rc);
219 std::pair<unsigned, unsigned> info = get_subdword_definition_info(ctx.program, instr, rc);
232 } else if (instr->isMIMG() && instr->mimg().d16 && ctx.program->gfx_level <= GFX9) {
242 assert(ctx.program->gfx_level == GFX9 && "Image D16 on GFX8 not supported.");
420 PhysRegInterval regs = get_reg_bounds(ctx.program, vgprs ? RegType::vgpr : RegType::sgpr);
537 amd_gfx_level gfx_level = ctx.program->gfx_level;
601 get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc)
603 amd_gfx_level gfx_level = program->gfx_level;
649 if (!program->dev.sram_ecc_enabled)
658 if (!program->dev.sram_ecc_enabled)
666 if (instr->isMIMG() && instr->mimg().d16 && !program->dev.sram_ecc_enabled) {
675 add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg)
681 amd_gfx_level gfx_level = program->gfx_level;
829 copy.second.setTemp(ctx.program->allocateTmp(copy.second.regClass()));
831 assert(ctx.assignments.size() == ctx.program->peekAllocationId());
1056 if (ctx.program->gfx_level <= GFX8)
1393 sdw_def_info = get_subdword_definition_info(ctx.program, instr, rc);
1404 PhysRegInterval bounds = get_reg_bounds(ctx.program, rc.type());
1407 bool is_vcc = rc.type() == RegType::sgpr && vcc_win.contains(reg_win) && ctx.program->needs_vcc;
1429 if (type == RegType::vgpr && ctx.program->max_reg_demand.vgpr < ctx.vgpr_limit) {
1430 update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr + 1,
1431 ctx.program->max_reg_demand.sgpr));
1432 } else if (type == RegType::sgpr && ctx.program->max_reg_demand.sgpr < ctx.sgpr_limit) {
1433 update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr,
1434 ctx.program->max_reg_demand.sgpr + 1));
1530 PhysRegInterval bounds = get_reg_bounds(ctx.program, RegType::vgpr);
1682 const PhysRegInterval regs = get_reg_bounds(ctx.program, info.rc.type());
1723 PhysRegInterval bounds = get_reg_bounds(ctx.program, rc.type());
1889 (ctx.program->gfx_level <= GFX7 && reads_subdword);
1900 for (; reg < ctx.program->max_reg_demand.sgpr && reg_file[PhysReg{(unsigned)reg}]; reg++)
1902 if (reg == ctx.program->max_reg_demand.sgpr) {
2183 new_val = ctx.program->allocateTmp(val.regClass());
2186 assert(ctx.assignments.size() == ctx.program->peekAllocationId());
2204 Block& loop_header = ctx.program->blocks[loop_header_idx];
2209 Temp val = Temp(t, ctx.program->temp_rc[t]);
2267 Block& current = ctx.program->blocks[idx];
2321 Temp val = Temp(t, ctx.program->temp_rc[t]);
2346 Temp val = Temp(t, ctx.program->temp_rc[t]);
2369 for (auto block_rit = ctx.program->blocks.rbegin(); block_rit != ctx.program->blocks.rend();
2396 if (!instr->isSDWA() || ctx.program->gfx_level == GFX8)
2444 if (ctx.program->gfx_level < GFX10)
2456 if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32)
2545 optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
2550 (instr->opcode != aco_opcode::v_fma_f32 || program->gfx_level < GFX10) &&
2552 (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) &&
2553 (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) &&
2554 (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) &&
2555 (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) &&
2556 (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) ||
2596 optimize_encoding_sopk(Program* program, ra_ctx& ctx, RegisterFile& register_file,
2652 optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
2656 optimize_encoding_vop2(program, ctx, register_file, instr);
2658 optimize_encoding_sopk(program, ctx, register_file, instr);
2664 register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra_test_policy policy)
2666 ra_ctx ctx(program, policy);
2669 for (Block& block : program->blocks) {
2688 program->blocks[block.linear_preds[0]].linear_succs.size() == 1)) {
2692 program->blocks[pred].scc_live_out = register_file[scc];
2693 aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
2718 Block& succ = program->blocks[block.logical_succs[0]];
2766 if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass()))
2771 if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->gfx_level == GFX6) ||
2784 optimize_encoding(program, ctx, register_file, instr);
2912 add_subdword_definition(program, instr, reg);
3029 program->gfx_level < GFX10) {
3046 Temp tmp = program->allocateTmp(can_sgpr ? s1 : v1);
3086 program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1);
3087 program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1);
3089 program->progress = CompilationProgress::after_ra;