1fd4e5da5Sopenharmony_ci// Copyright (c) 2018 Google LLC. 2fd4e5da5Sopenharmony_ci// 3fd4e5da5Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); 4fd4e5da5Sopenharmony_ci// you may not use this file except in compliance with the License. 5fd4e5da5Sopenharmony_ci// You may obtain a copy of the License at 6fd4e5da5Sopenharmony_ci// 7fd4e5da5Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 8fd4e5da5Sopenharmony_ci// 9fd4e5da5Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 10fd4e5da5Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, 11fd4e5da5Sopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12fd4e5da5Sopenharmony_ci// See the License for the specific language governing permissions and 13fd4e5da5Sopenharmony_ci// limitations under the License. 14fd4e5da5Sopenharmony_ci 15fd4e5da5Sopenharmony_ci#include "source/opt/register_pressure.h" 16fd4e5da5Sopenharmony_ci 17fd4e5da5Sopenharmony_ci#include <algorithm> 18fd4e5da5Sopenharmony_ci#include <iterator> 19fd4e5da5Sopenharmony_ci 20fd4e5da5Sopenharmony_ci#include "source/opt/cfg.h" 21fd4e5da5Sopenharmony_ci#include "source/opt/def_use_manager.h" 22fd4e5da5Sopenharmony_ci#include "source/opt/dominator_tree.h" 23fd4e5da5Sopenharmony_ci#include "source/opt/function.h" 24fd4e5da5Sopenharmony_ci#include "source/opt/ir_context.h" 25fd4e5da5Sopenharmony_ci#include "source/opt/iterator.h" 26fd4e5da5Sopenharmony_ci 27fd4e5da5Sopenharmony_cinamespace spvtools { 28fd4e5da5Sopenharmony_cinamespace opt { 29fd4e5da5Sopenharmony_cinamespace { 30fd4e5da5Sopenharmony_ci// Predicate for the FilterIterator to only consider instructions that are not 31fd4e5da5Sopenharmony_ci// phi instructions defined in the basic block |bb|. 32fd4e5da5Sopenharmony_ciclass ExcludePhiDefinedInBlock { 33fd4e5da5Sopenharmony_ci public: 34fd4e5da5Sopenharmony_ci ExcludePhiDefinedInBlock(IRContext* context, const BasicBlock* bb) 35fd4e5da5Sopenharmony_ci : context_(context), bb_(bb) {} 36fd4e5da5Sopenharmony_ci 37fd4e5da5Sopenharmony_ci bool operator()(Instruction* insn) const { 38fd4e5da5Sopenharmony_ci return !(insn->opcode() == spv::Op::OpPhi && 39fd4e5da5Sopenharmony_ci context_->get_instr_block(insn) == bb_); 40fd4e5da5Sopenharmony_ci } 41fd4e5da5Sopenharmony_ci 42fd4e5da5Sopenharmony_ci private: 43fd4e5da5Sopenharmony_ci IRContext* context_; 44fd4e5da5Sopenharmony_ci const BasicBlock* bb_; 45fd4e5da5Sopenharmony_ci}; 46fd4e5da5Sopenharmony_ci 47fd4e5da5Sopenharmony_ci// Returns true if |insn| generates a SSA register that is likely to require a 48fd4e5da5Sopenharmony_ci// physical register. 49fd4e5da5Sopenharmony_cibool CreatesRegisterUsage(Instruction* insn) { 50fd4e5da5Sopenharmony_ci if (!insn->HasResultId()) return false; 51fd4e5da5Sopenharmony_ci if (insn->opcode() == spv::Op::OpUndef) return false; 52fd4e5da5Sopenharmony_ci if (IsConstantInst(insn->opcode())) return false; 53fd4e5da5Sopenharmony_ci if (insn->opcode() == spv::Op::OpLabel) return false; 54fd4e5da5Sopenharmony_ci return true; 55fd4e5da5Sopenharmony_ci} 56fd4e5da5Sopenharmony_ci 57fd4e5da5Sopenharmony_ci// Compute the register liveness for each basic block of a function. This also 58fd4e5da5Sopenharmony_ci// fill-up some information about the pick register usage and a break down of 59fd4e5da5Sopenharmony_ci// register usage. This implements: "A non-iterative data-flow algorithm for 60fd4e5da5Sopenharmony_ci// computing liveness sets in strict ssa programs" from Boissinot et al. 61fd4e5da5Sopenharmony_ciclass ComputeRegisterLiveness { 62fd4e5da5Sopenharmony_ci public: 63fd4e5da5Sopenharmony_ci ComputeRegisterLiveness(RegisterLiveness* reg_pressure, Function* f) 64fd4e5da5Sopenharmony_ci : reg_pressure_(reg_pressure), 65fd4e5da5Sopenharmony_ci context_(reg_pressure->GetContext()), 66fd4e5da5Sopenharmony_ci function_(f), 67fd4e5da5Sopenharmony_ci cfg_(*reg_pressure->GetContext()->cfg()), 68fd4e5da5Sopenharmony_ci def_use_manager_(*reg_pressure->GetContext()->get_def_use_mgr()), 69fd4e5da5Sopenharmony_ci dom_tree_( 70fd4e5da5Sopenharmony_ci reg_pressure->GetContext()->GetDominatorAnalysis(f)->GetDomTree()), 71fd4e5da5Sopenharmony_ci loop_desc_(*reg_pressure->GetContext()->GetLoopDescriptor(f)) {} 72fd4e5da5Sopenharmony_ci 73fd4e5da5Sopenharmony_ci // Computes the register liveness for |function_| and then estimate the 74fd4e5da5Sopenharmony_ci // register usage. The liveness algorithm works in 2 steps: 75fd4e5da5Sopenharmony_ci // - First, compute the liveness for each basic blocks, but will ignore any 76fd4e5da5Sopenharmony_ci // back-edge; 77fd4e5da5Sopenharmony_ci // - Second, walk loop forest to propagate registers crossing back-edges 78fd4e5da5Sopenharmony_ci // (add iterative values into the liveness set). 79fd4e5da5Sopenharmony_ci void Compute() { 80fd4e5da5Sopenharmony_ci for (BasicBlock& start_bb : *function_) { 81fd4e5da5Sopenharmony_ci if (reg_pressure_->Get(start_bb.id()) != nullptr) { 82fd4e5da5Sopenharmony_ci continue; 83fd4e5da5Sopenharmony_ci } 84fd4e5da5Sopenharmony_ci cfg_.ForEachBlockInPostOrder(&start_bb, [this](BasicBlock* bb) { 85fd4e5da5Sopenharmony_ci if (reg_pressure_->Get(bb->id()) == nullptr) { 86fd4e5da5Sopenharmony_ci ComputePartialLiveness(bb); 87fd4e5da5Sopenharmony_ci } 88fd4e5da5Sopenharmony_ci }); 89fd4e5da5Sopenharmony_ci } 90fd4e5da5Sopenharmony_ci DoLoopLivenessUnification(); 91fd4e5da5Sopenharmony_ci EvaluateRegisterRequirements(); 92fd4e5da5Sopenharmony_ci } 93fd4e5da5Sopenharmony_ci 94fd4e5da5Sopenharmony_ci private: 95fd4e5da5Sopenharmony_ci // Registers all SSA register used by successors of |bb| in their phi 96fd4e5da5Sopenharmony_ci // instructions. 97fd4e5da5Sopenharmony_ci void ComputePhiUses(const BasicBlock& bb, 98fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness::LiveSet* live) { 99fd4e5da5Sopenharmony_ci uint32_t bb_id = bb.id(); 100fd4e5da5Sopenharmony_ci bb.ForEachSuccessorLabel([live, bb_id, this](uint32_t sid) { 101fd4e5da5Sopenharmony_ci BasicBlock* succ_bb = cfg_.block(sid); 102fd4e5da5Sopenharmony_ci succ_bb->ForEachPhiInst([live, bb_id, this](const Instruction* phi) { 103fd4e5da5Sopenharmony_ci for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) { 104fd4e5da5Sopenharmony_ci if (phi->GetSingleWordInOperand(i + 1) == bb_id) { 105fd4e5da5Sopenharmony_ci Instruction* insn_op = 106fd4e5da5Sopenharmony_ci def_use_manager_.GetDef(phi->GetSingleWordInOperand(i)); 107fd4e5da5Sopenharmony_ci if (CreatesRegisterUsage(insn_op)) { 108fd4e5da5Sopenharmony_ci live->insert(insn_op); 109fd4e5da5Sopenharmony_ci break; 110fd4e5da5Sopenharmony_ci } 111fd4e5da5Sopenharmony_ci } 112fd4e5da5Sopenharmony_ci } 113fd4e5da5Sopenharmony_ci }); 114fd4e5da5Sopenharmony_ci }); 115fd4e5da5Sopenharmony_ci } 116fd4e5da5Sopenharmony_ci 117fd4e5da5Sopenharmony_ci // Computes register liveness for each basic blocks but ignores all 118fd4e5da5Sopenharmony_ci // back-edges. 119fd4e5da5Sopenharmony_ci void ComputePartialLiveness(BasicBlock* bb) { 120fd4e5da5Sopenharmony_ci assert(reg_pressure_->Get(bb) == nullptr && 121fd4e5da5Sopenharmony_ci "Basic block already processed"); 122fd4e5da5Sopenharmony_ci 123fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness* live_inout = 124fd4e5da5Sopenharmony_ci reg_pressure_->GetOrInsert(bb->id()); 125fd4e5da5Sopenharmony_ci ComputePhiUses(*bb, &live_inout->live_out_); 126fd4e5da5Sopenharmony_ci 127fd4e5da5Sopenharmony_ci const BasicBlock* cbb = bb; 128fd4e5da5Sopenharmony_ci cbb->ForEachSuccessorLabel([&live_inout, bb, this](uint32_t sid) { 129fd4e5da5Sopenharmony_ci // Skip back edges. 130fd4e5da5Sopenharmony_ci if (dom_tree_.Dominates(sid, bb->id())) { 131fd4e5da5Sopenharmony_ci return; 132fd4e5da5Sopenharmony_ci } 133fd4e5da5Sopenharmony_ci 134fd4e5da5Sopenharmony_ci BasicBlock* succ_bb = cfg_.block(sid); 135fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness* succ_live_inout = 136fd4e5da5Sopenharmony_ci reg_pressure_->Get(succ_bb); 137fd4e5da5Sopenharmony_ci assert(succ_live_inout && 138fd4e5da5Sopenharmony_ci "Successor liveness analysis was not performed"); 139fd4e5da5Sopenharmony_ci 140fd4e5da5Sopenharmony_ci ExcludePhiDefinedInBlock predicate(context_, succ_bb); 141fd4e5da5Sopenharmony_ci auto filter = 142fd4e5da5Sopenharmony_ci MakeFilterIteratorRange(succ_live_inout->live_in_.begin(), 143fd4e5da5Sopenharmony_ci succ_live_inout->live_in_.end(), predicate); 144fd4e5da5Sopenharmony_ci live_inout->live_out_.insert(filter.begin(), filter.end()); 145fd4e5da5Sopenharmony_ci }); 146fd4e5da5Sopenharmony_ci 147fd4e5da5Sopenharmony_ci live_inout->live_in_ = live_inout->live_out_; 148fd4e5da5Sopenharmony_ci for (Instruction& insn : make_range(bb->rbegin(), bb->rend())) { 149fd4e5da5Sopenharmony_ci if (insn.opcode() == spv::Op::OpPhi) { 150fd4e5da5Sopenharmony_ci live_inout->live_in_.insert(&insn); 151fd4e5da5Sopenharmony_ci break; 152fd4e5da5Sopenharmony_ci } 153fd4e5da5Sopenharmony_ci live_inout->live_in_.erase(&insn); 154fd4e5da5Sopenharmony_ci insn.ForEachInId([live_inout, this](uint32_t* id) { 155fd4e5da5Sopenharmony_ci Instruction* insn_op = def_use_manager_.GetDef(*id); 156fd4e5da5Sopenharmony_ci if (CreatesRegisterUsage(insn_op)) { 157fd4e5da5Sopenharmony_ci live_inout->live_in_.insert(insn_op); 158fd4e5da5Sopenharmony_ci } 159fd4e5da5Sopenharmony_ci }); 160fd4e5da5Sopenharmony_ci } 161fd4e5da5Sopenharmony_ci } 162fd4e5da5Sopenharmony_ci 163fd4e5da5Sopenharmony_ci // Propagates the register liveness information of each loop iterators. 164fd4e5da5Sopenharmony_ci void DoLoopLivenessUnification() { 165fd4e5da5Sopenharmony_ci for (const Loop* loop : *loop_desc_.GetPlaceholderRootLoop()) { 166fd4e5da5Sopenharmony_ci DoLoopLivenessUnification(*loop); 167fd4e5da5Sopenharmony_ci } 168fd4e5da5Sopenharmony_ci } 169fd4e5da5Sopenharmony_ci 170fd4e5da5Sopenharmony_ci // Propagates the register liveness information of loop iterators trough-out 171fd4e5da5Sopenharmony_ci // the loop body. 172fd4e5da5Sopenharmony_ci void DoLoopLivenessUnification(const Loop& loop) { 173fd4e5da5Sopenharmony_ci auto blocks_in_loop = MakeFilterIteratorRange( 174fd4e5da5Sopenharmony_ci loop.GetBlocks().begin(), loop.GetBlocks().end(), 175fd4e5da5Sopenharmony_ci [&loop, this](uint32_t bb_id) { 176fd4e5da5Sopenharmony_ci return bb_id != loop.GetHeaderBlock()->id() && 177fd4e5da5Sopenharmony_ci loop_desc_[bb_id] == &loop; 178fd4e5da5Sopenharmony_ci }); 179fd4e5da5Sopenharmony_ci 180fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness* header_live_inout = 181fd4e5da5Sopenharmony_ci reg_pressure_->Get(loop.GetHeaderBlock()); 182fd4e5da5Sopenharmony_ci assert(header_live_inout && 183fd4e5da5Sopenharmony_ci "Liveness analysis was not performed for the current block"); 184fd4e5da5Sopenharmony_ci 185fd4e5da5Sopenharmony_ci ExcludePhiDefinedInBlock predicate(context_, loop.GetHeaderBlock()); 186fd4e5da5Sopenharmony_ci auto live_loop = 187fd4e5da5Sopenharmony_ci MakeFilterIteratorRange(header_live_inout->live_in_.begin(), 188fd4e5da5Sopenharmony_ci header_live_inout->live_in_.end(), predicate); 189fd4e5da5Sopenharmony_ci 190fd4e5da5Sopenharmony_ci for (uint32_t bb_id : blocks_in_loop) { 191fd4e5da5Sopenharmony_ci BasicBlock* bb = cfg_.block(bb_id); 192fd4e5da5Sopenharmony_ci 193fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness* live_inout = 194fd4e5da5Sopenharmony_ci reg_pressure_->Get(bb); 195fd4e5da5Sopenharmony_ci live_inout->live_in_.insert(live_loop.begin(), live_loop.end()); 196fd4e5da5Sopenharmony_ci live_inout->live_out_.insert(live_loop.begin(), live_loop.end()); 197fd4e5da5Sopenharmony_ci } 198fd4e5da5Sopenharmony_ci 199fd4e5da5Sopenharmony_ci for (const Loop* inner_loop : loop) { 200fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness* live_inout = 201fd4e5da5Sopenharmony_ci reg_pressure_->Get(inner_loop->GetHeaderBlock()); 202fd4e5da5Sopenharmony_ci live_inout->live_in_.insert(live_loop.begin(), live_loop.end()); 203fd4e5da5Sopenharmony_ci live_inout->live_out_.insert(live_loop.begin(), live_loop.end()); 204fd4e5da5Sopenharmony_ci 205fd4e5da5Sopenharmony_ci DoLoopLivenessUnification(*inner_loop); 206fd4e5da5Sopenharmony_ci } 207fd4e5da5Sopenharmony_ci } 208fd4e5da5Sopenharmony_ci 209fd4e5da5Sopenharmony_ci // Get the number of required registers for this each basic block. 210fd4e5da5Sopenharmony_ci void EvaluateRegisterRequirements() { 211fd4e5da5Sopenharmony_ci for (BasicBlock& bb : *function_) { 212fd4e5da5Sopenharmony_ci RegisterLiveness::RegionRegisterLiveness* live_inout = 213fd4e5da5Sopenharmony_ci reg_pressure_->Get(bb.id()); 214fd4e5da5Sopenharmony_ci assert(live_inout != nullptr && "Basic block not processed"); 215fd4e5da5Sopenharmony_ci 216fd4e5da5Sopenharmony_ci size_t reg_count = live_inout->live_out_.size(); 217fd4e5da5Sopenharmony_ci for (Instruction* insn : live_inout->live_out_) { 218fd4e5da5Sopenharmony_ci live_inout->AddRegisterClass(insn); 219fd4e5da5Sopenharmony_ci } 220fd4e5da5Sopenharmony_ci live_inout->used_registers_ = reg_count; 221fd4e5da5Sopenharmony_ci 222fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> die_in_block; 223fd4e5da5Sopenharmony_ci for (Instruction& insn : make_range(bb.rbegin(), bb.rend())) { 224fd4e5da5Sopenharmony_ci // If it is a phi instruction, the register pressure will not change 225fd4e5da5Sopenharmony_ci // anymore. 226fd4e5da5Sopenharmony_ci if (insn.opcode() == spv::Op::OpPhi) { 227fd4e5da5Sopenharmony_ci break; 228fd4e5da5Sopenharmony_ci } 229fd4e5da5Sopenharmony_ci 230fd4e5da5Sopenharmony_ci insn.ForEachInId( 231fd4e5da5Sopenharmony_ci [live_inout, &die_in_block, ®_count, this](uint32_t* id) { 232fd4e5da5Sopenharmony_ci Instruction* op_insn = def_use_manager_.GetDef(*id); 233fd4e5da5Sopenharmony_ci if (!CreatesRegisterUsage(op_insn) || 234fd4e5da5Sopenharmony_ci live_inout->live_out_.count(op_insn)) { 235fd4e5da5Sopenharmony_ci // already taken into account. 236fd4e5da5Sopenharmony_ci return; 237fd4e5da5Sopenharmony_ci } 238fd4e5da5Sopenharmony_ci if (!die_in_block.count(*id)) { 239fd4e5da5Sopenharmony_ci live_inout->AddRegisterClass(def_use_manager_.GetDef(*id)); 240fd4e5da5Sopenharmony_ci reg_count++; 241fd4e5da5Sopenharmony_ci die_in_block.insert(*id); 242fd4e5da5Sopenharmony_ci } 243fd4e5da5Sopenharmony_ci }); 244fd4e5da5Sopenharmony_ci live_inout->used_registers_ = 245fd4e5da5Sopenharmony_ci std::max(live_inout->used_registers_, reg_count); 246fd4e5da5Sopenharmony_ci if (CreatesRegisterUsage(&insn)) { 247fd4e5da5Sopenharmony_ci reg_count--; 248fd4e5da5Sopenharmony_ci } 249fd4e5da5Sopenharmony_ci } 250fd4e5da5Sopenharmony_ci } 251fd4e5da5Sopenharmony_ci } 252fd4e5da5Sopenharmony_ci 253fd4e5da5Sopenharmony_ci RegisterLiveness* reg_pressure_; 254fd4e5da5Sopenharmony_ci IRContext* context_; 255fd4e5da5Sopenharmony_ci Function* function_; 256fd4e5da5Sopenharmony_ci CFG& cfg_; 257fd4e5da5Sopenharmony_ci analysis::DefUseManager& def_use_manager_; 258fd4e5da5Sopenharmony_ci DominatorTree& dom_tree_; 259fd4e5da5Sopenharmony_ci LoopDescriptor& loop_desc_; 260fd4e5da5Sopenharmony_ci}; 261fd4e5da5Sopenharmony_ci} // namespace 262fd4e5da5Sopenharmony_ci 263fd4e5da5Sopenharmony_ci// Get the number of required registers for each basic block. 264fd4e5da5Sopenharmony_civoid RegisterLiveness::RegionRegisterLiveness::AddRegisterClass( 265fd4e5da5Sopenharmony_ci Instruction* insn) { 266fd4e5da5Sopenharmony_ci assert(CreatesRegisterUsage(insn) && "Instruction does not use a register"); 267fd4e5da5Sopenharmony_ci analysis::Type* type = 268fd4e5da5Sopenharmony_ci insn->context()->get_type_mgr()->GetType(insn->type_id()); 269fd4e5da5Sopenharmony_ci 270fd4e5da5Sopenharmony_ci RegisterLiveness::RegisterClass reg_class{type, false}; 271fd4e5da5Sopenharmony_ci 272fd4e5da5Sopenharmony_ci insn->context()->get_decoration_mgr()->WhileEachDecoration( 273fd4e5da5Sopenharmony_ci insn->result_id(), uint32_t(spv::Decoration::Uniform), 274fd4e5da5Sopenharmony_ci [®_class](const Instruction&) { 275fd4e5da5Sopenharmony_ci reg_class.is_uniform_ = true; 276fd4e5da5Sopenharmony_ci return false; 277fd4e5da5Sopenharmony_ci }); 278fd4e5da5Sopenharmony_ci 279fd4e5da5Sopenharmony_ci AddRegisterClass(reg_class); 280fd4e5da5Sopenharmony_ci} 281fd4e5da5Sopenharmony_ci 282fd4e5da5Sopenharmony_civoid RegisterLiveness::Analyze(Function* f) { 283fd4e5da5Sopenharmony_ci block_pressure_.clear(); 284fd4e5da5Sopenharmony_ci ComputeRegisterLiveness(this, f).Compute(); 285fd4e5da5Sopenharmony_ci} 286fd4e5da5Sopenharmony_ci 287fd4e5da5Sopenharmony_civoid RegisterLiveness::ComputeLoopRegisterPressure( 288fd4e5da5Sopenharmony_ci const Loop& loop, RegionRegisterLiveness* loop_reg_pressure) const { 289fd4e5da5Sopenharmony_ci loop_reg_pressure->Clear(); 290fd4e5da5Sopenharmony_ci 291fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* header_live_inout = Get(loop.GetHeaderBlock()); 292fd4e5da5Sopenharmony_ci loop_reg_pressure->live_in_ = header_live_inout->live_in_; 293fd4e5da5Sopenharmony_ci 294fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> exit_blocks; 295fd4e5da5Sopenharmony_ci loop.GetExitBlocks(&exit_blocks); 296fd4e5da5Sopenharmony_ci 297fd4e5da5Sopenharmony_ci for (uint32_t bb_id : exit_blocks) { 298fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* live_inout = Get(bb_id); 299fd4e5da5Sopenharmony_ci loop_reg_pressure->live_out_.insert(live_inout->live_in_.begin(), 300fd4e5da5Sopenharmony_ci live_inout->live_in_.end()); 301fd4e5da5Sopenharmony_ci } 302fd4e5da5Sopenharmony_ci 303fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> seen_insn; 304fd4e5da5Sopenharmony_ci for (Instruction* insn : loop_reg_pressure->live_out_) { 305fd4e5da5Sopenharmony_ci loop_reg_pressure->AddRegisterClass(insn); 306fd4e5da5Sopenharmony_ci seen_insn.insert(insn->result_id()); 307fd4e5da5Sopenharmony_ci } 308fd4e5da5Sopenharmony_ci for (Instruction* insn : loop_reg_pressure->live_in_) { 309fd4e5da5Sopenharmony_ci if (!seen_insn.count(insn->result_id())) { 310fd4e5da5Sopenharmony_ci continue; 311fd4e5da5Sopenharmony_ci } 312fd4e5da5Sopenharmony_ci loop_reg_pressure->AddRegisterClass(insn); 313fd4e5da5Sopenharmony_ci seen_insn.insert(insn->result_id()); 314fd4e5da5Sopenharmony_ci } 315fd4e5da5Sopenharmony_ci 316fd4e5da5Sopenharmony_ci loop_reg_pressure->used_registers_ = 0; 317fd4e5da5Sopenharmony_ci 318fd4e5da5Sopenharmony_ci for (uint32_t bb_id : loop.GetBlocks()) { 319fd4e5da5Sopenharmony_ci BasicBlock* bb = context_->cfg()->block(bb_id); 320fd4e5da5Sopenharmony_ci 321fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* live_inout = Get(bb_id); 322fd4e5da5Sopenharmony_ci assert(live_inout != nullptr && "Basic block not processed"); 323fd4e5da5Sopenharmony_ci loop_reg_pressure->used_registers_ = std::max( 324fd4e5da5Sopenharmony_ci loop_reg_pressure->used_registers_, live_inout->used_registers_); 325fd4e5da5Sopenharmony_ci 326fd4e5da5Sopenharmony_ci for (Instruction& insn : *bb) { 327fd4e5da5Sopenharmony_ci if (insn.opcode() == spv::Op::OpPhi || !CreatesRegisterUsage(&insn) || 328fd4e5da5Sopenharmony_ci seen_insn.count(insn.result_id())) { 329fd4e5da5Sopenharmony_ci continue; 330fd4e5da5Sopenharmony_ci } 331fd4e5da5Sopenharmony_ci loop_reg_pressure->AddRegisterClass(&insn); 332fd4e5da5Sopenharmony_ci } 333fd4e5da5Sopenharmony_ci } 334fd4e5da5Sopenharmony_ci} 335fd4e5da5Sopenharmony_ci 336fd4e5da5Sopenharmony_civoid RegisterLiveness::SimulateFusion( 337fd4e5da5Sopenharmony_ci const Loop& l1, const Loop& l2, RegionRegisterLiveness* sim_result) const { 338fd4e5da5Sopenharmony_ci sim_result->Clear(); 339fd4e5da5Sopenharmony_ci 340fd4e5da5Sopenharmony_ci // Compute the live-in state: 341fd4e5da5Sopenharmony_ci // sim_result.live_in = l1.live_in U l2.live_in 342fd4e5da5Sopenharmony_ci // This assumes that |l1| does not generated register that is live-out for 343fd4e5da5Sopenharmony_ci // |l1|. 344fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* l1_header_live_inout = Get(l1.GetHeaderBlock()); 345fd4e5da5Sopenharmony_ci sim_result->live_in_ = l1_header_live_inout->live_in_; 346fd4e5da5Sopenharmony_ci 347fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* l2_header_live_inout = Get(l2.GetHeaderBlock()); 348fd4e5da5Sopenharmony_ci sim_result->live_in_.insert(l2_header_live_inout->live_in_.begin(), 349fd4e5da5Sopenharmony_ci l2_header_live_inout->live_in_.end()); 350fd4e5da5Sopenharmony_ci 351fd4e5da5Sopenharmony_ci // The live-out set of the fused loop is the l2 live-out set. 352fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> exit_blocks; 353fd4e5da5Sopenharmony_ci l2.GetExitBlocks(&exit_blocks); 354fd4e5da5Sopenharmony_ci 355fd4e5da5Sopenharmony_ci for (uint32_t bb_id : exit_blocks) { 356fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* live_inout = Get(bb_id); 357fd4e5da5Sopenharmony_ci sim_result->live_out_.insert(live_inout->live_in_.begin(), 358fd4e5da5Sopenharmony_ci live_inout->live_in_.end()); 359fd4e5da5Sopenharmony_ci } 360fd4e5da5Sopenharmony_ci 361fd4e5da5Sopenharmony_ci // Compute the register usage information. 362fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> seen_insn; 363fd4e5da5Sopenharmony_ci for (Instruction* insn : sim_result->live_out_) { 364fd4e5da5Sopenharmony_ci sim_result->AddRegisterClass(insn); 365fd4e5da5Sopenharmony_ci seen_insn.insert(insn->result_id()); 366fd4e5da5Sopenharmony_ci } 367fd4e5da5Sopenharmony_ci for (Instruction* insn : sim_result->live_in_) { 368fd4e5da5Sopenharmony_ci if (!seen_insn.count(insn->result_id())) { 369fd4e5da5Sopenharmony_ci continue; 370fd4e5da5Sopenharmony_ci } 371fd4e5da5Sopenharmony_ci sim_result->AddRegisterClass(insn); 372fd4e5da5Sopenharmony_ci seen_insn.insert(insn->result_id()); 373fd4e5da5Sopenharmony_ci } 374fd4e5da5Sopenharmony_ci 375fd4e5da5Sopenharmony_ci sim_result->used_registers_ = 0; 376fd4e5da5Sopenharmony_ci 377fd4e5da5Sopenharmony_ci // The loop fusion is injecting the l1 before the l2, the latch of l1 will be 378fd4e5da5Sopenharmony_ci // connected to the header of l2. 379fd4e5da5Sopenharmony_ci // To compute the register usage, we inject the loop live-in (union of l1 and 380fd4e5da5Sopenharmony_ci // l2 live-in header blocks) into the live in/out of each basic block of 381fd4e5da5Sopenharmony_ci // l1 to get the peak register usage. We then repeat the operation to for l2 382fd4e5da5Sopenharmony_ci // basic blocks but in this case we inject the live-out of the latch of l1. 383fd4e5da5Sopenharmony_ci auto live_loop = MakeFilterIteratorRange( 384fd4e5da5Sopenharmony_ci sim_result->live_in_.begin(), sim_result->live_in_.end(), 385fd4e5da5Sopenharmony_ci [&l1, &l2](Instruction* insn) { 386fd4e5da5Sopenharmony_ci BasicBlock* bb = insn->context()->get_instr_block(insn); 387fd4e5da5Sopenharmony_ci return insn->HasResultId() && 388fd4e5da5Sopenharmony_ci !(insn->opcode() == spv::Op::OpPhi && 389fd4e5da5Sopenharmony_ci (bb == l1.GetHeaderBlock() || bb == l2.GetHeaderBlock())); 390fd4e5da5Sopenharmony_ci }); 391fd4e5da5Sopenharmony_ci 392fd4e5da5Sopenharmony_ci for (uint32_t bb_id : l1.GetBlocks()) { 393fd4e5da5Sopenharmony_ci BasicBlock* bb = context_->cfg()->block(bb_id); 394fd4e5da5Sopenharmony_ci 395fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* live_inout_info = Get(bb_id); 396fd4e5da5Sopenharmony_ci assert(live_inout_info != nullptr && "Basic block not processed"); 397fd4e5da5Sopenharmony_ci RegionRegisterLiveness::LiveSet live_out = live_inout_info->live_out_; 398fd4e5da5Sopenharmony_ci live_out.insert(live_loop.begin(), live_loop.end()); 399fd4e5da5Sopenharmony_ci sim_result->used_registers_ = 400fd4e5da5Sopenharmony_ci std::max(sim_result->used_registers_, 401fd4e5da5Sopenharmony_ci live_inout_info->used_registers_ + live_out.size() - 402fd4e5da5Sopenharmony_ci live_inout_info->live_out_.size()); 403fd4e5da5Sopenharmony_ci 404fd4e5da5Sopenharmony_ci for (Instruction& insn : *bb) { 405fd4e5da5Sopenharmony_ci if (insn.opcode() == spv::Op::OpPhi || !CreatesRegisterUsage(&insn) || 406fd4e5da5Sopenharmony_ci seen_insn.count(insn.result_id())) { 407fd4e5da5Sopenharmony_ci continue; 408fd4e5da5Sopenharmony_ci } 409fd4e5da5Sopenharmony_ci sim_result->AddRegisterClass(&insn); 410fd4e5da5Sopenharmony_ci } 411fd4e5da5Sopenharmony_ci } 412fd4e5da5Sopenharmony_ci 413fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* l1_latch_live_inout_info = 414fd4e5da5Sopenharmony_ci Get(l1.GetLatchBlock()->id()); 415fd4e5da5Sopenharmony_ci assert(l1_latch_live_inout_info != nullptr && "Basic block not processed"); 416fd4e5da5Sopenharmony_ci RegionRegisterLiveness::LiveSet l1_latch_live_out = 417fd4e5da5Sopenharmony_ci l1_latch_live_inout_info->live_out_; 418fd4e5da5Sopenharmony_ci l1_latch_live_out.insert(live_loop.begin(), live_loop.end()); 419fd4e5da5Sopenharmony_ci 420fd4e5da5Sopenharmony_ci auto live_loop_l2 = 421fd4e5da5Sopenharmony_ci make_range(l1_latch_live_out.begin(), l1_latch_live_out.end()); 422fd4e5da5Sopenharmony_ci 423fd4e5da5Sopenharmony_ci for (uint32_t bb_id : l2.GetBlocks()) { 424fd4e5da5Sopenharmony_ci BasicBlock* bb = context_->cfg()->block(bb_id); 425fd4e5da5Sopenharmony_ci 426fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* live_inout_info = Get(bb_id); 427fd4e5da5Sopenharmony_ci assert(live_inout_info != nullptr && "Basic block not processed"); 428fd4e5da5Sopenharmony_ci RegionRegisterLiveness::LiveSet live_out = live_inout_info->live_out_; 429fd4e5da5Sopenharmony_ci live_out.insert(live_loop_l2.begin(), live_loop_l2.end()); 430fd4e5da5Sopenharmony_ci sim_result->used_registers_ = 431fd4e5da5Sopenharmony_ci std::max(sim_result->used_registers_, 432fd4e5da5Sopenharmony_ci live_inout_info->used_registers_ + live_out.size() - 433fd4e5da5Sopenharmony_ci live_inout_info->live_out_.size()); 434fd4e5da5Sopenharmony_ci 435fd4e5da5Sopenharmony_ci for (Instruction& insn : *bb) { 436fd4e5da5Sopenharmony_ci if (insn.opcode() == spv::Op::OpPhi || !CreatesRegisterUsage(&insn) || 437fd4e5da5Sopenharmony_ci seen_insn.count(insn.result_id())) { 438fd4e5da5Sopenharmony_ci continue; 439fd4e5da5Sopenharmony_ci } 440fd4e5da5Sopenharmony_ci sim_result->AddRegisterClass(&insn); 441fd4e5da5Sopenharmony_ci } 442fd4e5da5Sopenharmony_ci } 443fd4e5da5Sopenharmony_ci} 444fd4e5da5Sopenharmony_ci 445fd4e5da5Sopenharmony_civoid RegisterLiveness::SimulateFission( 446fd4e5da5Sopenharmony_ci const Loop& loop, const std::unordered_set<Instruction*>& moved_inst, 447fd4e5da5Sopenharmony_ci const std::unordered_set<Instruction*>& copied_inst, 448fd4e5da5Sopenharmony_ci RegionRegisterLiveness* l1_sim_result, 449fd4e5da5Sopenharmony_ci RegionRegisterLiveness* l2_sim_result) const { 450fd4e5da5Sopenharmony_ci l1_sim_result->Clear(); 451fd4e5da5Sopenharmony_ci l2_sim_result->Clear(); 452fd4e5da5Sopenharmony_ci 453fd4e5da5Sopenharmony_ci // Filter predicates: consider instructions that only belong to the first and 454fd4e5da5Sopenharmony_ci // second loop. 455fd4e5da5Sopenharmony_ci auto belong_to_loop1 = [&moved_inst, &copied_inst, &loop](Instruction* insn) { 456fd4e5da5Sopenharmony_ci return moved_inst.count(insn) || copied_inst.count(insn) || 457fd4e5da5Sopenharmony_ci !loop.IsInsideLoop(insn); 458fd4e5da5Sopenharmony_ci }; 459fd4e5da5Sopenharmony_ci auto belong_to_loop2 = [&moved_inst](Instruction* insn) { 460fd4e5da5Sopenharmony_ci return !moved_inst.count(insn); 461fd4e5da5Sopenharmony_ci }; 462fd4e5da5Sopenharmony_ci 463fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* header_live_inout = Get(loop.GetHeaderBlock()); 464fd4e5da5Sopenharmony_ci // l1 live-in 465fd4e5da5Sopenharmony_ci { 466fd4e5da5Sopenharmony_ci auto live_loop = MakeFilterIteratorRange( 467fd4e5da5Sopenharmony_ci header_live_inout->live_in_.begin(), header_live_inout->live_in_.end(), 468fd4e5da5Sopenharmony_ci belong_to_loop1); 469fd4e5da5Sopenharmony_ci l1_sim_result->live_in_.insert(live_loop.begin(), live_loop.end()); 470fd4e5da5Sopenharmony_ci } 471fd4e5da5Sopenharmony_ci // l2 live-in 472fd4e5da5Sopenharmony_ci { 473fd4e5da5Sopenharmony_ci auto live_loop = MakeFilterIteratorRange( 474fd4e5da5Sopenharmony_ci header_live_inout->live_in_.begin(), header_live_inout->live_in_.end(), 475fd4e5da5Sopenharmony_ci belong_to_loop2); 476fd4e5da5Sopenharmony_ci l2_sim_result->live_in_.insert(live_loop.begin(), live_loop.end()); 477fd4e5da5Sopenharmony_ci } 478fd4e5da5Sopenharmony_ci 479fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> exit_blocks; 480fd4e5da5Sopenharmony_ci loop.GetExitBlocks(&exit_blocks); 481fd4e5da5Sopenharmony_ci 482fd4e5da5Sopenharmony_ci // l2 live-out. 483fd4e5da5Sopenharmony_ci for (uint32_t bb_id : exit_blocks) { 484fd4e5da5Sopenharmony_ci const RegionRegisterLiveness* live_inout = Get(bb_id); 485fd4e5da5Sopenharmony_ci l2_sim_result->live_out_.insert(live_inout->live_in_.begin(), 486fd4e5da5Sopenharmony_ci live_inout->live_in_.end()); 487fd4e5da5Sopenharmony_ci } 488fd4e5da5Sopenharmony_ci // l1 live-out. 489fd4e5da5Sopenharmony_ci { 490fd4e5da5Sopenharmony_ci auto live_out = MakeFilterIteratorRange(l2_sim_result->live_out_.begin(), 491fd4e5da5Sopenharmony_ci l2_sim_result->live_out_.end(), 492fd4e5da5Sopenharmony_ci belong_to_loop1); 493fd4e5da5Sopenharmony_ci l1_sim_result->live_out_.insert(live_out.begin(), live_out.end()); 494fd4e5da5Sopenharmony_ci } 495fd4e5da5Sopenharmony_ci { 496fd4e5da5Sopenharmony_ci auto live_out = 497fd4e5da5Sopenharmony_ci MakeFilterIteratorRange(l2_sim_result->live_in_.begin(), 498fd4e5da5Sopenharmony_ci l2_sim_result->live_in_.end(), belong_to_loop1); 499fd4e5da5Sopenharmony_ci l1_sim_result->live_out_.insert(live_out.begin(), live_out.end()); 500fd4e5da5Sopenharmony_ci } 501fd4e5da5Sopenharmony_ci // Lives out of l1 are live out of l2 so are live in of l2 as well. 502fd4e5da5Sopenharmony_ci l2_sim_result->live_in_.insert(l1_sim_result->live_out_.begin(), 503fd4e5da5Sopenharmony_ci l1_sim_result->live_out_.end()); 504fd4e5da5Sopenharmony_ci 505fd4e5da5Sopenharmony_ci for (Instruction* insn : l1_sim_result->live_in_) { 506fd4e5da5Sopenharmony_ci l1_sim_result->AddRegisterClass(insn); 507fd4e5da5Sopenharmony_ci } 508fd4e5da5Sopenharmony_ci for (Instruction* insn : l2_sim_result->live_in_) { 509fd4e5da5Sopenharmony_ci l2_sim_result->AddRegisterClass(insn); 510fd4e5da5Sopenharmony_ci } 511fd4e5da5Sopenharmony_ci 512fd4e5da5Sopenharmony_ci l1_sim_result->used_registers_ = 0; 513fd4e5da5Sopenharmony_ci l2_sim_result->used_registers_ = 0; 514fd4e5da5Sopenharmony_ci 515fd4e5da5Sopenharmony_ci for (uint32_t bb_id : loop.GetBlocks()) { 516fd4e5da5Sopenharmony_ci BasicBlock* bb = context_->cfg()->block(bb_id); 517fd4e5da5Sopenharmony_ci 518fd4e5da5Sopenharmony_ci const RegisterLiveness::RegionRegisterLiveness* live_inout = Get(bb_id); 519fd4e5da5Sopenharmony_ci assert(live_inout != nullptr && "Basic block not processed"); 520fd4e5da5Sopenharmony_ci auto l1_block_live_out = 521fd4e5da5Sopenharmony_ci MakeFilterIteratorRange(live_inout->live_out_.begin(), 522fd4e5da5Sopenharmony_ci live_inout->live_out_.end(), belong_to_loop1); 523fd4e5da5Sopenharmony_ci auto l2_block_live_out = 524fd4e5da5Sopenharmony_ci MakeFilterIteratorRange(live_inout->live_out_.begin(), 525fd4e5da5Sopenharmony_ci live_inout->live_out_.end(), belong_to_loop2); 526fd4e5da5Sopenharmony_ci 527fd4e5da5Sopenharmony_ci size_t l1_reg_count = 528fd4e5da5Sopenharmony_ci std::distance(l1_block_live_out.begin(), l1_block_live_out.end()); 529fd4e5da5Sopenharmony_ci size_t l2_reg_count = 530fd4e5da5Sopenharmony_ci std::distance(l2_block_live_out.begin(), l2_block_live_out.end()); 531fd4e5da5Sopenharmony_ci 532fd4e5da5Sopenharmony_ci std::unordered_set<uint32_t> die_in_block; 533fd4e5da5Sopenharmony_ci for (Instruction& insn : make_range(bb->rbegin(), bb->rend())) { 534fd4e5da5Sopenharmony_ci if (insn.opcode() == spv::Op::OpPhi) { 535fd4e5da5Sopenharmony_ci break; 536fd4e5da5Sopenharmony_ci } 537fd4e5da5Sopenharmony_ci 538fd4e5da5Sopenharmony_ci bool does_belong_to_loop1 = belong_to_loop1(&insn); 539fd4e5da5Sopenharmony_ci bool does_belong_to_loop2 = belong_to_loop2(&insn); 540fd4e5da5Sopenharmony_ci insn.ForEachInId([live_inout, &die_in_block, &l1_reg_count, &l2_reg_count, 541fd4e5da5Sopenharmony_ci does_belong_to_loop1, does_belong_to_loop2, 542fd4e5da5Sopenharmony_ci this](uint32_t* id) { 543fd4e5da5Sopenharmony_ci Instruction* op_insn = context_->get_def_use_mgr()->GetDef(*id); 544fd4e5da5Sopenharmony_ci if (!CreatesRegisterUsage(op_insn) || 545fd4e5da5Sopenharmony_ci live_inout->live_out_.count(op_insn)) { 546fd4e5da5Sopenharmony_ci // already taken into account. 547fd4e5da5Sopenharmony_ci return; 548fd4e5da5Sopenharmony_ci } 549fd4e5da5Sopenharmony_ci if (!die_in_block.count(*id)) { 550fd4e5da5Sopenharmony_ci if (does_belong_to_loop1) { 551fd4e5da5Sopenharmony_ci l1_reg_count++; 552fd4e5da5Sopenharmony_ci } 553fd4e5da5Sopenharmony_ci if (does_belong_to_loop2) { 554fd4e5da5Sopenharmony_ci l2_reg_count++; 555fd4e5da5Sopenharmony_ci } 556fd4e5da5Sopenharmony_ci die_in_block.insert(*id); 557fd4e5da5Sopenharmony_ci } 558fd4e5da5Sopenharmony_ci }); 559fd4e5da5Sopenharmony_ci l1_sim_result->used_registers_ = 560fd4e5da5Sopenharmony_ci std::max(l1_sim_result->used_registers_, l1_reg_count); 561fd4e5da5Sopenharmony_ci l2_sim_result->used_registers_ = 562fd4e5da5Sopenharmony_ci std::max(l2_sim_result->used_registers_, l2_reg_count); 563fd4e5da5Sopenharmony_ci if (CreatesRegisterUsage(&insn)) { 564fd4e5da5Sopenharmony_ci if (does_belong_to_loop1) { 565fd4e5da5Sopenharmony_ci if (!l1_sim_result->live_in_.count(&insn)) { 566fd4e5da5Sopenharmony_ci l1_sim_result->AddRegisterClass(&insn); 567fd4e5da5Sopenharmony_ci } 568fd4e5da5Sopenharmony_ci l1_reg_count--; 569fd4e5da5Sopenharmony_ci } 570fd4e5da5Sopenharmony_ci if (does_belong_to_loop2) { 571fd4e5da5Sopenharmony_ci if (!l2_sim_result->live_in_.count(&insn)) { 572fd4e5da5Sopenharmony_ci l2_sim_result->AddRegisterClass(&insn); 573fd4e5da5Sopenharmony_ci } 574fd4e5da5Sopenharmony_ci l2_reg_count--; 575fd4e5da5Sopenharmony_ci } 576fd4e5da5Sopenharmony_ci } 577fd4e5da5Sopenharmony_ci } 578fd4e5da5Sopenharmony_ci } 579fd4e5da5Sopenharmony_ci} 580fd4e5da5Sopenharmony_ci 581fd4e5da5Sopenharmony_ci} // namespace opt 582fd4e5da5Sopenharmony_ci} // namespace spvtools 583