1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 17 #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 18 19 #include "optimizer/ir/basicblock.h" 20 #include "optimizer/ir/graph.h" 21 #include "utils/arena_containers.h" 22 #include "utils/hash.h" 23 24 namespace panda::compiler { 25 class BasicBlock; 26 class Graph; 27 class Inst; 28 29 // NOLINTNEXTLINE(readability-redundant-declaration) 30 bool IsLoopSingleBackEdgeExitPoint(Loop *loop); 31 32 enum class UnrollType : uint8_t { UNROLL_WITH_SIDE_EXITS, UNROLL_WITHOUT_SIDE_EXITS, UNROLL_POST_INCREMENT }; 33 34 enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS }; 35 36 enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC }; 37 38 /** 39 * Helper-class, provides methods to: 40 * - Clone the whole graph; 41 * - Clone loop; 42 * - Unroll loop; 43 * - Peel loop; 44 */ 45 class GraphCloner { 46 using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>; 47 48 struct LoopUnrollData { 49 BasicBlock *header {nullptr}; 50 BasicBlock *backedge {nullptr}; 51 BasicBlock *exit_block {nullptr}; 52 BasicBlock *outer {nullptr}; 53 ArenaVector<BasicBlock *> *blocks {nullptr}; 54 InstVector *phi_update_inputs {nullptr}; 55 PhiInputsMap *phi_replaced_inputs {nullptr}; 56 }; 57 58 struct LoopClonerData { 59 BasicBlock *outer {nullptr}; 60 BasicBlock *header {nullptr}; 61 BasicBlock *pre_header {nullptr}; 62 ArenaVector<BasicBlock *> *blocks {nullptr}; 63 }; 64 65 public: 66 explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator); 67 68 Graph *CloneGraph(); 69 BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred); 70 Loop *CloneLoop(Loop *loop); 71 bool IsLoopClonable(Loop *loop, size_t inst_limit); 72 73 /** 74 * Make equal to the `factor` number of clones of loop body and insert them into the graph 75 * 76 * /----[pre-loop] 77 * | | 78 * | v 79 * | [loop-body]<----\ 80 * | | | | 81 * | | \-------/ 82 * | | 83 * | v 84 * \--->[outside-block] 85 * | 86 * v 87 * ... 88 * 89 * Cloning with side-exits: 90 * 91 * /----[pre-loop] 92 * | | 93 * | v 94 * | [loop-body]---------\ 95 * | | | 96 * | v | 97 * | [loop-body']------->| 98 * | | | 99 * | v | 100 * | [loop-body'']------>| 101 * | | | 102 * | v | 103 * | [resolver-block]<----/ 104 * | | 105 * | v 106 * \--->[outside-block] 107 * ... 108 * 109 * Cloning without side-exits: 110 * 111 * /----[pre-loop] 112 * | | 113 * | v 114 * | [loop-body] 115 * | | 116 * | v 117 * | [loop-body'] 118 * | | 119 * | v 120 * | [loop-body''] 121 * | | 122 * | v 123 * \--->[outside-block] 124 */ 125 template <UnrollType type> UnrollLoopBody(Loop *loop, size_t factor)126 void UnrollLoopBody(Loop *loop, size_t factor) 127 { 128 ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point"); 129 auto marker_holder = MarkerHolder(GetGraph()); 130 clone_marker_ = marker_holder.GetMarker(); 131 auto unroll_data = PrepareLoopToUnroll(loop, type != UnrollType::UNROLL_WITHOUT_SIDE_EXITS); 132 133 auto clone_count = factor - 1; 134 for (size_t i = 0; i < clone_count; i++) { 135 CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph()); 136 BuildLoopUnrollControlFlow(unroll_data); 137 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 138 if constexpr (type == UnrollType::UNROLL_WITHOUT_SIDE_EXITS) { 139 // Users update should be done on the last no-side-exits unroll iteration 140 // before building loop data-flow 141 if (i + 1 == clone_count) { // last_iteration 142 UpdateUsersAfterNoSideExitsUnroll(unroll_data); 143 } 144 } 145 BuildLoopUnrollDataFlow(unroll_data); 146 } 147 148 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 149 if constexpr (type == UnrollType::UNROLL_POST_INCREMENT) { 150 RemoveLoopBackEdge(unroll_data); 151 } 152 } 153 154 private: 155 // Whole graph cloning 156 void CopyLoop(Loop *loop, Loop *cloned_loop); 157 void CloneLinearOrder(Graph *new_graph); 158 void BuildControlFlow(); 159 void BuildDataFlow(); 160 void CloneAnalyses(Graph *new_graph); 161 // Loop cloning 162 LoopClonerData *PrepareLoopToClone(Loop *loop); 163 LoopClonerData *CreateLoopClonerData(Loop *loop, BasicBlock *pre_header, BasicBlock *outside_succ); 164 BasicBlock *CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header); 165 void BuildLoopCloneControlFlow(LoopClonerData *unroll_data); 166 void BuildLoopCloneDataFlow(LoopClonerData *unroll_data); 167 void MakeLoopCloneInfo(LoopClonerData *unroll_data); 168 // Unroll cloning 169 LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool clone_side_exits); 170 BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *back_edge); 171 BasicBlock *SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge); 172 void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data); 173 void BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data); 174 void BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data); 175 void RemoveLoopBackEdge(const LoopUnrollData *unroll_data); 176 // Loop header cloning 177 void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone); 178 void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block); 179 // Cloned blocks and instructions getters HasClone(const BasicBlock *block)180 bool HasClone(const BasicBlock *block) 181 { 182 return (block->GetId() < clone_blocks_.size()) && (clone_blocks_[block->GetId()] != nullptr); 183 } 184 GetClone(const BasicBlock *block)185 BasicBlock *GetClone(const BasicBlock *block) 186 { 187 ASSERT(block != nullptr); 188 ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block"); 189 ASSERT_DO(HasClone(block), block->Dump(&std::cerr)); 190 return clone_blocks_[block->GetId()]; 191 } 192 HasClone(Inst *inst)193 bool HasClone(Inst *inst) 194 { 195 return inst->IsMarked(clone_marker_) && (inst->GetCloneNumber() < clone_instructions_.size()); 196 } 197 GetClone(Inst *inst)198 Inst *GetClone(Inst *inst) 199 { 200 ASSERT(inst != nullptr); 201 ASSERT(HasClone(inst)); 202 203 // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here 204 ASSERT(inst->GetBasicBlock() != nullptr); 205 ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(), 206 "GraphCloner probably caught an instruction from disconnected block"); 207 // Empty clone_blocks_ means we are cloning only one basic block 208 ASSERT(clone_blocks_.empty() || HasClone(inst->GetBasicBlock())); 209 210 return clone_instructions_[inst->GetCloneNumber()]; 211 } 212 213 /** 214 * For each block of input vector create a new one empty block and populate it with the instructions, 215 * cloned form the original block 216 */ 217 template <InstCloneType type, bool skip_safepoints> CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph)218 void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph) 219 { 220 clone_blocks_.clear(); 221 clone_blocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr); 222 clone_instructions_.clear(); 223 size_t inst_count = 0; 224 for (const auto &block : blocks) { 225 if (block != nullptr) { 226 auto clone = block->Clone(target_graph); 227 clone_blocks_[block->GetId()] = clone; 228 CloneInstructions<type, skip_safepoints>(block, clone, &inst_count); 229 if (block->IsTryBegin()) { 230 target_graph->AppendTryBeginBlock(clone); 231 } 232 } 233 } 234 } 235 236 /** 237 * Clone block's instructions and append to the block's clone 238 */ 239 template <InstCloneType type, bool skip_safepoints> CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count)240 void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count) 241 { 242 for (auto inst : block->Insts()) { 243 clone->AppendInst(CloneInstruction(inst, inst_count, clone->GetGraph())); 244 } 245 246 if constexpr (type == InstCloneType::CLONE_ALL) { // NOLINT 247 for (auto phi : block->PhiInsts()) { 248 auto phi_clone = CloneInstruction(phi, inst_count, clone->GetGraph()); 249 clone->AppendPhi(phi_clone->CastToPhi()); 250 } 251 } 252 } 253 254 /** 255 * Clone instruction and mark both: clone and cloned 256 */ CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph)257 Inst *CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph) 258 { 259 inst->SetCloneNumber((*inst_count)++); 260 auto inst_clone = inst->Clone(target_graph); 261 clone_instructions_.push_back(inst_clone); 262 inst->SetMarker(clone_marker_); 263 inst_clone->SetMarker(clone_marker_); 264 if (inst->GetBasicBlock()->GetGraph() != target_graph) { 265 inst_clone->SetId(inst->GetId()); 266 } 267 return inst_clone; 268 } 269 270 inline bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop); 271 272 /** 273 * Use the following rules cloning the inputs: 274 * - if input of the original instruction has clone - insert this clone as input 275 * - otherwise - use original input as clone instruction input 276 */ 277 template <bool replace_header_phi> SetCloneInputs(Inst *inst, BasicBlock *block = nullptr)278 void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr) 279 { 280 auto clone = GetClone(inst); 281 for (size_t i = 0; i < inst->GetInputsCount(); i++) { 282 auto input = inst->GetInput(i).GetInst(); 283 if (replace_header_phi && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) { 284 ASSERT(block != nullptr); 285 input = input->CastToPhi()->GetPhiInput(block); 286 } else if (HasClone(input)) { 287 input = GetClone(input); 288 } 289 290 if (clone->IsOperandsDynamic()) { 291 clone->AppendInput(input); 292 if (clone->IsSaveState()) { 293 static_cast<SaveStateInst *>(clone)->SetVirtualRegister( 294 i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i)); 295 } else if (clone->IsPhi()) { 296 clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i)); 297 } 298 } else { 299 clone->SetInput(i, input); 300 } 301 } 302 } 303 304 template <CloneEdgeType edge_type> CloneEdges(BasicBlock *block)305 void CloneEdges(BasicBlock *block) 306 { 307 auto clone = GetClone(block); 308 auto block_edges = &block->GetPredsBlocks(); 309 auto clone_edges = &clone->GetPredsBlocks(); 310 // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements) 311 if constexpr (edge_type == CloneEdgeType::EDGE_SUCC) { 312 block_edges = &block->GetSuccsBlocks(); 313 clone_edges = &clone->GetSuccsBlocks(); 314 } 315 ASSERT(clone_edges->empty()); 316 clone_edges->reserve(block_edges->size()); 317 for (auto edge : *block_edges) { 318 clone_edges->push_back(GetClone(edge)); 319 } 320 } 321 GetGraph()322 Graph *GetGraph() 323 { 324 return graph_; 325 } 326 327 void UpdateCaller(Inst *inst); 328 329 private: 330 Graph *graph_; 331 ArenaAllocator *allocator_; 332 ArenaAllocator *local_allocator_; 333 ArenaVector<BasicBlock *> clone_blocks_; 334 InstVector clone_instructions_; 335 Marker clone_marker_ {UNDEF_MARKER}; 336 }; 337 } // namespace panda::compiler 338 339 #endif // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H 340