1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H
17 #define COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H
18 
19 #include "optimizer/ir/basicblock.h"
20 #include "optimizer/ir/graph.h"
21 #include "utils/arena_containers.h"
22 #include "utils/hash.h"
23 
24 namespace panda::compiler {
25 class BasicBlock;
26 class Graph;
27 class Inst;
28 
29 // NOLINTNEXTLINE(readability-redundant-declaration)
30 bool IsLoopSingleBackEdgeExitPoint(Loop *loop);
31 
32 enum class UnrollType : uint8_t { UNROLL_WITH_SIDE_EXITS, UNROLL_WITHOUT_SIDE_EXITS, UNROLL_POST_INCREMENT };
33 
34 enum class InstCloneType : uint8_t { CLONE_ALL, CLONE_INSTS };
35 
36 enum class CloneEdgeType : uint8_t { EDGE_PRED, EDGE_SUCC };
37 
38 /**
39  * Helper-class, provides methods to:
40  * - Clone the whole graph;
41  * - Clone loop;
42  * - Unroll loop;
43  * - Peel loop;
44  */
45 class GraphCloner {
46     using PhiInputsMap = ArenaUnorderedMap<Inst *, Inst *>;
47 
48     struct LoopUnrollData {
49         BasicBlock *header {nullptr};
50         BasicBlock *backedge {nullptr};
51         BasicBlock *exit_block {nullptr};
52         BasicBlock *outer {nullptr};
53         ArenaVector<BasicBlock *> *blocks {nullptr};
54         InstVector *phi_update_inputs {nullptr};
55         PhiInputsMap *phi_replaced_inputs {nullptr};
56     };
57 
58     struct LoopClonerData {
59         BasicBlock *outer {nullptr};
60         BasicBlock *header {nullptr};
61         BasicBlock *pre_header {nullptr};
62         ArenaVector<BasicBlock *> *blocks {nullptr};
63     };
64 
65 public:
66     explicit GraphCloner(Graph *graph, ArenaAllocator *allocator, ArenaAllocator *local_allocator);
67 
68     Graph *CloneGraph();
69     BasicBlock *CloneLoopHeader(BasicBlock *block, BasicBlock *outer, BasicBlock *replaceable_pred);
70     Loop *CloneLoop(Loop *loop);
71     bool IsLoopClonable(Loop *loop, size_t inst_limit);
72 
73     /**
74      * Make equal to the `factor` number of clones of loop body and insert them into the graph
75      *
76      *      /----[pre-loop]
77      *      |        |
78      *      |        v
79      *      |    [loop-body]<----\
80      *      |        |   |       |
81      *      |        |   \-------/
82      *      |        |
83      *      |        v
84      *      \--->[outside-block]
85      *               |
86      *               v
87      *              ...
88      *
89      * Cloning with side-exits:
90      *
91      *      /----[pre-loop]
92      *      |        |
93      *      |        v
94      *      |   [loop-body]---------\
95      *      |        |              |
96      *      |        v              |
97      *      |   [loop-body']------->|
98      *      |        |              |
99      *      |        v              |
100      *      |   [loop-body'']------>|
101      *      |        |              |
102      *      |        v              |
103      *      |  [resolver-block]<----/
104      *      |        |
105      *      |        v
106      *      \--->[outside-block]
107      *              ...
108      *
109      *  Cloning without side-exits:
110      *
111      *      /----[pre-loop]
112      *      |        |
113      *      |        v
114      *      |    [loop-body]
115      *      |        |
116      *      |        v
117      *      |    [loop-body']
118      *      |        |
119      *      |        v
120      *      |    [loop-body'']
121      *      |        |
122      *      |        v
123      *      \--->[outside-block]
124      */
125     template <UnrollType type>
UnrollLoopBody(Loop *loop, size_t factor)126     void UnrollLoopBody(Loop *loop, size_t factor)
127     {
128         ASSERT_PRINT(IsLoopSingleBackEdgeExitPoint(loop), "Cloning blocks doesn't have single entry/exit point");
129         auto marker_holder = MarkerHolder(GetGraph());
130         clone_marker_ = marker_holder.GetMarker();
131         auto unroll_data = PrepareLoopToUnroll(loop, type != UnrollType::UNROLL_WITHOUT_SIDE_EXITS);
132 
133         auto clone_count = factor - 1;
134         for (size_t i = 0; i < clone_count; i++) {
135             CloneBlocksAndInstructions<InstCloneType::CLONE_ALL, true>(*unroll_data->blocks, GetGraph());
136             BuildLoopUnrollControlFlow(unroll_data);
137             // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
138             if constexpr (type == UnrollType::UNROLL_WITHOUT_SIDE_EXITS) {
139                 // Users update should be done on the last no-side-exits unroll iteration
140                 // before building loop data-flow
141                 if (i + 1 == clone_count) {  // last_iteration
142                     UpdateUsersAfterNoSideExitsUnroll(unroll_data);
143                 }
144             }
145             BuildLoopUnrollDataFlow(unroll_data);
146         }
147 
148         // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
149         if constexpr (type == UnrollType::UNROLL_POST_INCREMENT) {
150             RemoveLoopBackEdge(unroll_data);
151         }
152     }
153 
154 private:
155     // Whole graph cloning
156     void CopyLoop(Loop *loop, Loop *cloned_loop);
157     void CloneLinearOrder(Graph *new_graph);
158     void BuildControlFlow();
159     void BuildDataFlow();
160     void CloneAnalyses(Graph *new_graph);
161     // Loop cloning
162     LoopClonerData *PrepareLoopToClone(Loop *loop);
163     LoopClonerData *CreateLoopClonerData(Loop *loop, BasicBlock *pre_header, BasicBlock *outside_succ);
164     BasicBlock *CreateNewOutsideSucc(BasicBlock *outside_succ, BasicBlock *back_edge, BasicBlock *pre_header);
165     void BuildLoopCloneControlFlow(LoopClonerData *unroll_data);
166     void BuildLoopCloneDataFlow(LoopClonerData *unroll_data);
167     void MakeLoopCloneInfo(LoopClonerData *unroll_data);
168     // Unroll cloning
169     LoopUnrollData *PrepareLoopToUnroll(Loop *loop, bool clone_side_exits);
170     BasicBlock *CreateResolverBlock(Loop *loop, BasicBlock *back_edge);
171     BasicBlock *SplitBackEdge(LoopUnrollData *unroll_data, Loop *loop, BasicBlock *back_edge);
172     void UpdateUsersAfterNoSideExitsUnroll(const LoopUnrollData *unroll_data);
173     void BuildLoopUnrollControlFlow(LoopUnrollData *unroll_data);
174     void BuildLoopUnrollDataFlow(LoopUnrollData *unroll_data);
175     void RemoveLoopBackEdge(const LoopUnrollData *unroll_data);
176     // Loop header cloning
177     void BuildClonedLoopHeaderDataFlow(const BasicBlock &block, BasicBlock *resolver, BasicBlock *clone);
178     void UpdateUsersForClonedLoopHeader(Inst *inst, BasicBlock *outer_block);
179     // Cloned blocks and instructions getters
HasClone(const BasicBlock *block)180     bool HasClone(const BasicBlock *block)
181     {
182         return (block->GetId() < clone_blocks_.size()) && (clone_blocks_[block->GetId()] != nullptr);
183     }
184 
GetClone(const BasicBlock *block)185     BasicBlock *GetClone(const BasicBlock *block)
186     {
187         ASSERT(block != nullptr);
188         ASSERT_PRINT(block->GetGraph() == GetGraph(), "GraphCloner probably caught disconnected block");
189         ASSERT_DO(HasClone(block), block->Dump(&std::cerr));
190         return clone_blocks_[block->GetId()];
191     }
192 
HasClone(Inst *inst)193     bool HasClone(Inst *inst)
194     {
195         return inst->IsMarked(clone_marker_) && (inst->GetCloneNumber() < clone_instructions_.size());
196     }
197 
GetClone(Inst *inst)198     Inst *GetClone(Inst *inst)
199     {
200         ASSERT(inst != nullptr);
201         ASSERT(HasClone(inst));
202 
203         // We don't use clone_marker_ when cloning the whole graph, so lets at least check the basic block here
204         ASSERT(inst->GetBasicBlock() != nullptr);
205         ASSERT_PRINT(inst->GetBasicBlock()->GetGraph() == GetGraph(),
206                      "GraphCloner probably caught an instruction from disconnected block");
207         // Empty clone_blocks_ means we are cloning only one basic block
208         ASSERT(clone_blocks_.empty() || HasClone(inst->GetBasicBlock()));
209 
210         return clone_instructions_[inst->GetCloneNumber()];
211     }
212 
213     /**
214      * For each block of input vector create a new one empty block and populate it with the instructions,
215      * cloned form the original block
216      */
217     template <InstCloneType type, bool skip_safepoints>
CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph)218     void CloneBlocksAndInstructions(const ArenaVector<BasicBlock *> &blocks, Graph *target_graph)
219     {
220         clone_blocks_.clear();
221         clone_blocks_.resize(GetGraph()->GetVectorBlocks().size(), nullptr);
222         clone_instructions_.clear();
223         size_t inst_count = 0;
224         for (const auto &block : blocks) {
225             if (block != nullptr) {
226                 auto clone = block->Clone(target_graph);
227                 clone_blocks_[block->GetId()] = clone;
228                 CloneInstructions<type, skip_safepoints>(block, clone, &inst_count);
229                 if (block->IsTryBegin()) {
230                     target_graph->AppendTryBeginBlock(clone);
231                 }
232             }
233         }
234     }
235 
236     /**
237      * Clone block's instructions and append to the block's clone
238      */
239     template <InstCloneType type, bool skip_safepoints>
CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count)240     void CloneInstructions(const BasicBlock *block, BasicBlock *clone, size_t *inst_count)
241     {
242         for (auto inst : block->Insts()) {
243             clone->AppendInst(CloneInstruction(inst, inst_count, clone->GetGraph()));
244         }
245 
246         if constexpr (type == InstCloneType::CLONE_ALL) {  // NOLINT
247             for (auto phi : block->PhiInsts()) {
248                 auto phi_clone = CloneInstruction(phi, inst_count, clone->GetGraph());
249                 clone->AppendPhi(phi_clone->CastToPhi());
250             }
251         }
252     }
253 
254     /**
255      * Clone instruction and mark both: clone and cloned
256      */
CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph)257     Inst *CloneInstruction(Inst *inst, size_t *inst_count, Graph *target_graph)
258     {
259         inst->SetCloneNumber((*inst_count)++);
260         auto inst_clone = inst->Clone(target_graph);
261         clone_instructions_.push_back(inst_clone);
262         inst->SetMarker(clone_marker_);
263         inst_clone->SetMarker(clone_marker_);
264         if (inst->GetBasicBlock()->GetGraph() != target_graph) {
265             inst_clone->SetId(inst->GetId());
266         }
267         return inst_clone;
268     }
269 
270     inline bool IsInstLoopHeaderPhi(Inst *inst, Loop *loop);
271 
272     /**
273      * Use the following rules cloning the inputs:
274      * - if input of the original instruction has clone - insert this clone as input
275      * - otherwise - use original input as clone instruction input
276      */
277     template <bool replace_header_phi>
SetCloneInputs(Inst *inst, BasicBlock *block = nullptr)278     void SetCloneInputs(Inst *inst, BasicBlock *block = nullptr)
279     {
280         auto clone = GetClone(inst);
281         for (size_t i = 0; i < inst->GetInputsCount(); i++) {
282             auto input = inst->GetInput(i).GetInst();
283             if (replace_header_phi && IsInstLoopHeaderPhi(input, inst->GetBasicBlock()->GetLoop())) {
284                 ASSERT(block != nullptr);
285                 input = input->CastToPhi()->GetPhiInput(block);
286             } else if (HasClone(input)) {
287                 input = GetClone(input);
288             }
289 
290             if (clone->IsOperandsDynamic()) {
291                 clone->AppendInput(input);
292                 if (clone->IsSaveState()) {
293                     static_cast<SaveStateInst *>(clone)->SetVirtualRegister(
294                         i, static_cast<SaveStateInst *>(inst)->GetVirtualRegister(i));
295                 } else if (clone->IsPhi()) {
296                     clone->CastToPhi()->SetPhiInputBbNum(i, inst->CastToPhi()->GetPhiInputBbNum(i));
297                 }
298             } else {
299                 clone->SetInput(i, input);
300             }
301         }
302     }
303 
304     template <CloneEdgeType edge_type>
CloneEdges(BasicBlock *block)305     void CloneEdges(BasicBlock *block)
306     {
307         auto clone = GetClone(block);
308         auto block_edges = &block->GetPredsBlocks();
309         auto clone_edges = &clone->GetPredsBlocks();
310         // NOLINTNEXTLINE(bugprone-suspicious-semicolon, readability-braces-around-statements)
311         if constexpr (edge_type == CloneEdgeType::EDGE_SUCC) {
312             block_edges = &block->GetSuccsBlocks();
313             clone_edges = &clone->GetSuccsBlocks();
314         }
315         ASSERT(clone_edges->empty());
316         clone_edges->reserve(block_edges->size());
317         for (auto edge : *block_edges) {
318             clone_edges->push_back(GetClone(edge));
319         }
320     }
321 
GetGraph()322     Graph *GetGraph()
323     {
324         return graph_;
325     }
326 
327     void UpdateCaller(Inst *inst);
328 
329 private:
330     Graph *graph_;
331     ArenaAllocator *allocator_;
332     ArenaAllocator *local_allocator_;
333     ArenaVector<BasicBlock *> clone_blocks_;
334     InstVector clone_instructions_;
335     Marker clone_marker_ {UNDEF_MARKER};
336 };
337 }  // namespace panda::compiler
338 
339 #endif  // COMPILER_OPTIMIZER_IR_GRAPH_CLONER_H
340