1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "reg_acc_alloc.h"
17 #include "common.h"
18 #include "compiler/optimizer/ir/basicblock.h"
19 #include "compiler/optimizer/ir/inst.h"
20
21 namespace panda::bytecodeopt {
22
23 /**
24 * Decide if accumulator register gets dirty between two instructions.
25 */
IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)26 bool IsAccWriteBetween(compiler::Inst *src_inst, compiler::Inst *dst_inst)
27 {
28 ASSERT(src_inst != dst_inst);
29
30 compiler::BasicBlock *block = src_inst->GetBasicBlock();
31 compiler::Inst *inst = src_inst->GetNext();
32
33 while (inst != dst_inst) {
34 if (UNLIKELY(inst == nullptr)) {
35 do {
36 // TODO(rtakacs): visit all the successors to get information about the
37 // accumulator usage. Only linear flow is supported right now.
38 if (block->GetSuccsBlocks().size() > 1) {
39 return true;
40 }
41
42 ASSERT(block->GetSuccsBlocks().size() == 1);
43 block = block->GetSuccessor(0);
44 // TODO(rtakacs): only linear flow is supported right now.
45 if (!dst_inst->IsPhi() && block->GetPredsBlocks().size() > 1) {
46 return true;
47 }
48 } while (block->IsEmpty() && !block->HasPhi());
49
50 // Get first phi instruction if exist.
51 // This is requred if dst_inst is a phi node.
52 inst = *(block->AllInsts());
53 } else {
54 if (inst->IsAccWrite()) {
55 return true;
56 }
57
58 if (inst->IsAccRead()) {
59 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
60
61 if (input->GetDstReg() != compiler::ACC_REG_ID) {
62 return true;
63 }
64 }
65
66 inst = inst->GetNext();
67 }
68 }
69
70 return false;
71 }
72
73 /**
74 * Return true if Phi instruction is marked as optimizable.
75 */
IsPhiOptimizable(compiler::Inst *phi) const76 inline bool RegAccAlloc::IsPhiOptimizable(compiler::Inst *phi) const
77 {
78 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
79 return phi->IsMarked(acc_marker_);
80 }
81
82 /**
83 * Return true if instruction can read the accumulator.
84 */
IsAccRead(compiler::Inst *inst) const85 bool RegAccAlloc::IsAccRead(compiler::Inst *inst) const
86 {
87 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccRead();
88 }
89
90 /**
91 * Return true if instruction can write the accumulator.
92 */
IsAccWrite(compiler::Inst *inst) const93 bool RegAccAlloc::IsAccWrite(compiler::Inst *inst) const
94 {
95 return UNLIKELY(inst->IsPhi()) ? IsPhiOptimizable(inst) : inst->IsAccWrite();
96 }
97
98 /**
99 * Decide if user can use accumulator as source.
100 * Do modifications on the order of inputs if necessary.
101 *
102 * Return true, if user can be optimized.
103 */
CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const104 bool RegAccAlloc::CanUserReadAcc(compiler::Inst *inst, compiler::Inst *user) const
105 {
106 if (user->IsPhi()) {
107 return IsPhiOptimizable(user);
108 }
109
110 if (!IsAccRead(user) || IsAccWriteBetween(inst, user)) {
111 return false;
112 }
113
114 bool found = false;
115 // Check if the instrucion occures more times as input.
116 // v2. SUB v0, v1
117 // v3. Add v2, v2
118 for (auto input : user->GetInputs()) {
119 compiler::Inst *uinput = input.GetInst();
120
121 if (uinput != inst) {
122 continue;
123 }
124
125 if (!found) {
126 found = true;
127 } else {
128 return false;
129 }
130 }
131
132 if (user->IsCall()) {
133 return user->GetInputsCount() <= (MAX_NUM_NON_RANGE_ARGS + 1); // +1 for SaveState
134 }
135
136 return user->GetInput(AccReadIndex(user)).GetInst() == inst || user->IsCommutative();
137 }
138
139 /**
140 * Check if all the Phi inputs and outputs can use the accumulator register.
141 *
142 * Return true, if Phi can be optimized.
143 */
IsPhiAccReady(compiler::Inst *phi) const144 bool RegAccAlloc::IsPhiAccReady(compiler::Inst *phi) const
145 {
146 ASSERT(phi->GetOpcode() == compiler::Opcode::Phi);
147
148 // TODO(rtakacs): there can be cases when the input/output of a Phi is an other Phi.
149 // These cases are not optimized for accumulator.
150 for (auto input : phi->GetInputs()) {
151 compiler::Inst *phi_input = input.GetInst();
152
153 if (!IsAccWrite(phi_input) || IsAccWriteBetween(phi_input, phi)) {
154 return false;
155 }
156 }
157
158 for (auto &user : phi->GetUsers()) {
159 compiler::Inst *uinst = user.GetInst();
160
161 if (!CanUserReadAcc(phi, uinst)) {
162 return false;
163 }
164 }
165 return true;
166 }
167
168 /**
169 * For most insts we can use their src_reg on the acc-read position
170 * to characterise whether we need lda in the codegen pass.
171 */
SetNeedLda(compiler::Inst *inst, bool need)172 void RegAccAlloc::SetNeedLda(compiler::Inst *inst, bool need)
173 {
174 if (inst->IsPhi() || inst->IsCatchPhi()) {
175 return;
176 }
177 if (!IsAccRead(inst)) {
178 return;
179 }
180 if (inst->IsCall()) { // we never need lda for calls
181 return;
182 }
183 compiler::Register reg = need ? compiler::INVALID_REG : compiler::ACC_REG_ID;
184 inst->SetSrcReg(AccReadIndex(inst), reg);
185 }
186
InitializeSourceRegisters()187 void RegAccAlloc::InitializeSourceRegisters()
188 {
189 for (auto block : GetGraph()->GetBlocksRPO()) {
190 for (auto inst : block->Insts()) {
191 if (inst->IsSaveState() || inst->IsCatchPhi()) {
192 continue;
193 }
194 if (inst->IsConst()) {
195 inst->SetFlag(compiler::inst_flags::ACC_WRITE);
196 }
197 for (size_t i = 0; i < inst->GetInputsCount(); ++i) {
198 inst->SetSrcReg(i, compiler::INVALID_REG);
199 }
200 if (inst->IsConst()) {
201 inst->SetDstReg(compiler::INVALID_REG);
202 }
203 }
204 }
205 }
206
MarkAccForPhiInstructions()207 void RegAccAlloc::MarkAccForPhiInstructions()
208 {
209 for (auto block : GetGraph()->GetBlocksRPO()) {
210 for (auto phi : block->PhiInsts()) {
211 if (IsPhiAccReady(phi)) {
212 phi->SetMarker(acc_marker_);
213 }
214 }
215 }
216 }
217
MarkAccForInstructions(compiler::BasicBlock *block)218 void RegAccAlloc::MarkAccForInstructions(compiler::BasicBlock *block)
219 {
220 for (auto inst : block->AllInsts()) {
221 if (inst->NoDest() || !IsAccWrite(inst)) {
222 continue;
223 }
224
225 bool use_acc_dst_reg = true;
226
227 for (auto &user : inst->GetUsers()) {
228 compiler::Inst *uinst = user.GetInst();
229 if (uinst->IsSaveState()) {
230 continue;
231 }
232 if (CanUserReadAcc(inst, uinst)) {
233 SetNeedLda(uinst, false);
234 } else {
235 use_acc_dst_reg = false;
236 }
237 }
238
239 if (use_acc_dst_reg) {
240 inst->SetDstReg(compiler::ACC_REG_ID);
241 continue;
242 }
243
244 if (!inst->IsConst()) {
245 continue;
246 }
247
248 inst->ClearFlag(compiler::inst_flags::ACC_WRITE);
249
250 for (auto &user : inst->GetUsers()) {
251 compiler::Inst *uinst = user.GetInst();
252
253 if (uinst->IsSaveState()) {
254 continue;
255 }
256
257 SetNeedLda(uinst, true);
258 }
259 }
260 }
261
UpdateInstructionsAfterMark(compiler::BasicBlock *block)262 void RegAccAlloc::UpdateInstructionsAfterMark(compiler::BasicBlock *block)
263 {
264 for (auto inst : block->Insts()) {
265 if (inst->GetInputsCount() == 0) {
266 continue;
267 }
268
269 if (inst->IsCall()) {
270 continue;
271 }
272
273 compiler::Inst *input = inst->GetInput(AccReadIndex(inst)).GetInst();
274 if (!IsAccWriteBetween(input, inst)) {
275 continue;
276 }
277
278 input->SetDstReg(compiler::INVALID_REG);
279 SetNeedLda(inst, true);
280
281 if (input->IsConst()) {
282 input->ClearFlag(compiler::inst_flags::ACC_WRITE);
283 for (auto &user : input->GetUsers()) {
284 compiler::Inst *uinst = user.GetInst();
285 SetNeedLda(uinst, true);
286 }
287 }
288 }
289 }
290
291 /**
292 * Determine the accumulator usage between instructions.
293 * Eliminate unnecessary register allocations by applying
294 * a special value (ACC_REG_ID) to the destination and
295 * source registers.
296 * This special value is a marker for the code generator
297 * not to produce lda/sta instructions.
298 */
RunImpl()299 bool RegAccAlloc::RunImpl()
300 {
301 GetGraph()->InitDefaultLocations();
302 // Initialize all source register of all instructions.
303 InitializeSourceRegisters();
304 // Mark Phi instructions if they can be optimized for acc.
305 MarkAccForPhiInstructions();
306 // Mark instructions if they can be optimized for acc.
307 for (auto block : GetGraph()->GetBlocksRPO()) {
308 MarkAccForInstructions(block);
309 UpdateInstructionsAfterMark(block);
310 }
311
312 #ifndef NDEBUG
313 GetGraph()->SetRegAccAllocApplied();
314 #endif // NDEBUG
315
316 return true;
317 }
318
319 } // namespace panda::bytecodeopt
320