1/**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "reg_encoder.h"
17#include "common.h"
18#include "compiler/optimizer/ir/basicblock.h"
19
20namespace panda::bytecodeopt {
21
// Returns true when `inst` is an intrinsic that is encoded in "range" form,
// i.e. takes its arguments in one contiguous block of virtual registers.
// When bytecode optimization is compiled out, no intrinsic is treated as range.
static bool IsIntrinsicRange(Inst *inst)
{
    if (inst->GetOpcode() != compiler::Opcode::Intrinsic) {
        return false;
    }
#ifdef ENABLE_BYTECODE_OPT
    // The cases below mirror the *RANGE* bytecodes of the target ISA
    // (call/new/supercall/createobject range forms and their wide variants).
    switch (inst->CastToIntrinsic()->GetIntrinsicId()) {
        case compiler::RuntimeInterface::IntrinsicId::CALLRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::CALLTHISRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLTHISRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM16_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_NEWOBJRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::SUPERCALLTHISRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::SUPERCALLARROWRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLTHISRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLARROWRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::CREATEOBJECTWITHEXCLUDEDKEYS_IMM8_V8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CREATEOBJECTWITHEXCLUDEDKEYS_PREF_IMM16_V8_V8:
            return true;
        default:
            return false;
    }
#endif
    return false;
}
49
50static bool CanHoldRange(Inst *inst)
51{
52    switch (inst->GetOpcode()) {
53        case compiler::Opcode::Intrinsic:
54            return IsIntrinsicRange(inst);
55        default:
56            return false;
57    }
58}
59
60static compiler::Register CalculateNumNeededRangeTemps(const compiler::Graph *graph)
61{
62    compiler::Register ret = 0;
63
64    for (auto bb : graph->GetBlocksRPO()) {
65        for (const auto &inst : bb->AllInsts()) {
66            if (!CanHoldRange(inst)) {
67                continue;
68            }
69            auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
70            if (ret < nargs) {
71                if (nargs > MAX_NUM_NON_RANGE_ARGS || IsIntrinsicRange(inst)) {
72                    ret = nargs;
73                }
74            }
75        }
76    }
77
78    return ret;
79}
80
bool RegEncoder::RunImpl()
{
    ASSERT(state_ == RegEncoderState::IDLE);

    // How many contiguous registers must stay free for range-form calls.
    num_max_range_input_ = CalculateNumNeededRangeTemps(GetGraph());

    // Phase 1: renumber argument registers so locals/temps occupy the low part
    // of the frame. Fails if the frame cannot accommodate the range block.
    state_ = RegEncoderState::RENUMBER_ARGS;
    if (!RenumberArgRegs()) {
        return false;
    }

    // Phase 2: reserve temp registers. Reserving temps shifts registers, which
    // may push more of them out of the compact encoding range, so iterate until
    // the needed temp count reaches a fixed point.
    state_ = RegEncoderState::RESERVE_TEMPS;
    ASSERT(num_temps_ == 0);

    const auto num_regs = GetNumRegs();

    auto max_num_temps = num_temps_;
    CalculateNumNeededTemps();

    while (max_num_temps != num_temps_) {
        // The needed temp count can only grow between iterations.
        ASSERT(num_temps_ > max_num_temps);

        if (num_regs > compiler::VIRTUAL_FRAME_SIZE - num_temps_) {  // to avoid overflow
            return false;                                            // no more free registers left in the frame
        }

        // Shift all registers up by the number of newly required temps.
        auto delta = static_cast<compiler::Register>(num_temps_ - max_num_temps);
        range_temps_start_ += delta;

        RenumberRegs(MIN_REGISTER_NUMBER, delta);

        max_num_temps = num_temps_;
        CalculateNumNeededTemps();
    }

    // Phase 3: insert the actual spill moves and update the usage mask.
    if (num_temps_ > 0 || num_max_range_input_ > 0) {
        state_ = RegEncoderState::INSERT_SPILLS;
        InsertSpills();

        auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
        // Shift mask entries up by num_temps_ (walking from the top down so
        // entries are not overwritten before being copied), then mark the
        // freshly reserved low registers as used.
        for (compiler::Register r = 0; r < num_regs; r++) {
            usage_mask->at(num_regs + num_temps_ - r - 1) = usage_mask->at(num_regs - r - 1);
        }
        std::fill(usage_mask->begin(), usage_mask->begin() + num_temps_, true);
    }

    SaveNumLocalsToGraph(GetNumLocalsFromGraph() + num_temps_);
    state_ = RegEncoderState::IDLE;

    return true;
}
132
133static panda::compiler::DataType::Type GetRegType(panda::compiler::DataType::Type type)
134{
135    if (type == panda::compiler::DataType::REFERENCE) {
136        return type;
137    }
138    if (panda::compiler::DataType::Is32Bits(type, Arch::NONE)) {
139        return panda::compiler::DataType::UINT32;
140    }
141    return panda::compiler::DataType::UINT64;
142}
143
144static bool RegNeedsRenumbering(panda::compiler::Register r)
145{
146    return r != panda::compiler::ACC_REG_ID && r != panda::compiler::INVALID_REG;
147}
148
149static panda::compiler::Register RenumberReg(const panda::compiler::Register r, const panda::compiler::Register delta)
150{
151    if (r == panda::compiler::ACC_REG_ID) {
152        return r;
153    }
154    return r + delta;
155}
156
157static void RenumberSpillFillRegs(panda::compiler::SpillFillInst *inst, const panda::compiler::Register min_reg,
158                                  const panda::compiler::Register delta)
159{
160    for (auto &sf : inst->GetSpillFills()) {
161        if (sf.SrcType() == compiler::LocationType::REGISTER && sf.SrcValue() >= min_reg) {
162            sf.SetSrc(compiler::Location::MakeRegister(RenumberReg(sf.SrcValue(), delta)));
163        }
164        if (sf.DstType() == compiler::LocationType::REGISTER && sf.DstValue() >= min_reg) {
165            sf.SetDst(compiler::Location::MakeRegister(RenumberReg(sf.DstValue(), delta)));
166        }
167    }
168}
169
170void RegEncoder::RenumberRegs(const compiler::Register min_reg, const compiler::Register delta)
171{
172    // Renumbering always advances register number `delta` positions forward,
173    // wrapping around on overflows with well-defined behavour.
174    // Hence the requirement to keep delta unsigned.
175    static_assert(std::is_unsigned<compiler::Register>::value, "compiler::Register must be unsigned");
176    ASSERT(delta > 0);
177
178    for (auto *bb : GetGraph()->GetBlocksRPO()) {
179        for (auto inst : bb->AllInsts()) {
180            // Renumber output of any instruction, if applicable:
181            if (RegNeedsRenumbering(inst->GetDstReg()) && inst->GetDstReg() >= min_reg) {
182                inst->SetDstReg(RenumberReg(inst->GetDstReg(), delta));
183            }
184
185            if (inst->IsPhi() || inst->IsCatchPhi()) {
186                continue;
187            }
188
189            // Renumber inputs and outputs of SpillFill instructions:
190            if (inst->IsSpillFill()) {
191                RenumberSpillFillRegs(inst->CastToSpillFill(), min_reg, delta);
192                continue;
193            }
194
195            // Fix inputs of common instructions:
196            for (size_t i = 0; i < inst->GetInputsCount(); i++) {
197                if (RegNeedsRenumbering(inst->GetSrcReg(i)) && inst->GetSrcReg(i) >= min_reg) {
198                    inst->SetSrcReg(i, RenumberReg(inst->GetSrcReg(i), delta));
199                }
200            }
201        }
202    }
203}
204
bool RegEncoder::RenumberArgRegs()
{
    // Frame layout on entry: [locals ... free gap ... temps][arguments].
    // Goal: shrink the free gap so that exactly num_max_range_input_ registers
    // remain available after the locals for range-form calls.
    const auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
    ASSERT(usage_mask->size() == compiler::VIRTUAL_FRAME_SIZE);

    auto frame_size = static_cast<compiler::Register>(usage_mask->size());
    const auto num_args = GetNumArgsFromGraph();
    ASSERT(frame_size >= num_args);

    auto num_non_args = static_cast<compiler::Register>(frame_size - num_args);
    if (num_max_range_input_ > num_non_args) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Count the leading run of used registers: these are the locals.
    compiler::Register num_locals = 0;
    if (num_non_args != 0) {
        while (num_locals != num_non_args && usage_mask->at(num_locals)) {
            ++num_locals;
        }
    }

    // Count the trailing run of used non-arg registers: these are the temps.
    compiler::Register num_temps = 0;
    if (num_locals != num_non_args) {
        compiler::Register r = num_non_args - 1;
        while (r < num_non_args && usage_mask->at(r)) {  // r < num_non_args catches unsigned wrap-around past 0
            ++num_temps;
            --r;
        }
    }

    if (num_locals + num_temps > num_non_args - num_max_range_input_) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Range-call argument block starts right after the locals.
    range_temps_start_ = num_locals;

    bool do_renumber = true;

    if (num_non_args == 0 && num_max_range_input_ == 0) {  // all registers are arguments: no need to renumber
        do_renumber = false;
    }

    // All free regs will be just enough to encode call.rang: no need to renumber
    if (num_locals + num_temps + num_max_range_input_ == num_non_args) {
        do_renumber = false;
    }

    if (num_temps + num_args == 0) {  // no temps and no args: nothing to renumber
        do_renumber = false;
    }

    if (do_renumber) {
        const auto min_reg = static_cast<compiler::Register>(num_non_args - num_temps);
        ASSERT(min_reg > MIN_REGISTER_NUMBER);

        // Assert that if temps are present, they are marked allocated in the mask:
        for (compiler::Register r = min_reg; r < min_reg + num_temps; r++) {
            ASSERT(usage_mask->at(r));
        }

        // Assert that there are no used regs between locals and temps + arguments:
        for (compiler::Register r = num_locals; r < min_reg; r++) {
            ASSERT(!usage_mask->at(r));
        }

        // delta is negative here modulo 2^N (the sum is < num_non_args), so the
        // unsigned wrap-around in RenumberReg effectively shifts temps and
        // arguments DOWN, closing the free gap to num_max_range_input_ regs.
        auto delta = static_cast<compiler::Register>(num_locals + num_temps + num_max_range_input_ - num_non_args);
        RenumberRegs(min_reg, delta);

        // Mirror the shift in the usage mask.
        for (compiler::Register r = min_reg; r < frame_size; r++) {
            usage_mask->at(RenumberReg(r, delta)) = usage_mask->at(r);
            usage_mask->at(r) = false;
        }
    }

    SaveNumLocalsToGraph(num_locals + num_temps + num_max_range_input_);
    return true;
}
284
285void RegEncoder::InsertSpills()
286{
287    ASSERT(num_max_range_input_ > 0 || (num_temps_ > 0 && num_temps_ <= MAX_NUM_INPUTS));
288
289    for (auto *bb : GetGraph()->GetBlocksRPO()) {
290        for (auto inst : bb->AllInstsSafe()) {
291            if (inst->GetInputsCount() == 0) {
292                continue;
293            }
294
295            VisitInstruction(inst);
296            // TODO(aantipina): Enable assert here for GetStatus() as soon code generation is fully supported
297        }
298    }
299}
300
301void RegEncoder::CalculateNumNeededTemps()
302{
303    num_temps_ = 0;
304
305    for (auto bb : GetGraph()->GetBlocksRPO()) {
306        for (auto inst : bb->AllInstsSafe()) {
307            if (inst->GetInputsCount() == 0) {
308                continue;
309            }
310
311            VisitInstruction(inst);
312            // TODO(aantipina): Enable here for GetStatus() as soon code generation is fully supported
313        }
314    }
315
316    LOG(DEBUG, BYTECODE_OPTIMIZER) << GetGraph()->GetRuntime()->GetMethodFullName(GetGraph()->GetMethod())
317                                   << ": num_temps_ = " << std::to_string(num_temps_);
318}
319
320template <typename T>
321static void AddMoveBefore(Inst *inst, const T &sp_container)
322{
323    if (sp_container.empty()) {
324        return;
325    }
326    auto sf_inst = inst->GetBasicBlock()->GetGraph()->CreateInstSpillFill();
327    for (auto const &[src, dst] : sp_container) {
328        ASSERT(src != compiler::ACC_REG_ID);
329        sf_inst->AddMove(src, dst.reg, GetRegType(dst.type));
330        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: Move v" << static_cast<int>(dst.reg) << " <- v"
331                                       << static_cast<int>(src) << " was added";
332    }
333    inst->GetBasicBlock()->InsertBefore(sf_inst, inst);
334}
335
336static bool IsAccReadPosition(compiler::Inst *inst, size_t pos)
337{
338    // Calls can have accumulator at any position, return false for them
339    return !inst->IsCall() && inst->IsAccRead() && pos == AccReadIndex(inst);
340}
341
// Inserts spill moves for a dynamic-operand intrinsic. Non-range forms copy
// only the registers that cannot be compactly encoded into low temps (reusing
// a temp for duplicated sources); range forms copy every argument into the
// contiguous block starting at range_temps_start_.
void RegEncoder::InsertSpillsForDynInputsInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
    ASSERT(inst->IsIntrinsic());

    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter());  // src -> (dst, src_type), non-callrange
    RegContentVec spill_vec(GetGraph()->GetLocalAllocator()->Adapter());  // spill_vec is used to handle callrange

    // A SaveState input (if present) is not an encodable argument.
    auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
    size_t start = 0;
    // Range form is used for dedicated range intrinsics, or when there are too
    // many arguments for the non-range encodings.
    bool range = IsIntrinsicRange(inst) || (nargs - start > MAX_NUM_NON_RANGE_ARGS && CanHoldRange(inst));

    // Non-range args are packed from register 0; range args must form a
    // contiguous block beginning at range_temps_start_.
    compiler::Register temp = range ? range_temps_start_ : 0;

    for (size_t i = start; i < nargs; ++i) {
        auto src_reg = inst->GetSrcReg(i);
        auto type = inst->GetInputType(i);

        // do not spillfill for acc-read position. For example, Intrinsic.FSTARR32
        if (IsAccReadPosition(inst, i)) {
            continue;
        }

        if (!range) {
            // Only real registers outside the compact encoding need a move.
            if (!RegNeedsRenumbering(src_reg) || src_reg < NUM_COMPACTLY_ENCODED_REGS) {
                continue;
            }

            auto res = spill_map.emplace(src_reg, RegContent(temp, type));
            if (res.second) {
                inst->SetSrcReg(i, temp++);
            } else {
                // Such register is already in map.
                // It can be ok for cases like: CallStatic v49, v49
                // Such instructions can be generated by optimizer too.
                const RegContent &reg_cont = res.first->second;
                inst->SetSrcReg(i, reg_cont.reg);
            }
        } else {
            // Range form: move every argument (duplicates included) so the
            // destination registers stay contiguous.
            spill_vec.emplace_back(src_reg, RegContent(temp, type));
            inst->SetSrcReg(i, temp++);
        }
    }

    AddMoveBefore(inst, spill_map);
    AddMoveBefore(inst, spill_vec);
}
389
390void RegEncoder::InsertSpillsForInst(compiler::Inst *inst)
391{
392    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
393
394    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter());  // src -> (dst, src_type)
395
396    if (inst->IsOperandsDynamic()) {
397        InsertSpillsForDynInputsInst(inst);
398        return;
399    }
400
401    compiler::Register temp = 0;
402    for (size_t i = 0; i < inst->GetInputsCount(); i++) {
403        auto reg = inst->GetSrcReg(i);
404        if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
405            auto res = spill_map.emplace(reg, RegContent(temp, GetRegType(inst->GetInputType(i))));
406            if (res.second) {
407                inst->SetSrcReg(i, temp++);
408            } else {
409                // Such register is already in map.
410                // It can be ok for cases like: and v49, v49
411                // Such instructions can be generated by optimizer too.
412                const RegContent &reg_cont = res.first->second;
413                inst->SetSrcReg(i, reg_cont.reg);
414            }
415        }
416    }
417
418    AddMoveBefore(inst, spill_map);
419}
420
421static void IncTempsIfNeeded(const compiler::Register reg, compiler::Register &num_temps)
422{
423    if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
424        num_temps++;
425    }
426}
427
428void RegEncoder::CalculateNumNeededTempsForInst(compiler::Inst *inst)
429{
430    ASSERT(state_ == RegEncoderState::RESERVE_TEMPS);
431
432    compiler::Register num_temps = 0;
433
434    if (inst->IsOperandsDynamic()) {
435        if (IsIntrinsicRange(inst)) {
436            return;
437        }
438        ASSERT(inst->IsIntrinsic());
439
440        auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
441        size_t start = 0;
442
443        if (nargs - start > MAX_NUM_NON_RANGE_ARGS) {  // is call.range
444            return;
445        }
446
447        for (size_t i = start; i < nargs; i++) {
448            if (IsAccReadPosition(inst, i)) {
449                continue;
450            }
451            auto reg = inst->GetSrcReg(i);
452            if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
453                num_temps++;
454            }
455        }
456    } else {
457        for (size_t i = 0; i < inst->GetInputsCount(); i++) {
458            IncTempsIfNeeded(inst->GetSrcReg(i), num_temps);
459        }
460    }
461
462    ASSERT(num_temps <= MAX_NUM_INPUTS);
463
464    num_temps_ = std::max(num_temps, num_temps_);
465}
466
467void RegEncoder::Check4Width(compiler::Inst *inst)
468{
469    switch (state_) {
470        case RegEncoderState::RESERVE_TEMPS: {
471            CalculateNumNeededTempsForInst(inst);
472            break;
473        }
474        case RegEncoderState::INSERT_SPILLS: {
475            InsertSpillsForInst(inst);
476            break;
477        }
478        default:
479            UNREACHABLE();
480    }
481}
482
// Intentionally a no-op for now: per the TODO below, 8-bit operand checks only
// become relevant once register numbers can exceed 256.
void RegEncoder::Check8Width([[maybe_unused]] compiler::Inst *inst)
{
    // TODO(aantipina): implement after it became possible to use register numbers more than 256 (#2697)
}
487
488void RegEncoder::VisitIntrinsic(GraphVisitor *visitor, Inst *inst)
489{
490    auto re = static_cast<RegEncoder *>(visitor);
491    if (IsIntrinsicRange(inst)) {
492        re->Check4Width(inst->CastToIntrinsic());
493        return;
494    }
495
496    re->Check8Width(inst->CastToIntrinsic());
497}
498
499#include "generated/check_width.cpp"
500}  // namespace panda::bytecodeopt
501