1/**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef BYTECODE_OPTIMIZER_REG_ENCODER_H
17#define BYTECODE_OPTIMIZER_REG_ENCODER_H
18
19#include "compiler/optimizer/ir/graph.h"
20#include "compiler/optimizer/pass.h"
21#include "compiler/optimizer/ir/inst.h"
22#include "compiler/optimizer/ir/graph_visitor.h"
23
24/*
25 * Register Encoder.
26 *
27 * After compiler's register allocation layout of the virtual frame looks like:
28 *
29 * |<-locals->|<-free registers->|<-RegAlloc temps->|<-arguments->|
30 *
31 * where:
32 *
33 * * locals (>= 0) are registers allocated for function's local variables;
34 * * temps (>= 0) are temporary registers that can be allocated by spill-fill
35 *   resolver during breaking mov chain cycles;
36 * * arguments (>= 0) are registers allocated for function's arguemnts.
37 *
38 * The size of the frame is fixed (see register allocator for details).
39 *
40 * Locals and temps are allocated densely: if there are more than 0 locals
41 * (or temps) then all registers go strictly in order without "holes".
42 * Space for arguments, however, can contain "holes" (each "hole" corresponds
43 * to an unused argument).
44 *
45 * For locals and temps, it is not guaranteed that their number equals to the
46 * number of registers used in the unoptimized bytecode. Total number of arguments
47 * is of course constant throughout optimization pipeline.
48 *
49 * This pass solves following problems:
50 *
51 * 1. Shift temps and arguments to the left adjacently to locals, and reserve the
52 *    range temps from free registers if necessary, consistently changing all register
53 *    numbers for affected instructions. After this is done, the virtual frame looks like:
54 *
55 * |<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->|
56 *
57 * 2. Check if IR contains any instructions that can encode only [r0, r15] with
58 *    actual inputs r16+. If such instructions are found, some lower registers
59 *    (starting with r0, but no more than MAX_NUM_INPUTS registers) are reserved as
60 *    temps and corresponding spills are emitted where needed. After this is done,
61 *    the virtual frame looks like:
62 *
63 * |<-Renumber temps->|<-locals->|<-range temps->|<-RegAlloc temps->|<-arguments->|<-free registers->|
64 *
65 * After the pass:
66 *
67 *  * Usage mask is updated accordingly.
68 *  * num_locals + num_temps + num_max_range_input is stored to graph with SetStackSlotsCount
69 */
70
71namespace panda::bytecodeopt {
72struct RegContent {
73    compiler::Register reg;
74    compiler::DataType::Type type;
75
76    RegContent() : reg(compiler::INVALID_REG), type(compiler::DataType::NO_TYPE) {}
77    RegContent(compiler::Register r, compiler::DataType::Type t) : reg(r), type(t) {}
78
79    bool operator==(const RegContent &other) const
80    {
81        return reg == other.reg && type == other.type;
82    }
83
84    bool operator!=(const RegContent &other) const
85    {
86        return !(*this == other);
87    }
88};
89using RegContentMap = ArenaUnorderedMap<compiler::Register, RegContent>;
90using RegContentVec = ArenaVector<std::pair<compiler::Register, RegContent>>;
91
92enum class RegEncoderState { IDLE, RENUMBER_ARGS, RESERVE_TEMPS, INSERT_SPILLS };
93
94using panda::compiler::BasicBlock;
95using panda::compiler::Inst;
96using panda::compiler::Opcode;
97
98class RegEncoder : public compiler::Optimization, public compiler::GraphVisitor {
99public:
100    explicit RegEncoder(compiler::Graph *graph)
101        : compiler::Optimization(graph), num_temps_(0), state_(RegEncoderState::IDLE)
102    {
103    }
104
105    ~RegEncoder() override = default;
106
107    // true: Pass applied successfully
108    // false: Unable to apply pass because of insufficient number of registers
109    bool RunImpl() override;
110
111    const char *GetPassName() const override
112    {
113        return "RegEncoder";
114    }
115
116    void Check4Width(compiler::Inst *inst);
117    void Check8Width(compiler::Inst *inst);
118
119    const ArenaVector<BasicBlock *> &GetBlocksToVisit() const override
120    {
121        return GetGraph()->GetBlocksRPO();
122    }
123
124    static void VisitIntrinsic(GraphVisitor *visitor, Inst *inst);
125
126#include "generated/reg_encoder_visitors.inc"
127
128#include "generated/check_width.h"
129
130    void VisitDefault(Inst *inst) override {}
131
132#include "compiler/optimizer/ir/visitor.inc"
133
134private:
135    void CalculateNumNeededTemps();
136    void CalculateNumNeededTempsForInst(compiler::Inst *inst);
137    void RenumberRegs(const compiler::Register min_reg, const compiler::Register delta);
138    bool RenumberArgRegs();
139    void InsertSpills();
140    void InsertSpillsForInst(compiler::Inst *inst);
141    void InsertSpillsForDynInputsInst(compiler::Inst *inst);
142
143    compiler::Register GetNumArgsFromGraph() const
144    {
145        auto adapter = GetGraph()->GetRuntime();
146        auto method = GetGraph()->GetMethod();
147        auto num_args = adapter->GetMethodTotalArgumentsCount(method);
148        ASSERT(num_args <= compiler::VIRTUAL_FRAME_SIZE);
149        return num_args;
150    }
151
152    compiler::Register GetNumLocalsFromGraph() const
153    {
154        auto num_locals = GetGraph()->GetStackSlotsCount();
155        ASSERT(num_locals <= compiler::VIRTUAL_FRAME_SIZE);
156        return num_locals;
157    }
158
159    compiler::Register GetNumRegs() const
160    {
161        auto num_regs = GetNumLocalsFromGraph() + GetNumArgsFromGraph();
162        ASSERT(num_regs <= compiler::VIRTUAL_FRAME_SIZE);
163        return num_regs;
164    }
165
166    void SaveNumLocalsToGraph(uint32_t num_locals) const
167    {
168        ASSERT(num_locals <= compiler::VIRTUAL_FRAME_SIZE);
169        GetGraph()->SetStackSlotsCount(num_locals);
170    }
171
172    bool GetStatus() const
173    {
174        return success_;
175    }
176
177    static void CallHelper(compiler::GraphVisitor *visitor, Inst *inst)
178    {
179        auto *re = static_cast<RegEncoder *>(visitor);
180        re->Check4Width(inst);
181    }
182
183private:
184    compiler::Register num_temps_;
185    RegEncoderState state_;
186    compiler::Register num_max_range_input_ {0};
187    compiler::Register range_temps_start_ {0};
188
189    bool success_ {true};
190};
191}  // namespace panda::bytecodeopt
192
193#endif  // BYTECODE_OPTIMIZER_REG_ENCODER_H
194