/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ecmascript/compiler/assembler/aarch64/macro_assembler_aarch64.h"
#include <set>
#include "ecmascript/js_function.h"

namespace panda::ecmascript::kungfu {
using namespace panda::ecmascript;
constexpr uint32_t k4BitSize = 4;
constexpr uint32_t k16BitSize = 16;
constexpr uint32_t k32BitSize = 32;
constexpr uint32_t k48BitSize = 48;
constexpr uint32_t k64BitSize = 64;

const std::set<uint64_t> ValidBitmaskImmSet = {
#include "valid_bitmask_imm.txt"
};
constexpr uint32_t kMaxBitTableSize = 5;
constexpr std::array<uint64_t, kMaxBitTableSize> kBitmaskImmMultTable = {
    0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
};

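// Stores an immediate into a stack slot: the value is first materialized in the
// local scratch register, then written to [FP/SP + offset].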
void MacroAssemblerAArch64::Move(const StackSlotOperand &dstStackSlot, Immediate value)
{
    aarch64::Register baseReg = (dstStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                               aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand dstOpnd(baseReg, static_cast<int64_t>(dstStackSlot.GetOffset()));
    assembler.Mov(LOCAL_SCOPE_REGISTER, aarch64::Immediate(value.GetValue()));
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, dstOpnd, false);
}

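// Copies one stack slot to another through the local scratch register
// (load from the source slot, then store to the destination slot).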
void MacroAssemblerAArch64::Move(const StackSlotOperand &dstStackSlot,
                                 const StackSlotOperand &srcStackSlot)
{
    aarch64::Register dstBaseReg = (dstStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                                  aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand dstOpnd(dstBaseReg, static_cast<int64_t>(dstStackSlot.GetOffset()));
    aarch64::Register srcBaseReg = (srcStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                                  aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand srcOpnd(srcBaseReg, static_cast<int64_t>(srcStackSlot.GetOffset()));
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, srcOpnd);
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, dstOpnd, false);
}

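// Compares a stack slot with an immediate: the slot value is loaded into the
// local scratch register and compared against the immediate, setting the flags.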
void MacroAssemblerAArch64::Cmp(const StackSlotOperand &stackSlot, Immediate value)
{
    aarch64::Register baseReg = (stackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                            aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand opnd(baseReg, static_cast<int64_t>(stackSlot.GetOffset()));
    aarch64::Operand immOpnd = aarch64::Immediate(value.GetValue());
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, opnd);
    assembler.Cmp(LOCAL_SCOPE_REGISTER, immOpnd);
}

void MacroAssemblerAArch64::Bind(JumpLabel &label)
{
    assembler.Bind(&label);
}

void MacroAssemblerAArch64::Jz(JumpLabel &label)
{
    assembler.B(aarch64::EQ, &label);
}

void MacroAssemblerAArch64::Jnz(JumpLabel &label)
{
    assembler.B(aarch64::NE, &label);
}

void MacroAssemblerAArch64::Jump(JumpLabel &label)
{
    assembler.B(&label);
}

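// Calls a builtin at funcAddress: each parameter is moved into its argument
// register, then the address is materialized in the local scratch register and
// called with BLR. Parameters beyond PARAM_REGISTER_COUNT (stack-passed
// arguments) are not supported and abort.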
void MacroAssemblerAArch64::CallBuiltin(Address funcAddress,
                                        const std::vector<MacroParameter> &parameters)
{
    for (size_t i = 0; i < parameters.size(); ++i) {
        auto param = parameters[i];
        if (i == PARAM_REGISTER_COUNT) {
            std::cout << "stack parameters are not supported for the aarch64 baseline" << std::endl;
            std::abort();
        }
        MovParameterIntoParamReg(param, registerParamVec[i]);
    }
    assembler.Mov(LOCAL_SCOPE_REGISTER, aarch64::Immediate(funcAddress));
    assembler.Blr(LOCAL_SCOPE_REGISTER);
}

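// Spills the builtin call's return value (RETURN_REGISTER) into the given stack slot.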
void MacroAssemblerAArch64::SaveReturnRegister(const StackSlotOperand &dstStackSlot)
{
    aarch64::Register baseReg = (dstStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                               aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand dstOpnd(baseReg, static_cast<int64_t>(dstStackSlot.GetOffset()));
    PickLoadStoreInsn(RETURN_REGISTER, dstOpnd, false);
}

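// Moves a single MacroParameter into the given argument register, dispatching on
// the variant's active alternative: baseline special parameters (glue, profile
// type info, frame register, hotness counter), integer immediates of various
// widths, or a value loaded from a stack slot.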
void MacroAssemblerAArch64::MovParameterIntoParamReg(MacroParameter param, aarch64::Register paramReg)
{
    if (std::holds_alternative<BaselineSpecialParameter>(param)) {
        auto specialParam = std::get<BaselineSpecialParameter>(param);
        switch (specialParam) {
            case BaselineSpecialParameter::GLUE: {
                assembler.Mov(paramReg, GLUE_REGISTER);
                break;
            }
            case BaselineSpecialParameter::PROFILE_TYPE_INFO: {
                assembler.Ldur(LOCAL_SCOPE_REGISTER,
                               aarch64::MemoryOperand(aarch64::Register(aarch64::X29),
                                                      static_cast<int64_t>(FUNCTION_OFFSET_FROM_SP)));
                assembler.Ldr(LOCAL_SCOPE_REGISTER,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, JSFunction::RAW_PROFILE_TYPE_INFO_OFFSET));
                assembler.Ldr(paramReg,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, ProfileTypeInfoCell::VALUE_OFFSET));
                break;
            }
            case BaselineSpecialParameter::SP: {
                assembler.Mov(paramReg, aarch64::Register(aarch64::X29));
                break;
            }
            case BaselineSpecialParameter::HOTNESS_COUNTER: {
                assembler.Ldur(LOCAL_SCOPE_REGISTER, aarch64::MemoryOperand(aarch64::Register(aarch64::X29),
                    static_cast<int64_t>(FUNCTION_OFFSET_FROM_SP)));
                assembler.Ldr(LOCAL_SCOPE_REGISTER,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, JSFunctionBase::METHOD_OFFSET));
                assembler.Ldr(paramReg,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, Method::LITERAL_INFO_OFFSET));
                break;
            }
            default: {
                std::cout << "other BaselineSpecialParameter values are not supported currently" << std::endl;
                std::abort();
            }
        }
        return;
    }
    if (std::holds_alternative<int8_t>(param)) {
        int8_t num = std::get<int8_t>(param);
        assembler.Mov(paramReg, aarch64::Immediate(static_cast<int64_t>(num)));
        return;
    }
    if (std::holds_alternative<int16_t>(param)) {
        int16_t num = std::get<int16_t>(param);
        assembler.Mov(paramReg, aarch64::Immediate(static_cast<int64_t>(num)));
        return;
    }
    if (std::holds_alternative<int32_t>(param)) {
        int32_t num = std::get<int32_t>(param);
        assembler.Mov(paramReg, aarch64::Immediate(static_cast<int64_t>(num)));
        return;
    }
    if (std::holds_alternative<int64_t>(param)) {
        int64_t num = std::get<int64_t>(param);
        CopyImm(paramReg, num, k64BitSize);
        return;
    }
    if (std::holds_alternative<StackSlotOperand>(param)) {
        StackSlotOperand stackSlotOpnd = std::get<StackSlotOperand>(param);
        aarch64::Register dstBaseReg = (stackSlotOpnd.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                                       aarch64::Register(aarch64::SP);
        aarch64::MemoryOperand paramOpnd(dstBaseReg, static_cast<int64_t>(stackSlotOpnd.GetOffset()));
        PickLoadStoreInsn(paramReg, paramOpnd);
        return;
    }
    std::cout << "other types of aarch64 baseline parameters are not supported currently" << std::endl;
    std::abort();
}

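// Selects a load/store instruction for the given register and memory operand.
// Offsets in the 9-bit signed range [-256, 255] use the unscaled Ldur/Stur forms;
// larger positive offsets must be a multiple of 8 and use the scaled Ldr/Str forms;
// offsets below -256 are rejected.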
void MacroAssemblerAArch64::PickLoadStoreInsn(aarch64::Register reg, aarch64::MemoryOperand memOpnd, bool isLoad)
{
    int64_t maxNineBitsSignedValue = 255;
    int64_t minNineBitsSignedValue = -256;
    int64_t value = memOpnd.GetImmediate().Value();
    if (value < minNineBitsSignedValue) {
        std::cout << "aarch64 memory operand offsets less than -256 are not supported!" << std::endl;
        std::abort();
    }
    if (value > maxNineBitsSignedValue && isLoad) {
        if (value % 8 != 0) { // 8: offset in memory operand must be a multiple of 8 for the ldr insn
            std::cout << "offset in memory operand must be a multiple of 8 for the ldr insn!" << std::endl;
            std::abort();
        }
        assembler.Ldr(reg, memOpnd);
    }
    if (value > maxNineBitsSignedValue && !isLoad) {
        if (value % 8 != 0) { // 8: offset in memory operand must be a multiple of 8 for the str insn
            std::cout << "offset in memory operand must be a multiple of 8 for the str insn!" << std::endl;
            std::abort();
        }
        assembler.Str(reg, memOpnd);
    }
    if (value <= maxNineBitsSignedValue && isLoad) {
        assembler.Ldur(reg, memOpnd);
    }
    if (value <= maxNineBitsSignedValue && !isLoad) {
        assembler.Stur(reg, memOpnd);
    }
}

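// Returns true if val can be materialized by a single MOVZ, i.e. all bits outside
// one 16-bit halfword (at bit position 0, 16, 32 or 48) are zero. Callers also test
// the bitwise complement to cover the MOVN case.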
bool MacroAssemblerAArch64::IsMoveWidableImmediate(uint64_t val, uint32_t bitLen)
{
    if (bitLen == k64BitSize) {
        /* 0xHHHH000000000000 or 0x0000HHHH00000000, return true */
        if (((val & ((static_cast<uint64_t>(0xffff)) << k48BitSize)) == val) ||
            ((val & ((static_cast<uint64_t>(0xffff)) << k32BitSize)) == val)) {
            return true;
        }
    } else {
        /* get lower 32 bits */
        val &= static_cast<uint64_t>(0xffffffff);
    }
    /* 0x00000000HHHH0000 or 0x000000000000HHHH, return true */
    return ((val & ((static_cast<uint64_t>(0xffff)) << k16BitSize)) == val ||
            (val & ((static_cast<uint64_t>(0xffff)) << 0)) == val);
}

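// Returns true if val can be encoded as an AArch64 logical (bitmask) immediate,
// i.e. a rotated repetition of a contiguous run of ones. A single run of ones is
// validated against the precomputed ValidBitmaskImmSet table; repeating patterns
// are verified structurally. All-zero and all-one values are rejected up front.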
bool MacroAssemblerAArch64::IsBitmaskImmediate(uint64_t val, uint32_t bitLen)
{
    if (static_cast<int64_t>(val) == -1 || val == 0) {
        std::cout << "IsBitmaskImmediate() does not accept 0 or -1!" << std::endl;
        std::abort();
    }
    if ((bitLen == k32BitSize) && (static_cast<int32_t>(val) == -1)) {
        return false;
    }
    uint64_t val2 = val;
    if (bitLen == k32BitSize) {
        val2 = (val2 << k32BitSize) | (val2 & ((1ULL << k32BitSize) - 1));
    }
    bool expectedOutcome = (ValidBitmaskImmSet.find(val2) != ValidBitmaskImmSet.end());

    if ((val & 0x1) != 0) {
        /*
         * we want to work with
         * 0000000000000000000000000000000000000000000001100000000000000000
         * instead of
         * 1111111111111111111111111111111111111111111110011111111111111111
         */
        val = ~val;
    }

    if (bitLen == k32BitSize) {
        val = (val << k32BitSize) | (val & ((1ULL << k32BitSize) - 1));
    }

    /* get the least significant bit set and add it to 'val' */
    uint64_t tmpVal = val + (val & static_cast<uint64_t>(UINT64_MAX - val + 1));

    /* guard against tmpVal wrapping around to 0 */
    if (tmpVal < 1 || tmpVal > UINT64_MAX) {
        std::cout << "tmpVal value overflow!" << std::endl;
        std::abort();
    }
    /* now check whether tmpVal is a power of two */
    tmpVal = tmpVal & (tmpVal - 1);
    if (tmpVal == 0) {
        if (!expectedOutcome) {
            return false;
        }
        /* power of two or zero; return true */
        return true;
    }

    int32_t p0 = __builtin_ctzll(val);
    int32_t p1 = __builtin_ctzll(tmpVal);
    int64_t diff = p1 - p0;

    /* guard against diff being zero */
    if (static_cast<uint64_t>(diff) < 1 || static_cast<uint64_t>(diff) > UINT64_MAX) {
        std::cout << "diff value overflow!" << std::endl;
        std::abort();
    }
    /* check whether diff is a power of two; return false if not */
    if ((static_cast<uint64_t>(diff) & (static_cast<uint64_t>(diff) - 1)) != 0) {
        return false;
    }

    uint32_t logDiff = static_cast<uint32_t>(__builtin_ctzll(static_cast<uint64_t>(diff)));
    uint64_t pattern = val & ((1ULL << static_cast<uint64_t>(diff)) - 1);
    return val == pattern * kBitmaskImmMultTable[kMaxBitTableSize - logDiff];
}

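// Returns true if the value can be put into a register with a single instruction:
// a MOVZ (the value itself), a MOVN (its bitwise complement), or a logical-immediate move.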
bool MacroAssemblerAArch64::IsSingleInstructionMovable(uint64_t val, uint32_t size)
{
    return (IsMoveWidableImmediate(val, size) ||
           IsMoveWidableImmediate(~val, size) || IsBitmaskImmediate(val, size));
}

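// Decides whether an immediate sequence should start with MOVZ or MOVN by counting
// the all-zero and all-one 16-bit chunks of the 64-bit value: prefer MOVZ when zero
// chunks are at least as frequent as one chunks.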
bool MacroAssemblerAArch64::BetterUseMOVZ(uint64_t val)
{
    int32_t n16zerosChunks = 0;
    int32_t n16onesChunks = 0;
    uint64_t sa = 0;
    /* a 64-bit number is split into 4 chunks of 16 bits each; check whether each chunk is all 1s or all 0s */
    for (uint64_t i = 0; i < k4BitSize; ++i, sa += k16BitSize) {
        uint64_t chunkVal = (val >> (static_cast<uint64_t>(sa))) & 0x0000FFFFUL;
        if (chunkVal == 0) {
            ++n16zerosChunks;
        } else if (chunkVal == 0xFFFFUL) {
            ++n16onesChunks;
        }
    }
    return (n16zerosChunks >= n16onesChunks);
}

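// Materializes a 32- or 64-bit immediate into destReg. Values that fit a single
// MOVZ/MOVN/logical-immediate move are emitted directly; otherwise a 32-bit value
// is built with MOV + MOVK, and a 64-bit value is delegated to CopyImmSize64.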
void MacroAssemblerAArch64::CopyImm(aarch64::Register destReg, int64_t imm, uint32_t size)
{
    uint64_t srcVal = static_cast<uint64_t>(imm);

    if (IsSingleInstructionMovable(srcVal, size)) {
        assembler.Mov(destReg, aarch64::Immediate(imm));
        return;
    }

    if (size != k32BitSize && size != k64BitSize) {
        std::cout << "only 32- and 64-bit sizes are supported!" << std::endl;
        std::abort();
    }

    if (size == k32BitSize) {
        /* check the lower 16 bits and the upper 16 bits respectively */
        if ((srcVal & 0x0000FFFFULL) == 0 || (srcVal & 0x0000FFFFULL) == 0xFFFFULL) {
            std::cout << "unexpected val!" << std::endl;
            std::abort();
        }
        if (((srcVal >> k16BitSize) & 0x0000FFFFULL) == 0 || ((srcVal >> k16BitSize) & 0x0000FFFFULL) == 0xFFFFULL) {
            std::cout << "unexpected val!" << std::endl;
            std::abort();
        }
        /* create an imm operand which represents the lower 16 bits of the immediate */
        int64_t srcLower = static_cast<int64_t>(srcVal & 0x0000FFFFULL);
        assembler.Mov(destReg, aarch64::Immediate(srcLower));
        /* create an imm operand which represents the upper 16 bits of the immediate */
        int64_t srcUpper = static_cast<int64_t>((srcVal >> k16BitSize) & 0x0000FFFFULL);
        assembler.Movk(destReg, srcUpper, k16BitSize);
    } else {
        CopyImmSize64(destReg, srcVal);
    }
}

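// Builds a 64-bit immediate 16 bits at a time: the first non-skippable chunk is
// set with MOVZ (or MOVN when all-one chunks dominate), the remaining chunks with
// MOVK. When the lower and upper 32-bit halves are identical, only the lower half
// is built and a BFM copies it into the upper half.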
void MacroAssemblerAArch64::CopyImmSize64(aarch64::Register destReg, uint64_t srcVal)
{
    /*
     * partition the value into 4 16-bit chunks;
     * if there are more all-0 chunks than all-1 chunks, use movz as the initial instruction,
     * otherwise movn.
     */
    bool useMovz = BetterUseMOVZ(srcVal);
    bool useMovk = false;
    /* get lower 32 bits of the immediate */
    uint64_t chunkLval = srcVal & 0xFFFFFFFFULL;
    /* get upper 32 bits of the immediate */
    uint64_t chunkHval = (srcVal >> k32BitSize) & 0xFFFFFFFFULL;
    int32_t maxLoopTime = 4;

    if (chunkLval == chunkHval) {
        /* compute the lower 32 bits, then copy them to the upper 32 bits, so only 2 chunks need to be processed */
        maxLoopTime = 2;
    }

    uint64_t sa = 0;
    for (int64_t i = 0; i < maxLoopTime; ++i, sa += k16BitSize) {
        /* create an imm operand which represents the i-th 16-bit chunk of the immediate */
        uint64_t chunkVal = (srcVal >> (static_cast<uint64_t>(sa))) & 0x0000FFFFULL;
        if (useMovz ? (chunkVal == 0) : (chunkVal == 0x0000FFFFULL)) {
            continue;
        }
        int64_t src16 = static_cast<int64_t>(chunkVal);
        if (!useMovk) {
            /* use movz or movn for the first emitted chunk */
            if (!useMovz) {
                src16 = ~(static_cast<uint64_t>(src16)) & ((1ULL << k16BitSize) - 1UL);
                assembler.Movn(destReg, src16, sa);
            } else {
                assembler.Movz(destReg, src16, sa);
            }
            useMovk = true;
        } else {
            assembler.Movk(destReg, src16, sa);
        }
    }

    if (maxLoopTime == 2) { /* as described above, only 2 chunks need to be processed */
        /* copy the lower 32 bits to the upper 32 bits */
        uint32_t immr = -k32BitSize % k64BitSize; // immr = -shift % size
        uint32_t imms = k32BitSize - 1; // imms = width - 1
        assembler.Bfm(destReg, destReg, immr, imms);
    }
}

}  // namespace panda::ecmascript::kungfu