/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ecmascript/compiler/assembler/aarch64/macro_assembler_aarch64.h"
#include <array>
#include <cstdlib>
#include <iostream>
#include <set>
#include "ecmascript/js_function.h"

namespace panda::ecmascript::kungfu {
using namespace panda::ecmascript;
constexpr uint32_t k4BitSize = 4;
constexpr uint32_t k16BitSize = 16;
constexpr uint32_t k32BitSize = 32;
constexpr uint32_t k48BitSize = 48;
constexpr uint32_t k64BitSize = 64;

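// ValidBitmaskImmSet enumerates every value encodable as an AArch64 logical (bitmask) immediate;
// the list is included from the generated file valid_bitmask_imm.txt.
// kBitmaskImmMultTable holds the multipliers that replicate a 32-, 16-, 8-, 4- or 2-bit pattern
// across all 64 bits; IsBitmaskImmediate() uses it to verify that a value is such a repeated pattern.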
const std::set<uint64_t> ValidBitmaskImmSet = {
#include "valid_bitmask_imm.txt"
};
constexpr uint32_t kMaxBitTableSize = 5;
constexpr std::array<uint64_t, kMaxBitTableSize> kBitmaskImmMultTable = {
    0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
};

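// Stores an immediate into a stack slot: the value is first materialized in the scratch register
// (LOCAL_SCOPE_REGISTER) and then stored relative to FP or SP, depending on the slot's base.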
void MacroAssemblerAArch64::Move(const StackSlotOperand &dstStackSlot, Immediate value)
{
    aarch64::Register baseReg = (dstStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                               aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand dstOpnd(baseReg, static_cast<int64_t>(dstStackSlot.GetOffset()));
    assembler.Mov(LOCAL_SCOPE_REGISTER, aarch64::Immediate(value.GetValue()));
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, dstOpnd, false);
}

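// Copies one stack slot to another by routing the value through the scratch register:
// load from the source slot, then store to the destination slot.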
void MacroAssemblerAArch64::Move(const StackSlotOperand &dstStackSlot,
                                 const StackSlotOperand &srcStackSlot)
{
    aarch64::Register dstBaseReg = (dstStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                                  aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand dstOpnd(dstBaseReg, static_cast<int64_t>(dstStackSlot.GetOffset()));
    aarch64::Register srcBaseReg = (srcStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                                  aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand srcOpnd(srcBaseReg, static_cast<int64_t>(srcStackSlot.GetOffset()));
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, srcOpnd);
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, dstOpnd, false);
}

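// Compares a stack slot with an immediate: the slot is loaded into the scratch register and
// compared against the immediate, setting the condition flags consumed by Jz/Jnz below.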
void MacroAssemblerAArch64::Cmp(const StackSlotOperand &stackSlot, Immediate value)
{
    aarch64::Register baseReg = (stackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                            aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand opnd(baseReg, static_cast<int64_t>(stackSlot.GetOffset()));
    aarch64::Operand immOpnd = aarch64::Immediate(value.GetValue());
    PickLoadStoreInsn(LOCAL_SCOPE_REGISTER, opnd);
    assembler.Cmp(LOCAL_SCOPE_REGISTER, immOpnd);
}

void MacroAssemblerAArch64::Bind(JumpLabel &label)
{
    assembler.Bind(&label);
}

void MacroAssemblerAArch64::Jz(JumpLabel &label)
{
    assembler.B(aarch64::EQ, &label);
}

void MacroAssemblerAArch64::Jnz(JumpLabel &label)
{
    assembler.B(aarch64::NE, &label);
}

void MacroAssemblerAArch64::Jump(JumpLabel &label)
{
    assembler.B(&label);
}

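// Calls a builtin at a known address: each parameter is moved into the corresponding argument
// register from registerParamVec (presumably the AAPCS64 integer argument registers), the target
// address is materialized in the scratch register, and the call is made indirectly with BLR.
// Arguments that would have to be passed on the stack are not supported.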
void MacroAssemblerAArch64::CallBuiltin(Address funcAddress,
                                        const std::vector<MacroParameter> &parameters)
{
    for (size_t i = 0; i < parameters.size(); ++i) {
        auto param = parameters[i];
        if (i == PARAM_REGISTER_COUNT) {
            std::cout << "stack parameters are not supported for aarch64 baseline builtin calls" << std::endl;
            std::abort();
        }
        MovParameterIntoParamReg(param, registerParamVec[i]);
    }
    assembler.Mov(LOCAL_SCOPE_REGISTER, aarch64::Immediate(funcAddress));
    assembler.Blr(LOCAL_SCOPE_REGISTER);
}

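// Spills the builtin call's return value (held in RETURN_REGISTER, normally x0 under AAPCS64)
// into the given stack slot.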
void MacroAssemblerAArch64::SaveReturnRegister(const StackSlotOperand &dstStackSlot)
{
    aarch64::Register baseReg = (dstStackSlot.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                               aarch64::Register(aarch64::SP);
    aarch64::MemoryOperand dstOpnd(baseReg, static_cast<int64_t>(dstStackSlot.GetOffset()));
    PickLoadStoreInsn(RETURN_REGISTER, dstOpnd, false);
}

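// Moves a single MacroParameter into an argument register. Special baseline parameters (glue,
// profile type info, the frame register, hotness counter) are fetched from their well-known
// locations, integer immediates are materialized directly, and stack-slot parameters are loaded
// from memory.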
void MacroAssemblerAArch64::MovParameterIntoParamReg(MacroParameter param, aarch64::Register paramReg)
{
    if (std::holds_alternative<BaselineSpecialParameter>(param)) {
        auto specialParam = std::get<BaselineSpecialParameter>(param);
        switch (specialParam) {
            case BaselineSpecialParameter::GLUE: {
                assembler.Mov(paramReg, GLUE_REGISTER);
                break;
            }
            case BaselineSpecialParameter::PROFILE_TYPE_INFO: {
                assembler.Ldur(LOCAL_SCOPE_REGISTER,
                               aarch64::MemoryOperand(aarch64::Register(aarch64::X29),
                                                      static_cast<int64_t>(FUNCTION_OFFSET_FROM_SP)));
                assembler.Ldr(LOCAL_SCOPE_REGISTER,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, JSFunction::RAW_PROFILE_TYPE_INFO_OFFSET));
                assembler.Ldr(paramReg,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, ProfileTypeInfoCell::VALUE_OFFSET));
                break;
            }
            case BaselineSpecialParameter::SP: {
                assembler.Mov(paramReg, aarch64::Register(aarch64::X29));
                break;
            }
            case BaselineSpecialParameter::HOTNESS_COUNTER: {
                assembler.Ldur(LOCAL_SCOPE_REGISTER, aarch64::MemoryOperand(aarch64::Register(aarch64::X29),
                    static_cast<int64_t>(FUNCTION_OFFSET_FROM_SP)));
                assembler.Ldr(LOCAL_SCOPE_REGISTER,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, JSFunctionBase::METHOD_OFFSET));
                assembler.Ldr(paramReg,
                              aarch64::MemoryOperand(LOCAL_SCOPE_REGISTER, Method::LITERAL_INFO_OFFSET));
                break;
            }
            default: {
                std::cout << "this BaselineSpecialParameter is not supported yet" << std::endl;
                std::abort();
            }
        }
        return;
    }
    if (std::holds_alternative<int8_t>(param)) {
        int8_t num = std::get<int8_t>(param);
        assembler.Mov(paramReg, aarch64::Immediate(static_cast<int64_t>(num)));
        return;
    }
    if (std::holds_alternative<int16_t>(param)) {
        int16_t num = std::get<int16_t>(param);
        assembler.Mov(paramReg, aarch64::Immediate(static_cast<int64_t>(num)));
        return;
    }
    if (std::holds_alternative<int32_t>(param)) {
        int32_t num = std::get<int32_t>(param);
        assembler.Mov(paramReg, aarch64::Immediate(static_cast<int64_t>(num)));
        return;
    }
    if (std::holds_alternative<int64_t>(param)) {
        int64_t num = std::get<int64_t>(param);
        CopyImm(paramReg, num, k64BitSize);
        return;
    }
    if (std::holds_alternative<StackSlotOperand>(param)) {
        StackSlotOperand stackSlotOpnd = std::get<StackSlotOperand>(param);
        aarch64::Register dstBaseReg = (stackSlotOpnd.IsFrameBase()) ? aarch64::Register(aarch64::FP) :
                                                                       aarch64::Register(aarch64::SP);
        aarch64::MemoryOperand paramOpnd(dstBaseReg, static_cast<int64_t>(stackSlotOpnd.GetOffset()));
        PickLoadStoreInsn(paramReg, paramOpnd);
        return;
    }
    std::cout << "this type of aarch64 baseline parameter is not supported yet" << std::endl;
    std::abort();
}

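// Picks the load/store encoding that can represent the offset: LDUR/STUR take a signed 9-bit
// unscaled byte offset (-256..255), while the LDR/STR unsigned-offset forms used here require the
// offset to be a non-negative multiple of 8 for 64-bit registers. Offsets below -256 are rejected.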
void MacroAssemblerAArch64::PickLoadStoreInsn(aarch64::Register reg, aarch64::MemoryOperand memOpnd, bool isLoad)
{
    int64_t maxNineBitsSignedValue = 255;
    int64_t minNineBitsSignedValue = -256;
    int64_t value = memOpnd.GetImmediate().Value();
    if (value < minNineBitsSignedValue) {
        std::cout << "aarch64 memory operand offsets less than -256 are not supported!" << std::endl;
        std::abort();
    }
    if (value > maxNineBitsSignedValue) {
        if (value % 8 != 0) { // 8: offset in memory operand must be a multiple of 8 for ldr/str insn
            std::cout << "memory operand offsets above 255 must be a multiple of 8 for ldr/str insn!" << std::endl;
            std::abort();
        }
        if (isLoad) {
            assembler.Ldr(reg, memOpnd);
        } else {
            assembler.Str(reg, memOpnd);
        }
    } else {
        if (isLoad) {
            assembler.Ldur(reg, memOpnd);
        } else {
            assembler.Stur(reg, memOpnd);
        }
    }
}

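// Returns true when the value occupies a single 16-bit chunk aligned to a 16-bit boundary, i.e.
// it can be materialized with one MOVZ (or one MOVN when the caller passes the inverted value).
// For example, 0x0000ABCD00000000 is movable with MOVZ reg, #0xABCD, LSL #32.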
bool MacroAssemblerAArch64::IsMoveWidableImmediate(uint64_t val, uint32_t bitLen)
{
    if (bitLen == k64BitSize) {
        /* 0xHHHH000000000000 or 0x0000HHHH00000000, return true */
        if (((val & ((static_cast<uint64_t>(0xffff)) << k48BitSize)) == val) ||
            ((val & ((static_cast<uint64_t>(0xffff)) << k32BitSize)) == val)) {
            return true;
        }
    } else {
        /* get lower 32 bits */
        val &= static_cast<uint64_t>(0xffffffff);
    }
    /* 0x00000000HHHH0000 or 0x000000000000HHHH, return true */
    return ((val & ((static_cast<uint64_t>(0xffff)) << k16BitSize)) == val ||
            (val & ((static_cast<uint64_t>(0xffff)) << 0)) == val);
}

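// Checks whether the value is encodable as an AArch64 logical (bitmask) immediate: a 2-, 4-, 8-,
// 16-, 32- or 64-bit element containing a contiguous (rotated) run of ones, replicated across the
// register. All-zeros and all-ones are not encodable and are rejected up front. The arithmetic
// below derives the repeated pattern and verifies the replication with kBitmaskImmMultTable,
// using the precomputed ValidBitmaskImmSet as a cross-check for the single-run case.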
bool MacroAssemblerAArch64::IsBitmaskImmediate(uint64_t val, uint32_t bitLen)
{
    if (static_cast<int64_t>(val) == -1 || val == 0) {
        std::cout << "IsBitmaskImmediate() does not accept 0 or -1!" << std::endl;
        std::abort();
    }
    if ((bitLen == k32BitSize) && (static_cast<int32_t>(val) == -1)) {
        return false;
    }
    uint64_t val2 = val;
    if (bitLen == k32BitSize) {
        val2 = (val2 << k32BitSize) | (val2 & ((1ULL << k32BitSize) - 1));
    }
    bool expectedOutcome = (ValidBitmaskImmSet.find(val2) != ValidBitmaskImmSet.end());

    if ((val & 0x1) != 0) {
        /*
         * we want to work with
         * 0000000000000000000000000000000000000000000001100000000000000000
         * instead of
         * 1111111111111111111111111111111111111111111110011111111111111111
         */
        val = ~val;
    }

    if (bitLen == k32BitSize) {
        val = (val << k32BitSize) | (val & ((1ULL << k32BitSize) - 1));
    }

    /* get the least significant bit set and add it to 'val' */
    uint64_t tmpVal = val + (val & static_cast<uint64_t>(UINT64_MAX - val + 1));

    /* now check whether tmpVal is zero or a power of 2 */
    if (tmpVal < 1 || tmpVal > UINT64_MAX) {
        std::cout << "tmpVal value overflow!" << std::endl;
        std::abort();
    }
    tmpVal = tmpVal & (tmpVal - 1);
    if (tmpVal == 0) {
        if (!expectedOutcome) {
            return false;
        }
        /* power of two or zero; return true */
        return true;
    }

    int32_t p0 = __builtin_ctzll(val);
    int32_t p1 = __builtin_ctzll(tmpVal);
    int64_t diff = p1 - p0;

    /* check whether diff is a power of two; return false if not */
    if (static_cast<uint64_t>(diff) < 1 || static_cast<uint64_t>(diff) > UINT64_MAX) {
        std::cout << "diff value overflow!" << std::endl;
        std::abort();
    }
    if ((static_cast<uint64_t>(diff) & (static_cast<uint64_t>(diff) - 1)) != 0) {
        return false;
    }

    uint32_t logDiff = static_cast<uint32_t>(__builtin_ctzll(static_cast<uint64_t>(diff)));
    uint64_t pattern = val & ((1ULL << static_cast<uint64_t>(diff)) - 1);
    return val == pattern * kBitmaskImmMultTable[kMaxBitTableSize - logDiff];
}

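// A value can be moved in a single instruction when MOVZ, MOVN (via the inverted value) or a
// logical-immediate MOV can encode it.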
bool MacroAssemblerAArch64::IsSingleInstructionMovable(uint64_t val, uint32_t size)
{
    return (IsMoveWidableImmediate(val, size) ||
           IsMoveWidableImmediate(~val, size) || IsBitmaskImmediate(val, size));
}

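// Decides between MOVZ and MOVN as the first instruction of a multi-instruction immediate load:
// counts the 16-bit chunks that are all zeros versus all ones; starting with MOVZ is preferable
// when at least as many chunks are zero, since those chunks then need no MOVK.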
bool MacroAssemblerAArch64::BetterUseMOVZ(uint64_t val)
{
    int32_t n16zerosChunks = 0;
    int32_t n16onesChunks = 0;
    uint64_t sa = 0;
    /* a 64-bit number is split into 4 chunks of 16 bits each; check whether each chunk is all 1s or all 0s */
    for (uint64_t i = 0; i < k4BitSize; ++i, sa += k16BitSize) {
        uint64_t chunkVal = (val >> (static_cast<uint64_t>(sa))) & 0x0000FFFFUL;
        if (chunkVal == 0) {
            ++n16zerosChunks;
        } else if (chunkVal == 0xFFFFUL) {
            ++n16onesChunks;
        }
    }
    return (n16zerosChunks >= n16onesChunks);
}

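// Materializes an arbitrary 32- or 64-bit immediate into destReg: a single MOV when the value is
// directly encodable, a MOV of the low 16 bits followed by a MOVK of the high 16 bits for 32-bit
// values, and CopyImmSize64() for the general 64-bit case.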
void MacroAssemblerAArch64::CopyImm(aarch64::Register destReg, int64_t imm, uint32_t size)
{
    uint64_t srcVal = static_cast<uint64_t>(imm);

    if (IsSingleInstructionMovable(srcVal, size)) {
        assembler.Mov(destReg, aarch64::Immediate(imm));
        return;
    }

    if (size != k32BitSize && size != k64BitSize) {
        std::cout << "only 32-bit and 64-bit sizes are supported!" << std::endl;
        std::abort();
    }

    if (size == k32BitSize) {
        /* check lower 16 bits and upper 16 bits respectively */
        if ((srcVal & 0x0000FFFFULL) == 0 || (srcVal & 0x0000FFFFULL) == 0xFFFFULL) {
            std::cout << "unexpected val!" << std::endl;
            std::abort();
        }
        if (((srcVal >> k16BitSize) & 0x0000FFFFULL) == 0 || ((srcVal >> k16BitSize) & 0x0000FFFFULL) == 0xFFFFULL) {
            std::cout << "unexpected val!" << std::endl;
            std::abort();
        }
        /* create an imm operand which represents the lower 16 bits of the immediate */
        int64_t srcLower = static_cast<int64_t>(srcVal & 0x0000FFFFULL);
        assembler.Mov(destReg, aarch64::Immediate(srcLower));
        /* create an imm operand which represents the upper 16 bits of the immediate */
        int64_t srcUpper = static_cast<int64_t>((srcVal >> k16BitSize) & 0x0000FFFFULL);
        assembler.Movk(destReg, srcUpper, k16BitSize);
    } else {
        CopyImmSize64(destReg, srcVal);
    }
}

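// Builds a 64-bit immediate chunk by chunk: the first non-skippable 16-bit chunk is set with MOVZ
// (or MOVN when ones-chunks dominate) and the remaining chunks are patched in with MOVK. When the
// low and high 32-bit halves are equal, only the low half is built and BFM copies it to the high
// half. For example, 0x0000123400005678 becomes MOVZ reg, #0x5678 followed by MOVK reg, #0x1234, LSL #32.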
void MacroAssemblerAArch64::CopyImmSize64(aarch64::Register destReg, uint64_t srcVal)
{
    /*
     * partition the value into 4 16-bit chunks;
     * if there are at least as many all-zero chunks as all-one (0xFFFF) chunks,
     * use movz as the initial instruction, otherwise movn.
     */
    bool useMovz = BetterUseMOVZ(srcVal);
    bool useMovk = false;
    /* get lower 32 bits of the immediate */
    uint64_t chunkLval = srcVal & 0xFFFFFFFFULL;
    /* get upper 32 bits of the immediate */
    uint64_t chunkHval = (srcVal >> k32BitSize) & 0xFFFFFFFFULL;
    int32_t maxLoopTime = 4;

    if (chunkLval == chunkHval) {
        /* compute the lower 32 bits and then copy them to the upper 32 bits, so only 2 chunks need to be processed */
        maxLoopTime = 2;
    }

    uint64_t sa = 0;
    for (int64_t i = 0; i < maxLoopTime; ++i, sa += k16BitSize) {
        /* create an imm operand which represents the i-th 16-bit chunk of the immediate */
        uint64_t chunkVal = (srcVal >> (static_cast<uint64_t>(sa))) & 0x0000FFFFULL;
        if (useMovz ? (chunkVal == 0) : (chunkVal == 0x0000FFFFULL)) {
            continue;
        }
        int64_t src16 = static_cast<int64_t>(chunkVal);
        if (!useMovk) {
            /* use movz or movn */
            if (!useMovz) {
                src16 = ~(static_cast<uint64_t>(src16)) & ((1ULL << k16BitSize) - 1UL);
                assembler.Movn(destReg, src16, sa);
            } else {
                assembler.Movz(destReg, src16, sa);
            }
            useMovk = true;
        } else {
            assembler.Movk(destReg, src16, sa);
        }
    }

    if (maxLoopTime == 2) { /* as described above, only 2 chunks need to be processed */
        /* copy the lower 32 bits to the upper 32 bits */
        uint32_t immr = -k32BitSize % k64BitSize; // immr = -shift % size
        uint32_t imms = k32BitSize - 1; // imms = width - 1
        assembler.Bfm(destReg, destReg, immr, imms);
    }
}

}  // namespace panda::ecmascript::kungfu