1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef LIBPANDAFILE_BYTECODE_INSTRUCTION_H
17 #define LIBPANDAFILE_BYTECODE_INSTRUCTION_H
18 
#include "file.h"

#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <ostream>
#include <type_traits>

#include "utils/bit_helpers.h"

#if !PANDA_TARGET_WINDOWS
#include "securec.h"
#endif
30 
31 namespace panda {
32 
/**
 * Selects which BytecodeInstBase specialization backs a BytecodeInst.
 *
 * FAST - the base keeps only the instruction pointer and reads without bounds checks.
 * SAFE - the base additionally keeps the [from, to] limits of the buffer and a validity
 *        flag, and checks every access against those limits.
 */
enum class BytecodeInstMode {
    FAST,
    SAFE,
};

// Mode-dependent storage and raw reading primitives; specialized per BytecodeInstMode below.
template <const BytecodeInstMode>
class BytecodeInstBase;
37 
38 class BytecodeId {
39 public:
BytecodeId(uint32_t id)40     constexpr explicit BytecodeId(uint32_t id) : id_(id) {}
41 
42     constexpr BytecodeId() = default;
43 
44     ~BytecodeId() = default;
45 
46     DEFAULT_COPY_SEMANTIC(BytecodeId);
47     NO_MOVE_SEMANTIC(BytecodeId);
48 
AsIndex() const49     panda_file::File::Index AsIndex() const
50     {
51         ASSERT(id_ < std::numeric_limits<uint16_t>::max());
52         return id_;
53     }
54 
55     panda_file::File::EntityId AsFileId() const
56     {
57         return panda_file::File::EntityId(id_);
58     }
59 
60     uint32_t AsRawValue() const
61     {
62         return id_;
63     }
64 
65     bool IsValid() const
66     {
67         return id_ != INVALID;
68     }
69 
70     bool operator==(BytecodeId id) const noexcept
71     {
72         return id_ == id.id_;
73     }
74 
75     friend std::ostream &operator<<(std::ostream &stream, BytecodeId id)
76     {
77         return stream << id.id_;
78     }
79 
80 private:
81     static constexpr size_t INVALID = std::numeric_limits<uint32_t>::max();
82 
83     uint32_t id_ {INVALID};
84 };
85 
86 template <>
87 // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions)
88 class BytecodeInstBase<BytecodeInstMode::FAST> {
89 public:
90     BytecodeInstBase() = default;
91     explicit BytecodeInstBase(const uint8_t *pc) : pc_ {pc} {}
92     ~BytecodeInstBase() = default;
93 
94 protected:
95     const uint8_t *GetPointer(int32_t offset) const
96     {
97         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
98         return pc_ + offset;
99     }
100 
101     const uint8_t *GetAddress() const
102     {
103         return pc_;
104     }
105 
106     const uint8_t *GetAddress() volatile const
107     {
108         return pc_;
109     }
110 
111     template <class T>
112     T Read(size_t offset) const
113     {
114         using unaligned_type __attribute__((aligned(1))) = const T;
115         return *reinterpret_cast<unaligned_type *>(GetPointer(offset));
116     }
117 
118     void Write(uint32_t value, uint32_t offset, uint32_t width)
119     {
120         auto *dst = const_cast<uint8_t *>(GetPointer(offset));
121         if (memcpy_s(dst, width, &value, width) != 0) {
122             LOG(FATAL, PANDAFILE) << "Cannot write value : " << value << "at the dst offset : " << offset;
123         }
124     }
125 
126     uint8_t ReadByte(size_t offset) const
127     {
128         return Read<uint8_t>(offset);
129     }
130 
131 private:
132     const uint8_t *pc_ {nullptr};
133 };
134 
135 template <>
136 class BytecodeInstBase<BytecodeInstMode::SAFE> {
137 public:
138     BytecodeInstBase() = default;
139     explicit BytecodeInstBase(const uint8_t *pc, const uint8_t *from, const uint8_t *to)
140         : pc_ {pc}, from_ {from}, to_ {to}, valid_ {true}
141     {
142         ASSERT(from_ <= to_ && pc_ >= from_ && pc_ <= to_);
143     }
144 
145 protected:
146     const uint8_t *GetPointer(int32_t offset) const
147     {
148         return GetPointer(offset, 1);
149     }
150 
151     bool IsLast(size_t size) const
152     {
153         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
154         const uint8_t *ptr_next = pc_ + size;
155         return ptr_next > to_;
156     }
157 
158     const uint8_t *GetPointer(int32_t offset, size_t size) const
159     {
160         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
161         const uint8_t *ptr_from = pc_ + offset;
162         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
163         const uint8_t *ptr_to = ptr_from + size - 1;
164         if (from_ == nullptr || ptr_from < from_ || ptr_to > to_) {
165             valid_ = false;
166             return from_;
167         }
168         return ptr_from;
169     }
170 
171     const uint8_t *GetAddress() const
172     {
173         return pc_;
174     }
175 
176     const uint8_t *GetFrom() const
177     {
178         return from_;
179     }
180 
181     const uint8_t *GetTo() const
182     {
183         return to_;
184     }
185 
186     uint32_t GetOffset() const
187     {
188         return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(pc_) - reinterpret_cast<uintptr_t>(from_));
189     }
190 
191     const uint8_t *GetAddress() volatile const
192     {
193         return pc_;
194     }
195 
196     template <class T>
197     T Read(size_t offset) const
198     {
199         using unaligned_type __attribute__((aligned(1))) = const T;
200         auto ptr = reinterpret_cast<unaligned_type *>(GetPointer(offset, sizeof(T)));
201         if (IsValid()) {
202             return *ptr;
203         }
204         return {};
205     }
206 
207     bool IsValid() const
208     {
209         return valid_;
210     }
211 
212 private:
213     const uint8_t *pc_ {nullptr};
214     const uint8_t *from_ {nullptr};
215     const uint8_t *to_ {nullptr};
216     mutable bool valid_ {false};
217 };
218 
219 template <const BytecodeInstMode Mode = BytecodeInstMode::FAST>
220 
221 // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions)
222 class BytecodeInst : public BytecodeInstBase<Mode> {
223     using Base = BytecodeInstBase<Mode>;
224 
225 public:
226 #include <bytecode_instruction_enum_gen.h>
227 
228     BytecodeInst() = default;
229 
230     ~BytecodeInst() = default;
231 
232     template <const BytecodeInstMode M = Mode, typename = std::enable_if_t<M == BytecodeInstMode::FAST>>
233     explicit BytecodeInst(const uint8_t *pc) : Base {pc}
234     {
235     }
236 
237     template <const BytecodeInstMode M = Mode, typename = std::enable_if_t<M == BytecodeInstMode::SAFE>>
238     explicit BytecodeInst(const uint8_t *pc, const uint8_t *from, const uint8_t *to) : Base {pc, from, to}
239     {
240     }
241 
242     template <Format format, size_t idx = 0>
243     BytecodeId GetId() const;
244 
245     template <Format format, size_t idx = 0>
246     uint16_t GetVReg() const;
247 
248     template <Format format, size_t idx = 0, bool is_signed = true>
249     auto GetImm() const;
250 
251     BytecodeId GetId(size_t idx = 0) const;
252 
253     void UpdateId(BytecodeId new_id, uint32_t idx = 0);
254 
255     uint16_t GetVReg(size_t idx = 0) const;
256 
257     // Read imm and return it as int64_t/uint64_t
258     auto GetImm64(size_t idx = 0) const;
259 
260     // Read imm as actually signed / unsigned and cast it to int64 before return
261     auto GetImmData(size_t idx = 0) const;
262 
263     auto GetImmCount() const;
264 
265     /**
266      * Primary and Secondary Opcodes are used in interpreter/verifier instruction dispatch
267      * while full Opcode is typically used for various instruction property query.
268      *
269      * Implementation note: one can describe Opcode in terms of Primary/Secondary opcodes
270      * or vice versa. The first way is more preferable, because Primary/Secondary opcodes
271      * are more performance critical and compiler is not always clever enough to reduce them
272      * to simple byte reads.
273      */
274     BytecodeInst::Opcode GetOpcode() const;
275 
276     uint8_t GetPrimaryOpcode() const
277     {
278         return ReadByte(0);
279     }
280 
281     bool IsPrimaryOpcodeValid() const;
282 
283     uint8_t GetSecondaryOpcode() const;
284 
285     bool IsPrefixed() const;
286 
287     static constexpr uint8_t GetMinPrefixOpcodeIndex();
288 
289     template <const BytecodeInstMode M = Mode>
290     auto JumpTo(int32_t offset) const -> std::enable_if_t<M == BytecodeInstMode::FAST, BytecodeInst>
291     {
292         return BytecodeInst(Base::GetPointer(offset));
293     }
294 
295     template <const BytecodeInstMode M = Mode>
296     auto JumpTo(int32_t offset) const -> std::enable_if_t<M == BytecodeInstMode::SAFE, BytecodeInst>
297     {
298         if (!IsValid()) {
299             return {};
300         }
301         const uint8_t *ptr = Base::GetPointer(offset);
302         if (!IsValid()) {
303             return {};
304         }
305         return BytecodeInst(ptr, Base::GetFrom(), Base::GetTo());
306     }
307 
308     template <const BytecodeInstMode M = Mode>
309     auto IsLast() const -> std::enable_if_t<M == BytecodeInstMode::SAFE, bool>
310     {
311         return Base::IsLast(GetSize());
312     }
313 
314     template <const BytecodeInstMode M = Mode>
315     auto IsValid() const -> std::enable_if_t<M == BytecodeInstMode::SAFE, bool>
316     {
317         return Base::IsValid();
318     }
319 
320     template <Format format>
321     BytecodeInst GetNext() const
322     {
323         return JumpTo(Size(format));
324     }
325 
326     BytecodeInst GetNext() const
327     {
328         return JumpTo(GetSize());
329     }
330 
331     const uint8_t *GetAddress() const
332     {
333         return Base::GetAddress();
334     }
335 
336     const uint8_t *GetAddress() volatile const
337     {
338         return Base::GetAddress();
339     }
340 
341     template <const BytecodeInstMode M = Mode>
342     auto GetFrom() const -> std::enable_if_t<M == BytecodeInstMode::SAFE, const uint8_t *>
343     {
344         return Base::GetFrom();
345     }
346 
347     template <const BytecodeInstMode M = Mode>
348     auto GetTo() const -> std::enable_if_t<M == BytecodeInstMode::SAFE, const uint8_t *>
349     {
350         return Base::GetTo();
351     }
352 
353     template <const BytecodeInstMode M = Mode>
354     auto GetOffset() const -> std::enable_if_t<M == BytecodeInstMode::SAFE, uint32_t>
355     {
356         return Base::GetOffset();
357     }
358 
359     uint8_t ReadByte(size_t offset) const
360     {
361         return Base::template Read<uint8_t>(offset);
362     }
363 
364     template <class R, class S>
365     auto ReadHelper(size_t byteoffset, size_t bytecount, size_t offset, size_t width) const;
366 
367     template <size_t offset, size_t width, bool is_signed = false>
368     auto Read() const;
369 
370     template <bool is_signed = false>
371     auto Read64(size_t offset, size_t width) const;
372 
373     size_t GetSize() const;
374 
375     Format GetFormat() const;
376 
377     bool HasFlag(Flags flag) const;
378 
379     bool IsIdMatchFlag(size_t idx, Flags flag) const;  // idx-th id matches flag or not
380 
381     bool IsThrow(Exceptions exception) const;
382 
383     bool CanThrow() const;
384 
385     bool IsTerminator() const
386     {
387         return HasFlag(Flags::RETURN) || HasFlag(Flags::JUMP) || IsThrow(Exceptions::X_THROW);
388     }
389 
390     bool IsSuspend() const
391     {
392         return HasFlag(Flags::SUSPEND);
393     }
394 
395     static constexpr bool HasId(Format format, size_t idx);
396 
397     static constexpr bool HasVReg(Format format, size_t idx);
398 
399     static constexpr bool HasImm(Format format, size_t idx);
400 
401     static constexpr Format GetFormat(Opcode opcode);
402 
403     static constexpr size_t Size(Format format);
404 
405     static constexpr size_t Size(Opcode opcode)
406     {
407         return Size(GetFormat(opcode));
408     }
409 
410     static std::optional<uint64_t> SafeAdd(uint64_t a, uint64_t b)
411     {
412         if (a > std::numeric_limits<uint64_t>::max() - b) {
413             return std::nullopt;
414         }
415         return a + b;
416     }
417 
418     size_t GetLiteralIndex() const;
419 
420     bool IsJumpInstruction() const;
421 
422     bool IsReturnOrThrowInstruction() const;
423 
424     bool IsRangeInstruction() const;
425 
426     std::optional<uint64_t> GetRangeInsLastRegIdx() const;
427 
428     std::optional<uint64_t> GetLastVReg() const;
429 };
430 
431 template <const BytecodeInstMode Mode>
432 std::ostream &operator<<(std::ostream &os, const BytecodeInst<Mode> &inst);
433 
434 using BytecodeInstruction = BytecodeInst<BytecodeInstMode::FAST>;
435 using BytecodeInstructionSafe = BytecodeInst<BytecodeInstMode::SAFE>;
436 
437 }  // namespace panda
438 
439 #endif  // LIBPANDAFILE_BYTECODE_INSTRUCTION_H
440