1 /**
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef LIBPANDAFILE_FILE_H
17 #define LIBPANDAFILE_FILE_H
18 
19 #include <array>
20 #include <cstdint>
21 #include <fcntl.h>
22 #include <iostream>
23 #include <memory>
24 #include <string>
25 #include <string_view>
26 
27 #include "helpers.h"
28 #include "os/mem.h"
29 #include "os/filesystem.h"
30 #include "utils/span.h"
31 #include "utils/utf.h"
32 #include "utils/logger.h"
33 
34 namespace panda {
35 struct EntryFileStat;
36 }  // namespace panda
37 
38 namespace panda::panda_file {
39 
40 class PandaCache;
41 
// NOTE(review): both macros are only defined in this header; their users are not visible here.
// XPM_PROC_LENGTH is presumably the buffer length used when reading the proc entry below - confirm at the use site.
#define XPM_PROC_LENGTH 50
// Path of the per-process "xpm_region" entry under /proc/self.
#define PROC_SELF_XPM_REGION_PATH "/proc/self/xpm_region"
/*
 * EntityPairHeader describes a pair made of the hash of a class descriptor and the offset of the
 * corresponding entity id. It is used to quickly find a class by its descriptor.
 */
struct EntityPairHeader {
    // Hash value of the class descriptor.
    uint32_t descriptor_hash;
    // Offset of the entity id that belongs to the hashed descriptor.
    uint32_t entity_id_offset;
    // NOTE(review): presumably the position of the next entry in the same hash chain - confirm against the
    // class hash table lookup code.
    uint32_t next_pos;
};
53 
54 class File {
55 public:
56     using Index = uint16_t;
57     using Index32 = uint32_t;
58 
59     static constexpr size_t MAGIC_SIZE = 8;
60     static constexpr size_t VERSION_SIZE = 4;
61     static const std::array<uint8_t, MAGIC_SIZE> MAGIC;
62 
63     struct Header {
64         std::array<uint8_t, MAGIC_SIZE> magic;
65         uint32_t checksum;
66         std::array<uint8_t, VERSION_SIZE> version;
67         uint32_t file_size;
68         uint32_t foreign_off;
69         uint32_t foreign_size;
70         uint32_t num_classes;
71         uint32_t class_idx_off;
72         uint32_t num_lnps;
73         uint32_t lnp_idx_off;
74         uint32_t num_literalarrays;
75         uint32_t literalarray_idx_off;
76         uint32_t num_indexes;
77         uint32_t index_section_off;
78     };
79 
80     struct IndexHeader {
81         uint32_t start;
82         uint32_t end;
83         uint32_t class_idx_size;
84         uint32_t class_idx_off;
85         uint32_t method_idx_size;
86         uint32_t method_idx_off;
87         uint32_t field_idx_size;
88         uint32_t field_idx_off;
89         uint32_t proto_idx_size;
90         uint32_t proto_idx_off;
91     };
92 
93     struct StringData {
StringDatapanda::panda_file::File::StringData94         StringData(uint32_t len, const uint8_t *d) : utf16_length(len), is_ascii(false), data(d) {}
95         StringData() = default;
96         uint32_t utf16_length;  // NOLINT(misc-non-private-member-variables-in-classes)
97         bool is_ascii;          // NOLINT(misc-non-private-member-variables-in-classes)
98         const uint8_t *data;    // NOLINT(misc-non-private-member-variables-in-classes)
99     };
100 
101     // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions)
102     class EntityId {
103     public:
EntityId(uint32_t offset)104         explicit constexpr EntityId(uint32_t offset) : offset_(offset) {}
105 
106         EntityId() = default;
107 
108         ~EntityId() = default;
109 
IsValid() const110         bool IsValid() const
111         {
112             return offset_ > sizeof(Header);
113         }
114 
GetOffset() const115         uint32_t GetOffset() const
116         {
117             return offset_;
118         }
119 
GetSize()120         static constexpr size_t GetSize()
121         {
122             return sizeof(uint32_t);
123         }
124 
operator <(const EntityId &l, const EntityId &r)125         friend bool operator<(const EntityId &l, const EntityId &r)
126         {
127             return l.offset_ < r.offset_;
128         }
129 
operator ==(const EntityId &l, const EntityId &r)130         friend bool operator==(const EntityId &l, const EntityId &r)
131         {
132             return l.offset_ == r.offset_;
133         }
134 
operator <<(std::ostream &stream, const EntityId &id)135         friend std::ostream &operator<<(std::ostream &stream, const EntityId &id)
136         {
137             return stream << id.offset_;
138         }
139 
140     private:
141         uint32_t offset_ {0};
142     };
143 
144     enum OpenMode { READ_ONLY, READ_WRITE, WRITE_ONLY };
145 
146     StringData GetStringData(EntityId id) const;
147     EntityId GetLiteralArraysId() const;
148 
149     EntityId GetClassId(const uint8_t *mutf8_name) const;
150 
151     EntityId GetClassIdFromClassHashTable(const uint8_t *mutf8_name) const;
152 
GetHeader() const153     const Header *GetHeader() const
154     {
155         return reinterpret_cast<const Header *>(GetBase());
156     }
157 
GetBase() const158     const uint8_t *GetBase() const
159     {
160         return reinterpret_cast<const uint8_t *>(base_.Get());
161     }
162 
GetPtr() const163     const os::mem::ConstBytePtr &GetPtr() const
164     {
165         return base_;
166     }
167 
IsExternal(EntityId id) const168     bool IsExternal(EntityId id) const
169     {
170         const Header *header = GetHeader();
171         uint32_t foreign_begin = header->foreign_off;
172         uint32_t foreign_end = foreign_begin + header->foreign_size;
173         return id.GetOffset() >= foreign_begin && id.GetOffset() < foreign_end;
174     }
175 
GetIdFromPointer(const uint8_t *ptr) const176     EntityId GetIdFromPointer(const uint8_t *ptr) const
177     {
178         return EntityId(ptr - GetBase());
179     }
180 
GetSpanFromId(EntityId id) const181     Span<const uint8_t> GetSpanFromId(EntityId id) const
182     {
183         const Header *header = GetHeader();
184         Span file(GetBase(), header->file_size);
185         ThrowIfWithCheck(!id.IsValid() || id.GetOffset() >= file.size(), File::INVALID_FILE_OFFSET,
186                          File::GET_SPAN_FROM_ID);
187         return file.Last(file.size() - id.GetOffset());
188     }
189 
GetClasses() const190     Span<const uint32_t> GetClasses() const
191     {
192         const Header *header = GetHeader();
193         Span file(GetBase(), header->file_size);
194         Span class_idx_data = file.SubSpan(header->class_idx_off, header->num_classes * sizeof(uint32_t));
195         return Span(reinterpret_cast<const uint32_t *>(class_idx_data.data()), header->num_classes);
196     }
197 
GetLiteralArrays() const198     Span<const uint32_t> GetLiteralArrays() const
199     {
200         const Header *header = GetHeader();
201         Span file(GetBase(), header->file_size);
202         Span litarr_idx_data = file.SubSpan(header->literalarray_idx_off, header->num_literalarrays * sizeof(uint32_t));
203         return Span(reinterpret_cast<const uint32_t *>(litarr_idx_data.data()), header->num_literalarrays);
204     }
205 
GetIndexHeaders() const206     Span<const IndexHeader> GetIndexHeaders() const
207     {
208         const Header *header = GetHeader();
209         Span file(GetBase(), header->file_size);
210         auto sp = file.SubSpan(header->index_section_off, header->num_indexes * sizeof(IndexHeader));
211         return Span(reinterpret_cast<const IndexHeader *>(sp.data()), header->num_indexes);
212     }
213 
GetIndexHeader(EntityId id) const214     const IndexHeader *GetIndexHeader(EntityId id) const
215     {
216         if (UNLIKELY(!id.IsValid() || id.GetOffset() >= GetHeader()->file_size)) {
217             return nullptr;
218         }
219         auto headers = GetIndexHeaders();
220         auto offset = id.GetOffset();
221         for (const auto &header : headers) {
222             if (header.start <= offset && offset < header.end) {
223                 return &header;
224             }
225         }
226         return nullptr;
227     }
228 
GetClassIndex(const IndexHeader *index_header) const229     Span<const EntityId> GetClassIndex(const IndexHeader *index_header) const
230     {
231         ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_CLASS_INDEX);
232         auto *header = GetHeader();
233         Span file(GetBase(), header->file_size);
234         ASSERT(index_header != nullptr);
235         auto class_idx_size = index_header->class_idx_size * EntityId::GetSize();
236         ThrowIfWithCheck(index_header->class_idx_off > header->file_size || class_idx_size > header->file_size ||
237             index_header->class_idx_off > header->file_size - class_idx_size, File::INVALID_INDEX_HEADER,
238             File::GET_CLASS_INDEX);
239         auto sp = file.SubSpan(index_header->class_idx_off, index_header->class_idx_size * EntityId::GetSize());
240         return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->class_idx_size);
241     }
242 
GetClassIndex(EntityId id) const243     Span<const EntityId> GetClassIndex(EntityId id) const
244     {
245         auto *index_header = GetIndexHeader(id);
246         return GetClassIndex(index_header);
247     }
248 
GetMethodIndex(const IndexHeader *index_header) const249     Span<const EntityId> GetMethodIndex(const IndexHeader *index_header) const
250     {
251         ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_METHOD_INDEX);
252         auto *header = GetHeader();
253         Span file(GetBase(), header->file_size);
254         ASSERT(index_header != nullptr);
255         auto method_idx_size = index_header->method_idx_size * EntityId::GetSize();
256         ThrowIfWithCheck(index_header->method_idx_off > header->file_size || method_idx_size > header->file_size ||
257             index_header->method_idx_off > header->file_size - method_idx_size, File::INVALID_INDEX_HEADER,
258             File::GET_METHOD_INDEX);
259         auto sp = file.SubSpan(index_header->method_idx_off, index_header->method_idx_size * EntityId::GetSize());
260         return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->method_idx_size);
261     }
262 
GetMethodIndex(EntityId id) const263     Span<const EntityId> GetMethodIndex(EntityId id) const
264     {
265         auto *index_header = GetIndexHeader(id);
266         return GetMethodIndex(index_header);
267     }
268 
GetFieldIndex(const IndexHeader *index_header) const269     Span<const EntityId> GetFieldIndex(const IndexHeader *index_header) const
270     {
271         ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_FIELD_INDEX);
272         auto *header = GetHeader();
273         Span file(GetBase(), header->file_size);
274         ASSERT(index_header != nullptr);
275         auto field_idx_size = index_header->field_idx_size * EntityId::GetSize();
276         ThrowIfWithCheck(index_header->field_idx_off > header->file_size || field_idx_size > header->file_size ||
277             index_header->field_idx_off > header->file_size - field_idx_size, File::INVALID_INDEX_HEADER,
278             File::GET_FIELD_INDEX);
279         auto sp = file.SubSpan(index_header->field_idx_off, index_header->field_idx_size * EntityId::GetSize());
280         return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->field_idx_size);
281     }
282 
GetFieldIndex(EntityId id) const283     Span<const EntityId> GetFieldIndex(EntityId id) const
284     {
285         auto *index_header = GetIndexHeader(id);
286         return GetFieldIndex(index_header);
287     }
288 
GetProtoIndex(const IndexHeader *index_header) const289     Span<const EntityId> GetProtoIndex(const IndexHeader *index_header) const
290     {
291         ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_PROTO_INDEX);
292         auto *header = GetHeader();
293         Span file(GetBase(), header->file_size);
294         ASSERT(index_header != nullptr);
295         auto proto_idx_size = index_header->proto_idx_size * EntityId::GetSize();
296         ThrowIfWithCheck(index_header->proto_idx_off > header->file_size || proto_idx_size > header->file_size ||
297             index_header->proto_idx_off > header->file_size - proto_idx_size, File::INVALID_INDEX_HEADER,
298             File::GET_PROTO_INDEX);
299         auto sp = file.SubSpan(index_header->proto_idx_off, index_header->proto_idx_size * EntityId::GetSize());
300         return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->proto_idx_size);
301     }
302 
GetProtoIndex(EntityId id) const303     Span<const EntityId> GetProtoIndex(EntityId id) const
304     {
305         auto *index_header = GetIndexHeader(id);
306         return GetProtoIndex(index_header);
307     }
308 
GetLineNumberProgramIndex() const309     Span<const EntityId> GetLineNumberProgramIndex() const
310     {
311         const Header *header = GetHeader();
312         Span file(GetBase(), header->file_size);
313         Span lnp_idx_data = file.SubSpan(header->lnp_idx_off, header->num_lnps * EntityId::GetSize());
314         return Span(reinterpret_cast<const EntityId *>(lnp_idx_data.data()), header->num_lnps);
315     }
316 
ResolveClassIndex(EntityId id, Index idx) const317     EntityId ResolveClassIndex(EntityId id, Index idx) const
318     {
319         auto index = GetClassIndex(id);
320         if (UNLIKELY(idx >= index.Size())) {
321             return EntityId();
322         }
323         return index[idx];
324     }
325 
ResolveMethodIndex(EntityId id, Index idx) const326     EntityId ResolveMethodIndex(EntityId id, Index idx) const
327     {
328         auto index = GetMethodIndex(id);
329         if (UNLIKELY(idx >= index.Size())) {
330             return EntityId();
331         }
332         return index[idx];
333     }
334 
ResolveOffsetByIndex(EntityId id, Index idx) const335     EntityId ResolveOffsetByIndex(EntityId id, Index idx) const
336     {
337         auto index = GetMethodIndex(id);
338         if (UNLIKELY(idx >= index.Size())) {
339             return EntityId();
340         }
341         return index[idx];
342     }
343 
ResolveFieldIndex(EntityId id, Index idx) const344     EntityId ResolveFieldIndex(EntityId id, Index idx) const
345     {
346         auto index = GetFieldIndex(id);
347         if (UNLIKELY(idx >= index.Size())) {
348             return EntityId();
349         }
350         return index[idx];
351     }
352 
ResolveProtoIndex(EntityId id, Index idx) const353     EntityId ResolveProtoIndex(EntityId id, Index idx) const
354     {
355         auto index = GetProtoIndex(id);
356         if (UNLIKELY(idx >= index.Size())) {
357             return EntityId();
358         }
359         return index[idx];
360     }
361 
ResolveLineNumberProgramIndex(Index32 idx) const362     EntityId ResolveLineNumberProgramIndex(Index32 idx) const
363     {
364         auto index = GetLineNumberProgramIndex();
365         if (UNLIKELY(idx >= index.Size())) {
366             return EntityId();
367         }
368         return index[idx];
369     }
370 
GetFilename() const371     const std::string &GetFilename() const
372     {
373         return FILENAME;
374     }
375 
GetPandaCache() const376     PandaCache *GetPandaCache() const
377     {
378 #ifdef ENABLE_FULL_FILE_FIELDS
379         return panda_cache_.get();
380 #else
381         LOG(WARNING, PANDAFILE) << "Not Support GetPandaCache from ohos side.";
382         return nullptr;
383 #endif
384     }
385 
GetFilenameHash() const386     uint32_t GetFilenameHash() const
387     {
388         return FILENAME_HASH;
389     }
390 
391     // note: intentionally returns uint64_t instead of the field type due to usage
GetUniqId() const392     uint64_t GetUniqId() const
393     {
394         return UNIQ_ID;
395     }
396 
GetFullFileName() const397     const std::string &GetFullFileName() const
398     {
399 #ifdef ENABLE_FULL_FILE_FIELDS
400         return FULL_FILENAME;
401 #else
402         LOG(FATAL, PANDAFILE) << "Not Support GetFullFileName from ohos side.";
403         return FILENAME;
404 #endif
405     }
406 
GetFileBaseOffset()407     static constexpr uint32_t GetFileBaseOffset()
408     {
409         return MEMBER_OFFSET(File, base_);
410     }
411 
GetClassHashTable() const412     Span<const panda::panda_file::EntityPairHeader> GetClassHashTable() const
413     {
414         return class_hash_table_;
415     }
416 
417     static uint32_t CalcFilenameHash(const std::string &filename);
418 
419     static std::unique_ptr<const File> Open(std::string_view filename, OpenMode open_mode = READ_ONLY);
420 
421     static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr);
422 
423     static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr, std::string_view filename);
424 
425     static std::unique_ptr<const File> OpenUncompressedArchive(int fd, const std::string_view &filename, size_t size,
426                                                                uint32_t offset, OpenMode open_mode = READ_ONLY);
427 
SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table) const428     void SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table) const
429     {
430         class_hash_table_ = class_hash_table;
431     }
432 
433     bool ValidateChecksum(uint32_t *cal_checksum_out = nullptr) const;
434 
435     void ThrowIfWithCheck(bool cond, const std::string_view& msg, const std::string_view& tag = "") const;
436 
437     static constexpr const char *INVALID_FILE_OFFSET = "Invalid file offset";
438     static constexpr const char *NULL_INDEX_HEADER = "index_header is null";
439     static constexpr const char *INVALID_INDEX_HEADER = "index_header is invalid";
440 
441     static constexpr const char *GET_CLASS_INDEX = "GetClassIndex";
442     static constexpr const char *GET_METHOD_INDEX = "GetMethodIndex";
443     static constexpr const char *GET_FIELD_INDEX = "GetFieldIndex";
444     static constexpr const char *GET_PROTO_INDEX = "GetProtoIndex";
445 
446     static constexpr const char *ANNOTATION_DATA_ACCESSOR = "AnnotationDataAccessor";
447     static constexpr const char *CLASS_DATA_ACCESSOR = "ClassDataAccessor";
448     static constexpr const char *CODE_DATA_ACCESSOR = "CodeDataAccessor";
449     static constexpr const char *FIELD_DATA_ACCESSOR = "FieldDataAccessor";
450     static constexpr const char *GET_SPAN_FROM_ID = "GetSpanFromId";
451 
452     ~File();
453 
454     NO_COPY_SEMANTIC(File);
455     NO_MOVE_SEMANTIC(File);
456 
457 private:
458     File(std::string filename, os::mem::ConstBytePtr &&base);
459 
460     os::mem::ConstBytePtr base_;
461     const std::string FILENAME;
462     const uint32_t FILENAME_HASH;
463 #ifdef ENABLE_FULL_FILE_FIELDS
464     const std::string FULL_FILENAME;
465     std::unique_ptr<PandaCache> panda_cache_;
466 #endif
467     const uint32_t UNIQ_ID;
468     mutable panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table_;
469 };
470 
471 static_assert(File::GetFileBaseOffset() == 0);
472 
operator ==(const File::StringData &string_data1, const File::StringData &string_data2)473 inline bool operator==(const File::StringData &string_data1, const File::StringData &string_data2)
474 {
475     if (string_data1.utf16_length != string_data2.utf16_length) {
476         return false;
477     }
478 
479     return utf::IsEqual(string_data1.data, string_data2.data);
480 }
481 
operator !=(const File::StringData &string_data1, const File::StringData &string_data2)482 inline bool operator!=(const File::StringData &string_data1, const File::StringData &string_data2)
483 {
484     return !(string_data1 == string_data2);
485 }
486 
operator <(const File::StringData &string_data1, const File::StringData &string_data2)487 inline bool operator<(const File::StringData &string_data1, const File::StringData &string_data2)
488 {
489     if (string_data1.utf16_length == string_data2.utf16_length) {
490         return utf::CompareMUtf8ToMUtf8(string_data1.data, string_data2.data) < 0;
491     }
492 
493     return string_data1.utf16_length < string_data2.utf16_length;
494 }
495 
/*
 * NOTE(review): only declared here. Judging by the name, this presumably reports whether the range
 * [mem, mem + size) lies inside secure memory - confirm against the definition.
 */
bool CheckSecureMem(uintptr_t mem, size_t size);
497 
/*
 * OpenPandaFileOrZip opens the file at the location specified by name.
 * open_mode defaults to File::READ_ONLY.
 */
std::unique_ptr<const File> OpenPandaFileOrZip(std::string_view location,
                                               panda_file::File::OpenMode open_mode = panda_file::File::READ_ONLY);
503 
/*
 * OpenPandaFileFromMemory opens a panda file from a file buffer of the given size.
 * NOTE(review): the meaning of tag is not visible from this header - confirm against the definition.
 */
std::unique_ptr<const File> OpenPandaFileFromMemory(const void *buffer, size_t size, std::string tag = "");
508 
/*
 * OpenPandaFileFromSecureMemory opens a panda file from a secure buffer of the given size.
 */
std::unique_ptr<const File> OpenPandaFileFromSecureMemory(uint8_t *buffer, size_t size);
513 
/*
 * OpenPandaFile opens the file at the location specified by name.
 * archive_filename defaults to an empty string and open_mode to File::READ_ONLY.
 */
std::unique_ptr<const File> OpenPandaFile(std::string_view location, std::string_view archive_filename = "",
                                          panda_file::File::OpenMode open_mode = panda_file::File::READ_ONLY);
519 
/*
 * Checks that ptr points to a valid panda file: magic.
 */
bool CheckHeader(const os::mem::ConstBytePtr &ptr, const std::string_view &filename = "");
// NOTE(review): returns nothing; how an unsupported file_version is reported is not visible from this header.
void CheckFileVersion(const std::array<uint8_t, File::VERSION_SIZE> &file_version, const std::string_view &filename);
525 
// Last version that contains the redundant literal array in the header.
constexpr std::array<uint8_t, File::VERSION_SIZE> LAST_CONTAINS_LITERAL_IN_HEADER_VERSION {12, 0, 6, 0};
// NOTE(review): presumably compares version against LAST_CONTAINS_LITERAL_IN_HEADER_VERSION - confirm against the
// definition.
bool ContainsLiteralArrayInHeader(const std::array<uint8_t, File::VERSION_SIZE> &version);
529 
// Declared here and defined in another translation unit.
// NOLINTNEXTLINE(readability-identifier-naming)
extern const char *ARCHIVE_FILENAME;
532 }  // namespace panda::panda_file
533 
534 namespace std {
535 template <>
536 struct hash<panda::panda_file::File::EntityId> {
operator ()panda::std::hash537     std::size_t operator()(panda::panda_file::File::EntityId id) const
538     {
539         return std::hash<uint32_t> {}(id.GetOffset());
540     }
541 };
542 
543 }  // namespace std
544 
545 #endif  // LIBPANDAFILE_FILE_H
546