1 /**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef LIBPANDAFILE_FILE_H
17 #define LIBPANDAFILE_FILE_H
18
19 #include <array>
20 #include <cstdint>
21 #include <fcntl.h>
22 #include <iostream>
23 #include <memory>
24 #include <string>
25 #include <string_view>
26
27 #include "helpers.h"
28 #include "os/mem.h"
29 #include "os/filesystem.h"
30 #include "utils/span.h"
31 #include "utils/utf.h"
32 #include "utils/logger.h"
33
34 namespace panda {
35 struct EntryFileStat;
36 } // namespace panda
37
38 namespace panda::panda_file {
39
40 class PandaCache;
41
// NOTE: preprocessor macros are not scoped by the enclosing namespace; both names below are visible
// to every translation unit that includes this header.
// Length constant associated with the XPM proc entry; presumably a buffer size used when reading
// it - TODO confirm against the implementation file.
#define XPM_PROC_LENGTH 50
// procfs path of the XPM region entry for the current process (the consumer is not in this header).
#define PROC_SELF_XPM_REGION_PATH "/proc/self/xpm_region"
/*
 * EntityPairHeader describes a pair made of the hash value of a class descriptor and the
 * offset of the corresponding entity id; it is used to quickly find a class by its descriptor.
 */
struct EntityPairHeader {
    uint32_t descriptor_hash;   // hash value of the class descriptor
    uint32_t entity_id_offset;  // entity id offset of the class that owns the descriptor
    uint32_t next_pos;          // presumably the position of the next entry in a collision chain - TODO confirm
};
53
54 class File {
55 public:
// 16-bit index type taken by ResolveClassIndex/ResolveMethodIndex/ResolveFieldIndex/ResolveProtoIndex.
using Index = uint16_t;
// 32-bit index type taken by ResolveLineNumberProgramIndex.
using Index32 = uint32_t;

// Number of bytes in Header::magic.
static constexpr size_t MAGIC_SIZE = 8;
// Number of bytes in Header::version.
static constexpr size_t VERSION_SIZE = 4;
// Magic byte sequence constant; only declared here, so its value is defined out of line.
static const std::array<uint8_t, MAGIC_SIZE> MAGIC;
62
/*
 * Header located at the very beginning of the file image: GetHeader() reinterprets GetBase() as
 * this type. The *_off fields are used by the accessors of File as offsets from the file base.
 */
struct Header {
    std::array<uint8_t, MAGIC_SIZE> magic;      // magic bytes, MAGIC_SIZE long
    uint32_t checksum;                          // stored checksum; presumably what ValidateChecksum() verifies - TODO confirm
    std::array<uint8_t, VERSION_SIZE> version;  // version bytes, VERSION_SIZE long
    uint32_t file_size;                         // length of the whole-file span built by the accessors
    uint32_t foreign_off;                       // start offset of the foreign region (see IsExternal)
    uint32_t foreign_size;                      // size of the foreign region (see IsExternal)
    uint32_t num_classes;                       // number of uint32_t entries returned by GetClasses
    uint32_t class_idx_off;                     // offset of the entries returned by GetClasses
    uint32_t num_lnps;                          // number of entries returned by GetLineNumberProgramIndex
    uint32_t lnp_idx_off;                       // offset of the entries returned by GetLineNumberProgramIndex
    uint32_t num_literalarrays;                 // number of uint32_t entries returned by GetLiteralArrays
    uint32_t literalarray_idx_off;              // offset of the entries returned by GetLiteralArrays
    uint32_t num_indexes;                       // number of IndexHeader records returned by GetIndexHeaders
    uint32_t index_section_off;                 // offset of the IndexHeader records
};
79
/*
 * Describes one index region. GetIndexHeader() picks the record whose half-open offset range
 * [start, end) contains a given entity offset. Every *_idx_off is an offset from the file base and
 * every *_idx_size is a number of EntityId entries (the accessors multiply it by EntityId::GetSize()).
 */
struct IndexHeader {
    uint32_t start;            // first offset covered by this record (inclusive)
    uint32_t end;              // end of the covered offset range (exclusive)
    uint32_t class_idx_size;   // entry count of the class index
    uint32_t class_idx_off;    // offset of the class index
    uint32_t method_idx_size;  // entry count of the method index
    uint32_t method_idx_off;   // offset of the method index
    uint32_t field_idx_size;   // entry count of the field index
    uint32_t field_idx_off;    // offset of the field index
    uint32_t proto_idx_size;   // entry count of the proto index
    uint32_t proto_idx_off;    // offset of the proto index
};
92
/*
 * Non-owning description of a string: a length, an ASCII flag and a pointer to the bytes.
 * The pointed-to bytes are not copied, so they must outlive the StringData that refers to them.
 */
struct StringData {
    // Builds a description for len units starting at d; is_ascii always starts out false here.
    StringData(uint32_t len, const uint8_t *d) : utf16_length(len), is_ascii(false), data(d) {}
    // Default construction yields an empty description (zero length, null data). The default
    // member initializers below guarantee that; without them the members would be indeterminate.
    StringData() = default;
    uint32_t utf16_length {0};      // NOLINT(misc-non-private-member-variables-in-classes)
    bool is_ascii {false};          // NOLINT(misc-non-private-member-variables-in-classes)
    const uint8_t *data {nullptr};  // NOLINT(misc-non-private-member-variables-in-classes)
};
100
101 // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions)
102 class EntityId {
103 public:
EntityId(uint32_t offset)104 explicit constexpr EntityId(uint32_t offset) : offset_(offset) {}
105
106 EntityId() = default;
107
108 ~EntityId() = default;
109
IsValid() const110 bool IsValid() const
111 {
112 return offset_ > sizeof(Header);
113 }
114
GetOffset() const115 uint32_t GetOffset() const
116 {
117 return offset_;
118 }
119
GetSize()120 static constexpr size_t GetSize()
121 {
122 return sizeof(uint32_t);
123 }
124
operator <(const EntityId &l, const EntityId &r)125 friend bool operator<(const EntityId &l, const EntityId &r)
126 {
127 return l.offset_ < r.offset_;
128 }
129
operator ==(const EntityId &l, const EntityId &r)130 friend bool operator==(const EntityId &l, const EntityId &r)
131 {
132 return l.offset_ == r.offset_;
133 }
134
operator <<(std::ostream &stream, const EntityId &id)135 friend std::ostream &operator<<(std::ostream &stream, const EntityId &id)
136 {
137 return stream << id.offset_;
138 }
139
140 private:
141 uint32_t offset_ {0};
142 };
143
// Access mode accepted by the Open* factory functions; READ_ONLY is their default argument.
enum OpenMode { READ_ONLY, READ_WRITE, WRITE_ONLY };

// Returns the StringData stored at the given entity id (defined out of line).
StringData GetStringData(EntityId id) const;
// Returns the entity id of the literal arrays (defined out of line).
EntityId GetLiteralArraysId() const;

// Looks up a class id by the name passed as mutf8_name (defined out of line).
EntityId GetClassId(const uint8_t *mutf8_name) const;

// Looks up a class id by mutf8_name; per the function name it consults the class hash table
// (defined out of line - confirm details there).
EntityId GetClassIdFromClassHashTable(const uint8_t *mutf8_name) const;
152
GetHeader() const153 const Header *GetHeader() const
154 {
155 return reinterpret_cast<const Header *>(GetBase());
156 }
157
GetBase() const158 const uint8_t *GetBase() const
159 {
160 return reinterpret_cast<const uint8_t *>(base_.Get());
161 }
162
/*
 * Returns a reference to the ConstBytePtr member that holds the file image.
 * The reference stays valid only as long as this File object.
 */
const os::mem::ConstBytePtr &GetPtr() const
{
    return base_;
}
167
IsExternal(EntityId id) const168 bool IsExternal(EntityId id) const
169 {
170 const Header *header = GetHeader();
171 uint32_t foreign_begin = header->foreign_off;
172 uint32_t foreign_end = foreign_begin + header->foreign_size;
173 return id.GetOffset() >= foreign_begin && id.GetOffset() < foreign_end;
174 }
175
GetIdFromPointer(const uint8_t *ptr) const176 EntityId GetIdFromPointer(const uint8_t *ptr) const
177 {
178 return EntityId(ptr - GetBase());
179 }
180
GetSpanFromId(EntityId id) const181 Span<const uint8_t> GetSpanFromId(EntityId id) const
182 {
183 const Header *header = GetHeader();
184 Span file(GetBase(), header->file_size);
185 ThrowIfWithCheck(!id.IsValid() || id.GetOffset() >= file.size(), File::INVALID_FILE_OFFSET,
186 File::GET_SPAN_FROM_ID);
187 return file.Last(file.size() - id.GetOffset());
188 }
189
GetClasses() const190 Span<const uint32_t> GetClasses() const
191 {
192 const Header *header = GetHeader();
193 Span file(GetBase(), header->file_size);
194 Span class_idx_data = file.SubSpan(header->class_idx_off, header->num_classes * sizeof(uint32_t));
195 return Span(reinterpret_cast<const uint32_t *>(class_idx_data.data()), header->num_classes);
196 }
197
GetLiteralArrays() const198 Span<const uint32_t> GetLiteralArrays() const
199 {
200 const Header *header = GetHeader();
201 Span file(GetBase(), header->file_size);
202 Span litarr_idx_data = file.SubSpan(header->literalarray_idx_off, header->num_literalarrays * sizeof(uint32_t));
203 return Span(reinterpret_cast<const uint32_t *>(litarr_idx_data.data()), header->num_literalarrays);
204 }
205
GetIndexHeaders() const206 Span<const IndexHeader> GetIndexHeaders() const
207 {
208 const Header *header = GetHeader();
209 Span file(GetBase(), header->file_size);
210 auto sp = file.SubSpan(header->index_section_off, header->num_indexes * sizeof(IndexHeader));
211 return Span(reinterpret_cast<const IndexHeader *>(sp.data()), header->num_indexes);
212 }
213
GetIndexHeader(EntityId id) const214 const IndexHeader *GetIndexHeader(EntityId id) const
215 {
216 if (UNLIKELY(!id.IsValid() || id.GetOffset() >= GetHeader()->file_size)) {
217 return nullptr;
218 }
219 auto headers = GetIndexHeaders();
220 auto offset = id.GetOffset();
221 for (const auto &header : headers) {
222 if (header.start <= offset && offset < header.end) {
223 return &header;
224 }
225 }
226 return nullptr;
227 }
228
GetClassIndex(const IndexHeader *index_header) const229 Span<const EntityId> GetClassIndex(const IndexHeader *index_header) const
230 {
231 ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_CLASS_INDEX);
232 auto *header = GetHeader();
233 Span file(GetBase(), header->file_size);
234 ASSERT(index_header != nullptr);
235 auto class_idx_size = index_header->class_idx_size * EntityId::GetSize();
236 ThrowIfWithCheck(index_header->class_idx_off > header->file_size || class_idx_size > header->file_size ||
237 index_header->class_idx_off > header->file_size - class_idx_size, File::INVALID_INDEX_HEADER,
238 File::GET_CLASS_INDEX);
239 auto sp = file.SubSpan(index_header->class_idx_off, index_header->class_idx_size * EntityId::GetSize());
240 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->class_idx_size);
241 }
242
GetClassIndex(EntityId id) const243 Span<const EntityId> GetClassIndex(EntityId id) const
244 {
245 auto *index_header = GetIndexHeader(id);
246 return GetClassIndex(index_header);
247 }
248
GetMethodIndex(const IndexHeader *index_header) const249 Span<const EntityId> GetMethodIndex(const IndexHeader *index_header) const
250 {
251 ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_METHOD_INDEX);
252 auto *header = GetHeader();
253 Span file(GetBase(), header->file_size);
254 ASSERT(index_header != nullptr);
255 auto method_idx_size = index_header->method_idx_size * EntityId::GetSize();
256 ThrowIfWithCheck(index_header->method_idx_off > header->file_size || method_idx_size > header->file_size ||
257 index_header->method_idx_off > header->file_size - method_idx_size, File::INVALID_INDEX_HEADER,
258 File::GET_METHOD_INDEX);
259 auto sp = file.SubSpan(index_header->method_idx_off, index_header->method_idx_size * EntityId::GetSize());
260 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->method_idx_size);
261 }
262
GetMethodIndex(EntityId id) const263 Span<const EntityId> GetMethodIndex(EntityId id) const
264 {
265 auto *index_header = GetIndexHeader(id);
266 return GetMethodIndex(index_header);
267 }
268
GetFieldIndex(const IndexHeader *index_header) const269 Span<const EntityId> GetFieldIndex(const IndexHeader *index_header) const
270 {
271 ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_FIELD_INDEX);
272 auto *header = GetHeader();
273 Span file(GetBase(), header->file_size);
274 ASSERT(index_header != nullptr);
275 auto field_idx_size = index_header->field_idx_size * EntityId::GetSize();
276 ThrowIfWithCheck(index_header->field_idx_off > header->file_size || field_idx_size > header->file_size ||
277 index_header->field_idx_off > header->file_size - field_idx_size, File::INVALID_INDEX_HEADER,
278 File::GET_FIELD_INDEX);
279 auto sp = file.SubSpan(index_header->field_idx_off, index_header->field_idx_size * EntityId::GetSize());
280 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->field_idx_size);
281 }
282
GetFieldIndex(EntityId id) const283 Span<const EntityId> GetFieldIndex(EntityId id) const
284 {
285 auto *index_header = GetIndexHeader(id);
286 return GetFieldIndex(index_header);
287 }
288
GetProtoIndex(const IndexHeader *index_header) const289 Span<const EntityId> GetProtoIndex(const IndexHeader *index_header) const
290 {
291 ThrowIfWithCheck(index_header == nullptr, File::NULL_INDEX_HEADER, File::GET_PROTO_INDEX);
292 auto *header = GetHeader();
293 Span file(GetBase(), header->file_size);
294 ASSERT(index_header != nullptr);
295 auto proto_idx_size = index_header->proto_idx_size * EntityId::GetSize();
296 ThrowIfWithCheck(index_header->proto_idx_off > header->file_size || proto_idx_size > header->file_size ||
297 index_header->proto_idx_off > header->file_size - proto_idx_size, File::INVALID_INDEX_HEADER,
298 File::GET_PROTO_INDEX);
299 auto sp = file.SubSpan(index_header->proto_idx_off, index_header->proto_idx_size * EntityId::GetSize());
300 return Span(reinterpret_cast<const EntityId *>(sp.data()), index_header->proto_idx_size);
301 }
302
GetProtoIndex(EntityId id) const303 Span<const EntityId> GetProtoIndex(EntityId id) const
304 {
305 auto *index_header = GetIndexHeader(id);
306 return GetProtoIndex(index_header);
307 }
308
GetLineNumberProgramIndex() const309 Span<const EntityId> GetLineNumberProgramIndex() const
310 {
311 const Header *header = GetHeader();
312 Span file(GetBase(), header->file_size);
313 Span lnp_idx_data = file.SubSpan(header->lnp_idx_off, header->num_lnps * EntityId::GetSize());
314 return Span(reinterpret_cast<const EntityId *>(lnp_idx_data.data()), header->num_lnps);
315 }
316
ResolveClassIndex(EntityId id, Index idx) const317 EntityId ResolveClassIndex(EntityId id, Index idx) const
318 {
319 auto index = GetClassIndex(id);
320 if (UNLIKELY(idx >= index.Size())) {
321 return EntityId();
322 }
323 return index[idx];
324 }
325
ResolveMethodIndex(EntityId id, Index idx) const326 EntityId ResolveMethodIndex(EntityId id, Index idx) const
327 {
328 auto index = GetMethodIndex(id);
329 if (UNLIKELY(idx >= index.Size())) {
330 return EntityId();
331 }
332 return index[idx];
333 }
334
ResolveOffsetByIndex(EntityId id, Index idx) const335 EntityId ResolveOffsetByIndex(EntityId id, Index idx) const
336 {
337 auto index = GetMethodIndex(id);
338 if (UNLIKELY(idx >= index.Size())) {
339 return EntityId();
340 }
341 return index[idx];
342 }
343
ResolveFieldIndex(EntityId id, Index idx) const344 EntityId ResolveFieldIndex(EntityId id, Index idx) const
345 {
346 auto index = GetFieldIndex(id);
347 if (UNLIKELY(idx >= index.Size())) {
348 return EntityId();
349 }
350 return index[idx];
351 }
352
ResolveProtoIndex(EntityId id, Index idx) const353 EntityId ResolveProtoIndex(EntityId id, Index idx) const
354 {
355 auto index = GetProtoIndex(id);
356 if (UNLIKELY(idx >= index.Size())) {
357 return EntityId();
358 }
359 return index[idx];
360 }
361
ResolveLineNumberProgramIndex(Index32 idx) const362 EntityId ResolveLineNumberProgramIndex(Index32 idx) const
363 {
364 auto index = GetLineNumberProgramIndex();
365 if (UNLIKELY(idx >= index.Size())) {
366 return EntityId();
367 }
368 return index[idx];
369 }
370
/*
 * Returns the FILENAME member by reference; the reference is valid as long as this File object.
 */
const std::string &GetFilename() const
{
    return FILENAME;
}
375
GetPandaCache() const376 PandaCache *GetPandaCache() const
377 {
378 #ifdef ENABLE_FULL_FILE_FIELDS
379 return panda_cache_.get();
380 #else
381 LOG(WARNING, PANDAFILE) << "Not Support GetPandaCache from ohos side.";
382 return nullptr;
383 #endif
384 }
385
/*
 * Returns the FILENAME_HASH member (presumably produced by CalcFilenameHash in the
 * constructor - the constructor body is not in this header).
 */
uint32_t GetFilenameHash() const
{
    return FILENAME_HASH;
}
390
391 // note: intentionally returns uint64_t instead of the field type due to usage
GetUniqId() const392 uint64_t GetUniqId() const
393 {
394 return UNIQ_ID;
395 }
396
GetFullFileName() const397 const std::string &GetFullFileName() const
398 {
399 #ifdef ENABLE_FULL_FILE_FIELDS
400 return FULL_FILENAME;
401 #else
402 LOG(FATAL, PANDAFILE) << "Not Support GetFullFileName from ohos side.";
403 return FILENAME;
404 #endif
405 }
406
/*
 * Returns the offset of the base_ member inside File, as computed by MEMBER_OFFSET.
 * A static_assert after the class requires this value to be 0.
 */
static constexpr uint32_t GetFileBaseOffset()
{
    return MEMBER_OFFSET(File, base_);
}
411
/*
 * Returns the class hash table span most recently stored with SetClassHashTable
 * (returned by value; the span does not own the entries it refers to).
 */
Span<const panda::panda_file::EntityPairHeader> GetClassHashTable() const
{
    return class_hash_table_;
}
416
// Computes the hash for a file name (defined out of line).
static uint32_t CalcFilenameHash(const std::string &filename);

// Opens the file called filename with the requested mode (READ_ONLY by default); defined out of line.
// NOTE(review): presumably returns nullptr on failure - confirm in the implementation.
static std::unique_ptr<const File> Open(std::string_view filename, OpenMode open_mode = READ_ONLY);

// Creates a File over an in-memory image, taking ptr by rvalue reference; defined out of line.
static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr);

// Same as above, additionally passing the file name to associate with the image.
static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr, std::string_view filename);

// Opens a file located inside an uncompressed archive, given the descriptor fd, the entry size and
// its offset (parameter meanings per their names - confirm in the implementation).
static std::unique_ptr<const File> OpenUncompressedArchive(int fd, const std::string_view &filename, size_t size,
                                                           uint32_t offset, OpenMode open_mode = READ_ONLY);
427
/*
 * Stores class_hash_table for later retrieval through GetClassHashTable.
 * The method is const because class_hash_table_ is declared mutable; only the span (pointer and
 * size) is stored, so the entries must stay alive while the table is in use.
 */
void SetClassHashTable(panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table) const
{
    class_hash_table_ = class_hash_table;
}
432
// Validates the file checksum (defined out of line). cal_checksum_out is optional; presumably it
// receives the computed checksum when non-null - TODO confirm.
bool ValidateChecksum(uint32_t *cal_checksum_out = nullptr) const;

// Failure reporting helper used by the accessors above: called with the failure condition, one of
// the message constants below and a tag naming the caller (defined out of line).
void ThrowIfWithCheck(bool cond, const std::string_view& msg, const std::string_view& tag = "") const;

// Messages passed to ThrowIfWithCheck.
static constexpr const char *INVALID_FILE_OFFSET = "Invalid file offset";
static constexpr const char *NULL_INDEX_HEADER = "index_header is null";
static constexpr const char *INVALID_INDEX_HEADER = "index_header is invalid";

// Tags naming the accessor of File that reports a failure.
static constexpr const char *GET_CLASS_INDEX = "GetClassIndex";
static constexpr const char *GET_METHOD_INDEX = "GetMethodIndex";
static constexpr const char *GET_FIELD_INDEX = "GetFieldIndex";
static constexpr const char *GET_PROTO_INDEX = "GetProtoIndex";

// Tags for callers; the first four are not used inside this header (presumably used by the
// data accessor classes they are named after).
static constexpr const char *ANNOTATION_DATA_ACCESSOR = "AnnotationDataAccessor";
static constexpr const char *CLASS_DATA_ACCESSOR = "ClassDataAccessor";
static constexpr const char *CODE_DATA_ACCESSOR = "CodeDataAccessor";
static constexpr const char *FIELD_DATA_ACCESSOR = "FieldDataAccessor";
static constexpr const char *GET_SPAN_FROM_ID = "GetSpanFromId";

~File();

// File objects can be neither copied nor moved.
NO_COPY_SEMANTIC(File);
NO_MOVE_SEMANTIC(File);
456
private:
// Private constructor; instances are presumably produced by the static Open* factories
// (their definitions are not in this header).
File(std::string filename, os::mem::ConstBytePtr &&base);

// Must remain the first data member: GetFileBaseOffset() is asserted to be 0 after the class.
os::mem::ConstBytePtr base_;
const std::string FILENAME;
const uint32_t FILENAME_HASH;
// These members exist only in builds that define ENABLE_FULL_FILE_FIELDS; GetFullFileName() and
// GetPandaCache() fall back to logging when they are absent.
#ifdef ENABLE_FULL_FILE_FIELDS
const std::string FULL_FILENAME;
std::unique_ptr<PandaCache> panda_cache_;
#endif
const uint32_t UNIQ_ID;
// mutable so that SetClassHashTable() can be called on a const File.
mutable panda::Span<const panda::panda_file::EntityPairHeader> class_hash_table_;
469 };
470
// base_ has to sit at offset 0 of File; this fails the build if the member order changes.
static_assert(File::GetFileBaseOffset() == 0);
472
operator ==(const File::StringData &string_data1, const File::StringData &string_data2)473 inline bool operator==(const File::StringData &string_data1, const File::StringData &string_data2)
474 {
475 if (string_data1.utf16_length != string_data2.utf16_length) {
476 return false;
477 }
478
479 return utf::IsEqual(string_data1.data, string_data2.data);
480 }
481
operator !=(const File::StringData &string_data1, const File::StringData &string_data2)482 inline bool operator!=(const File::StringData &string_data1, const File::StringData &string_data2)
483 {
484 return !(string_data1 == string_data2);
485 }
486
operator <(const File::StringData &string_data1, const File::StringData &string_data2)487 inline bool operator<(const File::StringData &string_data1, const File::StringData &string_data2)
488 {
489 if (string_data1.utf16_length == string_data2.utf16_length) {
490 return utf::CompareMUtf8ToMUtf8(string_data1.data, string_data2.data) < 0;
491 }
492
493 return string_data1.utf16_length < string_data2.utf16_length;
494 }
495
// Checks the memory range [mem, mem + size); per the name it verifies that the range is secure
// memory (defined out of line - confirm details there).
bool CheckSecureMem(uintptr_t mem, size_t size);

/*
 * Opens a panda file or a zip archive from the location that specifies its name.
 */
std::unique_ptr<const File> OpenPandaFileOrZip(std::string_view location,
                                               panda_file::File::OpenMode open_mode = panda_file::File::READ_ONLY);

/*
 * Opens a panda file from a file buffer of the given size; tag is an optional label.
 */
std::unique_ptr<const File> OpenPandaFileFromMemory(const void *buffer, size_t size, std::string tag = "");

/*
 * Opens a panda file from a secure buffer of the given size.
 */
std::unique_ptr<const File> OpenPandaFileFromSecureMemory(uint8_t *buffer, size_t size);

/*
 * Opens a panda file from the location that specifies its name; archive_filename optionally
 * names an entry inside an archive.
 */
std::unique_ptr<const File> OpenPandaFile(std::string_view location, std::string_view archive_filename = "",
                                          panda_file::File::OpenMode open_mode = panda_file::File::READ_ONLY);

/*
 * Checks that ptr points to a valid panda file: magic.
 */
bool CheckHeader(const os::mem::ConstBytePtr &ptr, const std::string_view &filename = "");
// Checks the given file version; filename is presumably used for diagnostics - TODO confirm.
void CheckFileVersion(const std::array<uint8_t, File::VERSION_SIZE> &file_version, const std::string_view &filename);

// Last version which contains the redundant literal array in the header
constexpr std::array<uint8_t, File::VERSION_SIZE> LAST_CONTAINS_LITERAL_IN_HEADER_VERSION {12, 0, 6, 0};
// Tells whether a file of the given version contains the literal array in its header (defined out of line).
bool ContainsLiteralArrayInHeader(const std::array<uint8_t, File::VERSION_SIZE> &version);

// Name constant for archive entries; only declared here, its value is defined out of line.
// NOLINTNEXTLINE(readability-identifier-naming)
extern const char *ARCHIVE_FILENAME;
532 } // namespace panda::panda_file
533
534 namespace std {
535 template <>
536 struct hash<panda::panda_file::File::EntityId> {
operator ()panda::std::hash537 std::size_t operator()(panda::panda_file::File::EntityId id) const
538 {
539 return std::hash<uint32_t> {}(id.GetOffset());
540 }
541 };
542
543 } // namespace std
544
545 #endif // LIBPANDAFILE_FILE_H
546