1 /**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef LIBPANDAFILE_FILE_H_
17 #define LIBPANDAFILE_FILE_H_
18
#include "os/mem.h"
#include "utils/span.h"
#include "utils/utf.h"

#include <array>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <string_view>
30
31 namespace ark {
32 struct EntryFileStat;
33 } // namespace ark
34
35 namespace ark::panda_file {
36
37 class PandaCache;
38
/*
 * EntityPairHeader describes one entry that pairs the hash of a class descriptor with the
 * offset of that class's entity id. It is used to find a class quickly by its descriptor.
 */
struct EntityPairHeader {
    uint32_t descriptorHash;  // hash value of the class descriptor
    uint32_t entityIdOffset;  // entity id offset of the class
    uint32_t nextPos;         // NOTE(review): presumably the position of the next entry in a chain - confirm
};
48
49 class File {
50 public:
// 16-bit index type accepted by the Resolve{Class,Method,Field,Proto}Index helpers.
using Index = uint16_t;
// 32-bit index type accepted by ResolveLineNumberProgramIndex.
using Index32 = uint32_t;

// Number of bytes in Header::magic.
static constexpr size_t MAGIC_SIZE = 8;
// Number of bytes in Header::version.
static constexpr size_t VERSION_SIZE = 4;
// Magic byte sequence; only declared here, the value is defined out of line.
static const std::array<uint8_t, MAGIC_SIZE> MAGIC;
57
/*
 * Header found at the very start of the file data: GetHeader() reinterprets GetBase() as this struct.
 * All *Off fields are used as byte offsets from the start of the file.
 */
struct Header {
    std::array<uint8_t, MAGIC_SIZE> magic;      // MAGIC_SIZE magic bytes
    uint32_t checksum;                          // checksum printed by GetPaddedChecksum()
    std::array<uint8_t, VERSION_SIZE> version;  // VERSION_SIZE version bytes
    uint32_t fileSize;                          // length used for the span covering the whole file
    uint32_t foreignOff;                        // first offset of the range tested by IsExternal()
    uint32_t foreignSize;                       // length of the range tested by IsExternal()
    uint32_t quickenedFlag;                     // NOTE(review): meaning not visible in this header - confirm
    uint32_t numClasses;                        // number of uint32_t entries returned by GetClasses()
    uint32_t classIdxOff;                       // offset of the table returned by GetClasses()
    uint32_t numLnps;                           // number of entries returned by GetLineNumberProgramIndex()
    uint32_t lnpIdxOff;                         // offset of the table returned by GetLineNumberProgramIndex()
    uint32_t numLiteralarrays;                  // number of uint32_t entries returned by GetLiteralArrays()
    uint32_t literalarrayIdxOff;                // offset of the table returned by GetLiteralArrays()
    uint32_t numIndexes;                        // number of RegionHeader records returned by GetRegionHeaders()
    uint32_t indexSectionOff;                   // offset of the records returned by GetRegionHeaders()
};
75
/*
 * Describes one region of the file and the index tables that belong to it.
 * GetRegionHeader() selects the region whose [start, end) range contains an entity offset.
 * Each *IdxSize is an entry count (entries are EntityId::GetSize() bytes); each *IdxOff is a byte offset
 * from the start of the file.
 */
struct RegionHeader {
    uint32_t start;          // first offset covered by the region (inclusive)
    uint32_t end;            // end offset of the region (exclusive)
    uint32_t classIdxSize;   // entry count of the class index
    uint32_t classIdxOff;    // offset of the class index
    uint32_t methodIdxSize;  // entry count of the method index
    uint32_t methodIdxOff;   // offset of the method index
    uint32_t fieldIdxSize;   // entry count of the field index
    uint32_t fieldIdxOff;    // offset of the field index
    uint32_t protoIdxSize;   // entry count of the proto index
    uint32_t protoIdxOff;    // offset of the proto index
};
88
/*
 * Non-owning description of a string: a length plus a pointer to its bytes.
 * The comparison operators declared after File compare `data` with the utf:: MUTF-8 helpers.
 *
 * A default-constructed StringData is empty (zero length, not ASCII, null data); previously its
 * members were left indeterminate.
 */
struct StringData {
    // Builds a description of `len` UTF-16 units stored at `d`; isAscii always starts out false.
    StringData(uint32_t len, const uint8_t *d) : utf16Length(len), isAscii(false), data(d) {}
    StringData() = default;
    uint32_t utf16Length {0};       // NOLINT(misc-non-private-member-variables-in-classes)
    bool isAscii {false};           // NOLINT(misc-non-private-member-variables-in-classes)
    const uint8_t *data {nullptr};  // NOLINT(misc-non-private-member-variables-in-classes)
};
96
97 // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions, hicpp-special-member-functions)
98 class EntityId {
99 public:
EntityId(uint32_t offset)100 explicit constexpr EntityId(uint32_t offset) : offset_(offset) {}
101
102 EntityId() = default;
103
104 ~EntityId() = default;
105
IsValid() const106 bool IsValid() const
107 {
108 return offset_ > sizeof(Header);
109 }
110
GetOffset() const111 uint32_t GetOffset() const
112 {
113 return offset_;
114 }
115
GetSize()116 static constexpr size_t GetSize()
117 {
118 return sizeof(uint32_t);
119 }
120
operator <(const EntityId &l, const EntityId &r)121 friend bool operator<(const EntityId &l, const EntityId &r)
122 {
123 return l.offset_ < r.offset_;
124 }
125
operator ==(const EntityId &l, const EntityId &r)126 friend bool operator==(const EntityId &l, const EntityId &r)
127 {
128 return l.offset_ == r.offset_;
129 }
130
operator <<(std::ostream &stream, const EntityId &id)131 friend std::ostream &operator<<(std::ostream &stream, const EntityId &id)
132 {
133 return stream << id.offset_;
134 }
135
136 private:
137 uint32_t offset_ {0};
138 };
139
// Access mode passed to the Open* functions; READ_ONLY is their default.
enum OpenMode { READ_ONLY, READ_WRITE, WRITE_ONLY };
141
// Returns the StringData for entity `id`. Defined out of line; decoding details are not visible here.
StringData GetStringData(EntityId id) const;
// Defined out of line. NOTE(review): presumably the id of the literal-array data - confirm in the definition.
EntityId GetLiteralArraysId() const;

// Defined out of line. NOTE(review): presumably looks a class up by its MUTF-8 name - confirm the
// not-found result in the definition.
EntityId GetClassId(const uint8_t *mutf8Name) const;

// Defined out of line. NOTE(review): presumably the same lookup performed through the class hash table
// (see GetClassHashTable) - confirm in the definition.
EntityId GetClassIdFromClassHashTable(const uint8_t *mutf8Name) const;
148
GetHeader() const149 const Header *GetHeader() const
150 {
151 return reinterpret_cast<const Header *>(GetBase());
152 }
153
GetBase() const154 const uint8_t *GetBase() const
155 {
156 return reinterpret_cast<const uint8_t *>(base_.Get());
157 }
158
// Returns a reference to the ConstBytePtr member that holds the file data (no copy).
const os::mem::ConstBytePtr &GetPtr() const
{
    return base_;
}
163
IsExternal(EntityId id) const164 bool IsExternal(EntityId id) const
165 {
166 const Header *header = GetHeader();
167 uint32_t foreignBegin = header->foreignOff;
168 uint32_t foreignEnd = foreignBegin + header->foreignSize;
169 return id.GetOffset() >= foreignBegin && id.GetOffset() < foreignEnd;
170 }
171
GetIdFromPointer(const uint8_t *ptr) const172 EntityId GetIdFromPointer(const uint8_t *ptr) const
173 {
174 return EntityId(ptr - GetBase());
175 }
176
GetSpanFromId(EntityId id) const177 Span<const uint8_t> GetSpanFromId(EntityId id) const
178 {
179 const Header *header = GetHeader();
180 Span file(GetBase(), header->fileSize);
181 return file.Last(file.size() - id.GetOffset());
182 }
183
GetClasses() const184 Span<const uint32_t> GetClasses() const
185 {
186 const Header *header = GetHeader();
187 Span file(GetBase(), header->fileSize);
188 Span classIdxData = file.SubSpan(header->classIdxOff, header->numClasses * sizeof(uint32_t));
189 return Span(reinterpret_cast<const uint32_t *>(classIdxData.data()), header->numClasses);
190 }
191
GetLiteralArrays() const192 Span<const uint32_t> GetLiteralArrays() const
193 {
194 const Header *header = GetHeader();
195 Span file(GetBase(), header->fileSize);
196 Span litarrIdxData = file.SubSpan(header->literalarrayIdxOff, header->numLiteralarrays * sizeof(uint32_t));
197 return Span(reinterpret_cast<const uint32_t *>(litarrIdxData.data()), header->numLiteralarrays);
198 }
199
GetRegionHeaders() const200 Span<const RegionHeader> GetRegionHeaders() const
201 {
202 const Header *header = GetHeader();
203 Span file(GetBase(), header->fileSize);
204 auto sp = file.SubSpan(header->indexSectionOff, header->numIndexes * sizeof(RegionHeader));
205 return Span(reinterpret_cast<const RegionHeader *>(sp.data()), header->numIndexes);
206 }
207
GetRegionHeader(EntityId id) const208 const RegionHeader *GetRegionHeader(EntityId id) const
209 {
210 auto headers = GetRegionHeaders();
211 auto offset = id.GetOffset();
212 for (const auto &header : headers) {
213 if (header.start <= offset && offset < header.end) {
214 return &header;
215 }
216 }
217 return nullptr;
218 }
219
GetClassIndex(const RegionHeader *regionHeader) const220 Span<const EntityId> GetClassIndex(const RegionHeader *regionHeader) const
221 {
222 auto *header = GetHeader();
223 Span file(GetBase(), header->fileSize);
224 ASSERT(regionHeader != nullptr);
225 auto sp = file.SubSpan(regionHeader->classIdxOff, regionHeader->classIdxSize * EntityId::GetSize());
226 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->classIdxSize);
227 }
228
GetClassIndex(EntityId id) const229 Span<const EntityId> GetClassIndex(EntityId id) const
230 {
231 auto *regionHeader = GetRegionHeader(id);
232 ASSERT(regionHeader != nullptr);
233 return GetClassIndex(regionHeader);
234 }
235
GetMethodIndex(const RegionHeader *regionHeader) const236 Span<const EntityId> GetMethodIndex(const RegionHeader *regionHeader) const
237 {
238 auto *header = GetHeader();
239 Span file(GetBase(), header->fileSize);
240 ASSERT(regionHeader != nullptr);
241 auto sp = file.SubSpan(regionHeader->methodIdxOff, regionHeader->methodIdxSize * EntityId::GetSize());
242 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->methodIdxSize);
243 }
244
GetMethodIndex(EntityId id) const245 Span<const EntityId> GetMethodIndex(EntityId id) const
246 {
247 auto *regionHeader = GetRegionHeader(id);
248 ASSERT(regionHeader != nullptr);
249 return GetMethodIndex(regionHeader);
250 }
251
GetFieldIndex(const RegionHeader *regionHeader) const252 Span<const EntityId> GetFieldIndex(const RegionHeader *regionHeader) const
253 {
254 auto *header = GetHeader();
255 Span file(GetBase(), header->fileSize);
256 ASSERT(regionHeader != nullptr);
257 auto sp = file.SubSpan(regionHeader->fieldIdxOff, regionHeader->fieldIdxSize * EntityId::GetSize());
258 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->fieldIdxSize);
259 }
260
GetFieldIndex(EntityId id) const261 Span<const EntityId> GetFieldIndex(EntityId id) const
262 {
263 auto *regionHeader = GetRegionHeader(id);
264 ASSERT(regionHeader != nullptr);
265 return GetFieldIndex(regionHeader);
266 }
267
GetProtoIndex(const RegionHeader *regionHeader) const268 Span<const EntityId> GetProtoIndex(const RegionHeader *regionHeader) const
269 {
270 auto *header = GetHeader();
271 Span file(GetBase(), header->fileSize);
272 ASSERT(regionHeader != nullptr);
273 auto sp = file.SubSpan(regionHeader->protoIdxOff, regionHeader->protoIdxSize * EntityId::GetSize());
274 return Span(reinterpret_cast<const EntityId *>(sp.data()), regionHeader->protoIdxSize);
275 }
276
GetProtoIndex(EntityId id) const277 Span<const EntityId> GetProtoIndex(EntityId id) const
278 {
279 auto *regionHeader = GetRegionHeader(id);
280 ASSERT(regionHeader != nullptr);
281 return GetProtoIndex(regionHeader);
282 }
283
GetLineNumberProgramIndex() const284 Span<const EntityId> GetLineNumberProgramIndex() const
285 {
286 const Header *header = GetHeader();
287 Span file(GetBase(), header->fileSize);
288 Span lnpIdxData = file.SubSpan(header->lnpIdxOff, header->numLnps * EntityId::GetSize());
289 return Span(reinterpret_cast<const EntityId *>(lnpIdxData.data()), header->numLnps);
290 }
291
ResolveClassIndex(EntityId id, Index idx) const292 EntityId ResolveClassIndex(EntityId id, Index idx) const
293 {
294 auto index = GetClassIndex(id);
295 return index[idx];
296 }
297
ResolveMethodIndex(EntityId id, Index idx) const298 EntityId ResolveMethodIndex(EntityId id, Index idx) const
299 {
300 auto index = GetMethodIndex(id);
301 return index[idx];
302 }
303
ResolveFieldIndex(EntityId id, Index idx) const304 EntityId ResolveFieldIndex(EntityId id, Index idx) const
305 {
306 auto index = GetFieldIndex(id);
307 return index[idx];
308 }
309
ResolveProtoIndex(EntityId id, Index idx) const310 EntityId ResolveProtoIndex(EntityId id, Index idx) const
311 {
312 auto index = GetProtoIndex(id);
313 return index[idx];
314 }
315
ResolveLineNumberProgramIndex(Index32 idx) const316 EntityId ResolveLineNumberProgramIndex(Index32 idx) const
317 {
318 auto index = GetLineNumberProgramIndex();
319 return index[idx];
320 }
321
// Returns the stored file name by reference; the reference is to a member of this File.
const std::string &GetFilename() const
{
    return filename_;
}
326
// Returns a non-owning pointer to the cache held by this File (ownership stays with the unique_ptr member).
PandaCache *GetPandaCache() const
{
    return pandaCache_.get();
}
331
// Returns the stored file name hash.
uint32_t GetFilenameHash() const
{
    return filenameHash_;
}
336
337 // note: intentionally returns uint64_t instead of the field type due to usage
GetUniqId() const338 uint64_t GetUniqId() const
339 {
340 return uniqId_;
341 }
342
// Returns the stored full file name by reference; the reference is to a member of this File.
const std::string &GetFullFileName() const
{
    return fullFilename_;
}
347
// Returns the offset of the base_ member inside File, as computed by MEMBER_OFFSET.
// A static_assert after the class requires this value to be 0.
static constexpr uint32_t GetFileBaseOffset()
{
    return MEMBER_OFFSET(File, base_);
}
352
// Returns the span last stored with SetClassHashTable (a copy of the span, not of the entries).
Span<const ark::panda_file::EntityPairHeader> GetClassHashTable() const
{
    return classHashTable_;
}
357
GetPaddedChecksum() const358 std::string GetPaddedChecksum() const
359 {
360 std::stringstream paddedChecksum;
361 // Length of hexed maximum unit32_t value of checksum (0xFFFFFFFF) is equal to 8
362 constexpr size_t CHECKSUM_LENGTH = 8;
363 paddedChecksum << std::setfill('0') << std::setw(CHECKSUM_LENGTH) << std::hex << GetHeader()->checksum;
364 return paddedChecksum.str();
365 }
366
// Computes a hash of `filename`. Defined out of line; the hash algorithm is not visible in this header.
static uint32_t CalcFilenameHash(const std::string &filename);

// Opens the file named `filename` with the given mode. Defined out of line.
// NOTE(review): the result on failure is not visible here (presumably a null pointer) - confirm.
static std::unique_ptr<const File> Open(std::string_view filename, OpenMode openMode = READ_ONLY);

// Creates a File from data already in memory, taking over `ptr`. Defined out of line.
static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr);

// Same as above, with a file name supplied for the resulting File. Defined out of line.
static std::unique_ptr<const File> OpenFromMemory(os::mem::ConstBytePtr &&ptr, std::string_view filename);

// Defined out of line. NOTE(review): presumably opens `size` bytes at `offset` of descriptor `fd`
// as an uncompressed archive entry - confirm in the definition.
static std::unique_ptr<const File> OpenUncompressedArchive(int fd, const std::string_view &filename, size_t size,
                                                           uint32_t offset, OpenMode openMode = READ_ONLY);
377
// Stores the given span as the class hash table. Callable on a const File because the
// member is declared mutable; only the span is stored, the entries are not copied.
void SetClassHashTable(ark::Span<const ark::panda_file::EntityPairHeader> classHashTable) const
{
    classHashTable_ = classHashTable;
}
382
// Destructor is defined out of line.
~File();

// File objects can be neither copied nor moved.
NO_COPY_SEMANTIC(File);
NO_MOVE_SEMANTIC(File);

private:
// Private constructor; the static Open* functions are the public way to get a File.
File(std::string filename, os::mem::ConstBytePtr &&base);

// File data. Must stay the first member: a static_assert after the class requires its offset to be 0.
os::mem::ConstBytePtr base_;
// Name returned by GetFilename().
const std::string filename_;
// Hash returned by GetFilenameHash().
const uint32_t filenameHash_;
// Name returned by GetFullFileName().
const std::string fullFilename_;
// Cache owned by this File; exposed through GetPandaCache().
std::unique_ptr<PandaCache> pandaCache_;
// Id returned (widened to uint64_t) by GetUniqId().
const uint32_t uniqId_;
// Mutable so that SetClassHashTable() can be const.
mutable ark::Span<const ark::panda_file::EntityPairHeader> classHashTable_;
398 };
399
// Compile-time guard: base_ must sit at offset 0 of File.
static_assert(File::GetFileBaseOffset() == 0);
401
operator ==(const File::StringData &stringData1, const File::StringData &stringData2)402 inline bool operator==(const File::StringData &stringData1, const File::StringData &stringData2)
403 {
404 if (stringData1.utf16Length != stringData2.utf16Length) {
405 return false;
406 }
407
408 return utf::IsEqual(stringData1.data, stringData2.data);
409 }
410
// Negation of operator== for StringData.
inline bool operator!=(const File::StringData &stringData1, const File::StringData &stringData2)
{
    return !(stringData1 == stringData2);
}
415
operator <(const File::StringData &stringData1, const File::StringData &stringData2)416 inline bool operator<(const File::StringData &stringData1, const File::StringData &stringData2)
417 {
418 if (stringData1.utf16Length == stringData2.utf16Length) {
419 return utf::CompareMUtf8ToMUtf8(stringData1.data, stringData2.data) < 0;
420 }
421
422 return stringData1.utf16Length < stringData2.utf16Length;
423 }
424
/*
 * OpenPandaFileOrZip opens the panda file or zip archive at the location specified by name.
 */
std::unique_ptr<const File> OpenPandaFileOrZip(std::string_view location,
                                               panda_file::File::OpenMode openMode = panda_file::File::READ_ONLY);
430
/*
 * OpenPandaFileFromMemory opens a panda file from a file buffer of `size` bytes.
 */
std::unique_ptr<const File> OpenPandaFileFromMemory(const void *buffer, size_t size, std::string tag = "");
435
/*
 * OpenPandaFile opens the panda file at the location specified by name.
 */
std::unique_ptr<const File> OpenPandaFile(std::string_view location, std::string_view archiveFilename = "",
                                          panda_file::File::OpenMode openMode = panda_file::File::READ_ONLY);
441
/*
 * Checks that ptr points to a valid panda file by inspecting its magic.
 */
bool CheckHeader(const os::mem::ConstBytePtr &ptr, const std::string_view &filename = "");
446
// Only declared here; the string is defined in another translation unit.
// NOLINTNEXTLINE(readability-identifier-naming)
extern const char *ARCHIVE_FILENAME;
449 } // namespace ark::panda_file
450
451 namespace std {
452 template <>
453 struct hash<ark::panda_file::File::EntityId> {
operator ()ark::std::hash454 std::size_t operator()(ark::panda_file::File::EntityId id) const
455 {
456 return std::hash<uint32_t> {}(id.GetOffset());
457 }
458 };
459 } // namespace std
460
461 #endif
462