1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <codecvt>
16 #include <locale>
17 
18 #include "verifier.h"
19 #include "class_data_accessor-inl.h"
20 #include "libpandafile/util/collect_util.h"
21 #include "zlib.h"
22 
23 namespace panda::verifier {
24 
Verifier(const std::string &filename)25 Verifier::Verifier(const std::string &filename)
26 {
27     auto file_to_verify = panda_file::File::Open(filename);
28     file_.swap(file_to_verify);
29 }
30 
Verify()31 bool Verifier::Verify()
32 {
33     if (!VerifyChecksum()) {
34         return false;
35     }
36 
37     if (!CollectIdInfos()) {
38         return false;
39     }
40 
41     if (!VerifyConstantPool()) {
42         return false;
43     }
44 
45     return true;
46 }
47 
CollectIdInfos()48 bool Verifier::CollectIdInfos()
49 {
50     if (file_ == nullptr) {
51         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
52         return false;
53     }
54     GetConstantPoolIds();
55     if (include_literal_array_ids) {
56         GetLiteralIds();
57     }
58     return CheckConstantPool(verifier::ActionType::COLLECTINFOS);
59 }
60 
VerifyChecksum()61 bool Verifier::VerifyChecksum()
62 {
63     if (file_ == nullptr) {
64         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
65         return false;
66     }
67     uint32_t file_size = file_->GetHeader()->file_size;
68     ASSERT(file_size > FILE_CONTENT_OFFSET);
69     uint32_t cal_checksum = adler32(1, file_->GetBase() + FILE_CONTENT_OFFSET, file_size - FILE_CONTENT_OFFSET);
70     return file_->GetHeader()->checksum == cal_checksum;
71 }
72 
VerifyConstantPool()73 bool Verifier::VerifyConstantPool()
74 {
75     if (file_ == nullptr) {
76         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
77         return false;
78     }
79 
80     if (!CheckConstantPoolIndex()) {
81         return false;
82     }
83 
84     if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
85         return false;
86     }
87 
88     if (!VerifyLiteralArrays()) {
89         return false;
90     }
91 
92     return true;
93 }
94 
VerifyRegisterIndex()95 bool Verifier::VerifyRegisterIndex()
96 {
97     if (file_ == nullptr) {
98         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
99         return false;
100     }
101 
102     for (const auto id : all_method_ids_) {
103         const panda_file::File::EntityId method_id = panda_file::File::EntityId(id);
104         panda_file::MethodDataAccessor method_accessor {*file_, method_id};
105         if (!method_accessor.GetCodeId().has_value()) {
106             continue;
107         }
108         panda_file::CodeDataAccessor code_data(*file_, method_accessor.GetCodeId().value());
109         const uint64_t reg_nums = code_data.GetNumVregs();
110         const uint64_t arg_nums = code_data.GetNumArgs();
111         const std::optional<uint64_t> valid_regs_num = SafeAdd(reg_nums, arg_nums);
112         if (!valid_regs_num.has_value()) {
113             LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
114             return false;
115         }
116         if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
117             LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
118             return false;
119         }
120         auto bc_ins = BytecodeInstruction(code_data.GetInstructions());
121         const auto bc_ins_last = bc_ins.JumpTo(code_data.GetCodeSize());
122         ASSERT(arg_nums >= DEFAULT_ARGUMENT_NUMBER);
123         while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
124             const size_t count = GetVRegCount(bc_ins);
125             if (count == 0) { // Skip instructions that do not use registers
126                 bc_ins = bc_ins.GetNext();
127                 continue;
128             }
129             if (!CheckVRegIdx(bc_ins, count, valid_regs_num.value())) {
130                 return false;
131             }
132             bc_ins = bc_ins.GetNext();
133         }
134     }
135     return true;
136 }
137 
VerifyConstantPoolIndex()138 bool Verifier::VerifyConstantPoolIndex()
139 {
140     if (file_ == nullptr) {
141         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
142         return false;
143     }
144 
145     if (!CheckConstantPoolIndex()) {
146         return false;
147     }
148 
149     return true;
150 }
151 
VerifyConstantPoolContent()152 bool Verifier::VerifyConstantPoolContent()
153 {
154     if (file_ == nullptr) {
155         LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
156         return false;
157     }
158 
159     if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
160         return false;
161     }
162 
163     if (!VerifyLiteralArrays()) {
164         return false;
165     }
166 
167     return true;
168 }
169 
GetConstantPoolIds()170 void Verifier::GetConstantPoolIds()
171 {
172     if (constant_pool_ids_.size() != 0) {
173         return;
174     }
175     auto index_headers = file_->GetIndexHeaders();
176     for (const auto &index_header : index_headers) {
177         auto region_indexs = file_->GetMethodIndex(&index_header);
178         for (auto &index : region_indexs) {
179             constant_pool_ids_.push_back(index.GetOffset());
180         }
181     }
182 }
183 
GetLiteralIds()184 void Verifier::GetLiteralIds()
185 {
186     if (literal_ids_.size() != 0) {
187         return;
188     }
189 
190     if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
191         const auto literal_arrays = file_->GetLiteralArrays();
192         PushToLiteralIds(literal_arrays);
193     } else {
194         panda::libpandafile::CollectUtil collect_util;
195         std::unordered_set<uint32_t> literal_array_ids;
196         collect_util.CollectLiteralArray(*file_, literal_array_ids);
197         PushToLiteralIds(literal_array_ids);
198     }
199 }
200 
201 template <typename T>
PushToLiteralIds(T &ids)202 void Verifier::PushToLiteralIds(T &ids)
203 {
204     for (const auto id : ids) {
205         literal_ids_.push_back(id);
206     }
207 }
208 
CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id)209 bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id)
210 {
211     switch (type) {
212         case verifier::ActionType::CHECKCONSTPOOLCONTENT: {
213             return CheckConstantPoolMethodContent(method_id);
214         }
215         case verifier::ActionType::COLLECTINFOS: {
216             all_method_ids_.push_back(method_id.GetOffset());
217             return CollectIdInInstructions(method_id);
218         }
219         default: {
220             return true;
221         }
222     }
223 }
224 
CollectIdInInstructions(const panda_file::File::EntityId &method_id)225 bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_id)
226 {
227     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
228     ASSERT(method_accessor.GetCodeId().has_value());
229     panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
230     const auto ins_size = code_accessor.GetCodeSize();
231     const auto ins_arr = code_accessor.GetInstructions();
232 
233     auto bc_ins = BytecodeInstruction(ins_arr);
234     const auto bc_ins_last = bc_ins.JumpTo(ins_size);
235 
236     while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
237         if (!bc_ins.IsPrimaryOpcodeValid()) {
238             LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
239             return false;
240         }
241         if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
242             // the idx of any instruction with a literal id is 0
243             // except defineclasswithbuffer/callruntime.definesendableclass
244             size_t idx = bc_ins.GetLiteralIndex();
245             const auto arg_literal_idx = bc_ins.GetId(idx).AsIndex();
246             const auto literal_id = file_->ResolveMethodIndex(method_id, arg_literal_idx);
247             ins_literal_ids_.insert(literal_id.GetOffset());
248         }
249         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
250             const auto arg_method_idx = bc_ins.GetId().AsIndex();
251             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
252             ins_method_ids_.insert(arg_method_id.GetOffset());
253         }
254         if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
255             const auto arg_string_idx = bc_ins.GetId().AsIndex();
256             const auto string_id = file_->ResolveOffsetByIndex(method_id, arg_string_idx);
257             ins_string_ids_.insert(string_id.GetOffset());
258         }
259         bc_ins = bc_ins.GetNext();
260     }
261     return true;
262 }
263 
CollectModuleLiteralId(const panda_file::File::EntityId &field_id)264 void Verifier::CollectModuleLiteralId(const panda_file::File::EntityId &field_id)
265 {
266     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
267     const auto literal_id = field_accessor.GetValue<uint32_t>().value();
268     if (std::find(literal_ids_.begin(), literal_ids_.end(), literal_id) != literal_ids_.end()) {
269         module_literals_.insert(literal_id);
270     }
271 }
272 
CheckConstantPool(const verifier::ActionType type)273 bool Verifier::CheckConstantPool(const verifier::ActionType type)
274 {
275     const auto class_idx = file_->GetClasses();
276     for (size_t i = 0; i < class_idx.size(); i++) {
277         uint32_t class_id = class_idx[i];
278         if (class_id > file_->GetHeader()->file_size) {
279             LOG(ERROR, VERIFIER) << "Binary file corrupted. out of bounds (0x" << std::hex
280                                  << file_->GetHeader()->file_size;
281             return false;
282         }
283         const panda_file::File::EntityId record_id {class_id};
284         if (!file_->IsExternal(record_id)) {
285             panda_file::ClassDataAccessor class_accessor {*file_, record_id};
286             bool check_res = true;
287             class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
288                 check_res = check_res && CheckConstantPoolActions(type, method_accessor.GetMethodId());
289             });
290             if (!check_res) {
291                 return false;
292             }
293             if (type == verifier::ActionType::COLLECTINFOS) {
294                 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
295                     CollectModuleLiteralId(field_accessor.GetFieldId());
296                 });
297             }
298         }
299     }
300 
301     return true;
302 }
303 
GetVRegCount(const BytecodeInstruction &bc_ins)304 size_t Verifier::GetVRegCount(const BytecodeInstruction &bc_ins)
305 {
306     size_t idx = 0; // Represents the idxTH register index in an instruction
307     BytecodeInstruction::Format format = bc_ins.GetFormat();
308     while (bc_ins.HasVReg(format, idx)) {
309         idx++;
310     }
311     return idx;
312 }
313 
IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)314 bool Verifier::IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction &bc_ins,
315                                               const size_t count, uint64_t valid_regs_num)
316 {
317     ASSERT(bc_ins.IsRangeInstruction());
318 
319     uint64_t reg_idx = bc_ins.GetVReg(FIRST_INDEX);
320     if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) { // for [format: +AA/+AAAA vBB vCC], vBB can be verified here
321         return true;
322     }
323 
324     std::optional<uint64_t> max_ins_reg_idx_opt = bc_ins.GetRangeInsLastRegIdx();
325     if (!max_ins_reg_idx_opt.has_value()) {
326         LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
327         return true;
328     }
329 
330     reg_idx = max_ins_reg_idx_opt.value();
331     if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) {
332         return true;
333     }
334 
335     return false;
336 }
337 
IsRegIdxOutOfBounds(uint64_t reg_idx, uint64_t valid_regs_num)338 bool Verifier::IsRegIdxOutOfBounds(uint64_t reg_idx, uint64_t valid_regs_num)
339 {
340     if (reg_idx >= valid_regs_num) {
341         LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
342                              << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
343         return true;
344     }
345     return false;
346 }
347 
CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)348 bool Verifier::CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)
349 {
350     if (bc_ins.IsRangeInstruction() &&
351         IsRangeInstAndHasInvalidRegIdx(bc_ins, count, valid_regs_num)) {
352         return false;
353     }
354     for (size_t idx = 0; idx < count; idx++) { // Represents the idxTH register index in an instruction
355         uint16_t reg_idx = bc_ins.GetVReg(idx);
356         if (reg_idx >= valid_regs_num) {
357             LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
358                                  << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
359             return false;
360         }
361     }
362     return true;
363 }
364 
VerifyMethodId(const uint32_t &method_id) const365 bool Verifier::VerifyMethodId(const uint32_t &method_id) const
366 {
367     auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), method_id);
368     if (iter == constant_pool_ids_.end() ||
369         (std::find(literal_ids_.begin(), literal_ids_.end(), method_id) != literal_ids_.end()) ||
370         ins_string_ids_.count(method_id)) {
371         LOG(ERROR, VERIFIER) << "Fail to verify method id. method_id(0x" << std::hex << method_id << ")!";
372         return false;
373     }
374     return true;
375 }
376 
VerifyLiteralId(const uint32_t &literal_id) const377 bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const
378 {
379     auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), literal_id);
380     if (iter == constant_pool_ids_.end() ||
381         (std::find(all_method_ids_.begin(), all_method_ids_.end(), literal_id) != all_method_ids_.end()) ||
382         ins_string_ids_.count(literal_id)) {
383         LOG(ERROR, VERIFIER) << "Fail to verify literal id. literal_id(0x" << std::hex << literal_id << ")!";
384         return false;
385     }
386     return true;
387 }
388 
VerifyStringId(const uint32_t &string_id) const389 bool Verifier::VerifyStringId(const uint32_t &string_id) const
390 {
391     auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), string_id);
392     if (iter == constant_pool_ids_.end() ||
393         ins_method_ids_.count(string_id) ||
394         (std::find(literal_ids_.begin(), literal_ids_.end(), string_id) != literal_ids_.end())) {
395         LOG(ERROR, VERIFIER) << "Fail to verify string id. string_id(0x" << std::hex << string_id << ")!";
396         return false;
397     }
398     return true;
399 }
400 
GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins)401 std::optional<int64_t> Verifier::GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins)
402 {
403     std::optional<int64_t> first_imm = std::optional<int64_t> {};
404     size_t index = 0;
405     const auto format = bc_ins.GetFormat();
406     if (bc_ins.HasImm(format, index)) {
407         first_imm = bc_ins.GetImm64(index);
408     }
409 
410     return first_imm;
411 }
412 
GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor)413 std::optional<uint64_t> Verifier::GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor)
414 {
415     std::optional<uint64_t> slot_number {};
416     method_accessor.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
417         panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
418         auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
419         if (::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
420             uint32_t elem_count = ada.GetCount();
421             for (uint32_t i = 0; i < elem_count; i++) {
422                 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
423                 auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
424                 if (::strcmp("SlotNumber", elem_name) == 0) {
425                     slot_number = adae.GetScalarValue().GetValue();
426                 }
427             }
428         }
429     });
430     return slot_number;
431 }
432 
VerifyMethodIdInLiteralArray(const uint32_t &id)433 bool Verifier::VerifyMethodIdInLiteralArray(const uint32_t &id)
434 {
435     const auto method_id = panda_file::File::EntityId(id).GetOffset();
436     auto iter = std::find(all_method_ids_.begin(), all_method_ids_.end(), method_id);
437     if (iter == all_method_ids_.end()) {
438         LOG(ERROR, VERIFIER) << "Invalid method id(0x" << id << ") in literal array";
439         return false;
440     }
441     return true;
442 }
443 
VerifyStringIdInLiteralArray(const uint32_t &id)444 bool Verifier::VerifyStringIdInLiteralArray(const uint32_t &id)
445 {
446     auto string_data = file_->GetStringData(panda_file::File::EntityId(id));
447     if (string_data.data == nullptr) {
448         LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << id << ")!";
449         return false;
450     }
451     auto desc = std::string(utf::Mutf8AsCString(string_data.data));
452     std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
453     std::wstring utf16_desc = converter.from_bytes(desc);
454     if (string_data.utf16_length != utf16_desc.length()) {
455         LOG(ERROR, VERIFIER) << "Invalid string value(0x" << id << ") in literal array";
456         return false;
457     }
458     return true;
459 }
460 
VerifyLiteralIdInLiteralArray(const uint32_t &id)461 bool Verifier::VerifyLiteralIdInLiteralArray(const uint32_t &id)
462 {
463     auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), id);
464     if (iter == literal_ids_.end()) {
465         LOG(ERROR, VERIFIER) << "Invalid literal id(0x" << id << ") in literal array";
466         return false;
467     }
468     return true;
469 }
470 
VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id)471 bool Verifier::VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id)
472 {
473     auto sp = file_->GetSpanFromId(literal_id);
474     const auto literal_vals_num = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
475     for (size_t i = 0; i < literal_vals_num; i += 2U) { // 2u skip literal item
476         const auto tag = static_cast<panda_file::LiteralTag>(panda_file::helpers::Read<panda_file::TAG_SIZE>(&sp));
477         switch (tag) {
478             case panda_file::LiteralTag::TAGVALUE:
479             case panda_file::LiteralTag::BOOL:
480             case panda_file::LiteralTag::ACCESSOR:
481             case panda_file::LiteralTag::NULLVALUE:
482             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
483                 sp = sp.SubSpan(sizeof(uint8_t)); // run next sp
484                 break;
485             }
486             case panda_file::LiteralTag::METHODAFFILIATE: {
487                 sp = sp.SubSpan(sizeof(uint16_t));
488                 break;
489             }
490             case panda_file::LiteralTag::INTEGER:
491             case panda_file::LiteralTag::FLOAT:
492             case panda_file::LiteralTag::GETTER:
493             case panda_file::LiteralTag::SETTER:
494             case panda_file::LiteralTag::GENERATORMETHOD:
495             case panda_file::LiteralTag::LITERALBUFFERINDEX:
496             case panda_file::LiteralTag::ASYNCGENERATORMETHOD: {
497                 sp = sp.SubSpan(sizeof(uint32_t));
498                 break;
499             }
500             case panda_file::LiteralTag::DOUBLE: {
501                 const auto value = bit_cast<double>(panda_file::helpers::Read<sizeof(uint64_t)>(&sp));
502                 // true: High 16-bit of double value >= 0xffff
503                 if (IsImpureNaN(value)) {
504                     LOG(ERROR, VERIFIER) << "Fail to verify double value " << value << " in literal array";
505                     return false;
506                 }
507                 break;
508             }
509             case panda_file::LiteralTag::ARRAY_U1:
510             case panda_file::LiteralTag::ARRAY_U8:
511             case panda_file::LiteralTag::ARRAY_I8:
512             case panda_file::LiteralTag::ARRAY_U16:
513             case panda_file::LiteralTag::ARRAY_I16:
514             case panda_file::LiteralTag::ARRAY_U32:
515             case panda_file::LiteralTag::ARRAY_I32:
516             case panda_file::LiteralTag::ARRAY_U64:
517             case panda_file::LiteralTag::ARRAY_I64:
518             case panda_file::LiteralTag::ARRAY_F32:
519             case panda_file::LiteralTag::ARRAY_F64:
520             case panda_file::LiteralTag::ARRAY_STRING: {
521                 i = literal_vals_num;
522                 break;
523             }
524             case panda_file::LiteralTag::STRING: {
525                 panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
526                 break;
527             }
528             case panda_file::LiteralTag::METHOD: {
529                 const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
530                 inner_method_map_.emplace(literal_id.GetOffset(), value);
531                 if (!VerifyMethodIdInLiteralArray(value)) {
532                     return false;
533                 }
534                 break;
535             }
536             case panda_file::LiteralTag::LITERALARRAY: {
537                 const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
538                 inner_literal_map_.emplace(literal_id.GetOffset(), value);
539                 if (!VerifyLiteralIdInLiteralArray(value)) {
540                     return false;
541                 }
542                 break;
543             }
544             default: {
545                 LOG(ERROR, VERIFIER) << "Invalid literal tag";
546                 return false;
547             }
548         }
549     }
550     return true;
551 }
552 
IsModuleLiteralId(const panda_file::File::EntityId &id) const553 bool Verifier::IsModuleLiteralId(const panda_file::File::EntityId &id) const
554 {
555     return module_literals_.find(id.GetOffset()) != module_literals_.end();
556 }
557 
VerifyLiteralArrays()558 bool Verifier::VerifyLiteralArrays()
559 {
560     for (const auto &arg_literal_id : literal_ids_) {
561         const auto literal_id = panda_file::File::EntityId(arg_literal_id);
562         if (!IsModuleLiteralId(literal_id) && !VerifySingleLiteralArray(literal_id)) {
563             return false;
564         }
565     }
566     return true;
567 }
568 
PrecomputeInstructionIndices(const BytecodeInstruction &bc_ins_start, const BytecodeInstruction &bc_ins_last)569 bool Verifier::PrecomputeInstructionIndices(const BytecodeInstruction &bc_ins_start,
570                                             const BytecodeInstruction &bc_ins_last)
571 {
572     instruction_index_map_.clear();
573     size_t index = 0;
574     auto current_ins = bc_ins_start;
575     instruction_index_map_[current_ins.GetAddress()] = index;
576 
577     while (current_ins.GetAddress() < bc_ins_last.GetAddress()) {
578         //Must keep IsPrimaryOpcodeValid is the first check item
579         if (!current_ins.IsPrimaryOpcodeValid()) {
580             LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
581             return false;
582         }
583         current_ins = current_ins.GetNext();
584         index++;
585         instruction_index_map_[current_ins.GetAddress()] = index;
586     }
587     return true;
588 }
589 
IsMethodBytecodeInstruction(const BytecodeInstruction &bc_ins_cur)590 bool Verifier::IsMethodBytecodeInstruction(const BytecodeInstruction &bc_ins_cur)
591 {
592     if (instruction_index_map_.find(bc_ins_cur.GetAddress()) != instruction_index_map_.end()) {
593         return true;
594     }
595     return false;
596 }
597 
VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last, const BytecodeInstruction &bc_ins_first, const uint8_t *ins_arr, panda_file::File::EntityId code_id)598 bool Verifier::VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
599                                      const BytecodeInstruction &bc_ins_first, const uint8_t *ins_arr,
600                                      panda_file::File::EntityId code_id)
601 {
602     // update maximum forward offset
603     const auto bc_ins_forward_size = bc_ins_last.GetAddress() - bc_ins.GetAddress();
604     // update maximum backward offset
605     const auto bc_ins_backward_size = bc_ins.GetAddress() - bc_ins_first.GetAddress();
606 
607     if (bc_ins.IsJumpInstruction()) {
608         std::optional<int64_t> immdata = GetFirstImmFromInstruction(bc_ins);
609         if (!immdata.has_value()) {
610             LOG(ERROR, VERIFIER) << "Fail to get immediate data!";
611             return false;
612         }
613         if ((immdata.value() > 0) && (immdata.value() >= bc_ins_forward_size)) {
614             LOG(ERROR, VERIFIER) << "Jump forward out of boundary";
615             return false;
616         }
617         if ((immdata.value() < 0) && (bc_ins_backward_size + immdata.value() < 0)) {
618             LOG(ERROR, VERIFIER) << "Jump backward out of boundary";
619             return false;
620         }
621 
622         const auto bc_ins_dest = bc_ins.JumpTo(immdata.value());
623         if (!bc_ins_dest.IsPrimaryOpcodeValid()) {
624             LOG(ERROR, VERIFIER) << "Fail to verify target jump primary opcode!";
625             return false;
626         }
627         if (!IsMethodBytecodeInstruction(bc_ins_dest)) {
628             LOG(ERROR, VERIFIER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
629                                  << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
630                                  << ": invalid jump offset 0x" << immdata.value()
631                                  << " - jumping in the middle of another instruction!";
632             return false;
633         }
634     }
635 
636     return true;
637 }
638 
GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint32_t &first_slot_index, bool &has_slot, bool &is_two_slot)639 bool Verifier::GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint32_t &first_slot_index,
640                                         bool &has_slot, bool &is_two_slot)
641 {
642     std::optional<uint64_t> first_imm = {};
643     if (bc_ins.HasFlag(BytecodeInstruction::Flags::ONE_SLOT)) {
644         first_imm = GetFirstImmFromInstruction(bc_ins);
645         if (!first_imm.has_value()) {
646             LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
647             return false;
648         }
649         first_slot_index = first_imm.value();
650         is_two_slot = false;
651         has_slot = true;
652     } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::TWO_SLOT)) {
653         first_imm = GetFirstImmFromInstruction(bc_ins);
654         if (!first_imm.has_value()) {
655             LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
656             return false;
657         }
658         first_slot_index = first_imm.value();
659         has_slot = true;
660         is_two_slot = true;
661     }
662 
663     return true;
664 }
665 
VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock &try_block, const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last)666 bool Verifier::VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock &try_block, const BytecodeInstruction &bc_ins,
667                                  const BytecodeInstruction &bc_ins_last)
668 {
669     bool result = true;
670 
671     try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
672         const auto handler_begin_offset = catch_block.GetHandlerPc();
673         // GetCodeSize() returns a unsigned long value, which is always >= 0,
674         // so handler_end_offset is guaranteed to be >= handler_begin_offset
675         const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
676 
677         const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
678         const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
679 
680         const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
681         const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
682 
683         if (!handler_begin_offset_in_range) {
684             LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset range! address is: 0x" << std::hex
685                                  << handler_begin_bc_ins.GetAddress();
686             result = false;
687             return false;
688         }
689         if (!IsMethodBytecodeInstruction(handler_begin_bc_ins)) {
690             LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset validity! address is: 0x" << std::hex
691                                  << handler_begin_bc_ins.GetAddress();
692             result = false;
693             return false;
694         }
695         if (!handler_end_offset_in_range) {
696             LOG(ERROR, VERIFIER) << "> Invalid catch block end offset range! address is: 0x" << std::hex
697                                  << handler_end_bc_ins.GetAddress();
698             result = false;
699             return false;
700         }
701         if (!IsMethodBytecodeInstruction(handler_end_bc_ins)) {
702             LOG(ERROR, VERIFIER) << "> Invalid catch block end offset validity! address is: 0x" << std::hex
703                                  << handler_end_bc_ins.GetAddress();
704             result = false;
705             return false;
706         }
707 
708         return true;
709     });
710 
711     return result;
712 }
713 
VerifyTryBlocks(panda_file::CodeDataAccessor &code_accessor, const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last)714 bool Verifier::VerifyTryBlocks(panda_file::CodeDataAccessor &code_accessor, const BytecodeInstruction &bc_ins,
715                                const BytecodeInstruction &bc_ins_last)
716 {
717     bool result = true;
718 
719     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
720         const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
721         // GetLength() returns a uint32 value, which is always >= 0,
722         // so try_end_bc_ins is guaranteed to be >= try_begin_bc_ins
723         const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
724 
725         const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
726         const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
727 
728         if (!try_begin_offset_in_range) {
729             LOG(ERROR, VERIFIER) << "> Invalid try block begin offset range! address is: 0x" << std::hex
730                                  << try_begin_bc_ins.GetAddress();
731             result = false;
732             return false;
733         }
734         if (!IsMethodBytecodeInstruction(try_begin_bc_ins)) {
735             LOG(ERROR, VERIFIER) << "> Invalid try block begin offset validity! address is: 0x" << std::hex
736                                  << try_begin_bc_ins.GetAddress();
737             result = false;
738             return false;
739         }
740         if (!try_end_offset_in_range) {
741             LOG(ERROR, VERIFIER) << "> Invalid try block end offset range! address is: 0x" << std::hex
742                                  << try_end_bc_ins.GetAddress();
743             result = false;
744             return false;
745         }
746         if (!IsMethodBytecodeInstruction(try_end_bc_ins)) {
747             LOG(ERROR, VERIFIER) << "> Invalid try block end offset validity! address is: 0x" << std::hex
748                                  << try_end_bc_ins.GetAddress();
749             result = false;
750             return false;
751         }
752         if (!VerifyCatchBlocks(try_block, bc_ins, bc_ins_last)) {
753             LOG(ERROR, VERIFIER) << "Catch block validation failed!";
754             result = false;
755             return false;
756         }
757 
758         return true;
759     });
760 
761     return result;
762 }
763 
764 
VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number, const panda_file::File::EntityId &method_id)765 bool Verifier::VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number,
766                                 const panda_file::File::EntityId &method_id)
767 {
768     const auto ann_slot_number = GetSlotNumberFromAnnotation(method_accessor);
769     if (!ann_slot_number.has_value()) {
770         LOG(INFO, VERIFIER) << "There is no slot number information in annotaion.";
771         // To be compatible with old abc, slot number verification is not continued
772         return true;
773     }
774     if (slot_number == ann_slot_number.value()) {
775         return true;
776     }
777 
778     LOG(ERROR, VERIFIER) << "Slot number has been falsified in method 0x" << method_id;
779     return false;
780 }
781 
VerifyMethodRegisterIndex(panda_file::CodeDataAccessor &code_accessor, std::optional<uint64_t> &valid_regs_num)782 bool Verifier::VerifyMethodRegisterIndex(panda_file::CodeDataAccessor &code_accessor,
783                                          std::optional<uint64_t> &valid_regs_num)
784 {
785     const uint64_t reg_nums = code_accessor.GetNumVregs();
786     const uint64_t arg_nums = code_accessor.GetNumArgs();
787     valid_regs_num = SafeAdd(reg_nums, arg_nums);
788     if (!valid_regs_num.has_value()) {
789         LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
790         return false;
791     }
792     if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
793         LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
794         return false;
795     }
796     return true;
797 }
798 
VerifyMethodInstructions(const MethodInfos &infos)799 bool Verifier::VerifyMethodInstructions(const MethodInfos &infos)
800 {
801     auto current_ins = infos.bc_ins;
802     auto last_ins = infos.bc_ins_last;
803     auto code_id = infos.method_accessor.GetCodeId().value();
804     auto method_id = infos.method_id;
805     auto valid_regs_num = infos.valid_regs_num.value();
806     auto ins_slot_num = infos.ins_slot_num;
807     auto has_slot = infos.has_slot;
808     auto is_two_slot = infos.is_two_slot;
809 
810     while (current_ins.GetAddress() != last_ins.GetAddress()) {
811         if (current_ins.GetAddress() > last_ins.GetAddress()) {
812             LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
813                                  << " (0x" << std::hex << code_id
814                                  << "). bytecode instructions sequence corrupted for method "
815                                  << method_id
816                                  << "! went out of bounds";
817             return false;
818         }
819         if (!current_ins.IsJumpInstruction() && !current_ins.IsReturnOrThrowInstruction()
820             && current_ins.GetNext().GetAddress() == last_ins.GetAddress()) {
821             LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
822                                  << " (0x" << std::hex << code_id
823                                  << "). bytecode instructions sequence corrupted for method "
824                                  << method_id
825                                  << "! went out of bounds";
826             return false;
827         }
828         const size_t count = GetVRegCount(current_ins);
829         if (count != 0 && !CheckVRegIdx(current_ins, count, valid_regs_num)) {
830             return false;
831         }
832         if (!VerifyJumpInstruction(current_ins, last_ins,
833                                    infos.bc_ins_init, infos.ins_arr,
834                                    code_id)) {
835             LOG(ERROR, VERIFIER) << "Invalid target position of jump instruction";
836             return false;
837         }
838         if (!GetIcSlotFromInstruction(current_ins, ins_slot_num,
839                                       has_slot, is_two_slot)) {
840             LOG(ERROR, VERIFIER) << "Fail to get first slot index!";
841             return false;
842         }
843         current_ins = current_ins.GetNext();
844     }
845     return true;
846 }
847 
CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id)848 bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id)
849 {
850     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
851     if (!method_accessor.GetCodeId().has_value()) {
852         LOG(ERROR, VERIFIER) << "Fail to get code id!";
853         return false;
854     }
855     panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
856     const auto ins_size = code_accessor.GetCodeSize();
857     const auto ins_arr = code_accessor.GetInstructions();
858     auto bc_ins = BytecodeInstruction(ins_arr);
859     const auto bc_ins_last = bc_ins.JumpTo(ins_size);
860     const auto bc_ins_init = bc_ins; // initial PC value
861     uint32_t ins_slot_num = 0; // For ic slot index verification
862     bool has_slot = false;
863     bool is_two_slot = false;
864     std::optional<uint64_t> valid_regs_num = 0;
865     MethodInfos infos = {bc_ins_init, bc_ins, bc_ins_last, method_accessor, method_id,
866                          valid_regs_num, ins_arr, ins_slot_num, has_slot, is_two_slot};
867     if (ins_size <= 0) {
868         LOG(ERROR, VERIFIER) << "Fail to verify code size!";
869         return false;
870     }
871     if (!VerifyMethodRegisterIndex(code_accessor, valid_regs_num)) {
872         LOG(ERROR, VERIFIER) << "Fail to verify method register index!";
873         return false;
874     }
875     if (!PrecomputeInstructionIndices(bc_ins, bc_ins_last)) {
876         LOG(ERROR, VERIFIER) << "Fail to precompute instruction indices!";
877         return false;
878     }
879     if (!IsMethodBytecodeInstruction(bc_ins)) {
880         LOG(ERROR, VERIFIER) << "Fail to verify method first bytecode instruction!";
881     }
882     if (!VerifyTryBlocks(code_accessor, bc_ins, bc_ins_last)) {
883         LOG(ERROR, VERIFIER) << "Fail to verify try blocks or catch blocks!";
884         return false;
885     }
886     if (!VerifyMethodInstructions(infos)) {
887         LOG(ERROR, VERIFIER) << "Fail to verify method instructions!";
888         return false;
889     }
890     if (has_slot) {
891         if (is_two_slot) {
892             ins_slot_num += 1; // when there are two slots for the last instruction, the slot index increases
893         }
894         ins_slot_num += 1; // slot index starts with zero
895     }
896     return true;
897 }
898 
CheckConstantPoolIndex() const899 bool Verifier::CheckConstantPoolIndex() const
900 {
901     for (auto &id : ins_method_ids_) {
902         if (!VerifyMethodId(id)) {
903             return false;
904         }
905     }
906 
907     for (auto &id : ins_literal_ids_) {
908         if (!VerifyLiteralId(id)) {
909             return false;
910         }
911     }
912 
913     for (auto &id : ins_string_ids_) {
914         if (!VerifyStringId(id)) {
915             return false;
916         }
917     }
918 
919     return true;
920 }
921 
SafeAdd(uint64_t a, uint64_t b) const922 std::optional<uint64_t> Verifier::SafeAdd(uint64_t a, uint64_t b) const
923 {
924     if (a > std::numeric_limits<uint64_t>::max() - b) {
925         return std::nullopt;
926     }
927     return a + b;
928 }
929 } // namespace panda::verifier
930