1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include <codecvt>
16 #include <locale>
17
18 #include "verifier.h"
19 #include "class_data_accessor-inl.h"
20 #include "libpandafile/util/collect_util.h"
21 #include "zlib.h"
22
23 namespace panda::verifier {
24
Verifier(const std::string &filename)25 Verifier::Verifier(const std::string &filename)
26 {
27 auto file_to_verify = panda_file::File::Open(filename);
28 file_.swap(file_to_verify);
29 }
30
Verify()31 bool Verifier::Verify()
32 {
33 if (!VerifyChecksum()) {
34 return false;
35 }
36
37 if (!CollectIdInfos()) {
38 return false;
39 }
40
41 if (!VerifyConstantPool()) {
42 return false;
43 }
44
45 return true;
46 }
47
CollectIdInfos()48 bool Verifier::CollectIdInfos()
49 {
50 if (file_ == nullptr) {
51 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
52 return false;
53 }
54 GetConstantPoolIds();
55 if (include_literal_array_ids) {
56 GetLiteralIds();
57 }
58 return CheckConstantPool(verifier::ActionType::COLLECTINFOS);
59 }
60
VerifyChecksum()61 bool Verifier::VerifyChecksum()
62 {
63 if (file_ == nullptr) {
64 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
65 return false;
66 }
67 uint32_t file_size = file_->GetHeader()->file_size;
68 ASSERT(file_size > FILE_CONTENT_OFFSET);
69 uint32_t cal_checksum = adler32(1, file_->GetBase() + FILE_CONTENT_OFFSET, file_size - FILE_CONTENT_OFFSET);
70 return file_->GetHeader()->checksum == cal_checksum;
71 }
72
VerifyConstantPool()73 bool Verifier::VerifyConstantPool()
74 {
75 if (file_ == nullptr) {
76 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
77 return false;
78 }
79
80 if (!CheckConstantPoolIndex()) {
81 return false;
82 }
83
84 if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
85 return false;
86 }
87
88 if (!VerifyLiteralArrays()) {
89 return false;
90 }
91
92 return true;
93 }
94
VerifyRegisterIndex()95 bool Verifier::VerifyRegisterIndex()
96 {
97 if (file_ == nullptr) {
98 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
99 return false;
100 }
101
102 for (const auto id : all_method_ids_) {
103 const panda_file::File::EntityId method_id = panda_file::File::EntityId(id);
104 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
105 if (!method_accessor.GetCodeId().has_value()) {
106 continue;
107 }
108 panda_file::CodeDataAccessor code_data(*file_, method_accessor.GetCodeId().value());
109 const uint64_t reg_nums = code_data.GetNumVregs();
110 const uint64_t arg_nums = code_data.GetNumArgs();
111 const std::optional<uint64_t> valid_regs_num = SafeAdd(reg_nums, arg_nums);
112 if (!valid_regs_num.has_value()) {
113 LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
114 return false;
115 }
116 if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
117 LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
118 return false;
119 }
120 auto bc_ins = BytecodeInstruction(code_data.GetInstructions());
121 const auto bc_ins_last = bc_ins.JumpTo(code_data.GetCodeSize());
122 ASSERT(arg_nums >= DEFAULT_ARGUMENT_NUMBER);
123 while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
124 const size_t count = GetVRegCount(bc_ins);
125 if (count == 0) { // Skip instructions that do not use registers
126 bc_ins = bc_ins.GetNext();
127 continue;
128 }
129 if (!CheckVRegIdx(bc_ins, count, valid_regs_num.value())) {
130 return false;
131 }
132 bc_ins = bc_ins.GetNext();
133 }
134 }
135 return true;
136 }
137
VerifyConstantPoolIndex()138 bool Verifier::VerifyConstantPoolIndex()
139 {
140 if (file_ == nullptr) {
141 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
142 return false;
143 }
144
145 if (!CheckConstantPoolIndex()) {
146 return false;
147 }
148
149 return true;
150 }
151
VerifyConstantPoolContent()152 bool Verifier::VerifyConstantPoolContent()
153 {
154 if (file_ == nullptr) {
155 LOG(ERROR, VERIFIER) << "Failed to verify empty abc file!";
156 return false;
157 }
158
159 if (!CheckConstantPool(verifier::ActionType::CHECKCONSTPOOLCONTENT)) {
160 return false;
161 }
162
163 if (!VerifyLiteralArrays()) {
164 return false;
165 }
166
167 return true;
168 }
169
GetConstantPoolIds()170 void Verifier::GetConstantPoolIds()
171 {
172 if (constant_pool_ids_.size() != 0) {
173 return;
174 }
175 auto index_headers = file_->GetIndexHeaders();
176 for (const auto &index_header : index_headers) {
177 auto region_indexs = file_->GetMethodIndex(&index_header);
178 for (auto &index : region_indexs) {
179 constant_pool_ids_.push_back(index.GetOffset());
180 }
181 }
182 }
183
GetLiteralIds()184 void Verifier::GetLiteralIds()
185 {
186 if (literal_ids_.size() != 0) {
187 return;
188 }
189
190 if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
191 const auto literal_arrays = file_->GetLiteralArrays();
192 PushToLiteralIds(literal_arrays);
193 } else {
194 panda::libpandafile::CollectUtil collect_util;
195 std::unordered_set<uint32_t> literal_array_ids;
196 collect_util.CollectLiteralArray(*file_, literal_array_ids);
197 PushToLiteralIds(literal_array_ids);
198 }
199 }
200
201 template <typename T>
PushToLiteralIds(T &ids)202 void Verifier::PushToLiteralIds(T &ids)
203 {
204 for (const auto id : ids) {
205 literal_ids_.push_back(id);
206 }
207 }
208
CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id)209 bool Verifier::CheckConstantPoolActions(const verifier::ActionType type, panda_file::File::EntityId method_id)
210 {
211 switch (type) {
212 case verifier::ActionType::CHECKCONSTPOOLCONTENT: {
213 return CheckConstantPoolMethodContent(method_id);
214 }
215 case verifier::ActionType::COLLECTINFOS: {
216 all_method_ids_.push_back(method_id.GetOffset());
217 return CollectIdInInstructions(method_id);
218 }
219 default: {
220 return true;
221 }
222 }
223 }
224
CollectIdInInstructions(const panda_file::File::EntityId &method_id)225 bool Verifier::CollectIdInInstructions(const panda_file::File::EntityId &method_id)
226 {
227 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
228 ASSERT(method_accessor.GetCodeId().has_value());
229 panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
230 const auto ins_size = code_accessor.GetCodeSize();
231 const auto ins_arr = code_accessor.GetInstructions();
232
233 auto bc_ins = BytecodeInstruction(ins_arr);
234 const auto bc_ins_last = bc_ins.JumpTo(ins_size);
235
236 while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
237 if (!bc_ins.IsPrimaryOpcodeValid()) {
238 LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
239 return false;
240 }
241 if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
242 // the idx of any instruction with a literal id is 0
243 // except defineclasswithbuffer/callruntime.definesendableclass
244 size_t idx = bc_ins.GetLiteralIndex();
245 const auto arg_literal_idx = bc_ins.GetId(idx).AsIndex();
246 const auto literal_id = file_->ResolveMethodIndex(method_id, arg_literal_idx);
247 ins_literal_ids_.insert(literal_id.GetOffset());
248 }
249 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
250 const auto arg_method_idx = bc_ins.GetId().AsIndex();
251 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
252 ins_method_ids_.insert(arg_method_id.GetOffset());
253 }
254 if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
255 const auto arg_string_idx = bc_ins.GetId().AsIndex();
256 const auto string_id = file_->ResolveOffsetByIndex(method_id, arg_string_idx);
257 ins_string_ids_.insert(string_id.GetOffset());
258 }
259 bc_ins = bc_ins.GetNext();
260 }
261 return true;
262 }
263
CollectModuleLiteralId(const panda_file::File::EntityId &field_id)264 void Verifier::CollectModuleLiteralId(const panda_file::File::EntityId &field_id)
265 {
266 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
267 const auto literal_id = field_accessor.GetValue<uint32_t>().value();
268 if (std::find(literal_ids_.begin(), literal_ids_.end(), literal_id) != literal_ids_.end()) {
269 module_literals_.insert(literal_id);
270 }
271 }
272
CheckConstantPool(const verifier::ActionType type)273 bool Verifier::CheckConstantPool(const verifier::ActionType type)
274 {
275 const auto class_idx = file_->GetClasses();
276 for (size_t i = 0; i < class_idx.size(); i++) {
277 uint32_t class_id = class_idx[i];
278 if (class_id > file_->GetHeader()->file_size) {
279 LOG(ERROR, VERIFIER) << "Binary file corrupted. out of bounds (0x" << std::hex
280 << file_->GetHeader()->file_size;
281 return false;
282 }
283 const panda_file::File::EntityId record_id {class_id};
284 if (!file_->IsExternal(record_id)) {
285 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
286 bool check_res = true;
287 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
288 check_res = check_res && CheckConstantPoolActions(type, method_accessor.GetMethodId());
289 });
290 if (!check_res) {
291 return false;
292 }
293 if (type == verifier::ActionType::COLLECTINFOS) {
294 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
295 CollectModuleLiteralId(field_accessor.GetFieldId());
296 });
297 }
298 }
299 }
300
301 return true;
302 }
303
GetVRegCount(const BytecodeInstruction &bc_ins)304 size_t Verifier::GetVRegCount(const BytecodeInstruction &bc_ins)
305 {
306 size_t idx = 0; // Represents the idxTH register index in an instruction
307 BytecodeInstruction::Format format = bc_ins.GetFormat();
308 while (bc_ins.HasVReg(format, idx)) {
309 idx++;
310 }
311 return idx;
312 }
313
IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)314 bool Verifier::IsRangeInstAndHasInvalidRegIdx(const BytecodeInstruction &bc_ins,
315 const size_t count, uint64_t valid_regs_num)
316 {
317 ASSERT(bc_ins.IsRangeInstruction());
318
319 uint64_t reg_idx = bc_ins.GetVReg(FIRST_INDEX);
320 if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) { // for [format: +AA/+AAAA vBB vCC], vBB can be verified here
321 return true;
322 }
323
324 std::optional<uint64_t> max_ins_reg_idx_opt = bc_ins.GetRangeInsLastRegIdx();
325 if (!max_ins_reg_idx_opt.has_value()) {
326 LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
327 return true;
328 }
329
330 reg_idx = max_ins_reg_idx_opt.value();
331 if (IsRegIdxOutOfBounds(reg_idx, valid_regs_num)) {
332 return true;
333 }
334
335 return false;
336 }
337
IsRegIdxOutOfBounds(uint64_t reg_idx, uint64_t valid_regs_num)338 bool Verifier::IsRegIdxOutOfBounds(uint64_t reg_idx, uint64_t valid_regs_num)
339 {
340 if (reg_idx >= valid_regs_num) {
341 LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
342 << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
343 return true;
344 }
345 return false;
346 }
347
CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)348 bool Verifier::CheckVRegIdx(const BytecodeInstruction &bc_ins, const size_t count, uint64_t valid_regs_num)
349 {
350 if (bc_ins.IsRangeInstruction() &&
351 IsRangeInstAndHasInvalidRegIdx(bc_ins, count, valid_regs_num)) {
352 return false;
353 }
354 for (size_t idx = 0; idx < count; idx++) { // Represents the idxTH register index in an instruction
355 uint16_t reg_idx = bc_ins.GetVReg(idx);
356 if (reg_idx >= valid_regs_num) {
357 LOG(ERROR, VERIFIER) << "Register index out of bounds: 0x" << std::hex
358 << reg_idx << ", Max allowed: 0x" << std::hex << valid_regs_num;
359 return false;
360 }
361 }
362 return true;
363 }
364
VerifyMethodId(const uint32_t &method_id) const365 bool Verifier::VerifyMethodId(const uint32_t &method_id) const
366 {
367 auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), method_id);
368 if (iter == constant_pool_ids_.end() ||
369 (std::find(literal_ids_.begin(), literal_ids_.end(), method_id) != literal_ids_.end()) ||
370 ins_string_ids_.count(method_id)) {
371 LOG(ERROR, VERIFIER) << "Fail to verify method id. method_id(0x" << std::hex << method_id << ")!";
372 return false;
373 }
374 return true;
375 }
376
VerifyLiteralId(const uint32_t &literal_id) const377 bool Verifier::VerifyLiteralId(const uint32_t &literal_id) const
378 {
379 auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), literal_id);
380 if (iter == constant_pool_ids_.end() ||
381 (std::find(all_method_ids_.begin(), all_method_ids_.end(), literal_id) != all_method_ids_.end()) ||
382 ins_string_ids_.count(literal_id)) {
383 LOG(ERROR, VERIFIER) << "Fail to verify literal id. literal_id(0x" << std::hex << literal_id << ")!";
384 return false;
385 }
386 return true;
387 }
388
VerifyStringId(const uint32_t &string_id) const389 bool Verifier::VerifyStringId(const uint32_t &string_id) const
390 {
391 auto iter = std::find(constant_pool_ids_.begin(), constant_pool_ids_.end(), string_id);
392 if (iter == constant_pool_ids_.end() ||
393 ins_method_ids_.count(string_id) ||
394 (std::find(literal_ids_.begin(), literal_ids_.end(), string_id) != literal_ids_.end())) {
395 LOG(ERROR, VERIFIER) << "Fail to verify string id. string_id(0x" << std::hex << string_id << ")!";
396 return false;
397 }
398 return true;
399 }
400
GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins)401 std::optional<int64_t> Verifier::GetFirstImmFromInstruction(const BytecodeInstruction &bc_ins)
402 {
403 std::optional<int64_t> first_imm = std::optional<int64_t> {};
404 size_t index = 0;
405 const auto format = bc_ins.GetFormat();
406 if (bc_ins.HasImm(format, index)) {
407 first_imm = bc_ins.GetImm64(index);
408 }
409
410 return first_imm;
411 }
412
GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor)413 std::optional<uint64_t> Verifier::GetSlotNumberFromAnnotation(panda_file::MethodDataAccessor &method_accessor)
414 {
415 std::optional<uint64_t> slot_number {};
416 method_accessor.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
417 panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
418 auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
419 if (::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
420 uint32_t elem_count = ada.GetCount();
421 for (uint32_t i = 0; i < elem_count; i++) {
422 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
423 auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
424 if (::strcmp("SlotNumber", elem_name) == 0) {
425 slot_number = adae.GetScalarValue().GetValue();
426 }
427 }
428 }
429 });
430 return slot_number;
431 }
432
VerifyMethodIdInLiteralArray(const uint32_t &id)433 bool Verifier::VerifyMethodIdInLiteralArray(const uint32_t &id)
434 {
435 const auto method_id = panda_file::File::EntityId(id).GetOffset();
436 auto iter = std::find(all_method_ids_.begin(), all_method_ids_.end(), method_id);
437 if (iter == all_method_ids_.end()) {
438 LOG(ERROR, VERIFIER) << "Invalid method id(0x" << id << ") in literal array";
439 return false;
440 }
441 return true;
442 }
443
VerifyStringIdInLiteralArray(const uint32_t &id)444 bool Verifier::VerifyStringIdInLiteralArray(const uint32_t &id)
445 {
446 auto string_data = file_->GetStringData(panda_file::File::EntityId(id));
447 if (string_data.data == nullptr) {
448 LOG(ERROR, VERIFIER) << "Invalid string_id. string_id(0x" << std::hex << id << ")!";
449 return false;
450 }
451 auto desc = std::string(utf::Mutf8AsCString(string_data.data));
452 std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
453 std::wstring utf16_desc = converter.from_bytes(desc);
454 if (string_data.utf16_length != utf16_desc.length()) {
455 LOG(ERROR, VERIFIER) << "Invalid string value(0x" << id << ") in literal array";
456 return false;
457 }
458 return true;
459 }
460
VerifyLiteralIdInLiteralArray(const uint32_t &id)461 bool Verifier::VerifyLiteralIdInLiteralArray(const uint32_t &id)
462 {
463 auto iter = std::find(literal_ids_.begin(), literal_ids_.end(), id);
464 if (iter == literal_ids_.end()) {
465 LOG(ERROR, VERIFIER) << "Invalid literal id(0x" << id << ") in literal array";
466 return false;
467 }
468 return true;
469 }
470
VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id)471 bool Verifier::VerifySingleLiteralArray(const panda_file::File::EntityId &literal_id)
472 {
473 auto sp = file_->GetSpanFromId(literal_id);
474 const auto literal_vals_num = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
475 for (size_t i = 0; i < literal_vals_num; i += 2U) { // 2u skip literal item
476 const auto tag = static_cast<panda_file::LiteralTag>(panda_file::helpers::Read<panda_file::TAG_SIZE>(&sp));
477 switch (tag) {
478 case panda_file::LiteralTag::TAGVALUE:
479 case panda_file::LiteralTag::BOOL:
480 case panda_file::LiteralTag::ACCESSOR:
481 case panda_file::LiteralTag::NULLVALUE:
482 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
483 sp = sp.SubSpan(sizeof(uint8_t)); // run next sp
484 break;
485 }
486 case panda_file::LiteralTag::METHODAFFILIATE: {
487 sp = sp.SubSpan(sizeof(uint16_t));
488 break;
489 }
490 case panda_file::LiteralTag::INTEGER:
491 case panda_file::LiteralTag::FLOAT:
492 case panda_file::LiteralTag::GETTER:
493 case panda_file::LiteralTag::SETTER:
494 case panda_file::LiteralTag::GENERATORMETHOD:
495 case panda_file::LiteralTag::LITERALBUFFERINDEX:
496 case panda_file::LiteralTag::ASYNCGENERATORMETHOD: {
497 sp = sp.SubSpan(sizeof(uint32_t));
498 break;
499 }
500 case panda_file::LiteralTag::DOUBLE: {
501 const auto value = bit_cast<double>(panda_file::helpers::Read<sizeof(uint64_t)>(&sp));
502 // true: High 16-bit of double value >= 0xffff
503 if (IsImpureNaN(value)) {
504 LOG(ERROR, VERIFIER) << "Fail to verify double value " << value << " in literal array";
505 return false;
506 }
507 break;
508 }
509 case panda_file::LiteralTag::ARRAY_U1:
510 case panda_file::LiteralTag::ARRAY_U8:
511 case panda_file::LiteralTag::ARRAY_I8:
512 case panda_file::LiteralTag::ARRAY_U16:
513 case panda_file::LiteralTag::ARRAY_I16:
514 case panda_file::LiteralTag::ARRAY_U32:
515 case panda_file::LiteralTag::ARRAY_I32:
516 case panda_file::LiteralTag::ARRAY_U64:
517 case panda_file::LiteralTag::ARRAY_I64:
518 case panda_file::LiteralTag::ARRAY_F32:
519 case panda_file::LiteralTag::ARRAY_F64:
520 case panda_file::LiteralTag::ARRAY_STRING: {
521 i = literal_vals_num;
522 break;
523 }
524 case panda_file::LiteralTag::STRING: {
525 panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
526 break;
527 }
528 case panda_file::LiteralTag::METHOD: {
529 const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
530 inner_method_map_.emplace(literal_id.GetOffset(), value);
531 if (!VerifyMethodIdInLiteralArray(value)) {
532 return false;
533 }
534 break;
535 }
536 case panda_file::LiteralTag::LITERALARRAY: {
537 const auto value = static_cast<uint32_t>(panda_file::helpers::Read<sizeof(uint32_t)>(&sp));
538 inner_literal_map_.emplace(literal_id.GetOffset(), value);
539 if (!VerifyLiteralIdInLiteralArray(value)) {
540 return false;
541 }
542 break;
543 }
544 default: {
545 LOG(ERROR, VERIFIER) << "Invalid literal tag";
546 return false;
547 }
548 }
549 }
550 return true;
551 }
552
IsModuleLiteralId(const panda_file::File::EntityId &id) const553 bool Verifier::IsModuleLiteralId(const panda_file::File::EntityId &id) const
554 {
555 return module_literals_.find(id.GetOffset()) != module_literals_.end();
556 }
557
VerifyLiteralArrays()558 bool Verifier::VerifyLiteralArrays()
559 {
560 for (const auto &arg_literal_id : literal_ids_) {
561 const auto literal_id = panda_file::File::EntityId(arg_literal_id);
562 if (!IsModuleLiteralId(literal_id) && !VerifySingleLiteralArray(literal_id)) {
563 return false;
564 }
565 }
566 return true;
567 }
568
PrecomputeInstructionIndices(const BytecodeInstruction &bc_ins_start, const BytecodeInstruction &bc_ins_last)569 bool Verifier::PrecomputeInstructionIndices(const BytecodeInstruction &bc_ins_start,
570 const BytecodeInstruction &bc_ins_last)
571 {
572 instruction_index_map_.clear();
573 size_t index = 0;
574 auto current_ins = bc_ins_start;
575 instruction_index_map_[current_ins.GetAddress()] = index;
576
577 while (current_ins.GetAddress() < bc_ins_last.GetAddress()) {
578 //Must keep IsPrimaryOpcodeValid is the first check item
579 if (!current_ins.IsPrimaryOpcodeValid()) {
580 LOG(ERROR, VERIFIER) << "Fail to verify primary opcode!";
581 return false;
582 }
583 current_ins = current_ins.GetNext();
584 index++;
585 instruction_index_map_[current_ins.GetAddress()] = index;
586 }
587 return true;
588 }
589
IsMethodBytecodeInstruction(const BytecodeInstruction &bc_ins_cur)590 bool Verifier::IsMethodBytecodeInstruction(const BytecodeInstruction &bc_ins_cur)
591 {
592 if (instruction_index_map_.find(bc_ins_cur.GetAddress()) != instruction_index_map_.end()) {
593 return true;
594 }
595 return false;
596 }
597
VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last, const BytecodeInstruction &bc_ins_first, const uint8_t *ins_arr, panda_file::File::EntityId code_id)598 bool Verifier::VerifyJumpInstruction(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
599 const BytecodeInstruction &bc_ins_first, const uint8_t *ins_arr,
600 panda_file::File::EntityId code_id)
601 {
602 // update maximum forward offset
603 const auto bc_ins_forward_size = bc_ins_last.GetAddress() - bc_ins.GetAddress();
604 // update maximum backward offset
605 const auto bc_ins_backward_size = bc_ins.GetAddress() - bc_ins_first.GetAddress();
606
607 if (bc_ins.IsJumpInstruction()) {
608 std::optional<int64_t> immdata = GetFirstImmFromInstruction(bc_ins);
609 if (!immdata.has_value()) {
610 LOG(ERROR, VERIFIER) << "Fail to get immediate data!";
611 return false;
612 }
613 if ((immdata.value() > 0) && (immdata.value() >= bc_ins_forward_size)) {
614 LOG(ERROR, VERIFIER) << "Jump forward out of boundary";
615 return false;
616 }
617 if ((immdata.value() < 0) && (bc_ins_backward_size + immdata.value() < 0)) {
618 LOG(ERROR, VERIFIER) << "Jump backward out of boundary";
619 return false;
620 }
621
622 const auto bc_ins_dest = bc_ins.JumpTo(immdata.value());
623 if (!bc_ins_dest.IsPrimaryOpcodeValid()) {
624 LOG(ERROR, VERIFIER) << "Fail to verify target jump primary opcode!";
625 return false;
626 }
627 if (!IsMethodBytecodeInstruction(bc_ins_dest)) {
628 LOG(ERROR, VERIFIER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
629 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
630 << ": invalid jump offset 0x" << immdata.value()
631 << " - jumping in the middle of another instruction!";
632 return false;
633 }
634 }
635
636 return true;
637 }
638
GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint32_t &first_slot_index, bool &has_slot, bool &is_two_slot)639 bool Verifier::GetIcSlotFromInstruction(const BytecodeInstruction &bc_ins, uint32_t &first_slot_index,
640 bool &has_slot, bool &is_two_slot)
641 {
642 std::optional<uint64_t> first_imm = {};
643 if (bc_ins.HasFlag(BytecodeInstruction::Flags::ONE_SLOT)) {
644 first_imm = GetFirstImmFromInstruction(bc_ins);
645 if (!first_imm.has_value()) {
646 LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
647 return false;
648 }
649 first_slot_index = first_imm.value();
650 is_two_slot = false;
651 has_slot = true;
652 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::TWO_SLOT)) {
653 first_imm = GetFirstImmFromInstruction(bc_ins);
654 if (!first_imm.has_value()) {
655 LOG(ERROR, VERIFIER) << "Fail to get first immediate data!";
656 return false;
657 }
658 first_slot_index = first_imm.value();
659 has_slot = true;
660 is_two_slot = true;
661 }
662
663 return true;
664 }
665
VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock &try_block, const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last)666 bool Verifier::VerifyCatchBlocks(panda_file::CodeDataAccessor::TryBlock &try_block, const BytecodeInstruction &bc_ins,
667 const BytecodeInstruction &bc_ins_last)
668 {
669 bool result = true;
670
671 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
672 const auto handler_begin_offset = catch_block.GetHandlerPc();
673 // GetCodeSize() returns a unsigned long value, which is always >= 0,
674 // so handler_end_offset is guaranteed to be >= handler_begin_offset
675 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
676
677 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
678 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
679
680 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
681 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
682
683 if (!handler_begin_offset_in_range) {
684 LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset range! address is: 0x" << std::hex
685 << handler_begin_bc_ins.GetAddress();
686 result = false;
687 return false;
688 }
689 if (!IsMethodBytecodeInstruction(handler_begin_bc_ins)) {
690 LOG(ERROR, VERIFIER) << "> Invalid catch block begin offset validity! address is: 0x" << std::hex
691 << handler_begin_bc_ins.GetAddress();
692 result = false;
693 return false;
694 }
695 if (!handler_end_offset_in_range) {
696 LOG(ERROR, VERIFIER) << "> Invalid catch block end offset range! address is: 0x" << std::hex
697 << handler_end_bc_ins.GetAddress();
698 result = false;
699 return false;
700 }
701 if (!IsMethodBytecodeInstruction(handler_end_bc_ins)) {
702 LOG(ERROR, VERIFIER) << "> Invalid catch block end offset validity! address is: 0x" << std::hex
703 << handler_end_bc_ins.GetAddress();
704 result = false;
705 return false;
706 }
707
708 return true;
709 });
710
711 return result;
712 }
713
VerifyTryBlocks(panda_file::CodeDataAccessor &code_accessor, const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last)714 bool Verifier::VerifyTryBlocks(panda_file::CodeDataAccessor &code_accessor, const BytecodeInstruction &bc_ins,
715 const BytecodeInstruction &bc_ins_last)
716 {
717 bool result = true;
718
719 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
720 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
721 // GetLength() returns a uint32 value, which is always >= 0,
722 // so try_end_bc_ins is guaranteed to be >= try_begin_bc_ins
723 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
724
725 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
726 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
727
728 if (!try_begin_offset_in_range) {
729 LOG(ERROR, VERIFIER) << "> Invalid try block begin offset range! address is: 0x" << std::hex
730 << try_begin_bc_ins.GetAddress();
731 result = false;
732 return false;
733 }
734 if (!IsMethodBytecodeInstruction(try_begin_bc_ins)) {
735 LOG(ERROR, VERIFIER) << "> Invalid try block begin offset validity! address is: 0x" << std::hex
736 << try_begin_bc_ins.GetAddress();
737 result = false;
738 return false;
739 }
740 if (!try_end_offset_in_range) {
741 LOG(ERROR, VERIFIER) << "> Invalid try block end offset range! address is: 0x" << std::hex
742 << try_end_bc_ins.GetAddress();
743 result = false;
744 return false;
745 }
746 if (!IsMethodBytecodeInstruction(try_end_bc_ins)) {
747 LOG(ERROR, VERIFIER) << "> Invalid try block end offset validity! address is: 0x" << std::hex
748 << try_end_bc_ins.GetAddress();
749 result = false;
750 return false;
751 }
752 if (!VerifyCatchBlocks(try_block, bc_ins, bc_ins_last)) {
753 LOG(ERROR, VERIFIER) << "Catch block validation failed!";
754 result = false;
755 return false;
756 }
757
758 return true;
759 });
760
761 return result;
762 }
763
764
VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number, const panda_file::File::EntityId &method_id)765 bool Verifier::VerifySlotNumber(panda_file::MethodDataAccessor &method_accessor, const uint32_t &slot_number,
766 const panda_file::File::EntityId &method_id)
767 {
768 const auto ann_slot_number = GetSlotNumberFromAnnotation(method_accessor);
769 if (!ann_slot_number.has_value()) {
770 LOG(INFO, VERIFIER) << "There is no slot number information in annotaion.";
771 // To be compatible with old abc, slot number verification is not continued
772 return true;
773 }
774 if (slot_number == ann_slot_number.value()) {
775 return true;
776 }
777
778 LOG(ERROR, VERIFIER) << "Slot number has been falsified in method 0x" << method_id;
779 return false;
780 }
781
VerifyMethodRegisterIndex(panda_file::CodeDataAccessor &code_accessor, std::optional<uint64_t> &valid_regs_num)782 bool Verifier::VerifyMethodRegisterIndex(panda_file::CodeDataAccessor &code_accessor,
783 std::optional<uint64_t> &valid_regs_num)
784 {
785 const uint64_t reg_nums = code_accessor.GetNumVregs();
786 const uint64_t arg_nums = code_accessor.GetNumArgs();
787 valid_regs_num = SafeAdd(reg_nums, arg_nums);
788 if (!valid_regs_num.has_value()) {
789 LOG(ERROR, VERIFIER) << "Integer overflow detected during register index calculation!";
790 return false;
791 }
792 if (valid_regs_num.value() > MAX_REGISTER_INDEX + 1) {
793 LOG(ERROR, VERIFIER) << "Register index exceeds the maximum allowable value (0xffff)!";
794 return false;
795 }
796 return true;
797 }
798
VerifyMethodInstructions(const MethodInfos &infos)799 bool Verifier::VerifyMethodInstructions(const MethodInfos &infos)
800 {
801 auto current_ins = infos.bc_ins;
802 auto last_ins = infos.bc_ins_last;
803 auto code_id = infos.method_accessor.GetCodeId().value();
804 auto method_id = infos.method_id;
805 auto valid_regs_num = infos.valid_regs_num.value();
806 auto ins_slot_num = infos.ins_slot_num;
807 auto has_slot = infos.has_slot;
808 auto is_two_slot = infos.is_two_slot;
809
810 while (current_ins.GetAddress() != last_ins.GetAddress()) {
811 if (current_ins.GetAddress() > last_ins.GetAddress()) {
812 LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
813 << " (0x" << std::hex << code_id
814 << "). bytecode instructions sequence corrupted for method "
815 << method_id
816 << "! went out of bounds";
817 return false;
818 }
819 if (!current_ins.IsJumpInstruction() && !current_ins.IsReturnOrThrowInstruction()
820 && current_ins.GetNext().GetAddress() == last_ins.GetAddress()) {
821 LOG(ERROR, VERIFIER) << "> error encountered at " << code_id
822 << " (0x" << std::hex << code_id
823 << "). bytecode instructions sequence corrupted for method "
824 << method_id
825 << "! went out of bounds";
826 return false;
827 }
828 const size_t count = GetVRegCount(current_ins);
829 if (count != 0 && !CheckVRegIdx(current_ins, count, valid_regs_num)) {
830 return false;
831 }
832 if (!VerifyJumpInstruction(current_ins, last_ins,
833 infos.bc_ins_init, infos.ins_arr,
834 code_id)) {
835 LOG(ERROR, VERIFIER) << "Invalid target position of jump instruction";
836 return false;
837 }
838 if (!GetIcSlotFromInstruction(current_ins, ins_slot_num,
839 has_slot, is_two_slot)) {
840 LOG(ERROR, VERIFIER) << "Fail to get first slot index!";
841 return false;
842 }
843 current_ins = current_ins.GetNext();
844 }
845 return true;
846 }
847
CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id)848 bool Verifier::CheckConstantPoolMethodContent(const panda_file::File::EntityId &method_id)
849 {
850 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
851 if (!method_accessor.GetCodeId().has_value()) {
852 LOG(ERROR, VERIFIER) << "Fail to get code id!";
853 return false;
854 }
855 panda_file::CodeDataAccessor code_accessor(*file_, method_accessor.GetCodeId().value());
856 const auto ins_size = code_accessor.GetCodeSize();
857 const auto ins_arr = code_accessor.GetInstructions();
858 auto bc_ins = BytecodeInstruction(ins_arr);
859 const auto bc_ins_last = bc_ins.JumpTo(ins_size);
860 const auto bc_ins_init = bc_ins; // initial PC value
861 uint32_t ins_slot_num = 0; // For ic slot index verification
862 bool has_slot = false;
863 bool is_two_slot = false;
864 std::optional<uint64_t> valid_regs_num = 0;
865 MethodInfos infos = {bc_ins_init, bc_ins, bc_ins_last, method_accessor, method_id,
866 valid_regs_num, ins_arr, ins_slot_num, has_slot, is_two_slot};
867 if (ins_size <= 0) {
868 LOG(ERROR, VERIFIER) << "Fail to verify code size!";
869 return false;
870 }
871 if (!VerifyMethodRegisterIndex(code_accessor, valid_regs_num)) {
872 LOG(ERROR, VERIFIER) << "Fail to verify method register index!";
873 return false;
874 }
875 if (!PrecomputeInstructionIndices(bc_ins, bc_ins_last)) {
876 LOG(ERROR, VERIFIER) << "Fail to precompute instruction indices!";
877 return false;
878 }
879 if (!IsMethodBytecodeInstruction(bc_ins)) {
880 LOG(ERROR, VERIFIER) << "Fail to verify method first bytecode instruction!";
881 }
882 if (!VerifyTryBlocks(code_accessor, bc_ins, bc_ins_last)) {
883 LOG(ERROR, VERIFIER) << "Fail to verify try blocks or catch blocks!";
884 return false;
885 }
886 if (!VerifyMethodInstructions(infos)) {
887 LOG(ERROR, VERIFIER) << "Fail to verify method instructions!";
888 return false;
889 }
890 if (has_slot) {
891 if (is_two_slot) {
892 ins_slot_num += 1; // when there are two slots for the last instruction, the slot index increases
893 }
894 ins_slot_num += 1; // slot index starts with zero
895 }
896 return true;
897 }
898
CheckConstantPoolIndex() const899 bool Verifier::CheckConstantPoolIndex() const
900 {
901 for (auto &id : ins_method_ids_) {
902 if (!VerifyMethodId(id)) {
903 return false;
904 }
905 }
906
907 for (auto &id : ins_literal_ids_) {
908 if (!VerifyLiteralId(id)) {
909 return false;
910 }
911 }
912
913 for (auto &id : ins_string_ids_) {
914 if (!VerifyStringId(id)) {
915 return false;
916 }
917 }
918
919 return true;
920 }
921
SafeAdd(uint64_t a, uint64_t b) const922 std::optional<uint64_t> Verifier::SafeAdd(uint64_t a, uint64_t b) const
923 {
924 if (a > std::numeric_limits<uint64_t>::max() - b) {
925 return std::nullopt;
926 }
927 return a + b;
928 }
929 } // namespace panda::verifier
930