1/* 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "disassembler.h" 17#include "libpandafile/util/collect_util.h" 18#include "mangling.h" 19#include "utils/logger.h" 20#include "utils/const_value.h" 21 22#include <iomanip> 23#include <type_traits> 24 25#include "get_language_specific_metadata.inc" 26 27namespace panda::disasm { 28 29void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings) 30{ 31 auto file_new = panda_file::File::Open(filename_in); 32 file_.swap(file_new); 33 34 if (file_ != nullptr) { 35 prog_ = pandasm::Program {}; 36 37 record_name_to_id_.clear(); 38 method_name_to_id_.clear(); 39 string_offset_to_name_.clear(); 40 skip_strings_ = skip_strings; 41 quiet_ = quiet; 42 43 prog_info_ = ProgInfo {}; 44 45 prog_ann_ = ProgAnnotations {}; 46 47 GetRecords(); 48 GetLiteralArrays(); 49 50 GetLanguageSpecificMetadata(); 51 } else { 52 LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">"; 53 } 54} 55 56void Disassembler::CollectInfo() 57{ 58 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n"; 59 60 debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get()); 61 62 for (const auto &pair : record_name_to_id_) { 63 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]); 64 } 65 66 for (const auto &pair : method_name_to_id_) { 67 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]); 68 } 69} 70 71void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const 72{ 73 if (os.bad()) { 74 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n"; 75 76 return; 77 } 78 79 if (file_ != nullptr) { 80 std::string abc_file = GetFileNameByPath(file_->GetFilename()); 81 os << "# source binary: " << abc_file << "\n\n"; 82 } 83 84 SerializeLanguage(os); 85 86 if (add_separators) { 87 os << "# ====================\n" 88 "# LITERALS\n\n"; 89 } 90 91 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]"; 92 93 for (const auto &[key, lit_arr] : prog_.literalarray_table) { 94 Serialize(key, lit_arr, os); 95 } 96 97 for (const auto &[module_offset, array_table] : modulearray_table_) { 98 Serialize(module_offset, array_table, os); 99 } 100 101 os << "\n"; 102 103 if (add_separators) { 104 os << "# ====================\n" 105 "# RECORDS\n\n"; 106 } 107 108 LOG(DEBUG, DISASSEMBLER) << "[serializing records]"; 109 110 for (const auto &r : prog_.record_table) { 111 Serialize(r.second, os, print_information); 112 } 113 114 if (add_separators) { 115 os << "# ====================\n" 116 "# METHODS\n\n"; 117 } 118 119 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]"; 120 121 for (const auto &m : prog_.function_table) { 122 Serialize(m.second, os, print_information); 123 } 124 125 if (add_separators) { 126 os << "# ====================\n" 127 "# STRING\n\n"; 128 } 129 130 LOG(DEBUG, DISASSEMBLER) << "[serializing strings]"; 131 132 for (const auto &[offset, name_value] : string_offset_to_name_) { 133 SerializeStrings(offset, name_value, os); 134 } 135} 136 137inline bool Disassembler::IsSystemType(const std::string &type_name) 138{ 139 bool is_array_type = type_name.find('[') != std::string::npos; 140 bool is_global = type_name == "_GLOBAL"; 141 142 return is_array_type || is_global; 143} 144 145void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id) 146{ 147 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")"; 148 149 if (record == nullptr) { 150 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!"; 151 152 return; 153 } 154 155 record->name = GetFullRecordName(record_id); 156 157 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name; 158 159 GetMetaData(record, record_id); 160 161 if (!file_->IsExternal(record_id)) { 162 GetMethods(record_id); 163 GetFields(record, record_id); 164 } 165} 166 167void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id) 168{ 169 pandasm::Function new_method("", file_language_); 170 GetMethod(&new_method, method_id); 171 172 const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params); 173 if (prog_.function_table.find(signature) != prog_.function_table.end()) { 174 return; 175 } 176 177 GetMethodAnnotations(new_method, method_id); 178 method_name_to_id_.emplace(signature, method_id); 179 prog_.function_synonyms[new_method.name].push_back(signature); 180 prog_.function_table.emplace(signature, std::move(new_method)); 181} 182 183void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id) 184{ 185 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")"; 186 187 if (method == nullptr) { 188 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!"; 189 190 return; 191 } 192 193 panda_file::MethodDataAccessor method_accessor(*file_, method_id); 194 195 method->name = GetFullMethodName(method_id); 196 197 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name; 198 199 GetMetaData(method, method_id); 200 201 if (method_accessor.GetCodeId().has_value()) { 202 auto code_id = method_accessor.GetCodeId().value(); 203 GetParams(method, code_id); 204 const IdList id_list = GetInstructions(method, method_id, code_id); 205 206 for (const auto &id : id_list) { 207 AddMethodToTables(id); 208 } 209 } else { 210 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id 211 << "). implementation of method expected, but no \'CODE\' tag was found!"; 212 213 return; 214 } 215} 216 217template <typename T> 218void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag, 219 const panda_file::LiteralDataAccessor::LiteralValue &value) const 220{ 221 panda_file::File::EntityId id(std::get<uint32_t>(value)); 222 auto sp = file_->GetSpanFromId(id); 223 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp); 224 if (tag != panda_file::LiteralTag::ARRAY_STRING) { 225 for (size_t i = 0; i < len; i++) { 226 pandasm::LiteralArray::Literal lit; 227 lit.tag_ = tag; 228 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp)); 229 lit_array->literals_.push_back(lit); 230 } 231 return; 232 } 233 for (size_t i = 0; i < len; i++) { 234 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp); 235 pandasm::LiteralArray::Literal lit; 236 lit.tag_ = tag; 237 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id))); 238 lit_array->literals_.push_back(lit); 239 } 240} 241 242void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array, 243 const panda_file::LiteralDataAccessor::LiteralValue &value, 244 const panda_file::LiteralTag &tag) const 245{ 246 pandasm::LiteralArray::Literal lit; 247 lit.tag_ = tag; 248 switch (tag) { 249 case panda_file::LiteralTag::BOOL: { 250 lit.value_ = std::get<bool>(value); 251 break; 252 } 253 case panda_file::LiteralTag::ACCESSOR: 254 case panda_file::LiteralTag::NULLVALUE: 255 case panda_file::LiteralTag::BUILTINTYPEINDEX: { 256 lit.value_ = std::get<uint8_t>(value); 257 break; 258 } 259 case panda_file::LiteralTag::METHODAFFILIATE: { 260 lit.value_ = std::get<uint16_t>(value); 261 break; 262 } 263 case panda_file::LiteralTag::LITERALBUFFERINDEX: 264 case panda_file::LiteralTag::INTEGER: { 265 lit.value_ = std::get<uint32_t>(value); 266 break; 267 } 268 case panda_file::LiteralTag::DOUBLE: { 269 lit.value_ = std::get<double>(value); 270 break; 271 } 272 case panda_file::LiteralTag::STRING: { 273 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value))); 274 lit.value_ = StringDataToString(str_data); 275 break; 276 } 277 case panda_file::LiteralTag::METHOD: 278 case panda_file::LiteralTag::GETTER: 279 case panda_file::LiteralTag::SETTER: 280 case panda_file::LiteralTag::GENERATORMETHOD: { 281 panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value))); 282 lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId())); 283 break; 284 } 285 case panda_file::LiteralTag::LITERALARRAY: { 286 std::stringstream ss; 287 ss << "0x" << std::hex << std::get<uint32_t>(value); 288 lit.value_ = ss.str(); 289 break; 290 } 291 case panda_file::LiteralTag::TAGVALUE: { 292 return; 293 } 294 default: { 295 UNREACHABLE(); 296 } 297 } 298 lit_array->literals_.push_back(lit); 299} 300 301void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const 302{ 303 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId()); 304 lit_array_accessor.EnumerateLiteralVals( 305 offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value, 306 const panda_file::LiteralTag &tag) { 307 switch (tag) { 308 case panda_file::LiteralTag::ARRAY_U1: { 309 FillLiteralArrayData<bool>(lit_array, tag, value); 310 break; 311 } 312 case panda_file::LiteralTag::ARRAY_I8: 313 case panda_file::LiteralTag::ARRAY_U8: { 314 FillLiteralArrayData<uint8_t>(lit_array, tag, value); 315 break; 316 } 317 case panda_file::LiteralTag::ARRAY_I16: 318 case panda_file::LiteralTag::ARRAY_U16: { 319 FillLiteralArrayData<uint16_t>(lit_array, tag, value); 320 break; 321 } 322 case panda_file::LiteralTag::ARRAY_I32: 323 case panda_file::LiteralTag::ARRAY_U32: { 324 FillLiteralArrayData<uint32_t>(lit_array, tag, value); 325 break; 326 } 327 case panda_file::LiteralTag::ARRAY_I64: 328 case panda_file::LiteralTag::ARRAY_U64: { 329 FillLiteralArrayData<uint64_t>(lit_array, tag, value); 330 break; 331 } 332 case panda_file::LiteralTag::ARRAY_F32: { 333 FillLiteralArrayData<float>(lit_array, tag, value); 334 break; 335 } 336 case panda_file::LiteralTag::ARRAY_F64: { 337 FillLiteralArrayData<double>(lit_array, tag, value); 338 break; 339 } 340 case panda_file::LiteralTag::ARRAY_STRING: { 341 FillLiteralArrayData<uint32_t>(lit_array, tag, value); 342 break; 343 } 344 default: { 345 FillLiteralData(lit_array, value, tag); 346 break; 347 } 348 } 349 }); 350} 351 352void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const 353{ 354 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId()); 355 GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index)); 356} 357 358bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const 359{ 360 return module_literals_.find(id.GetOffset()) != module_literals_.end(); 361} 362 363void Disassembler::GetLiteralArrays() 364{ 365 if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) { 366 const auto lit_arrays_id = file_->GetLiteralArraysId(); 367 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex 368 << lit_arrays_id << ")"; 369 370 panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id); 371 size_t num_litarrays = lda.GetLiteralNum(); 372 for (size_t index = 0; index < num_litarrays; index++) { 373 auto id = lda.GetLiteralArrayId(index); 374 if (module_request_phase_literals_.count(id.GetOffset())) { 375 continue; 376 } 377 FillLiteralArrayTable(id, index); 378 } 379 } else { 380 panda::libpandafile::CollectUtil collect_util; 381 std::unordered_set<uint32_t> literal_array_ids; 382 collect_util.CollectLiteralArray(*file_, literal_array_ids); 383 size_t index = 0; 384 for (uint32_t literal_array_id : literal_array_ids) { 385 panda_file::File::EntityId id {literal_array_id}; 386 FillLiteralArrayTable(id, index); 387 index++; 388 } 389 } 390} 391 392void Disassembler::FillLiteralArrayTable(panda_file::File::EntityId &id, size_t index) 393{ 394 if (IsModuleLiteralOffset(id)) { 395 std::stringstream ss; 396 ss << index << " 0x" << std::hex << id.GetOffset(); 397 modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id)); 398 return; 399 } 400 std::stringstream ss; 401 ss << index << " 0x" << std::hex << id.GetOffset(); 402 panda::pandasm::LiteralArray lit_arr; 403 GetLiteralArrayByOffset(&lit_arr, id); 404 prog_.literalarray_table.emplace(ss.str(), lit_arr); 405} 406 407std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const 408{ 409 switch (tag) { 410 case panda_file::ModuleTag::REGULAR_IMPORT: 411 return "REGULAR_IMPORT"; 412 case panda_file::ModuleTag::NAMESPACE_IMPORT: 413 return "NAMESPACE_IMPORT"; 414 case panda_file::ModuleTag::LOCAL_EXPORT: 415 return "LOCAL_EXPORT"; 416 case panda_file::ModuleTag::INDIRECT_EXPORT: 417 return "INDIRECT_EXPORT"; 418 case panda_file::ModuleTag::STAR_EXPORT: 419 return "STAR_EXPORT"; 420 default: { 421 UNREACHABLE(); 422 break; 423 } 424 } 425 return ""; 426} 427 428std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const 429{ 430 panda_file::ModuleDataAccessor mda(*file_, module_id); 431 const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules(); 432 std::vector<std::string> module_literal_array; 433 std::stringstream module_requests_stringstream; 434 module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n"; 435 for (size_t index = 0; index < request_modules_offset.size(); ++index) { 436 module_requests_stringstream << "\t\t" << index << 437 " : " << GetStringByOffset(request_modules_offset[index]) << ",\n"; 438 } 439 module_requests_stringstream << "\t}"; 440 module_literal_array.push_back(module_requests_stringstream.str()); 441 mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset, 442 uint32_t request_module_idx, uint32_t import_name_offset, 443 uint32_t local_name_offset) { 444 std::stringstream ss; 445 ss << "\tModuleTag: " << ModuleTagToString(tag); 446 if (tag == panda_file::ModuleTag::REGULAR_IMPORT || 447 tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) { 448 if (!IsValidOffset(local_name_offset)) { 449 LOG(FATAL, DISASSEMBLER) << "Get invalid local name offset!" << std::endl; 450 } 451 ss << ", local_name: " << GetStringByOffset(local_name_offset); 452 } 453 if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) { 454 if (!IsValidOffset(export_name_offset)) { 455 LOG(FATAL, DISASSEMBLER) << "Get invalid export name offset!" << std::endl; 456 } 457 ss << ", export_name: " << GetStringByOffset(export_name_offset); 458 } 459 if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) { 460 if (!IsValidOffset(import_name_offset)) { 461 LOG(FATAL, DISASSEMBLER) << "Get invalid import name offset!" << std::endl; 462 } 463 ss << ", import_name: " << GetStringByOffset(import_name_offset); 464 } 465 auto request_module_offset = request_modules_offset[request_module_idx]; 466 if (tag != panda_file::ModuleTag::LOCAL_EXPORT) { 467 if (request_module_idx >= request_modules_offset.size() || !IsValidOffset(request_module_offset)) { 468 LOG(FATAL, DISASSEMBLER) << "Get invalid request module offset!" << std::endl; 469 } 470 ss << ", module_request: " << GetStringByOffset(request_module_offset); 471 } 472 module_literal_array.push_back(ss.str()); 473 }); 474 475 return module_literal_array; 476} 477 478void Disassembler::GetRecords() 479{ 480 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n"; 481 482 const auto class_idx = file_->GetClasses(); 483 484 for (size_t i = 0; i < class_idx.size(); i++) { 485 uint32_t class_id = class_idx[i]; 486 auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i; 487 488 if (class_id > file_->GetHeader()->file_size) { 489 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex 490 << class_off << "). binary file corrupted. record offset (0x" << class_id 491 << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!"; 492 break; 493 } 494 495 const panda_file::File::EntityId record_id {class_id}; 496 auto language = GetRecordLanguage(record_id); 497 if (language != file_language_) { 498 if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) { 499 file_language_ = language; 500 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) { 501 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x" 502 << std::hex << class_off << "). record's language (" 503 << panda_file::LanguageToString(language) 504 << ") differs from file's language (" 505 << panda_file::LanguageToString(file_language_) << ")!"; 506 } 507 } 508 509 pandasm::Record record("", file_language_); 510 GetRecord(&record, record_id); 511 512 if (prog_.record_table.find(record.name) == prog_.record_table.end()) { 513 record_name_to_id_.emplace(record.name, record_id); 514 prog_.record_table.emplace(record.name, std::move(record)); 515 } 516 } 517} 518 519void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id) 520{ 521 panda_file::ClassDataAccessor class_accessor {*file_, record_id}; 522 523 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void { 524 pandasm::Field field(file_language_); 525 526 panda_file::File::EntityId field_name_id = field_accessor.GetNameId(); 527 field.name = StringDataToString(file_->GetStringData(field_name_id)); 528 529 uint32_t field_type = field_accessor.GetType(); 530 field.type = FieldTypeToPandasmType(field_type); 531 532 GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD); 533 534 record->field_list.push_back(std::move(field)); 535 }); 536} 537 538void Disassembler::GetMethods(const panda_file::File::EntityId &record_id) 539{ 540 panda_file::ClassDataAccessor class_accessor {*file_, record_id}; 541 542 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void { 543 AddMethodToTables(method_accessor.GetMethodId()); 544 }); 545} 546 547void Disassembler::GetAnnotationElements(pandasm::Function &method, const panda_file::AnnotationDataAccessor &ada, 548 const std::string &annotation_name) 549{ 550 uint32_t elem_count = ada.GetCount(); 551 for (uint32_t i = 0; i < elem_count; i++) { 552 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i); 553 const auto &elem_name = 554 std::string {reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data)}; 555 panda_file::AnnotationDataAccessor::Tag tag = ada.GetTag(i); 556 auto value_type = pandasm::Value::GetCharAsType(tag.GetItem()); 557 switch (value_type) { 558 case pandasm::Value::Type::U1: { 559 bool ann_elem_value = adae.GetScalarValue().Get<bool>(); 560 AddAnnotationElement<bool>(method, annotation_name, elem_name, ann_elem_value); 561 break; 562 } 563 case pandasm::Value::Type::U32: { 564 uint32_t ann_elem_value = adae.GetScalarValue().Get<uint32_t>(); 565 AddAnnotationElement<uint32_t>(method, annotation_name, elem_name, ann_elem_value); 566 break; 567 } 568 case pandasm::Value::Type::F64: { 569 double ann_elem_value = adae.GetScalarValue().Get<double>(); 570 AddAnnotationElement<double>(method, annotation_name, elem_name, ann_elem_value); 571 break; 572 } 573 case pandasm::Value::Type::STRING: { 574 uint32_t string_id = adae.GetScalarValue().Get<uint32_t>(); 575 std::string_view ann_elem_value { 576 reinterpret_cast<const char *>(file_->GetStringData(panda_file::File::EntityId(string_id)).data)}; 577 AddAnnotationElement<std::string_view>(method, annotation_name, elem_name, ann_elem_value); 578 break; 579 } 580 case pandasm::Value::Type::LITERALARRAY: { 581 uint32_t literalArray_offset = adae.GetScalarValue().Get<uint32_t>(); 582 AddAnnotationElement<panda::pandasm::LiteralArray, std::string_view>( 583 method, annotation_name, elem_name, std::string_view {std::to_string(literalArray_offset)}); 584 break; 585 } 586 default: 587 UNREACHABLE(); 588 } 589 } 590} 591 592void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id) 593{ 594 panda_file::MethodDataAccessor mda(*file_, method_id); 595 mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) { 596 panda_file::AnnotationDataAccessor ada(*file_, annotation_id); 597 auto annotation_name = 598 std::string {reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data)}; 599 annotation_name.pop_back(); // remove ; from annotation name 600 601 if (annotation_name.empty()) { 602 return; 603 } 604 605 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations(); 606 std::vector<pandasm::AnnotationElement> elements; 607 pandasm::AnnotationData ann_data(annotation_name, elements); 608 std::vector<pandasm::AnnotationData> annotations; 609 annotations.push_back(std::move(ann_data)); 610 method.metadata->AddAnnotations(annotations); 611 612 GetAnnotationElements(method, ada, annotation_name); 613 }); 614} 615 616template <typename T, typename U = T> 617void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name, 618 const std::string &key, const U &value) 619{ 620 if (key.empty()) { 621 return; 622 } 623 624 std::unique_ptr<pandasm::Value> pandasmValue; 625 if constexpr (std::is_same<T, uint32_t>::value) { 626 pandasmValue = std::move( 627 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value))); 628 } else if constexpr (std::is_same<T, double>::value) { 629 pandasmValue = std::move( 630 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(value))); 631 } else if constexpr (std::is_same<T, bool>::value) { 632 pandasmValue = std::move( 633 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(value))); 634 } else if constexpr (std::is_same<T, std::string_view>::value) { 635 pandasmValue = std::move( 636 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(value))); 637 } else if constexpr (std::is_same<T, panda::pandasm::LiteralArray>::value) { 638 static_assert(std::is_same<U, std::string_view>::value); 639 pandasmValue = std::move(std::make_unique<pandasm::ScalarValue>( 640 pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(value))); 641 } else { 642 UNREACHABLE(); 643 } 644 645 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations(); 646 const auto ann_iter = 647 std::find_if(method_annotation.begin(), method_annotation.end(), 648 [&](pandasm::AnnotationData &ann) -> bool { return ann.GetName() == annotation_name; }); 649 650 pandasm::AnnotationElement annotation_element(key, std::move(pandasmValue)); 651 ann_iter->AddElement(std::move(annotation_element)); 652 method.metadata->SetAnnotations(std::move(method_annotation)); 653} 654 655std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const 656{ 657 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name); 658 bool is_signature = method_synonyms_iter != prog_.function_synonyms.end(); 659 if (!is_signature) { 660 return std::nullopt; 661 } 662 663 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back()); 664 bool is_method = method_iter != prog_.function_table.end(); 665 const auto annotations = method_iter->second.metadata->GetAnnotations(); 666 if (!is_method || annotations.empty()) { 667 return std::nullopt; 668 } 669 670 std::vector<std::string> ann; 671 for (const auto &ann_data : annotations) { 672 ann.emplace_back(ann_data.GetName()); 673 } 674 return ann; 675} 676 677std::optional<std::string> Disassembler::GetSerializedMethodAnnotation(const std::string &method_name, 678 const std::string &anno_name) const 679{ 680 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name); 681 if (method_synonyms_iter == prog_.function_synonyms.end()) { 682 return std::nullopt; 683 } 684 685 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back()); 686 if (method_iter == prog_.function_table.end()) { 687 return std::nullopt; 688 } 689 690 const auto annotations = method_iter->second.metadata->GetAnnotations(); 691 if (annotations.empty()) { 692 return std::nullopt; 693 } 694 695 const auto annotation_iter = 696 std::find_if(annotations.begin(), annotations.end(), 697 [&](const pandasm::AnnotationData &ann) -> bool { return ann.GetName() == anno_name; }); 698 if (annotation_iter == annotations.end()) { 699 return std::nullopt; 700 } 701 702 std::ostringstream os; 703 SerializeMethodAnnotation(*annotation_iter, os); 704 return os.str(); 705} 706 707std::optional<std::string> Disassembler::GetSerializedRecord(const std::string &record_name) const 708{ 709 const auto record_iter = prog_.record_table.find(record_name); 710 if (record_iter == prog_.record_table.end()) { 711 return std::nullopt; 712 } 713 std::ostringstream os; 714 Serialize(record_iter->second, os, false); 715 return os.str(); 716} 717 718std::vector<std::string> Disassembler::GetStrings() const 719{ 720 std::vector<std::string> strings; 721 for (auto &str_info : string_offset_to_name_) { 722 strings.emplace_back(str_info.second); 723 } 724 725 return strings; 726} 727 728std::vector<std::string> Disassembler::GetModuleLiterals() const 729{ 730 std::vector<std::string> module_literals; 731 for (auto &module_array : modulearray_table_) { 732 for (auto &module : module_array.second) { 733 module_literals.emplace_back(module); 734 } 735 } 736 737 return module_literals; 738} 739 740void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const 741{ 742 /** 743 * frame size - 2^16 - 1 744 */ 745 static const uint32_t MAX_ARG_NUM = 0xFFFF; 746 747 LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")"; 748 749 if (method == nullptr) { 750 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!"; 751 752 return; 753 } 754 755 panda_file::CodeDataAccessor code_accessor(*file_, code_id); 756 757 auto params_num = code_accessor.GetNumArgs(); 758 if (params_num > MAX_ARG_NUM) { 759 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id 760 << "). number of function's arguments (" << std::dec << params_num 761 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !"; 762 763 return; 764 } 765 766 method->return_type = pandasm::Type("any", 0); 767 768 for (uint8_t i = 0; i < params_num; i++) { 769 method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), file_language_)); 770 } 771} 772 773LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id, 774 panda_file::File::EntityId code_id) const 775{ 776 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")"; 777 778 if (method == nullptr) { 779 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n"; 780 return LabelTable {}; 781 } 782 783 panda_file::CodeDataAccessor code_accessor(*file_, code_id); 784 785 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions()); 786 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize()); 787 788 size_t try_idx = 0; 789 LabelTable label_table {}; 790 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) { 791 pandasm::Function::CatchBlock catch_block_pa {}; 792 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) { 793 return false; 794 } 795 size_t catch_idx = 0; 796 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) { 797 auto class_idx = catch_block.GetTypeIdx(); 798 if (class_idx == panda_file::INVALID_INDEX) { 799 catch_block_pa.exception_record = ""; 800 } else { 801 const auto class_id = file_->ResolveClassIndex(method_id, class_idx); 802 catch_block_pa.exception_record = GetFullRecordName(class_id); 803 } 804 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx, 805 catch_idx)) { 806 return false; 807 } 808 809 method->catch_blocks.push_back(catch_block_pa); 810 catch_block_pa.catch_begin_label = ""; 811 catch_block_pa.catch_end_label = ""; 812 catch_idx++; 813 814 return true; 815 }); 816 try_idx++; 817 818 return true; 819 }); 820 821 return label_table; 822} 823 824static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur) 825{ 826 size_t count = 0; 827 828 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) { 829 count++; 830 bc_ins_first = bc_ins_first.GetNext(); 831 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) { 832 return std::numeric_limits<size_t>::max(); 833 } 834 } 835 836 return count; 837} 838 839bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last, 840 const panda_file::CodeDataAccessor::TryBlock &try_block, 841 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table, 842 size_t try_idx) const 843{ 844 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc()); 845 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength()); 846 847 const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins); 848 const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins); 849 850 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress(); 851 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress(); 852 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max(); 853 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max(); 854 855 if (!try_begin_offset_in_range || !try_begin_offset_valid) { 856 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex 857 << try_begin_bc_ins.GetAddress(); 858 return false; 859 } else { 860 std::stringstream ss {}; 861 ss << "try_begin_label_" << try_idx; 862 863 LabelTable::iterator it = label_table->find(try_begin_idx); 864 if (it == label_table->end()) { 865 catch_block_pa->try_begin_label = ss.str(); 866 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str())); 867 } else { 868 catch_block_pa->try_begin_label = it->second; 869 } 870 } 871 872 if (!try_end_offset_in_range || !try_end_offset_valid) { 873 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex 874 << try_end_bc_ins.GetAddress(); 875 return false; 876 } else { 877 std::stringstream ss {}; 878 ss << "try_end_label_" << try_idx; 879 880 LabelTable::iterator it = label_table->find(try_end_idx); 881 if (it == label_table->end()) { 882 catch_block_pa->try_end_label = ss.str(); 883 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str())); 884 } else { 885 catch_block_pa->try_end_label = it->second; 886 } 887 } 888 889 return true; 890} 891 892bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last, 893 const panda_file::CodeDataAccessor::CatchBlock &catch_block, 894 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table, 895 size_t try_idx, size_t catch_idx) const 896{ 897 const auto handler_begin_offset = catch_block.GetHandlerPc(); 898 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize(); 899 900 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset); 901 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset); 902 903 const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins); 904 const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins); 905 906 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress(); 907 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress(); 908 const bool handler_end_present = catch_block.GetCodeSize() != 0; 909 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max(); 910 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max(); 911 912 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) { 913 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex 914 << handler_begin_bc_ins.GetAddress(); 915 return false; 916 } else { 917 std::stringstream ss {}; 918 ss << "handler_begin_label_" << try_idx << "_" << catch_idx; 919 920 LabelTable::iterator it = label_table->find(handler_begin_idx); 921 if (it == label_table->end()) { 922 catch_block_pa->catch_begin_label = ss.str(); 923 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str())); 924 } else { 925 catch_block_pa->catch_begin_label = it->second; 926 } 927 } 928 929 if (!handler_end_offset_in_range || !handler_end_offset_valid) { 930 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex 931 << handler_end_bc_ins.GetAddress(); 932 return false; 933 } else if (handler_end_present) { 934 std::stringstream ss {}; 935 ss << "handler_end_label_" << try_idx << "_" << catch_idx; 936 937 LabelTable::iterator it = label_table->find(handler_end_idx); 938 if (it == label_table->end()) { 939 catch_block_pa->catch_end_label = ss.str(); 940 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str())); 941 } else { 942 catch_block_pa->catch_end_label = it->second; 943 } 944 } 945 946 return true; 947} 948 949void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const 950{ 951 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id 952 << ")"; 953 954 if (method == nullptr) { 955 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!"; 956 957 return; 958 } 959 960 panda_file::MethodDataAccessor method_accessor(*file_, method_id); 961 962 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId())); 963 964 if (!method_accessor.IsStatic()) { 965 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId())); 966 auto this_type = pandasm::Type::FromDescriptor(class_name); 967 968 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw 969 << "\') is not static. emplacing self-argument of type " << this_type.GetName(); 970 971 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_)); 972 } else { 973 method->metadata->SetAttribute("static"); 974 } 975 976 if (file_->IsExternal(method_accessor.GetMethodId())) { 977 method->metadata->SetAttribute("external"); 978 } 979 980 std::string ctor_name = panda::panda_file::GetCtorName(file_language_); 981 std::string cctor_name = panda::panda_file::GetCctorName(file_language_); 982 983 const bool is_ctor = (method_name_raw == ctor_name); 984 const bool is_cctor = (method_name_raw == cctor_name); 985 986 if (is_ctor) { 987 method->metadata->SetAttribute("ctor"); 988 method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_"); 989 } else if (is_cctor) { 990 method->metadata->SetAttribute("cctor"); 991 method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_"); 992 } 993} 994 995void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const 996{ 997 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id 998 << ")"; 999 1000 if (record == nullptr) { 1001 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!"; 1002 1003 return; 1004 } 1005 1006 if (file_->IsExternal(record_id)) { 1007 record->metadata->SetAttribute("external"); 1008 } 1009} 1010 1011void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &field_accessor, pandasm::Field *field, 1012 bool isScopeNamesRecord) 1013{ 1014 if (field->type.GetId() == panda_file::Type::TypeId::U32) { 1015 const auto offset = field_accessor.GetValue<uint32_t>().value(); 1016 bool isScopeNameField = isScopeNamesRecord || field->name == ark::SCOPE_NAMES; 1017 if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) { 1018 module_request_phase_literals_.insert(offset); 1019 } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !isScopeNameField) { 1020 LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset 1021 << " is excluded"; 1022 module_literals_.insert(offset); 1023 } 1024 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset)); 1025 } else if (field->type.GetId() == panda_file::Type::TypeId::U8) { 1026 const uint8_t val = field_accessor.GetValue<uint8_t>().value(); 1027 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val)); 1028 } else if (field->type.GetId() == panda_file::Type::TypeId::F64) { 1029 std::optional<double> val = field_accessor.GetValue<double>(); 1030 if (val.has_value()) { 1031 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(val.value())); 1032 } 1033 } else if (field->type.GetId() == panda_file::Type::TypeId::U1) { 1034 std::optional<bool> val = field_accessor.GetValue<bool>(); 1035 if (val.has_value()) { 1036 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(val.value())); 1037 } 1038 } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE && field->type.GetName() == "panda.String") { 1039 std::optional<uint32_t> string_offset_val = field_accessor.GetValue<uint32_t>(); 1040 if (string_offset_val.has_value()) { 1041 std::string_view val {reinterpret_cast<const char *>( 1042 file_->GetStringData(panda_file::File::EntityId(string_offset_val.value())).data)}; 1043 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val)); 1044 } 1045 } else if (field->type.GetRank() > 0) { 1046 std::optional<uint32_t> litarray_offset_val = field_accessor.GetValue<uint32_t>(); 1047 if (litarray_offset_val.has_value()) { 1048 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>( 1049 std::string_view {std::to_string(litarray_offset_val.value())})); 1050 } 1051 } else { 1052 UNREACHABLE(); 1053 } 1054} 1055 1056void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id, 1057 bool is_scope_names_record) 1058{ 1059 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")"; 1060 1061 if (field == nullptr) { 1062 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!"; 1063 1064 return; 1065 } 1066 1067 panda_file::FieldDataAccessor field_accessor(*file_, field_id); 1068 1069 if (field_accessor.IsExternal()) { 1070 field->metadata->SetAttribute("external"); 1071 } 1072 1073 if (field_accessor.IsStatic()) { 1074 field->metadata->SetAttribute("static"); 1075 } 1076 1077 GetMetadataFieldValue(field_accessor, field, is_scope_names_record); 1078} 1079 1080std::string Disassembler::AnnotationTagToString(const char tag) const 1081{ 1082 switch (tag) { 1083 case '1': 1084 return "u1"; 1085 case '2': 1086 return "i8"; 1087 case '3': 1088 return "u8"; 1089 case '4': 1090 return "i16"; 1091 case '5': 1092 return "u16"; 1093 case '6': 1094 return "i32"; 1095 case '7': 1096 return "u32"; 1097 case '8': 1098 return "i64"; 1099 case '9': 1100 return "u64"; 1101 case 'A': 1102 return "f32"; 1103 case 'B': 1104 return "f64"; 1105 case 'C': 1106 return "string"; 1107 case 'D': 1108 return "record"; 1109 case 'E': 1110 return "method"; 1111 case 'F': 1112 return "enum"; 1113 case 'G': 1114 return "annotation"; 1115 case 'I': 1116 return "void"; 1117 case 'J': 1118 return "method_handle"; 1119 case 'K': 1120 return "u1[]"; 1121 case 'L': 1122 return "i8[]"; 1123 case 'M': 1124 return "u8[]"; 1125 case 'N': 1126 return "i16[]"; 1127 case 'O': 1128 return "u16[]"; 1129 case 'P': 1130 return "i32[]"; 1131 case 'Q': 1132 return "u32[]"; 1133 case 'R': 1134 return "i64[]"; 1135 case 'S': 1136 return "u64[]"; 1137 case 'T': 1138 return "f32[]"; 1139 case 'U': 1140 return "f64[]"; 1141 case 'V': 1142 return "string[]"; 1143 case 'W': 1144 return "record[]"; 1145 case 'X': 1146 return "method[]"; 1147 case 'Y': 1148 return "enum[]"; 1149 case 'Z': 1150 return "annotation[]"; 1151 case '@': 1152 return "method_handle[]"; 1153 case '*': 1154 return "nullptr string"; 1155 default: 1156 return std::string(); 1157 } 1158} 1159 1160std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type) 1161{ 1162 std::stringstream ss; 1163 1164 if (type == "i8") { 1165 int8_t res = value.Get<int8_t>(); 1166 ss << static_cast<int>(res); 1167 } else if (type == "u1" || type == "u8") { 1168 uint8_t res = value.Get<uint8_t>(); 1169 ss << static_cast<unsigned int>(res); 1170 } else if (type == "i16") { 1171 ss << value.Get<int16_t>(); 1172 } else if (type == "u16") { 1173 ss << value.Get<uint16_t>(); 1174 } else if (type == "i32") { 1175 ss << value.Get<int32_t>(); 1176 } else if (type == "u32") { 1177 ss << value.Get<uint32_t>(); 1178 } else if (type == "i64") { 1179 ss << value.Get<int64_t>(); 1180 } else if (type == "u64") { 1181 ss << value.Get<uint64_t>(); 1182 } else if (type == "f32") { 1183 ss << value.Get<float>(); 1184 } else if (type == "f64") { 1185 ss << value.Get<double>(); 1186 } else if (type == "string") { 1187 const auto id = value.Get<panda_file::File::EntityId>(); 1188 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\""; 1189 } else if (type == "record") { 1190 const auto id = value.Get<panda_file::File::EntityId>(); 1191 ss << GetFullRecordName(id); 1192 } else if (type == "method") { 1193 const auto id = value.Get<panda_file::File::EntityId>(); 1194 AddMethodToTables(id); 1195 ss << GetMethodSignature(id); 1196 } else if (type == "enum") { 1197 const auto id = value.Get<panda_file::File::EntityId>(); 1198 panda_file::FieldDataAccessor field_accessor(*file_, id); 1199 ss << GetFullRecordName(field_accessor.GetClassId()) << "." 1200 << StringDataToString(file_->GetStringData(field_accessor.GetNameId())); 1201 } else if (type == "annotation") { 1202 const auto id = value.Get<panda_file::File::EntityId>(); 1203 ss << "id_" << id; 1204 } else if (type == "void") { 1205 return std::string(); 1206 } else if (type == "method_handle") { 1207 } 1208 1209 return ss.str(); 1210} 1211 1212std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type, 1213 const size_t idx) 1214{ 1215 std::stringstream ss; 1216 1217 if (type == "i8") { 1218 int8_t res = value.Get<int8_t>(idx); 1219 ss << static_cast<int>(res); 1220 } else if (type == "u1" || type == "u8") { 1221 uint8_t res = value.Get<uint8_t>(idx); 1222 ss << static_cast<unsigned int>(res); 1223 } else if (type == "i16") { 1224 ss << value.Get<int16_t>(idx); 1225 } else if (type == "u16") { 1226 ss << value.Get<uint16_t>(idx); 1227 } else if (type == "i32") { 1228 ss << value.Get<int32_t>(idx); 1229 } else if (type == "u32") { 1230 ss << value.Get<uint32_t>(idx); 1231 } else if (type == "i64") { 1232 ss << value.Get<int64_t>(idx); 1233 } else if (type == "u64") { 1234 ss << value.Get<uint64_t>(idx); 1235 } else if (type == "f32") { 1236 ss << value.Get<float>(idx); 1237 } else if (type == "f64") { 1238 ss << value.Get<double>(idx); 1239 } else if (type == "string") { 1240 const auto id = value.Get<panda_file::File::EntityId>(idx); 1241 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"'; 1242 } else if (type == "record") { 1243 const auto id = value.Get<panda_file::File::EntityId>(idx); 1244 ss << GetFullRecordName(id); 1245 } else if (type == "method") { 1246 const auto id = value.Get<panda_file::File::EntityId>(idx); 1247 AddMethodToTables(id); 1248 ss << GetMethodSignature(id); 1249 } else if (type == "enum") { 1250 const auto id = value.Get<panda_file::File::EntityId>(idx); 1251 panda_file::FieldDataAccessor field_accessor(*file_, id); 1252 ss << GetFullRecordName(field_accessor.GetClassId()) << "." 1253 << StringDataToString(file_->GetStringData(field_accessor.GetNameId())); 1254 } else if (type == "annotation") { 1255 const auto id = value.Get<panda_file::File::EntityId>(idx); 1256 ss << "id_" << id; 1257 } else if (type == "method_handle") { 1258 } else if (type == "nullptr string") { 1259 } 1260 1261 return ss.str(); 1262} 1263 1264std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const 1265{ 1266 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id); 1267 1268 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId())); 1269 1270 std::string class_name = GetFullRecordName(method_accessor.GetClassId()); 1271 if (IsSystemType(class_name)) { 1272 class_name = ""; 1273 } else { 1274 class_name += "."; 1275 } 1276 1277 return class_name + method_name_raw; 1278} 1279 1280std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const 1281{ 1282 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id); 1283 1284 pandasm::Function method(GetFullMethodName(method_id), file_language_); 1285 if (method_accessor.GetCodeId().has_value()) { 1286 GetParams(&method, method_accessor.GetCodeId().value()); 1287 } 1288 GetMetaData(&method, method_id); 1289 1290 return pandasm::GetFunctionSignatureFromName(method.name, method.params); 1291} 1292 1293std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const 1294{ 1295 std::string name = StringDataToString(file_->GetStringData(class_id)); 1296 1297 auto type = pandasm::Type::FromDescriptor(name); 1298 type = pandasm::Type(type.GetComponentName(), type.GetRank()); 1299 1300 return type.GetPandasmName(); 1301} 1302 1303void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const 1304{ 1305 constexpr size_t DEFAULT_OFFSET_WIDTH = 4; 1306 1307 if (file_->IsExternal(record_id)) { 1308 return; 1309 } 1310 1311 panda_file::ClassDataAccessor class_accessor {*file_, record_id}; 1312 std::stringstream ss; 1313 1314 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex 1315 << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) 1316 << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")"; 1317 1318 record_info->record_info = ss.str(); 1319 ss.str(std::string()); 1320 1321 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void { 1322 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex 1323 << field_accessor.GetFieldId(); 1324 1325 record_info->fields_info.push_back(ss.str()); 1326 1327 ss.str(std::string()); 1328 }); 1329} 1330 1331void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const 1332{ 1333 constexpr size_t DEFAULT_OFFSET_WIDTH = 4; 1334 1335 panda_file::MethodDataAccessor method_accessor {*file_, method_id}; 1336 std::stringstream ss; 1337 1338 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex 1339 << method_accessor.GetMethodId(); 1340 1341 if (method_accessor.GetCodeId().has_value()) { 1342 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex 1343 << method_accessor.GetCodeId().value(); 1344 1345 GetInsInfo(method_accessor.GetCodeId().value(), method_info); 1346 } else { 1347 ss << ", <no code>"; 1348 } 1349 1350 method_info->method_info = ss.str(); 1351 1352 if (method_accessor.GetCodeId()) { 1353 ASSERT(debug_info_extractor_ != nullptr); 1354 method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id); 1355 method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id); 1356 method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id); 1357 1358 // Add information about parameters into the table 1359 panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value()); 1360 auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs()); 1361 uint32_t code_size = codeda.GetCodeSize(); 1362 for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) { 1363 panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size}; 1364 method_info->local_variable_table.emplace_back(arg_info); 1365 } 1366 } 1367} 1368 1369static bool IsArray(const panda_file::LiteralTag &tag) 1370{ 1371 switch (tag) { 1372 case panda_file::LiteralTag::ARRAY_U1: 1373 case panda_file::LiteralTag::ARRAY_U8: 1374 case panda_file::LiteralTag::ARRAY_I8: 1375 case panda_file::LiteralTag::ARRAY_U16: 1376 case panda_file::LiteralTag::ARRAY_I16: 1377 case panda_file::LiteralTag::ARRAY_U32: 1378 case panda_file::LiteralTag::ARRAY_I32: 1379 case panda_file::LiteralTag::ARRAY_U64: 1380 case panda_file::LiteralTag::ARRAY_I64: 1381 case panda_file::LiteralTag::ARRAY_F32: 1382 case panda_file::LiteralTag::ARRAY_F64: 1383 case panda_file::LiteralTag::ARRAY_STRING: 1384 return true; 1385 default: 1386 return false; 1387 } 1388} 1389 1390std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const 1391{ 1392 std::stringstream ret; 1393 if (lit_array.literals_.empty()) { 1394 return ""; 1395 } 1396 1397 std::stringstream ss; 1398 ss << "{ "; 1399 const auto &tag = lit_array.literals_[0].tag_; 1400 if (IsArray(tag)) { 1401 ss << LiteralTagToString(tag); 1402 } 1403 ss << lit_array.literals_.size(); 1404 ss << " [ "; 1405 SerializeValues(lit_array, ss); 1406 ss << "]}"; 1407 return ss.str(); 1408} 1409 1410void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const 1411{ 1412 os << key << " "; 1413 os << SerializeLiteralArray(lit_array); 1414 os << "\n"; 1415} 1416 1417void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array, 1418 std::ostream &os) const 1419{ 1420 os << module_offset << " "; 1421 os << SerializeModuleLiteralArray(module_array); 1422 os << "\n"; 1423} 1424 1425std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const 1426{ 1427 if (module_array.empty()) { 1428 return ""; 1429 } 1430 1431 std::stringstream ss; 1432 ss << "{ "; 1433 ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array 1434 ss << " [\n"; 1435 for (size_t index = 0; index < module_array.size(); index++) { 1436 ss << module_array[index] << ";\n"; 1437 } 1438 ss << "]}"; 1439 return ss.str(); 1440} 1441 1442std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const 1443{ 1444 switch (tag) { 1445 case panda_file::LiteralTag::BOOL: 1446 case panda_file::LiteralTag::ARRAY_U1: 1447 return "u1"; 1448 case panda_file::LiteralTag::ARRAY_U8: 1449 return "u8"; 1450 case panda_file::LiteralTag::ARRAY_I8: 1451 return "i8"; 1452 case panda_file::LiteralTag::ARRAY_U16: 1453 return "u16"; 1454 case panda_file::LiteralTag::ARRAY_I16: 1455 return "i16"; 1456 case panda_file::LiteralTag::ARRAY_U32: 1457 return "u32"; 1458 case panda_file::LiteralTag::INTEGER: 1459 case panda_file::LiteralTag::ARRAY_I32: 1460 return "i32"; 1461 case panda_file::LiteralTag::ARRAY_U64: 1462 return "u64"; 1463 case panda_file::LiteralTag::ARRAY_I64: 1464 return "i64"; 1465 case panda_file::LiteralTag::ARRAY_F32: 1466 return "f32"; 1467 case panda_file::LiteralTag::DOUBLE: 1468 case panda_file::LiteralTag::ARRAY_F64: 1469 return "f64"; 1470 case panda_file::LiteralTag::STRING: 1471 case panda_file::LiteralTag::ARRAY_STRING: 1472 return "string"; 1473 case panda_file::LiteralTag::METHOD: 1474 return "method"; 1475 case panda_file::LiteralTag::GETTER: 1476 return "getter"; 1477 case panda_file::LiteralTag::SETTER: 1478 return "setter"; 1479 case panda_file::LiteralTag::GENERATORMETHOD: 1480 return "generator_method"; 1481 case panda_file::LiteralTag::ACCESSOR: 1482 return "accessor"; 1483 case panda_file::LiteralTag::METHODAFFILIATE: 1484 return "method_affiliate"; 1485 case panda_file::LiteralTag::NULLVALUE: 1486 return "null_value"; 1487 case panda_file::LiteralTag::TAGVALUE: 1488 return "tagvalue"; 1489 case panda_file::LiteralTag::LITERALBUFFERINDEX: 1490 return "lit_index"; 1491 case panda_file::LiteralTag::LITERALARRAY: 1492 return "lit_offset"; 1493 case panda_file::LiteralTag::BUILTINTYPEINDEX: 1494 return "builtin_type"; 1495 default: 1496 UNREACHABLE(); 1497 } 1498} 1499 1500template <typename T> 1501void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const 1502{ 1503 switch (lit_array.literals_[0].tag_) { 1504 case panda_file::LiteralTag::ARRAY_U1: { 1505 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1506 os << std::get<bool>(lit_array.literals_[i].value_) << " "; 1507 } 1508 break; 1509 } 1510 case panda_file::LiteralTag::ARRAY_U8: { 1511 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1512 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " "; 1513 } 1514 break; 1515 } 1516 case panda_file::LiteralTag::ARRAY_I8: { 1517 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1518 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " "; 1519 } 1520 break; 1521 } 1522 case panda_file::LiteralTag::ARRAY_U16: { 1523 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1524 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " "; 1525 } 1526 break; 1527 } 1528 case panda_file::LiteralTag::ARRAY_I16: { 1529 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1530 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " "; 1531 } 1532 break; 1533 } 1534 case panda_file::LiteralTag::ARRAY_U32: { 1535 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1536 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " "; 1537 } 1538 break; 1539 } 1540 case panda_file::LiteralTag::ARRAY_I32: { 1541 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1542 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " "; 1543 } 1544 break; 1545 } 1546 case panda_file::LiteralTag::ARRAY_U64: { 1547 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1548 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " "; 1549 } 1550 break; 1551 } 1552 case panda_file::LiteralTag::ARRAY_I64: { 1553 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1554 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " "; 1555 } 1556 break; 1557 } 1558 case panda_file::LiteralTag::ARRAY_F32: { 1559 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1560 os << std::get<float>(lit_array.literals_[i].value_) << " "; 1561 } 1562 break; 1563 } 1564 case panda_file::LiteralTag::ARRAY_F64: { 1565 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1566 os << std::get<double>(lit_array.literals_[i].value_) << " "; 1567 } 1568 break; 1569 } 1570 case panda_file::LiteralTag::ARRAY_STRING: { 1571 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1572 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" "; 1573 } 1574 break; 1575 } 1576 default: 1577 SerializeLiterals(lit_array, os); 1578 } 1579} 1580 1581template <typename T> 1582void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const 1583{ 1584 for (size_t i = 0; i < lit_array.literals_.size(); i++) { 1585 const auto &tag = lit_array.literals_[i].tag_; 1586 os << LiteralTagToString(tag) << ":"; 1587 const auto &val = lit_array.literals_[i].value_; 1588 switch (lit_array.literals_[i].tag_) { 1589 case panda_file::LiteralTag::BOOL: { 1590 os << std::get<bool>(val); 1591 break; 1592 } 1593 case panda_file::LiteralTag::LITERALBUFFERINDEX: 1594 case panda_file::LiteralTag::INTEGER: { 1595 os << bit_cast<int32_t>(std::get<uint32_t>(val)); 1596 break; 1597 } 1598 case panda_file::LiteralTag::DOUBLE: { 1599 os << std::get<double>(val); 1600 break; 1601 } 1602 case panda_file::LiteralTag::STRING: { 1603 os << "\"" << std::get<std::string>(val) << "\""; 1604 break; 1605 } 1606 case panda_file::LiteralTag::METHOD: 1607 case panda_file::LiteralTag::GETTER: 1608 case panda_file::LiteralTag::SETTER: 1609 case panda_file::LiteralTag::GENERATORMETHOD: { 1610 os << std::get<std::string>(val); 1611 break; 1612 } 1613 case panda_file::LiteralTag::NULLVALUE: 1614 case panda_file::LiteralTag::ACCESSOR: { 1615 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val))); 1616 break; 1617 } 1618 case panda_file::LiteralTag::METHODAFFILIATE: { 1619 os << std::get<uint16_t>(val); 1620 break; 1621 } 1622 case panda_file::LiteralTag::LITERALARRAY: { 1623 os << std::get<std::string>(val); 1624 break; 1625 } 1626 case panda_file::LiteralTag::BUILTINTYPEINDEX: { 1627 os << static_cast<int16_t>(std::get<uint8_t>(val)); 1628 break; 1629 } 1630 default: 1631 UNREACHABLE(); 1632 } 1633 os << ", "; 1634 } 1635} 1636 1637void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const 1638{ 1639 if (IsSystemType(record.name)) { 1640 return; 1641 } 1642 1643 os << ".record " << record.name; 1644 1645 const auto record_iter = prog_ann_.record_annotations.find(record.name); 1646 const bool record_in_table = record_iter != prog_ann_.record_annotations.end(); 1647 if (record_in_table) { 1648 Serialize(*record.metadata, record_iter->second.ann_list, os); 1649 } else { 1650 Serialize(*record.metadata, {}, os); 1651 } 1652 1653 if (record.metadata->IsForeign()) { 1654 os << "\n\n"; 1655 return; 1656 } 1657 1658 os << " {"; 1659 1660 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) { 1661 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n"; 1662 SerializeFields(record, os, true); 1663 } else { 1664 os << "\n"; 1665 SerializeFields(record, os, false); 1666 } 1667 1668 os << "}\n\n"; 1669} 1670 1671void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literal_array, std::stringstream &ss) const 1672{ 1673 ss << "["; 1674 bool firstItem = true; 1675 for (const auto &item : literal_array.literals_) { 1676 if (!firstItem) { 1677 ss << ", "; 1678 } else { 1679 firstItem = false; 1680 } 1681 1682 switch (item.tag_) { 1683 case panda_file::LiteralTag::DOUBLE: { 1684 ss << std::get<double>(item.value_); 1685 break; 1686 } 1687 case panda_file::LiteralTag::BOOL: { 1688 ss << std::get<bool>(item.value_); 1689 break; 1690 } 1691 case panda_file::LiteralTag::STRING: { 1692 ss << "\"" << std::get<std::string>(item.value_) << "\""; 1693 break; 1694 } 1695 case panda_file::LiteralTag::LITERALARRAY: { 1696 std::string offset_str = std::get<std::string>(item.value_); 1697 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16); 1698 pandasm::LiteralArray lit_array; 1699 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset)); 1700 DumpLiteralArray(lit_array, ss); 1701 break; 1702 } 1703 case panda_file::LiteralTag::BUILTINTYPEINDEX: { 1704 // By convention, BUILTINTYPEINDEX is used to store type of empty arrays, 1705 // therefore it has no value 1706 break; 1707 } 1708 default: { 1709 UNREACHABLE(); 1710 break; 1711 } 1712 } 1713 } 1714 ss << "]"; 1715} 1716 1717void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const 1718{ 1719 if (f.type.GetId() == panda_file::Type::TypeId::U32) { 1720 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>(); 1721 } else if (f.type.GetId() == panda_file::Type::TypeId::U8) { 1722 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>()); 1723 } else if (f.type.GetId() == panda_file::Type::TypeId::F64) { 1724 ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>()); 1725 } else if (f.type.GetId() == panda_file::Type::TypeId::U1) { 1726 ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>()); 1727 } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "panda.String") { 1728 ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\""; 1729 } else if (f.type.GetRank() > 0) { 1730 uint32_t lit_array_fffset = 1731 std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>())); 1732 pandasm::LiteralArray lit_array; 1733 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset)); 1734 ss << " = "; 1735 DumpLiteralArray(lit_array, ss); 1736 } 1737} 1738 1739void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const 1740{ 1741 constexpr size_t INFO_OFFSET = 80; 1742 1743 const auto record_iter = prog_ann_.record_annotations.find(record.name); 1744 const bool record_in_table = record_iter != prog_ann_.record_annotations.end(); 1745 1746 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {}); 1747 1748 size_t field_idx = 0; 1749 1750 std::stringstream ss; 1751 for (const auto &f : record.field_list) { 1752 std::string file = GetFileNameByPath(f.name); 1753 ss << "\t" << f.type.GetPandasmName() << " " << file; 1754 if (f.metadata->GetValue().has_value()) { 1755 SerializeFieldValue(f, ss); 1756 } 1757 if (record_in_table) { 1758 const auto field_iter = record_iter->second.field_annotations.find(f.name); 1759 if (field_iter != record_iter->second.field_annotations.end()) { 1760 Serialize(*f.metadata, field_iter->second, ss); 1761 } else { 1762 Serialize(*f.metadata, {}, ss); 1763 } 1764 } else { 1765 Serialize(*f.metadata, {}, ss); 1766 } 1767 1768 if (print_information) { 1769 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n"; 1770 } else { 1771 os << ss.str() << "\n"; 1772 } 1773 1774 ss.str(std::string()); 1775 ss.clear(); 1776 1777 field_idx++; 1778 } 1779} 1780 1781std::string Disassembler::getLiteralArrayTypeFromValue(const pandasm::LiteralArray &literal_array) const 1782{ 1783 [[maybe_unused]] auto size = literal_array.literals_.size(); 1784 ASSERT(size > 0); 1785 switch (literal_array.literals_[0].tag_) { 1786 case panda_file::LiteralTag::DOUBLE: { 1787 return "f64[]"; 1788 } 1789 case panda_file::LiteralTag::BOOL: { 1790 return "u1[]"; 1791 } 1792 case panda_file::LiteralTag::STRING: { 1793 return "panda.String[]"; 1794 } 1795 case panda_file::LiteralTag::LITERALARRAY: { 1796 std::string offset_str = std::get<std::string>(literal_array.literals_[0].value_); 1797 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16); 1798 pandasm::LiteralArray lit_array; 1799 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset)); 1800 return getLiteralArrayTypeFromValue(lit_array) + "[]"; 1801 } 1802 case panda_file::LiteralTag::BUILTINTYPEINDEX: { 1803 uint8_t typeIndex = std::get<uint8_t>(literal_array.literals_[0].value_); 1804 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE = 0; 1805 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE = 1; 1806 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE = 2; 1807 switch (typeIndex) { 1808 case EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE: 1809 return "f64[]"; 1810 case EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE: 1811 return "u1[]"; 1812 case EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE: 1813 return "panda.String[]"; 1814 default: 1815 UNREACHABLE(); 1816 break; 1817 } 1818 } 1819 default: { 1820 UNREACHABLE(); 1821 break; 1822 } 1823 } 1824} 1825 1826void Disassembler::SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> &elements, 1827 std::stringstream &ss, uint32_t idx) const 1828{ 1829 for (const auto &elem : elements) { 1830 auto type = elem.GetValue()->GetType(); 1831 if (type == pandasm::Value::Type::U32) { 1832 ss << "\t" 1833 << "u32" 1834 << " " << elem.GetName() << " { "; 1835 ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>() << " }"; 1836 } else if (type == pandasm::Value::Type::F64) { 1837 ss << "\t" 1838 << "f64" 1839 << " " << elem.GetName() << " { "; 1840 ss << elem.GetValue()->GetAsScalar()->GetValue<double>() << " }"; 1841 } else if (type == pandasm::Value::Type::U1) { 1842 ss << "\t" 1843 << "u1" 1844 << " " << elem.GetName() << " { "; 1845 ss << elem.GetValue()->GetAsScalar()->GetValue<bool>() << " }"; 1846 } else if (type == pandasm::Value::Type::STRING) { 1847 ss << "\t" 1848 << "panda.String" 1849 << " " << elem.GetName() << " { \""; 1850 ss << elem.GetValue()->GetAsScalar()->GetValue<std::string>() << "\" }"; 1851 } else if (type == pandasm::Value::Type::LITERALARRAY) { 1852 uint32_t lit_array_fffset = std::stoi(elem.GetValue()->GetAsScalar()->GetValue<std::string>()); 1853 pandasm::LiteralArray lit_array; 1854 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset)); 1855 std::string typeName = getLiteralArrayTypeFromValue(lit_array); 1856 ss << "\t" << typeName << " " << elem.GetName() << " { "; 1857 DumpLiteralArray(lit_array, ss); 1858 ss << " }"; 1859 } else { 1860 UNREACHABLE(); 1861 } 1862 if (idx > 0) { 1863 ss << "\n"; 1864 } 1865 --idx; 1866 } 1867} 1868 1869void Disassembler::SerializeMethodAnnotation(const pandasm::AnnotationData &ann, std::ostream &os) const 1870{ 1871 os << ann.GetName() << ":\n"; 1872 std::stringstream ss; 1873 std::vector<pandasm::AnnotationElement> elements = ann.GetElements(); 1874 if (elements.empty()) { 1875 return; 1876 } 1877 uint32_t idx = elements.size() - 1; 1878 SerializeAnnotationElement(elements, ss, idx); 1879 os << ss.str() << "\n"; 1880} 1881 1882void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const 1883{ 1884 const auto annotations = method.metadata->GetAnnotations(); 1885 if (annotations.empty()) { 1886 return; 1887 } 1888 1889 for (const auto &ann : annotations) { 1890 SerializeMethodAnnotation(ann, os); 1891 } 1892} 1893 1894void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os, 1895 const std::map<std::string, MethodInfo>::const_iterator &method_info_it, 1896 bool print_method_info) const 1897{ 1898 std::string delim = ": "; 1899 size_t width = 0; 1900 if (print_method_info) { 1901 for (const auto &i : method.ins) { 1902 size_t ins_size = i.ToString().size(); 1903 if (i.set_label) { 1904 ins_size = ins_size - i.label.size() - delim.length(); 1905 } 1906 1907 if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) { 1908 width = i.ToString().size(); 1909 } 1910 } 1911 } 1912 1913 for (size_t i = 0; i < method.ins.size(); i++) { 1914 std::string ins = method.ins[i].ToString("", true, method.regs_num); 1915 if (method.ins[i].set_label) { 1916 size_t pos = ins.find(delim); 1917 std::string label = ins.substr(0, pos); 1918 ins.erase(0, pos + delim.length()); 1919 os << label << ":\n"; 1920 } 1921 1922 if (ins != "") { 1923 os << "\t" << std::setw(width) << std::left << ins; 1924 if (print_method_info && i < method_info_it->second.instructions_info.size()) { 1925 os << " # " << method_info_it->second.instructions_info.at(i); 1926 } 1927 os << "\n"; 1928 } 1929 } 1930} 1931 1932void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const 1933{ 1934 SerializeMethodAnnotations(method, os); 1935 os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "("; 1936 1937 if (method.params.size() > 0) { 1938 os << method.params[0].type.GetPandasmName() << " a0"; 1939 1940 for (uint8_t i = 1; i < method.params.size(); i++) { 1941 os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i; 1942 } 1943 } 1944 os << ")"; 1945 1946 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params); 1947 1948 const auto method_iter = prog_ann_.method_annotations.find(signature); 1949 if (method_iter != prog_ann_.method_annotations.end()) { 1950 Serialize(*method.metadata, method_iter->second, os); 1951 } else { 1952 Serialize(*method.metadata, {}, os); 1953 } 1954 1955 auto method_info_it = prog_info_.methods_info.find(signature); 1956 bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end(); 1957 if (print_method_info) { 1958 os << " { # " << method_info_it->second.method_info << "\n# CODE:\n"; 1959 } else { 1960 os << " {\n"; 1961 } 1962 SerializeInstructions(method, os, method_info_it, print_method_info); 1963 1964 if (method.catch_blocks.size() != 0) { 1965 os << "\n"; 1966 1967 for (const auto &catch_block : method.catch_blocks) { 1968 Serialize(catch_block, os); 1969 1970 os << "\n"; 1971 } 1972 } 1973 1974 if (print_method_info) { 1975 const MethodInfo &method_info = method_info_it->second; 1976 SerializeLineNumberTable(method_info.line_number_table, os); 1977 SerializeColumnNumberTable(method_info.column_number_table, os); 1978 SerializeLocalVariableTable(method_info.local_variable_table, method, os); 1979 } 1980 1981 os << "}\n\n"; 1982} 1983 1984void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value, 1985 std::ostream &os) const 1986{ 1987 os << "[offset:0x" << std::hex << offset << ", name_value:" << name_value << "]" << std::endl; 1988} 1989 1990void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const 1991{ 1992 if (catch_block.exception_record == "") { 1993 os << ".catchall "; 1994 } else { 1995 os << ".catch " << catch_block.exception_record << ", "; 1996 } 1997 1998 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label; 1999 2000 if (catch_block.catch_end_label != "") { 2001 os << ", " << catch_block.catch_end_label; 2002 } 2003} 2004 2005void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const 2006{ 2007 auto bool_attributes = meta.GetBoolAttributes(); 2008 auto attributes = meta.GetAttributes(); 2009 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) { 2010 return; 2011 } 2012 2013 os << " <"; 2014 2015 size_t size = bool_attributes.size(); 2016 size_t idx = 0; 2017 for (const auto &attr : bool_attributes) { 2018 os << attr; 2019 ++idx; 2020 2021 if (!attributes.empty() || !ann_list.empty() || idx < size) { 2022 os << ", "; 2023 } 2024 } 2025 2026 size = attributes.size(); 2027 idx = 0; 2028 for (const auto &[key, values] : attributes) { 2029 for (size_t i = 0; i < values.size(); i++) { 2030 os << key << "=" << values[i]; 2031 2032 if (i < values.size() - 1) { 2033 os << ", "; 2034 } 2035 } 2036 2037 ++idx; 2038 2039 if (!ann_list.empty() || idx < size) { 2040 os << ", "; 2041 } 2042 } 2043 2044 size = ann_list.size(); 2045 idx = 0; 2046 for (const auto &[key, value] : ann_list) { 2047 os << key << "=" << value; 2048 2049 ++idx; 2050 2051 if (idx < size) { 2052 os << ", "; 2053 } 2054 } 2055 2056 os << ">"; 2057} 2058 2059void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table, 2060 std::ostream &os) const 2061{ 2062 if (line_number_table.empty()) { 2063 return; 2064 } 2065 2066 os << "\n# LINE_NUMBER_TABLE:\n"; 2067 for (const auto &line_info : line_number_table) { 2068 os << "#\tline " << line_info.line << ": " << line_info.offset << "\n"; 2069 } 2070} 2071 2072void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table, 2073 std::ostream &os) const 2074{ 2075 if (column_number_table.empty()) { 2076 return; 2077 } 2078 2079 os << "\n# COLUMN_NUMBER_TABLE:\n"; 2080 for (const auto &column_info : column_number_table) { 2081 os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n"; 2082 } 2083} 2084 2085void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table, 2086 const pandasm::Function &method, std::ostream &os) const 2087{ 2088 if (local_variable_table.empty()) { 2089 return; 2090 } 2091 2092 os << "\n# LOCAL_VARIABLE_TABLE:\n"; 2093 os << "#\t Start End Register Name Signature\n"; 2094 const int START_WIDTH = 5; 2095 const int END_WIDTH = 4; 2096 const int REG_WIDTH = 8; 2097 const int NAME_WIDTH = 14; 2098 for (const auto &variable_info : local_variable_table) { 2099 std::ostringstream reg_stream; 2100 reg_stream << variable_info.reg_number << '('; 2101 if (variable_info.reg_number < 0) { 2102 reg_stream << "acc"; 2103 } else { 2104 uint32_t vreg = variable_info.reg_number; 2105 uint32_t first_arg_reg = method.GetTotalRegs(); 2106 if (vreg < first_arg_reg) { 2107 reg_stream << 'v' << vreg; 2108 } else { 2109 reg_stream << 'a' << vreg - first_arg_reg; 2110 } 2111 } 2112 reg_stream << ')'; 2113 2114 os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << " "; 2115 os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << " "; 2116 os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " "; 2117 os << std::setw(NAME_WIDTH) << std::right << variable_info.name << " " << variable_info.type; 2118 if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) { 2119 os << " (" << variable_info.type_signature << ")"; 2120 } 2121 os << "\n"; 2122 } 2123} 2124 2125pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const 2126{ 2127 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o)); 2128} 2129 2130std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id, size_t idx) const 2131{ 2132 std::stringstream name; 2133 const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex()); 2134 std::string str_data = StringDataToString(file_->GetStringData(offset)); 2135 if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) { 2136 name << GetMethodSignature(offset); 2137 } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) { 2138 name << '\"'; 2139 name << str_data; 2140 name << '\"'; 2141 string_offset_to_name_.emplace(offset, str_data); 2142 } else { 2143 ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID)); 2144 pandasm::LiteralArray lit_array; 2145 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset)); 2146 name << SerializeLiteralArray(lit_array); 2147 } 2148 2149 return name.str(); 2150} 2151 2152panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const 2153{ 2154 if (file_->IsExternal(class_id)) { 2155 return panda::panda_file::SourceLang::PANDA_ASSEMBLY; 2156 } 2157 2158 panda_file::ClassDataAccessor cda(*file_, class_id); 2159 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY); 2160} 2161 2162static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr, 2163 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last, 2164 panda_file::File::EntityId code_id) 2165{ 2166 const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0)); 2167 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset); 2168 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) { 2169 size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest); 2170 if (idx != std::numeric_limits<size_t>::max()) { 2171 if (label_table->find(idx) == label_table->end()) { 2172 std::stringstream ss {}; 2173 ss << "jump_label_" << label_table->size(); 2174 (*label_table)[idx] = ss.str(); 2175 } 2176 2177 pa_ins->imms.clear(); 2178 pa_ins->ids.push_back(label_table->at(idx)); 2179 } else { 2180 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id 2181 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr) 2182 << ": invalid jump offset 0x" << jmp_offset 2183 << " - jumping in the middle of another instruction!"; 2184 } 2185 } else { 2186 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id 2187 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr) 2188 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!"; 2189 } 2190} 2191 2192IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id, 2193 panda_file::File::EntityId code_id) const 2194{ 2195 panda_file::CodeDataAccessor code_accessor(*file_, code_id); 2196 2197 const auto ins_sz = code_accessor.GetCodeSize(); 2198 const auto ins_arr = code_accessor.GetInstructions(); 2199 2200 method->regs_num = code_accessor.GetNumVregs(); 2201 2202 auto bc_ins = BytecodeInstruction(ins_arr); 2203 const auto bc_ins_last = bc_ins.JumpTo(ins_sz); 2204 2205 LabelTable label_table = GetExceptions(method, method_id, code_id); 2206 2207 IdList unknown_external_methods {}; 2208 2209 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) { 2210 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) { 2211 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id 2212 << "). bytecode instructions sequence corrupted for method " << method->name 2213 << "! went out of bounds"; 2214 2215 break; 2216 } 2217 2218 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id); 2219 if (pa_ins.IsJump()) { 2220 translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id); 2221 } 2222 2223 // check if method id is unknown external method. if so, emplace it in table 2224 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) { 2225 const auto arg_method_idx = bc_ins.GetId().AsIndex(); 2226 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx); 2227 2228 const auto arg_method_signature = GetMethodSignature(arg_method_id); 2229 2230 const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend(); 2231 const bool is_external = file_->IsExternal(arg_method_id); 2232 if (is_external && !is_present) { 2233 unknown_external_methods.push_back(arg_method_id); 2234 } 2235 } 2236 2237 method->AddInstruction(pa_ins); 2238 bc_ins = bc_ins.GetNext(); 2239 } 2240 2241 size_t instruction_count = method->ins.size(); 2242 for (const auto &pair : label_table) { 2243 if (pair.first > instruction_count) { 2244 LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count 2245 << ", but the label index is " << pair.first; 2246 continue; 2247 } 2248 2249 // In some case, the end label can be after the last instruction 2250 // Creating an invalid instruction for the label to make sure it can be serialized 2251 if (pair.first == instruction_count) { 2252 pandasm::Ins ins {}; 2253 ins.opcode = pandasm::Opcode::INVALID; 2254 method->AddInstruction(ins); 2255 } 2256 2257 method->ins[pair.first].label = pair.second; 2258 method->ins[pair.first].set_label = true; 2259 } 2260 2261 return unknown_external_methods; 2262} 2263 2264std::vector<size_t> Disassembler::GetColumnNumber() 2265{ 2266 std::vector<size_t> columnNumber; 2267 for (const auto &method_info : prog_info_.methods_info) { 2268 for (const auto &column_number : method_info.second.column_number_table) { 2269 columnNumber.push_back(column_number.column); 2270 } 2271 } 2272 return columnNumber; 2273} 2274 2275std::vector<size_t> Disassembler::GetLineNumber() 2276{ 2277 std::vector<size_t> lineNumber; 2278 for (const auto &method_info : prog_info_.methods_info) { 2279 for (const auto &line_number : method_info.second.line_number_table) { 2280 lineNumber.push_back(line_number.line); 2281 } 2282 } 2283 return lineNumber; 2284} 2285 2286} // namespace panda::disasm 2287