1/*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "disassembler.h"
17#include "libpandafile/util/collect_util.h"
18#include "mangling.h"
19#include "utils/logger.h"
20#include "utils/const_value.h"
21
22#include <iomanip>
23#include <type_traits>
24
25#include "get_language_specific_metadata.inc"
26
27namespace panda::disasm {
28
29void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
30{
31    auto file_new = panda_file::File::Open(filename_in);
32    file_.swap(file_new);
33
34    if (file_ != nullptr) {
35        prog_ = pandasm::Program {};
36
37        record_name_to_id_.clear();
38        method_name_to_id_.clear();
39        string_offset_to_name_.clear();
40        skip_strings_ = skip_strings;
41        quiet_ = quiet;
42
43        prog_info_ = ProgInfo {};
44
45        prog_ann_ = ProgAnnotations {};
46
47        GetRecords();
48        GetLiteralArrays();
49
50        GetLanguageSpecificMetadata();
51    } else {
52        LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
53    }
54}
55
56void Disassembler::CollectInfo()
57{
58    LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
59
60    debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
61
62    for (const auto &pair : record_name_to_id_) {
63        GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
64    }
65
66    for (const auto &pair : method_name_to_id_) {
67        GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
68    }
69}
70
71void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
72{
73    if (os.bad()) {
74        LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
75
76        return;
77    }
78
79    if (file_ != nullptr) {
80        std::string abc_file = GetFileNameByPath(file_->GetFilename());
81        os << "# source binary: " << abc_file << "\n\n";
82    }
83
84    SerializeLanguage(os);
85
86    if (add_separators) {
87        os << "# ====================\n"
88              "# LITERALS\n\n";
89    }
90
91    LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
92
93    for (const auto &[key, lit_arr] : prog_.literalarray_table) {
94        Serialize(key, lit_arr, os);
95    }
96
97    for (const auto &[module_offset, array_table] : modulearray_table_) {
98        Serialize(module_offset, array_table, os);
99    }
100
101    os << "\n";
102
103    if (add_separators) {
104        os << "# ====================\n"
105              "# RECORDS\n\n";
106    }
107
108    LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
109
110    for (const auto &r : prog_.record_table) {
111        Serialize(r.second, os, print_information);
112    }
113
114    if (add_separators) {
115        os << "# ====================\n"
116              "# METHODS\n\n";
117    }
118
119    LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
120
121    for (const auto &m : prog_.function_table) {
122        Serialize(m.second, os, print_information);
123    }
124
125    if (add_separators) {
126        os << "# ====================\n"
127              "# STRING\n\n";
128    }
129
130    LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
131
132    for (const auto &[offset, name_value] : string_offset_to_name_) {
133        SerializeStrings(offset, name_value, os);
134    }
135}
136
137inline bool Disassembler::IsSystemType(const std::string &type_name)
138{
139    bool is_array_type = type_name.find('[') != std::string::npos;
140    bool is_global = type_name == "_GLOBAL";
141
142    return is_array_type || is_global;
143}
144
145void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
146{
147    LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
148
149    if (record == nullptr) {
150        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
151
152        return;
153    }
154
155    record->name = GetFullRecordName(record_id);
156
157    LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
158
159    GetMetaData(record, record_id);
160
161    if (!file_->IsExternal(record_id)) {
162        GetMethods(record_id);
163        GetFields(record, record_id);
164    }
165}
166
167void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
168{
169    pandasm::Function new_method("", file_language_);
170    GetMethod(&new_method, method_id);
171
172    const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
173    if (prog_.function_table.find(signature) != prog_.function_table.end()) {
174        return;
175    }
176
177    GetMethodAnnotations(new_method, method_id);
178    method_name_to_id_.emplace(signature, method_id);
179    prog_.function_synonyms[new_method.name].push_back(signature);
180    prog_.function_table.emplace(signature, std::move(new_method));
181}
182
183void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
184{
185    LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
186
187    if (method == nullptr) {
188        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
189
190        return;
191    }
192
193    panda_file::MethodDataAccessor method_accessor(*file_, method_id);
194
195    method->name = GetFullMethodName(method_id);
196
197    LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
198
199    GetMetaData(method, method_id);
200
201    if (method_accessor.GetCodeId().has_value()) {
202        auto code_id = method_accessor.GetCodeId().value();
203        GetParams(method, code_id);
204        const IdList id_list = GetInstructions(method, method_id, code_id);
205
206        for (const auto &id : id_list) {
207            AddMethodToTables(id);
208        }
209    } else {
210        LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
211                                 << "). implementation of method expected, but no \'CODE\' tag was found!";
212
213        return;
214    }
215}
216
217template <typename T>
218void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
219                                        const panda_file::LiteralDataAccessor::LiteralValue &value) const
220{
221    panda_file::File::EntityId id(std::get<uint32_t>(value));
222    auto sp = file_->GetSpanFromId(id);
223    auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
224    if (tag != panda_file::LiteralTag::ARRAY_STRING) {
225        for (size_t i = 0; i < len; i++) {
226            pandasm::LiteralArray::Literal lit;
227            lit.tag_ = tag;
228            lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
229            lit_array->literals_.push_back(lit);
230        }
231        return;
232    }
233    for (size_t i = 0; i < len; i++) {
234        auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
235        pandasm::LiteralArray::Literal lit;
236        lit.tag_ = tag;
237        lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
238        lit_array->literals_.push_back(lit);
239    }
240}
241
242void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
243                                   const panda_file::LiteralDataAccessor::LiteralValue &value,
244                                   const panda_file::LiteralTag &tag) const
245{
246    pandasm::LiteralArray::Literal lit;
247    lit.tag_ = tag;
248    switch (tag) {
249        case panda_file::LiteralTag::BOOL: {
250            lit.value_ = std::get<bool>(value);
251            break;
252        }
253        case panda_file::LiteralTag::ACCESSOR:
254        case panda_file::LiteralTag::NULLVALUE:
255        case panda_file::LiteralTag::BUILTINTYPEINDEX: {
256            lit.value_ = std::get<uint8_t>(value);
257            break;
258        }
259        case panda_file::LiteralTag::METHODAFFILIATE: {
260            lit.value_ = std::get<uint16_t>(value);
261            break;
262        }
263        case panda_file::LiteralTag::LITERALBUFFERINDEX:
264        case panda_file::LiteralTag::INTEGER: {
265            lit.value_ = std::get<uint32_t>(value);
266            break;
267        }
268        case panda_file::LiteralTag::DOUBLE: {
269            lit.value_ = std::get<double>(value);
270            break;
271        }
272        case panda_file::LiteralTag::STRING: {
273            auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
274            lit.value_ = StringDataToString(str_data);
275            break;
276        }
277        case panda_file::LiteralTag::METHOD:
278        case panda_file::LiteralTag::GETTER:
279        case panda_file::LiteralTag::SETTER:
280        case panda_file::LiteralTag::GENERATORMETHOD: {
281            panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
282            lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
283            break;
284        }
285        case panda_file::LiteralTag::LITERALARRAY: {
286            std::stringstream ss;
287            ss << "0x" << std::hex << std::get<uint32_t>(value);
288            lit.value_ = ss.str();
289            break;
290        }
291        case panda_file::LiteralTag::TAGVALUE: {
292            return;
293        }
294        default: {
295            UNREACHABLE();
296        }
297    }
298    lit_array->literals_.push_back(lit);
299}
300
301void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
302{
303    panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
304    lit_array_accessor.EnumerateLiteralVals(
305        offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
306                                  const panda_file::LiteralTag &tag) {
307            switch (tag) {
308                case panda_file::LiteralTag::ARRAY_U1: {
309                    FillLiteralArrayData<bool>(lit_array, tag, value);
310                    break;
311                }
312                case panda_file::LiteralTag::ARRAY_I8:
313                case panda_file::LiteralTag::ARRAY_U8: {
314                    FillLiteralArrayData<uint8_t>(lit_array, tag, value);
315                    break;
316                }
317                case panda_file::LiteralTag::ARRAY_I16:
318                case panda_file::LiteralTag::ARRAY_U16: {
319                    FillLiteralArrayData<uint16_t>(lit_array, tag, value);
320                    break;
321                }
322                case panda_file::LiteralTag::ARRAY_I32:
323                case panda_file::LiteralTag::ARRAY_U32: {
324                    FillLiteralArrayData<uint32_t>(lit_array, tag, value);
325                    break;
326                }
327                case panda_file::LiteralTag::ARRAY_I64:
328                case panda_file::LiteralTag::ARRAY_U64: {
329                    FillLiteralArrayData<uint64_t>(lit_array, tag, value);
330                    break;
331                }
332                case panda_file::LiteralTag::ARRAY_F32: {
333                    FillLiteralArrayData<float>(lit_array, tag, value);
334                    break;
335                }
336                case panda_file::LiteralTag::ARRAY_F64: {
337                    FillLiteralArrayData<double>(lit_array, tag, value);
338                    break;
339                }
340                case panda_file::LiteralTag::ARRAY_STRING: {
341                    FillLiteralArrayData<uint32_t>(lit_array, tag, value);
342                    break;
343                }
344                default: {
345                    FillLiteralData(lit_array, value, tag);
346                    break;
347                }
348            }
349        });
350}
351
352void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
353{
354    panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
355    GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
356}
357
358bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
359{
360    return module_literals_.find(id.GetOffset()) != module_literals_.end();
361}
362
363void Disassembler::GetLiteralArrays()
364{
365    if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
366        const auto lit_arrays_id = file_->GetLiteralArraysId();
367        LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
368                                 << lit_arrays_id << ")";
369
370        panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
371        size_t num_litarrays = lda.GetLiteralNum();
372        for (size_t index = 0; index < num_litarrays; index++) {
373            auto id = lda.GetLiteralArrayId(index);
374            if (module_request_phase_literals_.count(id.GetOffset())) {
375                continue;
376            }
377            FillLiteralArrayTable(id, index);
378        }
379    } else {
380        panda::libpandafile::CollectUtil collect_util;
381        std::unordered_set<uint32_t> literal_array_ids;
382        collect_util.CollectLiteralArray(*file_, literal_array_ids);
383        size_t index = 0;
384        for (uint32_t literal_array_id : literal_array_ids) {
385            panda_file::File::EntityId id {literal_array_id};
386            FillLiteralArrayTable(id, index);
387            index++;
388        }
389    }
390}
391
392void Disassembler::FillLiteralArrayTable(panda_file::File::EntityId &id, size_t index)
393{
394    if (IsModuleLiteralOffset(id)) {
395        std::stringstream ss;
396        ss << index << " 0x" << std::hex << id.GetOffset();
397        modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
398        return;
399    }
400    std::stringstream ss;
401    ss << index << " 0x" << std::hex << id.GetOffset();
402    panda::pandasm::LiteralArray lit_arr;
403    GetLiteralArrayByOffset(&lit_arr, id);
404    prog_.literalarray_table.emplace(ss.str(), lit_arr);
405}
406
407std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
408{
409    switch (tag) {
410        case panda_file::ModuleTag::REGULAR_IMPORT:
411            return "REGULAR_IMPORT";
412        case panda_file::ModuleTag::NAMESPACE_IMPORT:
413            return "NAMESPACE_IMPORT";
414        case panda_file::ModuleTag::LOCAL_EXPORT:
415            return "LOCAL_EXPORT";
416        case panda_file::ModuleTag::INDIRECT_EXPORT:
417            return "INDIRECT_EXPORT";
418        case panda_file::ModuleTag::STAR_EXPORT:
419            return "STAR_EXPORT";
420        default: {
421            UNREACHABLE();
422            break;
423        }
424    }
425    return "";
426}
427
428std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
429{
430    panda_file::ModuleDataAccessor mda(*file_, module_id);
431    const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
432    std::vector<std::string> module_literal_array;
433    std::stringstream module_requests_stringstream;
434    module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n";
435    for (size_t index = 0; index < request_modules_offset.size(); ++index) {
436        module_requests_stringstream << "\t\t" << index <<
437            " : " << GetStringByOffset(request_modules_offset[index]) << ",\n";
438    }
439    module_requests_stringstream << "\t}";
440    module_literal_array.push_back(module_requests_stringstream.str());
441    mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset,
442                                  uint32_t request_module_idx, uint32_t import_name_offset,
443                                  uint32_t local_name_offset) {
444        std::stringstream ss;
445        ss << "\tModuleTag: " << ModuleTagToString(tag);
446        if (tag == panda_file::ModuleTag::REGULAR_IMPORT ||
447            tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) {
448            if (!IsValidOffset(local_name_offset)) {
449                LOG(FATAL, DISASSEMBLER) << "Get invalid local name offset!" << std::endl;
450            }
451            ss << ", local_name: " << GetStringByOffset(local_name_offset);
452        }
453        if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
454            if (!IsValidOffset(export_name_offset)) {
455                LOG(FATAL, DISASSEMBLER) << "Get invalid export name offset!" << std::endl;
456            }
457            ss << ", export_name: " << GetStringByOffset(export_name_offset);
458        }
459        if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
460            if (!IsValidOffset(import_name_offset)) {
461                LOG(FATAL, DISASSEMBLER) << "Get invalid import name offset!" << std::endl;
462            }
463            ss << ", import_name: " << GetStringByOffset(import_name_offset);
464        }
465        auto request_module_offset = request_modules_offset[request_module_idx];
466        if (tag != panda_file::ModuleTag::LOCAL_EXPORT) {
467            if (request_module_idx >= request_modules_offset.size() || !IsValidOffset(request_module_offset)) {
468                LOG(FATAL, DISASSEMBLER) << "Get invalid request module offset!" << std::endl;
469            }
470            ss << ", module_request: " << GetStringByOffset(request_module_offset);
471        }
472        module_literal_array.push_back(ss.str());
473    });
474
475    return module_literal_array;
476}
477
478void Disassembler::GetRecords()
479{
480    LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
481
482    const auto class_idx = file_->GetClasses();
483
484    for (size_t i = 0; i < class_idx.size(); i++) {
485        uint32_t class_id = class_idx[i];
486        auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
487
488        if (class_id > file_->GetHeader()->file_size) {
489            LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
490                                     << class_off << "). binary file corrupted. record offset (0x" << class_id
491                                     << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
492            break;
493        }
494
495        const panda_file::File::EntityId record_id {class_id};
496        auto language = GetRecordLanguage(record_id);
497        if (language != file_language_) {
498            if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
499                file_language_ = language;
500            } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
501                LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
502                                         << std::hex << class_off << "). record's language  ("
503                                         << panda_file::LanguageToString(language)
504                                         << ")  differs from file's language ("
505                                         << panda_file::LanguageToString(file_language_) << ")!";
506            }
507        }
508
509        pandasm::Record record("", file_language_);
510        GetRecord(&record, record_id);
511
512        if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
513            record_name_to_id_.emplace(record.name, record_id);
514            prog_.record_table.emplace(record.name, std::move(record));
515        }
516    }
517}
518
519void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
520{
521    panda_file::ClassDataAccessor class_accessor {*file_, record_id};
522
523    class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
524        pandasm::Field field(file_language_);
525
526        panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
527        field.name = StringDataToString(file_->GetStringData(field_name_id));
528
529        uint32_t field_type = field_accessor.GetType();
530        field.type = FieldTypeToPandasmType(field_type);
531
532        GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD);
533
534        record->field_list.push_back(std::move(field));
535    });
536}
537
538void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
539{
540    panda_file::ClassDataAccessor class_accessor {*file_, record_id};
541
542    class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
543        AddMethodToTables(method_accessor.GetMethodId());
544    });
545}
546
547void Disassembler::GetAnnotationElements(pandasm::Function &method, const panda_file::AnnotationDataAccessor &ada,
548                                         const std::string &annotation_name)
549{
550    uint32_t elem_count = ada.GetCount();
551    for (uint32_t i = 0; i < elem_count; i++) {
552        panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
553        const auto &elem_name =
554            std::string {reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data)};
555        panda_file::AnnotationDataAccessor::Tag tag = ada.GetTag(i);
556        auto value_type = pandasm::Value::GetCharAsType(tag.GetItem());
557        switch (value_type) {
558            case pandasm::Value::Type::U1: {
559                bool ann_elem_value = adae.GetScalarValue().Get<bool>();
560                AddAnnotationElement<bool>(method, annotation_name, elem_name, ann_elem_value);
561                break;
562            }
563            case pandasm::Value::Type::U32: {
564                uint32_t ann_elem_value = adae.GetScalarValue().Get<uint32_t>();
565                AddAnnotationElement<uint32_t>(method, annotation_name, elem_name, ann_elem_value);
566                break;
567            }
568            case pandasm::Value::Type::F64: {
569                double ann_elem_value = adae.GetScalarValue().Get<double>();
570                AddAnnotationElement<double>(method, annotation_name, elem_name, ann_elem_value);
571                break;
572            }
573            case pandasm::Value::Type::STRING: {
574                uint32_t string_id = adae.GetScalarValue().Get<uint32_t>();
575                std::string_view ann_elem_value {
576                    reinterpret_cast<const char *>(file_->GetStringData(panda_file::File::EntityId(string_id)).data)};
577                AddAnnotationElement<std::string_view>(method, annotation_name, elem_name, ann_elem_value);
578                break;
579            }
580            case pandasm::Value::Type::LITERALARRAY: {
581                uint32_t literalArray_offset = adae.GetScalarValue().Get<uint32_t>();
582                AddAnnotationElement<panda::pandasm::LiteralArray, std::string_view>(
583                    method, annotation_name, elem_name, std::string_view {std::to_string(literalArray_offset)});
584                break;
585            }
586            default:
587                UNREACHABLE();
588        }
589    }
590}
591
592void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
593{
594    panda_file::MethodDataAccessor mda(*file_, method_id);
595    mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
596        panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
597        auto annotation_name =
598            std::string {reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data)};
599        annotation_name.pop_back(); // remove ; from annotation name
600
601        if (annotation_name.empty()) {
602            return;
603        }
604
605        std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
606        std::vector<pandasm::AnnotationElement> elements;
607        pandasm::AnnotationData ann_data(annotation_name, elements);
608        std::vector<pandasm::AnnotationData> annotations;
609        annotations.push_back(std::move(ann_data));
610        method.metadata->AddAnnotations(annotations);
611
612        GetAnnotationElements(method, ada, annotation_name);
613    });
614}
615
616template <typename T, typename U = T>
617void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
618                                        const std::string &key, const U &value)
619{
620    if (key.empty()) {
621        return;
622    }
623
624    std::unique_ptr<pandasm::Value> pandasmValue;
625    if constexpr (std::is_same<T, uint32_t>::value) {
626        pandasmValue = std::move(
627            std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
628    } else if constexpr (std::is_same<T, double>::value) {
629        pandasmValue = std::move(
630            std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(value)));
631    } else if constexpr (std::is_same<T, bool>::value) {
632        pandasmValue = std::move(
633            std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(value)));
634    } else if constexpr (std::is_same<T, std::string_view>::value) {
635        pandasmValue = std::move(
636            std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(value)));
637    } else if constexpr (std::is_same<T, panda::pandasm::LiteralArray>::value) {
638        static_assert(std::is_same<U, std::string_view>::value);
639        pandasmValue = std::move(std::make_unique<pandasm::ScalarValue>(
640            pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(value)));
641    } else {
642        UNREACHABLE();
643    }
644
645    std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
646    const auto ann_iter =
647        std::find_if(method_annotation.begin(), method_annotation.end(),
648                     [&](pandasm::AnnotationData &ann) -> bool { return ann.GetName() == annotation_name; });
649
650    pandasm::AnnotationElement annotation_element(key, std::move(pandasmValue));
651    ann_iter->AddElement(std::move(annotation_element));
652    method.metadata->SetAnnotations(std::move(method_annotation));
653}
654
655std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
656{
657    const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
658    bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
659    if (!is_signature) {
660        return std::nullopt;
661    }
662
663    const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
664    bool is_method = method_iter != prog_.function_table.end();
665    const auto annotations = method_iter->second.metadata->GetAnnotations();
666    if (!is_method || annotations.empty()) {
667        return std::nullopt;
668    }
669
670    std::vector<std::string> ann;
671    for (const auto &ann_data : annotations) {
672        ann.emplace_back(ann_data.GetName());
673    }
674    return ann;
675}
676
677std::optional<std::string> Disassembler::GetSerializedMethodAnnotation(const std::string &method_name,
678                                                                       const std::string &anno_name) const
679{
680    const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
681    if (method_synonyms_iter == prog_.function_synonyms.end()) {
682        return std::nullopt;
683    }
684
685    const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
686    if (method_iter == prog_.function_table.end()) {
687        return std::nullopt;
688    }
689
690    const auto annotations = method_iter->second.metadata->GetAnnotations();
691    if (annotations.empty()) {
692        return std::nullopt;
693    }
694
695    const auto annotation_iter =
696        std::find_if(annotations.begin(), annotations.end(),
697                     [&](const pandasm::AnnotationData &ann) -> bool { return ann.GetName() == anno_name; });
698    if (annotation_iter == annotations.end()) {
699        return std::nullopt;
700    }
701
702    std::ostringstream os;
703    SerializeMethodAnnotation(*annotation_iter, os);
704    return os.str();
705}
706
707std::optional<std::string> Disassembler::GetSerializedRecord(const std::string &record_name) const
708{
709    const auto record_iter = prog_.record_table.find(record_name);
710    if (record_iter == prog_.record_table.end()) {
711        return std::nullopt;
712    }
713    std::ostringstream os;
714    Serialize(record_iter->second, os, false);
715    return os.str();
716}
717
718std::vector<std::string> Disassembler::GetStrings() const
719{
720    std::vector<std::string> strings;
721    for (auto &str_info : string_offset_to_name_) {
722        strings.emplace_back(str_info.second);
723    }
724
725    return strings;
726}
727
728std::vector<std::string> Disassembler::GetModuleLiterals() const
729{
730    std::vector<std::string> module_literals;
731    for (auto &module_array : modulearray_table_) {
732        for (auto &module : module_array.second) {
733            module_literals.emplace_back(module);
734        }
735    }
736
737    return module_literals;
738}
739
740void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const
741{
742    /**
743     * frame size - 2^16 - 1
744     */
745    static const uint32_t MAX_ARG_NUM = 0xFFFF;
746
747    LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
748
749    if (method == nullptr) {
750        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
751
752        return;
753    }
754
755    panda_file::CodeDataAccessor code_accessor(*file_, code_id);
756
757    auto params_num = code_accessor.GetNumArgs();
758    if (params_num > MAX_ARG_NUM) {
759        LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
760                                 << "). number of function's arguments (" << std::dec << params_num
761                                 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
762
763        return;
764    }
765
766    method->return_type = pandasm::Type("any", 0);
767
768    for (uint8_t i = 0; i < params_num; i++) {
769        method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), file_language_));
770    }
771}
772
773LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
774                                       panda_file::File::EntityId code_id) const
775{
776    LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
777
778    if (method == nullptr) {
779        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
780        return LabelTable {};
781    }
782
783    panda_file::CodeDataAccessor code_accessor(*file_, code_id);
784
785    const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
786    const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
787
788    size_t try_idx = 0;
789    LabelTable label_table {};
790    code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
791        pandasm::Function::CatchBlock catch_block_pa {};
792        if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
793            return false;
794        }
795        size_t catch_idx = 0;
796        try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
797            auto class_idx = catch_block.GetTypeIdx();
798            if (class_idx == panda_file::INVALID_INDEX) {
799                catch_block_pa.exception_record = "";
800            } else {
801                const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
802                catch_block_pa.exception_record = GetFullRecordName(class_id);
803            }
804            if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
805                                  catch_idx)) {
806                return false;
807            }
808
809            method->catch_blocks.push_back(catch_block_pa);
810            catch_block_pa.catch_begin_label = "";
811            catch_block_pa.catch_end_label = "";
812            catch_idx++;
813
814            return true;
815        });
816        try_idx++;
817
818        return true;
819    });
820
821    return label_table;
822}
823
824static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
825{
826    size_t count = 0;
827
828    while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
829        count++;
830        bc_ins_first = bc_ins_first.GetNext();
831        if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
832            return std::numeric_limits<size_t>::max();
833        }
834    }
835
836    return count;
837}
838
839bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
840                                  const panda_file::CodeDataAccessor::TryBlock &try_block,
841                                  pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
842                                  size_t try_idx) const
843{
844    const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
845    const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
846
847    const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
848    const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
849
850    const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
851    const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
852    const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
853    const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
854
855    if (!try_begin_offset_in_range || !try_begin_offset_valid) {
856        LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
857                                 << try_begin_bc_ins.GetAddress();
858        return false;
859    } else {
860        std::stringstream ss {};
861        ss << "try_begin_label_" << try_idx;
862
863        LabelTable::iterator it = label_table->find(try_begin_idx);
864        if (it == label_table->end()) {
865            catch_block_pa->try_begin_label = ss.str();
866            label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
867        } else {
868            catch_block_pa->try_begin_label = it->second;
869        }
870    }
871
872    if (!try_end_offset_in_range || !try_end_offset_valid) {
873        LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
874                                 << try_end_bc_ins.GetAddress();
875        return false;
876    } else {
877        std::stringstream ss {};
878        ss << "try_end_label_" << try_idx;
879
880        LabelTable::iterator it = label_table->find(try_end_idx);
881        if (it == label_table->end()) {
882            catch_block_pa->try_end_label = ss.str();
883            label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
884        } else {
885            catch_block_pa->try_end_label = it->second;
886        }
887    }
888
889    return true;
890}
891
892bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
893                                    const panda_file::CodeDataAccessor::CatchBlock &catch_block,
894                                    pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
895                                    size_t try_idx, size_t catch_idx) const
896{
897    const auto handler_begin_offset = catch_block.GetHandlerPc();
898    const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
899
900    const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
901    const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
902
903    const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
904    const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
905
906    const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
907    const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
908    const bool handler_end_present = catch_block.GetCodeSize() != 0;
909    const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
910    const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
911
912    if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
913        LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
914                                 << handler_begin_bc_ins.GetAddress();
915        return false;
916    } else {
917        std::stringstream ss {};
918        ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
919
920        LabelTable::iterator it = label_table->find(handler_begin_idx);
921        if (it == label_table->end()) {
922            catch_block_pa->catch_begin_label = ss.str();
923            label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
924        } else {
925            catch_block_pa->catch_begin_label = it->second;
926        }
927    }
928
929    if (!handler_end_offset_in_range || !handler_end_offset_valid) {
930        LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
931                                 << handler_end_bc_ins.GetAddress();
932        return false;
933    } else if (handler_end_present) {
934        std::stringstream ss {};
935        ss << "handler_end_label_" << try_idx << "_" << catch_idx;
936
937        LabelTable::iterator it = label_table->find(handler_end_idx);
938        if (it == label_table->end()) {
939            catch_block_pa->catch_end_label = ss.str();
940            label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
941        } else {
942            catch_block_pa->catch_end_label = it->second;
943        }
944    }
945
946    return true;
947}
948
949void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
950{
951    LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
952                             << ")";
953
954    if (method == nullptr) {
955        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
956
957        return;
958    }
959
960    panda_file::MethodDataAccessor method_accessor(*file_, method_id);
961
962    const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
963
964    if (!method_accessor.IsStatic()) {
965        const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
966        auto this_type = pandasm::Type::FromDescriptor(class_name);
967
968        LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
969                                 << "\') is not static. emplacing self-argument of type " << this_type.GetName();
970
971        method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
972    } else {
973        method->metadata->SetAttribute("static");
974    }
975
976    if (file_->IsExternal(method_accessor.GetMethodId())) {
977        method->metadata->SetAttribute("external");
978    }
979
980    std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
981    std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
982
983    const bool is_ctor = (method_name_raw == ctor_name);
984    const bool is_cctor = (method_name_raw == cctor_name);
985
986    if (is_ctor) {
987        method->metadata->SetAttribute("ctor");
988        method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
989    } else if (is_cctor) {
990        method->metadata->SetAttribute("cctor");
991        method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
992    }
993}
994
995void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
996{
997    LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
998                             << ")";
999
1000    if (record == nullptr) {
1001        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
1002
1003        return;
1004    }
1005
1006    if (file_->IsExternal(record_id)) {
1007        record->metadata->SetAttribute("external");
1008    }
1009}
1010
1011void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &field_accessor, pandasm::Field *field,
1012                                         bool isScopeNamesRecord)
1013{
1014    if (field->type.GetId() == panda_file::Type::TypeId::U32) {
1015        const auto offset = field_accessor.GetValue<uint32_t>().value();
1016        bool isScopeNameField = isScopeNamesRecord || field->name == ark::SCOPE_NAMES;
1017        if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) {
1018            module_request_phase_literals_.insert(offset);
1019        } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !isScopeNameField) {
1020            LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
1021                                     << " is excluded";
1022            module_literals_.insert(offset);
1023        }
1024        field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
1025    } else if (field->type.GetId() == panda_file::Type::TypeId::U8) {
1026        const uint8_t val = field_accessor.GetValue<uint8_t>().value();
1027        field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
1028    } else if (field->type.GetId() == panda_file::Type::TypeId::F64) {
1029        std::optional<double> val = field_accessor.GetValue<double>();
1030        if (val.has_value()) {
1031            field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(val.value()));
1032        }
1033    } else if (field->type.GetId() == panda_file::Type::TypeId::U1) {
1034        std::optional<bool> val = field_accessor.GetValue<bool>();
1035        if (val.has_value()) {
1036            field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(val.value()));
1037        }
1038    } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE && field->type.GetName() == "panda.String") {
1039        std::optional<uint32_t> string_offset_val = field_accessor.GetValue<uint32_t>();
1040        if (string_offset_val.has_value()) {
1041            std::string_view val {reinterpret_cast<const char *>(
1042                file_->GetStringData(panda_file::File::EntityId(string_offset_val.value())).data)};
1043            field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1044        }
1045    } else if (field->type.GetRank() > 0) {
1046        std::optional<uint32_t> litarray_offset_val = field_accessor.GetValue<uint32_t>();
1047        if (litarray_offset_val.has_value()) {
1048            field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1049                std::string_view {std::to_string(litarray_offset_val.value())}));
1050        }
1051    } else {
1052        UNREACHABLE();
1053    }
1054}
1055
1056void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id,
1057                               bool is_scope_names_record)
1058{
1059    LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
1060
1061    if (field == nullptr) {
1062        LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1063
1064        return;
1065    }
1066
1067    panda_file::FieldDataAccessor field_accessor(*file_, field_id);
1068
1069    if (field_accessor.IsExternal()) {
1070        field->metadata->SetAttribute("external");
1071    }
1072
1073    if (field_accessor.IsStatic()) {
1074        field->metadata->SetAttribute("static");
1075    }
1076
1077    GetMetadataFieldValue(field_accessor, field, is_scope_names_record);
1078}
1079
1080std::string Disassembler::AnnotationTagToString(const char tag) const
1081{
1082    switch (tag) {
1083        case '1':
1084            return "u1";
1085        case '2':
1086            return "i8";
1087        case '3':
1088            return "u8";
1089        case '4':
1090            return "i16";
1091        case '5':
1092            return "u16";
1093        case '6':
1094            return "i32";
1095        case '7':
1096            return "u32";
1097        case '8':
1098            return "i64";
1099        case '9':
1100            return "u64";
1101        case 'A':
1102            return "f32";
1103        case 'B':
1104            return "f64";
1105        case 'C':
1106            return "string";
1107        case 'D':
1108            return "record";
1109        case 'E':
1110            return "method";
1111        case 'F':
1112            return "enum";
1113        case 'G':
1114            return "annotation";
1115        case 'I':
1116            return "void";
1117        case 'J':
1118            return "method_handle";
1119        case 'K':
1120            return "u1[]";
1121        case 'L':
1122            return "i8[]";
1123        case 'M':
1124            return "u8[]";
1125        case 'N':
1126            return "i16[]";
1127        case 'O':
1128            return "u16[]";
1129        case 'P':
1130            return "i32[]";
1131        case 'Q':
1132            return "u32[]";
1133        case 'R':
1134            return "i64[]";
1135        case 'S':
1136            return "u64[]";
1137        case 'T':
1138            return "f32[]";
1139        case 'U':
1140            return "f64[]";
1141        case 'V':
1142            return "string[]";
1143        case 'W':
1144            return "record[]";
1145        case 'X':
1146            return "method[]";
1147        case 'Y':
1148            return "enum[]";
1149        case 'Z':
1150            return "annotation[]";
1151        case '@':
1152            return "method_handle[]";
1153        case '*':
1154            return "nullptr string";
1155        default:
1156            return std::string();
1157    }
1158}
1159
1160std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1161{
1162    std::stringstream ss;
1163
1164    if (type == "i8") {
1165        int8_t res = value.Get<int8_t>();
1166        ss << static_cast<int>(res);
1167    } else if (type == "u1" || type == "u8") {
1168        uint8_t res = value.Get<uint8_t>();
1169        ss << static_cast<unsigned int>(res);
1170    } else if (type == "i16") {
1171        ss << value.Get<int16_t>();
1172    } else if (type == "u16") {
1173        ss << value.Get<uint16_t>();
1174    } else if (type == "i32") {
1175        ss << value.Get<int32_t>();
1176    } else if (type == "u32") {
1177        ss << value.Get<uint32_t>();
1178    } else if (type == "i64") {
1179        ss << value.Get<int64_t>();
1180    } else if (type == "u64") {
1181        ss << value.Get<uint64_t>();
1182    } else if (type == "f32") {
1183        ss << value.Get<float>();
1184    } else if (type == "f64") {
1185        ss << value.Get<double>();
1186    } else if (type == "string") {
1187        const auto id = value.Get<panda_file::File::EntityId>();
1188        ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1189    } else if (type == "record") {
1190        const auto id = value.Get<panda_file::File::EntityId>();
1191        ss << GetFullRecordName(id);
1192    } else if (type == "method") {
1193        const auto id = value.Get<panda_file::File::EntityId>();
1194        AddMethodToTables(id);
1195        ss << GetMethodSignature(id);
1196    } else if (type == "enum") {
1197        const auto id = value.Get<panda_file::File::EntityId>();
1198        panda_file::FieldDataAccessor field_accessor(*file_, id);
1199        ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1200           << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1201    } else if (type == "annotation") {
1202        const auto id = value.Get<panda_file::File::EntityId>();
1203        ss << "id_" << id;
1204    } else if (type == "void") {
1205        return std::string();
1206    } else if (type == "method_handle") {
1207    }
1208
1209    return ss.str();
1210}
1211
1212std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1213                                             const size_t idx)
1214{
1215    std::stringstream ss;
1216
1217    if (type == "i8") {
1218        int8_t res = value.Get<int8_t>(idx);
1219        ss << static_cast<int>(res);
1220    } else if (type == "u1" || type == "u8") {
1221        uint8_t res = value.Get<uint8_t>(idx);
1222        ss << static_cast<unsigned int>(res);
1223    } else if (type == "i16") {
1224        ss << value.Get<int16_t>(idx);
1225    } else if (type == "u16") {
1226        ss << value.Get<uint16_t>(idx);
1227    } else if (type == "i32") {
1228        ss << value.Get<int32_t>(idx);
1229    } else if (type == "u32") {
1230        ss << value.Get<uint32_t>(idx);
1231    } else if (type == "i64") {
1232        ss << value.Get<int64_t>(idx);
1233    } else if (type == "u64") {
1234        ss << value.Get<uint64_t>(idx);
1235    } else if (type == "f32") {
1236        ss << value.Get<float>(idx);
1237    } else if (type == "f64") {
1238        ss << value.Get<double>(idx);
1239    } else if (type == "string") {
1240        const auto id = value.Get<panda_file::File::EntityId>(idx);
1241        ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1242    } else if (type == "record") {
1243        const auto id = value.Get<panda_file::File::EntityId>(idx);
1244        ss << GetFullRecordName(id);
1245    } else if (type == "method") {
1246        const auto id = value.Get<panda_file::File::EntityId>(idx);
1247        AddMethodToTables(id);
1248        ss << GetMethodSignature(id);
1249    } else if (type == "enum") {
1250        const auto id = value.Get<panda_file::File::EntityId>(idx);
1251        panda_file::FieldDataAccessor field_accessor(*file_, id);
1252        ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1253           << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1254    } else if (type == "annotation") {
1255        const auto id = value.Get<panda_file::File::EntityId>(idx);
1256        ss << "id_" << id;
1257    } else if (type == "method_handle") {
1258    } else if (type == "nullptr string") {
1259    }
1260
1261    return ss.str();
1262}
1263
1264std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1265{
1266    panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1267
1268    const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1269
1270    std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1271    if (IsSystemType(class_name)) {
1272        class_name = "";
1273    } else {
1274        class_name += ".";
1275    }
1276
1277    return class_name + method_name_raw;
1278}
1279
1280std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1281{
1282    panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1283
1284    pandasm::Function method(GetFullMethodName(method_id), file_language_);
1285    if (method_accessor.GetCodeId().has_value()) {
1286        GetParams(&method, method_accessor.GetCodeId().value());
1287    }
1288    GetMetaData(&method, method_id);
1289
1290    return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1291}
1292
1293std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1294{
1295    std::string name = StringDataToString(file_->GetStringData(class_id));
1296
1297    auto type = pandasm::Type::FromDescriptor(name);
1298    type = pandasm::Type(type.GetComponentName(), type.GetRank());
1299
1300    return type.GetPandasmName();
1301}
1302
1303void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1304{
1305    constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1306
1307    if (file_->IsExternal(record_id)) {
1308        return;
1309    }
1310
1311    panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1312    std::stringstream ss;
1313
1314    ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1315       << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1316       << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1317
1318    record_info->record_info = ss.str();
1319    ss.str(std::string());
1320
1321    class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1322        ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1323           << field_accessor.GetFieldId();
1324
1325        record_info->fields_info.push_back(ss.str());
1326
1327        ss.str(std::string());
1328    });
1329}
1330
1331void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1332{
1333    constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1334
1335    panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1336    std::stringstream ss;
1337
1338    ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1339       << method_accessor.GetMethodId();
1340
1341    if (method_accessor.GetCodeId().has_value()) {
1342        ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1343           << method_accessor.GetCodeId().value();
1344
1345        GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1346    } else {
1347        ss << ", <no code>";
1348    }
1349
1350    method_info->method_info = ss.str();
1351
1352    if (method_accessor.GetCodeId()) {
1353        ASSERT(debug_info_extractor_ != nullptr);
1354        method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1355        method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1356        method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1357
1358        // Add information about parameters into the table
1359        panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1360        auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1361        uint32_t code_size = codeda.GetCodeSize();
1362        for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1363            panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1364            method_info->local_variable_table.emplace_back(arg_info);
1365        }
1366    }
1367}
1368
1369static bool IsArray(const panda_file::LiteralTag &tag)
1370{
1371    switch (tag) {
1372        case panda_file::LiteralTag::ARRAY_U1:
1373        case panda_file::LiteralTag::ARRAY_U8:
1374        case panda_file::LiteralTag::ARRAY_I8:
1375        case panda_file::LiteralTag::ARRAY_U16:
1376        case panda_file::LiteralTag::ARRAY_I16:
1377        case panda_file::LiteralTag::ARRAY_U32:
1378        case panda_file::LiteralTag::ARRAY_I32:
1379        case panda_file::LiteralTag::ARRAY_U64:
1380        case panda_file::LiteralTag::ARRAY_I64:
1381        case panda_file::LiteralTag::ARRAY_F32:
1382        case panda_file::LiteralTag::ARRAY_F64:
1383        case panda_file::LiteralTag::ARRAY_STRING:
1384            return true;
1385        default:
1386            return false;
1387    }
1388}
1389
1390std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1391{
1392    std::stringstream ret;
1393    if (lit_array.literals_.empty()) {
1394        return "";
1395    }
1396
1397    std::stringstream ss;
1398    ss << "{ ";
1399    const auto &tag = lit_array.literals_[0].tag_;
1400    if (IsArray(tag)) {
1401        ss << LiteralTagToString(tag);
1402    }
1403    ss << lit_array.literals_.size();
1404    ss << " [ ";
1405    SerializeValues(lit_array, ss);
1406    ss << "]}";
1407    return ss.str();
1408}
1409
1410void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1411{
1412    os << key << " ";
1413    os << SerializeLiteralArray(lit_array);
1414    os << "\n";
1415}
1416
1417void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1418                             std::ostream &os) const
1419{
1420    os << module_offset << " ";
1421    os << SerializeModuleLiteralArray(module_array);
1422    os << "\n";
1423}
1424
1425std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1426{
1427    if (module_array.empty()) {
1428        return "";
1429    }
1430
1431    std::stringstream ss;
1432    ss << "{ ";
1433    ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array
1434    ss << " [\n";
1435    for (size_t index = 0; index < module_array.size(); index++) {
1436        ss << module_array[index] << ";\n";
1437    }
1438    ss << "]}";
1439    return ss.str();
1440}
1441
1442std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1443{
1444    switch (tag) {
1445        case panda_file::LiteralTag::BOOL:
1446        case panda_file::LiteralTag::ARRAY_U1:
1447            return "u1";
1448        case panda_file::LiteralTag::ARRAY_U8:
1449            return "u8";
1450        case panda_file::LiteralTag::ARRAY_I8:
1451            return "i8";
1452        case panda_file::LiteralTag::ARRAY_U16:
1453            return "u16";
1454        case panda_file::LiteralTag::ARRAY_I16:
1455            return "i16";
1456        case panda_file::LiteralTag::ARRAY_U32:
1457            return "u32";
1458        case panda_file::LiteralTag::INTEGER:
1459        case panda_file::LiteralTag::ARRAY_I32:
1460            return "i32";
1461        case panda_file::LiteralTag::ARRAY_U64:
1462            return "u64";
1463        case panda_file::LiteralTag::ARRAY_I64:
1464            return "i64";
1465        case panda_file::LiteralTag::ARRAY_F32:
1466            return "f32";
1467        case panda_file::LiteralTag::DOUBLE:
1468        case panda_file::LiteralTag::ARRAY_F64:
1469            return "f64";
1470        case panda_file::LiteralTag::STRING:
1471        case panda_file::LiteralTag::ARRAY_STRING:
1472            return "string";
1473        case panda_file::LiteralTag::METHOD:
1474            return "method";
1475        case panda_file::LiteralTag::GETTER:
1476            return "getter";
1477        case panda_file::LiteralTag::SETTER:
1478            return "setter";
1479        case panda_file::LiteralTag::GENERATORMETHOD:
1480            return "generator_method";
1481        case panda_file::LiteralTag::ACCESSOR:
1482            return "accessor";
1483        case panda_file::LiteralTag::METHODAFFILIATE:
1484            return "method_affiliate";
1485        case panda_file::LiteralTag::NULLVALUE:
1486            return "null_value";
1487        case panda_file::LiteralTag::TAGVALUE:
1488            return "tagvalue";
1489        case panda_file::LiteralTag::LITERALBUFFERINDEX:
1490            return "lit_index";
1491        case panda_file::LiteralTag::LITERALARRAY:
1492            return "lit_offset";
1493        case panda_file::LiteralTag::BUILTINTYPEINDEX:
1494            return "builtin_type";
1495        default:
1496            UNREACHABLE();
1497    }
1498}
1499
1500template <typename T>
1501void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1502{
1503    switch (lit_array.literals_[0].tag_) {
1504        case panda_file::LiteralTag::ARRAY_U1: {
1505            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1506                os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1507            }
1508            break;
1509        }
1510        case panda_file::LiteralTag::ARRAY_U8: {
1511            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1512                os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1513            }
1514            break;
1515        }
1516        case panda_file::LiteralTag::ARRAY_I8: {
1517            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1518                os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1519            }
1520            break;
1521        }
1522        case panda_file::LiteralTag::ARRAY_U16: {
1523            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1524                os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1525            }
1526            break;
1527        }
1528        case panda_file::LiteralTag::ARRAY_I16: {
1529            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1530                os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1531            }
1532            break;
1533        }
1534        case panda_file::LiteralTag::ARRAY_U32: {
1535            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1536                os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1537            }
1538            break;
1539        }
1540        case panda_file::LiteralTag::ARRAY_I32: {
1541            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1542                os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1543            }
1544            break;
1545        }
1546        case panda_file::LiteralTag::ARRAY_U64: {
1547            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1548                os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1549            }
1550            break;
1551        }
1552        case panda_file::LiteralTag::ARRAY_I64: {
1553            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1554                os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1555            }
1556            break;
1557        }
1558        case panda_file::LiteralTag::ARRAY_F32: {
1559            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1560                os << std::get<float>(lit_array.literals_[i].value_) << " ";
1561            }
1562            break;
1563        }
1564        case panda_file::LiteralTag::ARRAY_F64: {
1565            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1566                os << std::get<double>(lit_array.literals_[i].value_) << " ";
1567            }
1568            break;
1569        }
1570        case panda_file::LiteralTag::ARRAY_STRING: {
1571            for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1572                os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1573            }
1574            break;
1575        }
1576        default:
1577            SerializeLiterals(lit_array, os);
1578    }
1579}
1580
1581template <typename T>
1582void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1583{
1584    for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1585        const auto &tag = lit_array.literals_[i].tag_;
1586        os << LiteralTagToString(tag) << ":";
1587        const auto &val = lit_array.literals_[i].value_;
1588        switch (lit_array.literals_[i].tag_) {
1589            case panda_file::LiteralTag::BOOL: {
1590                os << std::get<bool>(val);
1591                break;
1592            }
1593            case panda_file::LiteralTag::LITERALBUFFERINDEX:
1594            case panda_file::LiteralTag::INTEGER: {
1595                os << bit_cast<int32_t>(std::get<uint32_t>(val));
1596                break;
1597            }
1598            case panda_file::LiteralTag::DOUBLE: {
1599                os << std::get<double>(val);
1600                break;
1601            }
1602            case panda_file::LiteralTag::STRING: {
1603                os << "\"" << std::get<std::string>(val) << "\"";
1604                break;
1605            }
1606            case panda_file::LiteralTag::METHOD:
1607            case panda_file::LiteralTag::GETTER:
1608            case panda_file::LiteralTag::SETTER:
1609            case panda_file::LiteralTag::GENERATORMETHOD: {
1610                os << std::get<std::string>(val);
1611                break;
1612            }
1613            case panda_file::LiteralTag::NULLVALUE:
1614            case panda_file::LiteralTag::ACCESSOR: {
1615                os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1616                break;
1617            }
1618            case panda_file::LiteralTag::METHODAFFILIATE: {
1619                os << std::get<uint16_t>(val);
1620                break;
1621            }
1622            case panda_file::LiteralTag::LITERALARRAY: {
1623                os << std::get<std::string>(val);
1624                break;
1625            }
1626            case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1627                os << static_cast<int16_t>(std::get<uint8_t>(val));
1628                break;
1629            }
1630            default:
1631                UNREACHABLE();
1632        }
1633        os << ", ";
1634    }
1635}
1636
1637void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1638{
1639    if (IsSystemType(record.name)) {
1640        return;
1641    }
1642
1643    os << ".record " << record.name;
1644
1645    const auto record_iter = prog_ann_.record_annotations.find(record.name);
1646    const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1647    if (record_in_table) {
1648        Serialize(*record.metadata, record_iter->second.ann_list, os);
1649    } else {
1650        Serialize(*record.metadata, {}, os);
1651    }
1652
1653    if (record.metadata->IsForeign()) {
1654        os << "\n\n";
1655        return;
1656    }
1657
1658    os << " {";
1659
1660    if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1661        os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1662        SerializeFields(record, os, true);
1663    } else {
1664        os << "\n";
1665        SerializeFields(record, os, false);
1666    }
1667
1668    os << "}\n\n";
1669}
1670
1671void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literal_array, std::stringstream &ss) const
1672{
1673    ss << "[";
1674    bool firstItem = true;
1675    for (const auto &item : literal_array.literals_) {
1676        if (!firstItem) {
1677            ss << ", ";
1678        } else {
1679            firstItem = false;
1680        }
1681
1682        switch (item.tag_) {
1683            case panda_file::LiteralTag::DOUBLE: {
1684                ss << std::get<double>(item.value_);
1685                break;
1686            }
1687            case panda_file::LiteralTag::BOOL: {
1688                ss << std::get<bool>(item.value_);
1689                break;
1690            }
1691            case panda_file::LiteralTag::STRING: {
1692                ss << "\"" << std::get<std::string>(item.value_) << "\"";
1693                break;
1694            }
1695            case panda_file::LiteralTag::LITERALARRAY: {
1696                std::string offset_str = std::get<std::string>(item.value_);
1697                uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1698                pandasm::LiteralArray lit_array;
1699                GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1700                DumpLiteralArray(lit_array, ss);
1701                break;
1702            }
1703            case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1704                // By convention, BUILTINTYPEINDEX is used to store type of empty arrays,
1705                // therefore it has no value
1706                break;
1707            }
1708            default: {
1709                UNREACHABLE();
1710                break;
1711            }
1712        }
1713    }
1714    ss << "]";
1715}
1716
1717void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1718{
1719    if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1720        ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1721    } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1722        ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1723    } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1724        ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1725    } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1726        ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1727    } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "panda.String") {
1728        ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1729    } else if (f.type.GetRank() > 0) {
1730        uint32_t lit_array_fffset =
1731            std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()));
1732        pandasm::LiteralArray lit_array;
1733        GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1734        ss << " = ";
1735        DumpLiteralArray(lit_array, ss);
1736    }
1737}
1738
1739void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1740{
1741    constexpr size_t INFO_OFFSET = 80;
1742
1743    const auto record_iter = prog_ann_.record_annotations.find(record.name);
1744    const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1745
1746    const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1747
1748    size_t field_idx = 0;
1749
1750    std::stringstream ss;
1751    for (const auto &f : record.field_list) {
1752        std::string file = GetFileNameByPath(f.name);
1753        ss << "\t" << f.type.GetPandasmName() << " " << file;
1754        if (f.metadata->GetValue().has_value()) {
1755            SerializeFieldValue(f, ss);
1756        }
1757        if (record_in_table) {
1758            const auto field_iter = record_iter->second.field_annotations.find(f.name);
1759            if (field_iter != record_iter->second.field_annotations.end()) {
1760                Serialize(*f.metadata, field_iter->second, ss);
1761            } else {
1762                Serialize(*f.metadata, {}, ss);
1763            }
1764        } else {
1765            Serialize(*f.metadata, {}, ss);
1766        }
1767
1768        if (print_information) {
1769            os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1770        } else {
1771            os << ss.str() << "\n";
1772        }
1773
1774        ss.str(std::string());
1775        ss.clear();
1776
1777        field_idx++;
1778    }
1779}
1780
1781std::string Disassembler::getLiteralArrayTypeFromValue(const pandasm::LiteralArray &literal_array) const
1782{
1783    [[maybe_unused]] auto size = literal_array.literals_.size();
1784    ASSERT(size > 0);
1785    switch (literal_array.literals_[0].tag_) {
1786        case panda_file::LiteralTag::DOUBLE: {
1787            return "f64[]";
1788        }
1789        case panda_file::LiteralTag::BOOL: {
1790            return "u1[]";
1791        }
1792        case panda_file::LiteralTag::STRING: {
1793            return "panda.String[]";
1794        }
1795        case panda_file::LiteralTag::LITERALARRAY: {
1796            std::string offset_str = std::get<std::string>(literal_array.literals_[0].value_);
1797            uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1798            pandasm::LiteralArray lit_array;
1799            GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1800            return getLiteralArrayTypeFromValue(lit_array) + "[]";
1801        }
1802        case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1803            uint8_t typeIndex = std::get<uint8_t>(literal_array.literals_[0].value_);
1804            static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE = 0;
1805            static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE = 1;
1806            static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE = 2;
1807            switch (typeIndex) {
1808                case EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE:
1809                    return "f64[]";
1810                case EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE:
1811                    return "u1[]";
1812                case EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE:
1813                    return "panda.String[]";
1814                default:
1815                    UNREACHABLE();
1816                    break;
1817            }
1818        }
1819        default: {
1820            UNREACHABLE();
1821            break;
1822        }
1823    }
1824}
1825
1826void Disassembler::SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> &elements,
1827                                              std::stringstream &ss, uint32_t idx) const
1828{
1829    for (const auto &elem : elements) {
1830        auto type = elem.GetValue()->GetType();
1831        if (type == pandasm::Value::Type::U32) {
1832            ss << "\t"
1833               << "u32"
1834               << " " << elem.GetName() << " { ";
1835            ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>() << " }";
1836        } else if (type == pandasm::Value::Type::F64) {
1837            ss << "\t"
1838               << "f64"
1839               << " " << elem.GetName() << " { ";
1840            ss << elem.GetValue()->GetAsScalar()->GetValue<double>() << " }";
1841        } else if (type == pandasm::Value::Type::U1) {
1842            ss << "\t"
1843               << "u1"
1844               << " " << elem.GetName() << " { ";
1845            ss << elem.GetValue()->GetAsScalar()->GetValue<bool>() << " }";
1846        } else if (type == pandasm::Value::Type::STRING) {
1847            ss << "\t"
1848               << "panda.String"
1849               << " " << elem.GetName() << " { \"";
1850            ss << elem.GetValue()->GetAsScalar()->GetValue<std::string>() << "\" }";
1851        } else if (type == pandasm::Value::Type::LITERALARRAY) {
1852            uint32_t lit_array_fffset = std::stoi(elem.GetValue()->GetAsScalar()->GetValue<std::string>());
1853            pandasm::LiteralArray lit_array;
1854            GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1855            std::string typeName = getLiteralArrayTypeFromValue(lit_array);
1856            ss << "\t" << typeName << " " << elem.GetName() << " { ";
1857            DumpLiteralArray(lit_array, ss);
1858            ss << " }";
1859        } else {
1860            UNREACHABLE();
1861        }
1862        if (idx > 0) {
1863            ss << "\n";
1864        }
1865        --idx;
1866    }
1867}
1868
1869void Disassembler::SerializeMethodAnnotation(const pandasm::AnnotationData &ann, std::ostream &os) const
1870{
1871    os << ann.GetName() << ":\n";
1872    std::stringstream ss;
1873    std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1874    if (elements.empty()) {
1875        return;
1876    }
1877    uint32_t idx = elements.size() - 1;
1878    SerializeAnnotationElement(elements, ss, idx);
1879    os << ss.str() << "\n";
1880}
1881
1882void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1883{
1884    const auto annotations = method.metadata->GetAnnotations();
1885    if (annotations.empty()) {
1886        return;
1887    }
1888
1889    for (const auto &ann : annotations) {
1890        SerializeMethodAnnotation(ann, os);
1891    }
1892}
1893
1894void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os,
1895                                         const std::map<std::string, MethodInfo>::const_iterator &method_info_it,
1896                                         bool print_method_info) const
1897{
1898    std::string delim = ": ";
1899    size_t width = 0;
1900    if (print_method_info) {
1901        for (const auto &i : method.ins) {
1902            size_t ins_size = i.ToString().size();
1903            if (i.set_label) {
1904                ins_size = ins_size - i.label.size() - delim.length();
1905            }
1906
1907            if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) {
1908                width = i.ToString().size();
1909            }
1910        }
1911    }
1912
1913    for (size_t i = 0; i < method.ins.size(); i++) {
1914        std::string ins = method.ins[i].ToString("", true, method.regs_num);
1915        if (method.ins[i].set_label) {
1916            size_t pos = ins.find(delim);
1917            std::string label = ins.substr(0, pos);
1918            ins.erase(0, pos + delim.length());
1919            os << label << ":\n";
1920        }
1921
1922        if (ins != "") {
1923            os << "\t" << std::setw(width) << std::left << ins;
1924            if (print_method_info && i < method_info_it->second.instructions_info.size()) {
1925                os << " # " << method_info_it->second.instructions_info.at(i);
1926            }
1927            os << "\n";
1928        }
1929    }
1930}
1931
1932void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1933{
1934    SerializeMethodAnnotations(method, os);
1935    os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1936
1937    if (method.params.size() > 0) {
1938        os << method.params[0].type.GetPandasmName() << " a0";
1939
1940        for (uint8_t i = 1; i < method.params.size(); i++) {
1941            os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1942        }
1943    }
1944    os << ")";
1945
1946    const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1947
1948    const auto method_iter = prog_ann_.method_annotations.find(signature);
1949    if (method_iter != prog_ann_.method_annotations.end()) {
1950        Serialize(*method.metadata, method_iter->second, os);
1951    } else {
1952        Serialize(*method.metadata, {}, os);
1953    }
1954
1955    auto method_info_it = prog_info_.methods_info.find(signature);
1956    bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1957    if (print_method_info) {
1958        os << " { # " << method_info_it->second.method_info << "\n#   CODE:\n";
1959    } else {
1960        os << " {\n";
1961    }
1962    SerializeInstructions(method, os, method_info_it, print_method_info);
1963
1964    if (method.catch_blocks.size() != 0) {
1965        os << "\n";
1966
1967        for (const auto &catch_block : method.catch_blocks) {
1968            Serialize(catch_block, os);
1969
1970            os << "\n";
1971        }
1972    }
1973
1974    if (print_method_info) {
1975        const MethodInfo &method_info = method_info_it->second;
1976        SerializeLineNumberTable(method_info.line_number_table, os);
1977        SerializeColumnNumberTable(method_info.column_number_table, os);
1978        SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1979    }
1980
1981    os << "}\n\n";
1982}
1983
1984void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1985                                    std::ostream &os) const
1986{
1987    os << "[offset:0x" << std::hex << offset << ", name_value:" << name_value << "]" << std::endl;
1988}
1989
1990void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1991{
1992    if (catch_block.exception_record == "") {
1993        os << ".catchall ";
1994    } else {
1995        os << ".catch " << catch_block.exception_record << ", ";
1996    }
1997
1998    os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1999
2000    if (catch_block.catch_end_label != "") {
2001        os << ", " << catch_block.catch_end_label;
2002    }
2003}
2004
2005void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
2006{
2007    auto bool_attributes = meta.GetBoolAttributes();
2008    auto attributes = meta.GetAttributes();
2009    if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
2010        return;
2011    }
2012
2013    os << " <";
2014
2015    size_t size = bool_attributes.size();
2016    size_t idx = 0;
2017    for (const auto &attr : bool_attributes) {
2018        os << attr;
2019        ++idx;
2020
2021        if (!attributes.empty() || !ann_list.empty() || idx < size) {
2022            os << ", ";
2023        }
2024    }
2025
2026    size = attributes.size();
2027    idx = 0;
2028    for (const auto &[key, values] : attributes) {
2029        for (size_t i = 0; i < values.size(); i++) {
2030            os << key << "=" << values[i];
2031
2032            if (i < values.size() - 1) {
2033                os << ", ";
2034            }
2035        }
2036
2037        ++idx;
2038
2039        if (!ann_list.empty() || idx < size) {
2040            os << ", ";
2041        }
2042    }
2043
2044    size = ann_list.size();
2045    idx = 0;
2046    for (const auto &[key, value] : ann_list) {
2047        os << key << "=" << value;
2048
2049        ++idx;
2050
2051        if (idx < size) {
2052            os << ", ";
2053        }
2054    }
2055
2056    os << ">";
2057}
2058
2059void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
2060                                            std::ostream &os) const
2061{
2062    if (line_number_table.empty()) {
2063        return;
2064    }
2065
2066    os << "\n#   LINE_NUMBER_TABLE:\n";
2067    for (const auto &line_info : line_number_table) {
2068        os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
2069    }
2070}
2071
2072void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
2073                                              std::ostream &os) const
2074{
2075    if (column_number_table.empty()) {
2076        return;
2077    }
2078
2079    os << "\n#   COLUMN_NUMBER_TABLE:\n";
2080    for (const auto &column_info : column_number_table) {
2081        os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
2082    }
2083}
2084
2085void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
2086                                               const pandasm::Function &method, std::ostream &os) const
2087{
2088    if (local_variable_table.empty()) {
2089        return;
2090    }
2091
2092    os << "\n#   LOCAL_VARIABLE_TABLE:\n";
2093    os << "#\t Start   End  Register           Name   Signature\n";
2094    const int START_WIDTH = 5;
2095    const int END_WIDTH = 4;
2096    const int REG_WIDTH = 8;
2097    const int NAME_WIDTH = 14;
2098    for (const auto &variable_info : local_variable_table) {
2099        std::ostringstream reg_stream;
2100        reg_stream << variable_info.reg_number << '(';
2101        if (variable_info.reg_number < 0) {
2102            reg_stream << "acc";
2103        } else {
2104            uint32_t vreg = variable_info.reg_number;
2105            uint32_t first_arg_reg = method.GetTotalRegs();
2106            if (vreg < first_arg_reg) {
2107                reg_stream << 'v' << vreg;
2108            } else {
2109                reg_stream << 'a' << vreg - first_arg_reg;
2110            }
2111        }
2112        reg_stream << ')';
2113
2114        os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << "  ";
2115        os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << "  ";
2116        os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
2117        os << std::setw(NAME_WIDTH) << std::right << variable_info.name << "   " << variable_info.type;
2118        if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
2119            os << " (" << variable_info.type_signature << ")";
2120        }
2121        os << "\n";
2122    }
2123}
2124
2125pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
2126{
2127    return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
2128}
2129
2130std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id, size_t idx) const
2131{
2132    std::stringstream name;
2133    const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
2134    std::string str_data = StringDataToString(file_->GetStringData(offset));
2135    if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
2136        name << GetMethodSignature(offset);
2137    } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
2138        name << '\"';
2139        name << str_data;
2140        name << '\"';
2141        string_offset_to_name_.emplace(offset, str_data);
2142    } else {
2143        ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
2144        pandasm::LiteralArray lit_array;
2145        GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
2146        name << SerializeLiteralArray(lit_array);
2147    }
2148
2149    return name.str();
2150}
2151
2152panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
2153{
2154    if (file_->IsExternal(class_id)) {
2155        return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
2156    }
2157
2158    panda_file::ClassDataAccessor cda(*file_, class_id);
2159    return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
2160}
2161
2162static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
2163                                BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
2164                                panda_file::File::EntityId code_id)
2165{
2166    const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
2167    const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
2168    if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
2169        size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
2170        if (idx != std::numeric_limits<size_t>::max()) {
2171            if (label_table->find(idx) == label_table->end()) {
2172                std::stringstream ss {};
2173                ss << "jump_label_" << label_table->size();
2174                (*label_table)[idx] = ss.str();
2175            }
2176
2177            pa_ins->imms.clear();
2178            pa_ins->ids.push_back(label_table->at(idx));
2179        } else {
2180            LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2181                                     << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2182                                     << ": invalid jump offset 0x" << jmp_offset
2183                                     << " - jumping in the middle of another instruction!";
2184        }
2185    } else {
2186        LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2187                                 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2188                                 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
2189    }
2190}
2191
2192IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
2193                                     panda_file::File::EntityId code_id) const
2194{
2195    panda_file::CodeDataAccessor code_accessor(*file_, code_id);
2196
2197    const auto ins_sz = code_accessor.GetCodeSize();
2198    const auto ins_arr = code_accessor.GetInstructions();
2199
2200    method->regs_num = code_accessor.GetNumVregs();
2201
2202    auto bc_ins = BytecodeInstruction(ins_arr);
2203    const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
2204
2205    LabelTable label_table = GetExceptions(method, method_id, code_id);
2206
2207    IdList unknown_external_methods {};
2208
2209    while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
2210        if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
2211            LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2212                                     << "). bytecode instructions sequence corrupted for method " << method->name
2213                                     << "! went out of bounds";
2214
2215            break;
2216        }
2217
2218        auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
2219        if (pa_ins.IsJump()) {
2220            translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
2221        }
2222
2223        // check if method id is unknown external method. if so, emplace it in table
2224        if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
2225            const auto arg_method_idx = bc_ins.GetId().AsIndex();
2226            const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
2227
2228            const auto arg_method_signature = GetMethodSignature(arg_method_id);
2229
2230            const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
2231            const bool is_external = file_->IsExternal(arg_method_id);
2232            if (is_external && !is_present) {
2233                unknown_external_methods.push_back(arg_method_id);
2234            }
2235        }
2236
2237        method->AddInstruction(pa_ins);
2238        bc_ins = bc_ins.GetNext();
2239    }
2240
2241    size_t instruction_count = method->ins.size();
2242    for (const auto &pair : label_table) {
2243        if (pair.first > instruction_count) {
2244            LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count
2245                                     << ", but the label index is " << pair.first;
2246            continue;
2247        }
2248
2249        // In some case, the end label can be after the last instruction
2250        // Creating an invalid instruction for the label to make sure it can be serialized
2251        if (pair.first == instruction_count) {
2252            pandasm::Ins ins {};
2253            ins.opcode = pandasm::Opcode::INVALID;
2254            method->AddInstruction(ins);
2255        }
2256
2257        method->ins[pair.first].label = pair.second;
2258        method->ins[pair.first].set_label = true;
2259    }
2260
2261    return unknown_external_methods;
2262}
2263
2264std::vector<size_t> Disassembler::GetColumnNumber()
2265{
2266    std::vector<size_t> columnNumber;
2267    for (const auto &method_info : prog_info_.methods_info) {
2268        for (const auto &column_number : method_info.second.column_number_table) {
2269            columnNumber.push_back(column_number.column);
2270        }
2271    }
2272    return columnNumber;
2273}
2274
2275std::vector<size_t> Disassembler::GetLineNumber()
2276{
2277    std::vector<size_t> lineNumber;
2278    for (const auto &method_info : prog_info_.methods_info) {
2279        for (const auto &line_number : method_info.second.line_number_table) {
2280            lineNumber.push_back(line_number.line);
2281        }
2282    }
2283    return lineNumber;
2284}
2285
2286}  // namespace panda::disasm
2287