1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "collect_util.h"
17 
18 #include <codecvt>
19 #include <locale>
20 
21 namespace panda::libpandafile {
22 
23 /**
24  * processed_ids: The literal array ids are collected from field and ins, needn't process nest.
25  * nest_unprocessed_ids: The literal array ids are collected from ins, need process nest.
26  */
CollectLiteralArray(const panda_file::File &file_, std::unordered_set<uint32_t> &processed_ids)27 void CollectUtil::CollectLiteralArray(const panda_file::File &file_, std::unordered_set<uint32_t> &processed_ids)
28 {
29     std::unordered_set<uint32_t> nest_unprocessed_ids;
30 
31     for (uint32_t id : file_.GetClasses()) {
32         panda_file::File::EntityId class_id(id);
33         if (file_.IsExternal(class_id)) {
34             continue;
35         }
36         panda_file::ClassDataAccessor class_data_accessor(file_, class_id);
37         CollectClassLiteralArray(class_data_accessor, processed_ids, nest_unprocessed_ids);
38     }
39     ProcessNestLiteralArray(file_, processed_ids, nest_unprocessed_ids);
40 }
41 
CollectClassLiteralArray(panda_file::ClassDataAccessor &class_data_accessor, std::unordered_set<uint32_t> &processed_ids, std::unordered_set<uint32_t> &nest_unprocessed_ids)42 void CollectUtil::CollectClassLiteralArray(panda_file::ClassDataAccessor &class_data_accessor,
43                                            std::unordered_set<uint32_t> &processed_ids,
44                                            std::unordered_set<uint32_t> &nest_unprocessed_ids)
45 {
46     panda_file::File::StringData csd = class_data_accessor.GetName();
47     const char *cn = utf::Mutf8AsCString(csd.data);
48     class_data_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
49         panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
50         panda_file::File::StringData fsd = class_data_accessor.GetPandaFile().GetStringData(field_name_id);
51         const char *fn = utf::Mutf8AsCString(fsd.data);
52         if (std::strcmp(cn, ES_MODULE_RECORD.data()) != 0 &&
53             std::strcmp(cn, ES_SCOPE_NAMES_RECORD.data()) != 0 &&
54             std::strcmp(fn, SCOPE_NAMES.data()) != 0 &&
55             std::strcmp(fn, MODULE_RECORD_IDX.data()) != 0) {
56             return;
57         }
58         auto module_offset = field_accessor.GetValue<uint32_t>().value();
59         processed_ids.emplace(module_offset);
60     });
61     class_data_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
62         if (!method_accessor.GetCodeId().has_value()) {
63             return;
64         }
65         panda_file::File::EntityId method_id = method_accessor.GetMethodId();
66         panda_file::File::EntityId code_id = method_accessor.GetCodeId().value();
67         panda_file::CodeDataAccessor code_data_accessor {class_data_accessor.GetPandaFile(), code_id};
68         uint32_t ins_size_ = code_data_accessor.GetCodeSize();
69         const uint8_t *ins_arr = code_data_accessor.GetInstructions();
70         auto bc_ins = panda::BytecodeInst<BytecodeInstMode::FAST>(ins_arr);
71         const auto bc_ins_last = bc_ins.JumpTo(ins_size_);
72         while (bc_ins.GetAddress() < bc_ins_last.GetAddress()) {
73             if (!bc_ins.IsPrimaryOpcodeValid()) {
74                 LOG(FATAL, PANDAFILE) << "Fail to verify primary opcode!";
75             }
76             if (bc_ins.HasFlag(panda::BytecodeInst<BytecodeInstMode::FAST>::Flags::LITERALARRAY_ID)) {
77                 const auto literal_id =
78                     GetLiteralArrayIdInBytecodeInst(class_data_accessor.GetPandaFile(), method_id, bc_ins);
79                 nest_unprocessed_ids.insert(literal_id.GetOffset());
80             }
81             bc_ins = bc_ins.GetNext();
82         }
83     });
84 }
85 
ProcessNestLiteralArray(const panda_file::File &file_, std::unordered_set<uint32_t> &processed_ids, std::unordered_set<uint32_t> &nest_unprocessed_ids)86 void CollectUtil::ProcessNestLiteralArray(const panda_file::File &file_, std::unordered_set<uint32_t> &processed_ids,
87                                           std::unordered_set<uint32_t> &nest_unprocessed_ids)
88 {
89     if (nest_unprocessed_ids.empty()) {
90         return;
91     }
92 
93     panda_file::File::EntityId lit_array_invalid(panda_file::INVALID_OFFSET);
94     panda_file::LiteralDataAccessor literal_data_accessor {file_, lit_array_invalid};
95     while (!nest_unprocessed_ids.empty()) {
96         auto nest_unprocess_id_iterator = nest_unprocessed_ids.begin();
97         uint32_t nest_unprocess_id = *nest_unprocess_id_iterator;
98         processed_ids.emplace(nest_unprocess_id);
99         panda_file::File::EntityId nest_unprocess_id_entity_id(nest_unprocess_id);
100         literal_data_accessor.EnumerateLiteralVals(
101             nest_unprocess_id_entity_id,
102             [processed_ids, &nest_unprocessed_ids](const panda_file::LiteralDataAccessor::LiteralValue &value,
103                                                  const panda_file::LiteralTag &tag) {
104                 if (tag != panda_file::LiteralTag::LITERALARRAY) {
105                     return;
106                 }
107                 uint32_t idx = std::get<uint32_t>(value);
108                 if ((processed_ids.find(idx) != processed_ids.end()) ||
109                     (nest_unprocessed_ids.find(idx) != nest_unprocessed_ids.end())) {
110                     return;
111                 }
112                 nest_unprocessed_ids.emplace(idx);
113             });
114         nest_unprocessed_ids.erase(nest_unprocess_id);
115     }
116 }
117 
/**
 * Resolves the literal array id referenced by a bytecode instruction.
 *
 * file_: file used to resolve the index.
 * method_id: method the instruction belongs to; passed to ResolveMethodIndex.
 * bc_ins: instruction to inspect.
 * Returns the entity id produced by ResolveMethodIndex for the instruction's literal index operand.
 * A negative literal index is reported through LOG(FATAL).
 */
panda_file::File::EntityId CollectUtil::GetLiteralArrayIdInBytecodeInst(
    const panda_file::File &file_, panda_file::File::EntityId method_id,
    panda::BytecodeInst<BytecodeInstMode::FAST> bc_ins)
{
    // Keep the accessor's own return type here: storing the result in size_t first would make the
    // negative check below always false.
    const auto idx = bc_ins.GetLiteralIndex();
    if (idx < 0) {
        LOG(FATAL, PANDAFILE) << "Fail to verify ID Index!";
    }
    const auto arg_literal_idx = bc_ins.GetId(static_cast<size_t>(idx)).AsIndex();
    return file_.ResolveMethodIndex(method_id, arg_literal_idx);
}
130 
131 }  // namespace panda::libpandafile