1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "watchdog_inner.h"
17 
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21 
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29 
30 #include <securec.h>
31 #include <dlfcn.h>
32 
33 #include "backtrace_local.h"
34 #ifdef HISYSEVENT_ENABLE
35 #include "hisysevent.h"
36 #endif
37 #include "ipc_skeleton.h"
38 #include "xcollie_utils.h"
39 #include "xcollie_define.h"
40 #include "dfx_define.h"
41 #include "parameter.h"
42 
// Signature of the callback handed to SetThreadInfoCallback: it receives an output buffer,
// the buffer length and an opaque context pointer.
typedef void(*ThreadInfoCallBack)(char* buf, size_t len, void* ucontext);
// Declared weak: the symbol compares equal to nullptr when no implementation is linked in,
// so callers must test it against nullptr before calling it.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
45 namespace OHOS {
46 namespace HiviewDFX {
namespace {
// Values stored in stackContent_.stackState / traceContent_.traceState.
enum DumpStackState {
    DEFAULT = 0,
    COMPLETE = 1,
    SAMPLE_COMPLETE = 2
};
// Names of the internal watchdog tasks.
constexpr char IPC_CHECKER[] = "IpcChecker";
constexpr char STACK_CHECKER[] = "ThreadSampler";
constexpr char TRACE_CHECKER[] = "TraceCollector";
// Re-arm periods in milliseconds (compared against a millisecond interval in DayChecker).
constexpr int64_t ONE_DAY_LIMIT = 86400000;
constexpr int64_t ONE_HOUR_LIMIT = 3600000;
// Divisor/multiplier used to convert between GetTimeStamp() values and milliseconds.
constexpr int MILLISEC_TO_NANOSEC = 1000000;
const int FFRT_BUFFER_SIZE = 512 * 1024;
// Number of "no jank detected" rounds after which the sampling task asks to be removed.
const int DETECT_STACK_COUNT = 2;
// Number of samples collected before the main-thread event is reported.
const int COLLECT_STACK_COUNT = 10;
// Trace task: a dump is requested after COLLECT_TRACE_MAX rounds when at least
// COLLECT_TRACE_MIN of them observed a jank.
const int COLLECT_TRACE_MIN = 1;
const int COLLECT_TRACE_MAX = 20;
// Interval passed to StartProfileMainThread (presumably milliseconds — TODO confirm).
const int TASK_INTERVAL = 155;
// Main-looper event duration thresholds, in milliseconds.
const int DURATION_TIME = 150;
const int DISTRIBUTE_TIME = 2000;
const int DUMPTRACE_TIME = 450;
constexpr const char* const KEY_SCB_STATE = "com.ohos.sceneboard";
// Sleep time returned by FetchNextTask when there is nothing to run, in milliseconds.
constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
constexpr uint32_t TIME_MS_TO_S = 1000;
// Minimum spacing between two KickWatchdog attempts, in milliseconds.
constexpr int INTERVAL_KICK_TIME = 6 * 1000;
// UIDs for which IpcCheck installs the IPC checker task.
constexpr int32_t FOUNDATION_UID = 5523;
constexpr int32_t RENDER_SERVICE_UID = 1003;
constexpr int SERVICE_WARNING = 1;
// Candidate hungtask user-list files; the second is tried when the first cannot be opened.
const char* SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
const char* HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
// Sentinel meaning "hungtask file descriptor not opened yet".
const int32_t NOT_OPEN = -1;
// Start-up grace period (milliseconds) during which main-thread profiling is refused.
constexpr uint64_t MAX_START_TIME = 10 * 1000;
const char* LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
// Size in bytes of the buffer handed to the sampler's collect function.
constexpr size_t STACK_LENGTH = 32 * 1024;
}
std::mutex WatchdogInner::lockFfrt_;
// Tick (milliseconds) recorded at the last successful KickWatchdog call; see FetchNextTask.
static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
// Cached descriptor of the hungtask user-list file, NOT_OPEN until SendMsgToHungtask opens it.
static int32_t g_fd = NOT_OPEN;
// Cleared by SendMsgToHungtask once neither hungtask file can be opened.
static bool g_existFile = true;
// Signatures of the entry points resolved from the thread sampler library.
typedef int (*ThreadSamplerInitFunc)(int);
typedef int32_t (*ThreadSamplerSampleFunc)();
typedef int (*ThreadSamplerCollectFunc)(char*, size_t, int);
typedef int (*ThreadSamplerDeinitFunc)();
92 
93 namespace {
ThreadInfo(char *buf __attribute__((unused)), size_t len __attribute__((unused)), void* ucontext __attribute__((unused)))94 void ThreadInfo(char *buf  __attribute__((unused)),
95                 size_t len  __attribute__((unused)),
96                 void* ucontext  __attribute__((unused)))
97 {
98     if (ucontext == nullptr) {
99         return;
100     }
101 
102     auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
103         WatchdogInner::GetInstance().currentScene_.size());
104     if (ret != 0) {
105         return;
106     }
107 }
108 
/**
 * Adjusts the calling thread's signal mask.
 *
 * The set handed to pthread_sigmask is the thread's *current* mask with signo added
 * (isAddSignal == true) or removed (isAddSignal == false). That set is then blocked
 * (isBlock == true) or unblocked (isBlock == false). Note that unblocking therefore also
 * unblocks every other signal that is currently blocked and still present in the set.
 *
 * @param signo       signal number to add to / remove from the working set
 * @param isAddSignal true to add signo to the set, false to remove it
 * @param isBlock     true to apply the set with SIG_BLOCK, false with SIG_UNBLOCK
 */
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // With a null "new set" argument pthread_sigmask only reports the current mask.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (!isAddSignal) {
        sigdelset(&mask, signo);
    } else {
        sigaddset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
125 }
126 
WatchdogInner()127 WatchdogInner::WatchdogInner()
128     : cntCallback_(0), timeCallback_(0), sampleTaskState_(0)
129 {
130     currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
131 }
132 
~WatchdogInner()133 WatchdogInner::~WatchdogInner()
134 {
135     Stop();
136 }
137 
IsInAppspwan()138 static bool IsInAppspwan()
139 {
140     if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
141         return true;
142     }
143 
144     if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
145         return true;
146     }
147 
148     return false;
149 }
150 
SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)151 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
152 {
153     bundleName_ = bundleName;
154     bundleVersion_ = bundleVersion;
155 }
156 
SetForeground(const bool& isForeground)157 void WatchdogInner::SetForeground(const bool& isForeground)
158 {
159     isForeground_ = isForeground;
160 }
161 
ReportMainThreadEvent()162 bool WatchdogInner::ReportMainThreadEvent()
163 {
164     std::string stack = "";
165     CollectStack(stack);
166 
167     std::string path = "";
168     std::string eventName = "MAIN_THREAD_JANK";
169     if (!buissnessThreadInfo_.empty()) {
170         eventName = "BUSSINESS_THREAD_JANK";
171     }
172     if (!WriteStackToFd(getprocpid(), path, stack, eventName)) {
173         XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
174         return false;
175     }
176 #ifdef HISYSEVENT_ENABLE
177     int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
178         HiSysEvent::EventType::FAULT,
179         "BUNDLE_VERSION", bundleVersion_,
180         "BUNDLE_NAME", bundleName_,
181         "BEGIN_TIME", timeContent_.reportBegin / MILLISEC_TO_NANOSEC,
182         "END_TIME", timeContent_.reportEnd / MILLISEC_TO_NANOSEC,
183         "EXTERNAL_LOG", path,
184         "STACK", stack,
185         "JANK_LEVEL", 0,
186         "THREAD_NAME", GetSelfProcName(),
187         "FOREGROUND", isForeground_,
188         "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC);
189     XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d", result);
190     return result >= 0;
191 #else
192     XCOLLIE_LOGI("hisysevent not exists");
193 #endif
194 }
195 
CheckEventTimer(const int64_t& currentTime)196 bool WatchdogInner::CheckEventTimer(const int64_t& currentTime)
197 {
198     if (timeContent_.reportBegin == timeContent_.curBegin &&
199         timeContent_.reportEnd == timeContent_.curEnd) {
200         return false;
201     }
202     return (timeContent_.curEnd <= timeContent_.curBegin &&
203         (currentTime - timeContent_.curBegin >= DURATION_TIME * MILLISEC_TO_NANOSEC)) ||
204         (timeContent_.curEnd - timeContent_.curBegin > DURATION_TIME * MILLISEC_TO_NANOSEC);
205 }
206 
ThreadSampleTask(int (*threadSamplerInitFunc)(int), int32_t (*threadSamplerSampleFunc)())207 void WatchdogInner::ThreadSampleTask(int (*threadSamplerInitFunc)(int), int32_t (*threadSamplerSampleFunc)())
208 {
209     if (sampleTaskState_ == DumpStackState::DEFAULT) {
210         sampleTaskState_++;
211         int initThreadSamplerRet = threadSamplerInitFunc(COLLECT_STACK_COUNT);
212         if (initThreadSamplerRet != 0) {
213             isMainThreadProfileTaskEnabled_ = true;
214             XCOLLIE_LOGE("Thread sampler init failed. ret %{public}d\n", initThreadSamplerRet);
215             return;
216         }
217         XCOLLIE_LOGI("Thread sampler initialized. ret %{public}d\n", initThreadSamplerRet);
218         return;
219     }
220     int64_t currentTime = GetTimeStamp();
221     if (stackContent_.collectCount > DumpStackState::DEFAULT &&
222         stackContent_.collectCount < COLLECT_STACK_COUNT) {
223         threadSamplerSampleFunc();
224         stackContent_.collectCount++;
225     } else if (stackContent_.collectCount == COLLECT_STACK_COUNT) {
226         ReportMainThreadEvent();
227         isMainThreadProfileTaskEnabled_ = true;
228         return;
229     } else {
230         if (CheckEventTimer(currentTime)) {
231             threadSamplerSampleFunc();
232             stackContent_.collectCount++;
233         } else {
234             stackContent_.detectorCount++;
235         }
236     }
237     if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
238         isMainThreadProfileTaskEnabled_ = true;
239     }
240 }
241 
StartProfileMainThread(int32_t interval)242 int32_t WatchdogInner::StartProfileMainThread(int32_t interval)
243 {
244     std::unique_lock<std::mutex> lock(lock_);
245 
246     uint64_t now = GetCurrentTickMillseconds();
247     if (now - watchdogStartTime_ < MAX_START_TIME) {
248         XCOLLIE_LOGI("application is in starting period.\n");
249         stackContent_.stackState = DumpStackState::DEFAULT;
250         return -1;
251     }
252 
253     funcHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
254     if (funcHandler_ == nullptr) {
255         XCOLLIE_LOGE("dlopen failed, funcHandler is nullptr.\n");
256         return -1;
257     }
258 
259     auto threadSamplerInitFunc =
260         reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerInit"));
261     auto threadSamplerSampleFunc =
262         reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(funcHandler_, "ThreadSamplerSample"));
263     if (threadSamplerInitFunc == nullptr || threadSamplerSampleFunc == nullptr) {
264         dlclose(funcHandler_);
265         funcHandler_ = nullptr;
266         return -1;
267     }
268 
269     sampleTaskState_ = 0;
270     stackContent_.detectorCount = 0;
271     stackContent_.collectCount = 0;
272     auto sampleTask = [this, threadSamplerInitFunc, threadSamplerSampleFunc]() {
273         ThreadSampleTask(threadSamplerInitFunc, threadSamplerSampleFunc);
274     };
275 
276     WatchdogTask task("ThreadSampler", sampleTask, 0, interval, true);
277     InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
278     return 0;
279 }
280 
CollectStack(std::string& stack)281 bool WatchdogInner::CollectStack(std::string& stack)
282 {
283     if (funcHandler_ == nullptr) {
284         XCOLLIE_LOGE("open library failed.");
285         return false;
286     }
287 
288     auto threadSamplerCollectFunc =
289         reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(funcHandler_, "ThreadSamplerCollect"));
290     if (threadSamplerCollectFunc == nullptr) {
291         dlclose(funcHandler_);
292         funcHandler_ = nullptr;
293         return false;
294     }
295     int treeFormat = 1;
296     char* stk = new char[STACK_LENGTH];
297     int collectRet = threadSamplerCollectFunc(stk, STACK_LENGTH, treeFormat);
298     stack = stk;
299     delete[] stk;
300     return collectRet == 0;
301 }
302 
Deinit()303 bool WatchdogInner::Deinit()
304 {
305     if (funcHandler_ == nullptr) {
306         XCOLLIE_LOGE("open library failed.");
307         return false;
308     }
309 
310     auto threadSamplerDeinitFunc =
311         reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerDeinit"));
312     if (threadSamplerDeinitFunc == nullptr) {
313         dlclose(funcHandler_);
314         funcHandler_ = nullptr;
315         return false;
316     }
317     int ret = threadSamplerDeinitFunc();
318     return ret == 0;
319 }
320 
ChangeState(int& state, int targetState)321 void WatchdogInner::ChangeState(int& state, int targetState)
322 {
323     timeContent_.reportBegin = timeContent_.curBegin;
324     timeContent_.reportEnd = timeContent_.curEnd;
325     state = targetState;
326 }
327 
DayChecker(int& state, TimePoint currenTime, TimePoint lastEndTime, int64_t checkTimer)328 void WatchdogInner::DayChecker(int& state, TimePoint currenTime, TimePoint lastEndTime,
329     int64_t checkTimer)
330 {
331     auto diff = currenTime - lastEndTime;
332     int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>
333         (diff).count();
334     if (intervalTime >= checkTimer) {
335         XCOLLIE_LOGD("MainThread StartProfileMainThread Over checkTimer: "
336             "%{public}" PRId64 " ms", checkTimer);
337         state = DumpStackState::DEFAULT;
338     }
339 }
340 
StartTraceProfile(int32_t interval)341 void WatchdogInner::StartTraceProfile(int32_t interval)
342 {
343     if (traceCollector_ == nullptr) {
344         XCOLLIE_LOGI("MainThread TraceCollector Failed.");
345         return;
346     }
347     traceContent_.dumpCount = 0;
348     traceContent_.traceCount = 0;
349     auto traceTask = [this]() {
350         traceContent_.traceCount++;
351         int64_t currentTime = GetTimeStamp();
352         if (CheckEventTimer(currentTime)) {
353             traceContent_.dumpCount++;
354         }
355         if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
356             if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
357                 CreateWatchdogDir();
358                 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
359                 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
360                 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
361                 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
362             }
363             isMainThreadTraceEnabled_ = true;
364         }
365     };
366     WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
367     std::unique_lock<std::mutex> lock(lock_);
368     InsertWatchdogTaskLocked("TraceCollector", std::move(task));
369 }
370 
CollectTrace()371 void WatchdogInner::CollectTrace()
372 {
373     traceCollector_ = UCollectClient::TraceCollector::Create();
374     int32_t pid = getprocpid();
375     int32_t uid = static_cast<int64_t>(getuid());
376     appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
377     appCaller_.bundleName = bundleName_;
378     appCaller_.bundleVersion = bundleVersion_;
379     appCaller_.uid = uid;
380     appCaller_.pid = pid;
381     appCaller_.threadName = GetSelfProcName();
382     appCaller_.foreground = isForeground_;
383     appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
384     appCaller_.beginTime = timeContent_.reportBegin / MILLISEC_TO_NANOSEC;
385     appCaller_.endTime = timeContent_.reportEnd / MILLISEC_TO_NANOSEC;
386     auto result = traceCollector_->CaptureDurationTrace(appCaller_);
387     XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d", result.retCode);
388     if (result.retCode != 0) {
389         return;
390     }
391     StartTraceProfile(DURATION_TIME);
392 }
393 
DistributeStart(const std::string& name)394 static TimePoint DistributeStart(const std::string& name)
395 {
396     WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
397     return std::chrono::steady_clock::now();
398 }
399 
DistributeEnd(const std::string& name, const TimePoint& startTime)400 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
401 {
402     TimePoint endTime = std::chrono::steady_clock::now();
403     auto duration = endTime - startTime;
404     int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>
405         (duration).count();
406     if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
407         XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
408             name.c_str(), durationTime);
409     }
410 #ifdef HICOLLIE_JANK_ENABLE
411     WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
412     if (WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::COMPLETE) {
413         int64_t checkTimer = ONE_DAY_LIMIT;
414         if (IsDeveloperOpen() || (GetProcessNameFromProcCmdline(getpid()) == KEY_SCB_STATE)) {
415             checkTimer = ONE_HOUR_LIMIT;
416         }
417         WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().stackContent_.stackState,
418             endTime, WatchdogInner::GetInstance().lastStackTime_, checkTimer);
419     }
420     if (WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::COMPLETE) {
421         WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().traceContent_.traceState,
422             endTime, WatchdogInner::GetInstance().lastTraceTime_, ONE_DAY_LIMIT);
423     }
424     if (duration > std::chrono::milliseconds(DURATION_TIME) && duration < std::chrono::milliseconds(DUMPTRACE_TIME) &&
425         WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::DEFAULT) {
426         if (IsEnableVersion()) {
427             return;
428         }
429         WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().stackContent_.stackState,
430             DumpStackState::COMPLETE);
431         WatchdogInner::GetInstance().lastStackTime_ = endTime;
432 
433         int32_t ret = WatchdogInner::GetInstance().StartProfileMainThread(TASK_INTERVAL);
434         XCOLLIE_LOGI("MainThread StartProfileMainThread ret: %{public}d  "
435             "Duration Time: %{public}" PRId64 " ms", ret, durationTime);
436     }
437     if (duration > std::chrono::milliseconds(DUMPTRACE_TIME) &&
438         WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::DEFAULT) {
439         if (IsBetaVersion() || IsEnableVersion()) {
440             return;
441         }
442         XCOLLIE_LOGI("MainThread TraceCollector Duration Time: %{public}" PRId64 " ms", durationTime);
443         WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().traceContent_.traceState,
444             DumpStackState::COMPLETE);
445         WatchdogInner::GetInstance().lastTraceTime_ = endTime;
446         WatchdogInner::GetInstance().CollectTrace();
447     }
448 #endif // HICOLLIE_JANK_ENABLE
449 }
450 
AddThread(const std::string &name, std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)451 int WatchdogInner::AddThread(const std::string &name,
452     std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
453 {
454     if (name.empty() || handler == nullptr) {
455         XCOLLIE_LOGE("Add thread fail, invalid args!");
456         return -1;
457     }
458 
459     if (IsInAppspwan()) {
460         return -1;
461     }
462 
463     std::string limitedName = GetLimitedSizeName(name);
464     XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
465     std::unique_lock<std::mutex> lock(lock_);
466 
467     IpcCheck();
468 
469     if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
470         return -1;
471     }
472     return 0;
473 }
474 
RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)475 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
476 {
477     if (name.empty() || task == nullptr) {
478         XCOLLIE_LOGE("Add task fail, invalid args!");
479         return;
480     }
481 
482     if (IsInAppspwan()) {
483         return;
484     }
485 
486     std::unique_lock<std::mutex> lock(lock_);
487     std::string limitedName = GetLimitedSizeName(name);
488     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
489 }
490 
RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func, void *arg, unsigned int flag)491 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
492     void *arg, unsigned int flag)
493 {
494     if (name.empty() || timeout == 0) {
495         XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
496         return INVALID_ID;
497     }
498 
499     if (IsInAppspwan()) {
500         return INVALID_ID;
501     }
502 
503     std::unique_lock<std::mutex> lock(lock_);
504     std::string limitedName = GetLimitedSizeName(name);
505     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
506 }
507 
RemoveXCollieTask(int64_t id)508 void WatchdogInner::RemoveXCollieTask(int64_t id)
509 {
510     std::priority_queue<WatchdogTask> tmpQueue;
511     std::unique_lock<std::mutex> lock(lock_);
512     size_t size = checkerQueue_.size();
513     if (size == 0) {
514         XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
515         return;
516     }
517     while (!checkerQueue_.empty()) {
518         const WatchdogTask& task = checkerQueue_.top();
519         if (task.id != id || task.timeout == 0) {
520             tmpQueue.push(task);
521         }
522         checkerQueue_.pop();
523     }
524     if (tmpQueue.size() == size) {
525         XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
526             static_cast<long long>(id), size);
527     }
528     tmpQueue.swap(checkerQueue_);
529 }
530 
RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)531 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
532 {
533     if (name.empty() || task == nullptr) {
534         XCOLLIE_LOGE("Add task fail, invalid args!");
535         return;
536     }
537 
538     if (IsInAppspwan()) {
539         return;
540     }
541 
542     std::string limitedName = GetLimitedSizeName(name);
543     XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
544     std::unique_lock<std::mutex> lock(lock_);
545     InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
546 }
547 
SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)548 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
549 {
550     if (name.empty() || timeLimit == 0 || countLimit <= 0) {
551         XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
552         return INVALID_ID;
553     }
554 
555     if (IsInAppspwan()) {
556         return INVALID_ID;
557     }
558     std::string limitedName = GetLimitedSizeName(name);
559     XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
560     std::unique_lock<std::mutex> lock(lock_);
561     return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
562 }
563 
TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)564 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
565 {
566     std::unique_lock<std::mutex> lock(lock_);
567 
568     if (checkerQueue_.empty()) {
569         XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
570         return;
571     }
572 
573     bool isTaskExist = false;
574     uint64_t now = GetCurrentTickMillseconds();
575     std::priority_queue<WatchdogTask> tmpQueue;
576     while (!checkerQueue_.empty()) {
577         WatchdogTask task = checkerQueue_.top();
578         if (task.name == name) {
579             isTaskExist = true;
580             if (bTrigger) {
581                 task.triggerTimes.push_back(now);
582                 task.message = message;
583             } else {
584                 task.triggerTimes.clear();
585             }
586         }
587         tmpQueue.push(task);
588         checkerQueue_.pop();
589     }
590     tmpQueue.swap(checkerQueue_);
591 
592     if (!isTaskExist) {
593         XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
594     }
595 }
596 
IsTaskExistLocked(const std::string& name)597 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
598 {
599     return (taskNameSet_.find(name) != taskNameSet_.end());
600 }
601 
IsExceedMaxTaskLocked()602 bool WatchdogInner::IsExceedMaxTaskLocked()
603 {
604     if (checkerQueue_.size() >= MAX_WATCH_NUM) {
605         XCOLLIE_LOGE("Exceed max watchdog task!");
606         return true;
607     }
608 
609     return false;
610 }
611 
InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)612 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
613 {
614     if (!task.isOneshotTask && IsTaskExistLocked(name)) {
615         XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
616         return 0;
617     }
618 
619     if (IsExceedMaxTaskLocked()) {
620         XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
621         return 0;
622     }
623     int64_t id = task.id;
624     checkerQueue_.push(std::move(task));
625     if (!task.isOneshotTask) {
626         taskNameSet_.insert(name);
627     }
628     CreateWatchdogThreadIfNeed();
629     condition_.notify_all();
630 
631     return id;
632 }
633 
StopWatchdog()634 void WatchdogInner::StopWatchdog()
635 {
636     Stop();
637 }
638 
IsCallbackLimit(unsigned int flag)639 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
640 {
641     bool ret = false;
642     time_t startTime = time(nullptr);
643     if (!(flag & XCOLLIE_FLAG_LOG)) {
644         return ret;
645     }
646     if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
647         timeCallback_ = startTime;
648     } else {
649         if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
650             ret = true;
651         }
652     }
653     return ret;
654 }
655 
IPCProxyLimitCallback(uint64_t num)656 void IPCProxyLimitCallback(uint64_t num)
657 {
658     XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
659     if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
660         XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
661         _exit(0);
662     }
663 }
664 
CreateWatchdogThreadIfNeed()665 void WatchdogInner::CreateWatchdogThreadIfNeed()
666 {
667     std::call_once(flag_, [this] {
668         if (threadLoop_ == nullptr) {
669             if (mainRunner_ == nullptr) {
670                 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
671             }
672             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
673             const uint64_t limitNum = 20000;
674             IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
675             threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
676             if (getpid() == gettid()) {
677                 SetThreadSignalMask(SIGDUMP, true, true);
678             }
679             XCOLLIE_LOGD("Watchdog is running!");
680         }
681     });
682 }
683 
/**
 * Picks the next due task from the checker queue.
 *
 * @param now  current tick in milliseconds
 * @param task receives a copy of the due task when 0 is returned
 * @return 0 when a task was fetched into task (and removed from the queue); otherwise the
 *         number of milliseconds to wait: the time until the head task is due, or
 *         DEFAULT_TIMEOUT when the queue is empty or the watchdog is stopping.
 */
uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
{
    std::unique_lock<std::mutex> lock(lock_);
    if (isNeedStop_) {
        // Stopping: drop everything that is still queued.
        while (!checkerQueue_.empty()) {
            checkerQueue_.pop();
        }
        return DEFAULT_TIMEOUT;
    }

    if (checkerQueue_.empty()) {
        return DEFAULT_TIMEOUT;
    }

    // Housekeeping on the head task. queuedTaskCheck refers to the queue's top element and
    // must not be used after any of the pop() calls below.
    const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
    bool popCheck = true;
    if (queuedTaskCheck.name.empty()) {
        checkerQueue_.pop();
        XCOLLIE_LOGW("queuedTask name is empty.");
    } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadProfileTaskEnabled_) {
        // The sampling task flagged itself as finished: remove it and release the sampler.
        checkerQueue_.pop();
        taskNameSet_.erase("ThreadSampler");
        isMainThreadProfileTaskEnabled_ = false;
        if (Deinit()) {
            dlclose(funcHandler_);
            funcHandler_ = nullptr;
        }
        XCOLLIE_LOGI("STACK_CHECKER Task pop");
    } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
        // The trace task flagged itself as finished: remove it.
        checkerQueue_.pop();
        taskNameSet_.erase("TraceCollector");
        isMainThreadTraceEnabled_ = false;
        XCOLLIE_LOGI("TRACE_CHECKER Task pop");
    } else {
        popCheck = false;
    }
    // The housekeeping pop may have emptied the queue.
    if (popCheck && checkerQueue_.empty()) {
        return DEFAULT_TIMEOUT;
    }

    const WatchdogTask& queuedTask = checkerQueue_.top();
    // Kick at most once per INTERVAL_KICK_TIME while the IPC_FULL task heads the queue.
    if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
        if (KickWatchdog()) {
            g_nextKickTime = now;
        }
    }
    if (queuedTask.nextTickTime > now) {
        return queuedTask.nextTickTime - now;
    }

    currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
    // Copy the task out before popping; queuedTask dangles after pop().
    task = queuedTask;
    checkerQueue_.pop();
    return 0;
}
739 
ReInsertTaskIfNeed(WatchdogTask& task)740 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
741 {
742     if (task.checkInterval == 0) {
743         return;
744     }
745 
746     std::unique_lock<std::mutex> lock(lock_);
747     task.nextTickTime = task.nextTickTime + task.checkInterval;
748     checkerQueue_.push(task);
749 }
750 
Start()751 bool WatchdogInner::Start()
752 {
753     if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
754         XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
755     }
756     SetThreadSignalMask(SIGDUMP, false, false);
757     watchdogStartTime_ = GetCurrentTickMillseconds();
758     XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
759     if (SetThreadInfoCallback != nullptr) {
760         SetThreadInfoCallback(ThreadInfo);
761         XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
762     }
763     while (!isNeedStop_) {
764         uint64_t now = GetCurrentTickMillseconds();
765         WatchdogTask task;
766         uint64_t leftTimeMill = FetchNextTask(now, task);
767         if (leftTimeMill == 0) {
768             task.Run(now);
769             ReInsertTaskIfNeed(task);
770             currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
771             continue;
772         } else if (isNeedStop_) {
773             break;
774         } else {
775             std::unique_lock<std::mutex> lock(lock_);
776             condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
777         }
778     }
779     if (SetThreadInfoCallback != nullptr) {
780         SetThreadInfoCallback(nullptr);
781     }
782     return true;
783 }
784 
SendMsgToHungtask(const std::string& msg)785 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
786 {
787     if (g_fd == NOT_OPEN) {
788         g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
789         if (g_fd < 0) {
790             g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
791             if (g_fd < 0) {
792                 XCOLLIE_LOGE("can't open hungtask file");
793                 g_existFile = false;
794                 return false;
795             }
796             XCOLLIE_LOGE("change to hmos kernel");
797             isHmos = true;
798         } else {
799             XCOLLIE_LOGE("change to linux kernel");
800         }
801     }
802 
803     ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
804     if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
805         XCOLLIE_LOGE("watchdogWrite msg failed");
806         close(g_fd);
807         g_fd = NOT_OPEN;
808         return false;
809     }
810     XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
811     return true;
812 }
813 
KickWatchdog()814 bool WatchdogInner::KickWatchdog()
815 {
816     return true;
817 }
818 
IpcCheck()819 void WatchdogInner::IpcCheck()
820 {
821     uint32_t uid = getuid();
822     if (uid == FOUNDATION_UID || uid == RENDER_SERVICE_UID || GetSelfProcName() == "ohos.sceneboard") {
823         if (binderCheckHander_ == nullptr) {
824             auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
825             binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
826             if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
827                 nullptr, IPC_CHECKER_TIME))) {
828                 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
829             }
830         }
831     }
832 }
833 
WriteStringToFile(uint32_t pid, const char *str)834 void WatchdogInner::WriteStringToFile(uint32_t pid, const char *str)
835 {
836     char file[PATH_LEN] = {0};
837     int32_t newPid = static_cast<int32_t>(pid);
838     if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", newPid) == -1) {
839         XCOLLIE_LOGI("failed to build path for %{public}d.", newPid);
840     }
841     int fd = open(file, O_RDWR);
842     if (fd == -1) {
843         return;
844     }
845     if (write(fd, str, strlen(str)) < 0) {
846         XCOLLIE_LOGI("failed to write 0 for %{public}s", file);
847     }
848     close(fd);
849     return;
850 }
851 
FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)852 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
853 {
854     std::string description = "FfrtCallback: task(";
855     description += taskInfo;
856     description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
857     bool isExist = false;
858     {
859         std::unique_lock<std::mutex> lock(lockFfrt_);
860         auto &map = WatchdogInner::GetInstance().taskIdCnt;
861         auto search = map.find(taskId);
862         if (search != map.end()) {
863             isExist = true;
864         } else {
865             map[taskId] = SERVICE_WARNING;
866         }
867     }
868 
869     if (isExist) {
870         description += ", report twice instead of exiting process."; // 1s = 1000ms
871         WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
872         WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
873         WatchdogInner::KillPeerBinderProcess(description);
874     } else {
875         WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
876     }
877 }
878 
InitFfrtWatchdog()879 void WatchdogInner::InitFfrtWatchdog()
880 {
881     CreateWatchdogThreadIfNeed();
882     ffrt_task_timeout_set_cb(FfrtCallback);
883     ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
884     std::unique_lock<std::mutex> lock(lock_);
885     IpcCheck();
886 }
887 
SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)888 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
889 {
890     int32_t pid = getprocpid();
891     if (IsProcessDebug(pid)) {
892         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
893         return;
894     }
895     uint32_t gid = getgid();
896     uint32_t uid = getuid();
897     time_t curTime = time(nullptr);
898     std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
899         "\n" + msg + "\n";
900     char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
901     buffer[FFRT_BUFFER_SIZE] = 0;
902     ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
903     sendMsg += buffer;
904     delete[] buffer;
905     int32_t tid = pid;
906     GetFfrtTaskTid(tid, sendMsg);
907 #ifdef HISYSEVENT_ENABLE
908     int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
909         "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(),
910         "MSG", sendMsg, "STACK", GetProcessStacktrace());
911     if (ret == ERR_OVER_SIZE) {
912         std::string stack = "";
913         GetBacktraceStringByTid(stack, tid, 0, true);
914         ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
915             "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo,
916             "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", stack);
917     }
918 
919     XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
920         "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
921 #else
922     XCOLLIE_LOGI("hisysevent not exists");
923 #endif
924 }
925 
GetFfrtTaskTid(int32_t& tid, const std::string& msg)926 void WatchdogInner::GetFfrtTaskTid(int32_t& tid, const std::string& msg)
927 {
928     std::string queueNameFrontStr = "us. queue name [";
929     size_t queueNameFrontPos = msg.find(queueNameFrontStr);
930     if (queueNameFrontPos == std::string::npos) {
931         return;
932     }
933     size_t queueNameRearPos = msg.find("], remaining tasks count=");
934     size_t queueStartPos = queueNameFrontPos + queueNameFrontStr.length();
935     if (queueNameRearPos == std::string::npos || queueNameRearPos <= queueStartPos) {
936         return;
937     }
938     size_t queueNameLength = queueNameRearPos - queueStartPos;
939     std::string workerTidFrontStr = " worker tid ";
940     std::string taskIdFrontStr = " is running, task id ";
941     std::string queueNameStr = " name " + msg.substr(queueStartPos, queueNameLength);
942     std::istringstream issMsg(msg);
943     std::string line;
944     while (std::getline(issMsg, line, '\n')) {
945         size_t workerTidFrontPos = line.find(workerTidFrontStr);
946         size_t taskIdFrontPos = line.find(taskIdFrontStr);
947         size_t queueNamePos = line.find(queueNameStr);
948         size_t workerStartPos = workerTidFrontPos + workerTidFrontStr.length();
949         if (workerTidFrontPos == std::string::npos || taskIdFrontPos == std::string::npos ||
950             queueNamePos == std::string::npos || taskIdFrontPos <= workerStartPos) {
951             continue;
952         }
953         size_t tidLength = taskIdFrontPos - workerStartPos;
954         if (tidLength < std::to_string(INT32_MAX).length()) {
955             std::string tidStr = line.substr(workerStartPos, tidLength);
956             if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char& c) {
957                 return isdigit(c);
958             })) {
959                 tid = std::stoi(tidStr);
960                 return;
961             }
962         }
963     }
964 }
965 
LeftTimeExitProcess(const std::string &description)966 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
967 {
968     int32_t pid = getprocpid();
969     if (IsProcessDebug(pid)) {
970         XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
971         return;
972     }
973     DelayBeforeExit(10); // sleep 10s for hiview dump
974     XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
975     WatchdogInner::WriteStringToFile(pid, "0");
976 
977     _exit(0);
978 }
979 
Stop()980 bool WatchdogInner::Stop()
981 {
982     IPCDfx::SetIPCProxyLimit(0, nullptr);
983     if (mainRunner_ != nullptr) {
984         mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
985     }
986     isNeedStop_.store(true);
987     condition_.notify_all();
988     if (threadLoop_ != nullptr && threadLoop_->joinable()) {
989         threadLoop_->join();
990         threadLoop_ = nullptr;
991     }
992     if (g_fd != NOT_OPEN) {
993         close(g_fd);
994         g_fd = NOT_OPEN;
995     }
996     return true;
997 }
998 
KillPeerBinderProcess(const std::string &description)999 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
1000 {
1001     bool result = false;
1002     if (getuid() == FOUNDATION_UID) {
1003         result = KillProcessByPid(getprocpid());
1004     }
1005     if (!result) {
1006         WatchdogInner::LeftTimeExitProcess(description);
1007     }
1008 }
1009 
RemoveInnerTask(const std::string& name)1010 void WatchdogInner::RemoveInnerTask(const std::string& name)
1011 {
1012     if (name.empty()) {
1013         XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
1014         return;
1015     }
1016     std::priority_queue<WatchdogTask> tmpQueue;
1017     std::unique_lock<std::mutex> lock(lock_);
1018     size_t size = checkerQueue_.size();
1019     if (size == 0) {
1020         XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
1021         return;
1022     }
1023     while (!checkerQueue_.empty()) {
1024         const WatchdogTask& task = checkerQueue_.top();
1025         if (task.name != name) {
1026             tmpQueue.push(task);
1027         } else {
1028             size_t nameSize = taskNameSet_.size();
1029             if (nameSize != 0 && !task.isOneshotTask) {
1030                 taskNameSet_.erase(name);
1031                 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
1032                     name.c_str(), nameSize > taskNameSet_.size());
1033             }
1034         }
1035         checkerQueue_.pop();
1036     }
1037     if (tmpQueue.size() == size) {
1038         XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
1039             name.c_str(), size);
1040     }
1041     tmpQueue.swap(checkerQueue_);
1042 }
1043 
InitBeginFunc(const char* name)1044 void InitBeginFunc(const char* name)
1045 {
1046     std::string nameStr(name);
1047     WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
1048 }
1049 
InitEndFunc(const char* name)1050 void InitEndFunc(const char* name)
1051 {
1052     std::string nameStr(name);
1053     DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
1054 }
1055 
InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc, WatchdogInnerEndFunc* endFunc)1056 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
1057     WatchdogInnerEndFunc* endFunc)
1058 {
1059     int64_t tid = getproctid();
1060     if (beginFunc && endFunc) {
1061         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1062             XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
1063                 "no repeated initialization.", tid);
1064             return;
1065         }
1066         if (mainRunner_ != nullptr) {
1067             mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1068         }
1069         *beginFunc = InitBeginFunc;
1070         *endFunc = InitEndFunc;
1071         buissnessThreadInfo_.insert(tid);
1072     } else {
1073         if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1074             XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
1075             mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1076             buissnessThreadInfo_.erase(tid);
1077         }
1078     }
1079 }
1080 
SetAppDebug(bool isAppDebug)1081 void WatchdogInner::SetAppDebug(bool isAppDebug)
1082 {
1083     isAppDebug_ = isAppDebug;
1084 }
1085 
GetAppDebug()1086 bool WatchdogInner::GetAppDebug()
1087 {
1088     return isAppDebug_;
1089 }
1090 } // end of namespace HiviewDFX
1091 } // end of namespace OHOS
1092