1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "watchdog_inner.h"
17
18 #include <cerrno>
19 #include <climits>
20 #include <mutex>
21
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <fcntl.h>
25 #include <pthread.h>
26 #include <unistd.h>
27 #include <csignal>
28 #include <string>
29
30 #include <securec.h>
31 #include <dlfcn.h>
32
33 #include "backtrace_local.h"
34 #ifdef HISYSEVENT_ENABLE
35 #include "hisysevent.h"
36 #endif
37 #include "ipc_skeleton.h"
38 #include "xcollie_utils.h"
39 #include "xcollie_define.h"
40 #include "dfx_define.h"
41 #include "parameter.h"
42
// Callback type accepted by SetThreadInfoCallback: it receives an output
// buffer, the buffer length and a context pointer.
using ThreadInfoCallBack = void (*)(char* buf, size_t len, void* ucontext);
// Declared weak, so the symbol must be compared against nullptr before it is called.
extern "C" void SetThreadInfoCallback(ThreadInfoCallBack func) __attribute__((weak));
45 namespace OHOS {
46 namespace HiviewDFX {
namespace {
// States used by the stack-sampling and trace-collection state machines.
enum DumpStackState {
    DEFAULT = 0,
    COMPLETE = 1,
    SAMPLE_COMPLETE = 2
};

// Names of the watchdog's internal tasks.
constexpr char IPC_CHECKER[] = "IpcChecker";
constexpr char STACK_CHECKER[] = "ThreadSampler";
constexpr char TRACE_CHECKER[] = "TraceCollector";

// Windows (milliseconds) after which a finished collection may be started again.
constexpr int64_t ONE_DAY_LIMIT = 86400000;
constexpr int64_t ONE_HOUR_LIMIT = 3600000;
constexpr int MILLISEC_TO_NANOSEC = 1000000;

// Size of the buffer handed to ffrt_dump.
constexpr int FFRT_BUFFER_SIZE = 512 * 1024;

// Counters that bound the stack-sampling and trace-collection tasks.
constexpr int DETECT_STACK_COUNT = 2;
constexpr int COLLECT_STACK_COUNT = 10;
constexpr int COLLECT_TRACE_MIN = 1;
constexpr int COLLECT_TRACE_MAX = 20;

// Main-thread timing thresholds and task periods (milliseconds).
constexpr int TASK_INTERVAL = 155;
constexpr int DURATION_TIME = 150;
constexpr int DISTRIBUTE_TIME = 2000;
constexpr int DUMPTRACE_TIME = 450;

// Process name compared against the command line in DistributeEnd.
constexpr const char* const KEY_SCB_STATE = "com.ohos.sceneboard";

constexpr uint64_t DEFAULT_TIMEOUT = 60 * 1000;
constexpr uint32_t FFRT_CALLBACK_TIME = 30 * 1000;
constexpr uint32_t IPC_CHECKER_TIME = 30 * 1000;
constexpr uint32_t TIME_MS_TO_S = 1000;
constexpr int INTERVAL_KICK_TIME = 6 * 1000;

// UIDs for which IpcCheck installs the IPC checker task.
constexpr int32_t FOUNDATION_UID = 5523;
constexpr int32_t RENDER_SERVICE_UID = 1003;

constexpr int SERVICE_WARNING = 1;

// Candidate hungtask user-list files; the second is tried when the first cannot be opened.
const char* SYS_KERNEL_HUNGTASK_USERLIST = "/sys/kernel/hungtask/userlist";
const char* HMOS_HUNGTASK_USERLIST = "/proc/sys/hguard/user_list";
constexpr int32_t NOT_OPEN = -1;

// StartProfileMainThread refuses to start until this long after the watchdog thread began.
constexpr uint64_t MAX_START_TIME = 10 * 1000;
const char* LIB_THREAD_SAMPLER_PATH = "libthread_sampler.z.so";
constexpr size_t STACK_LENGTH = 32 * 1024;
}
84 std::mutex WatchdogInner::lockFfrt_;
85 static uint64_t g_nextKickTime = GetCurrentTickMillseconds();
86 static int32_t g_fd = NOT_OPEN;
87 static bool g_existFile = true;
88 typedef int (*ThreadSamplerInitFunc)(int);
89 typedef int32_t (*ThreadSamplerSampleFunc)();
90 typedef int (*ThreadSamplerCollectFunc)(char*, size_t, int);
91 typedef int (*ThreadSamplerDeinitFunc)();
92
93 namespace {
ThreadInfo(char *buf __attribute__((unused)), size_t len __attribute__((unused)), void* ucontext __attribute__((unused)))94 void ThreadInfo(char *buf __attribute__((unused)),
95 size_t len __attribute__((unused)),
96 void* ucontext __attribute__((unused)))
97 {
98 if (ucontext == nullptr) {
99 return;
100 }
101
102 auto ret = memcpy_s(buf, len, WatchdogInner::GetInstance().currentScene_.c_str(),
103 WatchdogInner::GetInstance().currentScene_.size());
104 if (ret != 0) {
105 return;
106 }
107 }
108
/*
 * Adjusts the calling thread's signal mask.
 *
 * The thread's current mask is read, signo is added to (isAddSignal) or
 * removed from (!isAddSignal) that copy, and the resulting set is then passed
 * to pthread_sigmask with SIG_BLOCK (isBlock) or SIG_UNBLOCK (!isBlock).
 *
 * NOTE(review): because the set handed to SIG_UNBLOCK is derived from the
 * whole current mask, (isAddSignal=false, isBlock=false) unblocks every
 * currently blocked signal except signo, and leaves signo blocked if it
 * already was. Confirm this is what the callers expect.
 */
void SetThreadSignalMask(int signo, bool isAddSignal, bool isBlock)
{
    sigset_t mask;
    sigemptyset(&mask);
    // A null "new set" makes this call a pure query of the current mask.
    pthread_sigmask(SIG_SETMASK, nullptr, &mask);

    if (!isAddSignal) {
        sigdelset(&mask, signo);
    } else {
        sigaddset(&mask, signo);
    }

    const int how = isBlock ? SIG_BLOCK : SIG_UNBLOCK;
    pthread_sigmask(how, &mask, nullptr);
}
125 }
126
WatchdogInner()127 WatchdogInner::WatchdogInner()
128 : cntCallback_(0), timeCallback_(0), sampleTaskState_(0)
129 {
130 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
131 }
132
~WatchdogInner()133 WatchdogInner::~WatchdogInner()
134 {
135 Stop();
136 }
137
IsInAppspwan()138 static bool IsInAppspwan()
139 {
140 if (getuid() == 0 && GetSelfProcName().find("appspawn") != std::string::npos) {
141 return true;
142 }
143
144 if (getuid() == 0 && GetSelfProcName().find("nativespawn") != std::string::npos) {
145 return true;
146 }
147
148 return false;
149 }
150
SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)151 void WatchdogInner::SetBundleInfo(const std::string& bundleName, const std::string& bundleVersion)
152 {
153 bundleName_ = bundleName;
154 bundleVersion_ = bundleVersion;
155 }
156
SetForeground(const bool& isForeground)157 void WatchdogInner::SetForeground(const bool& isForeground)
158 {
159 isForeground_ = isForeground;
160 }
161
ReportMainThreadEvent()162 bool WatchdogInner::ReportMainThreadEvent()
163 {
164 std::string stack = "";
165 CollectStack(stack);
166
167 std::string path = "";
168 std::string eventName = "MAIN_THREAD_JANK";
169 if (!buissnessThreadInfo_.empty()) {
170 eventName = "BUSSINESS_THREAD_JANK";
171 }
172 if (!WriteStackToFd(getprocpid(), path, stack, eventName)) {
173 XCOLLIE_LOGI("MainThread WriteStackToFd Failed");
174 return false;
175 }
176 #ifdef HISYSEVENT_ENABLE
177 int result = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, "MAIN_THREAD_JANK",
178 HiSysEvent::EventType::FAULT,
179 "BUNDLE_VERSION", bundleVersion_,
180 "BUNDLE_NAME", bundleName_,
181 "BEGIN_TIME", timeContent_.reportBegin / MILLISEC_TO_NANOSEC,
182 "END_TIME", timeContent_.reportEnd / MILLISEC_TO_NANOSEC,
183 "EXTERNAL_LOG", path,
184 "STACK", stack,
185 "JANK_LEVEL", 0,
186 "THREAD_NAME", GetSelfProcName(),
187 "FOREGROUND", isForeground_,
188 "LOG_TIME", GetTimeStamp() / MILLISEC_TO_NANOSEC);
189 XCOLLIE_LOGI("MainThread HiSysEventWrite result=%{public}d", result);
190 return result >= 0;
191 #else
192 XCOLLIE_LOGI("hisysevent not exists");
193 #endif
194 }
195
CheckEventTimer(const int64_t& currentTime)196 bool WatchdogInner::CheckEventTimer(const int64_t& currentTime)
197 {
198 if (timeContent_.reportBegin == timeContent_.curBegin &&
199 timeContent_.reportEnd == timeContent_.curEnd) {
200 return false;
201 }
202 return (timeContent_.curEnd <= timeContent_.curBegin &&
203 (currentTime - timeContent_.curBegin >= DURATION_TIME * MILLISEC_TO_NANOSEC)) ||
204 (timeContent_.curEnd - timeContent_.curBegin > DURATION_TIME * MILLISEC_TO_NANOSEC);
205 }
206
ThreadSampleTask(int (*threadSamplerInitFunc)(int), int32_t (*threadSamplerSampleFunc)())207 void WatchdogInner::ThreadSampleTask(int (*threadSamplerInitFunc)(int), int32_t (*threadSamplerSampleFunc)())
208 {
209 if (sampleTaskState_ == DumpStackState::DEFAULT) {
210 sampleTaskState_++;
211 int initThreadSamplerRet = threadSamplerInitFunc(COLLECT_STACK_COUNT);
212 if (initThreadSamplerRet != 0) {
213 isMainThreadProfileTaskEnabled_ = true;
214 XCOLLIE_LOGE("Thread sampler init failed. ret %{public}d\n", initThreadSamplerRet);
215 return;
216 }
217 XCOLLIE_LOGI("Thread sampler initialized. ret %{public}d\n", initThreadSamplerRet);
218 return;
219 }
220 int64_t currentTime = GetTimeStamp();
221 if (stackContent_.collectCount > DumpStackState::DEFAULT &&
222 stackContent_.collectCount < COLLECT_STACK_COUNT) {
223 threadSamplerSampleFunc();
224 stackContent_.collectCount++;
225 } else if (stackContent_.collectCount == COLLECT_STACK_COUNT) {
226 ReportMainThreadEvent();
227 isMainThreadProfileTaskEnabled_ = true;
228 return;
229 } else {
230 if (CheckEventTimer(currentTime)) {
231 threadSamplerSampleFunc();
232 stackContent_.collectCount++;
233 } else {
234 stackContent_.detectorCount++;
235 }
236 }
237 if (stackContent_.detectorCount == DETECT_STACK_COUNT) {
238 isMainThreadProfileTaskEnabled_ = true;
239 }
240 }
241
StartProfileMainThread(int32_t interval)242 int32_t WatchdogInner::StartProfileMainThread(int32_t interval)
243 {
244 std::unique_lock<std::mutex> lock(lock_);
245
246 uint64_t now = GetCurrentTickMillseconds();
247 if (now - watchdogStartTime_ < MAX_START_TIME) {
248 XCOLLIE_LOGI("application is in starting period.\n");
249 stackContent_.stackState = DumpStackState::DEFAULT;
250 return -1;
251 }
252
253 funcHandler_ = dlopen(LIB_THREAD_SAMPLER_PATH, RTLD_LAZY);
254 if (funcHandler_ == nullptr) {
255 XCOLLIE_LOGE("dlopen failed, funcHandler is nullptr.\n");
256 return -1;
257 }
258
259 auto threadSamplerInitFunc =
260 reinterpret_cast<ThreadSamplerInitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerInit"));
261 auto threadSamplerSampleFunc =
262 reinterpret_cast<ThreadSamplerSampleFunc>(FunctionOpen(funcHandler_, "ThreadSamplerSample"));
263 if (threadSamplerInitFunc == nullptr || threadSamplerSampleFunc == nullptr) {
264 dlclose(funcHandler_);
265 funcHandler_ = nullptr;
266 return -1;
267 }
268
269 sampleTaskState_ = 0;
270 stackContent_.detectorCount = 0;
271 stackContent_.collectCount = 0;
272 auto sampleTask = [this, threadSamplerInitFunc, threadSamplerSampleFunc]() {
273 ThreadSampleTask(threadSamplerInitFunc, threadSamplerSampleFunc);
274 };
275
276 WatchdogTask task("ThreadSampler", sampleTask, 0, interval, true);
277 InsertWatchdogTaskLocked("ThreadSampler", std::move(task));
278 return 0;
279 }
280
CollectStack(std::string& stack)281 bool WatchdogInner::CollectStack(std::string& stack)
282 {
283 if (funcHandler_ == nullptr) {
284 XCOLLIE_LOGE("open library failed.");
285 return false;
286 }
287
288 auto threadSamplerCollectFunc =
289 reinterpret_cast<ThreadSamplerCollectFunc>(FunctionOpen(funcHandler_, "ThreadSamplerCollect"));
290 if (threadSamplerCollectFunc == nullptr) {
291 dlclose(funcHandler_);
292 funcHandler_ = nullptr;
293 return false;
294 }
295 int treeFormat = 1;
296 char* stk = new char[STACK_LENGTH];
297 int collectRet = threadSamplerCollectFunc(stk, STACK_LENGTH, treeFormat);
298 stack = stk;
299 delete[] stk;
300 return collectRet == 0;
301 }
302
Deinit()303 bool WatchdogInner::Deinit()
304 {
305 if (funcHandler_ == nullptr) {
306 XCOLLIE_LOGE("open library failed.");
307 return false;
308 }
309
310 auto threadSamplerDeinitFunc =
311 reinterpret_cast<ThreadSamplerDeinitFunc>(FunctionOpen(funcHandler_, "ThreadSamplerDeinit"));
312 if (threadSamplerDeinitFunc == nullptr) {
313 dlclose(funcHandler_);
314 funcHandler_ = nullptr;
315 return false;
316 }
317 int ret = threadSamplerDeinitFunc();
318 return ret == 0;
319 }
320
ChangeState(int& state, int targetState)321 void WatchdogInner::ChangeState(int& state, int targetState)
322 {
323 timeContent_.reportBegin = timeContent_.curBegin;
324 timeContent_.reportEnd = timeContent_.curEnd;
325 state = targetState;
326 }
327
DayChecker(int& state, TimePoint currenTime, TimePoint lastEndTime, int64_t checkTimer)328 void WatchdogInner::DayChecker(int& state, TimePoint currenTime, TimePoint lastEndTime,
329 int64_t checkTimer)
330 {
331 auto diff = currenTime - lastEndTime;
332 int64_t intervalTime = std::chrono::duration_cast<std::chrono::milliseconds>
333 (diff).count();
334 if (intervalTime >= checkTimer) {
335 XCOLLIE_LOGD("MainThread StartProfileMainThread Over checkTimer: "
336 "%{public}" PRId64 " ms", checkTimer);
337 state = DumpStackState::DEFAULT;
338 }
339 }
340
StartTraceProfile(int32_t interval)341 void WatchdogInner::StartTraceProfile(int32_t interval)
342 {
343 if (traceCollector_ == nullptr) {
344 XCOLLIE_LOGI("MainThread TraceCollector Failed.");
345 return;
346 }
347 traceContent_.dumpCount = 0;
348 traceContent_.traceCount = 0;
349 auto traceTask = [this]() {
350 traceContent_.traceCount++;
351 int64_t currentTime = GetTimeStamp();
352 if (CheckEventTimer(currentTime)) {
353 traceContent_.dumpCount++;
354 }
355 if (traceContent_.traceCount >= COLLECT_TRACE_MAX) {
356 if (traceContent_.dumpCount >= COLLECT_TRACE_MIN) {
357 CreateWatchdogDir();
358 appCaller_.actionId = UCollectClient::ACTION_ID_DUMP_TRACE;
359 appCaller_.isBusinessJank = !buissnessThreadInfo_.empty();
360 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
361 XCOLLIE_LOGI("MainThread TraceCollector Dump result: %{public}d", result.retCode);
362 }
363 isMainThreadTraceEnabled_ = true;
364 }
365 };
366 WatchdogTask task("TraceCollector", traceTask, 0, interval, true);
367 std::unique_lock<std::mutex> lock(lock_);
368 InsertWatchdogTaskLocked("TraceCollector", std::move(task));
369 }
370
CollectTrace()371 void WatchdogInner::CollectTrace()
372 {
373 traceCollector_ = UCollectClient::TraceCollector::Create();
374 int32_t pid = getprocpid();
375 int32_t uid = static_cast<int64_t>(getuid());
376 appCaller_.actionId = UCollectClient::ACTION_ID_START_TRACE;
377 appCaller_.bundleName = bundleName_;
378 appCaller_.bundleVersion = bundleVersion_;
379 appCaller_.uid = uid;
380 appCaller_.pid = pid;
381 appCaller_.threadName = GetSelfProcName();
382 appCaller_.foreground = isForeground_;
383 appCaller_.happenTime = GetTimeStamp() / MILLISEC_TO_NANOSEC;
384 appCaller_.beginTime = timeContent_.reportBegin / MILLISEC_TO_NANOSEC;
385 appCaller_.endTime = timeContent_.reportEnd / MILLISEC_TO_NANOSEC;
386 auto result = traceCollector_->CaptureDurationTrace(appCaller_);
387 XCOLLIE_LOGI("MainThread TraceCollector Start result: %{public}d", result.retCode);
388 if (result.retCode != 0) {
389 return;
390 }
391 StartTraceProfile(DURATION_TIME);
392 }
393
DistributeStart(const std::string& name)394 static TimePoint DistributeStart(const std::string& name)
395 {
396 WatchdogInner::GetInstance().timeContent_.curBegin = GetTimeStamp();
397 return std::chrono::steady_clock::now();
398 }
399
DistributeEnd(const std::string& name, const TimePoint& startTime)400 static void DistributeEnd(const std::string& name, const TimePoint& startTime)
401 {
402 TimePoint endTime = std::chrono::steady_clock::now();
403 auto duration = endTime - startTime;
404 int64_t durationTime = std::chrono::duration_cast<std::chrono::milliseconds>
405 (duration).count();
406 if (duration > std::chrono::milliseconds(DISTRIBUTE_TIME)) {
407 XCOLLIE_LOGI("BlockMonitor event name: %{public}s, Duration Time: %{public}" PRId64 " ms",
408 name.c_str(), durationTime);
409 }
410 #ifdef HICOLLIE_JANK_ENABLE
411 WatchdogInner::GetInstance().timeContent_.curEnd = GetTimeStamp();
412 if (WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::COMPLETE) {
413 int64_t checkTimer = ONE_DAY_LIMIT;
414 if (IsDeveloperOpen() || (GetProcessNameFromProcCmdline(getpid()) == KEY_SCB_STATE)) {
415 checkTimer = ONE_HOUR_LIMIT;
416 }
417 WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().stackContent_.stackState,
418 endTime, WatchdogInner::GetInstance().lastStackTime_, checkTimer);
419 }
420 if (WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::COMPLETE) {
421 WatchdogInner::GetInstance().DayChecker(WatchdogInner::GetInstance().traceContent_.traceState,
422 endTime, WatchdogInner::GetInstance().lastTraceTime_, ONE_DAY_LIMIT);
423 }
424 if (duration > std::chrono::milliseconds(DURATION_TIME) && duration < std::chrono::milliseconds(DUMPTRACE_TIME) &&
425 WatchdogInner::GetInstance().stackContent_.stackState == DumpStackState::DEFAULT) {
426 if (IsEnableVersion()) {
427 return;
428 }
429 WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().stackContent_.stackState,
430 DumpStackState::COMPLETE);
431 WatchdogInner::GetInstance().lastStackTime_ = endTime;
432
433 int32_t ret = WatchdogInner::GetInstance().StartProfileMainThread(TASK_INTERVAL);
434 XCOLLIE_LOGI("MainThread StartProfileMainThread ret: %{public}d "
435 "Duration Time: %{public}" PRId64 " ms", ret, durationTime);
436 }
437 if (duration > std::chrono::milliseconds(DUMPTRACE_TIME) &&
438 WatchdogInner::GetInstance().traceContent_.traceState == DumpStackState::DEFAULT) {
439 if (IsBetaVersion() || IsEnableVersion()) {
440 return;
441 }
442 XCOLLIE_LOGI("MainThread TraceCollector Duration Time: %{public}" PRId64 " ms", durationTime);
443 WatchdogInner::GetInstance().ChangeState(WatchdogInner::GetInstance().traceContent_.traceState,
444 DumpStackState::COMPLETE);
445 WatchdogInner::GetInstance().lastTraceTime_ = endTime;
446 WatchdogInner::GetInstance().CollectTrace();
447 }
448 #endif // HICOLLIE_JANK_ENABLE
449 }
450
AddThread(const std::string &name, std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)451 int WatchdogInner::AddThread(const std::string &name,
452 std::shared_ptr<AppExecFwk::EventHandler> handler, TimeOutCallback timeOutCallback, uint64_t interval)
453 {
454 if (name.empty() || handler == nullptr) {
455 XCOLLIE_LOGE("Add thread fail, invalid args!");
456 return -1;
457 }
458
459 if (IsInAppspwan()) {
460 return -1;
461 }
462
463 std::string limitedName = GetLimitedSizeName(name);
464 XCOLLIE_LOGI("Add thread %{public}s to watchdog.", limitedName.c_str());
465 std::unique_lock<std::mutex> lock(lock_);
466
467 IpcCheck();
468
469 if (!InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, handler, timeOutCallback, interval))) {
470 return -1;
471 }
472 return 0;
473 }
474
RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)475 void WatchdogInner::RunOneShotTask(const std::string& name, Task&& task, uint64_t delay)
476 {
477 if (name.empty() || task == nullptr) {
478 XCOLLIE_LOGE("Add task fail, invalid args!");
479 return;
480 }
481
482 if (IsInAppspwan()) {
483 return;
484 }
485
486 std::unique_lock<std::mutex> lock(lock_);
487 std::string limitedName = GetLimitedSizeName(name);
488 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, 0, true));
489 }
490
RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func, void *arg, unsigned int flag)491 int64_t WatchdogInner::RunXCollieTask(const std::string& name, uint64_t timeout, XCollieCallback func,
492 void *arg, unsigned int flag)
493 {
494 if (name.empty() || timeout == 0) {
495 XCOLLIE_LOGE("Add XCollieTask fail, invalid args!");
496 return INVALID_ID;
497 }
498
499 if (IsInAppspwan()) {
500 return INVALID_ID;
501 }
502
503 std::unique_lock<std::mutex> lock(lock_);
504 std::string limitedName = GetLimitedSizeName(name);
505 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeout, func, arg, flag));
506 }
507
RemoveXCollieTask(int64_t id)508 void WatchdogInner::RemoveXCollieTask(int64_t id)
509 {
510 std::priority_queue<WatchdogTask> tmpQueue;
511 std::unique_lock<std::mutex> lock(lock_);
512 size_t size = checkerQueue_.size();
513 if (size == 0) {
514 XCOLLIE_LOGE("Remove XCollieTask %{public}lld fail, empty queue!", static_cast<long long>(id));
515 return;
516 }
517 while (!checkerQueue_.empty()) {
518 const WatchdogTask& task = checkerQueue_.top();
519 if (task.id != id || task.timeout == 0) {
520 tmpQueue.push(task);
521 }
522 checkerQueue_.pop();
523 }
524 if (tmpQueue.size() == size) {
525 XCOLLIE_LOGE("Remove XCollieTask fail, can not find timer %{public}lld, size=%{public}zu!",
526 static_cast<long long>(id), size);
527 }
528 tmpQueue.swap(checkerQueue_);
529 }
530
RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)531 void WatchdogInner::RunPeriodicalTask(const std::string& name, Task&& task, uint64_t interval, uint64_t delay)
532 {
533 if (name.empty() || task == nullptr) {
534 XCOLLIE_LOGE("Add task fail, invalid args!");
535 return;
536 }
537
538 if (IsInAppspwan()) {
539 return;
540 }
541
542 std::string limitedName = GetLimitedSizeName(name);
543 XCOLLIE_LOGD("Add periodical task %{public}s to watchdog.", name.c_str());
544 std::unique_lock<std::mutex> lock(lock_);
545 InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, std::move(task), delay, interval, false));
546 }
547
SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)548 int64_t WatchdogInner::SetTimerCountTask(const std::string &name, uint64_t timeLimit, int countLimit)
549 {
550 if (name.empty() || timeLimit == 0 || countLimit <= 0) {
551 XCOLLIE_LOGE("SetTimerCountTask fail, invalid args!");
552 return INVALID_ID;
553 }
554
555 if (IsInAppspwan()) {
556 return INVALID_ID;
557 }
558 std::string limitedName = GetLimitedSizeName(name);
559 XCOLLIE_LOGD("SetTimerCountTask name : %{public}s", name.c_str());
560 std::unique_lock<std::mutex> lock(lock_);
561 return InsertWatchdogTaskLocked(limitedName, WatchdogTask(limitedName, timeLimit, countLimit));
562 }
563
TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)564 void WatchdogInner::TriggerTimerCountTask(const std::string &name, bool bTrigger, const std::string &message)
565 {
566 std::unique_lock<std::mutex> lock(lock_);
567
568 if (checkerQueue_.empty()) {
569 XCOLLIE_LOGE("TriggerTimerCountTask name : %{public}s fail, empty queue!", name.c_str());
570 return;
571 }
572
573 bool isTaskExist = false;
574 uint64_t now = GetCurrentTickMillseconds();
575 std::priority_queue<WatchdogTask> tmpQueue;
576 while (!checkerQueue_.empty()) {
577 WatchdogTask task = checkerQueue_.top();
578 if (task.name == name) {
579 isTaskExist = true;
580 if (bTrigger) {
581 task.triggerTimes.push_back(now);
582 task.message = message;
583 } else {
584 task.triggerTimes.clear();
585 }
586 }
587 tmpQueue.push(task);
588 checkerQueue_.pop();
589 }
590 tmpQueue.swap(checkerQueue_);
591
592 if (!isTaskExist) {
593 XCOLLIE_LOGE("TriggerTimerCount name : %{public}s does not exist!", name.c_str());
594 }
595 }
596
IsTaskExistLocked(const std::string& name)597 bool WatchdogInner::IsTaskExistLocked(const std::string& name)
598 {
599 return (taskNameSet_.find(name) != taskNameSet_.end());
600 }
601
IsExceedMaxTaskLocked()602 bool WatchdogInner::IsExceedMaxTaskLocked()
603 {
604 if (checkerQueue_.size() >= MAX_WATCH_NUM) {
605 XCOLLIE_LOGE("Exceed max watchdog task!");
606 return true;
607 }
608
609 return false;
610 }
611
InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)612 int64_t WatchdogInner::InsertWatchdogTaskLocked(const std::string& name, WatchdogTask&& task)
613 {
614 if (!task.isOneshotTask && IsTaskExistLocked(name)) {
615 XCOLLIE_LOGI("Task with %{public}s already exist, failed to insert.", name.c_str());
616 return 0;
617 }
618
619 if (IsExceedMaxTaskLocked()) {
620 XCOLLIE_LOGE("Exceed max watchdog task, failed to insert.");
621 return 0;
622 }
623 int64_t id = task.id;
624 checkerQueue_.push(std::move(task));
625 if (!task.isOneshotTask) {
626 taskNameSet_.insert(name);
627 }
628 CreateWatchdogThreadIfNeed();
629 condition_.notify_all();
630
631 return id;
632 }
633
StopWatchdog()634 void WatchdogInner::StopWatchdog()
635 {
636 Stop();
637 }
638
IsCallbackLimit(unsigned int flag)639 bool WatchdogInner::IsCallbackLimit(unsigned int flag)
640 {
641 bool ret = false;
642 time_t startTime = time(nullptr);
643 if (!(flag & XCOLLIE_FLAG_LOG)) {
644 return ret;
645 }
646 if (timeCallback_ + XCOLLIE_CALLBACK_TIMEWIN_MAX < startTime) {
647 timeCallback_ = startTime;
648 } else {
649 if (++cntCallback_ > XCOLLIE_CALLBACK_HISTORY_MAX) {
650 ret = true;
651 }
652 }
653 return ret;
654 }
655
IPCProxyLimitCallback(uint64_t num)656 void IPCProxyLimitCallback(uint64_t num)
657 {
658 XCOLLIE_LOGE("ipc proxy num %{public}" PRIu64 " exceed limit", num);
659 if (getuid() >= MIN_APP_UID && IsBetaVersion()) {
660 XCOLLIE_LOGI("Process is going to exit, reason: ipc proxy num exceed limit");
661 _exit(0);
662 }
663 }
664
CreateWatchdogThreadIfNeed()665 void WatchdogInner::CreateWatchdogThreadIfNeed()
666 {
667 std::call_once(flag_, [this] {
668 if (threadLoop_ == nullptr) {
669 if (mainRunner_ == nullptr) {
670 mainRunner_ = AppExecFwk::EventRunner::GetMainEventRunner();
671 }
672 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
673 const uint64_t limitNum = 20000;
674 IPCDfx::SetIPCProxyLimit(limitNum, IPCProxyLimitCallback);
675 threadLoop_ = std::make_unique<std::thread>(&WatchdogInner::Start, this);
676 if (getpid() == gettid()) {
677 SetThreadSignalMask(SIGDUMP, true, true);
678 }
679 XCOLLIE_LOGD("Watchdog is running!");
680 }
681 });
682 }
683
FetchNextTask(uint64_t now, WatchdogTask& task)684 uint64_t WatchdogInner::FetchNextTask(uint64_t now, WatchdogTask& task)
685 {
686 std::unique_lock<std::mutex> lock(lock_);
687 if (isNeedStop_) {
688 while (!checkerQueue_.empty()) {
689 checkerQueue_.pop();
690 }
691 return DEFAULT_TIMEOUT;
692 }
693
694 if (checkerQueue_.empty()) {
695 return DEFAULT_TIMEOUT;
696 }
697
698 const WatchdogTask& queuedTaskCheck = checkerQueue_.top();
699 bool popCheck = true;
700 if (queuedTaskCheck.name.empty()) {
701 checkerQueue_.pop();
702 XCOLLIE_LOGW("queuedTask name is empty.");
703 } else if (queuedTaskCheck.name == STACK_CHECKER && isMainThreadProfileTaskEnabled_) {
704 checkerQueue_.pop();
705 taskNameSet_.erase("ThreadSampler");
706 isMainThreadProfileTaskEnabled_ = false;
707 if (Deinit()) {
708 dlclose(funcHandler_);
709 funcHandler_ = nullptr;
710 }
711 XCOLLIE_LOGI("STACK_CHECKER Task pop");
712 } else if (queuedTaskCheck.name == TRACE_CHECKER && isMainThreadTraceEnabled_) {
713 checkerQueue_.pop();
714 taskNameSet_.erase("TraceCollector");
715 isMainThreadTraceEnabled_ = false;
716 XCOLLIE_LOGI("TRACE_CHECKER Task pop");
717 } else {
718 popCheck = false;
719 }
720 if (popCheck && checkerQueue_.empty()) {
721 return DEFAULT_TIMEOUT;
722 }
723
724 const WatchdogTask& queuedTask = checkerQueue_.top();
725 if (g_existFile && queuedTask.name == IPC_FULL && now - g_nextKickTime > INTERVAL_KICK_TIME) {
726 if (KickWatchdog()) {
727 g_nextKickTime = now;
728 }
729 }
730 if (queuedTask.nextTickTime > now) {
731 return queuedTask.nextTickTime - now;
732 }
733
734 currentScene_ = "thread DfxWatchdog: Current scenario is task name: " + queuedTask.name + "\n";
735 task = queuedTask;
736 checkerQueue_.pop();
737 return 0;
738 }
739
ReInsertTaskIfNeed(WatchdogTask& task)740 void WatchdogInner::ReInsertTaskIfNeed(WatchdogTask& task)
741 {
742 if (task.checkInterval == 0) {
743 return;
744 }
745
746 std::unique_lock<std::mutex> lock(lock_);
747 task.nextTickTime = task.nextTickTime + task.checkInterval;
748 checkerQueue_.push(task);
749 }
750
Start()751 bool WatchdogInner::Start()
752 {
753 if (pthread_setname_np(pthread_self(), "OS_DfxWatchdog") != 0) {
754 XCOLLIE_LOGW("Failed to set threadName for watchdog, errno:%d.", errno);
755 }
756 SetThreadSignalMask(SIGDUMP, false, false);
757 watchdogStartTime_ = GetCurrentTickMillseconds();
758 XCOLLIE_LOGD("Watchdog is running in thread(%{public}d)!", getproctid());
759 if (SetThreadInfoCallback != nullptr) {
760 SetThreadInfoCallback(ThreadInfo);
761 XCOLLIE_LOGD("Watchdog Set Thread Info Callback");
762 }
763 while (!isNeedStop_) {
764 uint64_t now = GetCurrentTickMillseconds();
765 WatchdogTask task;
766 uint64_t leftTimeMill = FetchNextTask(now, task);
767 if (leftTimeMill == 0) {
768 task.Run(now);
769 ReInsertTaskIfNeed(task);
770 currentScene_ = "thread DfxWatchdog: Current scenario is hicollie.\n";
771 continue;
772 } else if (isNeedStop_) {
773 break;
774 } else {
775 std::unique_lock<std::mutex> lock(lock_);
776 condition_.wait_for(lock, std::chrono::milliseconds(leftTimeMill));
777 }
778 }
779 if (SetThreadInfoCallback != nullptr) {
780 SetThreadInfoCallback(nullptr);
781 }
782 return true;
783 }
784
SendMsgToHungtask(const std::string& msg)785 bool WatchdogInner::SendMsgToHungtask(const std::string& msg)
786 {
787 if (g_fd == NOT_OPEN) {
788 g_fd = open(SYS_KERNEL_HUNGTASK_USERLIST, O_WRONLY);
789 if (g_fd < 0) {
790 g_fd = open(HMOS_HUNGTASK_USERLIST, O_WRONLY);
791 if (g_fd < 0) {
792 XCOLLIE_LOGE("can't open hungtask file");
793 g_existFile = false;
794 return false;
795 }
796 XCOLLIE_LOGE("change to hmos kernel");
797 isHmos = true;
798 } else {
799 XCOLLIE_LOGE("change to linux kernel");
800 }
801 }
802
803 ssize_t watchdogWrite = write(g_fd, msg.c_str(), msg.size());
804 if (watchdogWrite < 0 || watchdogWrite != static_cast<ssize_t>(msg.size())) {
805 XCOLLIE_LOGE("watchdogWrite msg failed");
806 close(g_fd);
807 g_fd = NOT_OPEN;
808 return false;
809 }
810 XCOLLIE_LOGE("Send %{public}s to hungtask Successful\n", msg.c_str());
811 return true;
812 }
813
KickWatchdog()814 bool WatchdogInner::KickWatchdog()
815 {
816 return true;
817 }
818
IpcCheck()819 void WatchdogInner::IpcCheck()
820 {
821 uint32_t uid = getuid();
822 if (uid == FOUNDATION_UID || uid == RENDER_SERVICE_UID || GetSelfProcName() == "ohos.sceneboard") {
823 if (binderCheckHander_ == nullptr) {
824 auto runner = AppExecFwk::EventRunner::Create(IPC_CHECKER);
825 binderCheckHander_ = std::make_shared<AppExecFwk::EventHandler>(runner);
826 if (!InsertWatchdogTaskLocked(IPC_CHECKER, WatchdogTask(IPC_FULL, binderCheckHander_,
827 nullptr, IPC_CHECKER_TIME))) {
828 XCOLLIE_LOGE("Add %{public}s thread fail", IPC_CHECKER);
829 }
830 }
831 }
832 }
833
WriteStringToFile(uint32_t pid, const char *str)834 void WatchdogInner::WriteStringToFile(uint32_t pid, const char *str)
835 {
836 char file[PATH_LEN] = {0};
837 int32_t newPid = static_cast<int32_t>(pid);
838 if (snprintf_s(file, PATH_LEN, PATH_LEN - 1, "/proc/%d/unexpected_die_catch", newPid) == -1) {
839 XCOLLIE_LOGI("failed to build path for %{public}d.", newPid);
840 }
841 int fd = open(file, O_RDWR);
842 if (fd == -1) {
843 return;
844 }
845 if (write(fd, str, strlen(str)) < 0) {
846 XCOLLIE_LOGI("failed to write 0 for %{public}s", file);
847 }
848 close(fd);
849 return;
850 }
851
FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)852 void WatchdogInner::FfrtCallback(uint64_t taskId, const char *taskInfo, uint32_t delayedTaskCount)
853 {
854 std::string description = "FfrtCallback: task(";
855 description += taskInfo;
856 description += ") blocked " + std::to_string(FFRT_CALLBACK_TIME / TIME_MS_TO_S) + "s";
857 bool isExist = false;
858 {
859 std::unique_lock<std::mutex> lock(lockFfrt_);
860 auto &map = WatchdogInner::GetInstance().taskIdCnt;
861 auto search = map.find(taskId);
862 if (search != map.end()) {
863 isExist = true;
864 } else {
865 map[taskId] = SERVICE_WARNING;
866 }
867 }
868
869 if (isExist) {
870 description += ", report twice instead of exiting process."; // 1s = 1000ms
871 WatchdogInner::SendFfrtEvent(description, "SERVICE_BLOCK", taskInfo);
872 WatchdogInner::GetInstance().taskIdCnt.erase(taskId);
873 WatchdogInner::KillPeerBinderProcess(description);
874 } else {
875 WatchdogInner::SendFfrtEvent(description, "SERVICE_WARNING", taskInfo);
876 }
877 }
878
InitFfrtWatchdog()879 void WatchdogInner::InitFfrtWatchdog()
880 {
881 CreateWatchdogThreadIfNeed();
882 ffrt_task_timeout_set_cb(FfrtCallback);
883 ffrt_task_timeout_set_threshold(FFRT_CALLBACK_TIME);
884 std::unique_lock<std::mutex> lock(lock_);
885 IpcCheck();
886 }
887
SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)888 void WatchdogInner::SendFfrtEvent(const std::string &msg, const std::string &eventName, const char * taskInfo)
889 {
890 int32_t pid = getprocpid();
891 if (IsProcessDebug(pid)) {
892 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't report.", pid);
893 return;
894 }
895 uint32_t gid = getgid();
896 uint32_t uid = getuid();
897 time_t curTime = time(nullptr);
898 std::string sendMsg = std::string((ctime(&curTime) == nullptr) ? "" : ctime(&curTime)) +
899 "\n" + msg + "\n";
900 char* buffer = new char[FFRT_BUFFER_SIZE + 1]();
901 buffer[FFRT_BUFFER_SIZE] = 0;
902 ffrt_dump(DUMP_INFO_ALL, buffer, FFRT_BUFFER_SIZE);
903 sendMsg += buffer;
904 delete[] buffer;
905 int32_t tid = pid;
906 GetFfrtTaskTid(tid, sendMsg);
907 #ifdef HISYSEVENT_ENABLE
908 int ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
909 "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo, "PROCESS_NAME", GetSelfProcName(),
910 "MSG", sendMsg, "STACK", GetProcessStacktrace());
911 if (ret == ERR_OVER_SIZE) {
912 std::string stack = "";
913 GetBacktraceStringByTid(stack, tid, 0, true);
914 ret = HiSysEventWrite(HiSysEvent::Domain::FRAMEWORK, eventName, HiSysEvent::EventType::FAULT,
915 "PID", pid, "TID", tid, "TGID", gid, "UID", uid, "MODULE_NAME", taskInfo,
916 "PROCESS_NAME", GetSelfProcName(), "MSG", sendMsg, "STACK", stack);
917 }
918
919 XCOLLIE_LOGI("hisysevent write result=%{public}d, send event [FRAMEWORK,%{public}s], "
920 "msg=%{public}s", ret, eventName.c_str(), msg.c_str());
921 #else
922 XCOLLIE_LOGI("hisysevent not exists");
923 #endif
924 }
925
GetFfrtTaskTid(int32_t& tid, const std::string& msg)926 void WatchdogInner::GetFfrtTaskTid(int32_t& tid, const std::string& msg)
927 {
928 std::string queueNameFrontStr = "us. queue name [";
929 size_t queueNameFrontPos = msg.find(queueNameFrontStr);
930 if (queueNameFrontPos == std::string::npos) {
931 return;
932 }
933 size_t queueNameRearPos = msg.find("], remaining tasks count=");
934 size_t queueStartPos = queueNameFrontPos + queueNameFrontStr.length();
935 if (queueNameRearPos == std::string::npos || queueNameRearPos <= queueStartPos) {
936 return;
937 }
938 size_t queueNameLength = queueNameRearPos - queueStartPos;
939 std::string workerTidFrontStr = " worker tid ";
940 std::string taskIdFrontStr = " is running, task id ";
941 std::string queueNameStr = " name " + msg.substr(queueStartPos, queueNameLength);
942 std::istringstream issMsg(msg);
943 std::string line;
944 while (std::getline(issMsg, line, '\n')) {
945 size_t workerTidFrontPos = line.find(workerTidFrontStr);
946 size_t taskIdFrontPos = line.find(taskIdFrontStr);
947 size_t queueNamePos = line.find(queueNameStr);
948 size_t workerStartPos = workerTidFrontPos + workerTidFrontStr.length();
949 if (workerTidFrontPos == std::string::npos || taskIdFrontPos == std::string::npos ||
950 queueNamePos == std::string::npos || taskIdFrontPos <= workerStartPos) {
951 continue;
952 }
953 size_t tidLength = taskIdFrontPos - workerStartPos;
954 if (tidLength < std::to_string(INT32_MAX).length()) {
955 std::string tidStr = line.substr(workerStartPos, tidLength);
956 if (std::all_of(std::begin(tidStr), std::end(tidStr), [] (const char& c) {
957 return isdigit(c);
958 })) {
959 tid = std::stoi(tidStr);
960 return;
961 }
962 }
963 }
964 }
965
LeftTimeExitProcess(const std::string &description)966 void WatchdogInner::LeftTimeExitProcess(const std::string &description)
967 {
968 int32_t pid = getprocpid();
969 if (IsProcessDebug(pid)) {
970 XCOLLIE_LOGI("heap dump or debug for %{public}d, don't exit.", pid);
971 return;
972 }
973 DelayBeforeExit(10); // sleep 10s for hiview dump
974 XCOLLIE_LOGI("Process is going to exit, reason:%{public}s.", description.c_str());
975 WatchdogInner::WriteStringToFile(pid, "0");
976
977 _exit(0);
978 }
979
Stop()980 bool WatchdogInner::Stop()
981 {
982 IPCDfx::SetIPCProxyLimit(0, nullptr);
983 if (mainRunner_ != nullptr) {
984 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
985 }
986 isNeedStop_.store(true);
987 condition_.notify_all();
988 if (threadLoop_ != nullptr && threadLoop_->joinable()) {
989 threadLoop_->join();
990 threadLoop_ = nullptr;
991 }
992 if (g_fd != NOT_OPEN) {
993 close(g_fd);
994 g_fd = NOT_OPEN;
995 }
996 return true;
997 }
998
KillPeerBinderProcess(const std::string &description)999 void WatchdogInner::KillPeerBinderProcess(const std::string &description)
1000 {
1001 bool result = false;
1002 if (getuid() == FOUNDATION_UID) {
1003 result = KillProcessByPid(getprocpid());
1004 }
1005 if (!result) {
1006 WatchdogInner::LeftTimeExitProcess(description);
1007 }
1008 }
1009
RemoveInnerTask(const std::string& name)1010 void WatchdogInner::RemoveInnerTask(const std::string& name)
1011 {
1012 if (name.empty()) {
1013 XCOLLIE_LOGI("RemoveInnerTask fail, cname is null");
1014 return;
1015 }
1016 std::priority_queue<WatchdogTask> tmpQueue;
1017 std::unique_lock<std::mutex> lock(lock_);
1018 size_t size = checkerQueue_.size();
1019 if (size == 0) {
1020 XCOLLIE_LOGE("RemoveInnerTask %{public}s fail, empty queue!", name.c_str());
1021 return;
1022 }
1023 while (!checkerQueue_.empty()) {
1024 const WatchdogTask& task = checkerQueue_.top();
1025 if (task.name != name) {
1026 tmpQueue.push(task);
1027 } else {
1028 size_t nameSize = taskNameSet_.size();
1029 if (nameSize != 0 && !task.isOneshotTask) {
1030 taskNameSet_.erase(name);
1031 XCOLLIE_LOGD("RemoveInnerTask name %{public}s, remove result=%{public}d",
1032 name.c_str(), nameSize > taskNameSet_.size());
1033 }
1034 }
1035 checkerQueue_.pop();
1036 }
1037 if (tmpQueue.size() == size) {
1038 XCOLLIE_LOGE("RemoveInnerTask fail, can not find name %{public}s, size=%{public}zu!",
1039 name.c_str(), size);
1040 }
1041 tmpQueue.swap(checkerQueue_);
1042 }
1043
InitBeginFunc(const char* name)1044 void InitBeginFunc(const char* name)
1045 {
1046 std::string nameStr(name);
1047 WatchdogInner::GetInstance().bussinessBeginTime_ = DistributeStart(nameStr);
1048 }
1049
InitEndFunc(const char* name)1050 void InitEndFunc(const char* name)
1051 {
1052 std::string nameStr(name);
1053 DistributeEnd(nameStr, WatchdogInner::GetInstance().bussinessBeginTime_);
1054 }
1055
InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc, WatchdogInnerEndFunc* endFunc)1056 void WatchdogInner::InitMainLooperWatcher(WatchdogInnerBeginFunc* beginFunc,
1057 WatchdogInnerEndFunc* endFunc)
1058 {
1059 int64_t tid = getproctid();
1060 if (beginFunc && endFunc) {
1061 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1062 XCOLLIE_LOGI("Tid =%{public}" PRId64 "already exits, "
1063 "no repeated initialization.", tid);
1064 return;
1065 }
1066 if (mainRunner_ != nullptr) {
1067 mainRunner_->SetMainLooperWatcher(nullptr, nullptr);
1068 }
1069 *beginFunc = InitBeginFunc;
1070 *endFunc = InitEndFunc;
1071 buissnessThreadInfo_.insert(tid);
1072 } else {
1073 if (buissnessThreadInfo_.find(tid) != buissnessThreadInfo_.end()) {
1074 XCOLLIE_LOGI("Remove already init tid=%{public}." PRId64, tid);
1075 mainRunner_->SetMainLooperWatcher(DistributeStart, DistributeEnd);
1076 buissnessThreadInfo_.erase(tid);
1077 }
1078 }
1079 }
1080
SetAppDebug(bool isAppDebug)1081 void WatchdogInner::SetAppDebug(bool isAppDebug)
1082 {
1083 isAppDebug_ = isAppDebug;
1084 }
1085
GetAppDebug()1086 bool WatchdogInner::GetAppDebug()
1087 {
1088 return isAppDebug_;
1089 }
1090 } // end of namespace HiviewDFX
1091 } // end of namespace OHOS
1092