1// Copyright 2013 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/profiler/tick-sample.h" 6 7#include <cinttypes> 8 9#include "include/v8-profiler.h" 10#include "src/base/sanitizer/asan.h" 11#include "src/base/sanitizer/msan.h" 12#include "src/execution/embedder-state.h" 13#include "src/execution/frames-inl.h" 14#include "src/execution/simulator.h" 15#include "src/execution/vm-state-inl.h" 16#include "src/heap/heap-inl.h" // For Heap::code_range. 17#include "src/logging/counters.h" 18#include "src/profiler/profiler-stats.h" 19 20namespace v8 { 21namespace internal { 22namespace { 23 24bool IsSamePage(i::Address ptr1, i::Address ptr2) { 25 const uint32_t kPageSize = 4096; 26 i::Address mask = ~static_cast<i::Address>(kPageSize - 1); 27 return (ptr1 & mask) == (ptr2 & mask); 28} 29 30// Check if the code at specified address could potentially be a 31// frame setup code. 32bool IsNoFrameRegion(i::Address address) { 33 struct Pattern { 34 int bytes_count; 35 i::byte bytes[8]; 36 int offsets[4]; 37 }; 38 static Pattern patterns[] = { 39#if V8_HOST_ARCH_IA32 40 // push %ebp 41 // mov %esp,%ebp 42 {3, {0x55, 0x89, 0xE5}, {0, 1, -1}}, 43 // pop %ebp 44 // ret N 45 {2, {0x5D, 0xC2}, {0, 1, -1}}, 46 // pop %ebp 47 // ret 48 {2, {0x5D, 0xC3}, {0, 1, -1}}, 49#elif V8_HOST_ARCH_X64 50 // pushq %rbp 51 // movq %rsp,%rbp 52 {4, {0x55, 0x48, 0x89, 0xE5}, {0, 1, -1}}, 53 // popq %rbp 54 // ret N 55 {2, {0x5D, 0xC2}, {0, 1, -1}}, 56 // popq %rbp 57 // ret 58 {2, {0x5D, 0xC3}, {0, 1, -1}}, 59#endif 60 {0, {}, {}} 61 }; 62 i::byte* pc = reinterpret_cast<i::byte*>(address); 63 for (Pattern* pattern = patterns; pattern->bytes_count; ++pattern) { 64 for (int* offset_ptr = pattern->offsets; *offset_ptr != -1; ++offset_ptr) { 65 int offset = *offset_ptr; 66 if (!offset || IsSamePage(address, address - offset)) { 67 MSAN_MEMORY_IS_INITIALIZED(pc - offset, pattern->bytes_count); 68 if (!memcmp(pc - offset, pattern->bytes, pattern->bytes_count)) 69 return true; 70 } else { 71 // It is not safe to examine bytes on another page as it might not be 72 // allocated thus causing a SEGFAULT. 73 // Check the pattern part that's on the same page and 74 // pessimistically assume it could be the entire pattern match. 75 MSAN_MEMORY_IS_INITIALIZED(pc, pattern->bytes_count - offset); 76 if (!memcmp(pc, pattern->bytes + offset, pattern->bytes_count - offset)) 77 return true; 78 } 79 } 80 } 81 return false; 82} 83 84#if defined(USE_SIMULATOR) 85class SimulatorHelper { 86 public: 87 // Returns true if register values were successfully retrieved 88 // from the simulator, otherwise returns false. 89 static bool FillRegisters(Isolate* isolate, v8::RegisterState* state); 90}; 91 92bool SimulatorHelper::FillRegisters(Isolate* isolate, 93 v8::RegisterState* state) { 94 Simulator* simulator = isolate->thread_local_top()->simulator_; 95 // Check if there is active simulator. 96 if (simulator == nullptr) return false; 97#if V8_TARGET_ARCH_ARM 98 if (!simulator->has_bad_pc()) { 99 state->pc = reinterpret_cast<void*>(simulator->get_pc()); 100 } 101 state->sp = reinterpret_cast<void*>(simulator->get_register(Simulator::sp)); 102 state->fp = reinterpret_cast<void*>(simulator->get_register(Simulator::r11)); 103 state->lr = reinterpret_cast<void*>(simulator->get_register(Simulator::lr)); 104#elif V8_TARGET_ARCH_ARM64 105 state->pc = reinterpret_cast<void*>(simulator->pc()); 106 state->sp = reinterpret_cast<void*>(simulator->sp()); 107 state->fp = reinterpret_cast<void*>(simulator->fp()); 108 state->lr = reinterpret_cast<void*>(simulator->lr()); 109#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_LOONG64 110 if (!simulator->has_bad_pc()) { 111 state->pc = reinterpret_cast<void*>(simulator->get_pc()); 112 } 113 state->sp = reinterpret_cast<void*>(simulator->get_register(Simulator::sp)); 114 state->fp = reinterpret_cast<void*>(simulator->get_register(Simulator::fp)); 115#elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 116 if (!simulator->has_bad_pc()) { 117 state->pc = reinterpret_cast<void*>(simulator->get_pc()); 118 } 119 state->sp = reinterpret_cast<void*>(simulator->get_register(Simulator::sp)); 120 state->fp = reinterpret_cast<void*>(simulator->get_register(Simulator::fp)); 121 state->lr = reinterpret_cast<void*>(simulator->get_lr()); 122#elif V8_TARGET_ARCH_S390 123 if (!simulator->has_bad_pc()) { 124 state->pc = reinterpret_cast<void*>(simulator->get_pc()); 125 } 126 state->sp = reinterpret_cast<void*>(simulator->get_register(Simulator::sp)); 127 state->fp = reinterpret_cast<void*>(simulator->get_register(Simulator::fp)); 128 state->lr = reinterpret_cast<void*>(simulator->get_register(Simulator::ra)); 129#elif V8_TARGET_ARCH_RISCV64 130 if (!simulator->has_bad_pc()) { 131 state->pc = reinterpret_cast<void*>(simulator->get_pc()); 132 } 133 state->sp = reinterpret_cast<void*>(simulator->get_register(Simulator::sp)); 134 state->fp = reinterpret_cast<void*>(simulator->get_register(Simulator::fp)); 135 state->lr = reinterpret_cast<void*>(simulator->get_register(Simulator::ra)); 136#endif 137 if (state->sp == 0 || state->fp == 0) { 138 // It possible that the simulator is interrupted while it is updating 139 // the sp or fp register. ARM64 simulator does this in two steps: 140 // first setting it to zero and then setting it to the new value. 141 // Bailout if sp/fp doesn't contain the new value. 142 // 143 // FIXME: The above doesn't really solve the issue. 144 // If a 64-bit target is executed on a 32-bit host even the final 145 // write is non-atomic, so it might obtain a half of the result. 146 // Moreover as long as the register set code uses memcpy (as of now), 147 // it is not guaranteed to be atomic even when both host and target 148 // are of same bitness. 149 return false; 150 } 151 return true; 152} 153#endif // USE_SIMULATOR 154 155} // namespace 156 157DISABLE_ASAN void TickSample::Init(Isolate* v8_isolate, 158 const RegisterState& reg_state, 159 RecordCEntryFrame record_c_entry_frame, 160 bool update_stats, 161 bool use_simulator_reg_state, 162 base::TimeDelta sampling_interval) { 163 update_stats_ = update_stats; 164 SampleInfo info; 165 RegisterState regs = reg_state; 166 if (!GetStackSample(v8_isolate, ®s, record_c_entry_frame, stack, 167 kMaxFramesCount, &info, &state, 168 use_simulator_reg_state)) { 169 // It is executing JS but failed to collect a stack trace. 170 // Mark the sample as spoiled. 171 pc = nullptr; 172 return; 173 } 174 175 if (state != StateTag::EXTERNAL) { 176 state = info.vm_state; 177 } 178 pc = regs.pc; 179 frames_count = static_cast<unsigned>(info.frames_count); 180 has_external_callback = info.external_callback_entry != nullptr; 181 context = info.context; 182 embedder_context = info.embedder_context; 183 embedder_state = info.embedder_state; 184 if (has_external_callback) { 185 external_callback_entry = info.external_callback_entry; 186 } else if (frames_count) { 187 // sp register may point at an arbitrary place in memory, make 188 // sure sanitizers don't complain about it. 189 ASAN_UNPOISON_MEMORY_REGION(regs.sp, sizeof(void*)); 190 MSAN_MEMORY_IS_INITIALIZED(regs.sp, sizeof(void*)); 191 // Sample potential return address value for frameless invocation of 192 // stubs (we'll figure out later, if this value makes sense). 193 194 // TODO(petermarshall): This read causes guard page violations on Windows. 195 // Either fix this mechanism for frameless stubs or remove it. 196 // tos = 197 // i::ReadUnalignedValue<void*>(reinterpret_cast<i::Address>(regs.sp)); 198 tos = nullptr; 199 } else { 200 tos = nullptr; 201 } 202 sampling_interval_ = sampling_interval; 203 timestamp = base::TimeTicks::Now(); 204} 205 206bool TickSample::GetStackSample(Isolate* v8_isolate, RegisterState* regs, 207 RecordCEntryFrame record_c_entry_frame, 208 void** frames, size_t frames_limit, 209 v8::SampleInfo* sample_info, 210 StateTag* out_state, 211 bool use_simulator_reg_state) { 212 i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate); 213 sample_info->frames_count = 0; 214 sample_info->vm_state = isolate->current_vm_state(); 215 sample_info->external_callback_entry = nullptr; 216 sample_info->embedder_state = EmbedderStateTag::EMPTY; 217 sample_info->embedder_context = nullptr; 218 sample_info->context = nullptr; 219 220 if (sample_info->vm_state == GC) return true; 221 222 EmbedderState* embedder_state = isolate->current_embedder_state(); 223 if (embedder_state != nullptr) { 224 sample_info->embedder_context = 225 reinterpret_cast<void*>(embedder_state->native_context_address()); 226 sample_info->embedder_state = embedder_state->GetState(); 227 } 228 229 Context top_context = isolate->context(); 230 if (top_context.ptr() != i::Context::kNoContext && 231 top_context.ptr() != i::Context::kInvalidContext) { 232 NativeContext top_native_context = top_context.native_context(); 233 sample_info->context = reinterpret_cast<void*>(top_native_context.ptr()); 234 } 235 236 i::Address js_entry_sp = isolate->js_entry_sp(); 237 if (js_entry_sp == 0) return true; // Not executing JS now. 238 239#if defined(USE_SIMULATOR) 240 if (use_simulator_reg_state) { 241 if (!i::SimulatorHelper::FillRegisters(isolate, regs)) { 242 i::ProfilerStats::Instance()->AddReason( 243 i::ProfilerStats::Reason::kSimulatorFillRegistersFailed); 244 return false; 245 } 246 } 247#else 248 USE(use_simulator_reg_state); 249#endif 250 DCHECK(regs->sp); 251 252 // Check whether we interrupted setup/teardown of a stack frame in JS code. 253 // Avoid this check for C++ code, as that would trigger false positives. 254 // TODO(petermarshall): Code range is always null on ia32 so this check for 255 // IsNoFrameRegion will never actually run there. 256 if (regs->pc && 257 isolate->heap()->code_region().contains( 258 reinterpret_cast<i::Address>(regs->pc)) && 259 IsNoFrameRegion(reinterpret_cast<i::Address>(regs->pc))) { 260 // The frame is not setup, so it'd be hard to iterate the stack. Bailout. 261 i::ProfilerStats::Instance()->AddReason( 262 i::ProfilerStats::Reason::kNoFrameRegion); 263 return false; 264 } 265 266 i::ExternalCallbackScope* scope = isolate->external_callback_scope(); 267 i::Address handler = i::Isolate::handler(isolate->thread_local_top()); 268 // If there is a handler on top of the external callback scope then 269 // we have already entered JavaScript again and the external callback 270 // is not the top function. 271 if (scope && scope->scope_address() < handler) { 272 i::Address* external_callback_entry_ptr = 273 scope->callback_entrypoint_address(); 274 sample_info->external_callback_entry = 275 external_callback_entry_ptr == nullptr 276 ? nullptr 277 : reinterpret_cast<void*>(*external_callback_entry_ptr); 278 } 279 // 'Fast API calls' are similar to fast C calls (see frames.cc) in that 280 // they don't build an exit frame when entering C from JS. They have the 281 // added speciality of having separate "fast" and "default" callbacks, the 282 // latter being the regular API callback called before the JS function is 283 // optimized. When TurboFan optimizes the JS caller, the fast callback 284 // gets executed instead of the default one, therefore we need to store 285 // its address in the sample. 286 IsolateData* isolate_data = isolate->isolate_data(); 287 Address fast_c_fp = isolate_data->fast_c_call_caller_fp(); 288 if (fast_c_fp != kNullAddress && 289 isolate_data->fast_api_call_target() != kNullAddress) { 290 sample_info->external_callback_entry = 291 reinterpret_cast<void*>(isolate_data->fast_api_call_target()); 292 if (out_state) { 293 *out_state = StateTag::EXTERNAL; 294 } 295 } 296 297 i::SafeStackFrameIterator it(isolate, reinterpret_cast<i::Address>(regs->pc), 298 reinterpret_cast<i::Address>(regs->fp), 299 reinterpret_cast<i::Address>(regs->sp), 300 reinterpret_cast<i::Address>(regs->lr), 301 js_entry_sp); 302 303 if (it.done()) return true; 304 305 size_t i = 0; 306 if (record_c_entry_frame == kIncludeCEntryFrame && 307 (it.top_frame_type() == internal::StackFrame::EXIT || 308 it.top_frame_type() == internal::StackFrame::BUILTIN_EXIT)) { 309 frames[i] = reinterpret_cast<void*>(isolate->c_function()); 310 i++; 311 } 312#ifdef V8_RUNTIME_CALL_STATS 313 i::RuntimeCallTimer* timer = 314 isolate->counters()->runtime_call_stats()->current_timer(); 315#endif // V8_RUNTIME_CALL_STATS 316 for (; !it.done() && i < frames_limit; it.Advance()) { 317#ifdef V8_RUNTIME_CALL_STATS 318 while (timer && reinterpret_cast<i::Address>(timer) < it.frame()->fp() && 319 i < frames_limit) { 320 frames[i++] = reinterpret_cast<void*>(timer->counter()); 321 timer = timer->parent(); 322 } 323#endif // V8_RUNTIME_CALL_STATS 324 if (i == frames_limit) break; 325 326 if (it.frame()->is_interpreted()) { 327 // For interpreted frames use the bytecode array pointer as the pc. 328 i::InterpretedFrame* frame = 329 static_cast<i::InterpretedFrame*>(it.frame()); 330 // Since the sampler can interrupt execution at any point the 331 // bytecode_array might be garbage, so don't actually dereference it. We 332 // avoid the frame->GetXXX functions since they call BytecodeArray::cast, 333 // which has a heap access in its DCHECK. 334 i::Address bytecode_array = base::Memory<i::Address>( 335 frame->fp() + i::InterpreterFrameConstants::kBytecodeArrayFromFp); 336 i::Address bytecode_offset = base::Memory<i::Address>( 337 frame->fp() + i::InterpreterFrameConstants::kBytecodeOffsetFromFp); 338 339 // If the bytecode array is a heap object and the bytecode offset is a 340 // Smi, use those, otherwise fall back to using the frame's pc. 341 if (HAS_STRONG_HEAP_OBJECT_TAG(bytecode_array) && 342 HAS_SMI_TAG(bytecode_offset)) { 343 frames[i++] = reinterpret_cast<void*>( 344 bytecode_array + i::Internals::SmiValue(bytecode_offset)); 345 continue; 346 } 347 } 348 // For arm64, the PC for the frame sometimes doesn't come from the stack, 349 // but from the link register instead. For this reason, we skip 350 // authenticating it. 351 frames[i++] = reinterpret_cast<void*>(it.frame()->unauthenticated_pc()); 352 } 353 sample_info->frames_count = i; 354 return true; 355} 356 357void TickSample::print() const { 358 PrintF("TickSample: at %p\n", this); 359 PrintF(" - state: %s\n", StateToString(state)); 360 PrintF(" - pc: %p\n", pc); 361 PrintF(" - stack: (%u frames)\n", frames_count); 362 for (unsigned i = 0; i < frames_count; i++) { 363 PrintF(" %p\n", stack[i]); 364 } 365 PrintF(" - has_external_callback: %d\n", has_external_callback); 366 PrintF(" - %s: %p\n", 367 has_external_callback ? "external_callback_entry" : "tos", tos); 368 PrintF(" - update_stats: %d\n", update_stats_); 369 PrintF(" - sampling_interval: %" PRId64 "\n", 370 sampling_interval_.InMicroseconds()); 371 PrintF("\n"); 372} 373 374} // namespace internal 375} // namespace v8 376