Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,25 @@ __thread unsigned long long functions_entered_counter = 0;
// By calling fork(), the child process and the parent process will have their own address space,
// which means that the child process won't be able to modify the parent process's variables.
// We need a way to enable communication between the child and parent processes.
// This is done by creating a shared memory region and use it as a flag to indicate that
// the application is crashing.
// This variable will be a pointer to that shared memory region.
// This is done by creating a shared memory region and saving the crash data in it.
// crashing_data_t is a struct that contains the following fields:
// - is_app_crashing: a flag indicating that the application is crashing.
// - thread_context: the context of the crashing thread. It is used to unwind the crashing
// thread's callstack while skipping the signal frame. Useful on Alpine.

// Crash information stored in the memory region shared between the parent
// and child processes.
struct crashing_data_t {
    // Non-zero once the application is crashing.
    int is_app_crashing;
    // Context of the crashing thread, as received by the SIGSEGV handler.
    ucontext_t* thread_context;
};
typedef struct crashing_data_t crashing_data_t;
__attribute__((visibility("hidden")))
int* is_app_crashing = NULL;
crashing_data_t* crash_data = NULL;

// this function is called by the profiler
// Returns the calling thread's functions_entered_counter, plus the
// is_app_crashing flag from the shared crash data when that data is available.
// A non-zero result therefore means either "inside a wrapped function" or
// "the application is crashing".
unsigned long long dd_inside_wrapped_functions()
{
    int app_is_crashing = 0;
    // crash_data is assigned straight from mmap(), so a failed allocation
    // leaves MAP_FAILED (not NULL) in the pointer: reject both values.
    if (crash_data != NULL && crash_data != MAP_FAILED) {
        app_is_crashing = crash_data->is_app_crashing;
    }
    return functions_entered_counter + app_is_crashing;
}
Expand Down Expand Up @@ -472,6 +479,25 @@ int ShouldCallCustomCreatedump(const char* pathname, char* const argv[])
return 0;
}

// Writes the base-10 representation of `val` into `buf` as a NUL-terminated
// string. `buf` must have room for every digit of `val` plus the terminator
// (21 bytes is enough for a 64-bit value).
static void ptr_to_decimal(char* buf, unsigned long val) {
    // Count the digits first so they can be written directly in place.
    int digits = 1;
    for (unsigned long rest = val / 10; rest != 0; rest /= 10) {
        digits++;
    }

    buf[digits] = '\0';

    // Fill from the least significant digit backwards.
    for (int pos = digits - 1; pos >= 0; pos--) {
        buf[pos] = (char)('0' + (val % 10));
        val /= 10;
    }
}

int execve(const char* pathname, char* const argv[], char* const envp[])
{
check_init();
Expand All @@ -483,8 +509,11 @@ int execve(const char* pathname, char* const argv[], char* const envp[])
return __real_execve(pathname, argv, envp);
}

if (is_app_crashing != NULL) {
*is_app_crashing = 1;
ucontext_t* thread_context = NULL;

if (crash_data != NULL) {
crash_data->is_app_crashing = 1;
thread_context = crash_data->thread_context;
}
// Execute the alternative crash handler, and prepend "createdump" to the arguments

Expand All @@ -493,7 +522,13 @@ int execve(const char* pathname, char* const argv[], char* const envp[])
while (argv[argc++] != NULL);

// We add two arguments: the path to dd-dotnet, and "createdump"
char** newArgv = malloc((argc + 2) * sizeof(char*));
int newArgc = argc + 2;

if (thread_context != NULL) {
newArgc += 2;
}

char** newArgv = malloc((newArgc) * sizeof(char*));

// By convention, argv[0] contains the name of the executable
// Insert createdump as the first actual argument
Expand All @@ -520,6 +555,14 @@ int execve(const char* pathname, char* const argv[], char* const envp[])
newArgv[new_idx++] = argv[idx++];
}
}

char context_addr[21]; // 20 hex digits + null
if (thread_context != NULL) {
ptr_to_decimal(context_addr, (unsigned long)thread_context);
newArgv[new_idx++] = "--dd-thread-context";
newArgv[new_idx++] = context_addr;
}

newArgv[new_idx] = NULL; // NULL terminate the array

size_t envp_count;
Expand Down Expand Up @@ -673,6 +716,64 @@ pid_t fork()
}
#endif
#endif

// Signature of an SA_SIGINFO-style signal handler.
typedef void (*sigsegv_handler_fn)(int signum, siginfo_t* info, void* context);
// SIGSEGV handler requested by the application. dd_sigsegv_handler forwards to it.
static _Atomic sigsegv_handler_fn sigsegv_current_handler;

// SIGSEGV handler that dd_sigaction installs in place of the application's own
// SA_SIGINFO handler. It records the signal context pointer in the shared crash
// data, then forwards the signal to the application's handler, if any.
static void dd_sigsegv_handler(int signum, siginfo_t* info, void* context)
{
    // crash_data is assigned straight from mmap(), so a failed allocation
    // leaves MAP_FAILED (not NULL) in the pointer: reject both values.
    if (crash_data != NULL && crash_data != MAP_FAILED) {
        crash_data->thread_context = (ucontext_t*)context;
    }

    // Read the atomic once so the NULL check and the call use the same value.
    sigsegv_handler_fn handler = sigsegv_current_handler;
    if (handler != NULL) {
        handler(signum, info, context);
    }
}

// Serializes updates of sigsegv_current_handler with the matching call to the real sigaction.
static pthread_mutex_t sigaction_lock = PTHREAD_MUTEX_INITIALIZER;
// Pointer to the next "sigaction" symbol, resolved in init().
static int (*__real_sigaction)(int signum, const struct sigaction *_Nullable restrict act, struct sigaction *_Nullable restrict oldact) = NULL;

// sigaction replacement that interposes dd_sigsegv_handler on SIGSEGV.
//
// When the caller installs an SA_SIGINFO handler for SIGSEGV, dd_sigsegv_handler
// is installed instead and the caller's handler is remembered so it can be
// forwarded to. Whenever the previous action is reported through `oldact`, the
// wrapper is replaced by the handler the application originally asked for, so
// callers never see dd_sigsegv_handler.
//
// Returns the result of the real sigaction call.
int dd_sigaction(int signum,
                 const struct sigaction *_Nullable restrict act,
                 struct sigaction *_Nullable restrict oldact)
{
    check_init();

    // Only SIGSEGV is interposed; a call with neither act nor oldact has nothing to translate.
    if (signum != SIGSEGV || (act == NULL && oldact == NULL))
    {
        return __real_sigaction(signum, act, oldact);
    }

    // Wrap only real SA_SIGINFO handlers:
    // - SIG_DFL / SIG_IGN are not callable and must reach the kernel untouched;
    // - wrapping our own wrapper would make it forward to itself.
    const int wrap = act != NULL
        && (act->sa_flags & SA_SIGINFO) == SA_SIGINFO
        && act->sa_handler != SIG_DFL
        && act->sa_handler != SIG_IGN
        && act->sa_sigaction != dd_sigsegv_handler;

    pthread_mutex_lock(&sigaction_lock);

    void (*prev_handler)(int signum, siginfo_t* info, void* context) = sigsegv_current_handler;
    int result;

    if (wrap)
    {
        struct sigaction new_act = *act;
        new_act.sa_sigaction = dd_sigsegv_handler;

        // Publish the target before installing the wrapper so the wrapper
        // never runs with a stale handler...
        sigsegv_current_handler = act->sa_sigaction;
        result = __real_sigaction(signum, &new_act, oldact);
        if (result != 0)
        {
            // ...and roll back if nothing was actually installed.
            sigsegv_current_handler = prev_handler;
        }
    }
    else
    {
        result = __real_sigaction(signum, act, oldact);
    }

    // oldact is only meaningful on success. Hide the wrapper from the caller on
    // every SIGSEGV path, otherwise restoring `oldact` later would re-install
    // dd_sigsegv_handler as its own forwarding target.
    if (result == 0 &&
        oldact != NULL &&
        ((oldact->sa_flags & SA_SIGINFO) == SA_SIGINFO) &&
        (oldact->sa_sigaction == dd_sigsegv_handler))
    {
        oldact->sa_sigaction = prev_handler;
    }

    pthread_mutex_unlock(&sigaction_lock);

    return result;
}

static pthread_once_t once_control = PTHREAD_ONCE_INIT;

static void init()
Expand All @@ -682,6 +783,7 @@ static void init()
__real_dlclose = __dd_dlsym(RTLD_NEXT, "dlclose");
__real_dladdr = __dd_dlsym(RTLD_NEXT, "dladdr");
__real_execve = __dd_dlsym(RTLD_NEXT, "execve");
__real_sigaction = __dd_dlsym(RTLD_NEXT, "sigaction");
#ifdef DD_ALPINE
__real_pthread_create = __dd_dlsym(RTLD_NEXT, "pthread_create");
__real_pthread_attr_init = __dd_dlsym(RTLD_NEXT, "pthread_attr_init");
Expand All @@ -691,10 +793,10 @@ static void init()
#endif
// if we failed at allocating memory for the shared variable
// the parent process won't be notified that the app is crashing.
is_app_crashing = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
crash_data = mmap(NULL, sizeof(crashing_data_t), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (is_app_crashing != MAP_FAILED) {
*is_app_crashing = 0; // Initialize flag
if (crash_data != MAP_FAILED) {
memset(crash_data, 0, sizeof(crashing_data_t));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sstream>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/wait.h>

extern "C"
Expand Down Expand Up @@ -139,21 +140,88 @@ std::vector<ModuleInfo> CrashReportingLinux::GetModules()
return modules;
}

std::vector<StackFrame> CrashReportingLinux::GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context)
// Sets register `reg` of the libunwind cursor `cursor` to `value`.
// If unw_set_reg fails, the enclosing function returns `{fallback}`.
// No trailing semicolon after `while (0)`: the caller supplies it, which keeps
// the macro usable as a single statement (e.g. in an unbraced if/else).
#define SET_REG(cursor, reg, value, fallback) \
    do {\
        if (unw_set_reg(&(cursor), (reg), (value)) != 0) \
        {\
            return {fallback};\
        }\
    } while (0)

// Creates a libunwind cursor for a thread of the remote process `pid`.
//
// - libunwindContext: the handle passed to unw_init_remote (the caller obtains it from _UPT_create).
// - threadContext: optional address, in the remote process, of the crashing thread's
//   ucontext_t. When provided and readable, the cursor's registers are overridden with
//   the ones captured at the crash point.
//
// Returns std::nullopt when the cursor cannot be initialized. Returns the plain
// (non-overridden) cursor when threadContext is null, when the remote context
// cannot be read, or when setting a register fails.
std::optional<unw_cursor_t> create_cursor_from_context(uint32_t pid, unw_addr_space_t& addressSpace, void* libunwindContext, void* threadContext)
{
    unw_cursor_t cursor;
    // The remote accessor argument is the libunwind handle, not the remote ucontext address.
    if (unw_init_remote(&cursor, addressSpace, libunwindContext) != 0)
    {
        return std::nullopt;
    }

    if (threadContext == nullptr)
    {
        return {cursor};
    }

    ucontext_t remoteCtx;
    // threadContext is a pointer in the remote process address space pointing at the thread ucontext_t
    // read it via process_vm_readv (more efficient than ptrace for bulk reads)
    struct iovec local = { .iov_base = &remoteCtx, .iov_len = sizeof(remoteCtx) };
    struct iovec remote = { .iov_base = threadContext, .iov_len = sizeof(remoteCtx) };

    // A short or failed read leaves remoteCtx unusable: keep the default cursor.
    if (process_vm_readv(pid, &local, 1, &remote, 1, 0) != static_cast<ssize_t>(sizeof(remoteCtx)))
    {
        return {cursor};
    }

    unw_cursor_t newCursor = cursor;

    // Override cursor with the register state from the crash point.
    // This skips the signal frame entirely, which is required on
    // musl/Alpine where libunwind cannot unwind past the signal trampoline.
#if defined(AMD64)
    SET_REG(newCursor, UNW_REG_IP, remoteCtx.uc_mcontext.gregs[REG_RIP], cursor);
    SET_REG(newCursor, UNW_REG_SP, remoteCtx.uc_mcontext.gregs[REG_RSP], cursor);
    SET_REG(newCursor, UNW_X86_64_RBP, remoteCtx.uc_mcontext.gregs[REG_RBP], cursor);
    // Callee-saved registers (DWARF unwind rules may reference these)
    SET_REG(newCursor, UNW_X86_64_RBX, remoteCtx.uc_mcontext.gregs[REG_RBX], cursor);
    SET_REG(newCursor, UNW_X86_64_R12, remoteCtx.uc_mcontext.gregs[REG_R12], cursor);
    SET_REG(newCursor, UNW_X86_64_R13, remoteCtx.uc_mcontext.gregs[REG_R13], cursor);
    SET_REG(newCursor, UNW_X86_64_R14, remoteCtx.uc_mcontext.gregs[REG_R14], cursor);
    SET_REG(newCursor, UNW_X86_64_R15, remoteCtx.uc_mcontext.gregs[REG_R15], cursor);
#elif defined(ARM64)
    SET_REG(newCursor, UNW_REG_IP, remoteCtx.uc_mcontext.pc, cursor);
    SET_REG(newCursor, UNW_REG_SP, remoteCtx.uc_mcontext.sp, cursor);
    SET_REG(newCursor, UNW_AARCH64_X29, remoteCtx.uc_mcontext.regs[29], cursor); // FP
    SET_REG(newCursor, UNW_AARCH64_X30, remoteCtx.uc_mcontext.regs[30], cursor); // LR
    // Callee-saved registers (DWARF unwind rules may reference these)
    SET_REG(newCursor, UNW_AARCH64_X19, remoteCtx.uc_mcontext.regs[19], cursor);
    SET_REG(newCursor, UNW_AARCH64_X20, remoteCtx.uc_mcontext.regs[20], cursor);
    SET_REG(newCursor, UNW_AARCH64_X21, remoteCtx.uc_mcontext.regs[21], cursor);
    SET_REG(newCursor, UNW_AARCH64_X22, remoteCtx.uc_mcontext.regs[22], cursor);
    SET_REG(newCursor, UNW_AARCH64_X23, remoteCtx.uc_mcontext.regs[23], cursor);
    SET_REG(newCursor, UNW_AARCH64_X24, remoteCtx.uc_mcontext.regs[24], cursor);
    SET_REG(newCursor, UNW_AARCH64_X25, remoteCtx.uc_mcontext.regs[25], cursor);
    SET_REG(newCursor, UNW_AARCH64_X26, remoteCtx.uc_mcontext.regs[26], cursor);
    SET_REG(newCursor, UNW_AARCH64_X27, remoteCtx.uc_mcontext.regs[27], cursor);
    SET_REG(newCursor, UNW_AARCH64_X28, remoteCtx.uc_mcontext.regs[28], cursor);
#else
#error "Unsupported architecture"
#endif
    return {newCursor};
}

std::vector<StackFrame> CrashReportingLinux::GetThreadFrames(int32_t tid, void* threadContext, ResolveManagedCallstack resolveManagedCallstack, void* context)
{
std::vector<StackFrame> frames;

auto result = unw_init_remote(&cursor, _addressSpace, libunwindContext);
auto libunwindContext = _UPT_create(tid);

if (result != 0)
auto cursorOpt = create_cursor_from_context(_pid, _addressSpace, libunwindContext, threadContext);
if (!cursorOpt.has_value())
{
return frames;
}

auto cursor = cursorOpt.value();

// Get the managed callstack
ResolveMethodData* managedCallstack;
int32_t numberOfManagedFrames;
Expand Down Expand Up @@ -205,7 +273,7 @@ std::vector<StackFrame> CrashReportingLinux::GetThreadFrames(int32_t tid, Resolv
bool hasName = false;

unw_proc_info_t procInfo;
result = unw_get_proc_info(&cursor, &procInfo);
auto result = unw_get_proc_info(&cursor, &procInfo);

if (result == 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class CrashReportingLinux : public CrashReporting

private:
std::vector<std::pair<int32_t, std::string>> GetThreads() override;
std::vector<StackFrame> GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
std::vector<StackFrame> GetThreadFrames(int32_t tid, void* threadContext, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
const ModuleInfo* FindModule(uintptr_t ip);
std::vector<ModuleInfo> GetModules();
std::string GetSignalInfo(int32_t signal) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ std::vector<std::pair<int32_t, std::string>> CrashReportingWindows::GetThreads()
return threads;
}

std::vector<StackFrame> CrashReportingWindows::GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* callbackContext)
std::vector<StackFrame> CrashReportingWindows::GetThreadFrames(int32_t tid, void* ignoredThreadContext, ResolveManagedCallstack resolveManagedCallstack, void* callbackContext)
{
std::vector<StackFrame> frames;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class CrashReportingWindows : public CrashReporting

private:
std::vector<std::pair<int32_t, std::string>> GetThreads() override;
std::vector<StackFrame> GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
std::vector<StackFrame> GetThreadFrames(int32_t tid, void* ignoredThreadContext, ResolveManagedCallstack resolveManagedCallstack, void* context) override;
std::string GetSignalInfo(int32_t signal) override;
std::vector<ModuleInfo> GetModules();
const ModuleInfo* FindModule(uintptr_t ip);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ int32_t CrashReporting::SetSignalInfo(int32_t signal, const char* description)
return 0;
}

int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious)
int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, void* threadContext, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious)
{
auto threads = GetThreads();

Expand All @@ -226,7 +226,8 @@ int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, ResolveManagedCa

for (auto const& [threadId, threadName] : threads)
{
auto frames = GetThreadFrames(threadId, resolveCallback, context);
auto context = threadContext != nullptr ? threadContext : nullptr;
auto frames = GetThreadFrames(threadId, context, resolveCallback, context);

auto [stackTrace, succeeded] = ExtractResult(ddog_crasht_StackTrace_new());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ ICrashReporting : public IUnknown
// only for tests
virtual int32_t STDMETHODCALLTYPE Panic() = 0;
virtual int32_t STDMETHODCALLTYPE SetSignalInfo(int32_t signal, const char* description) = 0;
virtual int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) = 0;
virtual int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, void* threadContext, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) = 0;
virtual int32_t STDMETHODCALLTYPE SetMetadata(const char* libraryName, const char* libraryVersion, const char* family, Tag* tags, int32_t tagCount) = 0;
virtual int32_t STDMETHODCALLTYPE Send() = 0;
virtual int32_t STDMETHODCALLTYPE WriteToFile(const char* url) = 0;
Expand All @@ -128,7 +128,7 @@ class CrashReporting : public ICrashReporting
int32_t STDMETHODCALLTYPE Initialize() override;
int32_t STDMETHODCALLTYPE Panic() override;
int32_t STDMETHODCALLTYPE SetSignalInfo(int32_t signal, const char* description) override;
int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) override;
int32_t STDMETHODCALLTYPE ResolveStacks(int32_t crashingThreadId, void* threadContext, ResolveManagedCallstack resolveCallback, void* context, bool* isSuspicious) override;
int32_t STDMETHODCALLTYPE SetMetadata(const char* libraryName, const char* libraryVersion, const char* family, Tag* tags, int32_t tagCount) override;
int32_t STDMETHODCALLTYPE Send() override;
int32_t STDMETHODCALLTYPE WriteToFile(const char* url) override;
Expand All @@ -141,7 +141,7 @@ class CrashReporting : public ICrashReporting
ddog_crasht_Handle_CrashInfoBuilder _builder;
void SetLastError(ddog_Error error);
virtual std::vector<std::pair<int32_t, std::string>> GetThreads() = 0;
virtual std::vector<StackFrame> GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) = 0;
virtual std::vector<StackFrame> GetThreadFrames(int32_t tid, void* threadContext, ResolveManagedCallstack resolveManagedCallstack, void* context) = 0;
virtual std::string GetSignalInfo(int32_t signal) = 0;

#ifdef DD_TEST
Expand Down
Loading
Loading