Skip to content

Commit d27c741

Browse files
authored
remove heap allocations from signal handlers. (redis#12655)
Using heap allocation during signal handlers is unsafe. This PR purpose is to replace all the heap allocations done within the signal handlers raised upon server crash and assertions. These were added in redis#12453. writeStacktraces(): allocates the stacktraces output array on the calling thread's stack and assigns the address to a global variable. It calls `ThreadsManager_runOnThreads()` that invokes `collect_stacktrace_data()` by each thread: each thread writes to a different location in the above array to allow sync writes. get_ready_to_signal_threads_tids(): instead of allocating the `tids` array, it receives it as a fixed size array parameter, allocated on on the stack of the calling function, and returns the number of valid threads. The array size is hard-coded to 50. `ThreadsManager_runOnThreads():` To avoid the outputs array allocation, the **callback signature** was changed. Now it should return void. This function return type has also changed to int - returns 1 if successful, and 0 otherwise. Other unsafe calls will be handled in following PRs
1 parent 0270abd commit d27c741

File tree

3 files changed

+61
-76
lines changed

3 files changed

+61
-76
lines changed

src/debug.c

Lines changed: 49 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,12 +1822,14 @@ void closeDirectLogFiledes(int fd) {
18221822

18231823
#ifdef HAVE_BACKTRACE
18241824
#define BACKTRACE_MAX_SIZE 100
1825+
18251826
#ifdef __linux__
18261827
#include <sys/prctl.h>
18271828
#include <sys/syscall.h>
18281829
#include <dirent.h>
18291830

1830-
static pid_t *get_ready_to_signal_threads_tids(pid_t pid, int sig_num, size_t *tids_len_output);
1831+
#define TIDS_MAX_SIZE 50
1832+
static size_t get_ready_to_signal_threads_tids(pid_t pid, int sig_num, pid_t tids[TIDS_MAX_SIZE]);
18311833

18321834
#define MAX_BUFF_LENGTH 256
18331835
typedef struct {
@@ -1837,60 +1839,70 @@ typedef struct {
18371839
void *trace[BACKTRACE_MAX_SIZE];
18381840
} stacktrace_data;
18391841

1840-
__attribute__ ((noinline)) static void *collect_stacktrace_data(void) {
1842+
static stacktrace_data *stacktraces_mempool = NULL;
1843+
static redisAtomic size_t g_thread_ids = 0;
1844+
1845+
static stacktrace_data *get_stack_trace_pool(void) {
1846+
size_t thread_id;
1847+
atomicGetIncr(g_thread_ids, thread_id, 1);
1848+
return stacktraces_mempool + thread_id;
1849+
}
1850+
1851+
__attribute__ ((noinline)) static void collect_stacktrace_data(void) {
18411852
/* allocate stacktrace_data struct */
1842-
stacktrace_data *trace_data = zmalloc(sizeof(stacktrace_data));
1853+
stacktrace_data *trace_data = get_stack_trace_pool();
18431854

18441855
/* Get the stack trace first! */
18451856
trace_data->trace_size = backtrace(trace_data->trace, BACKTRACE_MAX_SIZE);
1846-
1857+
18471858
/* get the thread name */
18481859
prctl(PR_GET_NAME, trace_data->thread_name);
18491860

18501861
/* get the thread id */
18511862
trace_data->tid = syscall(SYS_gettid);
1852-
1853-
/* return the trace data */
1854-
return trace_data;
18551863
}
18561864

1857-
__attribute__ ((noinline))
1865+
__attribute__ ((noinline))
18581866
static void writeStacktraces(int fd, int uplevel) {
18591867
/* get the list of all the process's threads that don't block or ignore the THREADS_SIGNAL */
18601868
pid_t pid = getpid();
1861-
size_t len_tids = 0;
1862-
pid_t *tids = get_ready_to_signal_threads_tids(pid, THREADS_SIGNAL, &len_tids);
1869+
pid_t tids[TIDS_MAX_SIZE];
1870+
size_t len_tids = get_ready_to_signal_threads_tids(pid, THREADS_SIGNAL, tids);
1871+
if (!len_tids) {
1872+
serverLogFromHandler(LL_WARNING, "writeStacktraces(): Failed to get the process's threads.");
1873+
}
18631874

1864-
/* This call returns either NULL or the stacktraces data from all tids */
1865-
stacktrace_data **stacktraces_data = (stacktrace_data **)ThreadsManager_runOnThreads(tids, len_tids, collect_stacktrace_data);
1875+
stacktrace_data stacktraces[len_tids];
1876+
stacktraces_mempool = stacktraces;
1877+
memset(stacktraces, 0, sizeof(stacktrace_data) * len_tids);
18661878

1867-
/* free tids */
1868-
zfree(tids);
1879+
/* restart mempool iterator*/
1880+
atomicSet(g_thread_ids, 0);
18691881

1870-
/* ThreadsManager_runOnThreads returns NULL if it is already running */
1871-
if (!stacktraces_data) return;
1882+
/* ThreadsManager_runOnThreads returns 0 if it is already running */
1883+
if (!ThreadsManager_runOnThreads(tids, len_tids, collect_stacktrace_data)) return;
18721884

18731885
size_t skipped = 0;
18741886

18751887
char buff[MAX_BUFF_LENGTH];
18761888
pid_t calling_tid = syscall(SYS_gettid);
18771889
/* for backtrace_data in backtraces_data: */
18781890
for (size_t i = 0; i < len_tids; i++) {
1879-
stacktrace_data *curr_stacktrace_data = stacktraces_data[i];
1891+
stacktrace_data curr_stacktrace_data = stacktraces[i];
18801892
/*ThreadsManager_runOnThreads might fail to collect the thread's data */
1881-
if (!curr_stacktrace_data) {
1893+
if (0 == curr_stacktrace_data.trace_size) {
18821894
skipped++;
18831895
continue;
18841896
}
18851897

18861898
/* stacktrace header includes the tid and the thread's name */
1887-
snprintf(buff, MAX_BUFF_LENGTH, "\n%d %s", curr_stacktrace_data->tid, curr_stacktrace_data->thread_name);
1899+
snprintf(buff, MAX_BUFF_LENGTH, "\n%d %s", curr_stacktrace_data.tid, curr_stacktrace_data.thread_name);
18881900
if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */};
18891901

18901902
/* skip kernel call to the signal handler, the signal handler and the callback addresses */
18911903
int curr_uplevel = 3;
18921904

1893-
if (curr_stacktrace_data->tid == calling_tid) {
1905+
if (curr_stacktrace_data.tid == calling_tid) {
18941906
/* skip signal syscall and ThreadsManager_runOnThreads */
18951907
curr_uplevel += uplevel + 2;
18961908
/* Add an indication to header of the thread that is handling the log file */
@@ -1903,11 +1915,8 @@ static void writeStacktraces(int fd, int uplevel) {
19031915
if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */};
19041916

19051917
/* add the stacktrace */
1906-
backtrace_symbols_fd(curr_stacktrace_data->trace+curr_uplevel, curr_stacktrace_data->trace_size-curr_uplevel, fd);
1907-
1908-
zfree(curr_stacktrace_data);
1918+
backtrace_symbols_fd(curr_stacktrace_data.trace+curr_uplevel, curr_stacktrace_data.trace_size-curr_uplevel, fd);
19091919
}
1910-
zfree(stacktraces_data);
19111920

19121921
snprintf(buff, MAX_BUFF_LENGTH, "\n%zu/%zu expected stacktraces.\n", len_tids - skipped, len_tids);
19131922
if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */};
@@ -2486,9 +2495,8 @@ void debugDelay(int usec) {
24862495
#ifdef __linux__
24872496

24882497
/* =========================== Stacktrace Utils ============================ */
2489-
#define TIDS_INITIAL_SIZE 50
24902498

2491-
/** If it doesn't block and doesn't ignore, return 1 (the thread will handle the signal)
2499+
/** If it doesn't block and doesn't ignore, return 1 (the thread will handle the signal)
24922500
* If thread tid blocks or ignores sig_num returns 0 (thread is not ready to catch the signal).
24932501
* also returns 0 if something is wrong and prints a warning message to the log file **/
24942502
static int is_thread_ready_to_signal(pid_t pid, pid_t tid, int sig_num) {
@@ -2527,23 +2535,21 @@ static int is_thread_ready_to_signal(pid_t pid, pid_t tid, int sig_num) {
25272535
serverLog(LL_WARNING,
25282536
"tid:%d: failed to find SigBlk or/and SigIgn field(s) in /proc/%d/task/%d/status file", tid, pid, tid);
25292537
}
2530-
return ret;
2538+
return ret;
25312539
}
25322540

2533-
/** Returns a list of all the process's (pid) threads that can receive signal sig_num.
2534-
* Also updates tids_len_output to the number of valid threads' ids in the returned array
2535-
* NOTE: It is the caller responsibility to free the returned array with zfree(). */
2536-
static pid_t *get_ready_to_signal_threads_tids(pid_t pid, int sig_num, size_t *tids_len_output) {
2541+
/** Returns the number of the process's threads that can receive signal sig_num.
2542+
* Writes into tids the tids of these threads.
2543+
* If it fails, returns 0.
2544+
*/
2545+
static size_t get_ready_to_signal_threads_tids(pid_t pid, int sig_num, pid_t tids[TIDS_MAX_SIZE]) {
25372546
/* Initialize the path the process threads' directory. */
25382547
char path_buff[MAX_BUFF_LENGTH];
25392548
snprintf(path_buff, MAX_BUFF_LENGTH, "/proc/%d/task", pid);
25402549

25412550
/* Get the directory handler. */
25422551
DIR *dir;
2543-
if (!(dir = opendir(path_buff))) return NULL;
2544-
2545-
size_t tids_cap = TIDS_INITIAL_SIZE;
2546-
pid_t *tids = zmalloc(sizeof(pid_t) * tids_cap);
2552+
if (!(dir = opendir(path_buff))) return 0;
25472553

25482554
size_t tids_count = 0;
25492555
struct dirent *entry;
@@ -2555,7 +2561,7 @@ static pid_t *get_ready_to_signal_threads_tids(pid_t pid, int sig_num, size_t *t
25552561
if (entry->d_type == DT_DIR) {
25562562
/* Skip irrelevant directories. */
25572563
if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
2558-
/* the thread's directory name is equivalent to its tid. */
2564+
/* the thread's directory name is equivalent to its tid. */
25592565
pid_t tid = atoi(entry->d_name);
25602566

25612567
if(!is_thread_ready_to_signal(pid, tid, sig_num)) continue;
@@ -2564,31 +2570,28 @@ static pid_t *get_ready_to_signal_threads_tids(pid_t pid, int sig_num, size_t *t
25642570
current_thread_index = tids_count;
25652571
}
25662572

2567-
/* increase tids capacity if needed */
2568-
if(tids_count >= tids_cap) {
2569-
tids_cap *= 2;
2570-
tids = zrealloc(tids, sizeof(pid_t) * tids_cap);
2571-
}
2572-
25732573
/* save the thread id */
25742574
tids[tids_count++] = tid;
25752575
}
25762576
}
2577+
/* Stop if we reached the maximum threads number. */
2578+
if(tids_count == TIDS_MAX_SIZE) {
2579+
serverLogFromHandler(LL_WARNING,"get_ready_to_signal_threads_tids(): Reached the limit of the tids buffer.");
2580+
break;
2581+
}
25772582
}
25782583

25792584
/* Swap the last tid with the the current thread id */
25802585
if(current_thread_index != -1) {
25812586
pid_t last_tid = tids[tids_count - 1];
2582-
2587+
25832588
tids[tids_count - 1] = calling_tid;
25842589
tids[current_thread_index] = last_tid;
25852590
}
25862591

2587-
25882592
closedir(dir);
25892593

2590-
*tids_len_output = tids_count;
2591-
return tids;
2594+
return tids_count;
25922595
}
25932596
#endif /* __linux__ */
25942597
#endif /* HAVE_BACKTRACE */

src/threads_mngr.c

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ static const clock_t RUN_ON_THREADS_TIMEOUT = 2;
4949

5050
static run_on_thread_cb g_callback = NULL;
5151
static volatile size_t g_tids_len = 0;
52-
static void **g_output_array = NULL;
53-
static redisAtomic size_t g_thread_ids = 0;
5452
static redisAtomic size_t g_num_threads_done = 0;
5553

5654
static sem_t wait_for_threads_sem;
@@ -83,11 +81,11 @@ void ThreadsManager_init(void) {
8381
sigaction(SIGUSR2, &act, NULL);
8482
}
8583

86-
__attribute__ ((noinline))
87-
void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback) {
84+
__attribute__ ((noinline))
85+
int ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback) {
8886
/* Check if it is safe to start running. If not - return */
8987
if(test_and_start() == IN_PROGRESS) {
90-
return NULL;
88+
return 0;
9189
}
9290

9391
/* Update g_callback */
@@ -96,9 +94,6 @@ void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_c
9694
/* Set g_tids_len */
9795
g_tids_len = tids_len;
9896

99-
/* Allocate the output buffer */
100-
g_output_array = zcalloc(sizeof(void*) * tids_len);
101-
10297
/* Initialize a semaphore that we will be waiting on for the threads
10398
use pshared = 0 to indicate the semaphore is shared between the process's threads (and not between processes),
10499
and value = 0 as the initial semaphore value. */
@@ -113,12 +108,10 @@ void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_c
113108
/* Wait for all the threads to write to the output array, or until timeout is reached */
114109
wait_threads();
115110

116-
void **ret = g_output_array;
117-
118111
/* Cleanups to allow next execution */
119112
ThreadsManager_cleanups();
120113

121-
return ret;
114+
return 1;
122115
}
123116

124117
/*============================ Internal functions implementations ========================== */
@@ -133,7 +126,7 @@ static int test_and_start(void) {
133126
return prev_state;
134127
}
135128

136-
__attribute__ ((noinline))
129+
__attribute__ ((noinline))
137130
static void invoke_callback(int sig) {
138131
UNUSED(sig);
139132

@@ -143,10 +136,8 @@ static void invoke_callback(int sig) {
143136
return;
144137
}
145138

146-
if (g_output_array) {
147-
size_t thread_id;
148-
atomicGetIncr(g_thread_ids, thread_id, 1);
149-
g_output_array[thread_id] = g_callback();
139+
if (g_callback) {
140+
g_callback();
150141
size_t curr_done_count;
151142
atomicIncrGet(g_num_threads_done, curr_done_count, 1);
152143

@@ -181,8 +172,6 @@ static void ThreadsManager_cleanups(void) {
181172

182173
g_callback = NULL;
183174
g_tids_len = 0;
184-
g_output_array = NULL;
185-
g_thread_ids = 0;
186175
g_num_threads_done = 0;
187176
sem_destroy(&wait_for_threads_sem);
188177

@@ -197,12 +186,12 @@ void ThreadsManager_init(void) {
197186
/* DO NOTHING */
198187
}
199188

200-
void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback) {
189+
int ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback) {
201190
/* DO NOTHING */
202191
UNUSED(tids);
203192
UNUSED(tids_len);
204193
UNUSED(callback);
205-
return NULL;
194+
return 1;
206195
}
207196

208197
#endif /* __linux__ */

src/threads_mngr.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
#define THREADS_SIGNAL SIGUSR2
4343

4444
/* Callback signature */
45-
typedef void*(*run_on_thread_cb)(void);
45+
typedef void(*run_on_thread_cb)(void);
4646

4747
/* Register the process to THREADS_SIGNAL */
4848
void ThreadsManager_init(void);
@@ -62,15 +62,8 @@ void ThreadsManager_init(void);
6262
*
6363
* The function returns only when @param tids_len threads have returned from @param callback.
6464
*
65-
* @return NULL If ThreadsManager_runOnThreads is already in the middle of execution.
66-
* Otherwise, it returns an array of the threads return value from @param callback.
67-
* NOTES:
68-
* The indices of the outputs in the output array are NOT associated with the threads indices in @param tids.
69-
*
70-
* The returned array length will be @param tids_len, but some of the entries might be set to NULL if the
71-
* invocation of @param callback was unsuccessful.
65+
* @return 1 if successful, 0 If ThreadsManager_runOnThreads is already in the middle of execution.
7266
*
73-
* The output array should be freed by the caller by calling zfree().
7467
**/
7568

76-
void **ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback);
69+
int ThreadsManager_runOnThreads(pid_t *tids, size_t tids_len, run_on_thread_cb callback);

0 commit comments

Comments
 (0)