diff --git a/runtest-moduleapi b/runtest-moduleapi index ff685afb6..910d581f2 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -55,4 +55,5 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/async_rm_call \ --single unit/moduleapi/moduleauth \ --single unit/moduleapi/rdbloadsave \ +--single unit/moduleapi/crash \ "${@}" diff --git a/src/debug.c b/src/debug.c index b924d9ed3..facbd6104 100644 --- a/src/debug.c +++ b/src/debug.c @@ -72,10 +72,10 @@ static pthread_mutex_t signal_handler_lock; static pthread_mutexattr_t signal_handler_lock_attr; static volatile int signal_handler_lock_initialized = 0; /* Forward declarations */ -void bugReportStart(void); +int bugReportStart(void); void printCrashReport(void); void bugReportEnd(int killViaSignal, int sig); -void logStackTrace(void *eip, int uplevel); +void logStackTrace(void *eip, int uplevel, int current_thread); void dbGetStats(char *buf, size_t bufsize, redisDb *db, int full, dbKeyType keyType); void sigalrmSignalHandler(int sig, siginfo_t *info, void *secret); @@ -1031,15 +1031,17 @@ NULL __attribute__ ((noinline)) void _serverAssert(const char *estr, const char *file, int line) { - bugReportStart(); - serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); + int new_report = bugReportStart(); + serverLog(LL_WARNING,"=== %sASSERTION FAILED ===", new_report ? "" : "RECURSIVE "); serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr); if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - logStackTrace(NULL, 1); + logStackTrace(NULL, 1, 0); #endif - printCrashReport(); + /* If this was a recursive assertion, it what most likely generated + * from printCrashReport. */ + if (new_report) printCrashReport(); } // remove the signal handler so on abort() we will output the crash report. @@ -1127,16 +1129,18 @@ void _serverPanic(const char *file, int line, const char *msg, ...) { vsnprintf(fmtmsg,sizeof(fmtmsg),msg,ap); va_end(ap); - bugReportStart(); + int new_report = bugReportStart(); serverLog(LL_WARNING,"------------------------------------------------"); serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue"); serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line); if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - logStackTrace(NULL, 1); + logStackTrace(NULL, 1, 0); #endif - printCrashReport(); + /* If this was a recursive panic, it what most likely generated + * from printCrashReport. */ + if (new_report) printCrashReport(); } // remove the signal handler so on abort() we will output the crash report. @@ -1144,14 +1148,18 @@ void _serverPanic(const char *file, int line, const char *msg, ...) { bugReportEnd(0, 0); } -void bugReportStart(void) { +/* Start a bug report, returning 1 if this is the first time this function was called, 0 otherwise. */ +int bugReportStart(void) { pthread_mutex_lock(&bug_report_start_mutex); if (bug_report_start == 0) { serverLogRaw(LL_WARNING|LL_RAW, "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n"); bug_report_start = 1; + pthread_mutex_unlock(&bug_report_start_mutex); + return 1; } pthread_mutex_unlock(&bug_report_start_mutex); + return 0; } #ifdef HAVE_BACKTRACE @@ -1895,9 +1903,9 @@ static void writeStacktraces(int fd, int uplevel) { } -#else /* __linux__*/ +#endif /* __linux__ */ __attribute__ ((noinline)) -static void writeStacktraces(int fd, int uplevel) { +static void writeCurrentThreadsStackTrace(int fd, int uplevel) { void *trace[BACKTRACE_MAX_SIZE]; int trace_size = backtrace(trace, BACKTRACE_MAX_SIZE); @@ -1906,7 +1914,6 @@ static void writeStacktraces(int fd, int uplevel) { if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; backtrace_symbols_fd(trace+uplevel, trace_size-uplevel, fd); } -#endif /* __linux__ */ /* Logs the stack trace using the backtrace() call. This function is designed * to be called from signal handlers safely. @@ -1916,7 +1923,7 @@ static void writeStacktraces(int fd, int uplevel) { * __attribute__ ((noinline)) to make sure the compiler won't inline them. */ __attribute__ ((noinline)) -void logStackTrace(void *eip, int uplevel) { +void logStackTrace(void *eip, int uplevel, int current_thread) { int fd = openDirectLogFiledes(); char *msg; uplevel++; /* skip this function */ @@ -1935,7 +1942,17 @@ void logStackTrace(void *eip, int uplevel) { /* Write symbols to log file */ ++uplevel; - writeStacktraces(fd, uplevel); +#ifdef __linux__ + if (current_thread) { + writeCurrentThreadsStackTrace(fd, uplevel); + } else { + writeStacktraces(fd, uplevel); + } +#else + /* Outside of linux, we only support writing the current thread. */ + UNUSED(current_thread); + writeCurrentThreadsStackTrace(fd, uplevel); +#endif msg = "\n------ STACK TRACE DONE ------\n"; if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; @@ -2218,15 +2235,14 @@ __attribute__ ((noinline)) static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { UNUSED(secret); UNUSED(info); + int print_full_crash_info = 1; /* Check if it is safe to enter the signal handler. second thread crashing at the same time will deadlock. */ if(pthread_mutex_lock(&signal_handler_lock) == EDEADLK) { - /* If this thread already owns the lock (meaning we crashed during handling a signal) - * log that the crash report can't be generated. */ + /* If this thread already owns the lock (meaning we crashed during handling a signal) switch + * to printing the minimal information about the crash. */ serverLogRawFromHandler(LL_WARNING, - "Crashed running signal handler. Can't continue to generate the crash report"); - /* gracefully exit */ - bugReportEnd(1, sig); - return; + "Crashed running signal handler. Providing reduced version of recursive crash report."); + print_full_crash_info = 0; } bugReportStart(); @@ -2260,7 +2276,9 @@ static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { getAndSetMcontextEip(uc, ptr); } - logStackTrace(eip, 1); + /* When printing the reduced crash info, just print the current thread + * to avoid race conditions with the multi-threaded stack collector. */ + logStackTrace(eip, 1, !print_full_crash_info); if (eip == info->si_addr) { /* Restore old eip */ @@ -2270,7 +2288,7 @@ static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { logRegisters(uc); #endif - printCrashReport(); + if (print_full_crash_info) printCrashReport(); #ifdef HAVE_BACKTRACE if (eip != NULL) @@ -2430,7 +2448,7 @@ void sigalrmSignalHandler(int sig, siginfo_t *info, void *secret) { serverLogRawFromHandler(LL_WARNING, "\nReceived SIGALRM"); } #ifdef HAVE_BACKTRACE - logStackTrace(getAndSetMcontextEip(uc, NULL), 1); + logStackTrace(getAndSetMcontextEip(uc, NULL), 1, 0); #else serverLogRawFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); #endif diff --git a/tests/modules/Makefile b/tests/modules/Makefile index d63c8548d..586e66e06 100644 --- a/tests/modules/Makefile +++ b/tests/modules/Makefile @@ -4,10 +4,10 @@ uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') warning_cflags = -W -Wall -Wno-missing-field-initializers ifeq ($(uname_S),Darwin) - SHOBJ_CFLAGS ?= $(warning_cflags) -dynamic -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_CFLAGS ?= $(warning_cflags) -dynamic -fno-common -g -ggdb -std=gnu11 -O2 SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup else # Linux, others - SHOBJ_CFLAGS ?= $(warning_cflags) -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_CFLAGS ?= $(warning_cflags) -fno-common -g -ggdb -std=gnu11 -O2 SHOBJ_LDFLAGS ?= -shared endif @@ -62,7 +62,8 @@ TEST_MODULES = \ usercall.so \ postnotifications.so \ moduleauthtwo.so \ - rdbloadsave.so + rdbloadsave.so \ + crash.so .PHONY: all diff --git a/tests/modules/crash.c b/tests/modules/crash.c new file mode 100644 index 000000000..c7eccda52 --- /dev/null +++ b/tests/modules/crash.c @@ -0,0 +1,39 @@ +#include "redismodule.h" + +#include +#include + +#define UNUSED(V) ((void) V) + +void assertCrash(RedisModuleInfoCtx *ctx, int for_crash_report) { + UNUSED(ctx); + UNUSED(for_crash_report); + RedisModule_Assert(0); +} + +void segfaultCrash(RedisModuleInfoCtx *ctx, int for_crash_report) { + UNUSED(ctx); + UNUSED(for_crash_report); + /* Compiler gives warnings about writing to a random address + * e.g "*((char*)-1) = 'x';". As a workaround, we map a read-only area + * and try to write there to trigger segmentation fault. */ + char *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + *p = 'x'; +} + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_Init(ctx,"infocrash",1,REDISMODULE_APIVER_1) + == REDISMODULE_ERR) return REDISMODULE_ERR; + RedisModule_Assert(argc == 1); + if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "segfault")) { + if (RedisModule_RegisterInfoFunc(ctx, segfaultCrash) == REDISMODULE_ERR) return REDISMODULE_ERR; + } else if(!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "assert")) { + if (RedisModule_RegisterInfoFunc(ctx, assertCrash) == REDISMODULE_ERR) return REDISMODULE_ERR; + } else { + return REDISMODULE_ERR; + } + + return REDISMODULE_OK; +} diff --git a/tests/unit/moduleapi/crash.tcl b/tests/unit/moduleapi/crash.tcl new file mode 100644 index 000000000..82b32f954 --- /dev/null +++ b/tests/unit/moduleapi/crash.tcl @@ -0,0 +1,52 @@ +# This file is used to test certain crash edge cases to make sure they produce +# correct stack traces for debugging. +set testmodule [file normalize tests/modules/crash.so] + +# Valgrind will complain that the process terminated by a signal, skip it. +if {!$::valgrind} { + start_server {tags {"modules"}} { + r module load $testmodule assert + test {Test module crash when info crashes with an assertion } { + catch {r 0 info infocrash} + set res [wait_for_log_messages 0 {"*=== REDIS BUG REPORT START: Cut & paste starting from here ===*"} 0 10 1000] + set loglines [lindex $res 1] + + set res [wait_for_log_messages 0 {"*ASSERTION FAILED*"} $loglines 10 1000] + set loglines [lindex $res 1] + + set res [wait_for_log_messages 0 {"*RECURSIVE ASSERTION FAILED*"} $loglines 10 1000] + set loglines [lindex $res 1] + + wait_for_log_messages 0 {"*=== REDIS BUG REPORT END. Make sure to include from START to END. ===*"} $loglines 10 1000 + assert_equal 1 [count_log_message 0 "=== REDIS BUG REPORT END. Make sure to include from START to END. ==="] + assert_equal 2 [count_log_message 0 "ASSERTION FAILED"] + # There will be 3 crash assertions, 1 in the first stack trace and 2 in the second + assert_equal 3 [count_log_message 0 "assertCrash"] + assert_equal 1 [count_log_message 0 "RECURSIVE ASSERTION FAILED"] + assert_equal 1 [count_log_message 0 "=== REDIS BUG REPORT START: Cut & paste starting from here ==="] + } + } + + start_server {tags {"modules"}} { + r module load $testmodule segfault + test {Test module crash when info crashes with a segfault} { + catch {r 0 info infocrash} + set res [wait_for_log_messages 0 {"*=== REDIS BUG REPORT START: Cut & paste starting from here ===*"} 0 10 1000] + set loglines [lindex $res 1] + + set res [wait_for_log_messages 0 {"*Crashed running the instruction at*"} $loglines 10 1000] + set loglines [lindex $res 1] + + set res [wait_for_log_messages 0 {"*Crashed running signal handler. Providing reduced version of recursive crash report*"} $loglines 10 1000] + set loglines [lindex $res 1] + set res [wait_for_log_messages 0 {"*Crashed running the instruction at*"} $loglines 10 1000] + set loglines [lindex $res 1] + + wait_for_log_messages 0 {"*=== REDIS BUG REPORT END. Make sure to include from START to END. ===*"} $loglines 10 1000 + assert_equal 1 [count_log_message 0 "=== REDIS BUG REPORT END. Make sure to include from START to END. ==="] + assert_equal 1 [count_log_message 0 "Crashed running signal handler. Providing reduced version of recursive crash report"] + assert_equal 2 [count_log_message 0 "Crashed running the instruction at"] + assert_equal 1 [count_log_message 0 "=== REDIS BUG REPORT START: Cut & paste starting from here ==="] + } + } +}