From 2c0efc633422876da5fe57222dacdf6d54208113 Mon Sep 17 00:00:00 2001 From: Andrew Bettison Date: Thu, 8 May 2014 11:19:43 +0930 Subject: [PATCH] Set up SEGV crash handler in main() not server() Now all commands will attempt to log a GDB backtrace on a crash, not just the server daemon process. Added strbuf_append_signal_name() --- log.c | 10 +-- main.c | 32 ++++++++- serval.h | 1 + server.c | 173 ++++++----------------------------------------- strbuf_helpers.c | 107 +++++++++++++++++++++++++++++ strbuf_helpers.h | 6 ++ 6 files changed, 169 insertions(+), 160 deletions(-) diff --git a/log.c b/log.c index a3fc0a07..a956b3fc 100644 --- a/log.c +++ b/log.c @@ -733,8 +733,8 @@ int logBacktrace(int level, struct __sourceloc whence) _exit(-2); } close(stdout_fds[0]); - /* XXX: Need the cast on Solaris because it defins NULL as 0L and gcc doesn't - * see it as a sentinal */ + // Need the (void*) cast on Solaris because it defines NULL as 0L and gcc doesn't accept it as a + // sentinel execlp("gdb", "gdb", "-n", "-batch", "-x", tempfile, execpath, pidstr, (void*)NULL); perror("execlp(\"gdb\")"); do { _exit(-3); } while (1); @@ -754,14 +754,14 @@ int logBacktrace(int level, struct __sourceloc whence) for (; p < readp; ++p) if (*p == '\n' || *p == '\0') { *p = '\0'; - _log_iterator_printf_nl(&it, level, __NOWHERE__, "%s", linep); + _log_iterator_printf_nl(&it, level, __NOWHERE__, "GDB %s", linep); linep = p + 1; } if (readp >= bufe && linep == buf) { // Line does not fit into buffer.
char t = bufe[-1]; bufe[-1] = '\0'; - _log_iterator_printf_nl(&it, level, __NOWHERE__, "%s", buf); + _log_iterator_printf_nl(&it, level, __NOWHERE__, "GDB %s", buf); buf[0] = t; readp = buf + 1; } else if (readp + 120 >= bufe && linep != buf) { @@ -777,7 +777,7 @@ int logBacktrace(int level, struct __sourceloc whence) WHY_perror("read"); if (readp > linep) { *readp = '\0'; - _log_iterator_printf_nl(&it, level, __NOWHERE__, "%s", linep); + _log_iterator_printf_nl(&it, level, __NOWHERE__, "GDB %s", linep); } close(stdout_fds[0]); int status = 0; diff --git a/main.c b/main.c index c0437063..500a1eb2 100644 --- a/main.c +++ b/main.c @@ -20,13 +20,26 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #include "serval.h" #include "conf.h" +static void crash_handler(int signal); + int main(int argc, char **argv) { #if defined WIN32 WSADATA wsa_data; WSAStartup(MAKEWORD(1,1), &wsa_data); #endif - /* Setup signal handlers */ + /* Catch crash signals so that we can log a backtrace before expiring. 
*/ + struct sigaction sig; + sig.sa_handler = crash_handler; + sigemptyset(&sig.sa_mask); // Don't block any signals during handler + sig.sa_flags = SA_NODEFER | SA_RESETHAND; // So the signal handler can kill the process by re-sending the same signal to itself + sigaction(SIGSEGV, &sig, NULL); + sigaction(SIGFPE, &sig, NULL); + sigaction(SIGILL, &sig, NULL); + sigaction(SIGBUS, &sig, NULL); + sigaction(SIGABRT, &sig, NULL); + + /* Setup i/o signal handlers */ signal(SIGPIPE,sigPipeHandler); signal(SIGIO,sigIoHandler); @@ -40,6 +53,23 @@ int main(int argc, char **argv) return status; } +char crash_handler_clue[1024] = "no clue"; + +static void crash_handler(int signal) +{ + LOGF(LOG_LEVEL_FATAL, "Caught signal %s", alloca_signal_name(signal)); + LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s", crash_handler_clue); + dump_stack(LOG_LEVEL_FATAL); + BACKTRACE; + serverRespawn(); + // Now die of the same signal, so that our exit status reflects the cause. + INFOF("Re-sending signal %d to self", signal); + kill(getpid(), signal); + // If that didn't work, then die normally. + INFOF("exit(%d)", -signal); + exit(-signal); +} + #if 0 #include #define MAX_DEPTH 64 diff --git a/serval.h b/serval.h index 3c6353c0..e39f49a4 100644 --- a/serval.h +++ b/serval.h @@ -234,6 +234,7 @@ int server_check_stopfile(); void server_watchdog(struct sched_ent *alarm); void overlay_mdp_clean_socket_files(); void serverCleanUp(); +void serverRespawn(); int overlay_forward_payload(struct overlay_frame *f); int packetOkOverlay(struct overlay_interface *interface,unsigned char *packet, size_t len, diff --git a/server.c b/server.c index a178327f..78d4919a 100644 --- a/server.c +++ b/server.c @@ -47,7 +47,6 @@ int servalShutdown = 0; static int server_getpid = 0; void signal_handler(int signal); -void crash_handler(int signal); /** Return the PID of the currently running server process, return 0 if there is none. 
*/ @@ -111,18 +110,8 @@ int server() serverMode = 1; - /* Catch crash signals so that we can log a backtrace before expiring. */ - struct sigaction sig; - sig.sa_handler = crash_handler; - sigemptyset(&sig.sa_mask); // Don't block any signals during handler - sig.sa_flags = SA_NODEFER | SA_RESETHAND; // So the signal handler can kill the process by re-sending the same signal to itself - sigaction(SIGSEGV, &sig, NULL); - sigaction(SIGFPE, &sig, NULL); - sigaction(SIGILL, &sig, NULL); - sigaction(SIGBUS, &sig, NULL); - sigaction(SIGABRT, &sig, NULL); - /* Catch SIGHUP etc so that we can respond to requests to do things, eg, shut down. */ + struct sigaction sig; sig.sa_handler = signal_handler; sigemptyset(&sig.sa_mask); // Block the same signals during handler sigaddset(&sig.sa_mask, SIGHUP); @@ -415,111 +404,24 @@ void serverCleanUp() server_remove_stopfile(); } -static void signame(char *buf, size_t len, int signal) +void serverRespawn() { - const char *desc = ""; - switch(signal) { -#ifdef SIGHUP - case SIGHUP: desc = "HUP"; break; -#endif -#ifdef SIGINT - case SIGINT: desc = "INT"; break; -#endif -#ifdef SIGQUIT - case SIGQUIT: desc = "QUIT"; break; -#endif -#ifdef SIGILL - case SIGILL: desc = "ILL (not reset when caught)"; break; -#endif -#ifdef SIGTRAP - case SIGTRAP: desc = "TRAP (not reset when caught)"; break; -#endif -#ifdef SIGABRT - case SIGABRT: desc = "ABRT"; break; -#endif -#ifdef SIGPOLL - case SIGPOLL: desc = "POLL ([XSR] generated, not supported)"; break; -#endif -#ifdef SIGEMT - case SIGEMT: desc = "EMT"; break; -#endif -#ifdef SIGFPE - case SIGFPE: desc = "FPE"; break; -#endif -#ifdef SIGKILL - case SIGKILL: desc = "KILL (cannot be caught or ignored)"; break; -#endif -#ifdef SIGBUS - case SIGBUS: desc = "BUS"; break; -#endif -#ifdef SIGSEGV - case SIGSEGV: desc = "SEGV"; break; -#endif -#ifdef SIGSYS - case SIGSYS: desc = "SYS"; break; -#endif -#ifdef SIGPIPE - case SIGPIPE: desc = "PIPE"; break; -#endif -#ifdef SIGALRM - case SIGALRM: desc = 
"ALRM"; break; -#endif -#ifdef SIGTERM - case SIGTERM: desc = "TERM"; break; -#endif -#ifdef SIGURG - case SIGURG: desc = "URG"; break; -#endif -#ifdef SIGSTOP - case SIGSTOP: desc = "STOP"; break; -#endif -#ifdef SIGTSTP - case SIGTSTP: desc = "TSTP"; break; -#endif -#ifdef SIGCONT - case SIGCONT: desc = "CONT"; break; -#endif -#ifdef SIGCHLD - case SIGCHLD: desc = "CHLD"; break; -#endif -#ifdef SIGTTIN - case SIGTTIN: desc = "TTIN"; break; -#endif -#ifdef SIGTTOU - case SIGTTOU: desc = "TTOU"; break; -#endif -#ifdef SIGIO -#if SIGIO != SIGPOLL - case SIGIO: desc = "IO"; break; -#endif -#endif -#ifdef SIGXCPU - case SIGXCPU: desc = "XCPU"; break; -#endif -#ifdef SIGXFSZ - case SIGXFSZ: desc = "XFSZ"; break; -#endif -#ifdef SIGVTALRM - case SIGVTALRM: desc = "VTALRM"; break; -#endif -#ifdef SIGPROF - case SIGPROF: desc = "PROF"; break; -#endif -#ifdef SIGWINCH - case SIGWINCH: desc = "WINCH"; break; -#endif -#ifdef SIGINFO - case SIGINFO: desc = "INFO"; break; -#endif -#ifdef SIGUSR1 - case SIGUSR1: desc = "USR1"; break; -#endif -#ifdef SIGUSR2 - case SIGUSR2: desc = "USR2"; break; -#endif + if (serverMode && config.server.respawn_on_crash) { + unsigned i; + overlay_interface_close_all(); + char execpath[160]; + if (get_self_executable_path(execpath, sizeof execpath) != -1) { + strbuf b = strbuf_alloca(1024); + for (i = 0; i < exec_argc; ++i) + strbuf_append_shell_quotemeta(strbuf_puts(b, i ? 
" " : ""), exec_args[i]); + INFOF("Respawning %s as %s", execpath, strbuf_str(b)); + execv(execpath, exec_args); + /* Quit if the exec() fails */ + WHY_perror("execv"); + } else { + WHY("Cannot respawn"); + } } - snprintf(buf, len, "SIG%s (%d) %s", desc, signal, strsignal(signal)); - buf[len - 1] = '\0'; } void signal_handler(int signal) @@ -536,47 +438,10 @@ void signal_handler(int signal) return; } - char buf[80]; - signame(buf, sizeof(buf), signal); - - LOGF(LOG_LEVEL_FATAL, "Caught signal %s", buf); - LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s",crash_handler_clue); + LOGF(LOG_LEVEL_FATAL, "Caught signal %s", alloca_signal_name(signal)); + LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s", crash_handler_clue); dump_stack(LOG_LEVEL_FATAL); serverCleanUp(); exit(0); } - -char crash_handler_clue[1024]="no clue"; -void crash_handler(int signal) -{ - char buf[80]; - signame(buf, sizeof(buf), signal); - LOGF(LOG_LEVEL_FATAL, "Caught signal %s", buf); - LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s",crash_handler_clue); - dump_stack(LOG_LEVEL_FATAL); - - BACKTRACE; - if (config.server.respawn_on_crash) { - unsigned i; - overlay_interface_close_all(); - char execpath[160]; - if (get_self_executable_path(execpath, sizeof execpath) != -1) { - strbuf b = strbuf_alloca(1024); - for (i = 0; i < exec_argc; ++i) - strbuf_append_shell_quotemeta(strbuf_puts(b, i ? " " : ""), exec_args[i]); - INFOF("Respawning %s as %s", execpath, strbuf_str(b)); - execv(execpath, exec_args); - /* Quit if the exec() fails */ - WHY_perror("execv"); - } else { - WHY("Cannot respawn"); - } - } - // Now die of the same signal, so that our exit status reflects the cause. - INFOF("Re-sending signal %d to self", signal); - kill(getpid(), signal); - // If that didn't work, then die normally. 
- INFOF("exit(%d)", -signal); - exit(-signal); -} diff --git a/strbuf_helpers.c b/strbuf_helpers.c index 0f5f4bc6..05c271e8 100644 --- a/strbuf_helpers.c +++ b/strbuf_helpers.c @@ -318,6 +318,113 @@ strbuf strbuf_append_socket_domain(strbuf sb, int domain) return sb; } +strbuf strbuf_append_signal_name(strbuf sb, int signal) +{ + const char *desc = ""; + switch(signal) { +#ifdef SIGHUP + case SIGHUP: desc = "HUP"; break; +#endif +#ifdef SIGINT + case SIGINT: desc = "INT"; break; +#endif +#ifdef SIGQUIT + case SIGQUIT: desc = "QUIT"; break; +#endif +#ifdef SIGILL + case SIGILL: desc = "ILL (not reset when caught)"; break; +#endif +#ifdef SIGTRAP + case SIGTRAP: desc = "TRAP (not reset when caught)"; break; +#endif +#ifdef SIGABRT + case SIGABRT: desc = "ABRT"; break; +#endif +#ifdef SIGPOLL + case SIGPOLL: desc = "POLL ([XSR] generated, not supported)"; break; +#endif +#ifdef SIGEMT + case SIGEMT: desc = "EMT"; break; +#endif +#ifdef SIGFPE + case SIGFPE: desc = "FPE"; break; +#endif +#ifdef SIGKILL + case SIGKILL: desc = "KILL (cannot be caught or ignored)"; break; +#endif +#ifdef SIGBUS + case SIGBUS: desc = "BUS"; break; +#endif +#ifdef SIGSEGV + case SIGSEGV: desc = "SEGV"; break; +#endif +#ifdef SIGSYS + case SIGSYS: desc = "SYS"; break; +#endif +#ifdef SIGPIPE + case SIGPIPE: desc = "PIPE"; break; +#endif +#ifdef SIGALRM + case SIGALRM: desc = "ALRM"; break; +#endif +#ifdef SIGTERM + case SIGTERM: desc = "TERM"; break; +#endif +#ifdef SIGURG + case SIGURG: desc = "URG"; break; +#endif +#ifdef SIGSTOP + case SIGSTOP: desc = "STOP"; break; +#endif +#ifdef SIGTSTP + case SIGTSTP: desc = "TSTP"; break; +#endif +#ifdef SIGCONT + case SIGCONT: desc = "CONT"; break; +#endif +#ifdef SIGCHLD + case SIGCHLD: desc = "CHLD"; break; +#endif +#ifdef SIGTTIN + case SIGTTIN: desc = "TTIN"; break; +#endif +#ifdef SIGTTOU + case SIGTTOU: desc = "TTOU"; break; +#endif +#ifdef SIGIO +#if SIGIO != SIGPOLL + case SIGIO: desc = "IO"; break; +#endif +#endif +#ifdef SIGXCPU + case 
SIGXCPU: desc = "XCPU"; break; +#endif +#ifdef SIGXFSZ + case SIGXFSZ: desc = "XFSZ"; break; +#endif +#ifdef SIGVTALRM + case SIGVTALRM: desc = "VTALRM"; break; +#endif +#ifdef SIGPROF + case SIGPROF: desc = "PROF"; break; +#endif +#ifdef SIGWINCH + case SIGWINCH: desc = "WINCH"; break; +#endif +#ifdef SIGINFO + case SIGINFO: desc = "INFO"; break; +#endif +#ifdef SIGUSR1 + case SIGUSR1: desc = "USR1"; break; +#endif +#ifdef SIGUSR2 + case SIGUSR2: desc = "USR2"; break; +#endif + } + strbuf_sprintf(sb, "SIG%s (%d) %s", desc, signal, strsignal(signal)); + return sb; +} + strbuf strbuf_append_socket_type(strbuf sb, int type) { const char *typ = NULL; diff --git a/strbuf_helpers.h b/strbuf_helpers.h index 94bed1bd..71e79aa6 100644 --- a/strbuf_helpers.h +++ b/strbuf_helpers.h @@ -106,6 +106,12 @@ strbuf strbuf_append_argv(strbuf sb, int argc, const char *const *argv); */ strbuf strbuf_append_exit_status(strbuf sb, int status); +/* Append a textual description of a signal as used by kill(2) and signal(2). + * @author Andrew Bettison + */ +strbuf strbuf_append_signal_name(strbuf sb, int signal); +#define alloca_signal_name(sig) strbuf_str(strbuf_append_signal_name(strbuf_alloca(80), (sig))) + /* Append a textual description of a socket domain code (AF_...). * @author Andrew Bettison */