Set up SEGV crash handler in main() not server()

Now all commands will attempt to log a GDB backtrace on a crash, not
just the server daemon process.

Added strbuf_append_signal_name()
This commit is contained in:
Andrew Bettison 2014-05-08 11:19:43 +09:30
parent 7a3095de09
commit 2c0efc6334
6 changed files with 169 additions and 160 deletions

10
log.c
View File

@ -733,8 +733,8 @@ int logBacktrace(int level, struct __sourceloc whence)
_exit(-2);
}
close(stdout_fds[0]);
/* XXX: Need the cast on Solaris because it defines NULL as 0L and gcc doesn't
* see it as a sentinel */
// Need the (void*) cast on Solaris because it defines NULL as 0L and gcc doesn't accept it as a
// sentinel
execlp("gdb", "gdb", "-n", "-batch", "-x", tempfile, execpath, pidstr, (void*)NULL);
perror("execlp(\"gdb\")");
do { _exit(-3); } while (1);
@ -754,14 +754,14 @@ int logBacktrace(int level, struct __sourceloc whence)
for (; p < readp; ++p)
if (*p == '\n' || *p == '\0') {
*p = '\0';
_log_iterator_printf_nl(&it, level, __NOWHERE__, "%s", linep);
_log_iterator_printf_nl(&it, level, __NOWHERE__, "GDB %s", linep);
linep = p + 1;
}
if (readp >= bufe && linep == buf) {
// Line does not fit into buffer.
char t = bufe[-1];
bufe[-1] = '\0';
_log_iterator_printf_nl(&it, level, __NOWHERE__, "%s", buf);
_log_iterator_printf_nl(&it, level, __NOWHERE__, "GDB %s", buf);
buf[0] = t;
readp = buf + 1;
} else if (readp + 120 >= bufe && linep != buf) {
@ -777,7 +777,7 @@ int logBacktrace(int level, struct __sourceloc whence)
WHY_perror("read");
if (readp > linep) {
*readp = '\0';
_log_iterator_printf_nl(&it, level, __NOWHERE__, "%s", linep);
_log_iterator_printf_nl(&it, level, __NOWHERE__, "GDB %s", linep);
}
close(stdout_fds[0]);
int status = 0;

32
main.c
View File

@ -20,13 +20,26 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include "serval.h"
#include "conf.h"
static void crash_handler(int signal);
int main(int argc, char **argv)
{
#if defined WIN32
WSADATA wsa_data;
WSAStartup(MAKEWORD(1,1), &wsa_data);
#endif
/* Setup signal handlers */
/* Catch crash signals so that we can log a backtrace before expiring. */
struct sigaction sig;
sig.sa_handler = crash_handler;
sigemptyset(&sig.sa_mask); // Don't block any signals during handler
sig.sa_flags = SA_NODEFER | SA_RESETHAND; // So the signal handler can kill the process by re-sending the same signal to itself
sigaction(SIGSEGV, &sig, NULL);
sigaction(SIGFPE, &sig, NULL);
sigaction(SIGILL, &sig, NULL);
sigaction(SIGBUS, &sig, NULL);
sigaction(SIGABRT, &sig, NULL);
/* Setup i/o signal handlers */
signal(SIGPIPE,sigPipeHandler);
signal(SIGIO,sigIoHandler);
@ -40,6 +53,23 @@ int main(int argc, char **argv)
return status;
}
char crash_handler_clue[1024] = "no clue";
/* Handler for fatal signals: logs what is known about the crash, gives the
 * server a chance to respawn, then terminates the process.
 *
 * Steps, in order:
 *  1. Log the signal name and the current crash_handler_clue text at FATAL level.
 *  2. Log a stack dump and a backtrace.
 *  3. Call serverRespawn().
 *  4. If control comes back, send the same signal to this process so that the
 *     exit status reflects the original cause.
 *  5. If the process is still alive after that, exit with status -signum.
 *
 * @param signum  number of the signal being handled
 */
static void crash_handler(int signum)
{
  const int exit_status = -signum;

  // Record as much diagnostic information as possible before anything else.
  LOGF(LOG_LEVEL_FATAL, "Caught signal %s", alloca_signal_name(signum));
  LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s", crash_handler_clue);
  dump_stack(LOG_LEVEL_FATAL);
  BACKTRACE;

  serverRespawn();

  // Die of the same signal, so that our exit status reflects the cause.
  INFOF("Re-sending signal %d to self", signum);
  kill(getpid(), signum);

  // The signal did not terminate us, so fall back to a normal exit.
  INFOF("exit(%d)", exit_status);
  exit(exit_status);
}
#if 0
#include <execinfo.h>
#define MAX_DEPTH 64

View File

@ -234,6 +234,7 @@ int server_check_stopfile();
void server_watchdog(struct sched_ent *alarm);
void overlay_mdp_clean_socket_files();
void serverCleanUp();
void serverRespawn();
int overlay_forward_payload(struct overlay_frame *f);
int packetOkOverlay(struct overlay_interface *interface,unsigned char *packet, size_t len,

173
server.c
View File

@ -47,7 +47,6 @@ int servalShutdown = 0;
static int server_getpid = 0;
void signal_handler(int signal);
void crash_handler(int signal);
/** Return the PID of the currently running server process, return 0 if there is none.
*/
@ -111,18 +110,8 @@ int server()
serverMode = 1;
/* Catch crash signals so that we can log a backtrace before expiring. */
struct sigaction sig;
sig.sa_handler = crash_handler;
sigemptyset(&sig.sa_mask); // Don't block any signals during handler
sig.sa_flags = SA_NODEFER | SA_RESETHAND; // So the signal handler can kill the process by re-sending the same signal to itself
sigaction(SIGSEGV, &sig, NULL);
sigaction(SIGFPE, &sig, NULL);
sigaction(SIGILL, &sig, NULL);
sigaction(SIGBUS, &sig, NULL);
sigaction(SIGABRT, &sig, NULL);
/* Catch SIGHUP etc so that we can respond to requests to do things, eg, shut down. */
struct sigaction sig;
sig.sa_handler = signal_handler;
sigemptyset(&sig.sa_mask); // Block the same signals during handler
sigaddset(&sig.sa_mask, SIGHUP);
@ -415,111 +404,24 @@ void serverCleanUp()
server_remove_stopfile();
}
static void signame(char *buf, size_t len, int signal)
void serverRespawn()
{
const char *desc = "";
switch(signal) {
#ifdef SIGHUP
case SIGHUP: desc = "HUP"; break;
#endif
#ifdef SIGINT
case SIGINT: desc = "INT"; break;
#endif
#ifdef SIGQUIT
case SIGQUIT: desc = "QUIT"; break;
#endif
#ifdef SIGILL
case SIGILL: desc = "ILL (not reset when caught)"; break;
#endif
#ifdef SIGTRAP
case SIGTRAP: desc = "TRAP (not reset when caught)"; break;
#endif
#ifdef SIGABRT
case SIGABRT: desc = "ABRT"; break;
#endif
#ifdef SIGPOLL
case SIGPOLL: desc = "POLL ([XSR] generated, not supported)"; break;
#endif
#ifdef SIGEMT
case SIGEMT: desc = "EMT"; break;
#endif
#ifdef SIGFPE
case SIGFPE: desc = "FPE"; break;
#endif
#ifdef SIGKILL
case SIGKILL: desc = "KILL (cannot be caught or ignored)"; break;
#endif
#ifdef SIGBUS
case SIGBUS: desc = "BUS"; break;
#endif
#ifdef SIGSEGV
case SIGSEGV: desc = "SEGV"; break;
#endif
#ifdef SIGSYS
case SIGSYS: desc = "SYS"; break;
#endif
#ifdef SIGPIPE
case SIGPIPE: desc = "PIPE"; break;
#endif
#ifdef SIGALRM
case SIGALRM: desc = "ALRM"; break;
#endif
#ifdef SIGTERM
case SIGTERM: desc = "TERM"; break;
#endif
#ifdef SIGURG
case SIGURG: desc = "URG"; break;
#endif
#ifdef SIGSTOP
case SIGSTOP: desc = "STOP"; break;
#endif
#ifdef SIGTSTP
case SIGTSTP: desc = "TSTP"; break;
#endif
#ifdef SIGCONT
case SIGCONT: desc = "CONT"; break;
#endif
#ifdef SIGCHLD
case SIGCHLD: desc = "CHLD"; break;
#endif
#ifdef SIGTTIN
case SIGTTIN: desc = "TTIN"; break;
#endif
#ifdef SIGTTOU
case SIGTTOU: desc = "TTOU"; break;
#endif
#ifdef SIGIO
#if SIGIO != SIGPOLL
case SIGIO: desc = "IO"; break;
#endif
#endif
#ifdef SIGXCPU
case SIGXCPU: desc = "XCPU"; break;
#endif
#ifdef SIGXFSZ
case SIGXFSZ: desc = "XFSZ"; break;
#endif
#ifdef SIGVTALRM
case SIGVTALRM: desc = "VTALRM"; break;
#endif
#ifdef SIGPROF
case SIGPROF: desc = "PROF"; break;
#endif
#ifdef SIGWINCH
case SIGWINCH: desc = "WINCH"; break;
#endif
#ifdef SIGINFO
case SIGINFO: desc = "INFO"; break;
#endif
#ifdef SIGUSR1
case SIGUSR1: desc = "USR1"; break;
#endif
#ifdef SIGUSR2
case SIGUSR2: desc = "USR2"; break;
#endif
if (serverMode && config.server.respawn_on_crash) {
unsigned i;
overlay_interface_close_all();
char execpath[160];
if (get_self_executable_path(execpath, sizeof execpath) != -1) {
strbuf b = strbuf_alloca(1024);
for (i = 0; i < exec_argc; ++i)
strbuf_append_shell_quotemeta(strbuf_puts(b, i ? " " : ""), exec_args[i]);
INFOF("Respawning %s as %s", execpath, strbuf_str(b));
execv(execpath, exec_args);
/* Quit if the exec() fails */
WHY_perror("execv");
} else {
WHY("Cannot respawn");
}
}
snprintf(buf, len, "SIG%s (%d) %s", desc, signal, strsignal(signal));
buf[len - 1] = '\0';
}
void signal_handler(int signal)
@ -536,47 +438,10 @@ void signal_handler(int signal)
return;
}
char buf[80];
signame(buf, sizeof(buf), signal);
LOGF(LOG_LEVEL_FATAL, "Caught signal %s", buf);
LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s",crash_handler_clue);
LOGF(LOG_LEVEL_FATAL, "Caught signal %s", alloca_signal_name(signal));
LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s", crash_handler_clue);
dump_stack(LOG_LEVEL_FATAL);
serverCleanUp();
exit(0);
}
char crash_handler_clue[1024]="no clue";
/* Handler for fatal signals in the server process.
 *
 * Logs the signal name, the crash_handler_clue text, a stack dump and a
 * backtrace.  If config.server.respawn_on_crash is set, closes all overlay
 * interfaces and tries to execv() this executable again with the saved
 * exec_args.  If no respawn happens (disabled, path lookup failed, or execv()
 * returned), re-sends the signal to this process and, as a last resort, exits
 * with status -signum.
 *
 * @param signum  number of the signal being handled
 */
void crash_handler(int signum)
{
  char signal_desc[80];
  signame(signal_desc, sizeof(signal_desc), signum);
  LOGF(LOG_LEVEL_FATAL, "Caught signal %s", signal_desc);
  LOGF(LOG_LEVEL_FATAL, "The following clue may help: %s",crash_handler_clue);
  dump_stack(LOG_LEVEL_FATAL);
  BACKTRACE;

  if (config.server.respawn_on_crash) {
    overlay_interface_close_all();
    char self_path[160];
    if (get_self_executable_path(self_path, sizeof self_path) == -1) {
      WHY("Cannot respawn");
    } else {
      // Build a shell-quoted rendering of the argument list, for the log only.
      strbuf cmdline = strbuf_alloca(1024);
      unsigned argi = 0;
      while (argi < exec_argc) {
        strbuf_append_shell_quotemeta(strbuf_puts(cmdline, argi ? " " : ""), exec_args[argi]);
        ++argi;
      }
      INFOF("Respawning %s as %s", self_path, strbuf_str(cmdline));
      execv(self_path, exec_args);
      // execv() only returns on failure; report it and fall through to die.
      WHY_perror("execv");
    }
  }

  // Die of the same signal, so that our exit status reflects the cause.
  INFOF("Re-sending signal %d to self", signum);
  kill(getpid(), signum);

  // The signal did not terminate us, so fall back to a normal exit.
  INFOF("exit(%d)", -signum);
  exit(-signum);
}

View File

@ -318,6 +318,113 @@ strbuf strbuf_append_socket_domain(strbuf sb, int domain)
return sb;
}
/* Append a textual description of a signal number to a strbuf.
 *
 * The appended text has the form "SIG<NAME> (<number>) <text>", where <NAME>
 * is the short label from the table below (empty if the number is not listed
 * or not defined on this platform) and <text> is whatever strsignal() returns
 * for the number.
 *
 * NOTE(review): strsignal() is documented to return NULL for invalid signal
 * numbers on some systems -- confirm that strbuf_sprintf() copes with a NULL
 * "%s" argument, or that callers only pass valid signal numbers.
 *
 * @param sb      the strbuf to append to
 * @param signal  the signal number to describe
 * @return        sb
 */
strbuf strbuf_append_signal_name(strbuf sb, int signal)
{
  // Each entry is compiled in only if the platform defines that signal.
  static const struct {
    int number;
    const char *label;
  } known_signals[] = {
#ifdef SIGHUP
    { SIGHUP, "HUP" },
#endif
#ifdef SIGINT
    { SIGINT, "INT" },
#endif
#ifdef SIGQUIT
    { SIGQUIT, "QUIT" },
#endif
#ifdef SIGILL
    { SIGILL, "ILL (not reset when caught)" },
#endif
#ifdef SIGTRAP
    { SIGTRAP, "TRAP (not reset when caught)" },
#endif
#ifdef SIGABRT
    { SIGABRT, "ABRT" },
#endif
#ifdef SIGPOLL
    { SIGPOLL, "POLL ([XSR] generated, not supported)" },
#endif
#ifdef SIGEMT
    { SIGEMT, "EMT" },
#endif
#ifdef SIGFPE
    { SIGFPE, "FPE" },
#endif
#ifdef SIGKILL
    { SIGKILL, "KILL (cannot be caught or ignored)" },
#endif
#ifdef SIGBUS
    { SIGBUS, "BUS" },
#endif
#ifdef SIGSEGV
    { SIGSEGV, "SEGV" },
#endif
#ifdef SIGSYS
    { SIGSYS, "SYS" },
#endif
#ifdef SIGPIPE
    { SIGPIPE, "PIPE" },
#endif
#ifdef SIGALRM
    { SIGALRM, "ALRM" },
#endif
#ifdef SIGTERM
    { SIGTERM, "TERM" },
#endif
#ifdef SIGURG
    { SIGURG, "URG" },
#endif
#ifdef SIGSTOP
    { SIGSTOP, "STOP" },
#endif
#ifdef SIGTSTP
    { SIGTSTP, "TSTP" },
#endif
#ifdef SIGCONT
    { SIGCONT, "CONT" },
#endif
#ifdef SIGCHLD
    { SIGCHLD, "CHLD" },
#endif
#ifdef SIGTTIN
    { SIGTTIN, "TTIN" },
#endif
#ifdef SIGTTOU
    { SIGTTOU, "TTOU" },
#endif
#ifdef SIGIO
#if SIGIO != SIGPOLL
    // Skipped where SIGIO is the same number as SIGPOLL (already listed above).
    { SIGIO, "IO" },
#endif
#endif
#ifdef SIGXCPU
    { SIGXCPU, "XCPU" },
#endif
#ifdef SIGXFSZ
    { SIGXFSZ, "XFSZ" },
#endif
#ifdef SIGVTALRM
    { SIGVTALRM, "VTALRM" },
#endif
#ifdef SIGPROF
    { SIGPROF, "PROF" },
#endif
#ifdef SIGWINCH
    { SIGWINCH, "WINCH" },
#endif
#ifdef SIGINFO
    { SIGINFO, "INFO" },
#endif
#ifdef SIGUSR1
    { SIGUSR1, "USR1" },
#endif
#ifdef SIGUSR2
    { SIGUSR2, "USR2" },
#endif
  };
  const unsigned known_count = sizeof known_signals / sizeof known_signals[0];

  // Look up the short label; an unlisted number keeps the empty label.
  const char *desc = "";
  unsigned idx;
  for (idx = 0; idx != known_count; ++idx) {
    if (known_signals[idx].number == signal) {
      desc = known_signals[idx].label;
      break;
    }
  }

  strbuf_sprintf(sb, "SIG%s (%d) %s", desc, signal, strsignal(signal));
  return sb;
}
strbuf strbuf_append_socket_type(strbuf sb, int type)
{
const char *typ = NULL;

View File

@ -106,6 +106,12 @@ strbuf strbuf_append_argv(strbuf sb, int argc, const char *const *argv);
*/
strbuf strbuf_append_exit_status(strbuf sb, int status);
/* Append a textual description of a signal as used by kill(2) and signal(2).
* @author Andrew Bettison <andrew@servalproject.com>
*/
strbuf strbuf_append_signal_name(strbuf sb, int signal);
#define alloca_signal_name(sig) strbuf_str(strbuf_append_signal_name(strbuf_alloca(80), (sig)))
/* Append a textual description of a socket domain code (AF_...).
* @author Andrew Bettison <andrew@servalproject.com>
*/