Fix Android server thread start bug

Was failing to start if a stale pidfile was present.

Introduced the 'debug.server' config option to help diagnose pidfile
issues.
This commit is contained in:
Andrew Bettison 2016-10-20 13:13:05 +10:30
parent 5191d424cb
commit 95cce9109f
4 changed files with 35 additions and 10 deletions

View File

@ -265,6 +265,7 @@ ATOM(bool_t, rhizome_rx, 0, boolean,, "")
ATOM(bool_t, rhizome_ads, 0, boolean,, "") ATOM(bool_t, rhizome_ads, 0, boolean,, "")
ATOM(bool_t, rhizome_mdp_rx, 0, boolean,, "") ATOM(bool_t, rhizome_mdp_rx, 0, boolean,, "")
ATOM(bool_t, rhizome_direct, 0, boolean,, "") ATOM(bool_t, rhizome_direct, 0, boolean,, "")
ATOM(bool_t, server, 0, boolean,, "")
ATOM(bool_t, subscriber, 0, boolean,, "") ATOM(bool_t, subscriber, 0, boolean,, "")
ATOM(bool_t, meshms, 0, boolean,, "") ATOM(bool_t, meshms, 0, boolean,, "")
ATOM(bool_t, vomp, 0, boolean,, "") ATOM(bool_t, vomp, 0, boolean,, "")

View File

@ -106,9 +106,9 @@ static int tgkill(int tgid, int tid, int signum)
static struct pid_tid read_pidfile(const char *path) static struct pid_tid read_pidfile(const char *path)
{ {
int fd = open(path, O_RDONLY); int fd = open(path, O_RDONLY);
if (fd == -1 && errno == ENOENT)
return (struct pid_tid){ .pid = 0, .tid = 0 };
if (fd == -1) { if (fd == -1) {
if (errno == ENOENT)
return (struct pid_tid){ .pid = 0, .tid = 0 };
WHYF_perror("open(%s, O_RDONLY)", alloca_str_toprint(path)); WHYF_perror("open(%s, O_RDONLY)", alloca_str_toprint(path));
return (struct pid_tid){ .pid = -1, .tid = -1 }; return (struct pid_tid){ .pid = -1, .tid = -1 };
} }
@ -116,13 +116,20 @@ static struct pid_tid read_pidfile(const char *path)
int32_t tid = -1; int32_t tid = -1;
char buf[30]; char buf[30];
ssize_t len = read(fd, buf, sizeof buf); ssize_t len = read(fd, buf, sizeof buf);
if (len > 0 && (size_t)len < sizeof buf) { if (len == -1) {
WHYF_perror("read(%s, %p, %zu)", alloca_str_toprint(path), buf, sizeof buf);
} else if (len > 0 && (size_t)len < sizeof buf) {
DEBUGF(server, "Read from pidfile %s: %s", path, alloca_toprint(-1, buf, len));
buf[len] = '\0'; buf[len] = '\0';
const char *e = NULL; const char *e = NULL;
if (str_to_int32(buf, 10, &pid, &e) && *e == ' ') if (!str_to_int32(buf, 10, &pid, &e))
pid = -1;
else if (*e == ' ')
str_to_int32(e + 1, 10, &tid, NULL); str_to_int32(e + 1, 10, &tid, NULL);
else else
tid = pid; tid = pid;
} else {
WARNF("Pidfile %s has invalid content: %s", path, alloca_toprint(-1, buf, len));
} }
if (pid > 0) { if (pid > 0) {
// Only return a valid pid/tid if the file is currently locked by the same process that it // Only return a valid pid/tid if the file is currently locked by the same process that it
@ -134,8 +141,10 @@ static struct pid_tid read_pidfile(const char *path)
lock.l_whence = SEEK_SET; lock.l_whence = SEEK_SET;
lock.l_len = len; lock.l_len = len;
fcntl(fd, F_GETLK, &lock); fcntl(fd, F_GETLK, &lock);
if (lock.l_type == F_UNLCK || lock.l_pid != pid) if (lock.l_type == F_UNLCK || lock.l_pid != pid) {
DEBUGF(server, "Pidfile %s is not locked by pid %d", path, pid);
pid = tid = -1; pid = tid = -1;
}
} }
close(fd); close(fd);
return (struct pid_tid){ .pid = pid, .tid = tid }; return (struct pid_tid){ .pid = pid, .tid = tid };
@ -168,6 +177,7 @@ static struct pid_tid get_server_pid_tid()
if (id.pid == -1) { if (id.pid == -1) {
INFOF("Unlinking stale pidfile %s", pidfile_path); INFOF("Unlinking stale pidfile %s", pidfile_path);
unlink(pidfile_path); unlink(pidfile_path);
id.pid = 0;
} }
return id; return id;
error: error:
@ -367,10 +377,13 @@ static int server_write_pid()
// Create the temporary pidfile and lock it, deleting any stale temporaries if necessary. Leave // Create the temporary pidfile and lock it, deleting any stale temporaries if necessary. Leave
// the temporary pidfile open to retain the lock -- if successful it will eventually be the real // the temporary pidfile open to retain the lock -- if successful it will eventually be the real
// pidfile. // pidfile.
DEBUGF(server, "unlink(%s)", alloca_str_toprint(tmpfile_path));
unlink(tmpfile_path); unlink(tmpfile_path);
DEBUGF(server, "open(%s, O_RDWR|O_CREAT|O_CLOEXEC)", alloca_str_toprint(tmpfile_path));
int fd = open(tmpfile_path, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); int fd = open(tmpfile_path, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd==-1) if (fd==-1)
return WHYF_perror("Cannot create temporary pidfile: open(%s, O_RDWR|O_CREAT|O_CLOEXEC)", alloca_str_toprint(tmpfile_path)); return WHYF_perror("Cannot create temporary pidfile: open(%s, O_RDWR|O_CREAT|O_CLOEXEC)", alloca_str_toprint(tmpfile_path));
DEBUG(server, "lock");
if (fcntl(fd, F_SETLK, &lock) == -1) { if (fcntl(fd, F_SETLK, &lock) == -1) {
WHYF_perror("Cannot lock temporary pidfile %s: fcntl(%d, F_SETLK, &lock)", tmpfile_path, fd); WHYF_perror("Cannot lock temporary pidfile %s: fcntl(%d, F_SETLK, &lock)", tmpfile_path, fd);
close(fd); close(fd);
@ -380,6 +393,7 @@ static int server_write_pid()
close(fd); close(fd);
return WHYF_perror("ftruncate(%d, 0)", fd); return WHYF_perror("ftruncate(%d, 0)", fd);
} }
DEBUGF(server, "write %s", alloca_toprint(-1, content, content_size));
if (write(fd, content, content_size) != (ssize_t)content_size){ if (write(fd, content, content_size) != (ssize_t)content_size){
close(fd); close(fd);
return WHYF_perror("write(%d, %s, %zu)", fd, alloca_str_toprint(content), content_size); return WHYF_perror("write(%d, %s, %zu)", fd, alloca_str_toprint(content), content_size);
@ -392,7 +406,10 @@ static int server_write_pid()
// exists, then if the existent pidfile is locked, there is another daemon running, so bail out. // exists, then if the existent pidfile is locked, there is another daemon running, so bail out.
// If the existent pidfile is not locked, then it is stale, so delete it and re-try the link. // If the existent pidfile is not locked, then it is stale, so delete it and re-try the link.
unsigned int tries = 0; unsigned int tries = 0;
while (link(tmpfile_path, pidfile_path) == -1) { while (1) {
INFOF("link(%s, %s)", alloca_str_toprint(tmpfile_path), alloca_str_toprint(pidfile_path));
if (link(tmpfile_path, pidfile_path) != -1)
break;
if (errno == EEXIST && ++tries < 2) { if (errno == EEXIST && ++tries < 2) {
struct pid_tid id = read_pidfile(pidfile_path); struct pid_tid id = read_pidfile(pidfile_path);
if (id.pid == -1) { if (id.pid == -1) {
@ -402,14 +419,14 @@ static int server_write_pid()
INFOF("Another daemon is running, pid=%d tid=%d", id.pid, id.tid); INFOF("Another daemon is running, pid=%d tid=%d", id.pid, id.tid);
return 1; return 1;
} }
} } else {
else { WHYF_perror("Cannot link temporary pidfile %s to %s", tmpfile_path, pidfile_path);
WHYF_perror("Cannot link temporary pidfile %s to pidfile %s", tmpfile_path, pidfile_path);
close(fd); close(fd);
unlink(tmpfile_path); unlink(tmpfile_path);
return -1; return -1;
} }
} }
DEBUGF(server, "Created pidfile %s", pidfile_path);
// The link was successful, so delete the temporary pidfile but leave the pid file open so that // The link was successful, so delete the temporary pidfile but leave the pid file open so that
// the lock remains held! // the lock remains held!

View File

@ -174,6 +174,7 @@ setup_servaldThread() {
create_single_identity create_single_identity
executeOk_servald config \ executeOk_servald config \
set debug.jni 1 \ set debug.jni 1 \
set debug.server 1 \
set debug.verbose 1 \ set debug.verbose 1 \
set log.console.level debug set log.console.level debug
} }

View File

@ -37,7 +37,8 @@ configure_servald_server() {
executeOk_servald config \ executeOk_servald config \
set log.console.level debug \ set log.console.level debug \
set log.console.show_time true \ set log.console.show_time true \
set log.console.show_pid true set log.console.show_pid true \
set debug.server true
} }
doc_StartCreateInstanceDir="Starting server creates instance directory" doc_StartCreateInstanceDir="Starting server creates instance directory"
@ -178,6 +179,7 @@ setup_MonitorQuit() {
set log.console.level debug \ set log.console.level debug \
set log.console.show_time true \ set log.console.show_time true \
set log.console.show_pid true \ set log.console.show_pid true \
set debug.server true \
set debug.monitor on set debug.monitor on
} }
setup setup
@ -228,6 +230,7 @@ EOF
set log.console.level debug \ set log.console.level debug \
set log.console.show_time true \ set log.console.show_time true \
set log.console.show_pid true \ set log.console.show_pid true \
set debug.server on \
set debug.watchdog on \ set debug.watchdog on \
set server.watchdog.executable "$PWD/watchdog" \ set server.watchdog.executable "$PWD/watchdog" \
set server.watchdog.interval_ms 100 set server.watchdog.interval_ms 100
@ -262,6 +265,7 @@ EOF
set log.console.level debug \ set log.console.level debug \
set log.console.show_time true \ set log.console.show_time true \
set log.console.show_pid true \ set log.console.show_pid true \
set debug.server on \
set debug.watchdog on \ set debug.watchdog on \
set server.watchdog.executable "$PWD/watchdog1" \ set server.watchdog.executable "$PWD/watchdog1" \
set server.watchdog.interval_ms 100 set server.watchdog.interval_ms 100
@ -297,6 +301,7 @@ EOF
set log.console.show_time true \ set log.console.show_time true \
set log.console.show_pid true \ set log.console.show_pid true \
set debug.config on \ set debug.config on \
set debug.server on \
set debug.watchdog on \ set debug.watchdog on \
set debug.mdprequests on \ set debug.mdprequests on \
set server.config_reload_interval_ms 600000 \ set server.config_reload_interval_ms 600000 \
@ -332,6 +337,7 @@ setup_ReloadConfigSetSync() {
set log.console.show_time true \ set log.console.show_time true \
set log.console.show_pid true \ set log.console.show_pid true \
set debug.config on \ set debug.config on \
set debug.server on \
set debug.mdprequests on \ set debug.mdprequests on \
set server.config_reload_interval_ms 600000 \ set server.config_reload_interval_ms 600000 \
set server.motd "Abcdef" set server.motd "Abcdef"