From 2241a15e487234ef94ca418edbc03a60a3c1346c Mon Sep 17 00:00:00 2001 From: Jeremy Lakeman Date: Wed, 6 Apr 2016 14:57:07 +0930 Subject: [PATCH] Use file locking to better detect if the server really is running --- server.c | 115 +++++++++++++++++++++++++++------------------------ tests/server | 23 ----------- 2 files changed, 62 insertions(+), 76 deletions(-) diff --git a/server.c b/server.c index 035d3c02..98115a20 100644 --- a/server.c +++ b/server.c @@ -49,11 +49,11 @@ __thread keyring_file *keyring=NULL; static char pidfile_path[256]; static int server_getpid = 0; +static int server_pidfd = -1; static int server_bind(); static void server_loop(); static int server(); static int server_write_pid(); -static int server_unlink_pid(); static void signal_handler(int signal); static void serverCleanUp(); static const char *_server_pidfile_path(struct __sourceloc __whence); @@ -83,15 +83,32 @@ int server_pid() return -1; const char *p = strrchr(ppath, '/'); assert(p != NULL); - FILE *f = fopen(ppath, "r"); - if (f == NULL) { + int pid = -1; + int fd = open(ppath, O_RDONLY); + if (fd == -1) { if (errno != ENOENT) - return WHYF_perror("fopen(%s,\"r\")", alloca_str_toprint(ppath)); + return WHYF_perror("open(%s, O_RDONLY)", alloca_str_toprint(ppath)); } else { char buf[20]; - int pid = (fgets(buf, sizeof buf, f) != NULL) ? atoi(buf) : -1; - fclose(f); - if (pid > 0 && kill(pid, 0) != -1) + ssize_t len = read(fd, buf, sizeof buf); + if (len>0){ + buf[len]=0; + pid = atoi(buf); + } + if (pid > 0){ + // check that the server process still holds a lock + struct flock lock; + bzero(&lock, sizeof lock); + lock.l_type = F_RDLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = len; + fcntl(fd, F_GETLK, &lock); + if (lock.l_type == F_UNLCK || lock.l_pid != pid) + pid = -1; + } + close(fd); + if (pid > 0) return pid; INFOF("Unlinking stale pidfile %s", ppath); unlink(ppath); @@ -246,25 +263,6 @@ static void wokeup() #endif -DEFINE_ALARM(server_shutdown_check); -void server_shutdown_check(struct sched_ent *alarm) -{ - // TODO we should watch a descriptor and quit when it closes - /* If this server has been supplanted with another or Serval has been uninstalled, then its PID - file will change or be unaccessible. In this case, shut down without all the cleanup. - Perform this check at most once per second. */ - static time_ms_t server_pid_time_ms = 0; - time_ms_t now = gettime_ms(); - if (server_pid_time_ms == 0 || now - server_pid_time_ms > 1000) { - server_pid_time_ms = now; - if (server_pid() != server_getpid) { - WARNF("Server pid file no longer contains pid=%d -- shutting down without cleanup", server_getpid); - exit(1); - } - } - RESCHEDULE(alarm, now+1000, TIME_MS_NEVER_WILL, now+1100); -} - static int server_bind() { serverMode = SERVER_RUNNING; @@ -335,9 +333,6 @@ static int server_bind() time_ms_t now = gettime_ms(); - // Periodically check for server shut down - RESCHEDULE(&ALARM_STRUCT(server_shutdown_check), now, TIME_MS_NEVER_WILL, now); - olsr_init_socket(); /* Calculate (and possibly show) CPU usage stats periodically */ @@ -357,12 +352,13 @@ static void server_loop() while((serverMode==SERVER_RUNNING) && fd_poll2(waiting, wokeup)) ; serverCleanUp(); - - /* It is safe to unlink the pidfile here without checking whether it actually contains our own - * PID, because server_shutdown_check() will have been executed very recently (in fd_poll()), so - * if the code reaches here, the check has been done recently. - */ - server_unlink_pid(); + + if (server_pidfd!=-1){ + close(server_pidfd); + server_pidfd = -1; + const char *ppath = server_pidfile_path(); + unlink(ppath); + } serverMode = 0; } @@ -389,25 +385,38 @@ static int server_write_pid() const char *ppath = server_pidfile_path(); if (ppath == NULL) return -1; - - FILE *f = fopen(ppath, "w"); - if (!f) - return WHYF_perror("fopen(%s,\"w\")", alloca_str_toprint(ppath)); - server_getpid = getpid(); - fprintf(f,"%d\n", server_getpid); - fclose(f); - - return 0; -} -static int server_unlink_pid() -{ - /* Remove PID file to indicate that the server is no longer running */ - const char *ppath = server_pidfile_path(); - if (ppath == NULL) - return -1; - if (unlink(ppath) == -1) - WHYF_perror("unlink(%s)", alloca_str_toprint(ppath)); + int fd = open(ppath, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd==-1) + return WHYF_perror("open(%s, O_RDWR | O_CREAT)", alloca_str_toprint(ppath)); + + int pid = server_getpid = getpid(); + char buf[20]; + int len = snprintf(buf, sizeof buf, "%d", pid); + + struct flock lock; + bzero(&lock, sizeof lock); + lock.l_type = F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = len; + if (fcntl(fd, F_SETLK, &lock)==-1){ + close(fd); + return WHYF_perror("fcntl(%d, F_SETLK, &lock)", fd); + } + + if (ftruncate(fd, 0)==-1){ + close(fd); + return WHYF_perror("ftruncate(%d, 0)", fd); + } + + if (write(fd, buf, len)!=len){ + close(fd); + return WHYF_perror("write(%d, %p, %d)", fd, buf, len); + } + + // leave the pid file open! + server_pidfd = fd; return 0; } diff --git a/tests/server b/tests/server index c34af55a..fe82f6f8 100755 --- a/tests/server +++ b/tests/server @@ -141,29 +141,6 @@ test_StartTwice() { assert_no_servald_processes } -doc_RemovePid="Server stops when pid file removed" -setup_RemovePid() { - setup - # Set a watchdog alarm to ensure that the daemon wakes regularly, so the - # pidfile is checked quickly, otherwise the daemon will wait 30 seconds - # before waking itself to check. - >watchdog - chmod 0550 watchdog - executeOk_servald config \ - set log.console.level debug \ - set log.console.show_time true \ - set log.console.show_pid true \ - set debug.io true \ - set debug.watchdog on \ - set server.watchdog.executable "$PWD/watchdog" \ - set server.watchdog.interval_ms 100 - start_servald_server -} -test_RemovePid() { - rm $instance_servald_pidfile - wait_until ! kill -0 $servald_pid 2>/dev/null -} - doc_MonitorQuit="Server stops due to monitor client disconnection" setup_MonitorQuit() { setup