mirror of
https://github.com/servalproject/serval-dna.git
synced 2025-04-08 03:24:13 +00:00
Use file locking to better detect if the server really is running
This commit is contained in:
parent
1cf46dcef9
commit
2241a15e48
115
server.c
115
server.c
@ -49,11 +49,11 @@ __thread keyring_file *keyring=NULL;
|
||||
|
||||
static char pidfile_path[256];
|
||||
static int server_getpid = 0;
|
||||
static int server_pidfd = -1;
|
||||
static int server_bind();
|
||||
static void server_loop();
|
||||
static int server();
|
||||
static int server_write_pid();
|
||||
static int server_unlink_pid();
|
||||
static void signal_handler(int signal);
|
||||
static void serverCleanUp();
|
||||
static const char *_server_pidfile_path(struct __sourceloc __whence);
|
||||
@ -83,15 +83,32 @@ int server_pid()
|
||||
return -1;
|
||||
const char *p = strrchr(ppath, '/');
|
||||
assert(p != NULL);
|
||||
FILE *f = fopen(ppath, "r");
|
||||
if (f == NULL) {
|
||||
int pid = -1;
|
||||
int fd = open(ppath, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
if (errno != ENOENT)
|
||||
return WHYF_perror("fopen(%s,\"r\")", alloca_str_toprint(ppath));
|
||||
return WHYF_perror("open(%s, O_RDONLY)", alloca_str_toprint(ppath));
|
||||
} else {
|
||||
char buf[20];
|
||||
int pid = (fgets(buf, sizeof buf, f) != NULL) ? atoi(buf) : -1;
|
||||
fclose(f);
|
||||
if (pid > 0 && kill(pid, 0) != -1)
|
||||
ssize_t len = read(fd, buf, sizeof buf);
|
||||
if (len>0){
|
||||
buf[len]=0;
|
||||
pid = atoi(buf);
|
||||
}
|
||||
if (pid > 0){
|
||||
// check that the server process still holds a lock
|
||||
struct flock lock;
|
||||
bzero(&lock, sizeof lock);
|
||||
lock.l_type = F_RDLCK;
|
||||
lock.l_start = 0;
|
||||
lock.l_whence = SEEK_SET;
|
||||
lock.l_len = len;
|
||||
fcntl(fd, F_GETLK, &lock);
|
||||
if (lock.l_type == F_UNLCK || lock.l_pid != pid)
|
||||
pid = -1;
|
||||
}
|
||||
close(fd);
|
||||
if (pid > 0)
|
||||
return pid;
|
||||
INFOF("Unlinking stale pidfile %s", ppath);
|
||||
unlink(ppath);
|
||||
@ -246,25 +263,6 @@ static void wokeup()
|
||||
|
||||
#endif
|
||||
|
||||
DEFINE_ALARM(server_shutdown_check);
|
||||
void server_shutdown_check(struct sched_ent *alarm)
|
||||
{
|
||||
// TODO we should watch a descriptor and quit when it closes
|
||||
/* If this server has been supplanted with another or Serval has been uninstalled, then its PID
|
||||
file will change or be unaccessible. In this case, shut down without all the cleanup.
|
||||
Perform this check at most once per second. */
|
||||
static time_ms_t server_pid_time_ms = 0;
|
||||
time_ms_t now = gettime_ms();
|
||||
if (server_pid_time_ms == 0 || now - server_pid_time_ms > 1000) {
|
||||
server_pid_time_ms = now;
|
||||
if (server_pid() != server_getpid) {
|
||||
WARNF("Server pid file no longer contains pid=%d -- shutting down without cleanup", server_getpid);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
RESCHEDULE(alarm, now+1000, TIME_MS_NEVER_WILL, now+1100);
|
||||
}
|
||||
|
||||
static int server_bind()
|
||||
{
|
||||
serverMode = SERVER_RUNNING;
|
||||
@ -335,9 +333,6 @@ static int server_bind()
|
||||
|
||||
time_ms_t now = gettime_ms();
|
||||
|
||||
// Periodically check for server shut down
|
||||
RESCHEDULE(&ALARM_STRUCT(server_shutdown_check), now, TIME_MS_NEVER_WILL, now);
|
||||
|
||||
olsr_init_socket();
|
||||
|
||||
/* Calculate (and possibly show) CPU usage stats periodically */
|
||||
@ -357,12 +352,13 @@ static void server_loop()
|
||||
while((serverMode==SERVER_RUNNING) && fd_poll2(waiting, wokeup))
|
||||
;
|
||||
serverCleanUp();
|
||||
|
||||
/* It is safe to unlink the pidfile here without checking whether it actually contains our own
|
||||
* PID, because server_shutdown_check() will have been executed very recently (in fd_poll()), so
|
||||
* if the code reaches here, the check has been done recently.
|
||||
*/
|
||||
server_unlink_pid();
|
||||
|
||||
if (server_pidfd!=-1){
|
||||
close(server_pidfd);
|
||||
server_pidfd = -1;
|
||||
const char *ppath = server_pidfile_path();
|
||||
unlink(ppath);
|
||||
}
|
||||
serverMode = 0;
|
||||
}
|
||||
|
||||
@ -389,25 +385,38 @@ static int server_write_pid()
|
||||
const char *ppath = server_pidfile_path();
|
||||
if (ppath == NULL)
|
||||
return -1;
|
||||
|
||||
FILE *f = fopen(ppath, "w");
|
||||
if (!f)
|
||||
return WHYF_perror("fopen(%s,\"w\")", alloca_str_toprint(ppath));
|
||||
server_getpid = getpid();
|
||||
fprintf(f,"%d\n", server_getpid);
|
||||
fclose(f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int server_unlink_pid()
|
||||
{
|
||||
/* Remove PID file to indicate that the server is no longer running */
|
||||
const char *ppath = server_pidfile_path();
|
||||
if (ppath == NULL)
|
||||
return -1;
|
||||
if (unlink(ppath) == -1)
|
||||
WHYF_perror("unlink(%s)", alloca_str_toprint(ppath));
|
||||
int fd = open(ppath, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||
if (fd==-1)
|
||||
return WHYF_perror("open(%s, O_RDWR | O_CREAT)", alloca_str_toprint(ppath));
|
||||
|
||||
int pid = server_getpid = getpid();
|
||||
char buf[20];
|
||||
int len = snprintf(buf, sizeof buf, "%d", pid);
|
||||
|
||||
struct flock lock;
|
||||
bzero(&lock, sizeof lock);
|
||||
lock.l_type = F_WRLCK;
|
||||
lock.l_start = 0;
|
||||
lock.l_whence = SEEK_SET;
|
||||
lock.l_len = len;
|
||||
if (fcntl(fd, F_SETLK, &lock)==-1){
|
||||
close(fd);
|
||||
return WHYF_perror("fcntl(%d, F_SETLK, &lock)", fd);
|
||||
}
|
||||
|
||||
if (ftruncate(fd, 0)==-1){
|
||||
close(fd);
|
||||
return WHYF_perror("ftruncate(%d, 0)", fd);
|
||||
}
|
||||
|
||||
if (write(fd, buf, len)!=len){
|
||||
close(fd);
|
||||
return WHYF_perror("write(%d, %p, %d)", fd, buf, len);
|
||||
}
|
||||
|
||||
// leave the pid file open!
|
||||
server_pidfd = fd;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
23
tests/server
23
tests/server
@ -141,29 +141,6 @@ test_StartTwice() {
|
||||
assert_no_servald_processes
|
||||
}
|
||||
|
||||
doc_RemovePid="Server stops when pid file removed"
|
||||
setup_RemovePid() {
|
||||
setup
|
||||
# Set a watchdog alarm to ensure that the daemon wakes regularly, so the
|
||||
# pidfile is checked quickly, otherwise the daemon will wait 30 seconds
|
||||
# before waking itself to check.
|
||||
>watchdog
|
||||
chmod 0550 watchdog
|
||||
executeOk_servald config \
|
||||
set log.console.level debug \
|
||||
set log.console.show_time true \
|
||||
set log.console.show_pid true \
|
||||
set debug.io true \
|
||||
set debug.watchdog on \
|
||||
set server.watchdog.executable "$PWD/watchdog" \
|
||||
set server.watchdog.interval_ms 100
|
||||
start_servald_server
|
||||
}
|
||||
test_RemovePid() {
|
||||
rm $instance_servald_pidfile
|
||||
wait_until ! kill -0 $servald_pid 2>/dev/null
|
||||
}
|
||||
|
||||
doc_MonitorQuit="Server stops due to monitor client disconnection"
|
||||
setup_MonitorQuit() {
|
||||
setup
|
||||
|
Loading…
x
Reference in New Issue
Block a user