mirror of
https://github.com/servalproject/serval-dna.git
synced 2024-12-30 09:58:55 +00:00
1031 lines
33 KiB
C
1031 lines
33 KiB
C
/*
|
|
Serval DNA server main loop
|
|
Copyright (C) 2010 Paul Gardner-Stephen
|
|
Copyright (C) 2011-2015 Serval Project Inc.
|
|
Copyright (C) 2016 Flinders University
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include <assert.h>
|
|
#include <dirent.h>
|
|
#include <signal.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
#include <libgen.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/types.h>
|
|
#ifdef HAVE_LINUX_THREADS
|
|
#include <sys/syscall.h>
|
|
#endif
|
|
|
|
#include "server.h"
|
|
#include "serval.h"
|
|
#include "rhizome.h"
|
|
#include "conf.h"
|
|
#include "log.h"
|
|
#include "str.h"
|
|
#include "numeric_str.h"
|
|
#include "strbuf.h"
|
|
#include "strbuf_helpers.h"
|
|
#include "overlay_interface.h"
|
|
#include "overlay_packet.h"
|
|
#include "keyring.h"
|
|
#include "commandline.h"
|
|
#include "mdp_client.h"
|
|
#include "route_link.h"
|
|
#include "httpd.h"
|
|
#include "debug.h"
|
|
|
|
DEFINE_FEATURE(cli_server);
|
|
|
|
#define PROC_SUBDIR "proc"
|
|
#define PIDFILE_NAME "servald.pid"
|
|
#define STOPFILE_NAME "servald.stop"
|
|
|
|
__thread enum server_mode serverMode = SERVER_NOT_RUNNING;
|
|
|
|
struct pid_tid {
|
|
pid_t pid;
|
|
pid_t tid;
|
|
};
|
|
|
|
static struct pid_tid server_pid_tid = { .pid = 0, .tid = 0 };
|
|
static int server_pidfd = -1;
|
|
static int server();
|
|
static int server_write_pid();
|
|
static void signal_handler(int signal);
|
|
static void serverCleanUp();
|
|
static const char *_server_pidfile_path(struct __sourceloc __whence);
|
|
#define server_pidfile_path() (_server_pidfile_path(__WHENCE__))
|
|
|
|
static int server_get_proc_state(const char *path, char *buff, size_t buff_len);
|
|
static void server_stop_alarms();
|
|
|
|
// Define our own gettid() and tgkill() if <unistd.h> doesn't provide them (eg, it does on Android).
|
|
|
|
#ifndef HAVE_GETTID
// Fallback gettid() for platforms whose <unistd.h> does not supply one.  With Linux threads the
// thread ID comes straight from the SYS_gettid system call; otherwise the process ID is used as
// the thread ID.
static pid_t gettid()
{
#ifndef HAVE_LINUX_THREADS
  return getpid();
#else
  const pid_t thread_id = syscall(SYS_gettid);
  return thread_id;
#endif
}
#endif // !HAVE_GETTID
|
|
|
|
#if defined(HAVE_LINUX_THREADS) && !defined(HAVE_TGKILL)
// Fallback tgkill() that invokes the SYS_tgkill system call directly, passing the thread group
// ID, thread ID and signal number through unchanged.
static int tgkill(int tgid, int tid, int signum)
{
  return syscall(SYS_tgkill, tgid, tid, signum);
}
#endif // HAVE_LINUX_THREADS && !HAVE_TGKILL
|
|
|
|
// Read the PID and TID from the given pidfile, returning a PID of 0 if the file does not exist, or
|
|
// a PID of -1 if the file exists but contains invalid content or is not locked by process PID,
|
|
// otherwise the PID/TID of the process that has the lock on the file.
|
|
static struct pid_tid read_pidfile(const char *path)
|
|
{
|
|
int fd = open(path, O_RDONLY);
|
|
if (fd == -1) {
|
|
if (errno == ENOENT)
|
|
return (struct pid_tid){ .pid = 0, .tid = 0 };
|
|
WHYF_perror("open(%s, O_RDONLY)", alloca_str_toprint(path));
|
|
return (struct pid_tid){ .pid = -1, .tid = -1 };
|
|
}
|
|
int32_t pid = -1;
|
|
int32_t tid = -1;
|
|
char buf[30];
|
|
ssize_t len = read(fd, buf, sizeof buf);
|
|
if (len == -1) {
|
|
WHYF_perror("read(%s, %p, %zu)", alloca_str_toprint(path), buf, sizeof buf);
|
|
} else if (len > 0 && (size_t)len < sizeof buf) {
|
|
DEBUGF(server, "Read from pidfile %s: %s", path, alloca_toprint(-1, buf, len));
|
|
buf[len] = '\0';
|
|
const char *e = NULL;
|
|
if (!str_to_int32(buf, 10, &pid, &e))
|
|
pid = -1;
|
|
else if (*e == ' ')
|
|
str_to_int32(e + 1, 10, &tid, NULL);
|
|
else
|
|
tid = pid;
|
|
} else {
|
|
WARNF("Pidfile %s has invalid content: %s", path, alloca_toprint(-1, buf, len));
|
|
}
|
|
if (pid > 0) {
|
|
// Only return a valid pid/tid if the file is currently locked by the same process that it
|
|
// identifies.
|
|
struct flock lock;
|
|
bzero(&lock, sizeof lock);
|
|
lock.l_type = F_RDLCK;
|
|
lock.l_start = 0;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_len = len;
|
|
fcntl(fd, F_GETLK, &lock);
|
|
if (lock.l_type == F_UNLCK || lock.l_pid != pid) {
|
|
DEBUGF(server, "Pidfile %s is not locked by pid %d", path, pid);
|
|
pid = tid = -1;
|
|
}
|
|
}
|
|
close(fd);
|
|
return (struct pid_tid){ .pid = pid, .tid = tid };
|
|
}
|
|
|
|
static struct pid_tid get_server_pid_tid()
|
|
{
|
|
// If the server process closes another handle on the same file, its lock will disappear, so this
|
|
// guards against that happening.
|
|
if (server_pid_tid.pid == getpid())
|
|
return server_pid_tid;
|
|
// Attempt to read the pid, and optionally the tid (Linux thread ID) from the pid file.
|
|
char dirname[1024];
|
|
if (!FORMF_SERVAL_RUN_PATH(dirname, NULL))
|
|
goto error;
|
|
struct stat st;
|
|
if (stat(dirname, &st) == -1) {
|
|
WHYF_perror("stat(%s)", alloca_str_toprint(dirname));
|
|
goto error;
|
|
}
|
|
if ((st.st_mode & S_IFMT) != S_IFDIR) {
|
|
WHYF("Not a directory: %s", dirname);
|
|
goto error;
|
|
}
|
|
const char *pidfile_path = server_pidfile_path();
|
|
if (pidfile_path == NULL)
|
|
goto error;
|
|
assert(strrchr(pidfile_path, '/') != NULL);
|
|
struct pid_tid id = read_pidfile(pidfile_path);
|
|
if (id.pid == -1) {
|
|
DEBUGF(server, "Unlinking stale pidfile %s", pidfile_path);
|
|
unlink(pidfile_path);
|
|
id.pid = 0;
|
|
}
|
|
return id;
|
|
error:
|
|
return (struct pid_tid){ .pid = -1, .tid = -1 };
|
|
}
|
|
|
|
// Send a signal to a given process. Returns 0 if sent, 1 if not sent because the process is non
|
|
// existent (ESRCH), or -1 if not sent due to another error (eg, EPERM).
|
|
static int send_signal(const struct pid_tid *id, int signum)
|
|
{
|
|
#ifdef HAVE_LINUX_THREADS
|
|
if (id->tid > 0) {
|
|
if (tgkill(id->pid, id->tid, signum) == -1) {
|
|
if (errno == ESRCH)
|
|
return 1;
|
|
WHYF_perror("Cannot send %s to Servald pid=%d tid=%d (pidfile %s)", alloca_signal_name(signum), id->pid, id->tid, server_pidfile_path());
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif // !HAVE_LINUX_THREADS
|
|
if (kill(id->pid, signum) == -1) {
|
|
if (errno == ESRCH)
|
|
return 1;
|
|
WHYF_perror("Cannot send %s to Servald pid=%d (pidfile %s)", alloca_signal_name(signum), id->pid, server_pidfile_path());
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static const char *_server_pidfile_path(struct __sourceloc __whence)
|
|
{
|
|
static char pidfile_path[256];
|
|
if (!pidfile_path[0]) {
|
|
if (!FORMF_SERVAL_RUN_PATH(pidfile_path, PIDFILE_NAME))
|
|
return NULL;
|
|
}
|
|
return pidfile_path;
|
|
}
|
|
|
|
int server_pid()
|
|
{
|
|
return get_server_pid_tid().pid;
|
|
}
|
|
|
|
int server_bind()
|
|
{
|
|
serverMode = SERVER_RUNNING;
|
|
|
|
// Warn, not merely Info, if there is no configured log file.
|
|
serval_log_level_NoLogFileConfigured = LOG_LEVEL_WARN;
|
|
|
|
/* Catch SIGHUP etc so that we can respond to requests to do things, eg, shut down. */
|
|
struct sigaction sig;
|
|
bzero(&sig, sizeof sig);
|
|
|
|
sig.sa_flags = 0;
|
|
sig.sa_handler = signal_handler;
|
|
sigemptyset(&sig.sa_mask); // Block the same signals during handler
|
|
sigaddset(&sig.sa_mask, SIGHUP);
|
|
sigaddset(&sig.sa_mask, SIGINT);
|
|
sigaddset(&sig.sa_mask, SIGIO);
|
|
|
|
#ifdef ANDROID
|
|
// batphone depends on this constant to wake up the scheduler
|
|
// break the build if it changes.
|
|
assert(SIGIO==29);
|
|
#endif
|
|
|
|
sigaction(SIGHUP, &sig, NULL);
|
|
sigaction(SIGINT, &sig, NULL);
|
|
sigaction(SIGIO, &sig, NULL);
|
|
|
|
// Perform additional startup, which should be limited to tasks like binding sockets
|
|
// So that clients can initiate a connection once servald start has returned.
|
|
// serverMode should be cleared to indicate failures
|
|
// Any CPU or IO heavy initialisation should be performed in a config changed trigger
|
|
|
|
CALL_TRIGGER(startup);
|
|
if (serverMode == SERVER_NOT_RUNNING)
|
|
return -1;
|
|
|
|
// start the HTTP server if enabled
|
|
if (httpd_server_start(config.rhizome.http.port, config.rhizome.http.port + HTTPD_PORT_RANGE)==-1) {
|
|
serverMode = SERVER_NOT_RUNNING;
|
|
return -1;
|
|
}
|
|
|
|
/* For testing, it can be very helpful to delay the start of the server process, for example to
|
|
* check that the start/stop logic is robust.
|
|
*/
|
|
const char *delay = getenv("SERVALD_SERVER_START_DELAY");
|
|
if (delay){
|
|
time_ms_t milliseconds = atoi(delay);
|
|
DEBUGF(server, "Sleeping for %"PRId64" milliseconds", (int64_t) milliseconds);
|
|
sleep_ms(milliseconds);
|
|
}
|
|
|
|
/* record PID file so that servald start can return */
|
|
if (server_write_pid()) {
|
|
serverMode = SERVER_NOT_RUNNING;
|
|
return -1;
|
|
}
|
|
|
|
overlay_queue_init();
|
|
|
|
time_ms_t now = gettime_ms();
|
|
|
|
/* Calculate (and possibly show) CPU usage stats periodically */
|
|
RESCHEDULE(&ALARM_STRUCT(fd_periodicstats), now+3000, TIME_MS_NEVER_WILL, now+3500);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Run the server's main loop until fd_poll2() returns zero, then clean up and remove the pidfile.
// The waiting and wokeup callbacks are handed to fd_poll2() unchanged.
void server_loop(time_ms_t (*waiting)(time_ms_t, time_ms_t, time_ms_t), void (*wokeup)())
{
  // possible race condition... Shutting down before we even started
  CALL_TRIGGER(conf_change);

  // This log message is used by tests to wait for the server to start.
  INFOF("Server initialised, entering main loop");

  /* Check for activity and respond to it */
  for (;;) {
    if (!fd_poll2(waiting, wokeup))
      break;
  }

  INFOF("Server finished, exiting main loop");
  fd_showstats();
  serverCleanUp();

  // Nothing more to do unless this process holds the pidfile open.
  if (server_pidfd == -1)
    return;

  close(server_pidfd);
  server_pidfd = -1;
  unlink(server_pidfile_path());
}
|
|
|
|
// Start the server and run its main loop to completion.  Returns -1 if start-up failed, else 0
// once the main loop has exited.
static int server()
{
  IN();
  int status = server_bind();
  if (status != -1) {
    server_loop(NULL, NULL);
    status = 0;
  }
  RETURN(status);
}
|
|
|
|
static int server_write_pid()
|
|
{
|
|
server_write_proc_state("http_port", "%d", httpd_server_port);
|
|
server_write_proc_state("mdp_inet_port", "%d", mdp_loopback_port);
|
|
|
|
// Create or unlink the "primary_sid" proc state file.
|
|
get_my_subscriber(0);
|
|
|
|
// Create a locked pidfile to advertise that the server is now running.
|
|
const char *pidfile_path = server_pidfile_path();
|
|
if (pidfile_path == NULL)
|
|
return -1;
|
|
|
|
// The pidfile content is simply the ASCII decimal PID, optionally followed by a single space and
|
|
// the ASCII decimal Thread ID (tid) if the server is running as a Linux thread that is not the
|
|
// process's main thread.
|
|
int32_t pid = server_pid_tid.pid = getpid();
|
|
int32_t tid = server_pid_tid.tid = gettid();
|
|
char content[30];
|
|
size_t content_size = 0;
|
|
{
|
|
strbuf sb = strbuf_local_buf(content);
|
|
strbuf_sprintf(sb, "%" PRId32, pid);
|
|
if (tid != pid)
|
|
strbuf_sprintf(sb, " %" PRId32, tid);
|
|
assert(!strbuf_overrun(sb));
|
|
content_size = strbuf_len(sb);
|
|
}
|
|
|
|
// The pidfile lock covers its whole content.
|
|
struct flock lock;
|
|
bzero(&lock, sizeof lock);
|
|
lock.l_type = F_WRLCK;
|
|
lock.l_start = 0;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_len = content_size;
|
|
|
|
// Form the name of the temporary pidfile.
|
|
strbuf tmpfile_path_sb = strbuf_alloca(strlen(pidfile_path) + 25);
|
|
strbuf_puts(tmpfile_path_sb, pidfile_path);
|
|
strbuf_sprintf(tmpfile_path_sb, ".%d-%d", pid, tid);
|
|
assert(!strbuf_overrun(tmpfile_path_sb));
|
|
const char *tmpfile_path = strbuf_str(tmpfile_path_sb);
|
|
|
|
// Create the temporary pidfile and lock it, deleting any stale temporaries if necessary. Leave
|
|
// the temporary pidfile open to retain the lock -- if successful it will eventually be the real
|
|
// pidfile.
|
|
DEBUGF(server, "unlink(%s)", alloca_str_toprint(tmpfile_path));
|
|
unlink(tmpfile_path);
|
|
DEBUGF(server, "open(%s, O_RDWR|O_CREAT|O_CLOEXEC)", alloca_str_toprint(tmpfile_path));
|
|
int fd = open(tmpfile_path, O_RDWR | O_CREAT | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
|
if (fd==-1)
|
|
return WHYF_perror("Cannot create temporary pidfile: open(%s, O_RDWR|O_CREAT|O_CLOEXEC)", alloca_str_toprint(tmpfile_path));
|
|
DEBUG(server, "lock");
|
|
if (fcntl(fd, F_SETLK, &lock) == -1) {
|
|
WHYF_perror("Cannot lock temporary pidfile %s: fcntl(%d, F_SETLK, &lock)", tmpfile_path, fd);
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
if (ftruncate(fd, 0) == -1){
|
|
close(fd);
|
|
return WHYF_perror("ftruncate(%d, 0)", fd);
|
|
}
|
|
DEBUGF(server, "write %s", alloca_toprint(-1, content, content_size));
|
|
if (write(fd, content, content_size) != (ssize_t)content_size){
|
|
close(fd);
|
|
return WHYF_perror("write(%d, %s, %zu)", fd, alloca_str_toprint(content), content_size);
|
|
}
|
|
|
|
// Now the locked temporary has been created, link(2) it to the pidfile's proper name, to ensure
|
|
// that the pidfile is locked from the instant of its existence. Note that link(2) fails if the
|
|
// destination path already exists. This logic prevents racing with other processes deleting
|
|
// stale (unlocked) pidfiles. If the link(2) fails the first time because the pidfile already
|
|
// exists, then if the existent pidfile is locked, there is another daemon running, so bail out.
|
|
// If the existent pidfile is not locked, then it is stale, so delete it and re-try the link.
|
|
unsigned int tries = 0;
|
|
while (1) {
|
|
DEBUGF(server, "link(%s, %s)", alloca_str_toprint(tmpfile_path), alloca_str_toprint(pidfile_path));
|
|
if (link(tmpfile_path, pidfile_path) != -1)
|
|
break;
|
|
if (errno == EEXIST && ++tries < 2) {
|
|
struct pid_tid id = read_pidfile(pidfile_path);
|
|
if (id.pid == -1) {
|
|
DEBUGF(server, "Unlinking stale pidfile %s", pidfile_path);
|
|
unlink(pidfile_path);
|
|
} else if (id.pid > 0) {
|
|
INFOF("Another daemon is running, pid=%d tid=%d", id.pid, id.tid);
|
|
return 1;
|
|
}
|
|
} else {
|
|
WARNF_perror("Cannot link temporary pidfile %s to %s", tmpfile_path, pidfile_path);
|
|
// Android 6 wont let us link, giving a permission error (sigh), lets just rename it then
|
|
if (rename(tmpfile_path, pidfile_path)==-1){
|
|
WHYF_perror("Cannot link or rename temporary pidfile %s to %s", tmpfile_path, pidfile_path);
|
|
close(fd);
|
|
unlink(tmpfile_path);
|
|
return -1;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
DEBUGF(server, "Created pidfile %s", pidfile_path);
|
|
|
|
// The link was successful, so delete the temporary pidfile but leave the pid file open so that
|
|
// the lock remains held!
|
|
unlink(tmpfile_path);
|
|
server_pidfd = fd;
|
|
return 0;
|
|
}
|
|
|
|
static int get_proc_path(const char *path, char *buf, size_t bufsiz)
|
|
{
|
|
if (!formf_serval_run_path(buf, bufsiz, PROC_SUBDIR "/%s", path))
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
int server_write_proc_state(const char *path, const char *fmt, ...)
|
|
{
|
|
// Only a running server process/thread may modify the proc files.
|
|
assert(serverMode != SERVER_NOT_RUNNING);
|
|
|
|
char path_buf[400];
|
|
if (get_proc_path(path, path_buf, sizeof path_buf)==-1)
|
|
return -1;
|
|
|
|
// Create the directory that contains the path, if it does not already exist.
|
|
size_t dirsiz = strlen(path_buf) + 1;
|
|
char dir_buf[dirsiz];
|
|
strcpy(dir_buf, path_buf);
|
|
const char *dir = dirname(dir_buf); // modifies dir_buf[]
|
|
if (emkdirs_info(dir, 0700) == -1)
|
|
return -1;
|
|
|
|
// Format the file's new content in a local buffer on the stack.
|
|
strbuf sb;
|
|
STRBUF_ALLOCA_FIT(sb, 1024, strbuf_va_printf(sb, fmt));
|
|
|
|
// Overwrite the file, creating it if necessary, using a single write(2) system call, followed by
|
|
// a ftruncate(2) system call (in case the file already existed and was longer than its new
|
|
// content). This allows potential race conditions to be avoided for files that are overwritten
|
|
// while the server runs, as long as the written contents are always the same size, or always
|
|
// contain a terminating sequence (eg, newline).
|
|
int fd = open(path_buf, O_CREAT | O_WRONLY, 0700);
|
|
if (fd == -1)
|
|
return WHYF_perror("open(%s, O_CREAT|O_WRONLY, 0700)", alloca_str_toprint(path_buf));
|
|
int ret = write_all(fd, strbuf_str(sb), strbuf_len(sb));
|
|
if (ret != -1 && (ret = ftruncate(fd, strbuf_len(sb)) == -1))
|
|
ret = WHYF_perror("ftruncate(%s, %zu)", alloca_str_toprint(path_buf), strbuf_len(sb));
|
|
close(fd);
|
|
return ret;
|
|
}
|
|
|
|
int server_unlink_proc_state(const char *path)
|
|
{
|
|
// Only a running server process/thread may modify the proc files.
|
|
assert(serverMode != SERVER_NOT_RUNNING);
|
|
|
|
char path_buf[400];
|
|
if (get_proc_path(path, path_buf, sizeof path_buf)==-1)
|
|
return -1;
|
|
if (unlink(path) == -1 && errno != ENOENT)
|
|
return WHYF_perror("unlink(%s)", alloca_str_toprint(path));
|
|
return 0;
|
|
}
|
|
|
|
// Read the first line (as read by fgets(3)) of the named proc state file into buff[].
//
// Returns 0 if something was read, 1 if the file does not exist or is empty, -1 if the path
// could not be formed or the file could not be opened or read.
static int server_get_proc_state(const char *path, char *buff, size_t buff_len)
{
  char path_buf[400];
  if (get_proc_path(path, path_buf, sizeof path_buf)==-1)
    return -1;

  FILE *stream = fopen(path_buf, "r");
  if (stream == NULL) {
    if (errno == ENOENT)
      return 1;
    return WHYF_perror("fopen(%s)", alloca_str_toprint(path_buf));
  }

  int status = 0;
  errno = 0; // fgets() does not set errno on end-of-file
  if (fgets(buff, buff_len, stream) == NULL) {
    if (errno == 0)
      status = 1;
    else
      status = WHYF_perror("fgets from %s", alloca_str_toprint(path_buf));
  }
  fclose(stream);
  return status;
}
|
|
|
|
/* Called periodically by the server process in its main loop.
|
|
*/
|
|
DEFINE_ALARM(server_config_reload);
|
|
void server_config_reload(struct sched_ent *alarm)
|
|
{
|
|
if (serverMode == SERVER_CLOSING){
|
|
// All shutdown triggers should unschedule their respective alarms. Once there are no alarms
|
|
// left, the fd_poll2() in server_loop() will return zero.
|
|
CALL_TRIGGER(shutdown);
|
|
return;
|
|
}
|
|
|
|
switch (cf_reload_strict()) {
|
|
case -1:
|
|
WARN("server continuing with prior config");
|
|
break;
|
|
case 0:
|
|
break;
|
|
default:
|
|
INFO("server config reloaded");
|
|
break;
|
|
}
|
|
switch (reload_mdp_packet_rules()) {
|
|
case -1:
|
|
WARN("server continuing with prior packet filter rules");
|
|
break;
|
|
case 0:
|
|
break;
|
|
default:
|
|
INFO("server packet filter rules reloaded");
|
|
break;
|
|
}
|
|
if (alarm){
|
|
time_ms_t now = gettime_ms();
|
|
RESCHEDULE(alarm,
|
|
now+config.server.config_reload_interval_ms,
|
|
TIME_MS_NEVER_WILL,
|
|
now+config.server.config_reload_interval_ms+100);
|
|
}
|
|
}
|
|
|
|
/* Called periodically by the server process in its main loop.
|
|
*/
|
|
DEFINE_ALARM(server_watchdog);
|
|
void server_watchdog(struct sched_ent *alarm)
|
|
{
|
|
if (config.server.watchdog.executable[0]) {
|
|
const char *argv[2];
|
|
argv[0] = config.server.watchdog.executable;
|
|
argv[1] = NULL;
|
|
strbuf argv_sb = strbuf_append_argv(strbuf_alloca(1024), 1, argv);
|
|
switch (fork()) {
|
|
case 0: {
|
|
/* Child, should fork() again to create orphan process. */
|
|
pid_t watchdog_pid;
|
|
switch (watchdog_pid = fork()) {
|
|
case 0:
|
|
/* Grandchild, should exec() watchdog. */
|
|
serval_log_close();
|
|
signal(SIGTERM, SIG_DFL);
|
|
close(0);
|
|
close(1);
|
|
close(2);
|
|
execv(config.server.watchdog.executable, (char **)argv);
|
|
// Don't use FATALF_perror() because we want to use _exit(2) not exit(2).
|
|
LOGF_perror(LOG_LEVEL_FATAL, "execv(%s, [%s])",
|
|
alloca_str_toprint(config.server.watchdog.executable),
|
|
strbuf_str(argv_sb)
|
|
);
|
|
break;
|
|
case -1:
|
|
/* grandchild fork failed */
|
|
WHY_perror("fork");
|
|
break;
|
|
default:
|
|
/* Child, report grandchild's PID. */
|
|
DEBUGF(watchdog, "STARTED WATCHDOG pid=%u executable=%s argv=[%s]",
|
|
watchdog_pid,
|
|
alloca_str_toprint(config.server.watchdog.executable),
|
|
strbuf_str(argv_sb)
|
|
);
|
|
do { _exit(0); } while (1);
|
|
break;
|
|
}
|
|
do { _exit(-1); } while (1);
|
|
break;
|
|
}
|
|
case -1:
|
|
/* child fork failed */
|
|
WHY_perror("fork");
|
|
break;
|
|
}
|
|
if (alarm) {
|
|
time_ms_t now = gettime_ms();
|
|
RESCHEDULE(alarm,
|
|
now+config.server.watchdog.interval_ms,
|
|
now+config.server.watchdog.interval_ms,
|
|
now+100);
|
|
}
|
|
}
|
|
}
|
|
|
|
DEFINE_ALARM(rhizome_clean_db);
|
|
void rhizome_clean_db(struct sched_ent *alarm)
|
|
{
|
|
if (!config.rhizome.enable || !rhizome_database.db)
|
|
return;
|
|
|
|
time_ms_t now = gettime_ms();
|
|
rhizome_cleanup(NULL);
|
|
// clean up every 30 minutes or so
|
|
RESCHEDULE(alarm, now + 30*60*1000, TIME_MS_NEVER_WILL, TIME_MS_NEVER_WILL);
|
|
}
|
|
|
|
static void server_on_config_change()
|
|
{
|
|
if (serverMode == SERVER_NOT_RUNNING)
|
|
return;
|
|
|
|
time_ms_t now = gettime_ms();
|
|
|
|
if (config.server.watchdog.executable[0])
|
|
RESCHEDULE(&ALARM_STRUCT(server_watchdog),
|
|
now+config.server.watchdog.interval_ms,
|
|
now+config.server.watchdog.interval_ms,
|
|
now+100);
|
|
|
|
// Periodically check for modified configuration
|
|
RESCHEDULE(&ALARM_STRUCT(server_config_reload),
|
|
now+config.server.config_reload_interval_ms,
|
|
TIME_MS_NEVER_WILL,
|
|
now+config.server.config_reload_interval_ms+100);
|
|
|
|
// Open the Rhizome database immediately if Rhizome is enabled and close it if disabled; this
|
|
// cannot be deferred because is_rhizome_http_enabled() only returns true if the database is open.
|
|
if (config.rhizome.enable){
|
|
rhizome_opendb();
|
|
RESCHEDULE(&ALARM_STRUCT(rhizome_clean_db), now + 30*60*1000, TIME_MS_NEVER_WILL, TIME_MS_NEVER_WILL);
|
|
}else if(rhizome_database.db){
|
|
rhizome_close_db();
|
|
}
|
|
}
|
|
DEFINE_TRIGGER(conf_change, server_on_config_change);
|
|
|
|
static void clean_proc()
|
|
{
|
|
char path_buf[400];
|
|
if (FORMF_SERVAL_RUN_PATH(path_buf, PROC_SUBDIR)) {
|
|
DIR *dir;
|
|
struct dirent *dp;
|
|
if ((dir = opendir(path_buf)) == NULL) {
|
|
WARNF_perror("opendir(%s)", alloca_str_toprint(path_buf));
|
|
return;
|
|
}
|
|
while ((dp = readdir(dir)) != NULL) {
|
|
if (FORMF_SERVAL_RUN_PATH(path_buf, PROC_SUBDIR "/%s", dp->d_name)) {
|
|
struct stat st;
|
|
if (lstat(path_buf, &st) == -1)
|
|
WARNF_perror("stat(%s)", path_buf);
|
|
else if (S_ISREG(st.st_mode))
|
|
unlink(path_buf);
|
|
}
|
|
}
|
|
closedir(dir);
|
|
}
|
|
}
|
|
|
|
void server_close(){
|
|
if (serverMode != SERVER_RUNNING)
|
|
return;
|
|
|
|
DEBUGF(server,"Graceful shutdown");
|
|
|
|
// Cause the next server_config_reload() alarm to invoke the "shutdown" trigger, which in turn
|
|
// will cause an orderly exit from server_loop().
|
|
serverMode = SERVER_CLOSING;
|
|
|
|
// Schedule the server_config_reload() alarm to go off immediately.
|
|
time_ms_t now = gettime_ms();
|
|
RESCHEDULE(&ALARM_STRUCT(server_config_reload),
|
|
now,
|
|
now,
|
|
TIME_MS_NEVER_WILL);
|
|
}
|
|
|
|
static void server_stop_alarms()
|
|
{
|
|
DEBUGF(server,"Stopping alarms");
|
|
unschedule(&ALARM_STRUCT(fd_periodicstats));
|
|
unschedule(&ALARM_STRUCT(server_watchdog));
|
|
unschedule(&ALARM_STRUCT(server_config_reload));
|
|
unschedule(&ALARM_STRUCT(rhizome_clean_db));
|
|
}
|
|
DEFINE_TRIGGER(shutdown, server_stop_alarms);
|
|
|
|
static void serverCleanUp()
|
|
{
|
|
assert(serverMode != SERVER_NOT_RUNNING);
|
|
INFOF("Server cleaning up");
|
|
|
|
// release alarms, in case we aborted without attempting a graceful close
|
|
server_stop_alarms();
|
|
|
|
rhizome_close_db();
|
|
release_my_subscriber();
|
|
|
|
serverMode = SERVER_NOT_RUNNING;
|
|
clean_proc();
|
|
}
|
|
|
|
static void signal_handler(int signum)
|
|
{
|
|
switch (signum) {
|
|
case SIGIO:
|
|
// noop to break out of poll
|
|
return;
|
|
case SIGHUP:
|
|
case SIGINT:
|
|
switch (serverMode) {
|
|
case SERVER_RUNNING:
|
|
// Trigger the server to close gracefully after any current alarm has completed.
|
|
INFOF("Caught signal %s -- attempting clean shutdown", alloca_signal_name(signum));
|
|
server_close();
|
|
return;
|
|
case SERVER_CLOSING:
|
|
// If a second signal is received before the server has gracefully shut down, then forcibly
|
|
// terminate it immediately. If the server is running in a thread, then this will only call
|
|
// serverCleanUp() if the signal was received by the same thread that is running the server,
|
|
// because serverMode is thread-local. The zero exit status indicates a clean shutdown. So
|
|
// the "stop" command must send SIGHUP to the correct thread.
|
|
WHYF("Caught signal %s -- forced shutdown", alloca_signal_name(signum));
|
|
list_alarms(LOG_LEVEL_ERROR);
|
|
serverCleanUp();
|
|
exit(0);
|
|
case SERVER_NOT_RUNNING:
|
|
// If this thread is not running a server, then treat the signal as immediately fatal.
|
|
break;
|
|
}
|
|
// fall through...
|
|
default:
|
|
LOGF(LOG_LEVEL_FATAL, "Caught signal %s", alloca_signal_name(signum));
|
|
dump_stack(LOG_LEVEL_FATAL);
|
|
break;
|
|
}
|
|
|
|
// Exit with a status code indicating the caught signal. This involves removing the signal
|
|
// handler for the caught signal then re-sending the same signal to ourself.
|
|
struct sigaction sig;
|
|
bzero(&sig, sizeof sig);
|
|
sig.sa_flags = 0;
|
|
sig.sa_handler = SIG_DFL;
|
|
sigemptyset(&sig.sa_mask);
|
|
sigaction(signum, &sig, NULL);
|
|
kill(getpid(), signum);
|
|
|
|
// Just in case...
|
|
FATALF("Sending %s to self (pid=%d) did not cause exit", alloca_signal_name(signum), getpid());
|
|
}
|
|
|
|
static void cli_server_details(struct cli_context *context, const struct pid_tid *id)
|
|
{
|
|
const char *ipath = instance_path();
|
|
if (ipath) {
|
|
cli_field_name(context, "instancepath", ":");
|
|
cli_put_string(context, ipath, "\n");
|
|
}
|
|
|
|
cli_field_name(context, "pidfile", ":");
|
|
cli_put_string(context, server_pidfile_path(), "\n");
|
|
cli_field_name(context, "status", ":");
|
|
cli_put_string(context, id->pid > 0 ? "running" : "stopped", "\n");
|
|
|
|
if (id->pid > 0) {
|
|
cli_field_name(context, "pid", ":");
|
|
cli_put_long(context, id->pid, "\n");
|
|
if (id->tid > 0 && id->tid != id->pid) {
|
|
cli_field_name(context, "tid", ":");
|
|
cli_put_long(context, id->tid, "\n");
|
|
}
|
|
char buff[256];
|
|
if (server_get_proc_state("primary_sid", buff, sizeof buff) == 0){
|
|
cli_field_name(context, "primary_sid", ":");
|
|
cli_put_string(context, buff, "\n");
|
|
}
|
|
if (server_get_proc_state("http_port", buff, sizeof buff) == 0){
|
|
cli_field_name(context, "http_port", ":");
|
|
cli_put_string(context, buff, "\n");
|
|
}
|
|
if (server_get_proc_state("mdp_inet_port", buff, sizeof buff) == 0){
|
|
cli_field_name(context, "mdp_inet_port", ":");
|
|
cli_put_string(context, buff, "\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
DEFINE_CMD(app_server_start, 0,
|
|
"Start daemon with instance path from SERVALINSTANCE_PATH environment variable.",
|
|
"start" KEYRING_PIN_OPTIONS, "[--seed]", "[foreground|exec <path>]");
|
|
static int app_server_start(const struct cli_parsed *parsed, struct cli_context *context)
|
|
{
|
|
IN();
|
|
DEBUG_cli_parsed(verbose, parsed);
|
|
/* Process optional arguments */
|
|
int cpid=-1;
|
|
const char *execpath;
|
|
if (cli_arg(parsed, "exec", &execpath, cli_absolute_path, NULL) == -1)
|
|
RETURN(-1);
|
|
int seed = cli_arg(parsed, "--seed", NULL, NULL, NULL) == 0;
|
|
int foregroundP = cli_arg(parsed, "foreground", NULL, NULL, NULL) == 0;
|
|
if (config.interfaces.ac == 0)
|
|
NOWHENCE(WARN("No network interfaces configured (empty 'interfaces' config option)"));
|
|
/* Create the instance directory if it does not yet exist */
|
|
if (create_serval_instance_dir() == -1)
|
|
RETURN(-1);
|
|
// Work out the Process and Thread IDs of any currently running server process, by reading an
|
|
// existing pidfile.
|
|
struct pid_tid id = get_server_pid_tid();
|
|
if (id.pid < 0)
|
|
RETURN(-1);
|
|
int ret = -1;
|
|
// If the pidfile identifies this process, it probably means we are re-spawning after a SEGV, so
|
|
// go ahead and do the fork/exec.
|
|
if (id.pid > 0 && id.pid != getpid()) {
|
|
WARNF("Server already running (pid=%d)", id.pid);
|
|
ret = 10;
|
|
} else {
|
|
if (foregroundP)
|
|
INFOF("Foreground server process %s", execpath ? execpath : "without exec");
|
|
else
|
|
INFOF("Starting background server %s", execpath ? execpath : "without exec");
|
|
/* Start the Serval process. All server settings will be read by the server process from the
|
|
instance directory when it starts up. */
|
|
// Open the keyring and ensure it contains at least one unlocked identity.
|
|
keyring = keyring_open_instance_cli(parsed);
|
|
if (!keyring)
|
|
RETURN(WHY("Could not open keyring file"));
|
|
if (seed && !keyring->identities){
|
|
if (keyring_create_identity(keyring, "")==NULL){
|
|
ret = WHY("Could not create new identity");
|
|
goto exit;
|
|
}
|
|
keyring_commit(keyring);
|
|
}
|
|
if (foregroundP) {
|
|
ret = server();
|
|
// Warning: The server is not rigorous about freeing all memory it allocates, so to avoid
|
|
// memory leaks, the caller should exit() immediately.
|
|
goto exit;
|
|
}
|
|
const char *dir = getenv("SERVALD_SERVER_CHDIR");
|
|
if (!dir)
|
|
dir = config.server.chdir;
|
|
switch (cpid = fork()) {
|
|
case -1:
|
|
/* Main process. Fork failed. There is no child process. */
|
|
WHY_perror("fork");
|
|
goto exit;
|
|
case 0: {
|
|
/* Child process. Fork then exit, to disconnect daemon from parent process, so that
|
|
when daemon exits it does not live on as a zombie. N.B. On Android, do not return from
|
|
within this process; that will unroll the JNI call stack and cause havoc -- call _exit()
|
|
instead (not exit(), because we want to avoid any Java atexit(3) callbacks as well). If
|
|
_exit() is used on non-Android systems, then source code coverage does not get reported,
|
|
because it relies on an atexit() callback to write the accumulated counters into .gcda
|
|
files. */
|
|
DEBUG(verbose, "Child Process");
|
|
// Ensure that all stdio streams are flushed before forking, so that if a child calls
|
|
// exit(), it will not result in any buffered output being written twice to the file
|
|
// descriptor.
|
|
fflush(stdout);
|
|
fflush(stderr);
|
|
switch (fork()) {
|
|
case -1:
|
|
exit(WHY_perror("fork"));
|
|
case 0: {
|
|
/* Grandchild process. Close logfile (so that it gets re-opened again on demand, with
|
|
our own file pointer), disable logging to stderr (about to get redirected to
|
|
/dev/null), disconnect from current directory, disconnect standard I/O streams, and
|
|
start a new process session so that if we are being started by an adb shell session
|
|
on an Android device, then we don't receive a SIGHUP when the adb shell process ends.
|
|
*/
|
|
DEBUG(verbose, "Grand-Child Process, reopening log");
|
|
serval_log_close();
|
|
int fd;
|
|
if ((fd = open("/dev/null", O_RDWR, 0)) == -1)
|
|
exit(WHY_perror("open(\"/dev/null\")"));
|
|
if (setsid() == -1)
|
|
exit(WHY_perror("setsid"));
|
|
if (chdir(dir) == -1)
|
|
exit(WHYF_perror("chdir(%s)", alloca_str_toprint(dir)));
|
|
if (dup2(fd, STDIN_FILENO) == -1)
|
|
exit(WHYF_perror("dup2(%d,stdin)", fd));
|
|
if (dup2(fd, STDOUT_FILENO) == -1)
|
|
exit(WHYF_perror("dup2(%d,stdout)", fd));
|
|
/* Redirect standard error to the current log file, so that any diagnostic messages
|
|
* printed directly to stderr by libraries or child processes will end up being captured
|
|
* in a log file. If standard error is not redirected, then simply direct it to
|
|
* /dev/null.
|
|
*/
|
|
if (!serval_log_capture_fd(STDERR_FILENO) && dup2(fd, STDERR_FILENO) == -1)
|
|
exit(WHYF_perror("dup2(%d,stderr)", fd));
|
|
if (fd > STDERR_FILENO)
|
|
(void)close(fd);
|
|
/* The execpath option is provided so that a JNI call to "start" can be made which
|
|
creates a new server daemon process with the correct argv[0]. Otherwise, the servald
|
|
process appears as a process with argv[0] = "org.servalproject". */
|
|
if (execpath) {
|
|
/* Need the cast on Solaris because it defines NULL as 0L and gcc doesn't see it as a
|
|
sentinal. */
|
|
DEBUGF(verbose, "Calling execl %s start foreground", execpath);
|
|
execl(execpath, "servald", "start", "foreground", (void *)NULL);
|
|
WHYF_perror("execl(%s, \"servald\", \"start\", \"foreground\")", alloca_str_toprint(execpath));
|
|
exit(-1);
|
|
}
|
|
exit(server());
|
|
// UNREACHABLE
|
|
}
|
|
}
|
|
// TODO wait for server_write_pid() to signal more directly?
|
|
exit(0); // Main process is waitpid()-ing for this.
|
|
}
|
|
}
|
|
/* Main process. Wait for the child process to fork the grandchild and exit. */
|
|
waitpid(cpid, NULL, 0);
|
|
/* Allow a few seconds for the grandchild process to report for duty. */
|
|
time_ms_t timeout = gettime_ms() + 5000;
|
|
do {
|
|
sleep_ms(200); // 5 Hz
|
|
} while ((id = get_server_pid_tid()).pid == 0 && gettime_ms() < timeout);
|
|
if (id.pid == -1)
|
|
goto exit;
|
|
if (id.pid == 0) {
|
|
WHY("Server process did not start");
|
|
goto exit;
|
|
}
|
|
ret = 0;
|
|
}
|
|
cli_server_details(context, &id);
|
|
cli_flush(context);
|
|
/* Sleep before returning if env var is set. This is used in testing, to simulate the situation
|
|
on Android phones where the "start" command is invoked via the JNI interface and the calling
|
|
process does not die.
|
|
*/
|
|
const char *post_sleep = getenv("SERVALD_START_POST_SLEEP");
|
|
if (post_sleep) {
|
|
time_ms_t milliseconds = atoi(post_sleep);
|
|
INFOF("Sleeping for %"PRId64" milliseconds", (int64_t) milliseconds);
|
|
sleep_ms(milliseconds);
|
|
}
|
|
exit:
|
|
keyring_free(keyring);
|
|
keyring = NULL;
|
|
RETURN(ret);
|
|
OUT();
|
|
}
|
|
|
|
DEFINE_CMD(app_server_stop,CLIFLAG_PERMISSIVE_CONFIG,
|
|
"Stop a running daemon with instance path from SERVALINSTANCE_PATH environment variable.",
|
|
"stop");
|
|
static int app_server_stop(const struct cli_parsed *parsed, struct cli_context *context)
|
|
{
|
|
DEBUG_cli_parsed(verbose, parsed);
|
|
const struct pid_tid id = get_server_pid_tid();
|
|
cli_server_details(context, &id);
|
|
/* Not running, nothing to stop */
|
|
if (id.pid <= 0)
|
|
return 1;
|
|
INFOF("Stopping server (pid=%d)", id.pid);
|
|
/* Set the stop file and signal the process */
|
|
unsigned tries = 0;
|
|
pid_t running = id.pid;
|
|
while (running == id.pid) {
|
|
if (tries >= 5) {
|
|
WHYF("Servald pid=%d (pidfile=%s) did not stop after %d SIGHUP signals",
|
|
id.pid, server_pidfile_path(), tries);
|
|
return 253;
|
|
}
|
|
++tries;
|
|
switch (send_signal(&id, SIGHUP)) {
|
|
case -1:
|
|
return 252;
|
|
case 0: {
|
|
// Allow a few seconds for the process to die.
|
|
time_ms_t timeout = gettime_ms() + 2000;
|
|
do
|
|
sleep_ms(200); // 5 Hz
|
|
while ((running = get_server_pid_tid().pid) == id.pid && gettime_ms() < timeout);
|
|
}
|
|
break;
|
|
default:
|
|
// Process no longer exists. DO NOT call serverCleanUp() (once used to!) because that would
|
|
// race with a starting server process.
|
|
running = 0;
|
|
break;
|
|
}
|
|
}
|
|
cli_field_name(context, "tries", ":");
|
|
cli_put_long(context, tries, "\n");
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_CMD(app_server_status,CLIFLAG_PERMISSIVE_CONFIG,
|
|
"Display information about running daemon.",
|
|
"status");
|
|
static int app_server_status(const struct cli_parsed *parsed, struct cli_context *context)
|
|
{
|
|
DEBUG_cli_parsed(verbose, parsed);
|
|
const struct pid_tid id = get_server_pid_tid();
|
|
cli_server_details(context, &id);
|
|
return id.pid > 0 ? 0 : 1;
|
|
}
|