X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdaemon.c;h=64e2f9e9ab769229719f82057c84299952ef9e56;hb=a328a943f173391cd9a9a54e257c8dabcd463402;hp=1e3f0029514ee474b7e07b07eb5696e593fd1759;hpb=e7bd7d78b1e1dbc2162f991374c7889c7d7bf60c;p=sliver-openvswitch.git diff --git a/lib/daemon.c b/lib/daemon.c index 1e3f00295..64e2f9e9a 100644 --- a/lib/daemon.c +++ b/lib/daemon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,36 +18,57 @@ #include "daemon.h" #include #include +#include #include #include +#include +#include +#include #include +#include "command-line.h" #include "fatal-signal.h" #include "dirs.h" +#include "lockfile.h" +#include "process.h" +#include "socket-util.h" +#include "timeval.h" #include "util.h" - -#define THIS_MODULE VLM_daemon #include "vlog.h" -/* Should we run in the background? */ +VLOG_DEFINE_THIS_MODULE(daemon); + +/* --detach: Should we run in the background? */ static bool detach; -/* Name of pidfile (null if none). */ +/* --pidfile: Name of pidfile (null if none). */ static char *pidfile; -/* Create pidfile even if one already exists and is locked? */ +/* Device and inode of pidfile, so we can avoid reopening it. */ +static dev_t pidfile_dev; +static ino_t pidfile_ino; + +/* --overwrite-pidfile: Create pidfile even if one already exists and is + locked? */ static bool overwrite_pidfile; -/* Should we chdir to "/". */ +/* --no-chdir: Should we chdir to "/"? */ static bool chdir_ = true; +/* File descriptor used by daemonize_start() and daemonize_complete(). */ +static int daemonize_fd = -1; + +/* --monitor: Should a supervisory process monitor the daemon and restart it if + * it dies due to an error signal? */ +static bool monitor; + /* Returns the file name that would be used for a pidfile if 'name' were * provided to set_pidfile(). The caller must free the returned string. */ char * -make_pidfile_name(const char *name) +make_pidfile_name(const char *name) { - return (!name ? xasprintf("%s/%s.pid", ovs_rundir, program_name) - : *name == '/' ? xstrdup(name) - : xasprintf("%s/%s", ovs_rundir, name)); + return (!name + ? xasprintf("%s/%s.pid", ovs_rundir(), program_name) + : abs_file_name(ovs_rundir(), name)); } /* Sets up a following call to daemonize() to create a pidfile named 'name'. @@ -79,6 +100,13 @@ set_no_chdir(void) chdir_ = false; } +/* Will we chdir to "/" as part of daemonizing? */ +bool +is_chdir_enabled(void) +{ + return chdir_; +} + /* Normally, die_if_already_running() will terminate the program with a message * if a locked pidfile already exists. If this function is called, * die_if_already_running() will merely log a warning. */ @@ -96,28 +124,19 @@ set_detach(void) detach = true; } -/* If a pidfile has been configured and that pidfile already exists and is - * locked by a running process, returns the pid of the running process. - * Otherwise, returns 0. */ -static pid_t -already_running(void) +/* Will daemonize() really detach? */ +bool +get_detach(void) { - pid_t pid = 0; - if (pidfile) { - int fd = open(pidfile, O_RDWR); - if (fd >= 0) { - struct flock lck; - lck.l_type = F_WRLCK; - lck.l_whence = SEEK_SET; - lck.l_start = 0; - lck.l_len = 0; - if (fcntl(fd, F_GETLK, &lck) != -1 && lck.l_type != F_UNLCK) { - pid = lck.l_pid; - } - close(fd); - } - } - return pid; + return detach; +} + +/* Sets up a following call to daemonize() to fork a supervisory process to + * monitor the daemon and restart it if it dies due to an error signal. */ +void +daemon_set_monitor(void) +{ + monitor = true; } /* If a locked pidfile exists, issue a warning message and, unless @@ -125,9 +144,15 @@ already_running(void) void die_if_already_running(void) { - pid_t pid = already_running(); - if (pid) { + pid_t pid; + if (!pidfile) { + return; + } + pid = read_pidfile_if_exists(pidfile); + if (pid > 0) { if (!overwrite_pidfile) { + VLOG_ERR("%s: %s already running as pid %ld, aborting", + get_pidfile(), program_name, (long int) pid); ovs_fatal(0, "%s: already running as pid %ld", get_pidfile(), (long int) pid); } else { @@ -137,9 +162,9 @@ die_if_already_running(void) } } -/* If a pidfile has been configured, creates it and stores the running process' - * pid init. Ensures that the pidfile will be deleted when the process - * exits. */ +/* If a pidfile has been configured, creates it and stores the running + * process's pid in it. Ensures that the pidfile will be deleted when the + * process exits. */ static void make_pidfile(void) { @@ -170,12 +195,21 @@ make_pidfile(void) close(fd); } else { /* Keep 'fd' open to retain the lock. */ + struct stat s; + + if (!fstat(fd, &s)) { + pidfile_dev = s.st_dev; + pidfile_ino = s.st_ino; + } else { + VLOG_ERR("%s: fstat failed: %s", + pidfile, strerror(errno)); + } } - free(text); } else { VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno)); close(fd); } + free(text); } else { VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno)); close(fd); @@ -195,42 +229,249 @@ make_pidfile(void) void daemonize(void) { - if (detach) { - char c = 0; - int fds[2]; - if (pipe(fds) < 0) { - ovs_fatal(errno, "pipe failed"); + daemonize_start(); + daemonize_complete(); +} + +static pid_t +fork_and_wait_for_startup(int *fdp) +{ + int fds[2]; + pid_t pid; + + xpipe(fds); + + pid = fork(); + if (pid > 0) { + /* Running in parent process. */ + char c; + + close(fds[1]); + fatal_signal_fork(); + if (read(fds[0], &c, 1) != 1) { + int retval; + int status; + + do { + retval = waitpid(pid, &status, 0); + } while (retval == -1 && errno == EINTR); + + if (retval == pid + && WIFEXITED(status) + && WEXITSTATUS(status)) { + /* Child exited with an error. Convey the same error to + * our parent process as a courtesy. */ + exit(WEXITSTATUS(status)); + } + + VLOG_FATAL("fork child failed to signal startup (%s)", + strerror(errno)); } + close(fds[0]); + *fdp = -1; + } else if (!pid) { + /* Running in child process. */ + close(fds[0]); + time_postfork(); + lockfile_postfork(); + *fdp = fds[1]; + } else { + VLOG_FATAL("fork failed (%s)", strerror(errno)); + } + + return pid; +} + +static void +fork_notify_startup(int fd) +{ + if (fd != -1) { + size_t bytes_written; + int error; - switch (fork()) { - default: - /* Parent process: wait for child to create pidfile, then exit. */ - close(fds[1]); - fatal_signal_fork(); - if (read(fds[0], &c, 1) != 1) { - ovs_fatal(errno, "daemon child failed to signal startup"); + error = write_fully(fd, "", 1, &bytes_written); + if (error) { + VLOG_FATAL("pipe write failed (%s)", strerror(error)); + } + + close(fd); + } +} + +static bool +should_restart(int status) +{ + if (WIFSIGNALED(status)) { + static const int error_signals[] = { + SIGABRT, SIGALRM, SIGBUS, SIGFPE, SIGILL, SIGPIPE, SIGSEGV, + SIGXCPU, SIGXFSZ + }; + + size_t i; + + for (i = 0; i < ARRAY_SIZE(error_signals); i++) { + if (error_signals[i] == WTERMSIG(status)) { + return true; } - exit(0); + } + } + return false; +} + +static void +monitor_daemon(pid_t daemon_pid) +{ + /* XXX Should log daemon's stderr output at startup time. */ + const char *saved_program_name; + time_t last_restart; + char *status_msg; + int crashes; - case 0: - /* Child process. */ - close(fds[0]); - make_pidfile(); - write(fds[1], &c, 1); - close(fds[1]); - setsid(); - if (chdir_) { - chdir("/"); + saved_program_name = program_name; + program_name = xasprintf("monitor(%s)", program_name); + status_msg = xstrdup("healthy"); + last_restart = TIME_MIN; + crashes = 0; + for (;;) { + int retval; + int status; + + proctitle_set("%s: monitoring pid %lu (%s)", + saved_program_name, (unsigned long int) daemon_pid, + status_msg); + + do { + retval = waitpid(daemon_pid, &status, 0); + } while (retval == -1 && errno == EINTR); + + if (retval == -1) { + VLOG_FATAL("waitpid failed (%s)", strerror(errno)); + } else if (retval == daemon_pid) { + char *s = process_status_msg(status); + if (should_restart(status)) { + free(status_msg); + status_msg = xasprintf("%d crashes: pid %lu died, %s", + ++crashes, + (unsigned long int) daemon_pid, s); + free(s); + + if (WCOREDUMP(status)) { + /* Disable further core dumps to save disk space. */ + struct rlimit r; + + r.rlim_cur = 0; + r.rlim_max = 0; + if (setrlimit(RLIMIT_CORE, &r) == -1) { + VLOG_WARN("failed to disable core dumps: %s", + strerror(errno)); + } + } + + /* Throttle restarts to no more than once every 10 seconds. */ + if (time(NULL) < last_restart + 10) { + VLOG_WARN("%s, waiting until 10 seconds since last " + "restart", status_msg); + for (;;) { + time_t now = time(NULL); + time_t wakeup = last_restart + 10; + if (now >= wakeup) { + break; + } + sleep(wakeup - now); + } + } + last_restart = time(NULL); + + VLOG_ERR("%s, restarting", status_msg); + daemon_pid = fork_and_wait_for_startup(&daemonize_fd); + if (!daemon_pid) { + break; + } + } else { + VLOG_INFO("pid %lu died, %s, exiting", + (unsigned long int) daemon_pid, s); + free(s); + exit(0); } - break; + } + } + free(status_msg); - case -1: - /* Error. */ - ovs_fatal(errno, "could not fork"); - break; + /* Running in new daemon process. */ + proctitle_restore(); + free((char *) program_name); + program_name = saved_program_name; +} + +/* Close stdin, stdout, stderr. If we're started from e.g. an SSH session, + * then this keeps us from holding that session open artificially. */ +static void +close_standard_fds(void) +{ + int null_fd = get_null_fd(); + if (null_fd >= 0) { + dup2(null_fd, STDIN_FILENO); + dup2(null_fd, STDOUT_FILENO); + dup2(null_fd, STDERR_FILENO); + } +} + +/* If daemonization is configured, then starts daemonization, by forking and + * returning in the child process. The parent process hangs around until the + * child lets it know either that it completed startup successfully (by calling + * daemon_complete()) or that it failed to start up (by exiting with a nonzero + * exit code). */ +void +daemonize_start(void) +{ + daemonize_fd = -1; + + if (detach) { + if (fork_and_wait_for_startup(&daemonize_fd) > 0) { + /* Running in parent process. */ + exit(0); } - } else { - make_pidfile(); + /* Running in daemon or monitor process. */ + } + + if (monitor) { + int saved_daemonize_fd = daemonize_fd; + pid_t daemon_pid; + + daemon_pid = fork_and_wait_for_startup(&daemonize_fd); + if (daemon_pid > 0) { + /* Running in monitor process. */ + fork_notify_startup(saved_daemonize_fd); + close_standard_fds(); + monitor_daemon(daemon_pid); + } + /* Running in daemon process. */ + } + + make_pidfile(); + + /* Make sure that the unixctl commands for vlog get registered in a + * daemon, even before the first log message. */ + vlog_init(); +} + +/* If daemonization is configured, then this function notifies the parent + * process that the child process has completed startup successfully. + * + * Calling this function more than once has no additional effect. */ +void +daemonize_complete(void) +{ + fork_notify_startup(daemonize_fd); + daemonize_fd = -1; + + if (detach) { + setsid(); + if (chdir_) { + ignore(chdir("/")); + } + close_standard_fds(); + detach = false; } } @@ -244,21 +485,34 @@ daemon_usage(void) " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n" " --overwrite-pidfile with --pidfile, start even if already " "running\n", - ovs_rundir, program_name); + ovs_rundir(), program_name); } -/* Opens and reads a PID from 'pidfile'. Returns the nonnegative PID if - * successful, otherwise a negative errno value. */ -pid_t -read_pidfile(const char *pidfile) +static pid_t +read_pidfile__(const char *pidfile, bool must_exist) { char line[128]; struct flock lck; + struct stat s; FILE *file; int error; + if ((pidfile_ino || pidfile_dev) + && !stat(pidfile, &s) + && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) { + /* It's our own pidfile. We can't afford to open it, because closing + * *any* fd for a file that a process has locked also releases all the + * locks on that file. + * + * Fortunately, we know the associated pid anyhow: */ + return getpid(); + } + file = fopen(pidfile, "r"); if (!file) { + if (errno == ENOENT && !must_exist) { + return 0; + } error = errno; VLOG_WARN("%s: open: %s", pidfile, strerror(error)); goto error; @@ -268,6 +522,7 @@ read_pidfile(const char *pidfile) lck.l_whence = SEEK_SET; lck.l_start = 0; lck.l_len = 0; + lck.l_pid = 0; if (fcntl(fileno(file), F_GETLK, &lck)) { error = errno; VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error)); @@ -306,3 +561,21 @@ error: } return -error; } + +/* Opens and reads a PID from 'pidfile'. Returns the positive PID if + * successful, otherwise a negative errno value. */ +pid_t +read_pidfile(const char *pidfile) +{ + return read_pidfile__(pidfile, true); +} + + +/* Opens and reads a PID from 'pidfile', if it exists. Returns 0 if 'pidfile' + * doesn't exist, the positive PID if successful, otherwise a negative errno + * value. */ +pid_t +read_pidfile_if_exists(const char *pidfile) +{ + return read_pidfile__(pidfile, false); +}