X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdaemon.c;h=a9cada86646ddae0d6b9acbc98fea54c1d596238;hb=00c08589876b7c1cd8f57e5ebb3e66bb164c5a3d;hp=9a1be55dbfc868d14274a138e9bd28c1491dc521;hpb=b8781ff08d9981258e75789c6f4ed18a56991577;p=sliver-openvswitch.git diff --git a/lib/daemon.c b/lib/daemon.c index 9a1be55db..a9cada866 100644 --- a/lib/daemon.c +++ b/lib/daemon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,43 +18,57 @@ #include "daemon.h" #include #include +#include #include #include +#include #include +#include #include +#include "command-line.h" #include "fatal-signal.h" #include "dirs.h" #include "lockfile.h" +#include "process.h" #include "socket-util.h" #include "timeval.h" #include "util.h" - -#define THIS_MODULE VLM_daemon #include "vlog.h" -/* Should we run in the background? */ +VLOG_DEFINE_THIS_MODULE(daemon); + +/* --detach: Should we run in the background? */ static bool detach; -/* Name of pidfile (null if none). */ +/* --pidfile: Name of pidfile (null if none). */ static char *pidfile; -/* Create pidfile even if one already exists and is locked? */ +/* Device and inode of pidfile, so we can avoid reopening it. */ +static dev_t pidfile_dev; +static ino_t pidfile_ino; + +/* --overwrite-pidfile: Create pidfile even if one already exists and is + locked? */ static bool overwrite_pidfile; -/* Should we chdir to "/"? */ +/* --no-chdir: Should we chdir to "/"? */ static bool chdir_ = true; -/* File descriptors used by daemonize_start() and daemonize_complete(). */ -static int daemonize_fds[2]; +/* File descriptor used by daemonize_start() and daemonize_complete(). */ +static int daemonize_fd = -1; + +/* --monitor: Should a supervisory process monitor the daemon and restart it if + * it dies due to an error signal? */ +static bool monitor; /* Returns the file name that would be used for a pidfile if 'name' were * provided to set_pidfile(). The caller must free the returned string. */ char * -make_pidfile_name(const char *name) +make_pidfile_name(const char *name) { - return (!name ? xasprintf("%s/%s.pid", ovs_rundir, program_name) - : *name == '/' ? xstrdup(name) - : xasprintf("%s/%s", ovs_rundir, name)); + return (!name + ? xasprintf("%s/%s.pid", ovs_rundir(), program_name) + : abs_file_name(ovs_rundir(), name)); } /* Sets up a following call to daemonize() to create a pidfile named 'name'. @@ -93,9 +107,9 @@ is_chdir_enabled(void) return chdir_; } -/* Normally, die_if_already_running() will terminate the program with a message - * if a locked pidfile already exists. If this function is called, - * die_if_already_running() will merely log a warning. */ +/* Normally, daemonize() or damonize_start() will terminate the program with a + * message if a locked pidfile already exists. If this function is called, an + * existing pidfile will be replaced, with a warning. */ void ignore_existing_pidfile(void) { @@ -117,38 +131,28 @@ get_detach(void) return detach; } -/* If a pidfile has been configured and that pidfile already exists and is - * locked by a running process, returns the pid of the running process. - * Otherwise, returns 0. */ -static pid_t -already_running(void) +/* Sets up a following call to daemonize() to fork a supervisory process to + * monitor the daemon and restart it if it dies due to an error signal. */ +void +daemon_set_monitor(void) { - pid_t pid = 0; - if (pidfile) { - int fd = open(pidfile, O_RDWR); - if (fd >= 0) { - struct flock lck; - lck.l_type = F_WRLCK; - lck.l_whence = SEEK_SET; - lck.l_start = 0; - lck.l_len = 0; - if (fcntl(fd, F_GETLK, &lck) != -1 && lck.l_type != F_UNLCK) { - pid = lck.l_pid; - } - close(fd); - } - } - return pid; + monitor = true; } /* If a locked pidfile exists, issue a warning message and, unless * ignore_existing_pidfile() has been called, terminate the program. */ -void +static void die_if_already_running(void) { - pid_t pid = already_running(); - if (pid) { + pid_t pid; + if (!pidfile) { + return; + } + pid = read_pidfile_if_exists(pidfile); + if (pid > 0) { if (!overwrite_pidfile) { + VLOG_ERR("%s: %s already running as pid %ld, aborting", + get_pidfile(), program_name, (long int) pid); ovs_fatal(0, "%s: already running as pid %ld", get_pidfile(), (long int) pid); } else { @@ -158,9 +162,9 @@ die_if_already_running(void) } } -/* If a pidfile has been configured, creates it and stores the running process' - * pid init. Ensures that the pidfile will be deleted when the process - * exits. */ +/* If a pidfile has been configured, creates it and stores the running + * process's pid in it. Ensures that the pidfile will be deleted when the + * process exits. */ static void make_pidfile(void) { @@ -191,12 +195,21 @@ make_pidfile(void) close(fd); } else { /* Keep 'fd' open to retain the lock. */ + struct stat s; + + if (!fstat(fd, &s)) { + pidfile_dev = s.st_dev; + pidfile_ino = s.st_ino; + } else { + VLOG_ERR("%s: fstat failed: %s", + pidfile, strerror(errno)); + } } - free(text); } else { VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno)); close(fd); } + free(text); } else { VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno)); close(fd); @@ -220,6 +233,190 @@ daemonize(void) daemonize_complete(); } +static pid_t +fork_and_wait_for_startup(int *fdp) +{ + int fds[2]; + pid_t pid; + + xpipe(fds); + + pid = fork(); + if (pid > 0) { + /* Running in parent process. */ + size_t bytes_read; + char c; + + close(fds[1]); + fatal_signal_fork(); + if (read_fully(fds[0], &c, 1, &bytes_read) != 0) { + int retval; + int status; + + do { + retval = waitpid(pid, &status, 0); + } while (retval == -1 && errno == EINTR); + + if (retval == pid + && WIFEXITED(status) + && WEXITSTATUS(status)) { + /* Child exited with an error. Convey the same error to + * our parent process as a courtesy. */ + exit(WEXITSTATUS(status)); + } + + VLOG_FATAL("fork child failed to signal startup (%s)", + strerror(errno)); + } + close(fds[0]); + *fdp = -1; + } else if (!pid) { + /* Running in child process. */ + close(fds[0]); + time_postfork(); + lockfile_postfork(); + *fdp = fds[1]; + } else { + VLOG_FATAL("fork failed (%s)", strerror(errno)); + } + + return pid; +} + +static void +fork_notify_startup(int fd) +{ + if (fd != -1) { + size_t bytes_written; + int error; + + error = write_fully(fd, "", 1, &bytes_written); + if (error) { + VLOG_FATAL("pipe write failed (%s)", strerror(error)); + } + + close(fd); + } +} + +static bool +should_restart(int status) +{ + if (WIFSIGNALED(status)) { + static const int error_signals[] = { + SIGABRT, SIGALRM, SIGBUS, SIGFPE, SIGILL, SIGPIPE, SIGSEGV, + SIGXCPU, SIGXFSZ + }; + + size_t i; + + for (i = 0; i < ARRAY_SIZE(error_signals); i++) { + if (error_signals[i] == WTERMSIG(status)) { + return true; + } + } + } + return false; +} + +static void +monitor_daemon(pid_t daemon_pid) +{ + /* XXX Should log daemon's stderr output at startup time. */ + const char *saved_program_name; + time_t last_restart; + char *status_msg; + int crashes; + + saved_program_name = program_name; + program_name = xasprintf("monitor(%s)", program_name); + status_msg = xstrdup("healthy"); + last_restart = TIME_MIN; + crashes = 0; + for (;;) { + int retval; + int status; + + proctitle_set("%s: monitoring pid %lu (%s)", + saved_program_name, (unsigned long int) daemon_pid, + status_msg); + + do { + retval = waitpid(daemon_pid, &status, 0); + } while (retval == -1 && errno == EINTR); + + if (retval == -1) { + VLOG_FATAL("waitpid failed (%s)", strerror(errno)); + } else if (retval == daemon_pid) { + char *s = process_status_msg(status); + if (should_restart(status)) { + free(status_msg); + status_msg = xasprintf("%d crashes: pid %lu died, %s", + ++crashes, + (unsigned long int) daemon_pid, s); + free(s); + + if (WCOREDUMP(status)) { + /* Disable further core dumps to save disk space. */ + struct rlimit r; + + r.rlim_cur = 0; + r.rlim_max = 0; + if (setrlimit(RLIMIT_CORE, &r) == -1) { + VLOG_WARN("failed to disable core dumps: %s", + strerror(errno)); + } + } + + /* Throttle restarts to no more than once every 10 seconds. */ + if (time(NULL) < last_restart + 10) { + VLOG_WARN("%s, waiting until 10 seconds since last " + "restart", status_msg); + for (;;) { + time_t now = time(NULL); + time_t wakeup = last_restart + 10; + if (now >= wakeup) { + break; + } + sleep(wakeup - now); + } + } + last_restart = time(NULL); + + VLOG_ERR("%s, restarting", status_msg); + daemon_pid = fork_and_wait_for_startup(&daemonize_fd); + if (!daemon_pid) { + break; + } + } else { + VLOG_INFO("pid %lu died, %s, exiting", + (unsigned long int) daemon_pid, s); + free(s); + exit(0); + } + } + } + free(status_msg); + + /* Running in new daemon process. */ + proctitle_restore(); + free((char *) program_name); + program_name = saved_program_name; +} + +/* Close stdin, stdout, stderr. If we're started from e.g. an SSH session, + * then this keeps us from holding that session open artificially. */ +static void +close_standard_fds(void) +{ + int null_fd = get_null_fd(); + if (null_fd >= 0) { + dup2(null_fd, STDIN_FILENO); + dup2(null_fd, STDOUT_FILENO); + dup2(null_fd, STDERR_FILENO); + } +} + /* If daemonization is configured, then starts daemonization, by forking and * returning in the child process. The parent process hangs around until the * child lets it know either that it completed startup successfully (by calling @@ -228,72 +425,55 @@ daemonize(void) void daemonize_start(void) { - if (detach) { - pid_t pid; + daemonize_fd = -1; - if (pipe(daemonize_fds) < 0) { - ovs_fatal(errno, "pipe failed"); + if (detach) { + if (fork_and_wait_for_startup(&daemonize_fd) > 0) { + /* Running in parent process. */ + exit(0); } + /* Running in daemon or monitor process. */ + } - pid = fork(); - if (pid > 0) { - /* Running in parent process. */ - char c; - - close(daemonize_fds[1]); - fatal_signal_fork(); - if (read(daemonize_fds[0], &c, 1) != 1) { - int retval; - int status; - - do { - retval = waitpid(pid, &status, 0); - } while (retval == -1 && errno == EINTR); - - if (retval == pid - && WIFEXITED(status) - && WEXITSTATUS(status)) { - /* Child exited with an error. Convey the same error to - * our parent process as a courtesy. */ - exit(WEXITSTATUS(status)); - } + if (monitor) { + int saved_daemonize_fd = daemonize_fd; + pid_t daemon_pid; - ovs_fatal(errno, "daemon child failed to signal startup"); - } - exit(0); - } else if (!pid) { - /* Running in child process. */ - close(daemonize_fds[0]); - make_pidfile(); - time_postfork(); - lockfile_postfork(); - } else { - ovs_fatal(errno, "could not fork"); + daemon_pid = fork_and_wait_for_startup(&daemonize_fd); + if (daemon_pid > 0) { + /* Running in monitor process. */ + fork_notify_startup(saved_daemonize_fd); + close_standard_fds(); + monitor_daemon(daemon_pid); } - } else { - make_pidfile(); + /* Running in daemon process. */ } + + die_if_already_running(); + make_pidfile(); + + /* Make sure that the unixctl commands for vlog get registered in a + * daemon, even before the first log message. */ + vlog_init(); } /* If daemonization is configured, then this function notifies the parent - * process that the child process has completed startup successfully. */ + * process that the child process has completed startup successfully. + * + * Calling this function more than once has no additional effect. */ void daemonize_complete(void) { - if (detach) { - size_t bytes_written; - int error; - - error = write_fully(daemonize_fds[1], "", 1, &bytes_written); - if (error) { - ovs_fatal(error, "could not write to pipe"); - } + fork_notify_startup(daemonize_fd); + daemonize_fd = -1; - close(daemonize_fds[1]); + if (detach) { setsid(); if (chdir_) { ignore(chdir("/")); } + close_standard_fds(); + detach = false; } } @@ -307,21 +487,34 @@ daemon_usage(void) " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n" " --overwrite-pidfile with --pidfile, start even if already " "running\n", - ovs_rundir, program_name); + ovs_rundir(), program_name); } -/* Opens and reads a PID from 'pidfile'. Returns the nonnegative PID if - * successful, otherwise a negative errno value. */ -pid_t -read_pidfile(const char *pidfile) +static pid_t +read_pidfile__(const char *pidfile, bool must_exist) { char line[128]; struct flock lck; + struct stat s; FILE *file; int error; + if ((pidfile_ino || pidfile_dev) + && !stat(pidfile, &s) + && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) { + /* It's our own pidfile. We can't afford to open it, because closing + * *any* fd for a file that a process has locked also releases all the + * locks on that file. + * + * Fortunately, we know the associated pid anyhow: */ + return getpid(); + } + file = fopen(pidfile, "r"); if (!file) { + if (errno == ENOENT && !must_exist) { + return 0; + } error = errno; VLOG_WARN("%s: open: %s", pidfile, strerror(error)); goto error; @@ -331,6 +524,7 @@ read_pidfile(const char *pidfile) lck.l_whence = SEEK_SET; lck.l_start = 0; lck.l_len = 0; + lck.l_pid = 0; if (fcntl(fileno(file), F_GETLK, &lck)) { error = errno; VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error)); @@ -369,3 +563,21 @@ error: } return -error; } + +/* Opens and reads a PID from 'pidfile'. Returns the positive PID if + * successful, otherwise a negative errno value. */ +pid_t +read_pidfile(const char *pidfile) +{ + return read_pidfile__(pidfile, true); +} + + +/* Opens and reads a PID from 'pidfile', if it exists. Returns 0 if 'pidfile' + * doesn't exist, the positive PID if successful, otherwise a negative errno + * value. */ +pid_t +read_pidfile_if_exists(const char *pidfile) +{ + return read_pidfile__(pidfile, false); +}