X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fdaemon.c;h=3c1e5c3b7817a63dfa6e0dffd8a290960868f999;hb=10a89ef04df5669c5cdd02f786150a7ab8454e01;hp=e78538cb9113e9ac4742c8cd2c43b5b47ca24825;hpb=ac718c9dbde6340a85d18c5c8d555d8e0ec88bb3;p=sliver-openvswitch.git diff --git a/lib/daemon.c b/lib/daemon.c index e78538cb9..3c1e5c3b7 100644 --- a/lib/daemon.c +++ b/lib/daemon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,38 +18,66 @@ #include "daemon.h" #include #include +#include #include #include +#include +#include +#include #include +#include "command-line.h" #include "fatal-signal.h" #include "dirs.h" #include "lockfile.h" +#include "ovs-thread.h" +#include "process.h" +#include "socket-util.h" #include "timeval.h" #include "util.h" - -#define THIS_MODULE VLM_daemon #include "vlog.h" -/* Should we run in the background? */ -static bool detach; +VLOG_DEFINE_THIS_MODULE(daemon); + +/* --detach: Should we run in the background? */ +static bool detach; /* Was --detach specified? */ +static bool detached; /* Have we already detached? */ -/* Name of pidfile (null if none). */ +/* --pidfile: Name of pidfile (null if none). */ static char *pidfile; -/* Create pidfile even if one already exists and is locked? */ +/* Device and inode of pidfile, so we can avoid reopening it. */ +static dev_t pidfile_dev; +static ino_t pidfile_ino; + +/* --overwrite-pidfile: Create pidfile even if one already exists and is + locked? */ static bool overwrite_pidfile; -/* Should we chdir to "/". */ +/* --no-chdir: Should we chdir to "/"? */ static bool chdir_ = true; +/* File descriptor used by daemonize_start() and daemonize_complete(). */ +static int daemonize_fd = -1; + +/* --monitor: Should a supervisory process monitor the daemon and restart it if + * it dies due to an error signal? */ +static bool monitor; + +/* For each of the standard file descriptors, whether to replace it by + * /dev/null (if false) or keep it for the daemon to use (if true). */ +static bool save_fds[3]; + +static void check_already_running(void); +static int lock_pidfile(FILE *, int command); + /* Returns the file name that would be used for a pidfile if 'name' were * provided to set_pidfile(). The caller must free the returned string. */ char * -make_pidfile_name(const char *name) +make_pidfile_name(const char *name) { - return (!name ? xasprintf("%s/%s.pid", ovs_rundir, program_name) - : *name == '/' ? xstrdup(name) - : xasprintf("%s/%s", ovs_rundir, name)); + return (!name + ? xasprintf("%s/%s.pid", ovs_rundir(), program_name) + : abs_file_name(ovs_rundir(), name)); } /* Sets up a following call to daemonize() to create a pidfile named 'name'. @@ -61,6 +89,7 @@ make_pidfile_name(const char *name) void set_pidfile(const char *name) { + assert_single_threaded(); free(pidfile); pidfile = make_pidfile_name(name); } @@ -81,9 +110,16 @@ set_no_chdir(void) chdir_ = false; } -/* Normally, die_if_already_running() will terminate the program with a message - * if a locked pidfile already exists. If this function is called, - * die_if_already_running() will merely log a warning. */ +/* Will we chdir to "/" as part of daemonizing? */ +bool +is_chdir_enabled(void) +{ + return chdir_; +} + +/* Normally, daemonize() or damonize_start() will terminate the program with a + * message if a locked pidfile already exists. If this function is called, an + * existing pidfile will be replaced, with a warning. */ void ignore_existing_pidfile(void) { @@ -98,144 +134,448 @@ set_detach(void) detach = true; } -/* If a pidfile has been configured and that pidfile already exists and is - * locked by a running process, returns the pid of the running process. - * Otherwise, returns 0. */ -static pid_t -already_running(void) +/* Will daemonize() really detach? */ +bool +get_detach(void) +{ + return detach; +} + +/* Sets up a following call to daemonize() to fork a supervisory process to + * monitor the daemon and restart it if it dies due to an error signal. */ +void +daemon_set_monitor(void) +{ + monitor = true; +} + +/* A daemon doesn't normally have any use for the file descriptors for stdin, + * stdout, and stderr after it detaches. To keep these file descriptors from + * e.g. holding an SSH session open, by default detaching replaces each of + * these file descriptors by /dev/null. But a few daemons expect the user to + * redirect stdout or stderr to a file, in which case it is desirable to keep + * these file descriptors. This function, therefore, disables replacing 'fd' + * by /dev/null when the daemon detaches. */ +void +daemon_save_fd(int fd) +{ + ovs_assert(fd == STDIN_FILENO || + fd == STDOUT_FILENO || + fd == STDERR_FILENO); + save_fds[fd] = true; +} + +/* Unregisters pidfile from being unlinked when the program terminates via +* exit() or a fatal signal. */ +void +remove_pidfile_from_unlink(void) { - pid_t pid = 0; if (pidfile) { - int fd = open(pidfile, O_RDWR); - if (fd >= 0) { - struct flock lck; - lck.l_type = F_WRLCK; - lck.l_whence = SEEK_SET; - lck.l_start = 0; - lck.l_len = 0; - if (fcntl(fd, F_GETLK, &lck) != -1 && lck.l_type != F_UNLCK) { - pid = lck.l_pid; - } - close(fd); - } + fatal_signal_remove_file_to_unlink(pidfile); } - return pid; } -/* If a locked pidfile exists, issue a warning message and, unless - * ignore_existing_pidfile() has been called, terminate the program. */ +/* Registers pidfile to be unlinked when the program terminates via exit() or a + * fatal signal. */ void -die_if_already_running(void) +add_pidfile_to_unlink(void) { - pid_t pid = already_running(); - if (pid) { - if (!overwrite_pidfile) { - ovs_fatal(0, "%s: already running as pid %ld", - get_pidfile(), (long int) pid); - } else { - VLOG_WARN("%s: %s already running as pid %ld", - get_pidfile(), program_name, (long int) pid); - } + if (pidfile) { + fatal_signal_add_file_to_unlink(pidfile); } } -/* If a pidfile has been configured, creates it and stores the running process' - * pid init. Ensures that the pidfile will be deleted when the process - * exits. */ +/* If a pidfile has been configured, creates it and stores the running + * process's pid in it. Ensures that the pidfile will be deleted when the + * process exits. */ static void make_pidfile(void) { - if (pidfile) { - /* Create pidfile via temporary file, so that observers never see an - * empty pidfile or an unlocked pidfile. */ - long int pid = getpid(); - char *tmpfile; - int fd; + long int pid = getpid(); + struct stat s; + char *tmpfile; + FILE *file; + int error; + /* Create a temporary pidfile. */ + if (overwrite_pidfile) { tmpfile = xasprintf("%s.tmp%ld", pidfile, pid); fatal_signal_add_file_to_unlink(tmpfile); - fd = open(tmpfile, O_CREAT | O_WRONLY | O_TRUNC, 0666); - if (fd >= 0) { - struct flock lck; - lck.l_type = F_WRLCK; - lck.l_whence = SEEK_SET; - lck.l_start = 0; - lck.l_len = 0; - if (fcntl(fd, F_SETLK, &lck) != -1) { - char *text = xasprintf("%ld\n", pid); - if (write(fd, text, strlen(text)) == strlen(text)) { - fatal_signal_add_file_to_unlink(pidfile); - if (rename(tmpfile, pidfile) < 0) { - VLOG_ERR("failed to rename \"%s\" to \"%s\": %s", - tmpfile, pidfile, strerror(errno)); - fatal_signal_remove_file_to_unlink(pidfile); - close(fd); - } else { - /* Keep 'fd' open to retain the lock. */ - } - free(text); + } else { + /* Everyone shares the same file which will be treated as a lock. To + * avoid some uncomfortable race conditions, we can't set up the fatal + * signal unlink until we've acquired it. */ + tmpfile = xasprintf("%s.tmp", pidfile); + } + + file = fopen(tmpfile, "a+"); + if (!file) { + VLOG_FATAL("%s: create failed (%s)", tmpfile, ovs_strerror(errno)); + } + + error = lock_pidfile(file, F_SETLK); + if (error) { + /* Looks like we failed to acquire the lock. Note that, if we failed + * for some other reason (and '!overwrite_pidfile'), we will have + * left 'tmpfile' as garbage in the file system. */ + VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, + ovs_strerror(error)); + } + + if (!overwrite_pidfile) { + /* We acquired the lock. Make sure to clean up on exit, and verify + * that we're allowed to create the actual pidfile. */ + fatal_signal_add_file_to_unlink(tmpfile); + check_already_running(); + } + + if (fstat(fileno(file), &s) == -1) { + VLOG_FATAL("%s: fstat failed (%s)", tmpfile, ovs_strerror(errno)); + } + + if (ftruncate(fileno(file), 0) == -1) { + VLOG_FATAL("%s: truncate failed (%s)", tmpfile, ovs_strerror(errno)); + } + + fprintf(file, "%ld\n", pid); + if (fflush(file) == EOF) { + VLOG_FATAL("%s: write failed (%s)", tmpfile, ovs_strerror(errno)); + } + + error = rename(tmpfile, pidfile); + + /* Due to a race, 'tmpfile' may be owned by a different process, so we + * shouldn't delete it on exit. */ + fatal_signal_remove_file_to_unlink(tmpfile); + + if (error < 0) { + VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)", + tmpfile, pidfile, ovs_strerror(errno)); + } + + /* Ensure that the pidfile will get deleted on exit. */ + fatal_signal_add_file_to_unlink(pidfile); + + /* Clean up. + * + * We don't close 'file' because its file descriptor must remain open to + * hold the lock. */ + pidfile_dev = s.st_dev; + pidfile_ino = s.st_ino; + free(tmpfile); +} + +/* If configured with set_pidfile() or set_detach(), creates the pid file and + * detaches from the foreground session. */ +void +daemonize(void) +{ + daemonize_start(); + daemonize_complete(); +} + +/* Calls fork() and on success returns its return value. On failure, logs an + * error and exits unsuccessfully. + * + * Post-fork, but before returning, this function calls a few other functions + * that are generally useful if the child isn't planning to exec a new + * process. */ +pid_t +fork_and_clean_up(void) +{ + pid_t pid = xfork(); + if (pid > 0) { + /* Running in parent process. */ + fatal_signal_fork(); + } else if (!pid) { + /* Running in child process. */ + time_postfork(); + lockfile_postfork(); + } + return pid; +} + +/* Forks, then: + * + * - In the parent, waits for the child to signal that it has completed its + * startup sequence. Then stores -1 in '*fdp' and returns the child's pid. + * + * - In the child, stores a fd in '*fdp' and returns 0. The caller should + * pass the fd to fork_notify_startup() after it finishes its startup + * sequence. + * + * If something goes wrong with the fork, logs a critical error and aborts the + * process. */ +static pid_t +fork_and_wait_for_startup(int *fdp) +{ + int fds[2]; + pid_t pid; + + xpipe(fds); + + pid = fork_and_clean_up(); + if (pid > 0) { + /* Running in parent process. */ + size_t bytes_read; + char c; + + close(fds[1]); + if (read_fully(fds[0], &c, 1, &bytes_read) != 0) { + int retval; + int status; + + do { + retval = waitpid(pid, &status, 0); + } while (retval == -1 && errno == EINTR); + + if (retval == pid) { + if (WIFEXITED(status) && WEXITSTATUS(status)) { + /* Child exited with an error. Convey the same error + * to our parent process as a courtesy. */ + exit(WEXITSTATUS(status)); } else { - VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno)); - close(fd); + char *status_msg = process_status_msg(status); + VLOG_FATAL("fork child died before signaling startup (%s)", + status_msg); } + } else if (retval < 0) { + VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno)); } else { - VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno)); - close(fd); + NOT_REACHED(); } - } else { - VLOG_ERR("%s: create failed: %s", tmpfile, strerror(errno)); } - fatal_signal_remove_file_to_unlink(tmpfile); - free(tmpfile); + close(fds[0]); + *fdp = -1; + } else if (!pid) { + /* Running in child process. */ + close(fds[0]); + *fdp = fds[1]; } - free(pidfile); - pidfile = NULL; + + return pid; } -/* If configured with set_pidfile() or set_detach(), creates the pid file and - * detaches from the foreground session. */ -void -daemonize(void) +static void +fork_notify_startup(int fd) { - if (detach) { - char c = 0; - int fds[2]; - if (pipe(fds) < 0) { - ovs_fatal(errno, "pipe failed"); + if (fd != -1) { + size_t bytes_written; + int error; + + error = write_fully(fd, "", 1, &bytes_written); + if (error) { + VLOG_FATAL("pipe write failed (%s)", ovs_strerror(error)); } - switch (fork()) { - default: - /* Parent process: wait for child to create pidfile, then exit. */ - close(fds[1]); - fatal_signal_fork(); - if (read(fds[0], &c, 1) != 1) { - ovs_fatal(errno, "daemon child failed to signal startup"); + close(fd); + } +} + +static bool +should_restart(int status) +{ + if (WIFSIGNALED(status)) { + static const int error_signals[] = { + SIGABRT, SIGALRM, SIGBUS, SIGFPE, SIGILL, SIGPIPE, SIGSEGV, + SIGXCPU, SIGXFSZ + }; + + size_t i; + + for (i = 0; i < ARRAY_SIZE(error_signals); i++) { + if (error_signals[i] == WTERMSIG(status)) { + return true; } - exit(0); + } + } + return false; +} + +static void +monitor_daemon(pid_t daemon_pid) +{ + /* XXX Should log daemon's stderr output at startup time. */ + time_t last_restart; + char *status_msg; + int crashes; - case 0: - /* Child process. */ - close(fds[0]); - make_pidfile(); - write(fds[1], &c, 1); - close(fds[1]); - setsid(); - if (chdir_) { - chdir("/"); + subprogram_name = "monitor"; + status_msg = xstrdup("healthy"); + last_restart = TIME_MIN; + crashes = 0; + for (;;) { + int retval; + int status; + + proctitle_set("monitoring pid %lu (%s)", + (unsigned long int) daemon_pid, status_msg); + + do { + retval = waitpid(daemon_pid, &status, 0); + } while (retval == -1 && errno == EINTR); + + if (retval == -1) { + VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno)); + } else if (retval == daemon_pid) { + char *s = process_status_msg(status); + if (should_restart(status)) { + free(status_msg); + status_msg = xasprintf("%d crashes: pid %lu died, %s", + ++crashes, + (unsigned long int) daemon_pid, s); + free(s); + + if (WCOREDUMP(status)) { + /* Disable further core dumps to save disk space. */ + struct rlimit r; + + r.rlim_cur = 0; + r.rlim_max = 0; + if (setrlimit(RLIMIT_CORE, &r) == -1) { + VLOG_WARN("failed to disable core dumps: %s", + ovs_strerror(errno)); + } + } + + /* Throttle restarts to no more than once every 10 seconds. */ + if (time(NULL) < last_restart + 10) { + VLOG_WARN("%s, waiting until 10 seconds since last " + "restart", status_msg); + for (;;) { + time_t now = time(NULL); + time_t wakeup = last_restart + 10; + if (now >= wakeup) { + break; + } + sleep(wakeup - now); + } + } + last_restart = time(NULL); + + VLOG_ERR("%s, restarting", status_msg); + daemon_pid = fork_and_wait_for_startup(&daemonize_fd); + if (!daemon_pid) { + break; + } + } else { + VLOG_INFO("pid %lu died, %s, exiting", + (unsigned long int) daemon_pid, s); + free(s); + exit(0); } - time_postfork(); - lockfile_postfork(); - break; - - case -1: - /* Error. */ - ovs_fatal(errno, "could not fork"); - break; } - } else { + } + free(status_msg); + + /* Running in new daemon process. */ + proctitle_restore(); + subprogram_name = ""; +} + +/* Close standard file descriptors (except any that the client has requested we + * leave open by calling daemon_save_fd()). If we're started from e.g. an SSH + * session, then this keeps us from holding that session open artificially. */ +static void +close_standard_fds(void) +{ + int null_fd = get_null_fd(); + if (null_fd >= 0) { + int fd; + + for (fd = 0; fd < 3; fd++) { + if (!save_fds[fd]) { + dup2(null_fd, fd); + } + } + } + + /* Disable logging to stderr to avoid wasting CPU time. */ + vlog_set_levels(NULL, VLF_CONSOLE, VLL_OFF); +} + +/* If daemonization is configured, then starts daemonization, by forking and + * returning in the child process. The parent process hangs around until the + * child lets it know either that it completed startup successfully (by calling + * daemon_complete()) or that it failed to start up (by exiting with a nonzero + * exit code). */ +void +daemonize_start(void) +{ + assert_single_threaded(); + daemonize_fd = -1; + + if (detach) { + if (fork_and_wait_for_startup(&daemonize_fd) > 0) { + /* Running in parent process. */ + exit(0); + } + + /* Running in daemon or monitor process. */ + setsid(); + } + + if (monitor) { + int saved_daemonize_fd = daemonize_fd; + pid_t daemon_pid; + + daemon_pid = fork_and_wait_for_startup(&daemonize_fd); + if (daemon_pid > 0) { + /* Running in monitor process. */ + fork_notify_startup(saved_daemonize_fd); + close_standard_fds(); + monitor_daemon(daemon_pid); + } + /* Running in daemon process. */ + } + + if (pidfile) { make_pidfile(); } + + /* Make sure that the unixctl commands for vlog get registered in a + * daemon, even before the first log message. */ + vlog_init(); +} + +/* If daemonization is configured, then this function notifies the parent + * process that the child process has completed startup successfully. It also + * call daemonize_post_detach(). + * + * Calling this function more than once has no additional effect. */ +void +daemonize_complete(void) +{ + if (pidfile) { + free(pidfile); + pidfile = NULL; + } + + if (!detached) { + detached = true; + + fork_notify_startup(daemonize_fd); + daemonize_fd = -1; + daemonize_post_detach(); + } +} + +/* If daemonization is configured, then this function does traditional Unix + * daemonization behavior: join a new session, chdir to the root (if not + * disabled), and close the standard file descriptors. + * + * It only makes sense to call this function as part of an implementation of a + * special daemon subprocess. A normal daemon should just call + * daemonize_complete(). */ +void +daemonize_post_detach(void) +{ + if (detach) { + if (chdir_) { + ignore(chdir("/")); + } + close_standard_fds(); + } } void @@ -248,45 +588,117 @@ daemon_usage(void) " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n" " --overwrite-pidfile with --pidfile, start even if already " "running\n", - ovs_rundir, program_name); + ovs_rundir(), program_name); } -/* Opens and reads a PID from 'pidfile'. Returns the nonnegative PID if - * successful, otherwise a negative errno value. */ -pid_t -read_pidfile(const char *pidfile) +static int +lock_pidfile__(FILE *file, int command, struct flock *lck) +{ + int error; + + lck->l_type = F_WRLCK; + lck->l_whence = SEEK_SET; + lck->l_start = 0; + lck->l_len = 0; + lck->l_pid = 0; + + do { + error = fcntl(fileno(file), command, lck) == -1 ? errno : 0; + } while (error == EINTR); + return error; +} + +static int +lock_pidfile(FILE *file, int command) { - char line[128]; struct flock lck; + + return lock_pidfile__(file, command, &lck); +} + +static pid_t +read_pidfile__(const char *pidfile, bool delete_if_stale) +{ + struct stat s, s2; + struct flock lck; + char line[128]; FILE *file; int error; - file = fopen(pidfile, "r"); + if ((pidfile_ino || pidfile_dev) + && !stat(pidfile, &s) + && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) { + /* It's our own pidfile. We can't afford to open it, because closing + * *any* fd for a file that a process has locked also releases all the + * locks on that file. + * + * Fortunately, we know the associated pid anyhow: */ + return getpid(); + } + + file = fopen(pidfile, "r+"); if (!file) { + if (errno == ENOENT && delete_if_stale) { + return 0; + } error = errno; - VLOG_WARN("%s: open: %s", pidfile, strerror(error)); + VLOG_WARN("%s: open: %s", pidfile, ovs_strerror(error)); goto error; } - lck.l_type = F_WRLCK; - lck.l_whence = SEEK_SET; - lck.l_start = 0; - lck.l_len = 0; - if (fcntl(fileno(file), F_GETLK, &lck)) { - error = errno; - VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error)); + error = lock_pidfile__(file, F_GETLK, &lck); + if (error) { + VLOG_WARN("%s: fcntl: %s", pidfile, ovs_strerror(error)); goto error; } if (lck.l_type == F_UNLCK) { - error = ESRCH; - VLOG_WARN("%s: pid file is not locked", pidfile); - goto error; + /* pidfile exists but it isn't locked by anyone. We need to delete it + * so that a new pidfile can go in its place. But just calling + * unlink(pidfile) makes a nasty race: what if someone else unlinks it + * before we do and then replaces it by a valid pidfile? We'd unlink + * their valid pidfile. We do a little dance to avoid the race, by + * locking the invalid pidfile. Only one process can have the invalid + * pidfile locked, and only that process has the right to unlink it. */ + if (!delete_if_stale) { + error = ESRCH; + VLOG_DBG("%s: pid file is stale", pidfile); + goto error; + } + + /* Get the lock. */ + error = lock_pidfile(file, F_SETLK); + if (error) { + /* We lost a race with someone else doing the same thing. */ + VLOG_WARN("%s: lost race to lock pidfile", pidfile); + goto error; + } + + /* Is the file we have locked still named 'pidfile'? */ + if (stat(pidfile, &s) || fstat(fileno(file), &s2) + || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) { + /* No. We lost a race with someone else who got the lock before + * us, deleted the pidfile, and closed it (releasing the lock). */ + error = EALREADY; + VLOG_WARN("%s: lost race to delete pidfile", pidfile); + goto error; + } + + /* We won the right to delete the stale pidfile. */ + if (unlink(pidfile)) { + error = errno; + VLOG_WARN("%s: failed to delete stale pidfile (%s)", + pidfile, ovs_strerror(error)); + goto error; + } + VLOG_DBG("%s: deleted stale pidfile", pidfile); + fclose(file); + return 0; } if (!fgets(line, sizeof line, file)) { if (ferror(file)) { error = errno; - VLOG_WARN("%s: read: %s", pidfile, strerror(error)); + VLOG_WARN("%s: read: %s", pidfile, ovs_strerror(error)); } else { error = ESRCH; VLOG_WARN("%s: read: unexpected end of file", pidfile); @@ -295,9 +707,12 @@ read_pidfile(const char *pidfile) } if (lck.l_pid != strtoul(line, NULL, 10)) { + /* The process that has the pidfile locked is not the process that + * created it. It must be stale, with the process that has it locked + * preparing to delete it. */ error = ESRCH; - VLOG_WARN("l_pid (%ld) != %s pid (%s)", - (long int) lck.l_pid, pidfile, line); + VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld", + pidfile, line, (long int) lck.l_pid); goto error; } @@ -310,3 +725,25 @@ error: } return -error; } + +/* Opens and reads a PID from 'pidfile'. Returns the positive PID if + * successful, otherwise a negative errno value. */ +pid_t +read_pidfile(const char *pidfile) +{ + return read_pidfile__(pidfile, false); +} + +/* Checks whether a process with the given 'pidfile' is already running and, + * if so, aborts. If 'pidfile' is stale, deletes it. */ +static void +check_already_running(void) +{ + long int pid = read_pidfile__(pidfile, true); + if (pid > 0) { + VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid); + } else if (pid < 0) { + VLOG_FATAL("%s: pidfile check failed (%s), aborting", + pidfile, ovs_strerror(-pid)); + } +}