* it dies due to an error signal? */
static bool monitor;
+static void check_already_running(void);
+static int lock_pidfile(FILE *, int command);
+
/* Returns the file name that would be used for a pidfile if 'name' were
* provided to set_pidfile(). The caller must free the returned string. */
char *
monitor = true;
}
-/* If a locked pidfile exists, issue a warning message and, unless
- * ignore_existing_pidfile() has been called, terminate the program. */
-static void
-die_if_already_running(void)
-{
- pid_t pid;
- if (!pidfile) {
- return;
- }
- pid = read_pidfile_if_exists(pidfile);
- if (pid > 0) {
- if (!overwrite_pidfile) {
- VLOG_ERR("%s: %s already running as pid %ld, aborting",
- get_pidfile(), program_name, (long int) pid);
- ovs_fatal(0, "%s: already running as pid %ld",
- get_pidfile(), (long int) pid);
- } else {
- VLOG_WARN("%s: %s already running as pid %ld",
- get_pidfile(), program_name, (long int) pid);
- }
- }
-}
-
/* If a pidfile has been configured, creates it and stores the running
* process's pid in it. Ensures that the pidfile will be deleted when the
* process exits. */
static void
make_pidfile(void)
{
- if (pidfile) {
- /* Create pidfile via temporary file, so that observers never see an
- * empty pidfile or an unlocked pidfile. */
- long int pid = getpid();
- char *tmpfile;
- int fd;
-
- tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
- fatal_signal_add_file_to_unlink(tmpfile);
- fd = open(tmpfile, O_CREAT | O_WRONLY | O_TRUNC, 0666);
- if (fd >= 0) {
- struct flock lck;
- lck.l_type = F_WRLCK;
- lck.l_whence = SEEK_SET;
- lck.l_start = 0;
- lck.l_len = 0;
- if (fcntl(fd, F_SETLK, &lck) != -1) {
- char *text = xasprintf("%ld\n", pid);
- if (write(fd, text, strlen(text)) == strlen(text)) {
- fatal_signal_add_file_to_unlink(pidfile);
- if (rename(tmpfile, pidfile) < 0) {
- VLOG_ERR("failed to rename \"%s\" to \"%s\": %s",
- tmpfile, pidfile, strerror(errno));
- fatal_signal_remove_file_to_unlink(pidfile);
- close(fd);
- } else {
- /* Keep 'fd' open to retain the lock. */
- struct stat s;
-
- if (!fstat(fd, &s)) {
- pidfile_dev = s.st_dev;
- pidfile_ino = s.st_ino;
- } else {
- VLOG_ERR("%s: fstat failed: %s",
- pidfile, strerror(errno));
- }
- }
- } else {
- VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno));
- close(fd);
- }
- free(text);
- } else {
- VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno));
- close(fd);
+ long int pid = getpid();
+ struct stat s;
+ char *tmpfile;
+ FILE *file;
+ int error;
+
+ /* Create a temporary pidfile. */
+ tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
+ fatal_signal_add_file_to_unlink(tmpfile);
+ file = fopen(tmpfile, "w+");
+ if (!file) {
+ VLOG_FATAL("%s: create failed (%s)", tmpfile, strerror(errno));
+ }
+
+ if (fstat(fileno(file), &s) == -1) {
+ VLOG_FATAL("%s: fstat failed (%s)", tmpfile, strerror(errno));
+ }
+
+ fprintf(file, "%ld\n", pid);
+ if (fflush(file) == EOF) {
+ VLOG_FATAL("%s: write failed (%s)", tmpfile, strerror(errno));
+ }
+
+ error = lock_pidfile(file, F_SETLK);
+ if (error) {
+ VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, strerror(error));
+ }
+
+ /* Rename or link it to the correct name. */
+ if (overwrite_pidfile) {
+ if (rename(tmpfile, pidfile) < 0) {
+ VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)",
+ tmpfile, pidfile, strerror(errno));
+ }
+ } else {
+ do {
+ error = link(tmpfile, pidfile) == -1 ? errno : 0;
+ if (error == EEXIST) {
+ check_already_running();
}
- } else {
- VLOG_ERR("%s: create failed: %s", tmpfile, strerror(errno));
+ } while (error == EINTR || error == EEXIST);
+ if (error) {
+ VLOG_FATAL("failed to link \"%s\" as \"%s\" (%s)",
+ tmpfile, pidfile, strerror(error));
+ }
+ }
+
+ /* Ensure that the pidfile will get deleted on exit. */
+ fatal_signal_add_file_to_unlink(pidfile);
+
+ /* Delete the temporary pidfile if it still exists. */
+ if (!overwrite_pidfile) {
+ error = fatal_signal_unlink_file_now(tmpfile);
+ if (error) {
+ VLOG_FATAL("%s: unlink failed (%s)", tmpfile, strerror(error));
}
- fatal_signal_remove_file_to_unlink(tmpfile);
- free(tmpfile);
}
+
+ /* Clean up.
+ *
+ * We don't close 'file' because its file descriptor must remain open to
+ * hold the lock. */
+ pidfile_dev = s.st_dev;
+ pidfile_ino = s.st_ino;
+ free(tmpfile);
free(pidfile);
pidfile = NULL;
}
/* Running in daemon process. */
}
- die_if_already_running();
- make_pidfile();
+ if (pidfile) {
+ make_pidfile();
+ }
/* Make sure that the unixctl commands for vlog get registered in a
* daemon, even before the first log message. */
ovs_rundir(), program_name);
}
+static int
+lock_pidfile__(FILE *file, int command, struct flock *lck)
+{
+ int error;
+
+ lck->l_type = F_WRLCK;
+ lck->l_whence = SEEK_SET;
+ lck->l_start = 0;
+ lck->l_len = 0;
+ lck->l_pid = 0;
+
+ do {
+ error = fcntl(fileno(file), command, lck) == -1 ? errno : 0;
+ } while (error == EINTR);
+ return error;
+}
+
+static int
+lock_pidfile(FILE *file, int command)
+{
+ struct flock lck;
+
+ return lock_pidfile__(file, command, &lck);
+}
+
static pid_t
-read_pidfile__(const char *pidfile, bool must_exist)
+read_pidfile__(const char *pidfile, bool delete_if_stale)
{
- char line[128];
+ struct stat s, s2;
struct flock lck;
- struct stat s;
+ char line[128];
FILE *file;
int error;
return getpid();
}
- file = fopen(pidfile, "r");
+ file = fopen(pidfile, "r+");
if (!file) {
- if (errno == ENOENT && !must_exist) {
+ if (errno == ENOENT && delete_if_stale) {
return 0;
}
error = errno;
goto error;
}
- lck.l_type = F_WRLCK;
- lck.l_whence = SEEK_SET;
- lck.l_start = 0;
- lck.l_len = 0;
- lck.l_pid = 0;
- if (fcntl(fileno(file), F_GETLK, &lck)) {
- error = errno;
+ error = lock_pidfile__(file, F_GETLK, &lck);
+ if (error) {
VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error));
goto error;
}
if (lck.l_type == F_UNLCK) {
- error = ESRCH;
- VLOG_WARN("%s: pid file is not locked", pidfile);
- goto error;
+ /* pidfile exists but it isn't locked by anyone. We need to delete it
+ * so that a new pidfile can go in its place. But just calling
+ * unlink(pidfile) makes a nasty race: what if someone else unlinks it
+ * before we do and then replaces it by a valid pidfile? We'd unlink
+ * their valid pidfile. We do a little dance to avoid the race, by
+ * locking the invalid pidfile. Only one process can have the invalid
+ * pidfile locked, and only that process has the right to unlink it. */
+ if (!delete_if_stale) {
+ error = ESRCH;
+ VLOG_WARN("%s: pid file is stale", pidfile);
+ goto error;
+ }
+
+ /* Get the lock. */
+ error = lock_pidfile(file, F_SETLK);
+ if (error) {
+ /* We lost a race with someone else doing the same thing. */
+ VLOG_WARN("%s: lost race to lock pidfile", pidfile);
+ goto error;
+ }
+
+ /* Is the file we have locked still named 'pidfile'? */
+ if (stat(pidfile, &s) || fstat(fileno(file), &s2)
+ || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) {
+ /* No. We lost a race with someone else who got the lock before
+ * us, deleted the pidfile, and closed it (releasing the lock). */
+ error = EALREADY;
+ VLOG_WARN("%s: lost race to delete pidfile", pidfile);
+ goto error;
+ }
+
+ /* We won the right to delete the stale pidfile. */
+ if (unlink(pidfile)) {
+ error = errno;
+ VLOG_WARN("%s: failed to delete stale pidfile (%s)",
+ pidfile, strerror(error));
+ goto error;
+ }
+ VLOG_DBG("%s: deleted stale pidfile", pidfile);
+ fclose(file);
+ return 0;
}
if (!fgets(line, sizeof line, file)) {
}
if (lck.l_pid != strtoul(line, NULL, 10)) {
+ /* The process that has the pidfile locked is not the process that
+ * created it. It must be stale, with the process that has it locked
+ * preparing to delete it. */
error = ESRCH;
- VLOG_WARN("l_pid (%ld) != %s pid (%s)",
- (long int) lck.l_pid, pidfile, line);
+ VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld",
+ pidfile, line, (long int) lck.l_pid);
goto error;
}
pid_t
read_pidfile(const char *pidfile)
{
- return read_pidfile__(pidfile, true);
+ return read_pidfile__(pidfile, false);
}
-
-/* Opens and reads a PID from 'pidfile', if it exists. Returns 0 if 'pidfile'
- * doesn't exist, the positive PID if successful, otherwise a negative errno
- * value. */
-pid_t
-read_pidfile_if_exists(const char *pidfile)
+/* Checks whether a process with the given 'pidfile' is already running and,
+ * if so, aborts. If 'pidfile' is stale, deletes it. */
+static void
+check_already_running(void)
{
- return read_pidfile__(pidfile, false);
+ long int pid = read_pidfile__(pidfile, true);
+ if (pid > 0) {
+ VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid);
+ } else if (pid < 0) {
+ VLOG_FATAL("%s: pidfile check failed (%s), aborting",
+ pidfile, strerror(-pid));
+ }
}
global _monitor
_monitor = True
-def _die_if_already_running():
- """If a locked pidfile exists, issue a warning message and, unless
- ignore_existing_pidfile() has been called, terminate the program."""
- if _pidfile is None:
- return
- pid = read_pidfile_if_exists(_pidfile)
- if pid > 0:
- if not _overwrite_pidfile:
- msg = "%s: already running as pid %d" % (_pidfile, pid)
- logging.error("%s, aborting" % msg)
- sys.stderr.write("%s\n" % msg)
- sys.exit(1)
- else:
- logging.warn("%s: %s already running"
- % (get_pidfile(), ovs.util.PROGRAM_NAME))
+def _fatal(msg):
+ logging.error(msg)
+ sys.stderr.write("%s\n" % msg)
+ sys.exit(1)
def _make_pidfile():
"""If a pidfile has been configured, creates it and stores the running
process's pid in it. Ensures that the pidfile will be deleted when the
process exits."""
- if _pidfile is not None:
- # Create pidfile via temporary file, so that observers never see an
- # empty pidfile or an unlocked pidfile.
- pid = os.getpid()
- tmpfile = "%s.tmp%d" % (_pidfile, pid)
- ovs.fatal_signal.add_file_to_unlink(tmpfile)
+ pid = os.getpid()
- try:
- # This is global to keep Python from garbage-collecting and
- # therefore closing our file after this function exits. That would
- # unlock the lock for us, and we don't want that.
- global file
+ # Create a temporary pidfile.
+ tmpfile = "%s.tmp%d" % (_pidfile, pid)
+ ovs.fatal_signal.add_file_to_unlink(tmpfile)
+ try:
+ # This is global to keep Python from garbage-collecting and
+ # therefore closing our file after this function exits. That would
+ # unlock the lock for us, and we don't want that.
+ global file
- file = open(tmpfile, "w")
- except IOError, e:
- logging.error("%s: create failed: %s"
- % (tmpfile, os.strerror(e.errno)))
- return
+ file = open(tmpfile, "w")
+ except IOError, e:
+ _fatal("%s: create failed (%s)" % (tmpfile, e.strerror))
- try:
- fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
- except IOError, e:
- logging.error("%s: fcntl failed: %s"
- % (tmpfile, os.strerror(e.errno)))
- file.close()
- return
+ try:
+ s = os.fstat(file.fileno())
+ except IOError, e:
+ _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror))
- try:
- file.write("%s\n" % pid)
- file.flush()
- ovs.fatal_signal.add_file_to_unlink(_pidfile)
- except OSError, e:
- logging.error("%s: write failed: %s"
- % (tmpfile, os.strerror(e.errno)))
- file.close()
- return
-
+ try:
+ file.write("%s\n" % pid)
+ file.flush()
+ except OSError, e:
+ _fatal("%s: write failed: %s" % (tmpfile, e.strerror))
+
+ try:
+ fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError, e:
+ _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror))
+
+ # Rename or link it to the correct name.
+ if _overwrite_pidfile:
try:
os.rename(tmpfile, _pidfile)
except OSError, e:
- ovs.fatal_signal.remove_file_to_unlink(_pidfile)
- logging.error("failed to rename \"%s\" to \"%s\": %s"
- % (tmpfile, _pidfile, os.strerror(e.errno)))
- file.close()
- return
+ _fatal("failed to rename \"%s\" to \"%s\" (%s)"
+ % (tmpfile, _pidfile, e.strerror))
+ else:
+ while True:
+ try:
+ os.link(tmpfile, _pidfile)
+ error = 0
+ except OSError, e:
+ error = e.errno
+ if error == errno.EEXIST:
+ _check_already_running()
+ elif error != errno.EINTR:
+ break
+ if error:
+ _fatal("failed to link \"%s\" as \"%s\" (%s)"
+ % (tmpfile, _pidfile, os.strerror(error)))
- s = os.fstat(file.fileno())
- _pidfile_dev = s.st_dev
- _pidfile_ino = s.st_ino
+
+ # Ensure that the pidfile will get deleted on exit.
+ ovs.fatal_signal.add_file_to_unlink(_pidfile)
+
+ # Delete the temporary pidfile if it still exists.
+ if not _overwrite_pidfile:
+ error = ovs.fatal_signal.unlink_file_now(tmpfile)
+ if error:
+ _fatal("%s: unlink failed (%s)" % (tmpfile, os.strerror(error)))
+
+ _pidfile_dev = s.st_dev
+ _pidfile_ino = s.st_ino
def daemonize():
"""If configured with set_pidfile() or set_detach(), creates the pid file
_monitor_daemon(daemon_pid)
# Running in daemon process
- _die_if_already_running()
- _make_pidfile()
+ if _pidfile:
+ _make_pidfile()
def daemonize_complete():
"""If daemonization is configured, then this function notifies the parent
--overwrite-pidfile with --pidfile, start even if already running
""" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME))
-def __read_pidfile(pidfile, must_exist):
+def __read_pidfile(pidfile, delete_if_stale):
if _pidfile_dev is not None:
try:
s = os.stat(pidfile)
pass
try:
- file = open(pidfile, "r")
+ file = open(pidfile, "r+")
except IOError, e:
- if e.errno == errno.ENOENT and not must_exist:
+ if e.errno == errno.ENOENT and delete_if_stale:
return 0
- logging.warning("%s: open: %s" % (pidfile, os.strerror(e.errno)))
+ logging.warning("%s: open: %s" % (pidfile, e.strerror))
return -e.errno
# Python fcntl doesn't directly support F_GETLK so we have to just try
- # to lock it. If we get a conflicting lock that's "success"; otherwise
- # the file is not locked.
+ # to lock it.
try:
fcntl.lockf(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
- # File isn't locked if we get here, so treat that as an error.
- logging.warning("%s: pid file is not locked" % pidfile)
- try:
- # As a side effect, this drops the lock.
+
+ # pidfile exists but wasn't locked by anyone. Now we have the lock.
+ if not delete_if_stale:
file.close()
+ logging.warning("%s: pid file is stale" % pidfile)
+ return -errno.ESRCH
+
+ # Is the file we have locked still named 'pidfile'?
+ try:
+ raced = False
+ s = os.stat(pidfile)
+ s2 = os.fstat(file.fileno())
+ if s.st_ino != s2.st_ino or s.st_dev != s2.st_dev:
+ raced = True
except IOError:
- pass
- return -errno.ESRCH
+ raced = True
+ if raced:
+ logging.warning("%s: lost race to delete pidfile" % pidfile)
+ return -errno.ALREADY
+
+ # We won the right to delete the stale pidfile.
+ try:
+ os.unlink(pidfile)
+ except IOError, e:
+ logging.warning("%s: failed to delete stale pidfile"
+ % (pidfile, e.strerror))
+ return -e.errno
+
+ logging.debug("%s: deleted stale pidfile" % pidfile)
+ file.close()
+ return 0
except IOError, e:
if e.errno not in [errno.EACCES, errno.EAGAIN]:
- logging.warn("%s: fcntl: %s" % (pidfile, os.strerror(e.errno)))
+ logging.warn("%s: fcntl: %s" % (pidfile, e.strerror))
return -e.errno
+ # Someone else has the pidfile locked.
try:
try:
return int(file.readline())
def read_pidfile(pidfile):
"""Opens and reads a PID from 'pidfile'. Returns the positive PID if
successful, otherwise a negative errno value."""
- return __read_pidfile(pidfile, True)
-
-def read_pidfile_if_exists(pidfile):
- """Opens and reads a PID from 'pidfile'. Returns 0 if 'pidfile' does not
- exist, the positive PID if successful, otherwise a negative errno value."""
return __read_pidfile(pidfile, False)
+def _check_already_running():
+ pid = __read_pidfile(_pidfile, True)
+ if pid > 0:
+ _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid))
+ elif pid < 0:
+ _fatal("%s: pidfile check failed (%s), aborting"
+ % (_pidfile, os.strerror(pid)))
+
# XXX Python's getopt does not support options with optional arguments, so we
# have to separate --pidfile (with no argument) from --pidfile-name (with an
# argument). Need to write our own getopt I guess.