/*
- * Copyright (c) 2008, 2009 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <config.h>
#include "poll-loop.h"
-#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <stdlib.h>
#include <string.h>
-#include "backtrace.h"
#include "coverage.h"
#include "dynamic-string.h"
+#include "fatal-signal.h"
#include "list.h"
+#include "ovs-thread.h"
+#include "seq.h"
+#include "socket-util.h"
#include "timeval.h"
-
-#define THIS_MODULE VLM_poll_loop
#include "vlog.h"
/* Defines this file's vlog module so log messages are tagged "poll_loop". */
VLOG_DEFINE_THIS_MODULE(poll_loop);

/* Coverage counters: poll_fd_wait counts fd-event registrations,
 * poll_zero_timeout counts poll_block() calls that did not block at all. */
COVERAGE_DEFINE(poll_fd_wait);
COVERAGE_DEFINE(poll_zero_timeout);
/* Per-thread poll loop state: the one-shot set of fd events and the one-shot
 * timeout registered since the last call to poll_block(). */
struct poll_loop {
    /* All active poll waiters.  'pollfds' and 'where' are parallel arrays:
     * entry i of 'where' records the source location that registered entry i
     * of 'pollfds'. */
    struct pollfd *pollfds;     /* Events to pass to poll(). */
    const char **where;         /* Where each pollfd was created. */
    size_t n_waiters;           /* Number of elems in 'where' and 'pollfds'. */
    size_t allocated_waiters;   /* Allocated elems in 'where' and 'pollfds'. */

    /* Time at which to wake up the next call to poll_block(), LLONG_MIN to
     * wake up immediately, or LLONG_MAX to wait forever. */
    long long int timeout_when; /* In msecs as returned by time_msec(). */
    const char *timeout_where;  /* Where 'timeout_when' was set. */
};

/* Returns the calling thread's poll_loop (see definition below). */
static struct poll_loop *poll_loop(void);
/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
* or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will
*
* The event registration is one-shot: only the following call to poll_block()
* is affected. The event will need to be re-registered after poll_block() is
- * called if it is to persist. */
-struct poll_waiter *
-poll_fd_wait(int fd, short int events)
+ * called if it is to persist.
+ *
+ * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
+ * automatically provide the caller's source file and line number for
+ * 'where'.) */
+void
+poll_fd_wait_at(int fd, short int events, const char *where)
{
+ struct poll_loop *loop = poll_loop();
+
COVERAGE_INC(poll_fd_wait);
- return new_waiter(fd, events);
+ if (loop->n_waiters >= loop->allocated_waiters) {
+ loop->where = x2nrealloc(loop->where, &loop->allocated_waiters,
+ sizeof *loop->where);
+ loop->pollfds = xrealloc(loop->pollfds,
+ (loop->allocated_waiters
+ * sizeof *loop->pollfds));
+ }
+
+ loop->where[loop->n_waiters] = where;
+ loop->pollfds[loop->n_waiters].fd = fd;
+ loop->pollfds[loop->n_waiters].events = events;
+ loop->n_waiters++;
}
/* Causes the following call to poll_block() to block for no more than 'msec'
 * milliseconds.  If 'msec' is nonpositive, the following call to poll_block()
 * will not block at all.
 *
 * The timer registration is one-shot: only the following call to poll_block()
 * is affected.  The timer will need to be re-registered after poll_block() is
 * called if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use poll_timer_wait()
 * to automatically provide the caller's source file and line number for
 * 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();
    long long int when;

    if (msec <= 0) {
        /* Nonpositive duration: wake up immediately. */
        when = LLONG_MIN;
    } else {
        /* Compute 'now + msec', saturating to LLONG_MAX instead of
         * overflowing (the sum is formed in unsigned arithmetic, where
         * wraparound is well defined, and then range-checked). */
        when = ((unsigned long long int) now + msec <= LLONG_MAX
                ? now + msec
                : LLONG_MAX);
    }

    poll_timer_wait_until_at(when, where);
}
-/* Causes the following call to poll_block() to wake up immediately, without
- * blocking. */
+/* Causes the following call to poll_block() to wake up when the current time,
+ * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier
+ * than the current time, the following call to poll_block() will not block at
+ * all.
+ *
+ * The timer registration is one-shot: only the following call to poll_block()
+ * is affected. The timer will need to be re-registered after poll_block() is
+ * called if it is to persist.
+ *
+ * ('where' is used in debug logging. Commonly one would use
+ * poll_timer_wait_until() to automatically provide the caller's source file
+ * and line number for 'where'.) */
void
-poll_immediate_wake(void)
+poll_timer_wait_until_at(long long int when, const char *where)
{
- poll_timer_wait(0);
+ struct poll_loop *loop = poll_loop();
+ if (when < loop->timeout_when) {
+ loop->timeout_when = when;
+ loop->timeout_where = where;
+ }
}
/* Causes the following call to poll_block() to wake up immediately, without
 * blocking.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_immediate_wake() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    /* A zero duration registers an already-elapsed timer, so the next
     * poll_block() will not block at all. */
    poll_timer_wait_at(0, where);
}
- ds_init(&ds);
- va_start(args, format);
- ds_put_format_valist(&ds, format, args);
- va_end(args);
+/* Logs, if appropriate, that the poll loop was awakened by an event
+ * registered at 'where' (typically a source file and line number). The other
+ * arguments have two possible interpretations:
+ *
+ * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused
+ * the wakeup. 'timeout' is ignored.
+ *
+ * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after
+ * which the poll loop woke up.
+ */
+static void
+log_wakeup(const char *where, const struct pollfd *pollfd, int timeout)
+{
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
+ enum vlog_level level;
+ int cpu_usage;
+ struct ds s;
- if (backtrace) {
- int i;
+ cpu_usage = get_cpu_usage();
+ if (VLOG_IS_DBG_ENABLED()) {
+ level = VLL_DBG;
+ } else if (cpu_usage > 50 && !VLOG_DROP_INFO(&rl)) {
+ level = VLL_INFO;
+ } else {
+ return;
+ }
- ds_put_char(&ds, ':');
- for (i = 0; i < backtrace->n_frames; i++) {
- ds_put_format(&ds, " 0x%"PRIxPTR, backtrace->frames[i]);
+ ds_init(&s);
+ ds_put_cstr(&s, "wakeup due to ");
+ if (pollfd) {
+ char *description = describe_fd(pollfd->fd);
+ if (pollfd->revents & POLLIN) {
+ ds_put_cstr(&s, "[POLLIN]");
+ }
+ if (pollfd->revents & POLLOUT) {
+ ds_put_cstr(&s, "[POLLOUT]");
}
+ if (pollfd->revents & POLLERR) {
+ ds_put_cstr(&s, "[POLLERR]");
+ }
+ if (pollfd->revents & POLLHUP) {
+ ds_put_cstr(&s, "[POLLHUP]");
+ }
+ if (pollfd->revents & POLLNVAL) {
+ ds_put_cstr(&s, "[POLLNVAL]");
+ }
+ ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description);
+ free(description);
+ } else {
+ ds_put_format(&s, "%d-ms timeout", timeout);
+ }
+ if (where) {
+ ds_put_format(&s, " at %s", where);
}
- VLOG_DBG("%s", ds_cstr(&ds));
- ds_destroy(&ds);
+ if (cpu_usage >= 0) {
+ ds_put_format(&s, " (%d%% CPU usage)", cpu_usage);
+ }
+ VLOG(level, "%s", ds_cstr(&s));
+ ds_destroy(&s);
}
/* Blocks until one or more of the events registered with poll_fd_wait()
 * occurs, or until the minimum duration registered with poll_timer_wait()
 * elapses, or not at all if poll_immediate_wake() has been called.
 *
 * All registrations are one-shot: this function clears them before
 * returning, so callers must re-register before the next poll_block(). */
void
poll_block(void)
{
    struct poll_loop *loop = poll_loop();
    int elapsed;                /* Filled by time_poll(): ms spent blocked. */
    int retval;

    /* Register fatal signal events before actually doing any real work for
     * poll_block. */
    fatal_signal_wait();

    if (loop->timeout_when == LLONG_MIN) {
        /* poll_immediate_wake() (or a nonpositive timer) was used, so this
         * call will not block at all. */
        COVERAGE_INC(poll_zero_timeout);
    }

    retval = time_poll(loop->pollfds, loop->n_waiters,
                       loop->timeout_when, &elapsed);
    if (retval < 0) {
        /* time_poll() reports errors as negative errno values. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
    } else if (!retval) {
        /* No fd became ready: the wakeup was the timer's doing, so attribute
         * it to wherever the timeout was registered. */
        log_wakeup(loop->timeout_where, NULL, elapsed);
    } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
        size_t i;

        /* Log each fd that became ready, attributed to the source location
         * that registered it.  (log_wakeup() applies its own level and
         * rate-limit policy per event.) */
        for (i = 0; i < loop->n_waiters; i++) {
            if (loop->pollfds[i].revents) {
                log_wakeup(loop->where[i], &loop->pollfds[i], 0);
            }
        }
    }

    /* Registrations are one-shot: reset the timeout to "wait forever" and
     * drop all fd waiters for the next round of *_wait() calls. */
    loop->timeout_when = LLONG_MAX;
    loop->timeout_where = NULL;
    loop->n_waiters = 0;

    /* Handle any pending signals before doing anything else. */
    fatal_signal_run();

    seq_woke();
}
+\f
+static void
+free_poll_loop(void *loop_)
{
- struct poll_waiter *pw = new_waiter(fd, events);
- pw->function = function;
- pw->aux = aux;
- return pw;
+ struct poll_loop *loop = loop_;
+
+ free(loop->pollfds);
+ free(loop->where);
+ free(loop);
}
-/* Cancels the file descriptor event registered with poll_fd_wait() or
- * poll_fd_callback(). 'pw' must be the struct poll_waiter returned by one of
- * those functions.
- *
- * An event registered with poll_fd_wait() may be canceled from its time of
- * registration until the next call to poll_block(). At that point, the event
- * is automatically canceled by the system and its poll_waiter is freed.
- *
- * An event registered with poll_fd_callback() may be canceled from its time of
- * registration until its callback is actually called. At that point, the
- * event is automatically canceled by the system and its poll_waiter is
- * freed. */
-void
-poll_cancel(struct poll_waiter *pw)
+static struct poll_loop *
+poll_loop(void)
{
- if (pw) {
- assert(pw != running_cb);
- list_remove(&pw->node);
- free(pw->backtrace);
- free(pw);
- n_waiters--;
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+ static pthread_key_t key;
+ struct poll_loop *loop;
+
+ if (ovsthread_once_start(&once)) {
+ xpthread_key_create(&key, free_poll_loop);
+ ovsthread_once_done(&once);
}
-}
-\f
-/* Creates and returns a new poll_waiter for 'fd' and 'events'. */
-static struct poll_waiter *
-new_waiter(int fd, short int events)
-{
- struct poll_waiter *waiter = xcalloc(1, sizeof *waiter);
- assert(fd >= 0);
- waiter->fd = fd;
- waiter->events = events;
- if (VLOG_IS_DBG_ENABLED()) {
- waiter->backtrace = xmalloc(sizeof *waiter->backtrace);
- backtrace_capture(waiter->backtrace);
+
+ loop = pthread_getspecific(key);
+ if (!loop) {
+ loop = xzalloc(sizeof *loop);
+ xpthread_setspecific(key, loop);
}
- list_push_back(&waiters, &waiter->node);
- n_waiters++;
- return waiter;
+ return loop;
}
+