/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
#include <config.h>
#include "poll-loop.h"
-#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "list.h"
+#include "ovs-thread.h"
+#include "seq.h"
#include "socket-util.h"
#include "timeval.h"
#include "vlog.h"
-
-#undef poll_fd_wait
-#undef poll_timer_wait
-#undef poll_timer_wait_until
-#undef poll_immediate_wake
+#include "hmap.h"
+#include "hash.h"
VLOG_DEFINE_THIS_MODULE(poll_loop);
COVERAGE_DEFINE(poll_fd_wait);
COVERAGE_DEFINE(poll_zero_timeout);
-/* An event that will wake the following call to poll_block(). */
-struct poll_waiter {
- /* Set when the waiter is created. */
- struct list node; /* Element in global waiters list. */
- int fd; /* File descriptor. */
- short int events; /* Events to wait for (POLLIN, POLLOUT). */
- const char *where; /* Where the waiter was created. */
-
- /* Set only when poll_block() is called. */
- struct pollfd *pollfd; /* Pointer to element of the pollfds array. */
+struct poll_node {
+ struct hmap_node hmap_node;
+ struct pollfd pollfd; /* Events to pass to time_poll(). */
+ HANDLE wevent; /* Events for WaitForMultipleObjects(). */
+ const char *where; /* Where poll_node was created. */
};
-/* All active poll waiters. */
-static struct list waiters = LIST_INITIALIZER(&waiters);
-
-/* Max time to wait in next call to poll_block(), in milliseconds, or -1 to
- * wait forever. */
-static int timeout = -1;
-
-/* Location where waiter created. */
-static const char *timeout_where;
+struct poll_loop {
+ /* All active poll waiters. */
+ struct hmap poll_nodes;
-/* Array of file descriptors from last run of poll_block(). */
-static struct pollfd *pollfds;
+ /* Time at which to wake up the next call to poll_block(), LLONG_MIN to
+ * wake up immediately, or LLONG_MAX to wait forever. */
+ long long int timeout_when; /* In msecs as returned by time_msec(). */
+ const char *timeout_where; /* Where 'timeout_when' was set. */
+};
-/* Allocated size of pollfds. */
-static size_t max_pollfds;
+static struct poll_loop *poll_loop(void);
-/* Current number of elements in pollfds. */
-static int n_pollfds;
+/* Look up the node with same fd and wevent. */
+static struct poll_node *
+find_poll_node(struct poll_loop *loop, int fd, uint32_t wevent)
+{
+ struct poll_node *node;
-static struct poll_waiter *new_waiter(int fd, short int events,
- const char *where);
+ HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash_2words(fd, wevent),
+ &loop->poll_nodes) {
+ if (node->pollfd.fd == fd && node->wevent == wevent) {
+ return node;
+ }
+ }
+ return NULL;
+}
-/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
- * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will
- * wake up when 'fd' becomes ready for one or more of the requested events.
+/* On Unix based systems:
*
- * The event registration is one-shot: only the following call to poll_block()
- * is affected. The event will need to be re-registered after poll_block() is
- * called if it is to persist.
+ * Registers 'fd' as waiting for the specified 'events' (which should be
+ * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to
+ * poll_block() will wake up when 'fd' becomes ready for one or more of the
+ * requested events. the 'fd's are given to poll() function later.
+ *
+ * On Windows system:
*
- * Ordinarily the 'where' argument is supplied automatically; see poll-loop.h
- * for more information. */
-struct poll_waiter *
-poll_fd_wait(int fd, short int events, const char *where)
+ * If both 'wevent' handle and 'fd' is specified, associate the 'fd' with
+ * with that 'wevent' for 'events' (implemented in poll_block()).
+ * In case of no 'fd' specified, wake up on any event on that 'wevent'.
+ * These wevents are given to the WaitForMultipleObjects() to be polled.
+ * The event registration is one-shot: only the following call to
+ * poll_block() is affected. The event will need to be re-registered after
+ * poll_block() is called if it is to persist.
+ *
+ * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
+ * automatically provide the caller's source file and line number for
+ * 'where'.) */
+void
+poll_fd_wait_at(int fd, HANDLE wevent, short int events, const char *where)
{
+ struct poll_loop *loop = poll_loop();
+ struct poll_node *node;
+
COVERAGE_INC(poll_fd_wait);
- return new_waiter(fd, events, where);
-}
-/* The caller must ensure that 'msec' is not negative. */
-static void
-poll_timer_wait__(int msec, const char *where)
-{
- if (timeout < 0 || msec < timeout) {
- timeout = msec;
- timeout_where = where;
+#ifdef _WIN32
+ /* Null event cannot be polled. */
+ if (wevent == 0) {
+ VLOG_ERR("No event to wait fd %d", fd);
+ return;
+ }
+#endif
+
+ /* Check for duplicate. If found, "or" the event. */
+ node = find_poll_node(loop, fd, wevent);
+ if (node) {
+ node->pollfd.events |= events;
+ } else {
+ node = xzalloc(sizeof *node);
+ hmap_insert(&loop->poll_nodes, &node->hmap_node,
+ hash_2words(fd, wevent));
+ node->pollfd.fd = fd;
+ node->pollfd.events = events;
+ node->wevent = wevent;
+ node->where = where;
}
}
/* Causes the following call to poll_block() to block for no more than 'msec'
 * milliseconds.  If 'msec' is nonpositive, the following call to poll_block()
 * will not block at all.
 *
 * The timer registration is one-shot: only the following call to poll_block()
 * is affected.  The timer will need to be re-registered after poll_block() is
 * called if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use poll_timer_wait()
 * to automatically provide the caller's source file and line number for
 * 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();
    long long int when;

    if (msec <= 0) {
        /* Wake up immediately. */
        when = LLONG_MIN;
    } else if ((unsigned long long int) now + msec <= LLONG_MAX) {
        /* Normal case. */
        when = now + msec;
    } else {
        /* now + msec would overflow, so clamp to "wait forever". */
        when = LLONG_MAX;
    }

    poll_timer_wait_until_at(when, where);
}
/* Causes the following call to poll_block() to wake up when the current time,
- * as returned by time_msec(), reaches 'msec' or later. If 'msec' is earlier
+ * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier
* than the current time, the following call to poll_block() will not block at
* all.
*
* is affected. The timer will need to be re-registered after poll_block() is
* called if it is to persist.
*
- * Ordinarily the 'where' argument is supplied automatically; see poll-loop.h
- * for more information. */
+ * ('where' is used in debug logging. Commonly one would use
+ * poll_timer_wait_until() to automatically provide the caller's source file
+ * and line number for 'where'.) */
void
-poll_timer_wait_until(long long int msec, const char *where)
+poll_timer_wait_until_at(long long int when, const char *where)
{
- long long int now = time_msec();
- poll_timer_wait__((msec <= now ? 0
- : msec < now + INT_MAX ? msec - now
- : INT_MAX),
- where);
+ struct poll_loop *loop = poll_loop();
+ if (when < loop->timeout_when) {
+ loop->timeout_when = when;
+ loop->timeout_where = where;
+ }
}
/* Causes the following call to poll_block() to wake up immediately, without
 * blocking.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_immediate_wake() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    /* A timeout of 0 ms maps to LLONG_MIN, i.e. "wake up immediately". */
    poll_timer_wait_at(0, where);
}
/* Logs, if appropriate, that the poll loop was awakened by an event
static void
log_wakeup(const char *where, const struct pollfd *pollfd, int timeout)
{
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(120, 120);
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
enum vlog_level level;
int cpu_usage;
struct ds s;
cpu_usage = get_cpu_usage();
if (VLOG_IS_DBG_ENABLED()) {
level = VLL_DBG;
- } else if (cpu_usage > 50 && !VLOG_DROP_WARN(&rl)) {
- level = VLL_WARN;
+ } else if (cpu_usage > 50 && !VLOG_DROP_INFO(&rl)) {
+ level = VLL_INFO;
} else {
return;
}
ds_destroy(&s);
}
+static void
+free_poll_nodes(struct poll_loop *loop)
+{
+ struct poll_node *node, *next;
+
+ HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) {
+ hmap_remove(&loop->poll_nodes, &node->hmap_node);
+ free(node);
+ }
+}
+
/* Blocks until one or more of the events registered with poll_fd_wait()
* occurs, or until the minimum duration registered with poll_timer_wait()
* elapses, or not at all if poll_immediate_wake() has been called. */
void
poll_block(void)
{
- struct poll_waiter *pw, *next;
- int n_waiters;
+ struct poll_loop *loop = poll_loop();
+ struct poll_node *node;
+ struct pollfd *pollfds;
+ HANDLE *wevents = NULL;
+ int elapsed;
int retval;
+ int i;
/* Register fatal signal events before actually doing any real work for
* poll_block. */
fatal_signal_wait();
- n_waiters = list_size(&waiters);
- if (max_pollfds < n_waiters) {
- max_pollfds = n_waiters;
- pollfds = xrealloc(pollfds, max_pollfds * sizeof *pollfds);
+ if (loop->timeout_when == LLONG_MIN) {
+ COVERAGE_INC(poll_zero_timeout);
}
- n_pollfds = 0;
- LIST_FOR_EACH (pw, node, &waiters) {
- pw->pollfd = &pollfds[n_pollfds];
- pollfds[n_pollfds].fd = pw->fd;
- pollfds[n_pollfds].events = pw->events;
- pollfds[n_pollfds].revents = 0;
- n_pollfds++;
+ timewarp_wait();
+ pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
+
+#ifdef _WIN32
+ wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
+#endif
+
+ /* Populate with all the fds and events. */
+ i = 0;
+ HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
+ pollfds[i] = node->pollfd;
+#ifdef _WIN32
+ wevents[i] = node->wevent;
+ if (node->pollfd.fd && node->wevent) {
+ short int wsa_events = 0;
+ if (node->pollfd.events & POLLIN) {
+ wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
+ }
+ if (node->pollfd.events & POLLOUT) {
+ wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
+ }
+ WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
+ }
+#endif
+ i++;
}
- if (!timeout) {
- COVERAGE_INC(poll_zero_timeout);
- }
- retval = time_poll(pollfds, n_pollfds, timeout);
+ retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
+ loop->timeout_when, &elapsed);
if (retval < 0) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- VLOG_ERR_RL(&rl, "poll: %s", strerror(-retval));
+ VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
} else if (!retval) {
- log_wakeup(timeout_where, NULL, timeout);
- }
-
- LIST_FOR_EACH_SAFE (pw, next, node, &waiters) {
- if (pw->pollfd->revents) {
- log_wakeup(pw->where, pw->pollfd, 0);
+ log_wakeup(loop->timeout_where, NULL, elapsed);
+ } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
+ i = 0;
+ HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
+ if (pollfds[i].revents) {
+ log_wakeup(node->where, &pollfds[i], 0);
+ }
+ i++;
}
- poll_cancel(pw);
}
- timeout = -1;
- timeout_where = NULL;
+ free_poll_nodes(loop);
+ loop->timeout_when = LLONG_MAX;
+ loop->timeout_where = NULL;
+ free(pollfds);
+ free(wevents);
/* Handle any pending signals before doing anything else. */
fatal_signal_run();
-}
-/* Cancels the file descriptor event registered with poll_fd_wait() using 'pw',
- * the struct poll_waiter returned by that function.
- *
- * An event registered with poll_fd_wait() may be canceled from its time of
- * registration until the next call to poll_block(). At that point, the event
- * is automatically canceled by the system and its poll_waiter is freed. */
-void
-poll_cancel(struct poll_waiter *pw)
+ seq_woke();
+}
+\f
+static void
+free_poll_loop(void *loop_)
{
- if (pw) {
- list_remove(&pw->node);
- free(pw);
- }
+ struct poll_loop *loop = loop_;
+
+ free_poll_nodes(loop);
+ hmap_destroy(&loop->poll_nodes);
+ free(loop);
}
-/* Checks whether the given file descriptor caused the poll loop to wake up
- * in the previous iteration. If it did, returns a bitmask of the events
- * that caused the wakeup. Otherwise returns 0;
- */
-short int
-poll_fd_woke(int fd)
+static struct poll_loop *
+poll_loop(void)
{
- int i;
- short int events = 0;
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+ static pthread_key_t key;
+ struct poll_loop *loop;
- for (i = 0; i < n_pollfds; i++) {
- if (pollfds[i].fd == fd) {
- events |= pollfds[i].revents;
- }
+ if (ovsthread_once_start(&once)) {
+ xpthread_key_create(&key, free_poll_loop);
+ ovsthread_once_done(&once);
}
- return events;
-}
-\f
-/* Creates and returns a new poll_waiter for 'fd' and 'events'. */
-static struct poll_waiter *
-new_waiter(int fd, short int events, const char *where)
-{
- struct poll_waiter *waiter = xzalloc(sizeof *waiter);
- assert(fd >= 0);
- waiter->fd = fd;
- waiter->events = events;
- waiter->where = where;
- list_push_back(&waiters, &waiter->node);
- return waiter;
+ loop = pthread_getspecific(key);
+ if (!loop) {
+ loop = xzalloc(sizeof *loop);
+ hmap_init(&loop->poll_nodes);
+ xpthread_setspecific(key, loop);
+ }
+ return loop;
}
+