X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fpoll-loop.c;h=510903e46c9677d2dccbb8f171b017fa0cc0333a;hb=003ce655b7116d18c86a74c50391e54990346931;hp=945b5c441b2ca4f0dd3bd4c0978429566d0784bb;hpb=d65349ea28bb67a0062a9b4b60ff97538206373b;p=sliver-openvswitch.git diff --git a/lib/poll-loop.c b/lib/poll-loop.c index 945b5c441..510903e46 100644 --- a/lib/poll-loop.c +++ b/lib/poll-loop.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,69 +16,111 @@ #include #include "poll-loop.h" -#include #include #include #include #include #include -#include "backtrace.h" #include "coverage.h" #include "dynamic-string.h" +#include "fatal-signal.h" #include "list.h" +#include "ovs-thread.h" +#include "seq.h" +#include "socket-util.h" #include "timeval.h" - -#define THIS_MODULE VLM_poll_loop #include "vlog.h" +#include "hmap.h" +#include "hash.h" -/* An event that will wake the following call to poll_block(). */ -struct poll_waiter { - /* Set when the waiter is created. */ - struct list node; /* Element in global waiters list. */ - int fd; /* File descriptor. */ - short int events; /* Events to wait for (POLLIN, POLLOUT). */ - poll_fd_func *function; /* Callback function, if any, or null. */ - void *aux; /* Argument to callback function. */ - struct backtrace *backtrace; /* Optionally, event that created waiter. */ - - /* Set only when poll_block() is called. */ - struct pollfd *pollfd; /* Pointer to element of the pollfds array - (null if added from a callback). */ -}; +VLOG_DEFINE_THIS_MODULE(poll_loop); -/* All active poll waiters. */ -static struct list waiters = LIST_INITIALIZER(&waiters); +COVERAGE_DEFINE(poll_fd_wait); +COVERAGE_DEFINE(poll_zero_timeout); -/* Number of elements in the waiters list. */ -static size_t n_waiters; +struct poll_node { + struct hmap_node hmap_node; + struct pollfd pollfd; /* Events to pass to time_poll(). */ + HANDLE wevent; /* Events for WaitForMultipleObjects(). */ + const char *where; /* Where poll_node was created. */ +}; -/* Max time to wait in next call to poll_block(), in milliseconds, or -1 to - * wait forever. */ -static int timeout = -1; +struct poll_loop { + /* All active poll waiters. */ + struct hmap poll_nodes; -/* Backtrace of 'timeout''s registration, if debugging is enabled. */ -static struct backtrace timeout_backtrace; + /* Time at which to wake up the next call to poll_block(), LLONG_MIN to + * wake up immediately, or LLONG_MAX to wait forever. */ + long long int timeout_when; /* In msecs as returned by time_msec(). */ + const char *timeout_where; /* Where 'timeout_when' was set. */ +}; -/* Callback currently running, to allow verifying that poll_cancel() is not - * being called on a running callback. */ -#ifndef NDEBUG -static struct poll_waiter *running_cb; -#endif +static struct poll_loop *poll_loop(void); -static struct poll_waiter *new_waiter(int fd, short int events); +/* Look up the node with same fd and wevent. */ +static struct poll_node * +find_poll_node(struct poll_loop *loop, int fd, uint32_t wevent) +{ + struct poll_node *node; -/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN - * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will - * wake up when 'fd' becomes ready for one or more of the requested events. + HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash_2words(fd, wevent), + &loop->poll_nodes) { + if (node->pollfd.fd == fd && node->wevent == wevent) { + return node; + } + } + return NULL; +} + +/* On Unix based systems: + * + * Registers 'fd' as waiting for the specified 'events' (which should be + * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to + * poll_block() will wake up when 'fd' becomes ready for one or more of the + * requested events. the 'fd's are given to poll() function later. * - * The event registration is one-shot: only the following call to poll_block() - * is affected. The event will need to be re-registered after poll_block() is - * called if it is to persist. */ -struct poll_waiter * -poll_fd_wait(int fd, short int events) + * On Windows system: + * + * If both 'wevent' handle and 'fd' is specified, associate the 'fd' with + * with that 'wevent' for 'events' (implemented in poll_block()). + * In case of no 'fd' specified, wake up on any event on that 'wevent'. + * These wevents are given to the WaitForMultipleObjects() to be polled. + * The event registration is one-shot: only the following call to + * poll_block() is affected. The event will need to be re-registered after + * poll_block() is called if it is to persist. + * + * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to + * automatically provide the caller's source file and line number for + * 'where'.) */ +void +poll_fd_wait_at(int fd, HANDLE wevent, short int events, const char *where) { + struct poll_loop *loop = poll_loop(); + struct poll_node *node; + COVERAGE_INC(poll_fd_wait); - return new_waiter(fd, events); + +#ifdef _WIN32 + /* Null event cannot be polled. */ + if (wevent == 0) { + VLOG_ERR("No event to wait fd %d", fd); + return; + } +#endif + + /* Check for duplicate. If found, "or" the event. */ + node = find_poll_node(loop, fd, wevent); + if (node) { + node->pollfd.events |= events; + } else { + node = xzalloc(sizeof *node); + hmap_insert(&loop->poll_nodes, &node->hmap_node, + hash_2words(fd, wevent)); + node->pollfd.fd = fd; + node->pollfd.events = events; + node->wevent = wevent; + node->where = where; + } } /* Causes the following call to poll_block() to block for no more than 'msec' @@ -87,182 +129,243 @@ poll_fd_wait(int fd, short int events) * * The timer registration is one-shot: only the following call to poll_block() * is affected. The timer will need to be re-registered after poll_block() is - * called if it is to persist. */ + * called if it is to persist. + * + * ('where' is used in debug logging. Commonly one would use poll_timer_wait() + * to automatically provide the caller's source file and line number for + * 'where'.) */ void -poll_timer_wait(int msec) +poll_timer_wait_at(long long int msec, const char *where) { - if (timeout < 0 || msec < timeout) { - timeout = MAX(0, msec); - if (VLOG_IS_DBG_ENABLED()) { - backtrace_capture(&timeout_backtrace); - } + long long int now = time_msec(); + long long int when; + + if (msec <= 0) { + /* Wake up immediately. */ + when = LLONG_MIN; + } else if ((unsigned long long int) now + msec <= LLONG_MAX) { + /* Normal case. */ + when = now + msec; + } else { + /* now + msec would overflow. */ + when = LLONG_MAX; } + + poll_timer_wait_until_at(when, where); } -/* Causes the following call to poll_block() to wake up immediately, without - * blocking. */ +/* Causes the following call to poll_block() to wake up when the current time, + * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier + * than the current time, the following call to poll_block() will not block at + * all. + * + * The timer registration is one-shot: only the following call to poll_block() + * is affected. The timer will need to be re-registered after poll_block() is + * called if it is to persist. + * + * ('where' is used in debug logging. Commonly one would use + * poll_timer_wait_until() to automatically provide the caller's source file + * and line number for 'where'.) */ void -poll_immediate_wake(void) +poll_timer_wait_until_at(long long int when, const char *where) { - poll_timer_wait(0); + struct poll_loop *loop = poll_loop(); + if (when < loop->timeout_when) { + loop->timeout_when = when; + loop->timeout_where = where; + } } -static void PRINTF_FORMAT(2, 3) -log_wakeup(const struct backtrace *backtrace, const char *format, ...) +/* Causes the following call to poll_block() to wake up immediately, without + * blocking. + * + * ('where' is used in debug logging. Commonly one would use + * poll_immediate_wake() to automatically provide the caller's source file and + * line number for 'where'.) */ +void +poll_immediate_wake_at(const char *where) { - struct ds ds; - va_list args; + poll_timer_wait_at(0, where); +} - ds_init(&ds); - va_start(args, format); - ds_put_format_valist(&ds, format, args); - va_end(args); +/* Logs, if appropriate, that the poll loop was awakened by an event + * registered at 'where' (typically a source file and line number). The other + * arguments have two possible interpretations: + * + * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused + * the wakeup. 'timeout' is ignored. + * + * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after + * which the poll loop woke up. + */ +static void +log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); + enum vlog_level level; + int cpu_usage; + struct ds s; - if (backtrace) { - int i; + cpu_usage = get_cpu_usage(); + if (VLOG_IS_DBG_ENABLED()) { + level = VLL_DBG; + } else if (cpu_usage > 50 && !VLOG_DROP_INFO(&rl)) { + level = VLL_INFO; + } else { + return; + } - ds_put_char(&ds, ':'); - for (i = 0; i < backtrace->n_frames; i++) { - ds_put_format(&ds, " 0x%"PRIxPTR, backtrace->frames[i]); + ds_init(&s); + ds_put_cstr(&s, "wakeup due to "); + if (pollfd) { + char *description = describe_fd(pollfd->fd); + if (pollfd->revents & POLLIN) { + ds_put_cstr(&s, "[POLLIN]"); + } + if (pollfd->revents & POLLOUT) { + ds_put_cstr(&s, "[POLLOUT]"); + } + if (pollfd->revents & POLLERR) { + ds_put_cstr(&s, "[POLLERR]"); + } + if (pollfd->revents & POLLHUP) { + ds_put_cstr(&s, "[POLLHUP]"); } + if (pollfd->revents & POLLNVAL) { + ds_put_cstr(&s, "[POLLNVAL]"); + } + ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description); + free(description); + } else { + ds_put_format(&s, "%d-ms timeout", timeout); + } + if (where) { + ds_put_format(&s, " at %s", where); + } + if (cpu_usage >= 0) { + ds_put_format(&s, " (%d%% CPU usage)", cpu_usage); + } + VLOG(level, "%s", ds_cstr(&s)); + ds_destroy(&s); +} + +static void +free_poll_nodes(struct poll_loop *loop) +{ + struct poll_node *node, *next; + + HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) { + hmap_remove(&loop->poll_nodes, &node->hmap_node); + free(node); } - VLOG_DBG("%s", ds_cstr(&ds)); - ds_destroy(&ds); } /* Blocks until one or more of the events registered with poll_fd_wait() * occurs, or until the minimum duration registered with poll_timer_wait() - * elapses, or not at all if poll_immediate_wake() has been called. - * - * Also executes any autonomous subroutines registered with poll_fd_callback(), - * if their file descriptors have become ready. */ + * elapses, or not at all if poll_immediate_wake() has been called. */ void poll_block(void) { - static struct pollfd *pollfds; - static size_t max_pollfds; - - struct poll_waiter *pw; - struct list *node; - int n_pollfds; + struct poll_loop *loop = poll_loop(); + struct poll_node *node; + struct pollfd *pollfds; + HANDLE *wevents = NULL; + int elapsed; int retval; + int i; - assert(!running_cb); - if (max_pollfds < n_waiters) { - max_pollfds = n_waiters; - pollfds = xrealloc(pollfds, max_pollfds * sizeof *pollfds); - } + /* Register fatal signal events before actually doing any real work for + * poll_block. */ + fatal_signal_wait(); - n_pollfds = 0; - LIST_FOR_EACH (pw, struct poll_waiter, node, &waiters) { - pw->pollfd = &pollfds[n_pollfds]; - pollfds[n_pollfds].fd = pw->fd; - pollfds[n_pollfds].events = pw->events; - pollfds[n_pollfds].revents = 0; - n_pollfds++; - } - - if (!timeout) { + if (loop->timeout_when == LLONG_MIN) { COVERAGE_INC(poll_zero_timeout); } - retval = time_poll(pollfds, n_pollfds, timeout); - if (retval < 0) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_ERR_RL(&rl, "poll: %s", strerror(-retval)); - } else if (!retval && VLOG_IS_DBG_ENABLED()) { - log_wakeup(&timeout_backtrace, "%d-ms timeout", timeout); - } - for (node = waiters.next; node != &waiters; ) { - pw = CONTAINER_OF(node, struct poll_waiter, node); - if (!pw->pollfd || !pw->pollfd->revents) { - if (pw->function) { - node = node->next; - continue; - } - } else { - if (VLOG_IS_DBG_ENABLED()) { - log_wakeup(pw->backtrace, "%s%s%s%s%s on fd %d", - pw->pollfd->revents & POLLIN ? "[POLLIN]" : "", - pw->pollfd->revents & POLLOUT ? "[POLLOUT]" : "", - pw->pollfd->revents & POLLERR ? "[POLLERR]" : "", - pw->pollfd->revents & POLLHUP ? "[POLLHUP]" : "", - pw->pollfd->revents & POLLNVAL ? "[POLLNVAL]" : "", - pw->fd); - } + timewarp_wait(); + pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); - if (pw->function) { -#ifndef NDEBUG - running_cb = pw; +#ifdef _WIN32 + wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); #endif - pw->function(pw->fd, pw->pollfd->revents, pw->aux); -#ifndef NDEBUG - running_cb = NULL; + + /* Populate with all the fds and events. */ + i = 0; + HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { + pollfds[i] = node->pollfd; +#ifdef _WIN32 + wevents[i] = node->wevent; + if (node->pollfd.fd && node->wevent) { + short int wsa_events = 0; + if (node->pollfd.events & POLLIN) { + wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; + } + if (node->pollfd.events & POLLOUT) { + wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; + } + WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); + } #endif + i++; + } + + retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, + loop->timeout_when, &elapsed); + if (retval < 0) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); + } else if (!retval) { + log_wakeup(loop->timeout_where, NULL, elapsed); + } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) { + i = 0; + HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { + if (pollfds[i].revents) { + log_wakeup(node->where, &pollfds[i], 0); } + i++; } - node = node->next; - poll_cancel(pw); } - timeout = -1; - timeout_backtrace.n_frames = 0; -} + free_poll_nodes(loop); + loop->timeout_when = LLONG_MAX; + loop->timeout_where = NULL; + free(pollfds); + free(wevents); -/* Registers 'function' to be called with argument 'aux' by poll_block() when - * 'fd' becomes ready for one of the events in 'events', which should be POLLIN - * or POLLOUT or POLLIN | POLLOUT. - * - * The callback registration persists until the event actually occurs. At that - * point, it is automatically de-registered. The callback function must - * re-register the event by calling poll_fd_callback() again within the - * callback, if it wants to be called back again later. */ -struct poll_waiter * -poll_fd_callback(int fd, short int events, poll_fd_func *function, void *aux) + /* Handle any pending signals before doing anything else. */ + fatal_signal_run(); + + seq_woke(); +} + +static void +free_poll_loop(void *loop_) { - struct poll_waiter *pw = new_waiter(fd, events); - pw->function = function; - pw->aux = aux; - return pw; + struct poll_loop *loop = loop_; + + free_poll_nodes(loop); + hmap_destroy(&loop->poll_nodes); + free(loop); } -/* Cancels the file descriptor event registered with poll_fd_wait() or - * poll_fd_callback(). 'pw' must be the struct poll_waiter returned by one of - * those functions. - * - * An event registered with poll_fd_wait() may be canceled from its time of - * registration until the next call to poll_block(). At that point, the event - * is automatically canceled by the system and its poll_waiter is freed. - * - * An event registered with poll_fd_callback() may be canceled from its time of - * registration until its callback is actually called. At that point, the - * event is automatically canceled by the system and its poll_waiter is - * freed. */ -void -poll_cancel(struct poll_waiter *pw) +static struct poll_loop * +poll_loop(void) { - if (pw) { - assert(pw != running_cb); - list_remove(&pw->node); - free(pw->backtrace); - free(pw); - n_waiters--; + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; + static pthread_key_t key; + struct poll_loop *loop; + + if (ovsthread_once_start(&once)) { + xpthread_key_create(&key, free_poll_loop); + ovsthread_once_done(&once); } -} - -/* Creates and returns a new poll_waiter for 'fd' and 'events'. */ -static struct poll_waiter * -new_waiter(int fd, short int events) -{ - struct poll_waiter *waiter = xcalloc(1, sizeof *waiter); - assert(fd >= 0); - waiter->fd = fd; - waiter->events = events; - if (VLOG_IS_DBG_ENABLED()) { - waiter->backtrace = xmalloc(sizeof *waiter->backtrace); - backtrace_capture(waiter->backtrace); + + loop = pthread_getspecific(key); + if (!loop) { + loop = xzalloc(sizeof *loop); + hmap_init(&loop->poll_nodes); + xpthread_setspecific(key, loop); } - list_push_back(&waiters, &waiter->node); - n_waiters++; - return waiter; + return loop; } +