X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Ftimeval.c;h=d2a4380ff068c3535780cadae6729ed6f5511390;hb=3308c696220bbfff36bf29b3e1b168449d57b3d7;hp=302224ac10639f265410a4e8f2133ba4718e9b92;hpb=61a7b1e6fee6c81b0d8551898d2b108de2b42536;p=sliver-openvswitch.git diff --git a/lib/timeval.c b/lib/timeval.c index 302224ac1..d2a4380ff 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,12 +16,9 @@ #include #include "timeval.h" -#include #include -#if HAVE_EXECINFO_H -#include -#endif #include +#include #include #include #include @@ -32,264 +29,204 @@ #include "dummy.h" #include "dynamic-string.h" #include "fatal-signal.h" +#include "hash.h" +#include "hmap.h" +#include "ovs-rcu.h" +#include "ovs-thread.h" #include "signals.h" +#include "seq.h" #include "unixctl.h" #include "util.h" #include "vlog.h" -#ifndef HAVE_EXECINFO_H -#define HAVE_EXECINFO_H 0 +VLOG_DEFINE_THIS_MODULE(timeval); + +#ifdef _WIN32 +typedef unsigned int clockid_t; + +#ifndef CLOCK_MONOTONIC +#define CLOCK_MONOTONIC 1 #endif -VLOG_DEFINE_THIS_MODULE(timeval); +#ifndef CLOCK_REALTIME +#define CLOCK_REALTIME 2 +#endif -/* The clock to use for measuring time intervals. This is CLOCK_MONOTONIC by - * preference, but on systems that don't have a monotonic clock we fall back - * to CLOCK_REALTIME. */ -static clockid_t monotonic_clock; +/* Number of 100 ns intervals from January 1, 1601 till January 1, 1970. */ +static ULARGE_INTEGER unix_epoch; +#endif /* _WIN32 */ -/* Has a timer tick occurred? Only relevant if CACHE_TIME is true. - * - * We initialize these to true to force time_init() to get called on the first - * call to time_msec() or another function that queries the current time. 
*/ -static volatile sig_atomic_t wall_tick = true; -static volatile sig_atomic_t monotonic_tick = true; +struct clock { + clockid_t id; /* CLOCK_MONOTONIC or CLOCK_REALTIME. */ -/* The current time, as of the last refresh. */ -static struct timespec wall_time; -static struct timespec monotonic_time; + /* Features for use by unit tests. Protected by 'mutex'. */ + struct ovs_mutex mutex; + atomic_bool slow_path; /* True if warped or stopped. */ + struct timespec warp OVS_GUARDED; /* Offset added for unit tests. */ + bool stopped OVS_GUARDED; /* Disable real-time updates if true. */ + struct timespec cache OVS_GUARDED; /* Last time read from kernel. */ +}; + +/* Our clocks. */ +static struct clock monotonic_clock; /* CLOCK_MONOTONIC, if available. */ +static struct clock wall_clock; /* CLOCK_REALTIME. */ /* The monotonic time at which the time module was initialized. */ static long long int boot_time; -/* features for use by unit tests. */ -static struct timespec warp_offset; /* Offset added to monotonic_time. */ -static bool time_stopped; /* Disables real-time updates, if true. */ +/* True only when timeval_dummy_register() is called. */ +static bool timewarp_enabled; +/* Reference to the seq struct. Threads other than main thread can + * wait on timewarp_seq and be waken up when time is warped. */ +static struct seq *timewarp_seq; +/* Last value of 'timewarp_seq'. */ +DEFINE_STATIC_PER_THREAD_DATA(uint64_t, last_seq, 0); -/* Time in milliseconds at which to die with SIGALRM (if not LLONG_MAX). */ +/* Monotonic time in milliseconds at which to die with SIGALRM (if not + * LLONG_MAX). */ static long long int deadline = LLONG_MAX; -struct trace { - void *backtrace[32]; /* Populated by backtrace(). */ - size_t n_frames; /* Number of frames in 'backtrace'. */ -}; +/* Monotonic time, in milliseconds, at which the last call to time_poll() woke + * up. 
*/ +DEFINE_STATIC_PER_THREAD_DATA(long long int, last_wakeup, 0); -#define MAX_TRACES 50 -static struct unixctl_conn *backtrace_conn = NULL; -static struct trace *traces = NULL; -static size_t n_traces = 0; - -static void set_up_timer(void); -static void set_up_signal(int flags); -static void sigalrm_handler(int); -static void refresh_wall_if_ticked(void); -static void refresh_monotonic_if_ticked(void); -static void block_sigalrm(sigset_t *); -static void unblock_sigalrm(const sigset_t *); static void log_poll_interval(long long int last_wakeup); static struct rusage *get_recent_rusage(void); static void refresh_rusage(void); static void timespec_add(struct timespec *sum, const struct timespec *a, const struct timespec *b); -static void trace_run(void); -static unixctl_cb_func backtrace_cb; -/* Initializes the timetracking module, if not already initialized. */ static void -time_init(void) +init_clock(struct clock *c, clockid_t id) { - static bool inited; + memset(c, 0, sizeof *c); + c->id = id; + ovs_mutex_init(&c->mutex); + atomic_init(&c->slow_path, false); + xclock_gettime(c->id, &c->cache); + timewarp_seq = seq_create(); +} - /* The best place to do this is probably a timeval_run() function. - * However, none exists and this function is usually so fast that doing it - * here seems fine for now. */ - trace_run(); +static void +do_init_time(void) +{ + struct timespec ts; - if (inited) { - return; - } - inited = true; +#ifdef _WIN32 + /* Calculate number of 100-nanosecond intervals till 01/01/1970. 
*/ + SYSTEMTIME unix_epoch_st = { 1970, 1, 0, 1, 0, 0, 0, 0}; + FILETIME unix_epoch_ft; - if (HAVE_EXECINFO_H && CACHE_TIME) { - unixctl_command_register("backtrace", "", 0, 0, backtrace_cb, NULL); - } + SystemTimeToFileTime(&unix_epoch_st, &unix_epoch_ft); + unix_epoch.LowPart = unix_epoch_ft.dwLowDateTime; + unix_epoch.HighPart = unix_epoch_ft.dwHighDateTime; +#endif coverage_init(); - if (!clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) { - monotonic_clock = CLOCK_MONOTONIC; - } else { - monotonic_clock = CLOCK_REALTIME; - VLOG_DBG("monotonic timer not available"); - } - - set_up_signal(SA_RESTART); - set_up_timer(); - - boot_time = time_msec(); + init_clock(&monotonic_clock, (!clock_gettime(CLOCK_MONOTONIC, &ts) + ? CLOCK_MONOTONIC + : CLOCK_REALTIME)); + init_clock(&wall_clock, CLOCK_REALTIME); + boot_time = timespec_to_msec(&monotonic_clock.cache); } +/* Initializes the timetracking module, if not already initialized. */ static void -set_up_signal(int flags) -{ - struct sigaction sa; - - memset(&sa, 0, sizeof sa); - sa.sa_handler = sigalrm_handler; - sigemptyset(&sa.sa_mask); - sa.sa_flags = flags; - xsigaction(SIGALRM, &sa, NULL); -} - -/* Remove SA_RESTART from the flags for SIGALRM, so that any system call that - * is interrupted by the periodic timer interrupt will return EINTR instead of - * continuing after the signal handler returns. - * - * time_disable_restart() and time_enable_restart() may be usefully wrapped - * around function calls that might otherwise block forever unless interrupted - * by a signal, e.g.: - * - * time_disable_restart(); - * fcntl(fd, F_SETLKW, &lock); - * time_enable_restart(); - */ -void -time_disable_restart(void) -{ - time_init(); - set_up_signal(0); -} - -/* Add SA_RESTART to the flags for SIGALRM, so that any system call that - * is interrupted by the periodic timer interrupt will continue after the - * signal handler returns instead of returning EINTR. 
*/ -void -time_enable_restart(void) +time_init(void) { - time_init(); - set_up_signal(SA_RESTART); + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, do_init_time); } static void -set_up_timer(void) +time_timespec__(struct clock *c, struct timespec *ts) { - static timer_t timer_id; /* "static" to avoid apparent memory leak. */ - struct itimerspec itimer; - - if (!CACHE_TIME) { - return; - } - - if (timer_create(monotonic_clock, NULL, &timer_id)) { - VLOG_FATAL("timer_create failed (%s)", strerror(errno)); - } + bool slow_path; - itimer.it_interval.tv_sec = 0; - itimer.it_interval.tv_nsec = TIME_UPDATE_INTERVAL * 1000 * 1000; - itimer.it_value = itimer.it_interval; + time_init(); - if (timer_settime(timer_id, 0, &itimer, NULL)) { - VLOG_FATAL("timer_settime failed (%s)", strerror(errno)); + atomic_read_explicit(&c->slow_path, &slow_path, memory_order_relaxed); + if (!slow_path) { + xclock_gettime(c->id, ts); + } else { + struct timespec warp; + struct timespec cache; + bool stopped; + + ovs_mutex_lock(&c->mutex); + stopped = c->stopped; + warp = c->warp; + cache = c->cache; + ovs_mutex_unlock(&c->mutex); + + if (!stopped) { + xclock_gettime(c->id, &cache); + } + timespec_add(ts, &cache, &warp); } } -/* Set up the interval timer, to ensure that time advances even without calling - * time_refresh(). - * - * A child created with fork() does not inherit the parent's interval timer, so - * this function needs to be called from the child after fork(). */ +/* Stores a monotonic timer, accurate within TIME_UPDATE_INTERVAL ms, into + * '*ts'. */ void -time_postfork(void) +time_timespec(struct timespec *ts) { - time_init(); - set_up_timer(); + time_timespec__(&monotonic_clock, ts); } -static void -refresh_wall(void) +/* Stores the current time, accurate within TIME_UPDATE_INTERVAL ms, into + * '*ts'. 
*/ +void +time_wall_timespec(struct timespec *ts) { - time_init(); - clock_gettime(CLOCK_REALTIME, &wall_time); - wall_tick = false; + time_timespec__(&wall_clock, ts); } -static void -refresh_monotonic(void) +static time_t +time_sec__(struct clock *c) { - time_init(); - - if (!time_stopped) { - if (monotonic_clock == CLOCK_MONOTONIC) { - clock_gettime(monotonic_clock, &monotonic_time); - } else { - refresh_wall_if_ticked(); - monotonic_time = wall_time; - } - timespec_add(&monotonic_time, &monotonic_time, &warp_offset); - - monotonic_tick = false; - } -} + struct timespec ts; -/* Forces a refresh of the current time from the kernel. It is not usually - * necessary to call this function, since the time will be refreshed - * automatically at least every TIME_UPDATE_INTERVAL milliseconds. If - * CACHE_TIME is false, we will always refresh the current time so this - * function has no effect. */ -void -time_refresh(void) -{ - wall_tick = monotonic_tick = true; + time_timespec__(c, &ts); + return ts.tv_sec; } /* Returns a monotonic timer, in seconds. */ time_t time_now(void) { - refresh_monotonic_if_ticked(); - return monotonic_time.tv_sec; + return time_sec__(&monotonic_clock); } /* Returns the current time, in seconds. */ time_t time_wall(void) { - refresh_wall_if_ticked(); - return wall_time.tv_sec; + return time_sec__(&wall_clock); +} + +static long long int +time_msec__(struct clock *c) +{ + struct timespec ts; + + time_timespec__(c, &ts); + return timespec_to_msec(&ts); } /* Returns a monotonic timer, in ms (within TIME_UPDATE_INTERVAL ms). */ long long int time_msec(void) { - refresh_monotonic_if_ticked(); - return timespec_to_msec(&monotonic_time); + return time_msec__(&monotonic_clock); } /* Returns the current time, in ms (within TIME_UPDATE_INTERVAL ms). */ long long int time_wall_msec(void) { - refresh_wall_if_ticked(); - return timespec_to_msec(&wall_time); -} - -/* Stores a monotonic timer, accurate within TIME_UPDATE_INTERVAL ms, into - * '*ts'. 
*/ -void -time_timespec(struct timespec *ts) -{ - refresh_monotonic_if_ticked(); - *ts = monotonic_time; -} - -/* Stores the current time, accurate within TIME_UPDATE_INTERVAL ms, into - * '*ts'. */ -void -time_wall_timespec(struct timespec *ts) -{ - refresh_wall_if_ticked(); - *ts = wall_time; + return time_msec__(&wall_clock); } /* Configures the program to die with SIGALRM 'secs' seconds from now, if @@ -300,17 +237,12 @@ time_alarm(unsigned int secs) long long int now; long long int msecs; - sigset_t oldsigs; - + assert_single_threaded(); time_init(); - time_refresh(); now = time_msec(); - msecs = secs * 1000; - - block_sigalrm(&oldsigs); + msecs = secs * 1000LL; deadline = now < LLONG_MAX - msecs ? now + msecs : LLONG_MAX; - unblock_sigalrm(&oldsigs); } /* Like poll(), except: @@ -324,26 +256,26 @@ time_alarm(unsigned int secs) * timeout is reached. (Because of this property, this function will * never return -EINTR.) * - * - As a side effect, refreshes the current time (like time_refresh()). - * * Stores the number of milliseconds elapsed during poll in '*elapsed'. 
*/ int -time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when, - int *elapsed) +time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED, + long long int timeout_when, int *elapsed) { - static long long int last_wakeup; + long long int *last_wakeup = last_wakeup_get(); long long int start; - sigset_t oldsigs; - bool blocked; - int retval; + bool quiescent; + int retval = 0; - time_refresh(); - log_poll_interval(last_wakeup); + time_init(); coverage_clear(); + coverage_run(); + if (*last_wakeup) { + log_poll_interval(*last_wakeup); + } start = time_msec(); - blocked = false; timeout_when = MIN(timeout_when, deadline); + quiescent = ovsrcu_is_quiescent(); for (;;) { long long int now = time_msec(); @@ -357,14 +289,45 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when, time_left = timeout_when - now; } + if (!quiescent) { + if (!time_left) { + ovsrcu_quiesce(); + } else { + ovsrcu_quiesce_start(); + } + } + +#ifndef _WIN32 retval = poll(pollfds, n_pollfds, time_left); if (retval < 0) { retval = -errno; } +#else + if (n_pollfds > MAXIMUM_WAIT_OBJECTS) { + VLOG_ERR("Cannot handle more than maximum wait objects\n"); + } else if (n_pollfds != 0) { + retval = WaitForMultipleObjects(n_pollfds, handles, FALSE, + time_left); + } + if (retval < 0) { + /* XXX This will be replace by a win error to errno + conversion function */ + retval = -WSAGetLastError(); + retval = -EINVAL; + } +#endif + + if (!quiescent && time_left) { + ovsrcu_quiesce_end(); + } - time_refresh(); if (deadline <= time_msec()) { +#ifndef _WIN32 fatal_signal_handler(SIGALRM); +#else + VLOG_ERR("wake up from WaitForMultipleObjects after deadline"); + fatal_signal_handler(SIGTERM); +#endif if (retval < 0) { retval = 0; } @@ -374,67 +337,13 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when, if (retval != -EINTR) { break; } - - if (!blocked && CACHE_TIME && !backtrace_conn) { - block_sigalrm(&oldsigs); - blocked = 
true; - } } - if (blocked) { - unblock_sigalrm(&oldsigs); - } - last_wakeup = time_msec(); + *last_wakeup = time_msec(); refresh_rusage(); - *elapsed = last_wakeup - start; + *elapsed = *last_wakeup - start; return retval; } -static void -sigalrm_handler(int sig_nr OVS_UNUSED) -{ - wall_tick = true; - monotonic_tick = true; - -#if HAVE_EXECINFO_H - if (backtrace_conn && n_traces < MAX_TRACES) { - struct trace *trace = &traces[n_traces++]; - trace->n_frames = backtrace(trace->backtrace, - ARRAY_SIZE(trace->backtrace)); - } -#endif -} - -static void -refresh_wall_if_ticked(void) -{ - if (!CACHE_TIME || wall_tick) { - refresh_wall(); - } -} - -static void -refresh_monotonic_if_ticked(void) -{ - if (!CACHE_TIME || monotonic_tick) { - refresh_monotonic(); - } -} - -static void -block_sigalrm(sigset_t *oldsigs) -{ - sigset_t sigalrm; - sigemptyset(&sigalrm); - sigaddset(&sigalrm, SIGALRM); - xsigprocmask(SIG_BLOCK, &sigalrm, oldsigs); -} - -static void -unblock_sigalrm(const sigset_t *oldsigs) -{ - xsigprocmask(SIG_SETMASK, oldsigs, NULL); -} - long long int timespec_to_msec(const struct timespec *ts) { @@ -448,7 +357,7 @@ timeval_to_msec(const struct timeval *tv) } /* Returns the monotonic time at which the "time" module was initialized, in - * milliseconds(). */ + * milliseconds. */ long long int time_boot_msec(void) { @@ -456,11 +365,91 @@ time_boot_msec(void) return boot_time; } +#ifdef _WIN32 +static ULARGE_INTEGER +xgetfiletime(void) +{ + ULARGE_INTEGER current_time; + FILETIME current_time_ft; + + /* Returns current time in UTC as a 64-bit value representing the number + * of 100-nanosecond intervals since January 1, 1601 . 
*/ + GetSystemTimePreciseAsFileTime(&current_time_ft); + current_time.LowPart = current_time_ft.dwLowDateTime; + current_time.HighPart = current_time_ft.dwHighDateTime; + + return current_time; +} + +static int +clock_gettime(clock_t id, struct timespec *ts) +{ + if (id == CLOCK_MONOTONIC) { + static LARGE_INTEGER freq; + LARGE_INTEGER count; + long long int ns; + + if (!freq.QuadPart) { + /* Number of counts per second. */ + QueryPerformanceFrequency(&freq); + } + /* Total number of counts from a starting point. */ + QueryPerformanceCounter(&count); + + /* Total nano seconds from a starting point. */ + ns = (double) count.QuadPart / freq.QuadPart * 1000000000; + + ts->tv_sec = count.QuadPart / freq.QuadPart; + ts->tv_nsec = ns % 1000000000; + } else if (id == CLOCK_REALTIME) { + ULARGE_INTEGER current_time = xgetfiletime(); + + /* Time from Epoch to now. */ + ts->tv_sec = (current_time.QuadPart - unix_epoch.QuadPart) / 10000000; + ts->tv_nsec = ((current_time.QuadPart - unix_epoch.QuadPart) % + 10000000) * 100; + } else { + return -1; + } +} +#endif /* _WIN32 */ + void xgettimeofday(struct timeval *tv) { +#ifndef _WIN32 if (gettimeofday(tv, NULL) == -1) { - VLOG_FATAL("gettimeofday failed (%s)", strerror(errno)); + VLOG_FATAL("gettimeofday failed (%s)", ovs_strerror(errno)); + } +#else + ULARGE_INTEGER current_time = xgetfiletime(); + + tv->tv_sec = (current_time.QuadPart - unix_epoch.QuadPart) / 10000000; + tv->tv_usec = ((current_time.QuadPart - unix_epoch.QuadPart) % + 10000000) / 10; +#endif +} + +void +xclock_gettime(clock_t id, struct timespec *ts) +{ + if (clock_gettime(id, ts) == -1) { + /* It seems like a bad idea to try to use vlog here because it is + * likely to try to check the current time. */ + ovs_abort(errno, "xclock_gettime() failed"); + } +} + +/* Makes threads wait on timewarp_seq and be waken up when time is warped. + * This function will be no-op unless timeval_dummy_register() is called. 
*/ +void +timewarp_wait(void) +{ + if (timewarp_enabled) { + uint64_t *last_seq = last_seq_get(); + + *last_seq = seq_read(timewarp_seq); + seq_wait(timewarp_seq, *last_seq); } } @@ -487,69 +476,55 @@ timespec_add(struct timespec *sum, *sum = tmp; } -static void -log_poll_interval(long long int last_wakeup) +static bool +is_warped(const struct clock *c) { - static unsigned int mean_interval; /* In 16ths of a millisecond. */ - static unsigned int n_samples; + bool warped; - long long int now; - unsigned int interval; /* In 16ths of a millisecond. */ + ovs_mutex_lock(&c->mutex); + warped = monotonic_clock.warp.tv_sec || monotonic_clock.warp.tv_nsec; + ovs_mutex_unlock(&c->mutex); - /* Compute interval from last wakeup to now in 16ths of a millisecond, - * capped at 10 seconds (16000 in this unit). */ - now = time_msec(); - interval = MIN(10000, now - last_wakeup) << 4; - - /* Warn if we took too much time between polls: at least 50 ms and at least - * 8X the mean interval. */ - if (n_samples > 10 && interval > mean_interval * 8 && interval > 50 * 16) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 3); - - if (!VLOG_DROP_WARN(&rl)) { - const struct rusage *last_rusage = get_recent_rusage(); - struct rusage rusage; - - getrusage(RUSAGE_SELF, &rusage); - VLOG_WARN("%lld ms poll interval (%lld ms user, %lld ms system) " - "is over %u times the weighted mean interval %u ms " - "(%u samples)", - now - last_wakeup, - timeval_diff_msec(&rusage.ru_utime, - &last_rusage->ru_utime), - timeval_diff_msec(&rusage.ru_stime, - &last_rusage->ru_stime), - interval / mean_interval, - (mean_interval + 8) / 16, n_samples); - if (rusage.ru_minflt > last_rusage->ru_minflt - || rusage.ru_majflt > last_rusage->ru_majflt) { - VLOG_WARN("faults: %ld minor, %ld major", - rusage.ru_minflt - last_rusage->ru_minflt, - rusage.ru_majflt - last_rusage->ru_majflt); - } - if (rusage.ru_inblock > last_rusage->ru_inblock - || rusage.ru_oublock > last_rusage->ru_oublock) { - 
VLOG_WARN("disk: %ld reads, %ld writes", - rusage.ru_inblock - last_rusage->ru_inblock, - rusage.ru_oublock - last_rusage->ru_oublock); - } - if (rusage.ru_nvcsw > last_rusage->ru_nvcsw - || rusage.ru_nivcsw > last_rusage->ru_nivcsw) { - VLOG_WARN("context switches: %ld voluntary, %ld involuntary", - rusage.ru_nvcsw - last_rusage->ru_nvcsw, - rusage.ru_nivcsw - last_rusage->ru_nivcsw); - } + return warped; +} + +static void +log_poll_interval(long long int last_wakeup) +{ + long long int interval = time_msec() - last_wakeup; + + if (interval >= 1000 && !is_warped(&monotonic_clock)) { + const struct rusage *last_rusage = get_recent_rusage(); + struct rusage rusage; + + getrusage(RUSAGE_SELF, &rusage); + VLOG_WARN("Unreasonably long %lldms poll interval" + " (%lldms user, %lldms system)", + interval, + timeval_diff_msec(&rusage.ru_utime, + &last_rusage->ru_utime), + timeval_diff_msec(&rusage.ru_stime, + &last_rusage->ru_stime)); + if (rusage.ru_minflt > last_rusage->ru_minflt + || rusage.ru_majflt > last_rusage->ru_majflt) { + VLOG_WARN("faults: %ld minor, %ld major", + rusage.ru_minflt - last_rusage->ru_minflt, + rusage.ru_majflt - last_rusage->ru_majflt); + } + if (rusage.ru_inblock > last_rusage->ru_inblock + || rusage.ru_oublock > last_rusage->ru_oublock) { + VLOG_WARN("disk: %ld reads, %ld writes", + rusage.ru_inblock - last_rusage->ru_inblock, + rusage.ru_oublock - last_rusage->ru_oublock); + } + if (rusage.ru_nvcsw > last_rusage->ru_nvcsw + || rusage.ru_nivcsw > last_rusage->ru_nivcsw) { + VLOG_WARN("context switches: %ld voluntary, %ld involuntary", + rusage.ru_nvcsw - last_rusage->ru_nvcsw, + rusage.ru_nivcsw - last_rusage->ru_nivcsw); } coverage_log(); } - - /* Update exponentially weighted moving average. With these parameters, a - * given value decays to 1% of its value in about 100 time steps. */ - if (n_samples++) { - mean_interval = (mean_interval * 122 + interval * 6 + 64) / 128; - } else { - mean_interval = interval; - } } /* CPU usage tracking. 
*/ @@ -559,37 +534,66 @@ struct cpu_usage { unsigned long long int cpu; /* Total user+system CPU usage when sampled. */ }; -static struct rusage recent_rusage; -static struct cpu_usage older = { LLONG_MIN, 0 }; -static struct cpu_usage newer = { LLONG_MIN, 0 }; -static int cpu_usage = -1; +struct cpu_tracker { + struct cpu_usage older; + struct cpu_usage newer; + int cpu_usage; + + struct rusage recent_rusage; +}; +DEFINE_PER_THREAD_MALLOCED_DATA(struct cpu_tracker *, cpu_tracker_var); + +static struct cpu_tracker * +get_cpu_tracker(void) +{ + struct cpu_tracker *t = cpu_tracker_var_get(); + if (!t) { + t = xzalloc(sizeof *t); + t->older.when = LLONG_MIN; + t->newer.when = LLONG_MIN; + cpu_tracker_var_set_unsafe(t); + } + return t; +} static struct rusage * get_recent_rusage(void) { - return &recent_rusage; + return &get_cpu_tracker()->recent_rusage; +} + +static int +getrusage_thread(struct rusage *rusage OVS_UNUSED) +{ +#ifdef RUSAGE_THREAD + return getrusage(RUSAGE_THREAD, rusage); +#else + errno = EINVAL; + return -1; +#endif } static void refresh_rusage(void) { - long long int now; + struct cpu_tracker *t = get_cpu_tracker(); + struct rusage *recent_rusage = &t->recent_rusage; - now = time_msec(); - getrusage(RUSAGE_SELF, &recent_rusage); - - if (now >= newer.when + 3 * 1000) { - older = newer; - newer.when = now; - newer.cpu = (timeval_to_msec(&recent_rusage.ru_utime) + - timeval_to_msec(&recent_rusage.ru_stime)); - - if (older.when != LLONG_MIN && newer.cpu > older.cpu) { - unsigned int dividend = newer.cpu - older.cpu; - unsigned int divisor = (newer.when - older.when) / 100; - cpu_usage = divisor > 0 ? 
dividend / divisor : -1; - } else { - cpu_usage = -1; + if (!getrusage_thread(recent_rusage)) { + long long int now = time_msec(); + if (now >= t->newer.when + 3 * 1000) { + t->older = t->newer; + t->newer.when = now; + t->newer.cpu = (timeval_to_msec(&recent_rusage->ru_utime) + + timeval_to_msec(&recent_rusage->ru_stime)); + + if (t->older.when != LLONG_MIN && t->newer.cpu > t->older.cpu) { + unsigned int dividend = t->newer.cpu - t->older.cpu; + unsigned int divisor = (t->newer.when - t->older.when) / 100; + t->cpu_usage = divisor > 0 ? dividend / divisor : -1; + } else { + t->cpu_usage = -1; + } } } } @@ -601,48 +605,7 @@ refresh_rusage(void) int get_cpu_usage(void) { - return cpu_usage; -} - -static void -trace_run(void) -{ -#if HAVE_EXECINFO_H - if (backtrace_conn && n_traces >= MAX_TRACES) { - struct unixctl_conn *reply_conn = backtrace_conn; - struct ds ds = DS_EMPTY_INITIALIZER; - sigset_t oldsigs; - size_t i; - - block_sigalrm(&oldsigs); - - for (i = 0; i < n_traces; i++) { - struct trace *trace = &traces[i]; - char **frame_strs; - size_t j; - - frame_strs = backtrace_symbols(trace->backtrace, trace->n_frames); - - ds_put_format(&ds, "Backtrace %zu\n", i + 1); - for (j = 0; j < trace->n_frames; j++) { - ds_put_format(&ds, "%s\n", frame_strs[j]); - } - ds_put_cstr(&ds, "\n"); - - free(frame_strs); - } - - free(traces); - traces = NULL; - n_traces = 0; - backtrace_conn = NULL; - - unblock_sigalrm(&oldsigs); - - unixctl_command_reply(reply_conn, ds_cstr(&ds)); - ds_destroy(&ds); - } -#endif + return get_cpu_tracker()->cpu_usage; } /* Unixctl interface. 
*/ @@ -654,7 +617,12 @@ timeval_stop_cb(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { - time_stopped = true; + ovs_mutex_lock(&monotonic_clock.mutex); + atomic_store(&monotonic_clock.slow_path, true); + monotonic_clock.stopped = true; + xclock_gettime(monotonic_clock.id, &monotonic_clock.cache); + ovs_mutex_unlock(&monotonic_clock.mutex); + unixctl_command_reply(conn, NULL); } @@ -678,37 +646,72 @@ timeval_warp_cb(struct unixctl_conn *conn, ts.tv_sec = msecs / 1000; ts.tv_nsec = (msecs % 1000) * 1000 * 1000; - timespec_add(&warp_offset, &warp_offset, &ts); - timespec_add(&monotonic_time, &monotonic_time, &ts); + + ovs_mutex_lock(&monotonic_clock.mutex); + atomic_store(&monotonic_clock.slow_path, true); + timespec_add(&monotonic_clock.warp, &monotonic_clock.warp, &ts); + ovs_mutex_unlock(&monotonic_clock.mutex); + seq_change(timewarp_seq); + /* give threads (eg. monitor) some chances to run */ +#ifndef _WIN32 + poll(NULL, 0, 10); +#else + Sleep(10); +#endif unixctl_command_reply(conn, "warped"); } -static void -backtrace_cb(struct unixctl_conn *conn, - int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, - void *aux OVS_UNUSED) +void +timeval_dummy_register(void) { - sigset_t oldsigs; + timewarp_enabled = true; + unixctl_command_register("time/stop", "", 0, 0, timeval_stop_cb, NULL); + unixctl_command_register("time/warp", "MSECS", 1, 1, + timeval_warp_cb, NULL); +} - assert(HAVE_EXECINFO_H && CACHE_TIME); - if (backtrace_conn) { - unixctl_command_reply_error(conn, "In Use"); - return; + +/* strftime() with an extension for high-resolution timestamps. Any '#'s in + * 'format' will be replaced by subseconds, e.g. use "%S.###" to obtain results + * like "01.123". 
*/ +size_t +strftime_msec(char *s, size_t max, const char *format, + const struct tm_msec *tm) +{ + size_t n; + + n = strftime(s, max, format, &tm->tm); + if (n) { + char decimals[4]; + char *p; + + sprintf(decimals, "%03d", tm->msec); + for (p = strchr(s, '#'); p; p = strchr(p, '#')) { + char *d = decimals; + while (*p == '#') { + *p++ = *d ? *d++ : '0'; + } + } } - assert(!traces); - block_sigalrm(&oldsigs); - backtrace_conn = conn; - traces = xmalloc(MAX_TRACES * sizeof *traces); - n_traces = 0; - unblock_sigalrm(&oldsigs); + return n; } -void -timeval_dummy_register(void) +struct tm_msec * +localtime_msec(long long int now, struct tm_msec *result) { - unixctl_command_register("time/stop", "", 0, 0, timeval_stop_cb, NULL); - unixctl_command_register("time/warp", "MSECS", 1, 1, - timeval_warp_cb, NULL); + time_t now_sec = now / 1000; + localtime_r(&now_sec, &result->tm); + result->msec = now % 1000; + return result; +} + +struct tm_msec * +gmtime_msec(long long int now, struct tm_msec *result) +{ + time_t now_sec = now / 1000; + gmtime_r(&now_sec, &result->tm); + result->msec = now % 1000; + return result; }