X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Ftimer.c;h=9e7985cb24ed0f3345b17149fefad1b058004aa6;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=f7d42e8ec28d8f32b369cfd0257e43cc2179dc18;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git diff --git a/kernel/timer.c b/kernel/timer.c index f7d42e8ec..9e7985cb2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include @@ -39,6 +39,13 @@ #include #include #include +#include + +#ifdef CONFIG_TIME_INTERPOLATION +static void time_interpolator_update(long delta_nsec); +#else +#define time_interpolator_update(x) +#endif /* * per-CPU timer vector definitions: @@ -236,6 +243,7 @@ void add_timer_on(struct timer_list *timer, int cpu) spin_unlock_irqrestore(&base->lock, flags); } + /*** * mod_timer - modify a timer's timeout * @timer: the timer to be modified @@ -302,6 +310,8 @@ repeat: goto repeat; } list_del(&timer->entry); + /* Need to make sure that anybody who sees a NULL base also sees the list ops */ + smp_wmb(); timer->base = NULL; spin_unlock_irqrestore(&base->lock, flags); @@ -457,7 +467,14 @@ repeat: smp_wmb(); timer->base = NULL; spin_unlock_irq(&base->lock); - fn(data); + { + u32 preempt_count = preempt_count(); + fn(data); + if (preempt_count != preempt_count()) { + printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count()); + BUG(); + } + } spin_lock_irq(&base->lock); goto repeat; } @@ -546,7 +563,7 @@ unsigned long tick_nsec = TICK_NSEC; /* ACTHZ period (nsec) */ /* * The current time * wall_to_monotonic is what we need to add to xtime (or xtime corrected - * for sub jiffie times) to get to monotonic time. Monotonic is pegged at zero + * for sub jiffie times) to get to monotonic time. Monotonic is pegged * at zero at system boot time, so wall_to_monotonic will be negative, * however, we will ALWAYS keep the tv_nsec part positive so we can use * the usual normalization. @@ -622,6 +639,9 @@ static void second_overflow(void) if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; wall_to_monotonic.tv_sec++; + /* The timer interpolator will make time change gradually instead + * of an immediate jump by one second. 
+ */ time_interpolator_update(-NSEC_PER_SEC); time_state = TIME_OOP; clock_was_set(); @@ -633,6 +653,7 @@ static void second_overflow(void) if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; wall_to_monotonic.tv_sec--; + /* Use of time interpolator for a gradual change of time */ time_interpolator_update(NSEC_PER_SEC); time_state = TIME_WAIT; clock_was_set(); @@ -665,7 +686,11 @@ static void second_overflow(void) if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; time_offset += ltemp; + #if SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE > 0 time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + #else + time_adj = -ltemp >> (SHIFT_HZ + SHIFT_UPDATE - SHIFT_SCALE); + #endif } else { ltemp = time_offset; if (!(time_status & STA_FLL)) @@ -673,7 +698,11 @@ static void second_overflow(void) if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; time_offset -= ltemp; + #if SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE > 0 time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + #else + time_adj = ltemp >> (SHIFT_HZ + SHIFT_UPDATE - SHIFT_SCALE); + #endif } /* @@ -779,67 +808,14 @@ static void update_wall_time(unsigned long ticks) do { ticks--; update_wall_time_one_tick(); - } while (ticks); - - if (xtime.tv_nsec >= 1000000000) { - xtime.tv_nsec -= 1000000000; - xtime.tv_sec++; - second_overflow(); - } -} - -static inline void do_process_times(struct task_struct *p, - unsigned long user, unsigned long system) -{ - unsigned long psecs; - - psecs = (p->utime += user); - psecs += (p->stime += system); - if (psecs / HZ >= p->rlim[RLIMIT_CPU].rlim_cur) { - /* Send SIGXCPU every second.. */ - if (!(psecs % HZ)) - send_sig(SIGXCPU, p, 1); - /* and SIGKILL when we go over max.. */ - if (psecs / HZ >= p->rlim[RLIMIT_CPU].rlim_max) - send_sig(SIGKILL, p, 1); - } -} - -static inline void do_it_virt(struct task_struct * p, unsigned long ticks) -{ - unsigned long it_virt = p->it_virt_value; - - if (it_virt) { - it_virt -= ticks; - if (!it_virt) { - it_virt = p->it_virt_incr; - send_sig(SIGVTALRM, p, 1); - } - p->it_virt_value = it_virt; - } -} - -static inline void do_it_prof(struct task_struct *p) -{ - unsigned long it_prof = p->it_prof_value; - - if (it_prof) { - if (--it_prof == 0) { - it_prof = p->it_prof_incr; - send_sig(SIGPROF, p, 1); + if (xtime.tv_nsec >= 1000000000) { + xtime.tv_nsec -= 1000000000; + xtime.tv_sec++; + second_overflow(); } - p->it_prof_value = it_prof; - } + } while (ticks); } -static void update_one_process(struct task_struct *p, unsigned long user, - unsigned long system, int cpu) -{ - do_process_times(p, user, system); - do_it_virt(p, user); - do_it_prof(p); -} - /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. @@ -847,11 +823,17 @@ static void update_one_process(struct task_struct *p, unsigned long user, void update_process_times(int user_tick) { struct task_struct *p = current; - int cpu = smp_processor_id(), system = user_tick ^ 1; + int cpu = smp_processor_id(); - update_one_process(p, user_tick, system, cpu); + /* Note: this timer irq context must be accounted for as well. 
*/ + if (user_tick) + account_user_time(p, jiffies_to_cputime(1)); + else + account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); run_local_timers(); - scheduler_tick(user_tick, system); + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_tick); + scheduler_tick(); } /* @@ -948,11 +930,6 @@ static inline void update_times(void) void do_timer(struct pt_regs *regs) { jiffies_64++; -#ifndef CONFIG_SMP - /* SMP process accounting uses the local APIC timer */ - - update_process_times(user_mode(regs)); -#endif update_times(); } @@ -999,7 +976,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds) */ asmlinkage long sys_getpid(void) { - return vx_map_tgid(current->vx_info, current->tgid); + return vx_map_tgid(current->tgid); } /* @@ -1043,7 +1020,7 @@ asmlinkage long sys_getppid(void) #endif break; } - return vx_map_tgid(current->vx_info, pid); + return vx_map_pid(pid); } asmlinkage long sys_getuid(void) @@ -1244,8 +1221,7 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) * too. */ - do_gettimeofday((struct timeval *)&tp); - tp.tv_nsec *= NSEC_PER_USEC; + getnstimeofday(&tp); tp.tv_sec += wall_to_monotonic.tv_sec; tp.tv_nsec += wall_to_monotonic.tv_nsec; if (tp.tv_nsec - NSEC_PER_SEC >= 0) { @@ -1434,14 +1410,119 @@ void __init init_timers(void) } #ifdef CONFIG_TIME_INTERPOLATION -volatile unsigned long last_nsec_offset; -#ifndef __HAVE_ARCH_CMPXCHG -spinlock_t last_nsec_offset_lock = SPIN_LOCK_UNLOCKED; -#endif struct time_interpolator *time_interpolator; static struct time_interpolator *time_interpolator_list; -static spinlock_t time_interpolator_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(time_interpolator_lock); + +static inline u64 time_interpolator_get_cycles(unsigned int src) +{ + unsigned long (*x)(void); + + switch (src) + { + case TIME_SOURCE_FUNCTION: + x = time_interpolator->addr; + return x(); + + case TIME_SOURCE_MMIO64 : + return readq((void __iomem *) time_interpolator->addr); + + case TIME_SOURCE_MMIO32 : + return readl((void __iomem *) time_interpolator->addr); + + default: return get_cycles(); + } +} + +static inline u64 time_interpolator_get_counter(void) +{ + unsigned int src = time_interpolator->source; + + if (time_interpolator->jitter) + { + u64 lcycle; + u64 now; + + do { + lcycle = time_interpolator->last_cycle; + now = time_interpolator_get_cycles(src); + if (lcycle && time_after(lcycle, now)) + return lcycle; + /* Keep track of the last timer value returned. The use of cmpxchg here + * will cause contention in an SMP environment. 
+ */ + } while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle)); + return now; + } + else + return time_interpolator_get_cycles(src); +} + +void time_interpolator_reset(void) +{ + time_interpolator->offset = 0; + time_interpolator->last_counter = time_interpolator_get_counter(); +} + +#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) + +unsigned long time_interpolator_get_offset(void) +{ + /* If we do not have a time interpolator set up then just return zero */ + if (!time_interpolator) + return 0; + + return time_interpolator->offset + + GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator); +} + +#define INTERPOLATOR_ADJUST 65536 +#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST + +static void time_interpolator_update(long delta_nsec) +{ + u64 counter; + unsigned long offset; + + /* If there is no time interpolator set up then do nothing */ + if (!time_interpolator) + return; + + /* The interpolator compensates for late ticks by accumulating + * the late time in time_interpolator->offset. A tick earlier than + * expected will lead to a reset of the offset and a corresponding + * jump of the clock forward. Again this only works if the + * interpolator clock is running slightly slower than the regular clock + * and the tuning logic insures that. + */ + + counter = time_interpolator_get_counter(); + offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator); + + if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) + time_interpolator->offset = offset - delta_nsec; + else { + time_interpolator->skips++; + time_interpolator->ns_skipped += delta_nsec - offset; + time_interpolator->offset = 0; + } + time_interpolator->last_counter = counter; + + /* Tuning logic for time interpolator invoked every minute or so. + * Decrease interpolator clock speed if no skips occurred and an offset is carried. + * Increase interpolator clock speed if we skip too much time. 
+ */ + if (jiffies % INTERPOLATOR_ADJUST == 0) + { + if (time_interpolator->skips == 0 && time_interpolator->offset > TICK_NSEC) + time_interpolator->nsec_per_cyc--; + if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) + time_interpolator->nsec_per_cyc++; + time_interpolator->skips = 0; + time_interpolator->ns_skipped = 0; + } +} static inline int is_better_time_interpolator(struct time_interpolator *new) @@ -1455,11 +1536,20 @@ is_better_time_interpolator(struct time_interpolator *new) void register_time_interpolator(struct time_interpolator *ti) { + unsigned long flags; + + /* Sanity check */ + if (ti->frequency == 0 || ti->mask == 0) + BUG(); + + ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; spin_lock(&time_interpolator_lock); - write_seqlock_irq(&xtime_lock); - if (is_better_time_interpolator(ti)) + write_seqlock_irqsave(&xtime_lock, flags); + if (is_better_time_interpolator(ti)) { time_interpolator = ti; - write_sequnlock_irq(&xtime_lock); + time_interpolator_reset(); + } + write_sequnlock_irqrestore(&xtime_lock, flags); ti->next = time_interpolator_list; time_interpolator_list = ti; @@ -1470,6 +1560,7 @@ void unregister_time_interpolator(struct time_interpolator *ti) { struct time_interpolator *curr, **prev; + unsigned long flags; spin_lock(&time_interpolator_lock); prev = &time_interpolator_list; @@ -1481,7 +1572,7 @@ unregister_time_interpolator(struct time_interpolator *ti) prev = &curr->next; } - write_seqlock_irq(&xtime_lock); + write_seqlock_irqsave(&xtime_lock, flags); if (ti == time_interpolator) { /* we lost the best time-interpolator: */ time_interpolator = NULL; @@ -1489,8 +1580,9 @@ unregister_time_interpolator(struct time_interpolator *ti) for (curr = time_interpolator_list; curr; curr = curr->next) if (is_better_time_interpolator(curr)) time_interpolator = curr; + time_interpolator_reset(); } - write_sequnlock_irq(&xtime_lock); + write_sequnlock_irqrestore(&xtime_lock, flags); spin_unlock(&time_interpolator_lock); } #endif /* CONFIG_TIME_INTERPOLATION */ @@ -1501,7 +1593,7 @@ unregister_time_interpolator(struct time_interpolator *ti) */ void msleep(unsigned int msecs) { - unsigned long timeout = msecs_to_jiffies(msecs); + unsigned long timeout = msecs_to_jiffies(msecs) + 1; while (timeout) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -1511,3 +1603,19 @@ void msleep(unsigned int msecs) EXPORT_SYMBOL(msleep); +/** + * msleep_interruptible - sleep waiting for waitqueue interruptions + * @msecs: Time in milliseconds to sleep for + */ +unsigned long msleep_interruptible(unsigned int msecs) +{ + unsigned long timeout = msecs_to_jiffies(msecs) + 1; + + while (timeout && !signal_pending(current)) { + set_current_state(TASK_INTERRUPTIBLE); + timeout = schedule_timeout(timeout); + } + return jiffies_to_msecs(timeout); +} + +EXPORT_SYMBOL(msleep_interruptible);
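
For reference, the cycles-to-nanoseconds conversion behind GET_TI_NSECS() and the nsec_per_cyc pre-scaling in register_time_interpolator() can be exercised on its own. The userspace sketch below mirrors that arithmetic; the 10 MHz frequency, the 32-bit mask, the shift of 10 and the two counter readings are made-up example values, not anything taken from the patch.

	#include <stdio.h>
	#include <stdint.h>

	#define SHIFT 10				/* plays the role of time_interpolator->shift */

	int main(void)
	{
		uint64_t frequency = 10000000;		/* hypothetical 10 MHz counter */
		uint64_t mask = 0xffffffffull;		/* counter wraps at 32 bits */
		/* same pre-scaling as register_time_interpolator() */
		uint64_t nsec_per_cyc = (1000000000ull << SHIFT) / frequency;

		uint64_t last_counter = 4294967000ull;	/* reading just before a wrap */
		uint64_t now = 500;			/* reading just after the wrap */

		/* same shape as GET_TI_NSECS(): the mask makes the delta wrap-safe,
		 * the final shift undoes the pre-scaling */
		uint64_t delta_ns = (((now - last_counter) & mask) * nsec_per_cyc) >> SHIFT;

		printf("nsec_per_cyc (scaled by 2^%d) = %llu\n", SHIFT,
		       (unsigned long long)nsec_per_cyc);
		printf("elapsed ~= %llu ns\n", (unsigned long long)delta_ns);
		return 0;
	}

The 796 cycles spanning the wrap come out as 79600 ns at 10 MHz, showing how the mask keeps the delta correct across counter roll-over.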
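
As the code above shows, msleep_interruptible() returns the number of milliseconds that were roughly left to sleep when a pending signal ended the wait early, and 0 after an uninterrupted sleep. A minimal usage sketch, assuming a hypothetical driver helper named poll_hw() and an arbitrary 500 ms interval:

	#include <linux/delay.h>	/* msleep_interruptible() */
	#include <linux/errno.h>

	static int poll_hw(void)
	{
		unsigned long left;

		/* sleep for up to 500 ms; a pending signal cuts the sleep short */
		left = msleep_interruptible(500);
		if (left)
			/* interrupted: roughly 'left' ms of the interval remain */
			return -EINTR;

		return 0;
	}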