X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fposix-timers.c;h=4d5f8c9b5478d25d2aa5276d8a3f716c31e10506;hb=refs%2Fheads%2Fvserver;hp=f846b77a205e868f4f3d4ea8ebc23b0247230117;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index f846b77a2..4d5f8c9b5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1,5 +1,5 @@ /* - * linux/kernel/posix_timers.c + * linux/kernel/posix-timers.c * * * 2002-10-15 Posix Clocks & timers @@ -7,6 +7,9 @@ * * Copyright (C) 2002 2003 by MontaVista Software. * + * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug. + * Copyright (C) 2004 Boris Hu + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or (at @@ -32,6 +35,7 @@ #include #include #include +#include #include #include @@ -40,23 +44,12 @@ #include #include #include +#include #include +#include +#include +#include -#ifndef div_long_long_rem -#include - -#define div_long_long_rem(dividend,divisor,remainder) ({ \ - u64 result = dividend; \ - *remainder = do_div(result,divisor); \ - result; }) - -#endif -#define CLOCK_REALTIME_RES TICK_NSEC /* In nano seconds. */ - -static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) -{ - return (u64)mpy1 * mpy2; -} /* * Management arrays for POSIX timers. Timers are kept in slab memory * Timer ids are allocated by an external routine that keeps track of the @@ -78,28 +71,10 @@ static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) /* * Lets keep our timers in a slab cache :-) */ -static kmem_cache_t *posix_timers_cache; +static struct kmem_cache *posix_timers_cache; static struct idr posix_timers_id; -static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(idr_lock); -/* - * Just because the timer is not in the timer list does NOT mean it is - * inactive. It could be in the "fire" routine getting a new expire time. - */ -#define TIMER_INACTIVE 1 -#define TIMER_RETRY 1 - -#ifdef CONFIG_SMP -# define timer_active(tmr) \ - ((tmr)->it_timer.entry.prev != (void *)TIMER_INACTIVE) -# define set_timer_inactive(tmr) \ - do { \ - (tmr)->it_timer.entry.prev = (void *)TIMER_INACTIVE; \ - } while (0) -#else -# define timer_active(tmr) BARFY // error to use outside of SMP -# define set_timer_inactive(tmr) do { } while (0) -#endif /* * we assume that the new SIGEV_THREAD_ID shares no bits with the other * SIGEV values. Here we put out an error if this assumption fails. @@ -110,7 +85,6 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; #endif -#define REQUEUE_PENDING 1 /* * The timer ID is turned into a timer address by idr_find(). * Verifying a valid ID consists of: @@ -129,18 +103,10 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; * resolution. Here we define the standard CLOCK_REALTIME as a * 1/HZ resolution clock. * - * CPUTIME & THREAD_CPUTIME: We are not, at this time, definding these - * two clocks (and the other process related clocks (Std - * 1003.1d-1999). The way these should be supported, we think, - * is to use large negative numbers for the two clocks that are - * pinned to the executing process and to use -pid for clocks - * pinned to particular pids. Calls which supported these clock - * ids would split early in the function. 
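The clock-id scheme the comment above proposes for CPU-time clocks, large negative numbers, is exactly what the kernel had adopted by the time of this patch: posix-cpu-timers pack a pid and a clock type into a negative clockid_t, which is why the CLOCK_DISPATCH macro introduced below tests for (clock) < 0. A paraphrased sketch of that encoding, from include/linux/posix-timers.h of this era (not part of this diff):

	/* Negative clockid_t layout for CPU clocks: bits 0-1 pick the
	 * clock, bit 2 distinguishes thread from process, and the
	 * upper bits carry the complemented pid. */
	#define CPUCLOCK_PROF		0	/* user + system time */
	#define CPUCLOCK_VIRT		1	/* user time only */
	#define CPUCLOCK_SCHED		2	/* scheduler run time */
	#define CPUCLOCK_PERTHREAD_MASK	4	/* thread, not whole process */
	#define CPUCLOCK_PID(clock)	((pid_t) ~((clock) >> 3))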
- * * RESOLUTION: Clock resolution is used to round up timer and interval * times, NOT to report clock times, which are reported with as * much resolution as the system can muster. In some cases this - * resolution may depend on the underlaying clock hardware and + * resolution may depend on the underlying clock hardware and * may not be quantifiable until run time, and only then is the * necessary code is written. The standard says we should say * something about this issue in the documentation... @@ -158,7 +124,7 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; * * At this time all functions EXCEPT clock_nanosleep can be * redirected by the CLOCKS structure. Clock_nanosleep is in - * there, but the code ignors it. + * there, but the code ignores it. * * Permissions: It is assumed that the clock_settime() function defined * for each clock will take care of permission checks. Some @@ -170,24 +136,18 @@ static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED; static struct k_clock posix_clocks[MAX_CLOCKS]; -#define if_clock_do(clock_fun,alt_fun,parms) \ - (!clock_fun) ? alt_fun parms : clock_fun parms - -#define p_timer_get(clock,a,b) \ - if_clock_do((clock)->timer_get,do_timer_gettime, (a,b)) - -#define p_nsleep(clock,a,b,c) \ - if_clock_do((clock)->nsleep, do_nsleep, (a,b,c)) +/* + * These ones are defined below. + */ +static int common_nsleep(const clockid_t, int flags, struct timespec *t, + struct timespec __user *rmtp); +static void common_timer_get(struct k_itimer *, struct itimerspec *); +static int common_timer_set(struct k_itimer *, int, + struct itimerspec *, struct itimerspec *); +static int common_timer_del(struct k_itimer *timer); -#define p_timer_del(clock,a) \ - if_clock_do((clock)->timer_del, do_timer_delete, (a)) +static int posix_timer_fn(struct hrtimer *data); -void register_posix_clock(int clock_id, struct k_clock *new_clock); -static int do_posix_gettime(struct k_clock *clock, struct timespec *tp); -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo); -int do_posix_clock_monotonic_gettime(struct timespec *tp); -int do_posix_clock_monotonic_settime(struct timespec *tp); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) @@ -195,67 +155,115 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) spin_unlock_irqrestore(&timr->it_lock, flags); } +/* + * Call the k_clock hook function if non-null, or the default function. + */ +#define CLOCK_DISPATCH(clock, call, arglist) \ + ((clock) < 0 ? posix_cpu_##call arglist : \ + (posix_clocks[clock].call != NULL \ + ? (*posix_clocks[clock].call) arglist : common_##call arglist)) + +/* + * Default clock hook functions when the struct k_clock passed + * to register_posix_clock leaves a function pointer null. + * + * The function common_CALL is the default implementation for + * the function pointer CALL in struct k_clock. 
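Since CLOCK_DISPATCH is the central indirection of the rewrite, one use is worth spelling out. After macro substitution, CLOCK_DISPATCH(which_clock, clock_get, (which_clock, &ts)) behaves like the following (editor's illustration; expanded_clock_get is a made-up name):

	static int expanded_clock_get(clockid_t which_clock, struct timespec *ts)
	{
		if (which_clock < 0)	/* negative ids are CPU clocks */
			return posix_cpu_clock_get(which_clock, ts);
		if (posix_clocks[which_clock].clock_get != NULL)
			return (*posix_clocks[which_clock].clock_get)(which_clock, ts);
		return common_clock_get(which_clock, ts);	/* default hook */
	}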
+ */ + +static inline int common_clock_getres(const clockid_t which_clock, + struct timespec *tp) +{ + tp->tv_sec = 0; + tp->tv_nsec = posix_clocks[which_clock].res; + return 0; +} + +/* + * Get real time for posix timers + */ +static int common_clock_get(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_real_ts(tp); + return 0; +} + +static inline int common_clock_set(const clockid_t which_clock, + struct timespec *tp) +{ + return do_sys_settimeofday(tp, NULL); +} + +static int common_timer_create(struct k_itimer *new_timer) +{ + hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); + return 0; +} + +/* + * Return nonzero if we know a priori this clockid_t value is bogus. + */ +static inline int invalid_clockid(const clockid_t which_clock) +{ + if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */ + return 0; + if ((unsigned) which_clock >= MAX_CLOCKS) + return 1; + if (posix_clocks[which_clock].clock_getres != NULL) + return 0; + if (posix_clocks[which_clock].res != 0) + return 0; + return 1; +} + +/* + * Get monotonic time for posix timers + */ +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_ts(tp); + return 0; +} + /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES }; - struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, - .clock_get = do_posix_clock_monotonic_gettime, - .clock_set = do_posix_clock_monotonic_settime + struct k_clock clock_realtime = { + .clock_getres = hrtimer_get_res, + }; + struct k_clock clock_monotonic = { + .clock_getres = hrtimer_get_res, + .clock_get = posix_ktime_get_ts, + .clock_set = do_posix_clock_nosettime, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); posix_timers_cache = kmem_cache_create("posix_timers_cache", - sizeof (struct k_itimer), 0, 0, 0, 0); + sizeof (struct k_itimer), 0, 0, NULL, NULL); idr_init(&posix_timers_id); - return 0; } __initcall(init_posix_timers); -static void tstojiffie(struct timespec *tp, int res, u64 *jiff) -{ - long sec = tp->tv_sec; - long nsec = tp->tv_nsec + res - 1; - - if (nsec > NSEC_PER_SEC) { - sec++; - nsec -= NSEC_PER_SEC; - } - - /* - * The scaling constants are defined in - * The difference between there and here is that we do the - * res rounding and compute a 64-bit result (well so does that - * but it then throws away the high bits). 
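The two register_posix_clock() calls in init_posix_timers() above show the default-hook design at work: CLOCK_REALTIME supplies only clock_getres, so clock_get, clock_set and timer_create all fall through CLOCK_DISPATCH to the common_* implementations (ktime_get_real_ts(), do_sys_settimeofday() and hrtimer_init() respectively). A hypothetical additional clock would be wired up the same way; MY_CLOCK_ID and my_clock_get below are invented names for illustration:

	/* Sketch only: a readable but non-settable clock. */
	static struct k_clock my_clock = {
		.clock_getres = hrtimer_get_res,
		.clock_get    = my_clock_get,		  /* hypothetical getter */
		.clock_set    = do_posix_clock_nosettime, /* settime -> EINVAL */
	};

	static __init int my_clock_init(void)
	{
		register_posix_clock(MY_CLOCK_ID, &my_clock); /* id < MAX_CLOCKS */
		return 0;
	}
	__initcall(my_clock_init);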
- */ - *jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) + - (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >> - (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; -} - static void schedule_next_timer(struct k_itimer *timr) { - struct now_struct now; + struct hrtimer *timer = &timr->it.real.timer; - /* Set up the timer for the next interval (if there is one) */ - if (!timr->it_incr) + if (timr->it.real.interval.tv64 == 0) return; - posix_get_now(&now); - do { - posix_bump_timer(timr); - }while (posix_time_before(&timr->it_timer, &now)); + timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1; ++timr->it_requeue_pending; - add_timer(&timr->it_timer); + hrtimer_restart(timer); } /* @@ -276,99 +284,104 @@ void do_schedule_next_timer(struct siginfo *info) timr = lock_timer(info->si_tid, &flags); - if (!timr || timr->it_requeue_pending != info->si_sys_private) - goto exit; + if (timr && timr->it_requeue_pending == info->si_sys_private) { + if (timr->it_clock < 0) + posix_cpu_timer_schedule(timr); + else + schedule_next_timer(timr); + + info->si_overrun = timr->it_overrun_last; + } - schedule_next_timer(timr); - info->si_overrun = timr->it_overrun_last; -exit: if (timr) unlock_timer(timr, flags); } -/* - * Notify the task and set up the timer for the next expiration (if - * applicable). This function requires that the k_itimer structure - * it_lock is taken. This code will requeue the timer only if we get - * either an error return or a flag (ret > 0) from send_seg_info - * indicating that the signal was either not queued or was queued - * without an info block. In this case, we will not get a call back to - * do_schedule_next_timer() so we do it here. This should be rare... - - * An interesting problem can occur if, while a signal, and thus a call - * back is pending, the timer is rearmed, i.e. stopped and restarted. - * We then need to sort out the call back and do the right thing. What - * we do is to put a counter in the info block and match it with the - * timers copy on the call back. If they don't match, we just ignore - * the call back. The counter is local to the timer and we use odd to - * indicate a call back is pending. Note that we do allow the timer to - * be deleted while a signal is pending. The standard says we can - * allow that signal to be delivered, and we do. - */ - -static void timer_notify_task(struct k_itimer *timr) +int posix_timer_event(struct k_itimer *timr,int si_private) { + struct vx_info_save vxis; int ret; + enter_vx_info(task_get_vx_info(timr->it_process), &vxis); memset(&timr->sigq->info, 0, sizeof(siginfo_t)); + timr->sigq->info.si_sys_private = si_private; + /* Send signal to the process that owns this timer.*/ - /* Send signal to the process that owns this timer. 
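schedule_next_timer() above leans entirely on hrtimer_forward(), whose contract drives all of the overrun accounting in this patch: advance the timer's expiry by whole intervals until it lies beyond now, and return the number of intervals consumed, which is exactly the POSIX overrun count. A simplified equivalent (editor's sketch; the real function replaces the loop with a division for large gaps):

	static unsigned long hrtimer_forward_sketch(struct hrtimer *timer,
						    ktime_t now, ktime_t interval)
	{
		unsigned long orun = 0;

		while (timer->expires.tv64 <= now.tv64) {
			timer->expires = ktime_add(timer->expires, interval);
			orun++;
		}
		return orun;	/* expirations stepped over to get past "now" */
	}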
*/ timr->sigq->info.si_signo = timr->it_sigev_signo; timr->sigq->info.si_errno = 0; timr->sigq->info.si_code = SI_TIMER; timr->sigq->info.si_tid = timr->it_id; timr->sigq->info.si_value = timr->it_sigev_value; - if (timr->it_incr) - timr->sigq->info.si_sys_private = ++timr->it_requeue_pending; if (timr->it_sigev_notify & SIGEV_THREAD_ID) { - if (unlikely(timr->it_process->flags & PF_EXITING)) { - timr->it_sigev_notify = SIGEV_SIGNAL; - put_task_struct(timr->it_process); - timr->it_process = timr->it_process->group_leader; - goto group; - } + struct task_struct *leader; + ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); - } - else { - group: - ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq, - timr->it_process); - } - if (ret) { - /* - * signal was not sent because of sig_ignor - * we will not get a call back to restart it AND - * it should be restarted. - */ - schedule_next_timer(timr); + timr->it_process); + if (likely(ret >= 0)) + goto out; + + timr->it_sigev_notify = SIGEV_SIGNAL; + leader = timr->it_process->group_leader; + put_task_struct(timr->it_process); + timr->it_process = leader; } + + ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq, + timr->it_process); +out: + leave_vx_info(&vxis); + put_vx_info(vxis.vxi); + return ret; } +EXPORT_SYMBOL_GPL(posix_timer_event); /* * This function gets called when a POSIX.1b interval timer expires. It * is used as a callback from the kernel internal timer. The - * run_timer_list code ALWAYS calls with interrutps on. + * run_timer_list code ALWAYS calls with interrupts on. + + * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. */ -static void posix_timer_fn(unsigned long __data) +static int posix_timer_fn(struct hrtimer *timer) { - struct k_itimer *timr = (struct k_itimer *) __data; + struct k_itimer *timr; unsigned long flags; + int si_private = 0; + int ret = HRTIMER_NORESTART; + timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); - set_timer_inactive(timr); - timer_notify_task(timr); + + if (timr->it.real.interval.tv64 != 0) + si_private = ++timr->it_requeue_pending; + + if (posix_timer_event(timr, si_private)) { + /* + * signal was not sent because of sig_ignor + * we will not get a call back to restart it AND + * it should be restarted. 
+ */ + if (timr->it.real.interval.tv64 != 0) { + timr->it_overrun += + hrtimer_forward(timer, + timer->base->softirq_time, + timr->it.real.interval); + ret = HRTIMER_RESTART; + ++timr->it_requeue_pending; + } + } + unlock_timer(timr, flags); + return ret; } - -static inline struct task_struct * good_sigevent(sigevent_t * event) +static struct task_struct * good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || + (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) || rtn->tgid != current->tgid || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) return NULL; @@ -380,15 +393,17 @@ static inline struct task_struct * good_sigevent(sigevent_t * event) return rtn; } -void register_posix_clock(int clock_id, struct k_clock *new_clock) +void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) { if ((unsigned) clock_id >= MAX_CLOCKS) { printk("POSIX clock register failed for clock_id %d\n", clock_id); return; } + posix_clocks[clock_id] = *new_clock; } +EXPORT_SYMBOL_GPL(register_posix_clock); static struct k_itimer * alloc_posix_timer(void) { @@ -397,17 +412,18 @@ static struct k_itimer * alloc_posix_timer(void) if (!tmr) return tmr; memset(tmr, 0, sizeof (struct k_itimer)); - tmr->it_id = (timer_t)-1; if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { kmem_cache_free(posix_timers_cache, tmr); - tmr = 0; + tmr = NULL; } return tmr; } -static void release_posix_timer(struct k_itimer *tmr) +#define IT_ID_SET 1 +#define IT_ID_NOT_SET 0 +static void release_posix_timer(struct k_itimer *tmr, int it_id_set) { - if (tmr->it_id != -1) { + if (it_id_set) { unsigned long flags; spin_lock_irqsave(&idr_lock, flags); idr_remove(&posix_timers_id, tmr->it_id); @@ -423,19 +439,19 @@ static void release_posix_timer(struct k_itimer *tmr) /* Create a POSIX.1b interval timer. 
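good_sigevent() above is the kernel half of a contract glibc depends on for targeted signal delivery: with SIGEV_THREAD_ID, the named thread must belong to the caller's thread group and only SIGEV_SIGNAL may accompany the flag. From userland the path looks roughly like this (illustrative sketch; _sigev_un._tid is the glibc-internal spelling of the thread-id field):

	#include <signal.h>
	#include <string.h>
	#include <time.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Create a timer whose SIGALRM goes to this thread only. */
	static int make_thread_timer(timer_t *id)
	{
		struct sigevent sev;

		memset(&sev, 0, sizeof(sev));
		sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
		sev.sigev_signo = SIGALRM;
		sev._sigev_un._tid = syscall(SYS_gettid);
		return timer_create(CLOCK_MONOTONIC, &sev, id);
	}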
 */
asmlinkage long
-sys_timer_create(clockid_t which_clock,
+sys_timer_create(const clockid_t which_clock,
		 struct sigevent __user *timer_event_spec,
		 timer_t __user * created_timer_id)
{
	int error = 0;
	struct k_itimer *new_timer = NULL;
-	timer_t new_timer_id;
-	struct task_struct *process = 0;
+	int new_timer_id;
+	struct task_struct *process = NULL;
	unsigned long flags;
	sigevent_t event;
+	int it_id_set = IT_ID_NOT_SET;

-	if ((unsigned) which_clock >= MAX_CLOCKS ||
-			!posix_clocks[which_clock].res)
+	if (invalid_clockid(which_clock))
		return -EINVAL;

	new_timer = alloc_posix_timer();
@@ -443,19 +459,34 @@ sys_timer_create(clockid_t which_clock,
		return -EAGAIN;

	spin_lock_init(&new_timer->it_lock);
-	do {
-		if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) {
-			error = -EAGAIN;
-			new_timer->it_id = (timer_t)-1;
-			goto out;
-		}
-		spin_lock_irq(&idr_lock);
-		new_timer_id = (timer_t) idr_get_new(&posix_timers_id,
-						     (void *) new_timer);
-		spin_unlock_irq(&idr_lock);
-	} while (unlikely(new_timer_id == -1));
+ retry:
+	if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) {
+		error = -EAGAIN;
+		goto out;
+	}
+	spin_lock_irq(&idr_lock);
+	error = idr_get_new(&posix_timers_id, (void *) new_timer,
+			    &new_timer_id);
+	spin_unlock_irq(&idr_lock);
+	if (error == -EAGAIN)
+		goto retry;
+	else if (error) {
+		/*
+		 * Weird looking, but we return EAGAIN if the IDR is
+		 * full (proper POSIX return value for this)
+		 */
+		error = -EAGAIN;
+		goto out;
+	}
+
+	it_id_set = IT_ID_SET;
+	new_timer->it_id = (timer_t) new_timer_id;
+	new_timer->it_clock = which_clock;
+	new_timer->it_overrun = -1;
+	error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+	if (error)
+		goto out;

-	new_timer->it_id = new_timer_id;
	/*
	 * return the timer_id now. The next step is hard to
	 * back out if there is an error.
	 */
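The rewritten allocation above is the stock idr idiom of this kernel generation, and its error contract is easy to misread: idr_pre_get() preloads memory outside the lock and returns 0 only on real memory exhaustion, idr_get_new() can still fail with -EAGAIN when a concurrent allocator consumed the preloaded node (hence the retry), and any other failure means the id space is full, which POSIX maps to EAGAIN. The same pattern in miniature (sketch; idr, lock and ptr are placeholders):

	static int alloc_id_sketch(struct idr *idr, spinlock_t *lock, void *ptr)
	{
		int id, err;
	again:
		if (!idr_pre_get(idr, GFP_KERNEL))
			return -EAGAIN;		/* truly out of memory */
		spin_lock_irq(lock);
		err = idr_get_new(idr, ptr, &id);
		spin_unlock_irq(lock);
		if (err == -EAGAIN)
			goto again;		/* raced with another allocator */
		return err ? -EAGAIN : id;	/* id space full maps to EAGAIN */
	}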
@@ -470,6 +501,10 @@ sys_timer_create(clockid_t which_clock, error = -EFAULT; goto out; } + new_timer->it_sigev_notify = event.sigev_notify; + new_timer->it_sigev_signo = event.sigev_signo; + new_timer->it_sigev_value = event.sigev_value; + read_lock(&tasklist_lock); if ((process = good_sigevent(&event))) { /* @@ -489,13 +524,15 @@ sys_timer_create(clockid_t which_clock, */ spin_lock_irqsave(&process->sighand->siglock, flags); if (!(process->flags & PF_EXITING)) { + new_timer->it_process = process; list_add(&new_timer->list, &process->signal->posix_timers); spin_unlock_irqrestore(&process->sighand->siglock, flags); - get_task_struct(process); + if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) + get_task_struct(process); } else { spin_unlock_irqrestore(&process->sighand->siglock, flags); - process = 0; + process = NULL; } } read_unlock(&tasklist_lock); @@ -503,60 +540,31 @@ sys_timer_create(clockid_t which_clock, error = -EINVAL; goto out; } - new_timer->it_sigev_notify = event.sigev_notify; - new_timer->it_sigev_signo = event.sigev_signo; - new_timer->it_sigev_value = event.sigev_value; } else { new_timer->it_sigev_notify = SIGEV_SIGNAL; new_timer->it_sigev_signo = SIGALRM; new_timer->it_sigev_value.sival_int = new_timer->it_id; process = current->group_leader; spin_lock_irqsave(&process->sighand->siglock, flags); + new_timer->it_process = process; list_add(&new_timer->list, &process->signal->posix_timers); spin_unlock_irqrestore(&process->sighand->siglock, flags); } - new_timer->it_clock = which_clock; - new_timer->it_incr = 0; - new_timer->it_overrun = -1; - init_timer(&new_timer->it_timer); - new_timer->it_timer.expires = 0; - new_timer->it_timer.data = (unsigned long) new_timer; - new_timer->it_timer.function = posix_timer_fn; - set_timer_inactive(new_timer); - - /* - * Once we set the process, it can be found so do it last... + /* + * In the case of the timer belonging to another task, after + * the task is unlocked, the timer is owned by the other task + * and may cease to exist at any time. Don't use or modify + * new_timer after the unlock call. */ - new_timer->it_process = process; + out: if (error) - release_posix_timer(new_timer); + release_posix_timer(new_timer, it_id_set); return error; } -/* - * good_timespec - * - * This function checks the elements of a timespec structure. - * - * Arguments: - * ts : Pointer to the timespec structure to check - * - * Return value: - * If a NULL pointer was passed in, or the tv_nsec field was less than 0 - * or greater than NSEC_PER_SEC, or the tv_sec field was less than 0, - * this function returns 0. Otherwise it returns 1. - */ -static int good_timespec(const struct timespec *ts) -{ - if ((!ts) || (ts->tv_sec < 0) || - ((unsigned) ts->tv_nsec >= NSEC_PER_SEC)) - return 0; - return 1; -} - /* * Locking issues: We need to protect the result of the id look up until * we get the timer locked down so it is not deleted under us. The @@ -607,42 +615,44 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) * report. 
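The "Locking issues" comment above refers to lock_timer(), which this patch leaves untouched, so its body never appears in the diff. For orientation, here is a sketch of the protocol it implements, following the validation steps listed near the top of the file; treat this as an approximation rather than the literal source:

	static struct k_itimer *lock_timer_sketch(timer_t timer_id,
						  unsigned long *flags)
	{
		struct k_itimer *timr;

		spin_lock_irqsave(&idr_lock, *flags);
		timr = idr_find(&posix_timers_id, (int) timer_id);
		if (timr) {
			spin_lock(&timr->it_lock);
			if (timr->it_id == timer_id && timr->it_process &&
			    timr->it_process->tgid == current->tgid) {
				spin_unlock(&idr_lock);	/* keep it_lock held */
				return timr;
			}
			spin_unlock(&timr->it_lock);
		}
		spin_unlock_irqrestore(&idr_lock, *flags);
		return NULL;
	}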
*/ static void -do_timer_gettime(struct k_itimer *timr, struct itimerspec *cur_setting) +common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { - unsigned long expires; - struct now_struct now; - - do - expires = timr->it_timer.expires; - while ((volatile long) (timr->it_timer.expires) != expires); - - posix_get_now(&now); - - if (expires && - ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) && - !timr->it_incr && - posix_time_before(&timr->it_timer, &now)) - timr->it_timer.expires = expires = 0; - if (expires) { - if (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { - while (posix_time_before(&timr->it_timer, &now)) - posix_bump_timer(timr); - expires = timr->it_timer.expires; - } - else - if (!timer_pending(&timr->it_timer)) - expires = 0; - if (expires) - expires -= now.jiffies; - } - jiffies_to_timespec(expires, &cur_setting->it_value); - jiffies_to_timespec(timr->it_incr, &cur_setting->it_interval); + ktime_t now, remaining, iv; + struct hrtimer *timer = &timr->it.real.timer; - if (cur_setting->it_value.tv_sec < 0) { - cur_setting->it_value.tv_nsec = 1; - cur_setting->it_value.tv_sec = 0; - } + memset(cur_setting, 0, sizeof(struct itimerspec)); + + iv = timr->it.real.interval; + + /* interval timer ? */ + if (iv.tv64) + cur_setting->it_interval = ktime_to_timespec(iv); + else if (!hrtimer_active(timer) && + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + return; + + now = timer->base->get_time(); + + /* + * When a requeue is pending or this is a SIGEV_NONE + * timer move the expiry time forward by intervals, so + * expiry is > now. + */ + if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + timr->it_overrun += hrtimer_forward(timer, now, iv); + + remaining = ktime_sub(timer->expires, now); + /* Return 0 only, when the timer is expired and not pending */ + if (remaining.tv64 <= 0) { + /* + * A single shot SIGEV_NONE timer must return 0, when + * it is expired ! + */ + if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + cur_setting->it_value.tv_nsec = 1; + } else + cur_setting->it_value = ktime_to_timespec(remaining); } /* Get the time remaining on a POSIX.1b interval timer. */ @@ -657,7 +667,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) if (!timr) return -EINVAL; - p_timer_get(&posix_clocks[timr->it_clock], timr, &cur_setting); + CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting)); unlock_timer(timr, flags); @@ -666,6 +676,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) return 0; } + /* * Get the number of overruns of a POSIX.1b interval timer. This is to * be the overrun of the timer last delivered. At the same time we are @@ -675,7 +686,6 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) * the call back to do_schedule_next_timer(). So all we need to do is * to pick up the frozen overrun. */ - asmlinkage long sys_timer_getoverrun(timer_t timer_id) { @@ -692,160 +702,55 @@ sys_timer_getoverrun(timer_t timer_id) return overrun; } -/* - * Adjust for absolute time - * - * If absolute time is given and it is not CLOCK_MONOTONIC, we need to - * adjust for the offset between the timer clock (CLOCK_MONOTONIC) and - * what ever clock he is using. - * - * If it is relative time, we need to add the current (CLOCK_MONOTONIC) - * time to it to get the proper time for the timer. 
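A worked example of the overrun arithmetic (hypothetical numbers): an armed timer with a 10 ms interval is due at t = 100 ms, but its signal is only delivered at t = 135 ms. When do_schedule_next_timer() re-arms it, hrtimer_forward() pushes the expiry to 140 ms and returns 4, so:

	timr->it_overrun = -1;		/* set when the timer was armed */
	timr->it_overrun += 4;		/* hrtimer_forward() returned 4; now 3 */
	timr->it_overrun_last = timr->it_overrun;
	/* timer_getoverrun() reports 3: the expirations at 110, 120
	 * and 130 ms that passed beyond the one actually delivered */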
- */ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, - int abs, u64 *exp) -{ - struct timespec now; - struct timespec oc = *tp; - struct timespec wall_to_mono; - u64 jiffies_64_f; - int rtn =0; - - if (abs) { - /* - * The mask pick up the 4 basic clocks - */ - if (!(clock - &posix_clocks[0]) & ~CLOCKS_MASK) { - jiffies_64_f = do_posix_clock_monotonic_gettime_parts( - &now, &wall_to_mono); - /* - * If we are doing a MONOTONIC clock - */ - if((clock - &posix_clocks[0]) & CLOCKS_MONO){ - now.tv_sec += wall_to_mono.tv_sec; - now.tv_nsec += wall_to_mono.tv_nsec; - } - } else { - /* - * Not one of the basic clocks - */ - do_posix_gettime(clock, &now); - jiffies_64_f = get_jiffies_64(); - } - /* - * Take away now to get delta - */ - oc.tv_sec -= now.tv_sec; - oc.tv_nsec -= now.tv_nsec; - /* - * Normalize... - */ - while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) { - oc.tv_nsec -= NSEC_PER_SEC; - oc.tv_sec++; - } - while ((oc.tv_nsec) < 0) { - oc.tv_nsec += NSEC_PER_SEC; - oc.tv_sec--; - } - }else{ - jiffies_64_f = get_jiffies_64(); - } - /* - * Check if the requested time is prior to now (if so set now) - */ - if (oc.tv_sec < 0) - oc.tv_sec = oc.tv_nsec = 0; - tstojiffie(&oc, clock->res, exp); - - /* - * Check if the requested time is more than the timer code - * can handle (if so we error out but return the value too). - */ - if (*exp > ((u64)MAX_JIFFY_OFFSET)) - /* - * This is a considered response, not exactly in - * line with the standard (in fact it is silent on - * possible overflows). We assume such a large - * value is ALMOST always a programming error and - * try not to compound it by setting a really dumb - * value. - */ - rtn = -EINVAL; - /* - * return the actual jiffies expire time, full 64 bits - */ - *exp += jiffies_64_f; - return rtn; -} /* Set a POSIX.1b interval timer. */ /* timr->it_lock is taken. */ -static inline int -do_timer_settime(struct k_itimer *timr, int flags, +static int +common_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { - struct k_clock *clock = &posix_clocks[timr->it_clock]; - u64 expire_64; + struct hrtimer *timer = &timr->it.real.timer; + enum hrtimer_mode mode; if (old_setting) - do_timer_gettime(timr, old_setting); + common_timer_get(timr, old_setting); /* disable the timer */ - timr->it_incr = 0; + timr->it.real.interval.tv64 = 0; /* * careful here. If smp we could be in the "fire" routine which will * be spinning as we hold the lock. But this is ONLY an SMP issue. */ -#ifdef CONFIG_SMP - if (timer_active(timr) && !del_timer(&timr->it_timer)) - /* - * It can only be active if on an other cpu. Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. 
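The hrtimer_try_to_cancel() call just below replaces this whole hand-rolled CONFIG_SMP dance; its return value encodes the three cases the old code probed by inspecting list pointers (summary, consistent with the hrtimer API of this era):

	/*
	 * hrtimer_try_to_cancel(timer) returns:
	 *	 0 - timer was not queued (nothing to cancel)
	 *	 1 - timer was queued and has been removed
	 *	-1 - the expiry callback is running on another CPU and
	 *	     cannot be cancelled now; since that callback spins
	 *	     on timr->it_lock, the caller must drop the lock and
	 *	     retry (TIMER_RETRY) rather than wait.
	 */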
- */ + if (hrtimer_try_to_cancel(timer) < 0) return TIMER_RETRY; - set_timer_inactive(timr); -#else - del_timer(&timr->it_timer); -#endif timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; - timr->it_overrun = -1; - /* - *switch off the timer when it_value is zero - */ - if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) { - timr->it_timer.expires = 0; + + /* switch off the timer when it_value is zero */ + if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; - } - if (adjust_abs_time(clock, - &new_setting->it_value, flags & TIMER_ABSTIME, - &expire_64)) { - return -EINVAL; - } - timr->it_timer.expires = (unsigned long)expire_64; - tstojiffie(&new_setting->it_interval, clock->res, &expire_64); - timr->it_incr = (unsigned long)expire_64; + mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; + hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); + timr->it.real.timer.function = posix_timer_fn; + timer->expires = timespec_to_ktime(new_setting->it_value); - /* - * For some reason the timer does not fire immediately if expires is - * equal to jiffies, so the timer notify function is called directly. - * We do not even queue SIGEV_NONE timers! - */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) { - if (timr->it_timer.expires == jiffies) - timer_notify_task(timr); - else - add_timer(&timr->it_timer); + /* Convert interval */ + timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); + + /* SIGEV_NONE timers are not queued ! See common_timer_get */ + if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { + /* Setup correct expiry time for relative timers */ + if (mode == HRTIMER_REL) + timer->expires = ktime_add(timer->expires, + timer->base->get_time()); + return 0; } + + hrtimer_start(timer, timer->expires, mode); return 0; } @@ -867,53 +772,44 @@ sys_timer_settime(timer_t timer_id, int flags, if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) return -EFAULT; - if ((!good_timespec(&new_spec.it_interval)) || - (!good_timespec(&new_spec.it_value))) + if (!timespec_valid(&new_spec.it_interval) || + !timespec_valid(&new_spec.it_value)) return -EINVAL; retry: timr = lock_timer(timer_id, &flag); if (!timr) return -EINVAL; - if (!posix_clocks[timr->it_clock].timer_set) - error = do_timer_settime(timr, flags, &new_spec, rtn); - else - error = posix_clocks[timr->it_clock].timer_set(timr, - flags, - &new_spec, rtn); + error = CLOCK_DISPATCH(timr->it_clock, timer_set, + (timr, flags, &new_spec, rtn)); + unlock_timer(timr, flag); if (error == TIMER_RETRY) { rtn = NULL; // We already got the old time... goto retry; } - if (old_setting && !error && copy_to_user(old_setting, - &old_spec, sizeof (old_spec))) + if (old_setting && !error && + copy_to_user(old_setting, &old_spec, sizeof (old_spec))) error = -EFAULT; return error; } -static inline int do_timer_delete(struct k_itimer *timer) +static inline int common_timer_del(struct k_itimer *timer) { - timer->it_incr = 0; -#ifdef CONFIG_SMP - if (timer_active(timer) && !del_timer(&timer->it_timer)) - /* - * It can only be active if on an other cpu. Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. 
- */ + timer->it.real.interval.tv64 = 0; + + if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0) return TIMER_RETRY; -#else - del_timer(&timer->it_timer); -#endif return 0; } +static inline int timer_delete_hook(struct k_itimer *timer) +{ + return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer)); +} + /* Delete a POSIX.1b interval timer. */ asmlinkage long sys_timer_delete(timer_t timer_id) @@ -921,24 +817,16 @@ sys_timer_delete(timer_t timer_id) struct k_itimer *timer; long flags; -#ifdef CONFIG_SMP - int error; retry_delete: -#endif timer = lock_timer(timer_id, &flags); if (!timer) return -EINVAL; -#ifdef CONFIG_SMP - error = p_timer_del(&posix_clocks[timer->it_clock], timer); - - if (error == TIMER_RETRY) { + if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); goto retry_delete; } -#else - p_timer_del(&posix_clocks[timer->it_clock], timer); -#endif + spin_lock(¤t->sighand->siglock); list_del(&timer->list); spin_unlock(¤t->sighand->siglock); @@ -949,35 +837,27 @@ retry_delete: if (timer->it_process) { if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) put_task_struct(timer->it_process); - timer->it_process = NULL; + timer->it_process = NULL; } unlock_timer(timer, flags); - release_posix_timer(timer); + release_posix_timer(timer, IT_ID_SET); return 0; } + /* * return timer owned by the process, used by exit_itimers */ -static inline void itimer_delete(struct k_itimer *timer) +static void itimer_delete(struct k_itimer *timer) { unsigned long flags; -#ifdef CONFIG_SMP - int error; retry_delete: -#endif spin_lock_irqsave(&timer->it_lock, flags); -#ifdef CONFIG_SMP - error = p_timer_del(&posix_clocks[timer->it_clock], timer); - - if (error == TIMER_RETRY) { + if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); goto retry_delete; } -#else - p_timer_del(&posix_clocks[timer->it_clock], timer); -#endif list_del(&timer->list); /* * This keeps any tasks waiting on the spin lock from thinking @@ -989,11 +869,11 @@ retry_delete: timer->it_process = NULL; } unlock_timer(timer, flags); - release_posix_timer(timer); + release_posix_timer(timer, IT_ID_SET); } /* - * This is called by __exit_signal, only when there are no more + * This is called by do_exit or de_thread, only when there are no more * references to the shared signal_struct. */ void exit_itimers(struct signal_struct *sig) @@ -1006,110 +886,48 @@ void exit_itimers(struct signal_struct *sig) } } -/* - * And now for the "clock" calls - * - * These functions are called both from timer functions (with the timer - * spin_lock_irq() held and from clock calls with no locking. They must - * use the save flags versions of locks. - */ -static int do_posix_gettime(struct k_clock *clock, struct timespec *tp) +/* Not available / possible... functions */ +int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp) { - struct timeval tv; - - if (clock->clock_get) - return clock->clock_get(tp); - - do_gettimeofday(&tv); - tp->tv_sec = tv.tv_sec; - tp->tv_nsec = tv.tv_usec * NSEC_PER_USEC; - - return 0; -} - -/* - * We do ticks here to avoid the irq lock ( they take sooo long). - * The seqlock is great here. Since we a reader, we don't really care - * if we are interrupted since we don't take lock that will stall us or - * any other cpu. Voila, no irq lock is needed. 
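The lockless-reader trick this deleted comment celebrates is the standard seqlock pattern, and it survives elsewhere in the timekeeping code. Its shape, extracted from the function being removed below (sketch):

	/* Readers loop until a consistent snapshot is taken with no
	 * writer interleaving; they never block the clock writer. */
	unsigned int seq;
	struct timespec mono;

	do {
		seq = read_seqbegin(&xtime_lock);
		mono = wall_to_monotonic;	/* snapshot shared state */
	} while (read_seqretry(&xtime_lock, seq));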
- * - */ - -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo) -{ - u64 jiff; - struct timeval tpv; - unsigned int seq; - - do { - seq = read_seqbegin(&xtime_lock); - do_gettimeofday(&tpv); - *mo = wall_to_monotonic; - jiff = jiffies_64; - - } while(read_seqretry(&xtime_lock, seq)); - - /* - * Love to get this before it is converted to usec. - * It would save a div AND a mpy. - */ - tp->tv_sec = tpv.tv_sec; - tp->tv_nsec = tpv.tv_usec * NSEC_PER_USEC; - - return jiff; -} - -int do_posix_clock_monotonic_gettime(struct timespec *tp) -{ - struct timespec wall_to_mono; - - do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); - - tp->tv_sec += wall_to_mono.tv_sec; - tp->tv_nsec += wall_to_mono.tv_nsec; - - if ((tp->tv_nsec - NSEC_PER_SEC) > 0) { - tp->tv_nsec -= NSEC_PER_SEC; - tp->tv_sec++; - } - return 0; + return -EINVAL; } +EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); -int do_posix_clock_monotonic_settime(struct timespec *tp) +int do_posix_clock_nonanosleep(const clockid_t clock, int flags, + struct timespec *t, struct timespec __user *r) { - return -EINVAL; +#ifndef ENOTSUP + return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */ +#else /* parisc does define it separately. */ + return -ENOTSUP; +#endif } +EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -asmlinkage long -sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp) +asmlinkage long sys_clock_settime(const clockid_t which_clock, + const struct timespec __user *tp) { struct timespec new_tp; - if ((unsigned) which_clock >= MAX_CLOCKS || - !posix_clocks[which_clock].res) + if (invalid_clockid(which_clock)) return -EINVAL; if (copy_from_user(&new_tp, tp, sizeof (*tp))) return -EFAULT; - if (posix_clocks[which_clock].clock_set) - return posix_clocks[which_clock].clock_set(&new_tp); - return do_sys_settimeofday(&new_tp, NULL); + return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); } asmlinkage long -sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) +sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) { - struct timespec rtn_tp; - int error = 0; + struct timespec kernel_tp; + int error; - if ((unsigned) which_clock >= MAX_CLOCKS || - !posix_clocks[which_clock].res) + if (invalid_clockid(which_clock)) return -EINVAL; - - error = do_posix_gettime(&posix_clocks[which_clock], &rtn_tp); - - if (!error && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) + error = CLOCK_DISPATCH(which_clock, clock_get, + (which_clock, &kernel_tp)); + if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) error = -EFAULT; return error; @@ -1117,201 +935,71 @@ sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) } asmlinkage long -sys_clock_getres(clockid_t which_clock, struct timespec __user *tp) +sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) { struct timespec rtn_tp; + int error; - if ((unsigned) which_clock >= MAX_CLOCKS || - !posix_clocks[which_clock].res) + if (invalid_clockid(which_clock)) return -EINVAL; - rtn_tp.tv_sec = 0; - rtn_tp.tv_nsec = posix_clocks[which_clock].res; - if (tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) - return -EFAULT; - - return 0; - -} + error = CLOCK_DISPATCH(which_clock, clock_getres, + (which_clock, &rtn_tp)); -static void nanosleep_wake_up(unsigned long __data) -{ - struct task_struct *p = (struct task_struct *) __data; + if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) { + error = -EFAULT; + } - wake_up_process(p); + return 
error; } /* - * The standard says that an absolute nanosleep call MUST wake up at - * the requested time in spite of clock settings. Here is what we do: - * For each nanosleep call that needs it (only absolute and not on - * CLOCK_MONOTONIC* (as it can not be set)) we thread a little structure - * into the "nanosleep_abs_list". All we need is the task_struct pointer. - * When ever the clock is set we just wake up all those tasks. The rest - * is done by the while loop in clock_nanosleep(). - * - * On locking, clock_was_set() is called from update_wall_clock which - * holds (or has held for it) a write_lock_irq( xtime_lock) and is - * called from the timer bh code. Thus we need the irq save locks. + * nanosleep for monotonic and realtime clocks */ - -static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue); - -void clock_was_set(void) +static int common_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsave, struct timespec __user *rmtp) { - wake_up_all(&nanosleep_abs_wqueue); + return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? + HRTIMER_ABS : HRTIMER_REL, which_clock); } -long clock_nanosleep_restart(struct restart_block *restart_block); - -extern long do_clock_nanosleep(clockid_t which_clock, int flags, - struct timespec *t); - asmlinkage long -sys_clock_nanosleep(clockid_t which_clock, int flags, +sys_clock_nanosleep(const clockid_t which_clock, int flags, const struct timespec __user *rqtp, struct timespec __user *rmtp) { struct timespec t; - struct restart_block *restart_block = - &(current_thread_info()->restart_block); - int ret; - if ((unsigned) which_clock >= MAX_CLOCKS || - !posix_clocks[which_clock].res) + if (invalid_clockid(which_clock)) return -EINVAL; if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; - if ((unsigned) t.tv_nsec >= NSEC_PER_SEC || t.tv_sec < 0) + if (!timespec_valid(&t)) return -EINVAL; - ret = do_clock_nanosleep(which_clock, flags, &t); - /* - * Do this here as do_clock_nanosleep does not have the real address - */ - restart_block->arg1 = (unsigned long)rmtp; - - if ((ret == -ERESTART_RESTARTBLOCK) && rmtp && - copy_to_user(rmtp, &t, sizeof (t))) - return -EFAULT; - return ret; + return CLOCK_DISPATCH(which_clock, nsleep, + (which_clock, flags, &t, rmtp)); } -long -do_clock_nanosleep(clockid_t which_clock, int flags, struct timespec *tsave) +/* + * nanosleep_restart for monotonic and realtime clocks + */ +static int common_nsleep_restart(struct restart_block *restart_block) { - struct timespec t; - struct timer_list new_timer; - DECLARE_WAITQUEUE(abs_wqueue, current); - u64 rq_time = (u64)0; - s64 left; - int abs; - struct restart_block *restart_block = - ¤t_thread_info()->restart_block; - - abs_wqueue.flags = 0; - init_timer(&new_timer); - new_timer.expires = 0; - new_timer.data = (unsigned long) current; - new_timer.function = nanosleep_wake_up; - abs = flags & TIMER_ABSTIME; - - if (restart_block->fn == clock_nanosleep_restart) { - /* - * Interrupted by a non-delivered signal, pick up remaining - * time and continue. Remaining time is in arg2 & 3. 
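The block being deleted here hand-rolled syscall restarting by stashing a 64-bit jiffies deadline in restart_block->arg2/arg3; after this patch that bookkeeping lives inside hrtimer_nanosleep(). In outline, the mechanism is (editor's sketch, not literal patch code):

	/* When a sleep is interrupted by a signal, roughly: */
	struct restart_block *restart =
		&current_thread_info()->restart_block;

	restart->fn = hrtimer_nanosleep_restart;
	/* the remaining time, clock id and user rmtp pointer are
	 * stashed in the restart_block's argument slots */
	return -ERESTART_RESTARTBLOCK;
	/* the signal code re-enters the sleep via restart->fn */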
- */ - restart_block->fn = do_no_restart_syscall; - - rq_time = restart_block->arg3; - rq_time = (rq_time << 32) + restart_block->arg2; - if (!rq_time) - return -EINTR; - left = rq_time - get_jiffies_64(); - if (left <= (s64)0) - return 0; /* Already passed */ - } - - if (abs && (posix_clocks[which_clock].clock_get != - posix_clocks[CLOCK_MONOTONIC].clock_get)) - add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue); - - do { - t = *tsave; - if (abs || !rq_time) { - adjust_abs_time(&posix_clocks[which_clock], &t, abs, - &rq_time); - rq_time += (t.tv_sec || t.tv_nsec); - } - - left = rq_time - get_jiffies_64(); - if (left >= (s64)MAX_JIFFY_OFFSET) - left = (s64)MAX_JIFFY_OFFSET; - if (left < (s64)0) - break; - - new_timer.expires = jiffies + left; - __set_current_state(TASK_INTERRUPTIBLE); - add_timer(&new_timer); - - schedule(); - - del_timer_sync(&new_timer); - left = rq_time - get_jiffies_64(); - } while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); - - if (abs_wqueue.task_list.next) - finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); - - if (left > (s64)0) { - - /* - * Always restart abs calls from scratch to pick up any - * clock shifting that happened while we are away. - */ - if (abs) - return -ERESTARTNOHAND; - - left *= TICK_NSEC; - tsave->tv_sec = div_long_long_rem(left, - NSEC_PER_SEC, - &tsave->tv_nsec); - /* - * Restart works by saving the time remaing in - * arg2 & 3 (it is 64-bits of jiffies). The other - * info we need is the clock_id (saved in arg0). - * The sys_call interface needs the users - * timespec return address which _it_ saves in arg1. - * Since we have cast the nanosleep call to a clock_nanosleep - * both can be restarted with the same code. - */ - restart_block->fn = clock_nanosleep_restart; - restart_block->arg0 = which_clock; - /* - * Caller sets arg1 - */ - restart_block->arg2 = rq_time & 0xffffffffLL; - restart_block->arg3 = rq_time >> 32; - - return -ERESTART_RESTARTBLOCK; - } - - return 0; + return hrtimer_nanosleep_restart(restart_block); } + /* - * This will restart clock_nanosleep. + * This will restart clock_nanosleep. This is required only by + * compat_clock_nanosleep_restart for now. */ long clock_nanosleep_restart(struct restart_block *restart_block) { - struct timespec t; - int ret = do_clock_nanosleep(restart_block->arg0, 0, &t); + clockid_t which_clock = restart_block->arg0; - if ((ret == -ERESTART_RESTARTBLOCK) && restart_block->arg1 && - copy_to_user((struct timespec __user *)(restart_block->arg1), &t, - sizeof (t))) - return -EFAULT; - return ret; + return CLOCK_DISPATCH(which_clock, nsleep_restart, + (restart_block)); }
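Taken together, the nanosleep rework preserves the user-visible rule the deleted code enforced: an absolute sleep tracks clock adjustments and restarts from scratch after a signal instead of reusing a stale jiffies target. The corresponding userland idiom (illustrative example; clock_nanosleep() returns an errno value directly rather than setting errno):

	#include <errno.h>
	#include <time.h>

	/* Sleep until an absolute CLOCK_MONOTONIC deadline, riding out
	 * interruptions by signal handlers. */
	static int sleep_until(const struct timespec *deadline)
	{
		int err;

		do {
			err = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
					      deadline, NULL);
		} while (err == EINTR);
		return err;
	}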