fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / kernel / itimer.c
index 6918cb7..204ed79 100644 (file)
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
+#include <linux/syscalls.h>
 #include <linux/time.h>
+#include <linux/posix-timers.h>
+#include <linux/hrtimer.h>
 
 #include <asm/uaccess.h>
 
+/**
+ * itimer_get_remtime - get remaining time for the timer
+ *
+ * @timer: the timer to read
+ *
+ * Returns the time left until expiry as a timeval: 0 when the timer is
+ * not active, and 1usec for a still-active timer that is already due.
+ */
+static struct timeval itimer_get_remtime(struct hrtimer *timer)
+{
+       ktime_t rem = hrtimer_get_remaining(timer);
+
+       /*
+        * Racy but safe: if the itimer expires after the above
+        * hrtimer_get_remaining() call but before this condition
+        * then we return 0 - which is correct.
+        */
+       if (hrtimer_active(timer)) {
+               if (rem.tv64 <= 0)
+                       rem.tv64 = NSEC_PER_USEC; /* pending: never report 0 */
+       } else
+               rem.tv64 = 0;
+
+       return ktime_to_timeval(rem);
+}
+
 int do_getitimer(int which, struct itimerval *value)
 {
-       register unsigned long val, interval;
+       struct task_struct *tsk = current;
+       cputime_t cinterval, cval; /* used by the VIRTUAL/PROF cases only */
 
        switch (which) {
        case ITIMER_REAL:
-               interval = current->it_real_incr;
-               val = 0;
-               /* 
-                * FIXME! This needs to be atomic, in case the kernel timer happens!
-                */
-               if (timer_pending(&current->real_timer)) {
-                       val = current->real_timer.expires - jiffies;
-
-                       /* look out for negative/zero itimer.. */
-                       if ((long) val <= 0)
-                               val = 1;
-               }
+               spin_lock_irq(&tsk->sighand->siglock); /* as do_setitimer() */
+               value->it_value = itimer_get_remtime(&tsk->signal->real_timer);
+               value->it_interval =
+                       ktime_to_timeval(tsk->signal->it_real_incr);
+               spin_unlock_irq(&tsk->sighand->siglock);
                break;
        case ITIMER_VIRTUAL:
-               val = current->it_virt_value;
-               interval = current->it_virt_incr;
+               read_lock(&tasklist_lock);
+               spin_lock_irq(&tsk->sighand->siglock);
+               cval = tsk->signal->it_virt_expires;
+               cinterval = tsk->signal->it_virt_incr;
+               if (!cputime_eq(cval, cputime_zero)) {
+                       struct task_struct *t = tsk;
+                       cputime_t utime = tsk->signal->utime;
+                       do { /* add the utime of every thread in the group */
+                               utime = cputime_add(utime, t->utime);
+                               t = next_thread(t);
+                       } while (t != tsk);
+                       if (cputime_le(cval, utime)) { /* about to fire */
+                               cval = jiffies_to_cputime(1); /* not 0 */
+                       } else {
+                               cval = cputime_sub(cval, utime);
+                       }
+               }
+               spin_unlock_irq(&tsk->sighand->siglock);
+               read_unlock(&tasklist_lock);
+               cputime_to_timeval(cval, &value->it_value);
+               cputime_to_timeval(cinterval, &value->it_interval);
                break;
        case ITIMER_PROF:
-               val = current->it_prof_value;
-               interval = current->it_prof_incr;
+               read_lock(&tasklist_lock);
+               spin_lock_irq(&tsk->sighand->siglock);
+               cval = tsk->signal->it_prof_expires;
+               cinterval = tsk->signal->it_prof_incr;
+               if (!cputime_eq(cval, cputime_zero)) {
+                       struct task_struct *t = tsk;
+                       cputime_t ptime = cputime_add(tsk->signal->utime,
+                                                     tsk->signal->stime);
+                       do { /* add utime + stime of every thread */
+                               ptime = cputime_add(ptime,
+                                                   cputime_add(t->utime,
+                                                               t->stime));
+                               t = next_thread(t);
+                       } while (t != tsk);
+                       if (cputime_le(cval, ptime)) { /* about to fire */
+                               cval = jiffies_to_cputime(1); /* not 0 */
+                       } else {
+                               cval = cputime_sub(cval, ptime);
+                       }
+               }
+               spin_unlock_irq(&tsk->sighand->siglock);
+               read_unlock(&tasklist_lock);
+               cputime_to_timeval(cval, &value->it_value);
+               cputime_to_timeval(cinterval, &value->it_interval);
                break;
        default:
                return(-EINVAL);
        }
-       jiffies_to_timeval(val, &value->it_value);
-       jiffies_to_timeval(interval, &value->it_interval);
        return 0;
 }
 
-/* SMP: Only we modify our itimer values. */
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
 {
        int error = -EFAULT;
@@ -63,64 +124,211 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
        return error;
 }
 
-void it_real_fn(unsigned long __data)
+
+/*
+ * Sends SIGALRM; the timer is automatically restarted when interval != 0
+ */
+int it_real_fn(struct hrtimer *timer)
 {
-       struct task_struct * p = (struct task_struct *) __data;
-       unsigned long interval;
-
-       send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p);
-       interval = p->it_real_incr;
-       if (interval) {
-               if (interval > (unsigned long) LONG_MAX)
-                       interval = LONG_MAX;
-               p->real_timer.expires = jiffies + interval;
-               add_timer(&p->real_timer);
+       struct signal_struct *sig =
+           container_of(timer, struct signal_struct, real_timer);
+
+       send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk);
+
+       if (sig->it_real_incr.tv64 != 0) {
+               hrtimer_forward(timer, timer->base->softirq_time,
+                               sig->it_real_incr);
+               return HRTIMER_RESTART; /* interval set: ask for re-arm */
        }
+       return HRTIMER_NORESTART; /* no interval: one-shot */
+}
+
+/*
+ * We do not care about correctness. We just sanitize the values so
+ * the ktime_t operations which expect normalized values do not
+ * break. This converts negative values to long timeouts, similar to
+ * the code in kernel versions < 2.6.16.
+ *
+ * At most 10 warnings are printed; @interval only selects whether the
+ * message names "it_interval" (non-zero) or "it_value" (zero).
+ */
+static void fixup_timeval(struct timeval *tv, int interval)
+{
+       static int warnlimit = 10;
+       unsigned long tmp;
+
+       if (warnlimit > 0) {
+               warnlimit--;
+               printk(KERN_WARNING
+                      "setitimer: %s (pid = %d) provided "
+                      "invalid timeval %s: tv_sec = %ld tv_usec = %ld\n",
+                      current->comm, current->pid,
+                      interval ? "it_interval" : "it_value",
+                      tv->tv_sec, (long) tv->tv_usec);
+       }
+
+       tmp = tv->tv_usec; /* a negative tv_usec becomes a huge unsigned */
+       if (tmp >= USEC_PER_SEC) {
+               tv->tv_usec = tmp % USEC_PER_SEC;
+               tv->tv_sec += tmp / USEC_PER_SEC; /* carry whole seconds */
+       }
+
+       tmp = tv->tv_sec; /* a negative tv_sec compares above LONG_MAX */
+       if (tmp > LONG_MAX)
+               tv->tv_sec = LONG_MAX;
+}
+
+/*
+ * True when tv_sec >= 0 and 0 <= tv_usec < USEC_PER_SEC (canonical form)
+ */
+#define timeval_valid(t) \
+       (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC))
+
+/*
+ * Sanitize it_value and it_interval in place with fixup_timeval() when
+ * either one is not in canonical form; valid timevals are left untouched.
+ */
+static void check_itimerval(struct itimerval *value) {
+
+       if (unlikely(!timeval_valid(&value->it_value)))
+               fixup_timeval(&value->it_value, 0);
+
+       if (unlikely(!timeval_valid(&value->it_interval)))
+               fixup_timeval(&value->it_interval, 1);
 }
 
 int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
 {
-       register unsigned long i, j;
-       int k;
+       struct task_struct *tsk = current;
+       struct hrtimer *timer;
+       ktime_t expires;
+       cputime_t cval, cinterval, nval, ninterval;
+
+       /*
+        * Validate the timevals in value.
+        *
+        * Note: Although the spec requires that invalid values shall
+        * return -EINVAL, we just fixup the value and print a limited
+        * number of warnings in order not to break users of this
+        * historical misfeature.
+        *
+        * Scheduled for replacement in March 2007
+        */
+       check_itimerval(value);
 
-       i = timeval_to_jiffies(&value->it_interval);
-       j = timeval_to_jiffies(&value->it_value);
-       if (ovalue && (k = do_getitimer(which, ovalue)) < 0)
-               return k;
        switch (which) {
-               case ITIMER_REAL:
-                       del_timer_sync(&current->real_timer);
-                       current->it_real_value = j;
-                       current->it_real_incr = i;
-                       if (!j)
-                               break;
-                       if (j > (unsigned long) LONG_MAX)
-                               j = LONG_MAX;
-                       i = j + jiffies;
-                       current->real_timer.expires = i;
-                       add_timer(&current->real_timer);
-                       break;
-               case ITIMER_VIRTUAL:
-                       if (j)
-                               j++;
-                       current->it_virt_value = j;
-                       current->it_virt_incr = i;
-                       break;
-               case ITIMER_PROF:
-                       if (j)
-                               j++;
-                       current->it_prof_value = j;
-                       current->it_prof_incr = i;
-                       break;
-               default:
-                       return -EINVAL;
+       case ITIMER_REAL:
+again:
+               spin_lock_irq(&tsk->sighand->siglock);
+               timer = &tsk->signal->real_timer;
+               if (ovalue) { /* old setting is read before the cancel */
+                       ovalue->it_value = itimer_get_remtime(timer);
+                       ovalue->it_interval
+                               = ktime_to_timeval(tsk->signal->it_real_incr);
+               }
+               /* We are sharing ->siglock with it_real_fn() */
+               if (hrtimer_try_to_cancel(timer) < 0) {
+                       spin_unlock_irq(&tsk->sighand->siglock);
+                       goto again; /* lock dropped; retry the cancel */
+               }
+               tsk->signal->it_real_incr =
+                       timeval_to_ktime(value->it_interval);
+               expires = timeval_to_ktime(value->it_value);
+               if (expires.tv64 != 0) /* zero it_value: stay cancelled */
+                       hrtimer_start(timer, expires, HRTIMER_REL);
+               spin_unlock_irq(&tsk->sighand->siglock);
+               break;
+       case ITIMER_VIRTUAL:
+               nval = timeval_to_cputime(&value->it_value);
+               ninterval = timeval_to_cputime(&value->it_interval);
+               read_lock(&tasklist_lock);
+               spin_lock_irq(&tsk->sighand->siglock);
+               cval = tsk->signal->it_virt_expires;
+               cinterval = tsk->signal->it_virt_incr;
+               if (!cputime_eq(cval, cputime_zero) ||
+                   !cputime_eq(nval, cputime_zero)) {
+                       if (cputime_gt(nval, cputime_zero)) /* pad one jiffy */
+                               nval = cputime_add(nval,
+                                                  jiffies_to_cputime(1));
+                       set_process_cpu_timer(tsk, CPUCLOCK_VIRT,
+                                             &nval, &cval);
+               }
+               tsk->signal->it_virt_expires = nval;
+               tsk->signal->it_virt_incr = ninterval;
+               spin_unlock_irq(&tsk->sighand->siglock);
+               read_unlock(&tasklist_lock);
+               if (ovalue) { /* converted after the locks are dropped */
+                       cputime_to_timeval(cval, &ovalue->it_value);
+                       cputime_to_timeval(cinterval, &ovalue->it_interval);
+               }
+               break;
+       case ITIMER_PROF:
+               nval = timeval_to_cputime(&value->it_value);
+               ninterval = timeval_to_cputime(&value->it_interval);
+               read_lock(&tasklist_lock);
+               spin_lock_irq(&tsk->sighand->siglock);
+               cval = tsk->signal->it_prof_expires;
+               cinterval = tsk->signal->it_prof_incr;
+               if (!cputime_eq(cval, cputime_zero) ||
+                   !cputime_eq(nval, cputime_zero)) {
+                       if (cputime_gt(nval, cputime_zero)) /* pad one jiffy */
+                               nval = cputime_add(nval,
+                                                  jiffies_to_cputime(1));
+                       set_process_cpu_timer(tsk, CPUCLOCK_PROF,
+                                             &nval, &cval);
+               }
+               tsk->signal->it_prof_expires = nval;
+               tsk->signal->it_prof_incr = ninterval;
+               spin_unlock_irq(&tsk->sighand->siglock);
+               read_unlock(&tasklist_lock);
+               if (ovalue) { /* converted after the locks are dropped */
+                       cputime_to_timeval(cval, &ovalue->it_value);
+                       cputime_to_timeval(cinterval, &ovalue->it_interval);
+               }
+               break;
+       default:
+               return -EINVAL;
        }
        return 0;
 }
 
-/* SMP: Again, only we play with our itimers, and signals are SMP safe
- *      now so that is not an issue at all anymore.
+/**
+ * alarm_setitimer - set alarm in seconds
+ *
+ * @seconds:   number of seconds until alarm
+ *             0 disables the alarm
+ *
+ * Returns the remaining time in seconds of a pending timer or 0 when
+ * the timer is not active.
+ *
+ * On 32 bit machines the seconds value is limited to INT_MAX to avoid
+ * negative timeval settings which would cause immediate expiry.
  */
+unsigned int alarm_setitimer(unsigned int seconds)
+{
+       struct itimerval it_new, it_old;
+
+#if BITS_PER_LONG < 64
+       if (seconds > INT_MAX)
+               seconds = INT_MAX;
+#endif
+       it_new.it_value.tv_sec = seconds;
+       it_new.it_value.tv_usec = 0;
+       it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+
+       do_setitimer(ITIMER_REAL, &it_new, &it_old);
+
+       /*
+        * We can't return 0 if we have an alarm pending ...  And we'd
+        * better return too much than too little anyway
+        */
+       if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) ||
+             it_old.it_value.tv_usec >= 500000)
+               it_old.it_value.tv_sec++; /* round up to whole seconds */
+
+       return it_old.it_value.tv_sec;
+}
+
 asmlinkage long sys_setitimer(int which,
                              struct itimerval __user *value,
                              struct itimerval __user *ovalue)