vserver 1.9.3
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 10c511f..ee94184 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -11,7 +11,6 @@
  *  Copyright (c) 2002  Vojtech Pavlik
  *  Copyright (c) 2003  Andi Kleen
  *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
- *
  */
 
 #include <linux/kernel.h>
@@ -33,6 +32,7 @@
 #include <asm/timex.h>
 #include <asm/proto.h>
 #include <asm/hpet.h>
+#include <asm/sections.h>
 #include <linux/cpufreq.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/apic.h>
@@ -42,6 +42,10 @@ u64 jiffies_64 = INITIAL_JIFFIES;
 
 EXPORT_SYMBOL(jiffies_64);
 
+#ifdef CONFIG_CPU_FREQ
+static void cpufreq_delayed_get(void);
+#endif
+
 extern int using_apic_timer;
 
 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
@@ -82,7 +86,7 @@ static inline void rdtscll_sync(unsigned long *tsc)
  * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
  * This is not a problem, because jiffies hasn't updated either. They are bound
  * together by xtime_lock.
-        */
+ */
 
 static inline unsigned int do_gettimeoffset_tsc(void)
 {
@@ -119,7 +123,7 @@ void do_gettimeofday(struct timeval *tv)
                usec = xtime.tv_nsec / 1000;
 
                /* i386 does some correction here to keep the clock 
-                  monotonus even when ntpd is fixing drift.
+                  monotonic even when ntpd is fixing drift.
                   But they didn't work for me, there is a non monotonic
                   clock anyways with ntp.
                   I dropped all corrections now until a real solution can
@@ -176,6 +180,28 @@ int do_settimeofday(struct timespec *tv)
 
 EXPORT_SYMBOL(do_settimeofday);
 
+unsigned long profile_pc(struct pt_regs *regs)
+{
+       unsigned long pc = instruction_pointer(regs);
+
+       /* Assume the lock function has either no stack frame or only a
+          single word on the stack. Check whether the address found there
+          looks like a kernel text address. There is a small window for
+          false hits, but in that case the tick is simply accounted to the
+          spinlock function. It would be better to write these functions
+          in assembler again and do the check exactly. */
+       if (in_lock_functions(pc)) {
+               char *v = *(char **)regs->rsp;
+               if ((v >= _stext && v <= _etext) ||
+                       (v >= _sinittext && v <= _einittext) ||
+                       (v >= (char *)MODULES_VADDR  && v <= (char *)MODULES_END))
+                       return (unsigned long)v;
+               return ((unsigned long *)regs->rsp)[1];
+       }
+       return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+
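For illustration only, not part of the patch: a minimal user-space sketch of the check profile_pc() does above, i.e. only trusting a word pulled off the stack if it falls inside a code region. The text_start/text_end bounds are made-up stand-ins for _stext/_etext.

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for the kernel's _stext/_etext symbols. */
	static const uintptr_t text_start = 0xffffffff80100000UL;
	static const uintptr_t text_end   = 0xffffffff80400000UL;

	/* Same shape as the range check in profile_pc(): accept a stack word
	   as a return address only if it points into the text region. */
	static int looks_like_text(uintptr_t addr)
	{
		return addr >= text_start && addr <= text_end;
	}

	int main(void)
	{
		uintptr_t fake_stack[2] = {
			0xffffffff80123456UL,	/* plausible kernel caller */
			0x00007fffdeadbeefUL,	/* user-space pointer, rejected */
		};

		printf("%d %d\n", looks_like_text(fake_stack[0]),
				  looks_like_text(fake_stack[1]));
		return 0;
	}
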
 /*
  * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
  * ms after the second nowtime has started, because when nowtime is written
@@ -214,7 +240,7 @@ static void set_rtc_mmss(unsigned long nowtime)
  * overflow. This avoids messing with unknown time zones but requires your RTC
  * not to be off by more than 15 minutes. Since we're calling it only when
  * our clock is externally synchronized using NTP, this shouldn't be a problem.
-      */
+ */
 
        real_seconds = nowtime % 60;
        real_minutes = nowtime / 60;
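
A quick worked example of the decomposition started above (the later comparison against the minutes read back from CMOS happens further down in set_rtc_mmss and is outside this hunk); the timestamp is arbitrary:

	#include <stdio.h>

	int main(void)
	{
		/* nowtime is xtime.tv_sec at the moment of the update;
		   1100000000 is Nov 9 2004 11:33:20 UTC, picked at random. */
		unsigned long nowtime = 1100000000UL;

		unsigned long real_seconds = nowtime % 60;	/* 20 */
		unsigned long real_minutes = nowtime / 60;	/* minutes since the epoch */

		/* Only minutes and seconds are written back to the RTC; hours
		   are left alone, which keeps unknown time zones out of it. */
		printf("write minute %02lu, second %02lu into the CMOS clock\n",
		       real_minutes % 60, real_seconds);	/* 33, 20 */
		return 0;
	}
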
@@ -293,19 +319,56 @@ unsigned long long monotonic_clock(void)
 }
 EXPORT_SYMBOL(monotonic_clock);
 
+static noinline void handle_lost_ticks(int lost, struct pt_regs *regs)
+{
+    static long lost_count;
+    static int warned;
+
+    if (report_lost_ticks) {
+           printk(KERN_WARNING "time.c: Lost %d timer "
+                  "tick(s)! ", lost);
+           print_symbol("rip %s)\n", regs->rip);
+    }
+
+    if (lost_count == 100 && !warned) {
+           printk(KERN_WARNING
+                  "warning: many lost ticks.\n"
+                  KERN_WARNING "Your time source seems to be unstable or "
+                               "some driver is hogging interrupts\n");
+           print_symbol("rip %s\n", regs->rip);
+           if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
+                   printk(KERN_WARNING "Falling back to HPET\n");
+                   vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+                   vxtime.mode = VXTIME_HPET;
+                   do_gettimeoffset = do_gettimeoffset_hpet;
+           }
+           /* else should fall back to PIT, but code missing. */
+           warned = 1;
+    } else
+           lost_count++;
+
+#ifdef CONFIG_CPU_FREQ
+    /* In some cases the CPU can change frequency without us noticing
+       (such as going into thermal throttle).
+       Give cpufreq a chance to catch up. */
+    if ((lost_count+1) % 25 == 0) {
+           cpufreq_delayed_get();
+    }
+#endif
+}
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
        static unsigned long rtc_update = 0;
-       unsigned long tsc, lost = 0;
-       int delay, offset = 0;
+       unsigned long tsc;
+       int delay, offset = 0, lost = 0;
 
 /*
  * Here we are in the timer irq handler. We have irqs locally disabled (so we
  * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
  * on the other CPU, so we need a lock. We also need to lock the vsyscall
  * variables, because both do_timer() and us change them -arca+vojtech
-      */
+ */
 
        write_seqlock(&xtime_lock);
 
@@ -354,12 +417,8 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
                                (((long) offset << 32) / vxtime.tsc_quot) - 1;
        }
 
-       if (lost) {
-               if (report_lost_ticks) {
-                       printk(KERN_WARNING "time.c: Lost %ld timer "
-                              "tick(s)! ", lost);
-                       print_symbol("rip %s)\n", regs->rip);
-               }
+       if (lost > 0) {
+               handle_lost_ticks(lost, regs);
                jiffies += lost;
        }
 
@@ -376,7 +435,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  */
 
 #ifndef CONFIG_X86_LOCAL_APIC
-       x86_do_profile(regs);
+       profile_tick(CPU_PROFILING, regs);
 #else
        if (!using_apic_timer)
                smp_local_timer_interrupt(regs);
@@ -509,6 +568,38 @@ unsigned long get_cmos_time(void)
    Should fix up last_tsc too. Currently gettimeofday in the
    first tick after the change will be slightly wrong. */
 
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+       unsigned int cpu;
+       for_each_online_cpu(cpu) {
+               cpufreq_get(cpu);
+       }
+       cpufreq_delayed_issched = 0;
+}
+
+/* If we notice lost ticks, schedule a call to cpufreq_get(), which
+ * verifies that the CPU frequency the timing core thinks the CPU is
+ * running at is still correct.
+ */
+static void cpufreq_delayed_get(void)
+{
+       static int warned;
+       if (cpufreq_init && !cpufreq_delayed_issched) {
+               cpufreq_delayed_issched = 1;
+               if (!warned) {
+                       warned = 1;
+                       printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+               }
+               schedule_work(&cpufreq_delayed_get_work);
+       }
+}
+
 static unsigned int  ref_freq = 0;
 static unsigned long loops_per_jiffy_ref = 0;
 
@@ -518,14 +609,18 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                 void *data)
 {
         struct cpufreq_freqs *freq = data;
-       unsigned long *lpj;
+       unsigned long *lpj, dummy;
 
+       lpj = &dummy;
+       if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 #ifdef CONFIG_SMP
        lpj = &cpu_data[freq->cpu].loops_per_jiffy;
 #else
        lpj = &boot_cpu_data.loops_per_jiffy;
 #endif
 
+
+
        if (!ref_freq) {
                ref_freq = freq->old;
                loops_per_jiffy_ref = *lpj;
@@ -538,7 +633,8 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
 
                cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
-               vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+               if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+                       vxtime.tsc_quot = (1000L << 32) / cpu_khz;
        }
        
        set_cyc2ns_scale(cpu_khz_ref / 1000);
@@ -549,6 +645,18 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 static struct notifier_block time_cpufreq_notifier_block = {
          .notifier_call  = time_cpufreq_notifier
 };
+
+static int __init cpufreq_tsc(void)
+{
+       INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+       if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+                                      CPUFREQ_TRANSITION_NOTIFIER))
+               cpufreq_init = 1;
+       return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
 #endif
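
For reference on why tsc_quot only needs refreshing on a real frequency change: it is a 32.32 fixed-point microseconds-per-cycle factor, and the gettimeofday path converts a TSC delta essentially as (delta * tsc_quot) >> 32. A small self-contained check of that arithmetic, with a made-up 2 GHz clock (assumes 64-bit long, as on x86-64):

	#include <stdio.h>

	int main(void)
	{
		unsigned long cpu_khz = 2000000UL;	/* assumed 2.0 GHz CPU */

		/* Same formula as in the notifier above: microseconds per TSC
		   cycle as a 32.32 fixed-point fraction. */
		unsigned long tsc_quot = (1000UL << 32) / cpu_khz;

		/* 2,000,000 cycles at 2 GHz is one millisecond; the conversion
		   comes out at 999 us because of the truncating shift. */
		unsigned long cycles = 2000000UL;
		unsigned long usec = (cycles * tsc_quot) >> 32;

		printf("tsc_quot=%lu delta=%lu cycles -> %lu us\n",
		       tsc_quot, cycles, usec);
		return 0;
	}
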
 
 /*
@@ -705,9 +813,9 @@ void __init time_init(void)
                 outl(0x800038a0, 0xcf8);
                 outl(0xff000001, 0xcfc);
                 outl(0x800038a0, 0xcf8);
-                hpet_address = inl(0xcfc) & 0xfffffffe;
+                vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
                printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
-                      "at %#lx.\n", hpet_address);
+                      "at %#lx.\n", vxtime.hpet_address);
         }
 #endif
        if (nohpet)
@@ -725,8 +833,8 @@ void __init time_init(void)
                cpu_khz = hpet_calibrate_tsc();
                timename = "HPET";
        } else {
-       pit_init();
-       cpu_khz = pit_calibrate_tsc();
+               pit_init();
+               cpu_khz = pit_calibrate_tsc();
                timename = "PIT";
        }
 
@@ -742,11 +850,6 @@ void __init time_init(void)
        setup_irq(0, &irq0);
 
        set_cyc2ns_scale(cpu_khz / 1000);
-
-#ifdef CONFIG_CPU_FREQ
-       cpufreq_register_notifier(&time_cpufreq_notifier_block, 
-                                 CPUFREQ_TRANSITION_NOTIFIER);
-#endif
 }
 
 void __init time_init_smp(void)
@@ -781,11 +884,12 @@ static int time_suspend(struct sys_device *dev, u32 state)
 
 static int time_resume(struct sys_device *dev)
 {
+       unsigned long flags;
        unsigned long sec = get_cmos_time() + clock_cmos_diff;
-       write_seqlock_irq(&xtime_lock);
+       write_seqlock_irqsave(&xtime_lock,flags);
        xtime.tv_sec = sec;
        xtime.tv_nsec = 0;
-       write_sequnlock_irq(&xtime_lock);
+       write_sequnlock_irqrestore(&xtime_lock,flags);
        return 0;
 }
 
@@ -1038,6 +1142,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 #endif
 
+
+
 static int __init nohpet_setup(char *s) 
 { 
        nohpet = 1;