Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch / x86_64 / kernel / time.c
index fb8c809..7392570 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/mc146818rtc.h>
-#include <linux/irq.h>
 #include <linux/time.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <asm/apic.h>
 #endif
 
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
 #ifdef CONFIG_CPU_FREQ
 static void cpufreq_delayed_get(void);
 #endif
 extern void i8254_timer_resume(void);
 extern int using_apic_timer;
 
+static char *time_init_gtod(void);
+
 DEFINE_SPINLOCK(rtc_lock);
 DEFINE_SPINLOCK(i8253_lock);
 
-static int nohpet __initdata = 0;
+int nohpet __initdata = 0;
 static int notsc __initdata = 0;
 
 #undef HPET_HACK_ENABLE_DANGEROUS
@@ -64,6 +61,7 @@ static int notsc __initdata = 0;
 unsigned int cpu_khz;                                  /* TSC clocks / usec, not used here */
 static unsigned long hpet_period;                      /* fsecs / HPET clock */
 unsigned long hpet_tick;                               /* HPET clocks / interrupt */
+int hpet_use_timer;                            /* Use counter of hpet for time keeping, otherwise PIT */
 unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;                         /* command line option */
 unsigned long long monotonic_base;
@@ -75,14 +73,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
 struct timespec __xtime __section_xtime;
 struct timezone __sys_tz __section_sys_tz;
 
-static inline void rdtscll_sync(unsigned long *tsc)
-{
-#ifdef CONFIG_SMP
-       sync_core();
-#endif
-       rdtscll(*tsc);
-}
-
 /*
  * do_gettimeoffset() returns microseconds since last timer interrupt was
  * triggered by hardware. A memory read of HPET is slower than a register read
@@ -97,15 +87,18 @@ static inline unsigned int do_gettimeoffset_tsc(void)
 {
        unsigned long t;
        unsigned long x;
-       rdtscll_sync(&t);
-       if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
+       t = get_cycles_sync();
+       if (t < vxtime.last_tsc) 
+               t = vxtime.last_tsc; /* hack */
        x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
        return x;
 }
 
 static inline unsigned int do_gettimeoffset_hpet(void)
 {
-       return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
+       /* cap counter read to one tick to avoid inconsistencies */
+       unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
+       return (min(counter,hpet_tick) * vxtime.quot) >> 32;
 }
 
 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -173,10 +166,7 @@ int do_settimeofday(struct timespec *tv)
        set_normalized_timespec(&xtime, sec, nsec);
        set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
 
-       time_adjust = 0;                /* stop active adjtime() */
-       time_status |= STA_UNSYNC;
-       time_maxerror = NTP_PHASE_LIMIT;
-       time_esterror = NTP_PHASE_LIMIT;
+       ntp_clear();
 
        write_sequnlock_irq(&xtime_lock);
        clock_was_set();
@@ -189,8 +179,9 @@ unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
 
-       /* Assume the lock function has either no stack frame or only a single word.
-          This checks if the address on the stack looks like a kernel text address.
+       /* Assume the lock function has either no stack frame or only a single 
+          word.  This checks if the address on the stack looks like a kernel 
+          text address.
           There is a small window for false hits, but in that case the tick
           is just accounted to the spinlock function.
           Better would be to write these functions in assembler again
@@ -253,19 +244,12 @@ static void set_rtc_mmss(unsigned long nowtime)
                real_minutes += 30;             /* correct for half hour time zone */
        real_minutes %= 60;
 
-#if 0
-       /* AMD 8111 is a really bad time keeper and hits this regularly. 
-          It probably was an attempt to avoid screwing up DST, but ignore
-          that for now. */        
        if (abs(real_minutes - cmos_minutes) >= 30) {
                printk(KERN_WARNING "time.c: can't update CMOS clock "
                       "from %d to %d\n", cmos_minutes, real_minutes);
-       } else
-#endif
-
-       {
-                       BIN_TO_BCD(real_seconds);
-                       BIN_TO_BCD(real_minutes);
+       } else {
+               BIN_TO_BCD(real_seconds);
+               BIN_TO_BCD(real_minutes);
                CMOS_WRITE(real_seconds, RTC_SECONDS);
                CMOS_WRITE(real_minutes, RTC_MINUTES);
        }
@@ -301,72 +285,66 @@ unsigned long long monotonic_clock(void)
 
                        last_offset = vxtime.last;
                        base = monotonic_base;
-                       this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
-
+                       this_offset = hpet_readl(HPET_COUNTER);
                } while (read_seqretry(&xtime_lock, seq));
                offset = (this_offset - last_offset);
-               offset *=(NSEC_PER_SEC/HZ)/hpet_tick;
-               return base + offset;
-       }else{
+               offset *= (NSEC_PER_SEC/HZ) / hpet_tick;
+       } else {
                do {
                        seq = read_seqbegin(&xtime_lock);
 
                        last_offset = vxtime.last_tsc;
                        base = monotonic_base;
                } while (read_seqretry(&xtime_lock, seq));
-               sync_core();
-               rdtscll(this_offset);
-               offset = (this_offset - last_offset)*1000/cpu_khz; 
-               return base + offset;
+               this_offset = get_cycles_sync();
+               offset = (this_offset - last_offset)*1000 / cpu_khz; 
        }
-
-
+       return base + offset;
 }
 EXPORT_SYMBOL(monotonic_clock);
 
 static noinline void handle_lost_ticks(int lost, struct pt_regs *regs)
 {
-    static long lost_count;
-    static int warned;
-
-    if (report_lost_ticks) {
-           printk(KERN_WARNING "time.c: Lost %d timer "
-                  "tick(s)! ", lost);
-           print_symbol("rip %s)\n", regs->rip);
-    }
-
-    if (lost_count == 1000 && !warned) {
-           printk(KERN_WARNING
-                  "warning: many lost ticks.\n"
-                  KERN_WARNING "Your time source seems to be instable or "
+       static long lost_count;
+       static int warned;
+       if (report_lost_ticks) {
+               printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
+               print_symbol("rip %s)\n", regs->rip);
+       }
+
+       if (lost_count == 1000 && !warned) {
+               printk(KERN_WARNING "warning: many lost ticks.\n"
+                      KERN_WARNING "Your time source seems to be instable or "
                                "some driver is hogging interupts\n");
-           print_symbol("rip %s\n", regs->rip);
-           if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
-                   printk(KERN_WARNING "Falling back to HPET\n");
-                   vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
-                   vxtime.mode = VXTIME_HPET;
-                   do_gettimeoffset = do_gettimeoffset_hpet;
-           }
-           /* else should fall back to PIT, but code missing. */
-           warned = 1;
-    } else
-           lost_count++;
+               print_symbol("rip %s\n", regs->rip);
+               if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
+                       printk(KERN_WARNING "Falling back to HPET\n");
+                       if (hpet_use_timer)
+                               vxtime.last = hpet_readl(HPET_T0_CMP) - 
+                                                       hpet_tick;
+                       else
+                               vxtime.last = hpet_readl(HPET_COUNTER);
+                       vxtime.mode = VXTIME_HPET;
+                       do_gettimeoffset = do_gettimeoffset_hpet;
+               }
+               /* else should fall back to PIT, but code missing. */
+               warned = 1;
+       } else
+               lost_count++;
 
 #ifdef CONFIG_CPU_FREQ
-    /* In some cases the CPU can change frequency without us noticing
-       (like going into thermal throttle)
-       Give cpufreq a change to catch up. */
-    if ((lost_count+1) % 25 == 0) {
-           cpufreq_delayed_get();
-    }
+       /* In some cases the CPU can change frequency without us noticing
+          Give cpufreq a change to catch up. */
+       if ((lost_count+1) % 25 == 0)
+               cpufreq_delayed_get();
 #endif
 }
 
-static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+void main_timer_handler(struct pt_regs *regs)
 {
        static unsigned long rtc_update = 0;
        unsigned long tsc;
-       int delay, offset = 0, lost = 0;
+       int delay = 0, offset = 0, lost = 0;
 
 /*
  * Here we are in the timer irq handler. We have irqs locally disabled (so we
@@ -377,10 +355,17 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 
        write_seqlock(&xtime_lock);
 
-       if (vxtime.hpet_address) {
+       if (vxtime.hpet_address)
+               offset = hpet_readl(HPET_COUNTER);
+
+       if (hpet_use_timer) {
+               /* if we're using the hpet timer functionality,
+                * we can more accurately know the counter value
+                * when the timer interrupt occured.
+                */
                offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
                delay = hpet_readl(HPET_COUNTER) - offset;
-       } else {
+       } else if (!pmtmr_ioport) {
                spin_lock(&i8253_lock);
                outb_p(0x00, 0x43);
                delay = inb_p(0x40);
@@ -389,7 +374,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
                delay = LATCH - 1 - delay;
        }
 
-       rdtscll_sync(&tsc);
+       tsc = get_cycles_sync();
 
        if (vxtime.mode == VXTIME_HPET) {
                if (offset - vxtime.last > hpet_tick) {
@@ -461,23 +446,33 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  * off) isn't likely to go away much sooner anyway.
  */
 
-       if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
+       if (ntp_synced() && xtime.tv_sec > rtc_update &&
                abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
                set_rtc_mmss(xtime.tv_sec);
                rtc_update = xtime.tv_sec + 660;
        }
  
        write_sequnlock(&xtime_lock);
+}
 
+static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       if (apic_runs_main_timer > 1)
+               return IRQ_HANDLED;
+       main_timer_handler(regs);
+#ifdef CONFIG_X86_LOCAL_APIC
+       if (using_apic_timer)
+               smp_send_timer_broadcast_ipi();
+#endif
        return IRQ_HANDLED;
 }
 
-static unsigned int cyc2ns_scale;
+static unsigned int cyc2ns_scale __read_mostly;
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
 {
-       cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+       cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
 }
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
@@ -507,62 +502,53 @@ unsigned long long sched_clock(void)
        return cycles_2_ns(a);
 }
 
-unsigned long get_cmos_time(void)
+static unsigned long get_cmos_time(void)
 {
-       unsigned int timeout, year, mon, day, hour, min, sec;
-       unsigned char last, this;
+       unsigned int year, mon, day, hour, min, sec;
        unsigned long flags;
-
-/*
- * The Linux interpretation of the CMOS clock register contents: When the
- * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the
- * second which has precisely just started. Waiting for this can take up to 1
- * second, we timeout approximately after 2.4 seconds on a machine with
- * standard 8.3 MHz ISA bus.
- */
+       unsigned extyear = 0;
 
        spin_lock_irqsave(&rtc_lock, flags);
 
-       timeout = 1000000;
-       last = this = 0;
-
-       while (timeout && last && !this) {
-               last = this;
-               this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
-               timeout--;
-       }
-
-/*
- * Here we are safe to assume the registers won't change for a whole second, so
- * we just go ahead and read them.
-        */
-
+       do {
                sec = CMOS_READ(RTC_SECONDS);
                min = CMOS_READ(RTC_MINUTES);
                hour = CMOS_READ(RTC_HOURS);
                day = CMOS_READ(RTC_DAY_OF_MONTH);
                mon = CMOS_READ(RTC_MONTH);
                year = CMOS_READ(RTC_YEAR);
+#ifdef CONFIG_ACPI
+               if (acpi_fadt.revision >= FADT2_REVISION_ID &&
+                                       acpi_fadt.century)
+                       extyear = CMOS_READ(acpi_fadt.century);
+#endif
+       } while (sec != CMOS_READ(RTC_SECONDS));
 
        spin_unlock_irqrestore(&rtc_lock, flags);
 
-/*
- * We know that x86-64 always uses BCD format, no need to check the config
- * register.
- */
-
-           BCD_TO_BIN(sec);
-           BCD_TO_BIN(min);
-           BCD_TO_BIN(hour);
-           BCD_TO_BIN(day);
-           BCD_TO_BIN(mon);
-           BCD_TO_BIN(year);
-
-/*
- * x86-64 systems only exists since 2002.
- * This will work up to Dec 31, 2100
- */
-       year += 2000;
+       /*
+        * We know that x86-64 always uses BCD format, no need to check the
+        * config register.
+        */
+
+       BCD_TO_BIN(sec);
+       BCD_TO_BIN(min);
+       BCD_TO_BIN(hour);
+       BCD_TO_BIN(day);
+       BCD_TO_BIN(mon);
+       BCD_TO_BIN(year);
+
+       if (extyear) {
+               BCD_TO_BIN(extyear);
+               year += extyear;
+               printk(KERN_INFO "Extended CMOS year: %d\n", extyear);
+       } else { 
+               /*
+                * x86-64 systems only exists since 2002.
+                * This will work up to Dec 31, 2100
+                */
+               year += 2000;
+       }
 
        return mktime(year, mon, day, hour, min, sec);
 }
@@ -605,7 +591,8 @@ static void cpufreq_delayed_get(void)
                cpufreq_delayed_issched = 1;
                if (!warned) {
                        warned = 1;
-                       printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+                       printk(KERN_DEBUG 
+       "Losing some ticks... checking if CPU frequency changed.\n");
                }
                schedule_work(&cpufreq_delayed_get_work);
        }
@@ -628,9 +615,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
        lpj = &dummy;
        if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 #ifdef CONFIG_SMP
-       lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+               lpj = &cpu_data[freq->cpu].loops_per_jiffy;
 #else
-       lpj = &boot_cpu_data.loops_per_jiffy;
+               lpj = &boot_cpu_data.loops_per_jiffy;
 #endif
 
        if (!ref_freq) {
@@ -649,7 +636,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                        vxtime.tsc_quot = (1000L << 32) / cpu_khz;
        }
        
-       set_cyc2ns_scale(cpu_khz_ref / 1000);
+       set_cyc2ns_scale(cpu_khz_ref);
 
        return 0;
 }
@@ -693,8 +680,7 @@ static unsigned int __init hpet_calibrate_tsc(void)
        do {
                local_irq_disable();
                hpet_now = hpet_readl(HPET_COUNTER);
-               sync_core();
-               rdtscl(tsc_now);
+               tsc_now = get_cycles_sync();
                local_irq_restore(flags);
        } while ((tsc_now - tsc_start) < TICK_COUNT &&
                 (hpet_now - hpet_start) < TICK_COUNT);
@@ -724,11 +710,9 @@ static unsigned int __init pit_calibrate_tsc(void)
        outb(0xb0, 0x43);
        outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
        outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
-       rdtscll(start);
-       sync_core();
+       start = get_cycles_sync();
        while ((inb(0x61) & 0x20) == 0);
-       sync_core();
-       rdtscll(end);
+       end = get_cycles_sync();
 
        spin_unlock_irqrestore(&i8253_lock, flags);
        
@@ -742,7 +726,7 @@ static __init int late_hpet_init(void)
        unsigned int            ntimer;
 
        if (!vxtime.hpet_address)
-          return -1;
+               return 0;
 
        memset(&hd, 0, sizeof (hd));
 
@@ -755,7 +739,7 @@ static __init int late_hpet_init(void)
         * Timer0 and Timer1 is used by platform.
         */
        hd.hd_phys_address = vxtime.hpet_address;
-       hd.hd_address = (void *)fix_to_virt(FIX_HPET_BASE);
+       hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
        hd.hd_nirqs = ntimer;
        hd.hd_flags = HPET_DATA_PLATFORM;
        hpet_reserve_timer(&hd, 0);
@@ -770,9 +754,8 @@ static __init int late_hpet_init(void)
                int                     i;
 
                hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
-
-               for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
-                    timer++, i++)
+               timer = &hpet->hpet_timers[2];
+               for (i = 2; i < ntimer; timer++, i++)
                        hd.hd_irq[i] = (timer->hpet_config &
                                        Tn_INT_ROUTE_CNF_MASK) >>
                                Tn_INT_ROUTE_CNF_SHIFT;
@@ -803,17 +786,18 @@ static int hpet_timer_stop_set_go(unsigned long tick)
  * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
  * and period also hpet_tick.
  */
-
-       hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+       if (hpet_use_timer) {
+               hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
                    HPET_TN_32BIT, HPET_T0_CFG);
-       hpet_writel(hpet_tick, HPET_T0_CMP);
-       hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
-
+               hpet_writel(hpet_tick, HPET_T0_CMP);
+               hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
+               cfg |= HPET_CFG_LEGACY;
+       }
 /*
  * Go!
  */
 
-       cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+       cfg |= HPET_CFG_ENABLE;
        hpet_writel(cfg, HPET_CFG);
 
        return 0;
@@ -834,8 +818,7 @@ static int hpet_init(void)
 
        id = hpet_readl(HPET_ID);
 
-       if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
-           !(id & HPET_ID_LEGSUP))
+       if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
                return -1;
 
        hpet_period = hpet_readl(HPET_PERIOD);
@@ -845,6 +828,8 @@ static int hpet_init(void)
        hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
                hpet_period;
 
+       hpet_use_timer = (id & HPET_ID_LEGSUP);
+
        return hpet_timer_stop_set_go(hpet_tick);
 }
 
@@ -853,17 +838,43 @@ static int hpet_reenable(void)
        return hpet_timer_stop_set_go(hpet_tick);
 }
 
-void __init pit_init(void)
+#define PIT_MODE 0x43
+#define PIT_CH0  0x40
+
+static void __init __pit_init(int val, u8 mode)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&i8253_lock, flags);
-       outb_p(0x34, 0x43);             /* binary, mode 2, LSB/MSB, ch 0 */
-       outb_p(LATCH & 0xff, 0x40);     /* LSB */
-       outb_p(LATCH >> 8, 0x40);       /* MSB */
+       outb_p(mode, PIT_MODE);
+       outb_p(val & 0xff, PIT_CH0);    /* LSB */
+       outb_p(val >> 8, PIT_CH0);      /* MSB */
        spin_unlock_irqrestore(&i8253_lock, flags);
 }
 
+void __init pit_init(void)
+{
+       __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
+}
+
+void __init pit_stop_interrupt(void)
+{
+       __pit_init(0, 0x30); /* mode 0 */
+}
+
+void __init stop_timer_interrupt(void)
+{
+       char *name;
+       if (vxtime.hpet_address) {
+               name = "HPET";
+               hpet_timer_stop_set_go(0);
+       } else {
+               name = "PIT";
+               pit_stop_interrupt();
+       }
+       printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
+}
+
 int __init time_setup(char *str)
 {
        report_lost_ticks = 1;
@@ -874,11 +885,10 @@ static struct irqaction irq0 = {
        timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL
 };
 
-extern void __init config_acpi_tables(void);
-
 void __init time_init(void)
 {
        char *timename;
+       char *gtod;
 
 #ifdef HPET_HACK_ENABLE_DANGEROUS
         if (!vxtime.hpet_address) {
@@ -901,13 +911,18 @@ void __init time_init(void)
        set_normalized_timespec(&wall_to_monotonic,
                                -xtime.tv_sec, -xtime.tv_nsec);
 
-       if (!hpet_init()) {
-                vxtime_hz = (1000000000000000L + hpet_period / 2) /
-                       hpet_period;
+       if (!hpet_init())
+                vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period;
+       else
+               vxtime.hpet_address = 0;
+
+       if (hpet_use_timer) {
+               /* set tick_nsec to use the proper rate for HPET */
+               tick_nsec = TICK_NSEC_HPET;
                cpu_khz = hpet_calibrate_tsc();
                timename = "HPET";
 #ifdef CONFIG_X86_PM_TIMER
-       } else if (pmtmr_ioport) {
+       } else if (pmtmr_ioport && !vxtime.hpet_address) {
                vxtime_hz = PM_TIMER_FREQUENCY;
                timename = "PM";
                pit_init();
@@ -919,29 +934,26 @@ void __init time_init(void)
                timename = "PIT";
        }
 
-       printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
-              vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
+       vxtime.mode = VXTIME_TSC;
+       gtod = time_init_gtod();
+
+       printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
+              vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
        printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
                cpu_khz / 1000, cpu_khz % 1000);
-       vxtime.mode = VXTIME_TSC;
        vxtime.quot = (1000000L << 32) / vxtime_hz;
        vxtime.tsc_quot = (1000L << 32) / cpu_khz;
-       vxtime.hz = vxtime_hz;
-       rdtscll_sync(&vxtime.last_tsc);
+       vxtime.last_tsc = get_cycles_sync();
        setup_irq(0, &irq0);
 
-       set_cyc2ns_scale(cpu_khz / 1000);
-
-#ifndef CONFIG_SMP
-       time_init_gtod();
-#endif
+       set_cyc2ns_scale(cpu_khz);
 }
 
 /*
  * Make an educated guess if the TSC is trustworthy and synchronized
  * over all CPUs.
  */
-static __init int unsynchronized_tsc(void)
+__cpuinit int unsynchronized_tsc(void)
 {
 #ifdef CONFIG_SMP
        if (oem_force_hpet_timer())
@@ -950,26 +962,26 @@ static __init int unsynchronized_tsc(void)
           are handled in the OEM check above. */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                return 0;
-       /* All in a single socket - should be synchronized */
-       if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
-               return 0;
 #endif
        /* Assume multi socket systems are not synchronized */
-       return num_online_cpus() > 1;
+       return num_present_cpus() > 1;
 }
 
 /*
- * Decide after all CPUs are booted what mode gettimeofday should use.
+ * Decide what mode gettimeofday should use.
  */
-void __init time_init_gtod(void)
+__init static char *time_init_gtod(void)
 {
        char *timetype;
 
        if (unsynchronized_tsc())
                notsc = 1;
        if (vxtime.hpet_address && notsc) {
-               timetype = "HPET";
-               vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+               timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
+               if (hpet_use_timer)
+                       vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+               else
+                       vxtime.last = hpet_readl(HPET_COUNTER);
                vxtime.mode = VXTIME_HPET;
                do_gettimeoffset = do_gettimeoffset_hpet;
 #ifdef CONFIG_X86_PM_TIMER
@@ -983,11 +995,10 @@ void __init time_init_gtod(void)
                printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
 #endif
        } else {
-               timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
+               timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
                vxtime.mode = VXTIME_TSC;
        }
-
-       printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
+       return timetype;
 }
 
 __setup("report_lost_ticks", time_setup);
@@ -995,6 +1006,10 @@ __setup("report_lost_ticks", time_setup);
 static long clock_cmos_diff;
 static unsigned long sleep_start;
 
+/*
+ * sysfs support for the timer.
+ */
+
 static int timer_suspend(struct sys_device *dev, pm_message_t state)
 {
        /*
@@ -1024,9 +1039,22 @@ static int timer_resume(struct sys_device *dev)
        write_seqlock_irqsave(&xtime_lock,flags);
        xtime.tv_sec = sec;
        xtime.tv_nsec = 0;
+       if (vxtime.mode == VXTIME_HPET) {
+               if (hpet_use_timer)
+                       vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+               else
+                       vxtime.last = hpet_readl(HPET_COUNTER);
+#ifdef CONFIG_X86_PM_TIMER
+       } else if (vxtime.mode == VXTIME_PMTMR) {
+               pmtimer_resume();
+#endif
+       } else
+               vxtime.last_tsc = get_cycles_sync();
        write_sequnlock_irqrestore(&xtime_lock,flags);
        jiffies += sleep_length;
        wall_jiffies += sleep_length;
+       monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
+       touch_softlockup_watchdog();
        return 0;
 }
 
@@ -1036,7 +1064,6 @@ static struct sysdev_class timer_sysclass = {
        set_kset_name("timer"),
 };
 
-
 /* XXX this driverfs stuff should probably go elsewhere later -john */
 static struct sys_device device_timer = {
        .id     = 0,
@@ -1070,8 +1097,6 @@ device_initcall(time_init_device);
  */
 #include <linux/rtc.h>
 
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
 #define DEFAULT_RTC_INT_FREQ   64
 #define RTC_NUM_INTS           1
 
@@ -1086,6 +1111,7 @@ static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
 static unsigned long PIE_count;
 
 static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
+static unsigned int hpet_t1_cmp; /* cached comparator register */
 
 int is_hpet_enabled(void)
 {
@@ -1122,10 +1148,12 @@ int hpet_rtc_timer_init(void)
        cnt = hpet_readl(HPET_COUNTER);
        cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
        hpet_writel(cnt, HPET_T1_CMP);
+       hpet_t1_cmp = cnt;
        local_irq_restore(flags);
 
        cfg = hpet_readl(HPET_T1_CFG);
-       cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
+       cfg &= ~HPET_TN_PERIODIC;
+       cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
        hpet_writel(cfg, HPET_T1_CFG);
 
        return 1;
@@ -1135,8 +1163,12 @@ static void hpet_rtc_timer_reinit(void)
 {
        unsigned int cfg, cnt;
 
-       if (!(PIE_on | AIE_on | UIE_on))
+       if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
+               cfg = hpet_readl(HPET_T1_CFG);
+               cfg &= ~HPET_TN_ENABLE;
+               hpet_writel(cfg, HPET_T1_CFG);
                return;
+       }
 
        if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
                hpet_rtc_int_freq = PIE_freq;
@@ -1144,15 +1176,10 @@ static void hpet_rtc_timer_reinit(void)
                hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
 
        /* It is more accurate to use the comparator value than current count.*/
-       cnt = hpet_readl(HPET_T1_CMP);
+       cnt = hpet_t1_cmp;
        cnt += hpet_tick*HZ/hpet_rtc_int_freq;
        hpet_writel(cnt, HPET_T1_CMP);
-
-       cfg = hpet_readl(HPET_T1_CFG);
-       cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
-       hpet_writel(cfg, HPET_T1_CFG);
-
-       return;
+       hpet_t1_cmp = cnt;
 }
 
 /*
@@ -1278,23 +1305,18 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 #endif
 
-
-
 static int __init nohpet_setup(char *s) 
 { 
        nohpet = 1;
-       return 0;
+       return 1;
 } 
 
 __setup("nohpet", nohpet_setup);
 
-
-static int __init notsc_setup(char *s)
+int __init notsc_setup(char *s)
 {
        notsc = 1;
-       return 0;
+       return 1;
 }
 
 __setup("notsc", notsc_setup);
-
-