X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Ftime.c;h=5cc76d0d331f0d0fe2b1f7913d7705887112b073;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=3080f84bf7b76f8f68274d84c5af15d705afc625;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3080f84bf..5cc76d0d3 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -8,7 +8,7 @@ * Copyright (c) 1995 Markus Kuhn * Copyright (c) 1996 Ingo Molnar * Copyright (c) 1998 Andrea Arcangeli - * Copyright (c) 2002 Vojtech Pavlik + * Copyright (c) 2002,2006 Vojtech Pavlik * Copyright (c) 2003 Andi Kleen * RTC support code taken from arch/i386/kernel/timers/time_hpet.c */ @@ -24,10 +24,13 @@ #include #include #include +#include +#include #include #include #ifdef CONFIG_ACPI #include /* for PM timer frequency */ +#include #endif #include #include @@ -38,9 +41,7 @@ #include #include #include -#ifdef CONFIG_X86_LOCAL_APIC #include -#endif #ifdef CONFIG_CPU_FREQ static void cpufreq_delayed_get(void); @@ -48,17 +49,24 @@ static void cpufreq_delayed_get(void); extern void i8254_timer_resume(void); extern int using_apic_timer; -static char *time_init_gtod(void); +static char *timename = NULL; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); int nohpet __initdata = 0; static int notsc __initdata = 0; -#undef HPET_HACK_ENABLE_DANGEROUS +#define USEC_PER_TICK (USEC_PER_SEC / HZ) +#define NSEC_PER_TICK (NSEC_PER_SEC / HZ) +#define FSEC_PER_TICK (FSEC_PER_SEC / HZ) + +#define NS_SCALE 10 /* 2^10, carefully chosen */ +#define US_SCALE 32 /* 2^32, arbitralrily chosen */ unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +EXPORT_SYMBOL(cpu_khz); static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ @@ -69,7 +77,6 @@ unsigned long long monotonic_base; struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; struct timespec __xtime __section_xtime; struct timezone __sys_tz __section_sys_tz; @@ -88,8 +95,9 @@ static inline unsigned int do_gettimeoffset_tsc(void) unsigned long t; unsigned long x; t = get_cycles_sync(); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ - x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; + if (t < vxtime.last_tsc) + t = vxtime.last_tsc; /* hack */ + x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE; return x; } @@ -97,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void) { /* cap counter read to one tick to avoid inconsistencies */ unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; - return (min(counter,hpet_tick) * vxtime.quot) >> 32; + return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE; } unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; @@ -110,14 +118,14 @@ unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; void do_gettimeofday(struct timeval *tv) { - unsigned long seq, t; + unsigned long seq; unsigned int sec, usec; do { seq = read_seqbegin(&xtime_lock); sec = xtime.tv_sec; - usec = xtime.tv_nsec / 1000; + usec = xtime.tv_nsec / NSEC_PER_USEC; /* i386 does some correction here to keep the clock monotonous even when ntpd is fixing drift. @@ -127,15 +135,12 @@ void do_gettimeofday(struct timeval *tv) be found. Note when you fix it here you need to do the same in arch/x86_64/kernel/vsyscall.c and export all needed variables in vmlinux.lds. -AK */ - - t = (jiffies - wall_jiffies) * (1000000L / HZ) + - do_gettimeoffset(); - usec += t; + usec += do_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; + tv->tv_sec = sec + usec / USEC_PER_SEC; + tv->tv_usec = usec % USEC_PER_SEC; } EXPORT_SYMBOL(do_gettimeofday); @@ -156,8 +161,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irq(&xtime_lock); - nsec -= do_gettimeoffset() * 1000 + - (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); + nsec -= do_gettimeoffset() * NSEC_PER_USEC; wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); @@ -178,19 +182,15 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - /* Assume the lock function has either no stack frame or only a single word. - This checks if the address on the stack looks like a kernel text address. - There is a small window for false hits, but in that case the tick - is just accounted to the spinlock function. - Better would be to write these functions in assembler again - and check exactly. */ - if (in_lock_functions(pc)) { - char *v = *(char **)regs->rsp; - if ((v >= _stext && v <= _etext) || - (v >= _sinittext && v <= _einittext) || - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END)) - return (unsigned long)v; - return ((unsigned long *)regs->rsp)[1]; + /* Assume the lock function has either no stack frame or a copy + of eflags from PUSHF + Eflags always has bits 22 and up cleared unlike kernel addresses. */ + if (!user_mode(regs) && in_lock_functions(pc)) { + unsigned long *sp = (unsigned long *)regs->rsp; + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; } return pc; } @@ -242,17 +242,10 @@ static void set_rtc_mmss(unsigned long nowtime) real_minutes += 30; /* correct for half hour time zone */ real_minutes %= 60; -#if 0 - /* AMD 8111 is a really bad time keeper and hits this regularly. - It probably was an attempt to avoid screwing up DST, but ignore - that for now. */ if (abs(real_minutes - cmos_minutes) >= 30) { printk(KERN_WARNING "time.c: can't update CMOS clock " "from %d to %d\n", cmos_minutes, real_minutes); - } else -#endif - - { + } else { BIN_TO_BCD(real_seconds); BIN_TO_BCD(real_minutes); CMOS_WRITE(real_seconds, RTC_SECONDS); @@ -278,6 +271,7 @@ static void set_rtc_mmss(unsigned long nowtime) * Note: This function is required to return accurate * time even in the absence of multiple timer ticks. */ +static inline unsigned long long cycles_2_ns(unsigned long long cyc); unsigned long long monotonic_clock(void) { unsigned long seq; @@ -293,8 +287,7 @@ unsigned long long monotonic_clock(void) this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; + offset *= NSEC_PER_TICK / hpet_tick; } else { do { seq = read_seqbegin(&xtime_lock); @@ -303,58 +296,54 @@ unsigned long long monotonic_clock(void) base = monotonic_base; } while (read_seqretry(&xtime_lock, seq)); this_offset = get_cycles_sync(); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; + offset = cycles_2_ns(this_offset - last_offset); } + return base + offset; } EXPORT_SYMBOL(monotonic_clock); -static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) +static noinline void handle_lost_ticks(int lost) { - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " + static long lost_count; + static int warned; + if (report_lost_ticks) { + printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost); + print_symbol("rip %s)\n", get_irq_regs()->rip); + } + + if (lost_count == 1000 && !warned) { + printk(KERN_WARNING "warning: many lost ticks.\n" + KERN_WARNING "Your time source seems to be instable or " "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - if (hpet_use_timer) - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - vxtime.last = hpet_readl(HPET_COUNTER); - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; + print_symbol("rip %s\n", get_irq_regs()->rip); + if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { + printk(KERN_WARNING "Falling back to HPET\n"); + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - + hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); + vxtime.mode = VXTIME_HPET; + do_gettimeoffset = do_gettimeoffset_hpet; + } + /* else should fall back to PIT, but code missing. */ + warned = 1; + } else + lost_count++; #ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } + /* In some cases the CPU can change frequency without us noticing + Give cpufreq a change to catch up. */ + if ((lost_count+1) % 25 == 0) + cpufreq_delayed_get(); #endif } -void main_timer_handler(struct pt_regs *regs) +void main_timer_handler(void) { static unsigned long rtc_update = 0; unsigned long tsc; - int delay, offset = 0, lost = 0; + int delay = 0, offset = 0, lost = 0; /* * Here we are in the timer irq handler. We have irqs locally disabled (so we @@ -375,7 +364,7 @@ void main_timer_handler(struct pt_regs *regs) */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; - } else { + } else if (!pmtmr_ioport) { spin_lock(&i8253_lock); outb_p(0x00, 0x43); delay = inb_p(0x40); @@ -392,7 +381,7 @@ void main_timer_handler(struct pt_regs *regs) } monotonic_base += - (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; + (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick; vxtime.last = offset; #ifdef CONFIG_X86_PM_TIMER @@ -401,38 +390,38 @@ void main_timer_handler(struct pt_regs *regs) #endif } else { offset = (((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); + vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK; if (offset < 0) offset = 0; - if (offset > (USEC_PER_SEC / HZ)) { - lost = offset / (USEC_PER_SEC / HZ); - offset %= (USEC_PER_SEC / HZ); + if (offset > USEC_PER_TICK) { + lost = offset / USEC_PER_TICK; + offset %= USEC_PER_TICK; } - monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; + monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc); vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; if ((((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) < offset) + vxtime.tsc_quot) >> US_SCALE) < offset) vxtime.last_tsc = tsc - - (((long) offset << 32) / vxtime.tsc_quot) - 1; + (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; } - if (lost > 0) { - handle_lost_ticks(lost, regs); - jiffies += lost; - } + if (lost > 0) + handle_lost_ticks(lost); + else + lost = 0; /* * Do the timer stuff. */ - do_timer(regs); + do_timer(lost + 1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif /* @@ -441,12 +430,8 @@ void main_timer_handler(struct pt_regs *regs) * have to call the local interrupt handler. */ -#ifndef CONFIG_X86_LOCAL_APIC - profile_tick(CPU_PROFILING, regs); -#else if (!using_apic_timer) - smp_local_timer_interrupt(regs); -#endif + smp_local_timer_interrupt(); /* * If we have an externally synchronized Linux clock, then update CMOS clock @@ -465,29 +450,26 @@ void main_timer_handler(struct pt_regs *regs) write_sequnlock(&xtime_lock); } -static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t timer_interrupt(int irq, void *dev_id) { if (apic_runs_main_timer > 1) return IRQ_HANDLED; - main_timer_handler(regs); -#ifdef CONFIG_X86_LOCAL_APIC + main_timer_handler(); if (using_apic_timer) smp_send_timer_broadcast_ipi(); -#endif return IRQ_HANDLED; } -static unsigned int cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ +static unsigned int cyc2ns_scale __read_mostly; static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; + cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) { - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; + return (cyc * cyc2ns_scale) >> NS_SCALE; } unsigned long long sched_clock(void) @@ -500,7 +482,7 @@ unsigned long long sched_clock(void) Disadvantage is a small drift between CPUs in some configurations, but that should be tolerable. */ if (__vxtime.mode == VXTIME_HPET) - return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32; + return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE; #endif /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, @@ -514,43 +496,32 @@ unsigned long long sched_clock(void) static unsigned long get_cmos_time(void) { - unsigned int timeout = 1000000, year, mon, day, hour, min, sec; - unsigned char uip = 0, this = 0; + unsigned int year, mon, day, hour, min, sec; unsigned long flags; - -/* - * The Linux interpretation of the CMOS clock register contents: When the - * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the - * second which has precisely just started. Waiting for this can take up to 1 - * second, we timeout approximately after 2.4 seconds on a machine with - * standard 8.3 MHz ISA bus. - */ + unsigned extyear = 0; spin_lock_irqsave(&rtc_lock, flags); - while (timeout && (!uip || this)) { - uip |= this; - this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; - timeout--; - } - - /* - * Here we are safe to assume the registers won't change for a whole - * second, so we just go ahead and read them. - */ - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); + do { + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); +#ifdef CONFIG_ACPI + if (acpi_fadt.revision >= FADT2_REVISION_ID && + acpi_fadt.century) + extyear = CMOS_READ(acpi_fadt.century); +#endif + } while (sec != CMOS_READ(RTC_SECONDS)); spin_unlock_irqrestore(&rtc_lock, flags); /* * We know that x86-64 always uses BCD format, no need to check the * config register. - */ + */ BCD_TO_BIN(sec); BCD_TO_BIN(min); @@ -559,11 +530,17 @@ static unsigned long get_cmos_time(void) BCD_TO_BIN(mon); BCD_TO_BIN(year); - /* - * x86-64 systems only exists since 2002. - * This will work up to Dec 31, 2100 - */ - year += 2000; + if (extyear) { + BCD_TO_BIN(extyear); + year += extyear; + printk(KERN_INFO "Extended CMOS year: %d\n", extyear); + } else { + /* + * x86-64 systems only exists since 2002. + * This will work up to Dec 31, 2100 + */ + year += 2000; + } return mktime(year, mon, day, hour, min, sec); } @@ -586,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0; static unsigned int cpufreq_init = 0; static struct work_struct cpufreq_delayed_get_work; -static void handle_cpufreq_delayed_get(void *v) +static void handle_cpufreq_delayed_get(struct work_struct *v) { unsigned int cpu; for_each_online_cpu(cpu) { @@ -606,7 +583,8 @@ static void cpufreq_delayed_get(void) cpufreq_delayed_issched = 1; if (!warned) { warned = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); + printk(KERN_DEBUG + "Losing some ticks... checking if CPU frequency changed.\n"); } schedule_work(&cpufreq_delayed_get_work); } @@ -629,9 +607,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data[freq->cpu].loops_per_jiffy; #else - lpj = &boot_cpu_data.loops_per_jiffy; + lpj = &boot_cpu_data.loops_per_jiffy; #endif if (!ref_freq) { @@ -647,7 +625,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - vxtime.tsc_quot = (1000L << 32) / cpu_khz; + vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; } set_cyc2ns_scale(cpu_khz_ref); @@ -661,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER)) cpufreq_init = 1; @@ -678,6 +656,25 @@ core_initcall(cpufreq_tsc); */ #define TICK_COUNT 100000000 +#define TICK_MIN 5000 + +/* + * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none + * occurs between the reads of the hpet & TSC. + */ +static void __init read_hpet_tsc(int *hpet, int *tsc) +{ + int tsc1, tsc2, hpet1; + + do { + tsc1 = get_cycles_sync(); + hpet1 = hpet_readl(HPET_COUNTER); + tsc2 = get_cycles_sync(); + } while (tsc2 - tsc1 > TICK_MIN); + *hpet = hpet1; + *tsc = tsc2; +} + static unsigned int __init hpet_calibrate_tsc(void) { @@ -688,13 +685,11 @@ static unsigned int __init hpet_calibrate_tsc(void) local_irq_save(flags); local_irq_disable(); - hpet_start = hpet_readl(HPET_COUNTER); - rdtscl(tsc_start); + read_hpet_tsc(&hpet_start, &tsc_start); do { local_irq_disable(); - hpet_now = hpet_readl(HPET_COUNTER); - tsc_now = get_cycles_sync(); + read_hpet_tsc(&hpet_now, &tsc_now); local_irq_restore(flags); } while ((tsc_now - tsc_start) < TICK_COUNT && (hpet_now - hpet_start) < TICK_COUNT); @@ -740,7 +735,7 @@ static __init int late_hpet_init(void) unsigned int ntimer; if (!vxtime.hpet_address) - return -1; + return 0; memset(&hd, 0, sizeof (hd)); @@ -768,9 +763,8 @@ static __init int late_hpet_init(void) int i; hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - - for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; - timer++, i++) + timer = &hpet->hpet_timers[2]; + for (i = 2; i < ntimer; timer++, i++) hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; @@ -804,8 +798,8 @@ static int hpet_timer_stop_set_go(unsigned long tick) if (hpet_use_timer) { hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); - hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ + hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */ + hpet_writel(hpet_tick, HPET_T0_CMP); /* period */ cfg |= HPET_CFG_LEGACY; } /* @@ -840,8 +834,7 @@ static int hpet_init(void) if (hpet_period < 100000 || hpet_period > 100000000) return -1; - hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / - hpet_period; + hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; hpet_use_timer = (id & HPET_ID_LEGSUP); @@ -897,26 +890,11 @@ int __init time_setup(char *str) } static struct irqaction irq0 = { - timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL + timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL }; void __init time_init(void) { - char *timename; - char *gtod; - -#ifdef HPET_HACK_ENABLE_DANGEROUS - if (!vxtime.hpet_address) { - printk(KERN_WARNING "time.c: WARNING: Enabling HPET base " - "manually!\n"); - outl(0x800038a0, 0xcf8); - outl(0xff000001, 0xcfc); - outl(0x800038a0, 0xcf8); - vxtime.hpet_address = inl(0xcfc) & 0xfffffffe; - printk(KERN_WARNING "time.c: WARNING: Enabled HPET " - "at %#lx.\n", vxtime.hpet_address); - } -#endif if (nohpet) vxtime.hpet_address = 0; @@ -927,12 +905,13 @@ void __init time_init(void) -xtime.tv_sec, -xtime.tv_nsec); if (!hpet_init()) - vxtime_hz = (1000000000000000L + hpet_period / 2) / - hpet_period; + vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period; else vxtime.hpet_address = 0; if (hpet_use_timer) { + /* set tick_nsec to use the proper rate for HPET */ + tick_nsec = TICK_NSEC_HPET; cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER @@ -949,18 +928,15 @@ void __init time_init(void) } vxtime.mode = VXTIME_TSC; - gtod = time_init_gtod(); - - printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", - vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); - printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - vxtime.quot = (1000000L << 32) / vxtime_hz; - vxtime.tsc_quot = (1000L << 32) / cpu_khz; + vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; + vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; vxtime.last_tsc = get_cycles_sync(); + set_cyc2ns_scale(cpu_khz); setup_irq(0, &irq0); - set_cyc2ns_scale(cpu_khz); +#ifndef CONFIG_SMP + time_init_gtod(); +#endif } /* @@ -970,13 +946,20 @@ void __init time_init(void) __cpuinit int unsynchronized_tsc(void) { #ifdef CONFIG_SMP - if (oem_force_hpet_timer()) + if (apic_is_clustered_box()) return 1; - /* Intel systems are normally all synchronized. Exceptions - are handled in the OEM check above. */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - return 0; #endif + /* Most intel systems have synchronized TSCs except for + multi node systems */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { +#ifdef CONFIG_ACPI + /* But TSC doesn't tick in C3 so don't use it there */ + if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 1000) + return 1; +#endif + return 0; + } + /* Assume multi socket systems are not synchronized */ return num_present_cpus() > 1; } @@ -984,12 +967,18 @@ __cpuinit int unsynchronized_tsc(void) /* * Decide what mode gettimeofday should use. */ -__init static char *time_init_gtod(void) +void time_init_gtod(void) { char *timetype; if (unsynchronized_tsc()) notsc = 1; + + if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) + vgetcpu_mode = VGETCPU_RDTSCP; + else + vgetcpu_mode = VGETCPU_LSL; + if (vxtime.hpet_address && notsc) { timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; if (hpet_use_timer) @@ -1012,7 +1001,16 @@ __init static char *time_init_gtod(void) timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } - return timetype; + + printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", + vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype); + printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; + vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; + vxtime.last_tsc = get_cycles_sync(); + + set_cyc2ns_scale(cpu_khz); } __setup("report_lost_ticks", time_setup); @@ -1042,8 +1040,16 @@ static int timer_resume(struct sys_device *dev) unsigned long flags; unsigned long sec; unsigned long ctime = get_cmos_time(); - unsigned long sleep_length = (ctime - sleep_start) * HZ; + long sleep_length = (ctime - sleep_start) * HZ; + if (sleep_length < 0) { + printk(KERN_WARNING "Time skew detected in timer resume!\n"); + /* The time after the resume must not be earlier than the time + * before the suspend or some nasty things will happen + */ + sleep_length = 0; + ctime = sleep_start; + } if (vxtime.hpet_address) hpet_reenable(); else @@ -1066,7 +1072,6 @@ static int timer_resume(struct sys_device *dev) vxtime.last_tsc = get_cycles_sync(); write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; - wall_jiffies += sleep_length; monotonic_base += sleep_length * (NSEC_PER_SEC/HZ); touch_softlockup_watchdog(); return 0; @@ -1159,23 +1164,25 @@ int hpet_rtc_timer_init(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); + local_irq_restore(flags); + return 1; } static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, cnt; + unsigned int cfg, cnt, ticks_per_int, lost_ints; if (unlikely(!(PIE_on | AIE_on | UIE_on))) { cfg = hpet_readl(HPET_T1_CFG); @@ -1190,10 +1197,33 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; + ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; + hpet_t1_cmp += ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + /* + * If the interrupt handler was delayed too long, the write above tries + * to schedule the next interrupt in the past and the hardware would + * not interrupt until the counter had wrapped around. + * So we have to check that the comparator wasn't set to a past time. + */ + cnt = hpet_readl(HPET_COUNTER); + if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { + lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; + /* Make sure that, even with the time needed to execute + * this code, the next scheduled interrupt has been moved + * back to the future: */ + lost_ints++; + + hpet_t1_cmp += lost_ints * ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + if (PIE_on) + PIE_count += lost_ints; + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + hpet_rtc_int_freq); + } } /* @@ -1313,7 +1343,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) } if (call_rtc_interrupt) { rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); - rtc_interrupt(rtc_int_flag, dev_id, regs); + rtc_interrupt(rtc_int_flag, dev_id); } return IRQ_HANDLED; } @@ -1322,7 +1352,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) static int __init nohpet_setup(char *s) { nohpet = 1; - return 0; + return 1; } __setup("nohpet", nohpet_setup); @@ -1330,7 +1360,7 @@ __setup("nohpet", nohpet_setup); int __init notsc_setup(char *s) { notsc = 1; - return 0; + return 1; } __setup("notsc", notsc_setup);