X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Ftime.c;h=b0df27f80bacadf3e9d5a30c9d3ae98714d6ffd7;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=d47d3f8de44eb23caf26f5a6b1999e65888a6a2c;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index d47d3f8de..b0df27f80 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -11,7 +11,6 @@ * Copyright (c) 2002 Vojtech Pavlik * Copyright (c) 2003 Andi Kleen * RTC support code taken from arch/i386/kernel/timers/time_hpet.c - * */ #include @@ -33,7 +32,9 @@ #include #include #include +#include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif @@ -42,16 +43,20 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); +#ifdef CONFIG_CPU_FREQ +static void cpufreq_delayed_get(void); +#endif + extern int using_apic_timer; -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; -spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(rtc_lock); +DEFINE_SPINLOCK(i8253_lock); static int nohpet __initdata = 0; +static int notsc __initdata = 0; #undef HPET_HACK_ENABLE_DANGEROUS - unsigned int cpu_khz; /* TSC clocks / usec, not used here */ unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ @@ -82,7 +87,7 @@ static inline void rdtscll_sync(unsigned long *tsc) * timer interrupt has happened already, but vxtime.trigger wasn't updated yet. * This is not a problem, because jiffies hasn't updated either. They are bound * together by xtime_lock. - */ + */ static inline unsigned int do_gettimeoffset_tsc(void) { @@ -119,7 +124,7 @@ void do_gettimeofday(struct timeval *tv) usec = xtime.tv_nsec / 1000; /* i386 does some correction here to keep the clock - monotonus even when ntpd is fixing drift. + monotonous even when ntpd is fixing drift. But they didn't work for me, there is a non monotonic clock anyways with ntp. I dropped all corrections now until a real solution can @@ -176,6 +181,28 @@ int do_settimeofday(struct timespec *tv) EXPORT_SYMBOL(do_settimeofday); +unsigned long profile_pc(struct pt_regs *regs) +{ + unsigned long pc = instruction_pointer(regs); + + /* Assume the lock function has either no stack frame or only a single word. + This checks if the address on the stack looks like a kernel text address. + There is a small window for false hits, but in that case the tick + is just accounted to the spinlock function. + Better would be to write these functions in assembler again + and check exactly. */ + if (in_lock_functions(pc)) { + char *v = *(char **)regs->rsp; + if ((v >= _stext && v <= _etext) || + (v >= _sinittext && v <= _einittext) || + (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END)) + return (unsigned long)v; + return ((unsigned long *)regs->rsp)[1]; + } + return pc; +} +EXPORT_SYMBOL(profile_pc); + /* * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500 * ms after the second nowtime has started, because when nowtime is written @@ -214,7 +241,7 @@ static void set_rtc_mmss(unsigned long nowtime) * overflow. This avoids messing with unknown time zones but requires your RTC * not to be off by more than 15 minutes. Since we're calling it only when * our clock is externally synchronized using NTP, this shouldn't be a problem. - */ + */ real_seconds = nowtime % 60; real_minutes = nowtime / 60; @@ -293,19 +320,56 @@ unsigned long long monotonic_clock(void) } EXPORT_SYMBOL(monotonic_clock); +static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) +{ + static long lost_count; + static int warned; + + if (report_lost_ticks) { + printk(KERN_WARNING "time.c: Lost %d timer " + "tick(s)! ", lost); + print_symbol("rip %s)\n", regs->rip); + } + + if (lost_count == 1000 && !warned) { + printk(KERN_WARNING + "warning: many lost ticks.\n" + KERN_WARNING "Your time source seems to be instable or " + "some driver is hogging interupts\n"); + print_symbol("rip %s\n", regs->rip); + if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { + printk(KERN_WARNING "Falling back to HPET\n"); + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + vxtime.mode = VXTIME_HPET; + do_gettimeoffset = do_gettimeoffset_hpet; + } + /* else should fall back to PIT, but code missing. */ + warned = 1; + } else + lost_count++; + +#ifdef CONFIG_CPU_FREQ + /* In some cases the CPU can change frequency without us noticing + (like going into thermal throttle) + Give cpufreq a change to catch up. */ + if ((lost_count+1) % 25 == 0) { + cpufreq_delayed_get(); + } +#endif +} static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { static unsigned long rtc_update = 0; - unsigned long tsc, lost = 0; - int delay, offset = 0; + unsigned long tsc; + int delay, offset = 0, lost = 0; /* * Here we are in the timer irq handler. We have irqs locally disabled (so we * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running * on the other CPU, so we need a lock. We also need to lock the vsyscall * variables, because both do_timer() and us change them -arca+vojtech - */ + */ write_seqlock(&xtime_lock); @@ -354,12 +418,8 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) (((long) offset << 32) / vxtime.tsc_quot) - 1; } - if (lost) { - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %ld timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } + if (lost > 0) { + handle_lost_ticks(lost, regs); jiffies += lost; } @@ -368,6 +428,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ do_timer(regs); +#ifndef CONFIG_SMP + update_process_times(user_mode(regs)); +#endif /* * In the SMP case we use the local APIC timer interrupt to do the profiling, @@ -376,7 +439,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ #ifndef CONFIG_X86_LOCAL_APIC - x86_do_profile(regs); + profile_tick(CPU_PROFILING, regs); #else if (!using_apic_timer) smp_local_timer_interrupt(regs); @@ -509,6 +572,38 @@ unsigned long get_cmos_time(void) Should fix up last_tsc too. Currently gettimeofday in the first tick after the change will be slightly wrong. */ +#include + +static unsigned int cpufreq_delayed_issched = 0; +static unsigned int cpufreq_init = 0; +static struct work_struct cpufreq_delayed_get_work; + +static void handle_cpufreq_delayed_get(void *v) +{ + unsigned int cpu; + for_each_online_cpu(cpu) { + cpufreq_get(cpu); + } + cpufreq_delayed_issched = 0; +} + +/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries + * to verify the CPU frequency the timing core thinks the CPU is running + * at is still correct. + */ +static void cpufreq_delayed_get(void) +{ + static int warned; + if (cpufreq_init && !cpufreq_delayed_issched) { + cpufreq_delayed_issched = 1; + if (!warned) { + warned = 1; + printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); + } + schedule_work(&cpufreq_delayed_get_work); + } +} + static unsigned int ref_freq = 0; static unsigned long loops_per_jiffy_ref = 0; @@ -518,14 +613,18 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj; + unsigned long *lpj, dummy; + lpj = &dummy; + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP lpj = &cpu_data[freq->cpu].loops_per_jiffy; #else lpj = &boot_cpu_data.loops_per_jiffy; #endif + + if (!ref_freq) { ref_freq = freq->old; loops_per_jiffy_ref = *lpj; @@ -538,7 +637,8 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - vxtime.tsc_quot = (1000L << 32) / cpu_khz; + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + vxtime.tsc_quot = (1000L << 32) / cpu_khz; } set_cyc2ns_scale(cpu_khz_ref / 1000); @@ -549,6 +649,18 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, static struct notifier_block time_cpufreq_notifier_block = { .notifier_call = time_cpufreq_notifier }; + +static int __init cpufreq_tsc(void) +{ + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER)) + cpufreq_init = 1; + return 0; +} + +core_initcall(cpufreq_tsc); + #endif /* @@ -615,31 +727,59 @@ static unsigned int __init pit_calibrate_tsc(void) return (end - start) / 50; } -static int hpet_init(void) +#ifdef CONFIG_HPET +static __init int late_hpet_init(void) { - unsigned int cfg, id; + struct hpet_data hd; + unsigned int ntimer; if (!vxtime.hpet_address) - return -1; - set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address); - __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); + return -1; -/* - * Read the period, compute tick and quotient. - */ + memset(&hd, 0, sizeof (hd)); - id = hpet_readl(HPET_ID); + ntimer = hpet_readl(HPET_ID); + ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; + ntimer++; - if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) || - !(id & HPET_ID_LEGSUP)) - return -1; + /* + * Register with driver. + * Timer0 and Timer1 is used by platform. + */ + hd.hd_phys_address = vxtime.hpet_address; + hd.hd_address = (void *)fix_to_virt(FIX_HPET_BASE); + hd.hd_nirqs = ntimer; + hd.hd_flags = HPET_DATA_PLATFORM; + hpet_reserve_timer(&hd, 0); +#ifdef CONFIG_HPET_EMULATE_RTC + hpet_reserve_timer(&hd, 1); +#endif + hd.hd_irq[0] = HPET_LEGACY_8254; + hd.hd_irq[1] = HPET_LEGACY_RTC; + if (ntimer > 2) { + struct hpet *hpet; + struct hpet_timer *timer; + int i; - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < 100000 || hpet_period > 100000000) - return -1; + hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / - hpet_period; + for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; + timer++, i++) + hd.hd_irq[i] = (timer->hpet_config & + Tn_INT_ROUTE_CNF_MASK) >> + Tn_INT_ROUTE_CNF_SHIFT; + + } + + hpet_alloc(&hd); + return 0; +} +fs_initcall(late_hpet_init); +#endif + +static int hpet_timer_stop_set_go(unsigned long tick) +{ + unsigned int cfg; /* * Stop the timers and reset the main counter. @@ -671,6 +811,40 @@ static int hpet_init(void) return 0; } +static int hpet_init(void) +{ + unsigned int id; + + if (!vxtime.hpet_address) + return -1; + set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address); + __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); + +/* + * Read the period, compute tick and quotient. + */ + + id = hpet_readl(HPET_ID); + + if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) || + !(id & HPET_ID_LEGSUP)) + return -1; + + hpet_period = hpet_readl(HPET_PERIOD); + if (hpet_period < 100000 || hpet_period > 100000000) + return -1; + + hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / + hpet_period; + + return hpet_timer_stop_set_go(hpet_tick); +} + +static int hpet_reenable(void) +{ + return hpet_timer_stop_set_go(hpet_tick); +} + void __init pit_init(void) { unsigned long flags; @@ -689,7 +863,7 @@ int __init time_setup(char *str) } static struct irqaction irq0 = { - timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL + timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL }; extern void __init config_acpi_tables(void); @@ -705,9 +879,9 @@ void __init time_init(void) outl(0x800038a0, 0xcf8); outl(0xff000001, 0xcfc); outl(0x800038a0, 0xcf8); - hpet_address = inl(0xcfc) & 0xfffffffe; + vxtime.hpet_address = inl(0xcfc) & 0xfffffffe; printk(KERN_WARNING "time.c: WARNING: Enabled HPET " - "at %#lx.\n", hpet_address); + "at %#lx.\n", vxtime.hpet_address); } #endif if (nohpet) @@ -725,8 +899,8 @@ void __init time_init(void) cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; } else { - pit_init(); - cpu_khz = pit_calibrate_tsc(); + pit_init(); + cpu_khz = pit_calibrate_tsc(); timename = "PIT"; } @@ -742,71 +916,99 @@ void __init time_init(void) setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz / 1000); - -#ifdef CONFIG_CPU_FREQ - cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); -#endif } void __init time_init_smp(void) { char *timetype; - if (vxtime.hpet_address) { + /* + * AMD systems with more than one CPU don't have fully synchronized + * TSCs. Always use HPET gettimeofday for these, although it is slower. + * Intel SMP systems usually have synchronized TSCs, so use always + * the TSC. + * + * Exceptions: + * IBM Summit2 checked by oem_force_hpet_timer(). + * AMD dual core may also not need HPET. Check me. + * + * Can be turned off with "notsc". + */ + if (num_online_cpus() > 1 && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + notsc = 1; + /* Some systems will want to disable TSC and use HPET. */ + if (oem_force_hpet_timer()) + notsc = 1; + if (vxtime.hpet_address && notsc) { timetype = "HPET"; vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; } else { - timetype = "PIT/TSC"; + timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } + printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); } __setup("report_lost_ticks", time_setup); static long clock_cmos_diff; +static unsigned long sleep_start; -static int time_suspend(struct sys_device *dev, u32 state) +static int timer_suspend(struct sys_device *dev, u32 state) { /* * Estimate time zone so that set_time can update the clock */ - clock_cmos_diff = -get_cmos_time(); + long cmos_time = get_cmos_time(); + + clock_cmos_diff = -cmos_time; clock_cmos_diff += get_seconds(); + sleep_start = cmos_time; return 0; } -static int time_resume(struct sys_device *dev) +static int timer_resume(struct sys_device *dev) { - unsigned long sec = get_cmos_time() + clock_cmos_diff; - write_seqlock_irq(&xtime_lock); + unsigned long flags; + unsigned long sec; + unsigned long ctime = get_cmos_time(); + unsigned long sleep_length = (ctime - sleep_start) * HZ; + + if (vxtime.hpet_address) + hpet_reenable(); + + sec = ctime + clock_cmos_diff; + write_seqlock_irqsave(&xtime_lock,flags); xtime.tv_sec = sec; xtime.tv_nsec = 0; - write_sequnlock_irq(&xtime_lock); + write_sequnlock_irqrestore(&xtime_lock,flags); + jiffies += sleep_length; + wall_jiffies += sleep_length; return 0; } -static struct sysdev_class pit_sysclass = { - .resume = time_resume, - .suspend = time_suspend, - set_kset_name("pit"), +static struct sysdev_class timer_sysclass = { + .resume = timer_resume, + .suspend = timer_suspend, + set_kset_name("timer"), }; /* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_i8253 = { +static struct sys_device device_timer = { .id = 0, - .cls = &pit_sysclass, + .cls = &timer_sysclass, }; static int time_init_device(void) { - int error = sysdev_class_register(&pit_sysclass); + int error = sysdev_class_register(&timer_sysclass); if (!error) - error = sysdev_register(&device_i8253); + error = sysdev_register(&device_timer); return error; } @@ -827,7 +1029,6 @@ device_initcall(time_init_device); * For (3), we use interrupts at 64Hz or user specified periodic * frequency, whichever is higher. */ -#include #include extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); @@ -1038,6 +1239,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) } #endif + + static int __init nohpet_setup(char *s) { nohpet = 1; @@ -1045,3 +1248,14 @@ static int __init nohpet_setup(char *s) } __setup("nohpet", nohpet_setup); + + +static int __init notsc_setup(char *s) +{ + notsc = 1; + return 0; +} + +__setup("notsc", notsc_setup); + +