* Copyright (c) 1995 Markus Kuhn
* Copyright (c) 1996 Ingo Molnar
* Copyright (c) 1998 Andrea Arcangeli
- * Copyright (c) 2002 Vojtech Pavlik
+ * Copyright (c) 2002,2006 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
* RTC support code taken from arch/i386/kernel/timers/time_hpet.c
- *
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
-#include <linux/irq.h>
#include <linux/time.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/sysdev.h>
#include <linux/bcd.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
#include <linux/kallsyms.h>
+#include <linux/acpi.h>
+#ifdef CONFIG_ACPI
+#include <acpi/achware.h> /* for PM timer frequency */
+#include <acpi/acpi_bus.h>
+#endif
#include <asm/8253pit.h>
#include <asm/pgtable.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#include <asm/proto.h>
#include <asm/hpet.h>
+#include <asm/sections.h>
#include <linux/cpufreq.h>
-#ifdef CONFIG_X86_LOCAL_APIC
+#include <linux/hpet.h>
#include <asm/apic.h>
-#endif
-
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
+#ifdef CONFIG_CPU_FREQ
+static void cpufreq_delayed_get(void);
+#endif
+extern void i8254_timer_resume(void);
extern int using_apic_timer;
-spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
-spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
+static char *timename = NULL;
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+DEFINE_SPINLOCK(i8253_lock);
-static int nohpet __initdata = 0;
+int nohpet __initdata = 0;
+static int notsc __initdata = 0;
-#undef HPET_HACK_ENABLE_DANGEROUS
+#define USEC_PER_TICK (USEC_PER_SEC / HZ)
+#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
+#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
+#define NS_SCALE 10 /* 2^10, carefully chosen */
+#define US_SCALE 32 /* 2^32, arbitralrily chosen */
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
-unsigned long hpet_period; /* fsecs / HPET clock */
+EXPORT_SYMBOL(cpu_khz);
+static unsigned long hpet_period; /* fsecs / HPET clock */
unsigned long hpet_tick; /* HPET clocks / interrupt */
+int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
unsigned long vxtime_hz = PIT_TICK_RATE;
int report_lost_ticks; /* command line option */
unsigned long long monotonic_base;
struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;
-static inline void rdtscll_sync(unsigned long *tsc)
-{
-#ifdef CONFIG_SMP
- sync_core();
-#endif
- rdtscll(*tsc);
-}
-
/*
* do_gettimeoffset() returns microseconds since last timer interrupt was
* triggered by hardware. A memory read of HPET is slower than a register read
* timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
* This is not a problem, because jiffies hasn't updated either. They are bound
* together by xtime_lock.
- */
+ */
static inline unsigned int do_gettimeoffset_tsc(void)
{
unsigned long t;
unsigned long x;
- rdtscll_sync(&t);
- if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
- x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+ t = get_cycles_sync();
+ if (t < vxtime.last_tsc)
+ t = vxtime.last_tsc; /* hack */
+ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
return x;
}
static inline unsigned int do_gettimeoffset_hpet(void)
{
- return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
+ /* cap counter read to one tick to avoid inconsistencies */
+ unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
+ return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
}
unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
void do_gettimeofday(struct timeval *tv)
{
- unsigned long seq, t;
+ unsigned long seq;
unsigned int sec, usec;
do {
seq = read_seqbegin(&xtime_lock);
sec = xtime.tv_sec;
- usec = xtime.tv_nsec / 1000;
+ usec = xtime.tv_nsec / NSEC_PER_USEC;
/* i386 does some correction here to keep the clock
- monotonus even when ntpd is fixing drift.
+ monotonous even when ntpd is fixing drift.
But they didn't work for me, there is a non monotonic
clock anyways with ntp.
I dropped all corrections now until a real solution can
be found. Note when you fix it here you need to do the same
in arch/x86_64/kernel/vsyscall.c and export all needed
variables in vmlinux.lds. -AK */
-
- t = (jiffies - wall_jiffies) * (1000000L / HZ) +
- do_gettimeoffset();
- usec += t;
+ usec += do_gettimeoffset();
} while (read_seqretry(&xtime_lock, seq));
- tv->tv_sec = sec + usec / 1000000;
- tv->tv_usec = usec % 1000000;
+ tv->tv_sec = sec + usec / USEC_PER_SEC;
+ tv->tv_usec = usec % USEC_PER_SEC;
}
EXPORT_SYMBOL(do_gettimeofday);
write_seqlock_irq(&xtime_lock);
- nsec -= do_gettimeoffset() * 1000 +
- (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ);
+ nsec -= do_gettimeoffset() * NSEC_PER_USEC;
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
set_normalized_timespec(&xtime, sec, nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
- time_adjust = 0; /* stop active adjtime() */
- time_status |= STA_UNSYNC;
- time_maxerror = NTP_PHASE_LIMIT;
- time_esterror = NTP_PHASE_LIMIT;
+ ntp_clear();
write_sequnlock_irq(&xtime_lock);
clock_was_set();
EXPORT_SYMBOL(do_settimeofday);
+unsigned long profile_pc(struct pt_regs *regs)
+{
+ unsigned long pc = instruction_pointer(regs);
+
+ /* Assume the lock function has either no stack frame or a copy
+ of eflags from PUSHF
+ Eflags always has bits 22 and up cleared unlike kernel addresses. */
+ if (!user_mode(regs) && in_lock_functions(pc)) {
+ unsigned long *sp = (unsigned long *)regs->rsp;
+ if (sp[0] >> 22)
+ return sp[0];
+ if (sp[1] >> 22)
+ return sp[1];
+ }
+ return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+
/*
* In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
* ms after the second nowtime has started, because when nowtime is written
* overflow. This avoids messing with unknown time zones but requires your RTC
* not to be off by more than 15 minutes. Since we're calling it only when
* our clock is externally synchronized using NTP, this shouldn't be a problem.
- */
+ */
real_seconds = nowtime % 60;
real_minutes = nowtime / 60;
real_minutes += 30; /* correct for half hour time zone */
real_minutes %= 60;
-#if 0
- /* AMD 8111 is a really bad time keeper and hits this regularly.
- It probably was an attempt to avoid screwing up DST, but ignore
- that for now. */
if (abs(real_minutes - cmos_minutes) >= 30) {
printk(KERN_WARNING "time.c: can't update CMOS clock "
"from %d to %d\n", cmos_minutes, real_minutes);
- } else
-#endif
-
- {
- BIN_TO_BCD(real_seconds);
- BIN_TO_BCD(real_minutes);
+ } else {
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
CMOS_WRITE(real_seconds, RTC_SECONDS);
CMOS_WRITE(real_minutes, RTC_MINUTES);
}
* Note: This function is required to return accurate
* time even in the absence of multiple timer ticks.
*/
+static inline unsigned long long cycles_2_ns(unsigned long long cyc);
unsigned long long monotonic_clock(void)
{
unsigned long seq;
last_offset = vxtime.last;
base = monotonic_base;
- this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
-
+ this_offset = hpet_readl(HPET_COUNTER);
} while (read_seqretry(&xtime_lock, seq));
offset = (this_offset - last_offset);
- offset *=(NSEC_PER_SEC/HZ)/hpet_tick;
- return base + offset;
- }else{
+ offset *= NSEC_PER_TICK / hpet_tick;
+ } else {
do {
seq = read_seqbegin(&xtime_lock);
last_offset = vxtime.last_tsc;
base = monotonic_base;
} while (read_seqretry(&xtime_lock, seq));
- sync_core();
- rdtscll(this_offset);
- offset = (this_offset - last_offset)*1000/cpu_khz;
- return base + offset;
+ this_offset = get_cycles_sync();
+ offset = cycles_2_ns(this_offset - last_offset);
}
-
-
+ return base + offset;
}
EXPORT_SYMBOL(monotonic_clock);
+static noinline void handle_lost_ticks(int lost)
+{
+ static long lost_count;
+ static int warned;
+ if (report_lost_ticks) {
+ printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
+ print_symbol("rip %s)\n", get_irq_regs()->rip);
+ }
+
+ if (lost_count == 1000 && !warned) {
+ printk(KERN_WARNING "warning: many lost ticks.\n"
+ KERN_WARNING "Your time source seems to be instable or "
+ "some driver is hogging interupts\n");
+ print_symbol("rip %s\n", get_irq_regs()->rip);
+ if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
+ printk(KERN_WARNING "Falling back to HPET\n");
+ if (hpet_use_timer)
+ vxtime.last = hpet_readl(HPET_T0_CMP) -
+ hpet_tick;
+ else
+ vxtime.last = hpet_readl(HPET_COUNTER);
+ vxtime.mode = VXTIME_HPET;
+ do_gettimeoffset = do_gettimeoffset_hpet;
+ }
+ /* else should fall back to PIT, but code missing. */
+ warned = 1;
+ } else
+ lost_count++;
-static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+#ifdef CONFIG_CPU_FREQ
+ /* In some cases the CPU can change frequency without us noticing
+ Give cpufreq a change to catch up. */
+ if ((lost_count+1) % 25 == 0)
+ cpufreq_delayed_get();
+#endif
+}
+
+void main_timer_handler(void)
{
static unsigned long rtc_update = 0;
- unsigned long tsc, lost = 0;
- int delay, offset = 0;
+ unsigned long tsc;
+ int delay = 0, offset = 0, lost = 0;
/*
* Here we are in the timer irq handler. We have irqs locally disabled (so we
* don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
* on the other CPU, so we need a lock. We also need to lock the vsyscall
* variables, because both do_timer() and us change them -arca+vojtech
- */
+ */
write_seqlock(&xtime_lock);
- if (vxtime.hpet_address) {
+ if (vxtime.hpet_address)
+ offset = hpet_readl(HPET_COUNTER);
+
+ if (hpet_use_timer) {
+ /* if we're using the hpet timer functionality,
+ * we can more accurately know the counter value
+ * when the timer interrupt occured.
+ */
offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
delay = hpet_readl(HPET_COUNTER) - offset;
- } else {
+ } else if (!pmtmr_ioport) {
spin_lock(&i8253_lock);
outb_p(0x00, 0x43);
delay = inb_p(0x40);
delay = LATCH - 1 - delay;
}
- rdtscll_sync(&tsc);
+ tsc = get_cycles_sync();
if (vxtime.mode == VXTIME_HPET) {
if (offset - vxtime.last > hpet_tick) {
}
monotonic_base +=
- (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
+ (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
vxtime.last = offset;
+#ifdef CONFIG_X86_PM_TIMER
+ } else if (vxtime.mode == VXTIME_PMTMR) {
+ lost = pmtimer_mark_offset();
+#endif
} else {
offset = (((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
+ vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
if (offset < 0)
offset = 0;
- if (offset > (USEC_PER_SEC / HZ)) {
- lost = offset / (USEC_PER_SEC / HZ);
- offset %= (USEC_PER_SEC / HZ);
+ if (offset > USEC_PER_TICK) {
+ lost = offset / USEC_PER_TICK;
+ offset %= USEC_PER_TICK;
}
- monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ;
+ monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
if ((((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> 32) < offset)
+ vxtime.tsc_quot) >> US_SCALE) < offset)
vxtime.last_tsc = tsc -
- (((long) offset << 32) / vxtime.tsc_quot) - 1;
+ (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
}
- if (lost) {
- if (report_lost_ticks) {
- printk(KERN_WARNING "time.c: Lost %ld timer "
- "tick(s)! ", lost);
- print_symbol("rip %s)\n", regs->rip);
- }
- jiffies += lost;
- }
+ if (lost > 0)
+ handle_lost_ticks(lost);
+ else
+ lost = 0;
/*
* Do the timer stuff.
*/
- do_timer(regs);
+ do_timer(lost + 1);
+#ifndef CONFIG_SMP
+ update_process_times(user_mode(get_irq_regs()));
+#endif
/*
* In the SMP case we use the local APIC timer interrupt to do the profiling,
* have to call the local interrupt handler.
*/
-#ifndef CONFIG_X86_LOCAL_APIC
- x86_do_profile(regs);
-#else
if (!using_apic_timer)
- smp_local_timer_interrupt(regs);
-#endif
+ smp_local_timer_interrupt();
/*
* If we have an externally synchronized Linux clock, then update CMOS clock
* off) isn't likely to go away much sooner anyway.
*/
- if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
+ if (ntp_synced() && xtime.tv_sec > rtc_update &&
abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
set_rtc_mmss(xtime.tv_sec);
rtc_update = xtime.tv_sec + 660;
}
write_sequnlock(&xtime_lock);
+}
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
+{
+ if (apic_runs_main_timer > 1)
+ return IRQ_HANDLED;
+ main_timer_handler();
+ if (using_apic_timer)
+ smp_send_timer_broadcast_ipi();
return IRQ_HANDLED;
}
-static unsigned int cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+static unsigned int cyc2ns_scale __read_mostly;
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+ cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+ return (cyc * cyc2ns_scale) >> NS_SCALE;
}
unsigned long long sched_clock(void)
Disadvantage is a small drift between CPUs in some configurations,
but that should be tolerable. */
if (__vxtime.mode == VXTIME_HPET)
- return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32;
+ return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
#endif
/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
return cycles_2_ns(a);
}
-unsigned long get_cmos_time(void)
+static unsigned long get_cmos_time(void)
{
- unsigned int timeout, year, mon, day, hour, min, sec;
- unsigned char last, this;
+ unsigned int year, mon, day, hour, min, sec;
unsigned long flags;
-
-/*
- * The Linux interpretation of the CMOS clock register contents: When the
- * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the
- * second which has precisely just started. Waiting for this can take up to 1
- * second, we timeout approximately after 2.4 seconds on a machine with
- * standard 8.3 MHz ISA bus.
- */
+ unsigned extyear = 0;
spin_lock_irqsave(&rtc_lock, flags);
- timeout = 1000000;
- last = this = 0;
-
- while (timeout && last && !this) {
- last = this;
- this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
- timeout--;
- }
-
-/*
- * Here we are safe to assume the registers won't change for a whole second, so
- * we just go ahead and read them.
- */
-
+ do {
sec = CMOS_READ(RTC_SECONDS);
min = CMOS_READ(RTC_MINUTES);
hour = CMOS_READ(RTC_HOURS);
day = CMOS_READ(RTC_DAY_OF_MONTH);
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
+#ifdef CONFIG_ACPI
+ if (acpi_fadt.revision >= FADT2_REVISION_ID &&
+ acpi_fadt.century)
+ extyear = CMOS_READ(acpi_fadt.century);
+#endif
+ } while (sec != CMOS_READ(RTC_SECONDS));
spin_unlock_irqrestore(&rtc_lock, flags);
-/*
- * We know that x86-64 always uses BCD format, no need to check the config
- * register.
- */
-
- BCD_TO_BIN(sec);
- BCD_TO_BIN(min);
- BCD_TO_BIN(hour);
- BCD_TO_BIN(day);
- BCD_TO_BIN(mon);
- BCD_TO_BIN(year);
-
-/*
- * This will work up to Dec 31, 2069.
- */
-
- if ((year += 1900) < 1970)
- year += 100;
+ /*
+ * We know that x86-64 always uses BCD format, no need to check the
+ * config register.
+ */
+
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+
+ if (extyear) {
+ BCD_TO_BIN(extyear);
+ year += extyear;
+ printk(KERN_INFO "Extended CMOS year: %d\n", extyear);
+ } else {
+ /*
+ * x86-64 systems only exists since 2002.
+ * This will work up to Dec 31, 2100
+ */
+ year += 2000;
+ }
return mktime(year, mon, day, hour, min, sec);
}
Should fix up last_tsc too. Currently gettimeofday in the
first tick after the change will be slightly wrong. */
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(struct work_struct *v)
+{
+ unsigned int cpu;
+ for_each_online_cpu(cpu) {
+ cpufreq_get(cpu);
+ }
+ cpufreq_delayed_issched = 0;
+}
+
+/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+static void cpufreq_delayed_get(void)
+{
+ static int warned;
+ if (cpufreq_init && !cpufreq_delayed_issched) {
+ cpufreq_delayed_issched = 1;
+ if (!warned) {
+ warned = 1;
+ printk(KERN_DEBUG
+ "Losing some ticks... checking if CPU frequency changed.\n");
+ }
+ schedule_work(&cpufreq_delayed_get_work);
+ }
+}
+
static unsigned int ref_freq = 0;
static unsigned long loops_per_jiffy_ref = 0;
void *data)
{
struct cpufreq_freqs *freq = data;
- unsigned long *lpj;
+ unsigned long *lpj, dummy;
+ if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+ return 0;
+
+ lpj = &dummy;
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
#ifdef CONFIG_SMP
- lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+ lpj = &cpu_data[freq->cpu].loops_per_jiffy;
#else
- lpj = &boot_cpu_data.loops_per_jiffy;
+ lpj = &boot_cpu_data.loops_per_jiffy;
#endif
if (!ref_freq) {
cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
}
- set_cyc2ns_scale(cpu_khz_ref / 1000);
+ set_cyc2ns_scale(cpu_khz_ref);
return 0;
}
static struct notifier_block time_cpufreq_notifier_block = {
.notifier_call = time_cpufreq_notifier
};
+
+static int __init cpufreq_tsc(void)
+{
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
+ if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER))
+ cpufreq_init = 1;
+ return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
#endif
/*
*/
#define TICK_COUNT 100000000
+#define TICK_MIN 5000
+
+/*
+ * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
+ * occurs between the reads of the hpet & TSC.
+ */
+static void __init read_hpet_tsc(int *hpet, int *tsc)
+{
+ int tsc1, tsc2, hpet1;
+
+ do {
+ tsc1 = get_cycles_sync();
+ hpet1 = hpet_readl(HPET_COUNTER);
+ tsc2 = get_cycles_sync();
+ } while (tsc2 - tsc1 > TICK_MIN);
+ *hpet = hpet1;
+ *tsc = tsc2;
+}
+
static unsigned int __init hpet_calibrate_tsc(void)
{
local_irq_save(flags);
local_irq_disable();
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtscl(tsc_start);
+ read_hpet_tsc(&hpet_start, &tsc_start);
do {
local_irq_disable();
- hpet_now = hpet_readl(HPET_COUNTER);
- sync_core();
- rdtscl(tsc_now);
+ read_hpet_tsc(&hpet_now, &tsc_now);
local_irq_restore(flags);
} while ((tsc_now - tsc_start) < TICK_COUNT &&
(hpet_now - hpet_start) < TICK_COUNT);
outb(0xb0, 0x43);
outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- rdtscll(start);
- sync_core();
+ start = get_cycles_sync();
while ((inb(0x61) & 0x20) == 0);
- sync_core();
- rdtscll(end);
+ end = get_cycles_sync();
spin_unlock_irqrestore(&i8253_lock, flags);
return (end - start) / 50;
}
-static int hpet_init(void)
+#ifdef CONFIG_HPET
+static __init int late_hpet_init(void)
{
- unsigned int cfg, id;
+ struct hpet_data hd;
+ unsigned int ntimer;
if (!vxtime.hpet_address)
- return -1;
- set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
- __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+ return 0;
-/*
- * Read the period, compute tick and quotient.
- */
+ memset(&hd, 0, sizeof (hd));
- id = hpet_readl(HPET_ID);
+ ntimer = hpet_readl(HPET_ID);
+ ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+ ntimer++;
- if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
- !(id & HPET_ID_LEGSUP))
- return -1;
+ /*
+ * Register with driver.
+ * Timer0 and Timer1 is used by platform.
+ */
+ hd.hd_phys_address = vxtime.hpet_address;
+ hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
+ hd.hd_nirqs = ntimer;
+ hd.hd_flags = HPET_DATA_PLATFORM;
+ hpet_reserve_timer(&hd, 0);
+#ifdef CONFIG_HPET_EMULATE_RTC
+ hpet_reserve_timer(&hd, 1);
+#endif
+ hd.hd_irq[0] = HPET_LEGACY_8254;
+ hd.hd_irq[1] = HPET_LEGACY_RTC;
+ if (ntimer > 2) {
+ struct hpet *hpet;
+ struct hpet_timer *timer;
+ int i;
+
+ hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
+ timer = &hpet->hpet_timers[2];
+ for (i = 2; i < ntimer; timer++, i++)
+ hd.hd_irq[i] = (timer->hpet_config &
+ Tn_INT_ROUTE_CNF_MASK) >>
+ Tn_INT_ROUTE_CNF_SHIFT;
- hpet_period = hpet_readl(HPET_PERIOD);
- if (hpet_period < 100000 || hpet_period > 100000000)
- return -1;
+ }
- hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
- hpet_period;
+ hpet_alloc(&hd);
+ return 0;
+}
+fs_initcall(late_hpet_init);
+#endif
+
+static int hpet_timer_stop_set_go(unsigned long tick)
+{
+ unsigned int cfg;
/*
* Stop the timers and reset the main counter.
* Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
* and period also hpet_tick.
*/
-
- hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+ if (hpet_use_timer) {
+ hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT, HPET_T0_CFG);
- hpet_writel(hpet_tick, HPET_T0_CMP);
- hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
-
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
+ cfg |= HPET_CFG_LEGACY;
+ }
/*
* Go!
*/
- cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+ cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
return 0;
}
-void __init pit_init(void)
+static int hpet_init(void)
+{
+ unsigned int id;
+
+ if (!vxtime.hpet_address)
+ return -1;
+ set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
+ __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+ id = hpet_readl(HPET_ID);
+
+ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+ if (hpet_period < 100000 || hpet_period > 100000000)
+ return -1;
+
+ hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
+
+ hpet_use_timer = (id & HPET_ID_LEGSUP);
+
+ return hpet_timer_stop_set_go(hpet_tick);
+}
+
+static int hpet_reenable(void)
+{
+ return hpet_timer_stop_set_go(hpet_tick);
+}
+
+#define PIT_MODE 0x43
+#define PIT_CH0 0x40
+
+static void __init __pit_init(int val, u8 mode)
{
unsigned long flags;
spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff, 0x40); /* LSB */
- outb_p(LATCH >> 8, 0x40); /* MSB */
+ outb_p(mode, PIT_MODE);
+ outb_p(val & 0xff, PIT_CH0); /* LSB */
+ outb_p(val >> 8, PIT_CH0); /* MSB */
spin_unlock_irqrestore(&i8253_lock, flags);
}
+void __init pit_init(void)
+{
+ __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
+}
+
+void __init pit_stop_interrupt(void)
+{
+ __pit_init(0, 0x30); /* mode 0 */
+}
+
+void __init stop_timer_interrupt(void)
+{
+ char *name;
+ if (vxtime.hpet_address) {
+ name = "HPET";
+ hpet_timer_stop_set_go(0);
+ } else {
+ name = "PIT";
+ pit_stop_interrupt();
+ }
+ printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
+}
+
int __init time_setup(char *str)
{
report_lost_ticks = 1;
}
static struct irqaction irq0 = {
- timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL
+ timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
};
-extern void __init config_acpi_tables(void);
-
void __init time_init(void)
{
- char *timename;
-
-#ifdef HPET_HACK_ENABLE_DANGEROUS
- if (!vxtime.hpet_address) {
- printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
- "manually!\n");
- outl(0x800038a0, 0xcf8);
- outl(0xff000001, 0xcfc);
- outl(0x800038a0, 0xcf8);
- hpet_address = inl(0xcfc) & 0xfffffffe;
- printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
- "at %#lx.\n", hpet_address);
- }
-#endif
if (nohpet)
vxtime.hpet_address = 0;
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- if (!hpet_init()) {
- vxtime_hz = (1000000000000000L + hpet_period / 2) /
- hpet_period;
+ if (!hpet_init())
+ vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
+ else
+ vxtime.hpet_address = 0;
+
+ if (hpet_use_timer) {
+ /* set tick_nsec to use the proper rate for HPET */
+ tick_nsec = TICK_NSEC_HPET;
cpu_khz = hpet_calibrate_tsc();
timename = "HPET";
+#ifdef CONFIG_X86_PM_TIMER
+ } else if (pmtmr_ioport && !vxtime.hpet_address) {
+ vxtime_hz = PM_TIMER_FREQUENCY;
+ timename = "PM";
+ pit_init();
+ cpu_khz = pit_calibrate_tsc();
+#endif
} else {
- pit_init();
- cpu_khz = pit_calibrate_tsc();
+ pit_init();
+ cpu_khz = pit_calibrate_tsc();
timename = "PIT";
}
- printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
- vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
- printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
vxtime.mode = VXTIME_TSC;
- vxtime.quot = (1000000L << 32) / vxtime_hz;
- vxtime.tsc_quot = (1000L << 32) / cpu_khz;
- vxtime.hz = vxtime_hz;
- rdtscll_sync(&vxtime.last_tsc);
+ vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
+ vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+ vxtime.last_tsc = get_cycles_sync();
+ set_cyc2ns_scale(cpu_khz);
setup_irq(0, &irq0);
- set_cyc2ns_scale(cpu_khz / 1000);
+#ifndef CONFIG_SMP
+ time_init_gtod();
+#endif
+}
-#ifdef CONFIG_CPU_FREQ
- cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+#ifdef CONFIG_SMP
+ if (apic_is_clustered_box())
+ return 1;
#endif
+ /* Most intel systems have synchronized TSCs except for
+ multi node systems */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+#ifdef CONFIG_ACPI
+ /* But TSC doesn't tick in C3 so don't use it there */
+ if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 1000)
+ return 1;
+#endif
+ return 0;
+ }
+
+ /* Assume multi socket systems are not synchronized */
+ return num_present_cpus() > 1;
}
-void __init time_init_smp(void)
+/*
+ * Decide what mode gettimeofday should use.
+ */
+void time_init_gtod(void)
{
char *timetype;
- if (vxtime.hpet_address) {
- timetype = "HPET";
- vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ if (unsynchronized_tsc())
+ notsc = 1;
+
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
+ vgetcpu_mode = VGETCPU_RDTSCP;
+ else
+ vgetcpu_mode = VGETCPU_LSL;
+
+ if (vxtime.hpet_address && notsc) {
+ timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
+ if (hpet_use_timer)
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ else
+ vxtime.last = hpet_readl(HPET_COUNTER);
vxtime.mode = VXTIME_HPET;
do_gettimeoffset = do_gettimeoffset_hpet;
+#ifdef CONFIG_X86_PM_TIMER
+ /* Using PM for gettimeofday is quite slow, but we have no other
+ choice because the TSC is too unreliable on some systems. */
+ } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
+ timetype = "PM";
+ do_gettimeoffset = do_gettimeoffset_pm;
+ vxtime.mode = VXTIME_PMTMR;
+ sysctl_vsyscall = 0;
+ printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
+#endif
} else {
- timetype = "PIT/TSC";
+ timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
vxtime.mode = VXTIME_TSC;
}
- printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
+
+ printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
+ vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
+ printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+ vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
+ vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+ vxtime.last_tsc = get_cycles_sync();
+
+ set_cyc2ns_scale(cpu_khz);
}
__setup("report_lost_ticks", time_setup);
static long clock_cmos_diff;
+static unsigned long sleep_start;
+
+/*
+ * sysfs support for the timer.
+ */
-static int time_suspend(struct sys_device *dev, u32 state)
+static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
* Estimate time zone so that set_time can update the clock
*/
- clock_cmos_diff = -get_cmos_time();
+ long cmos_time = get_cmos_time();
+
+ clock_cmos_diff = -cmos_time;
clock_cmos_diff += get_seconds();
+ sleep_start = cmos_time;
return 0;
}
-static int time_resume(struct sys_device *dev)
+static int timer_resume(struct sys_device *dev)
{
- unsigned long sec = get_cmos_time() + clock_cmos_diff;
- write_seqlock_irq(&xtime_lock);
+ unsigned long flags;
+ unsigned long sec;
+ unsigned long ctime = get_cmos_time();
+ long sleep_length = (ctime - sleep_start) * HZ;
+
+ if (sleep_length < 0) {
+ printk(KERN_WARNING "Time skew detected in timer resume!\n");
+ /* The time after the resume must not be earlier than the time
+ * before the suspend or some nasty things will happen
+ */
+ sleep_length = 0;
+ ctime = sleep_start;
+ }
+ if (vxtime.hpet_address)
+ hpet_reenable();
+ else
+ i8254_timer_resume();
+
+ sec = ctime + clock_cmos_diff;
+ write_seqlock_irqsave(&xtime_lock,flags);
xtime.tv_sec = sec;
xtime.tv_nsec = 0;
- write_sequnlock_irq(&xtime_lock);
+ if (vxtime.mode == VXTIME_HPET) {
+ if (hpet_use_timer)
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ else
+ vxtime.last = hpet_readl(HPET_COUNTER);
+#ifdef CONFIG_X86_PM_TIMER
+ } else if (vxtime.mode == VXTIME_PMTMR) {
+ pmtimer_resume();
+#endif
+ } else
+ vxtime.last_tsc = get_cycles_sync();
+ write_sequnlock_irqrestore(&xtime_lock,flags);
+ jiffies += sleep_length;
+ monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
+ touch_softlockup_watchdog();
return 0;
}
-static struct sysdev_class pit_sysclass = {
- .resume = time_resume,
- .suspend = time_suspend,
- set_kset_name("pit"),
+static struct sysdev_class timer_sysclass = {
+ .resume = timer_resume,
+ .suspend = timer_suspend,
+ set_kset_name("timer"),
};
-
/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_i8253 = {
+static struct sys_device device_timer = {
.id = 0,
- .cls = &pit_sysclass,
+ .cls = &timer_sysclass,
};
static int time_init_device(void)
{
- int error = sysdev_class_register(&pit_sysclass);
+ int error = sysdev_class_register(&timer_sysclass);
if (!error)
- error = sysdev_register(&device_i8253);
+ error = sysdev_register(&device_timer);
return error;
}
* For (3), we use interrupts at 64Hz or user specified periodic
* frequency, whichever is higher.
*/
-#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
#define DEFAULT_RTC_INT_FREQ 64
#define RTC_NUM_INTS 1
static unsigned long PIE_count;
static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
+static unsigned int hpet_t1_cmp; /* cached comparator register */
int is_hpet_enabled(void)
{
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
local_irq_save(flags);
+
cnt = hpet_readl(HPET_COUNTER);
cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
hpet_writel(cnt, HPET_T1_CMP);
- local_irq_restore(flags);
+ hpet_t1_cmp = cnt;
cfg = hpet_readl(HPET_T1_CFG);
- cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
+ cfg &= ~HPET_TN_PERIODIC;
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_T1_CFG);
+ local_irq_restore(flags);
+
return 1;
}
static void hpet_rtc_timer_reinit(void)
{
- unsigned int cfg, cnt;
+ unsigned int cfg, cnt, ticks_per_int, lost_ints;
- if (!(PIE_on | AIE_on | UIE_on))
+ if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
+ cfg = hpet_readl(HPET_T1_CFG);
+ cfg &= ~HPET_TN_ENABLE;
+ hpet_writel(cfg, HPET_T1_CFG);
return;
+ }
if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
hpet_rtc_int_freq = PIE_freq;
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
/* It is more accurate to use the comparator value than current count.*/
- cnt = hpet_readl(HPET_T1_CMP);
- cnt += hpet_tick*HZ/hpet_rtc_int_freq;
- hpet_writel(cnt, HPET_T1_CMP);
+ ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
+ hpet_t1_cmp += ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
- cfg = hpet_readl(HPET_T1_CFG);
- cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T1_CFG);
+ /*
+ * If the interrupt handler was delayed too long, the write above tries
+ * to schedule the next interrupt in the past and the hardware would
+ * not interrupt until the counter had wrapped around.
+ * So we have to check that the comparator wasn't set to a past time.
+ */
+ cnt = hpet_readl(HPET_COUNTER);
+ if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
+ lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
+ /* Make sure that, even with the time needed to execute
+ * this code, the next scheduled interrupt has been moved
+ * back to the future: */
+ lost_ints++;
- return;
+ hpet_t1_cmp += lost_ints * ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ if (PIE_on)
+ PIE_count += lost_ints;
+
+ printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+ hpet_rtc_int_freq);
+ }
}
/*
}
if (call_rtc_interrupt) {
rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
- rtc_interrupt(rtc_int_flag, dev_id, regs);
+ rtc_interrupt(rtc_int_flag, dev_id);
}
return IRQ_HANDLED;
}
static int __init nohpet_setup(char *s)
{
nohpet = 1;
- return 0;
+ return 1;
}
__setup("nohpet", nohpet_setup);
+
+int __init notsc_setup(char *s)
+{
+ notsc = 1;
+ return 1;
+}
+
+__setup("notsc", notsc_setup);