/*
 * This code largely moved from arch/i386/kernel/time.c.
 * See comments there for proper credits.
 */
6 #include <linux/spinlock.h>
7 #include <linux/init.h>
8 #include <linux/timex.h>
9 #include <linux/errno.h>
10 #include <linux/cpufreq.h>
11 #include <linux/string.h>
12 #include <linux/jiffies.h>
14 #include <asm/timer.h>
/* processor.h for the tsc_disable flag */
17 #include <asm/processor.h>
20 #include "mach_timer.h"
/*
 * File-scope state shared by the TSC timer routines in this file.
 *
 * hpet_usec_quotient and hpet_last are used by the HPET variant of the
 * tick hook.  last_tsc_low/last_tsc_high hold the TSC value captured by
 * the most recent mark_offset call.  monotonic_base accumulates
 * nanoseconds and is read and written under monotonic_lock.
 *
 * NOTE(review): the #endif matching the #ifdef below is not visible in
 * this listing, so confirm which declarations it actually covers.
 */
24 #ifdef CONFIG_HPET_TIMER
25 static unsigned long hpet_usec_quotient;
26 static unsigned long hpet_last;
27 struct timer_opts timer_tsc;
30 int tsc_disable __initdata = 0;
32 extern spinlock_t i8253_lock;
35 /* Number of usecs that the last interrupt was delayed */
36 static int delay_at_last_interrupt;
38 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
39 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
40 static unsigned long long monotonic_base;
41 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
/*
 * Convert from cycles (64 bits) => nanoseconds (64 bits).
 *
 * Basic equation:
 *	ns = cycles / (freq / ns_per_sec)
 *	ns = cycles * (ns_per_sec / freq)
 *	ns = cycles * (10^9 / (cpu_mhz * 10^6))
 *	ns = cycles * (10^3 / cpu_mhz)
 *
 * Then we use scaling math (suggested by george@mvista.com) to get:
 *	ns = cycles * (10^3 * SC / cpu_mhz) / SC
 *	ns = cycles * cyc2ns_scale / SC
 *
 * And since SC is a constant power of two, we can convert the div
 * into a shift.
 *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * set_cyc2ns_scale - recompute the cycles-to-nanoseconds multiplier.
 * @cpu_mhz: CPU clock rate in MHz.
 *
 * Stores (10^3 << CYC2NS_SCALE_FACTOR) / cpu_mhz in cyc2ns_scale.
 *
 * Callers derive @cpu_mhz as cpu_khz / 1000, which truncates to zero for
 * any cpu_khz below 1000.  Dividing by that zero would fault, so a zero
 * argument is ignored and the previously stored scale is left in place.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
{
	if (!cpu_mhz)
		return;

	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}

/*
 * cycles_2_ns - convert a TSC cycle count to nanoseconds.
 * @cyc: number of cycles.
 *
 * Returns (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR.  The product is
 * computed in 64 bits and wraps silently if it does not fit.
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}
/*
 * Older of the two previous PIT counts remembered by mark_offset_tsc()
 * for its centre-value filter (count1 holds the newer one).  init_tsc()
 * seeds it with LATCH.
 */
72 static int count2; /* counter for mark_offset_tsc() */
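/* Cached *multiplier* to convert TSC counts to microseconds.
 * (see the equation in get_offset_tsc() below).
 * Equal to 2^32 * (1 / (clocks per usec) ).
 * Set in init_tsc() from the TSC calibration result and rescaled by
 * time_cpufreq_notifier() when the CPU frequency changes.
 */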
/*
 * get_offset_tsc - time offset since the last timer tick was marked.
 *
 * Installed as timer_tsc.get_offset.  The visible steps subtract
 * last_tsc_low from eax (giving the low-word TSC delta since the last
 * mark_offset call), feed fast_gettimeoffset_quotient into an asm
 * statement whose outputs are eax and edx, and return
 * delay_at_last_interrupt + edx.
 *
 * NOTE(review): the TSC read that loads eax/edx and the opening line of
 * the asm statement are not visible in this listing.  Going by the
 * in-body comment the asm is a 32x32 mull, which would make edx the
 * high word of delta * quotient, i.e. microseconds - confirm against
 * the complete source.
 */
79 static unsigned long fast_gettimeoffset_quotient;
81 static unsigned long get_offset_tsc(void)
83 register unsigned long eax, edx;
85 /* Read the Time Stamp Counter */
89 /* .. relative to previous jiffy (32 bits is enough) */
90 eax -= last_tsc_low; /* tsc_low delta */
93 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
94 * = (tsc_low delta) * (usecs_per_clock)
95 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
97 * Using a mull instead of a divl saves up to 31 clock cycles
98 * in the critical path.
102 :"=a" (eax), "=d" (edx)
103 :"rm" (fast_gettimeoffset_quotient),
106 /* our adjusted time offset in microseconds */
107 return delay_at_last_interrupt + edx;
/*
 * monotonic_clock_tsc - monotonic time in nanoseconds.
 *
 * Installed as timer_tsc.monotonic_clock.  Samples monotonic_base and
 * the 64-bit TSC value recorded at the last tick (last_tsc_high:low) on
 * the read side of the monotonic_lock seqlock, retrying while
 * read_seqretry() reports a concurrent writer.  It then reads the
 * current TSC with rdtscll() and returns the base plus the elapsed
 * cycles converted by cycles_2_ns().
 */
110 static unsigned long long monotonic_clock_tsc(void)
112 unsigned long long last_offset, this_offset, base;
115 /* atomically read monotonic base & last_offset */
117 seq = read_seqbegin(&monotonic_lock);
118 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
119 base = monotonic_base;
120 } while (read_seqretry(&monotonic_lock, seq));
122 /* Read the Time Stamp Counter */
123 rdtscll(this_offset);
125 /* return the value in ns */
126 return base + cycles_2_ns(this_offset - last_offset);
/*
 * sched_clock - scheduler timestamp in nanoseconds.
 *
 * Two return paths are visible: a jiffies-based value,
 * jiffies_64 * (1000000000 / HZ), read without locking, and a TSC-based
 * value obtained by passing a fresh rdtscll() reading to cycles_2_ns().
 *
 * NOTE(review): the condition that selects the jiffies path is not
 * visible in this listing.  The in-body comment suggests it is taken
 * when the TSC cannot be used (for example on NUMA) - confirm against
 * the complete source.
 */
130 * Scheduler clock - returns current time in nanosec units.
132 unsigned long long sched_clock(void)
134 unsigned long long this_offset;
137 * In the NUMA case we dont use the TSC as they are not
138 * synchronized across all CPUs.
143 /* no locking but a rare wrong value is not a big deal */
144 return jiffies_64 * (1000000000 / HZ);
146 /* Read the Time Stamp Counter */
147 rdtscll(this_offset);
149 /* return the value in ns */
150 return cycles_2_ns(this_offset);
/*
 * mark_offset_tsc - per-tick bookkeeping for the TSC timer (PIT variant).
 *
 * Installed as timer_tsc.mark_offset.  Visible sequence:
 *  - take the monotonic_lock write side and remember the previous
 *    64-bit TSC value;
 *  - reread the TSC into last_tsc_low/last_tsc_high;
 *  - under i8253_lock, latch PIT channel 0 and read its 16-bit count;
 *    the PIT may also be reprogrammed with LATCH (VIA686a workaround);
 *  - when pit_latch_buggy is set, replace count by the centre value of
 *    count, count1 and count2;
 *  - turn the low-word TSC delta into microseconds via an asm statement
 *    using fast_gettimeoffset_quotient, add delay_at_last_interrupt,
 *    and split the result into whole ticks (lost) and remainder (delay)
 *    in units of 1000000/HZ;
 *  - add lost-1 to jiffies_64, and print a series of warnings once
 *    lost_count has exceeded 100;
 *  - advance monotonic_base by the elapsed cycles in nanoseconds and
 *    drop the seqlock;
 *  - recompute delay_at_last_interrupt from the PIT count, rounded to
 *    the nearest unit.
 *
 * NOTE(review): several short lines are missing from this listing,
 * including the declarations of count and countmp, the conditions that
 * guard the PIT reset and the jiffies_64 update, the asm opener, and
 * the statement controlled by the final if.  Confirm those against the
 * complete source before relying on this summary.
 */
154 static void mark_offset_tsc(void)
156 unsigned long lost,delay;
157 unsigned long delta = last_tsc_low;
160 static int count1 = 0;
161 unsigned long long this_offset, last_offset;
162 static int lost_count = 0;
164 write_seqlock(&monotonic_lock);
165 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
167 * It is important that these two operations happen almost at
168 * the same time. We do the RDTSC stuff first, since it's
169 * faster. To avoid any inconsistencies, we need interrupts
174 * Interrupts are just disabled locally since the timer irq
175 * has the SA_INTERRUPT flag set. -arca
178 /* read Pentium cycle counter */
180 rdtsc(last_tsc_low, last_tsc_high);
182 spin_lock(&i8253_lock);
183 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
185 count = inb_p(PIT_CH0); /* read the latched count */
186 count |= inb(PIT_CH0) << 8;
189 * VIA686a test code... reset the latch if count > max + 1
190 * from timer_pit.c - cjb
193 outb_p(0x34, PIT_MODE);
194 outb_p(LATCH & 0xff, PIT_CH0);
195 outb(LATCH >> 8, PIT_CH0);
199 spin_unlock(&i8253_lock);
201 if (pit_latch_buggy) {
202 /* get center value of last 3 time lutch */
203 if ((count2 >= count && count >= count1)
204 || (count1 >= count && count >= count2)) {
205 count2 = count1; count1 = count;
206 } else if ((count1 >= count2 && count2 >= count)
207 || (count >= count2 && count2 >= count1)) {
208 countmp = count;count = count2;
209 count2 = count1;count1 = countmp;
211 count2 = count1; count1 = count; count = count1;
215 /* lost tick compensation */
216 delta = last_tsc_low - delta;
218 register unsigned long eax, edx;
221 :"=a" (eax), "=d" (edx)
222 :"rm" (fast_gettimeoffset_quotient),
226 delta += delay_at_last_interrupt;
227 lost = delta/(1000000/HZ);
228 delay = delta%(1000000/HZ);
230 jiffies_64 += lost-1;
232 /* sanity check to ensure we're not always losing ticks */
233 if (lost_count++ > 100) {
234 printk(KERN_WARNING "Losing too many ticks!\n");
235 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
236 printk(KERN_WARNING "Possible reasons for this are:\n");
237 printk(KERN_WARNING " You're running with Speedstep,\n");
238 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
239 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
240 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
246 /* update the monotonic base value */
247 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
248 monotonic_base += cycles_2_ns(this_offset - last_offset);
249 write_sequnlock(&monotonic_lock);
251 /* calculate delay_at_last_interrupt */
252 count = ((LATCH-1) - count) * TICK_SIZE;
253 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
255 /* catch corner case where tick rollover occured
256 * between tsc and pit reads (as noted when
257 * usec delta is > 90% # of usecs/tick)
259 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
/*
 * delay_tsc - busy-wait for a number of counter ticks.
 * @loops: how far the counter must advance before returning.
 *
 * The visible loop condition keeps spinning while (now - bclock) is
 * below @loops.  Both values are unsigned long, so the difference stays
 * correct across a wrap of the counter.
 *
 * NOTE(review): the statements that load bclock and now, and the loop
 * body, are not visible in this listing; presumably they read the TSC -
 * confirm against the complete source.
 */
263 static void delay_tsc(unsigned long loops)
265 unsigned long bclock, now;
272 } while ((now-bclock) < loops);
/*
 * mark_offset_tsc_hpet - per-tick bookkeeping for the TSC timer when the
 * HPET supplies the tick (built only with CONFIG_HPET_TIMER).
 *
 * init_tsc() points timer_tsc.mark_offset here when is_hpet_enabled().
 * Visible sequence:
 *  - take the monotonic_lock write side and remember the previous
 *    64-bit TSC value;
 *  - read HPET_COUNTER, then reread the TSC into
 *    last_tsc_low/last_tsc_high;
 *  - offset = HPET_T0_CMP - hpet_tick; if more than hpet_tick HPET
 *    counts have passed since hpet_last (and hpet_last is non-zero),
 *    add the number of whole ticks missed to jiffies_64;
 *  - store hpet_current in hpet_last;
 *  - advance monotonic_base by the elapsed cycles in nanoseconds and
 *    drop the seqlock;
 *  - set delay_at_last_interrupt to hpet_current - offset and scale it
 *    with ASM_MUL64_REG by hpet_usec_quotient.
 */
275 #ifdef CONFIG_HPET_TIMER
276 static void mark_offset_tsc_hpet(void)
278 unsigned long long this_offset, last_offset;
279 unsigned long offset, temp, hpet_current;
281 write_seqlock(&monotonic_lock);
282 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
284 * It is important that these two operations happen almost at
285 * the same time. We do the RDTSC stuff first, since it's
286 * faster. To avoid any inconsistencies, we need interrupts
290 * Interrupts are just disabled locally since the timer irq
291 * has the SA_INTERRUPT flag set. -arca
293 /* read Pentium cycle counter */
295 hpet_current = hpet_readl(HPET_COUNTER);
296 rdtsc(last_tsc_low, last_tsc_high);
298 /* lost tick compensation */
299 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
300 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
301 int lost_ticks = (offset - hpet_last) / hpet_tick;
302 jiffies_64 += lost_ticks;
304 hpet_last = hpet_current;
306 /* update the monotonic base value */
307 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
308 monotonic_base += cycles_2_ns(this_offset - last_offset);
309 write_sequnlock(&monotonic_lock);
311 /* calculate delay_at_last_interrupt */
313 * Time offset = (hpet delta) * ( usecs per HPET clock )
314 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
315 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
317 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
319 delay_at_last_interrupt = hpet_current - offset;
320 ASM_MUL64_REG(temp, delay_at_last_interrupt,
321 hpet_usec_quotient, delay_at_last_interrupt);
/*
 * Reference values for CPU frequency scaling (CONFIG_CPU_FREQ only).
 *
 * time_cpufreq_notifier() records the frequency (ref_freq) together
 * with the loops_per_jiffy, fast_gettimeoffset_quotient and cpu_khz
 * values that were in effect at that frequency.  Later transitions are
 * scaled from these references with cpufreq_scale().
 */
326 #ifdef CONFIG_CPU_FREQ
327 /* If the CPU frequency is scaled, TSC-based delays will need a different
328 * loops_per_jiffy value to function properly. An exception to this
329 * are modern Intel Pentium 4 processors, where the TSC runs at a constant
330 * speed independent of frequency scaling.
333 static unsigned int ref_freq = 0;
334 static unsigned long loops_per_jiffy_ref = 0;
335 static unsigned int variable_tsc = 1;
338 static unsigned long fast_gettimeoffset_ref = 0;
339 static unsigned long cpu_khz_ref = 0;
/*
 * time_cpufreq_notifier - rescale TSC-derived constants on a cpufreq
 * transition.
 * @nb:  notifier block (not referenced in the visible code).
 * @val: transition phase; CPUFREQ_PRECHANGE and CPUFREQ_POSTCHANGE are
 *       tested.
 * data (declared on a line not visible here) is used as a
 * struct cpufreq_freqs pointer.
 *
 * Runs under write_seqlock_irq(&xtime_lock).  It records the reference
 * values (freq->old, the loops_per_jiffy of freq->cpu,
 * fast_gettimeoffset_quotient and cpu_khz).  Before a frequency
 * increase, or after a decrease, it rescales loops_per_jiffy for
 * freq->cpu to freq->new; cpu_khz, fast_gettimeoffset_quotient and the
 * cycles-to-ns scale are rescaled as well.
 *
 * NOTE(review): the return type, the last parameter, the condition
 * guarding the reference capture, any condition guarding the cpu_khz
 * and quotient updates, and the return statement are not visible in
 * this listing.
 */
343 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
346 struct cpufreq_freqs *freq = data;
348 write_seqlock_irq(&xtime_lock);
350 ref_freq = freq->old;
351 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
353 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
354 cpu_khz_ref = cpu_khz;
358 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
359 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
361 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
364 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
367 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
368 set_cyc2ns_scale(cpu_khz/1000);
373 write_sequnlock_irq(&xtime_lock);
/*
 * Notifier block that routes cpufreq events to time_cpufreq_notifier();
 * registered by cpufreq_tsc().
 */
378 static struct notifier_block time_cpufreq_notifier_block = {
379 .notifier_call = time_cpufreq_notifier
/*
 * cpufreq_tsc - hook the TSC code into cpufreq transitions.
 *
 * Run through core_initcall().  Registers time_cpufreq_notifier_block
 * as a CPUFREQ_TRANSITION_NOTIFIER and returns the result of that
 * registration.  A check for Intel CPUs with family >= 15 comes first.
 *
 * NOTE(review): the statement controlled by that check is not visible
 * in this listing.
 */
383 static int __init cpufreq_tsc(void)
385 /* P4 and above CPU TSC freq doesn't change when CPU frequency changes*/
386 if ((boot_cpu_data.x86 >= 15) && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
389 return cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
391 core_initcall(cpufreq_tsc);
/*
 * init_tsc - set up the TSC as the time source.
 * @override: clock= override string; tested for being non-empty and not
 *            starting with tsc.
 *
 * Visible steps:
 *  - when an override other than tsc was given and, with
 *    CONFIG_HPET_TIMER, the HPET is enabled, print a message that the
 *    override failed and tsc is used;
 *  - seed count2 with LATCH for mark_offset_tsc();
 *  - calibrate the TSC: with the HPET enabled use calibrate_tsc_hpet(),
 *    switch timer_tsc.mark_offset to mark_offset_tsc_hpet and derive
 *    hpet_usec_quotient from hpet_tick and KERNEL_TICK_USEC with
 *    rounding; otherwise use calibrate_tsc();
 *  - store the result in fast_gettimeoffset_quotient;
 *  - derive cpu_khz from the quotient with an asm statement, print the
 *    detected clock rate and set the cycles-to-ns scale from
 *    cpu_khz/1000.
 *
 * NOTE(review): the return statements, the TSC feature checks and the
 * opener of the asm statement are not visible in this listing, so the
 * conditions under which initialisation is refused cannot be documented
 * from here.
 */
396 static int __init init_tsc(char* override)
399 /* check clock override */
400 if (override[0] && strncmp(override,"tsc",3)) {
401 #ifdef CONFIG_HPET_TIMER
402 if (is_hpet_enabled()) {
403 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
412 * If we have APM enabled or the CPU clock speed is variable
413 * (CPU stops clock on HLT or slows clock to save power)
414 * then the TSC timestamps may diverge by up to 1 jiffy from
415 * 'real time' but nothing will break.
416 * The most frequent case is that the CPU is "woken" from a halt
417 * state by the timer interrupt itself, so we get 0 error. In the
418 * rare cases where a driver would "wake" the CPU and request a
419 * timestamp, the maximum error is < 1 jiffy. But timestamps are
420 * still perfectly ordered.
421 * Note that the TSC counter will be reset if APM suspends
422 * to disk; this won't break the kernel, though, 'cuz we're
423 * smart. See arch/i386/kernel/apm.c.
426 * Firstly we have to do a CPU check for chips with
427 * a potentially buggy TSC. At this point we haven't run
428 * the ident/bugs checks so we must run this hook as it
429 * may turn off the TSC flag.
431 * NOTE: this doesn't yet handle SMP 486 machines where only
432 * some CPU's have a TSC. Thats never worked and nobody has
433 * moaned if you have the only one in the world - you fix it!
436 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
439 unsigned long tsc_quotient;
440 #ifdef CONFIG_HPET_TIMER
441 if (is_hpet_enabled()){
442 unsigned long result, remain;
443 printk("Using TSC for gettimeofday\n");
444 tsc_quotient = calibrate_tsc_hpet(NULL);
445 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
447 * Math to calculate hpet to usec multiplier
448 * Look for the comments at get_offset_tsc_hpet()
450 ASM_DIV64_REG(result, remain, hpet_tick,
451 0, KERNEL_TICK_USEC);
452 if (remain > (hpet_tick >> 1))
453 result++; /* rounding the result */
455 hpet_usec_quotient = result;
459 tsc_quotient = calibrate_tsc();
463 fast_gettimeoffset_quotient = tsc_quotient;
466 * We could be more selective here I suspect
467 * and just enable this for the next intel chips ?
469 /* report CPU clock rate in Hz.
470 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
471 * clock/second. Our precision is about 100 ppm.
473 { unsigned long eax=0, edx=1000;
475 :"=a" (cpu_khz), "=d" (edx)
477 "0" (eax), "1" (edx));
478 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
480 set_cyc2ns_scale(cpu_khz/1000);
/*
 * tsc_setup - handler for the notsc boot parameter, registered with
 * __setup().
 * @str: parameter text (not referenced in the visible code).
 *
 * Two variants are defined, selected on CONFIG_X86_TSC.  The second one
 * only prints a warning that the kernel was compiled with
 * CONFIG_X86_TSC and the TSC cannot be disabled.
 *
 * NOTE(review): the body of the first variant, the preprocessor lines
 * separating the two variants and the return statements are not visible
 * in this listing.
 */
487 #ifndef CONFIG_X86_TSC
488 /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
490 static int __init tsc_setup(char *str)
496 static int __init tsc_setup(char *str)
498 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
499 "cannot disable TSC.\n");
503 __setup("notsc", tsc_setup);
/*
 * Operations table for the TSC time source.  The visible members wire
 * mark_offset, get_offset and monotonic_clock to the TSC routines in
 * this file.  init_tsc() repoints mark_offset at mark_offset_tsc_hpet
 * when the HPET is enabled.
 *
 * NOTE(review): further members and the closing brace are not visible
 * in this listing.
 */
507 /************************************************************/
509 /* tsc timer_opts struct */
510 struct timer_opts timer_tsc = {
513 .mark_offset = mark_offset_tsc,
514 .get_offset = get_offset_tsc,
515 .monotonic_clock = monotonic_clock_tsc,