patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / arch / i386 / kernel / timers / timer_tsc.c
1 /*
2  * This code largely moved from arch/i386/kernel/time.c.
3  * See comments there for proper credits.
4  */
5
6 #include <linux/spinlock.h>
7 #include <linux/init.h>
8 #include <linux/timex.h>
9 #include <linux/errno.h>
10 #include <linux/cpufreq.h>
11 #include <linux/string.h>
12 #include <linux/jiffies.h>
13
14 #include <asm/timer.h>
15 #include <asm/io.h>
16 /* processor.h for the tsc_disable flag */
17 #include <asm/processor.h>
18
19 #include "io_ports.h"
20 #include "mach_timer.h"
21
22 #include <asm/hpet.h>
23
24 #ifdef CONFIG_HPET_TIMER
25 static unsigned long hpet_usec_quotient;
26 static unsigned long hpet_last;
27 struct timer_opts timer_tsc;
28 #endif
29
30 static inline void cpufreq_delayed_get(void);
31
32 int tsc_disable __initdata = 0;
33
34 extern spinlock_t i8253_lock;
35
36 static int use_tsc;
37 /* Number of usecs that the last interrupt was delayed */
38 static int delay_at_last_interrupt;
39
40 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
41 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
42 static unsigned long long monotonic_base;
43 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
44
45 /* convert from cycles(64bits) => nanoseconds (64bits)
46  *  basic equation:
47  *              ns = cycles / (freq / ns_per_sec)
48  *              ns = cycles * (ns_per_sec / freq)
49  *              ns = cycles * (10^9 / (cpu_mhz * 10^6))
50  *              ns = cycles * (10^3 / cpu_mhz)
51  *
52  *      Then we use scaling math (suggested by george@mvista.com) to get:
53  *              ns = cycles * (10^3 * SC / cpu_mhz) / SC
54  *              ns = cycles * cyc2ns_scale / SC
55  *
56  *      And since SC is a constant power of two, we can convert the div
57  *  into a shift.   
58  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
59  */
/* Fixed-point multiplier: nanoseconds per cycle, scaled by 2^CYC2NS_SCALE_FACTOR. */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute cyc2ns_scale for a CPU clock of cpu_mhz megahertz.
 *
 * A cpu_mhz of zero (for instance a caller passing cpu_khz/1000 while
 * cpu_khz is below 1000) would be a division by zero; such a request is
 * ignored and the previous scale is kept.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
{
	if (!cpu_mhz)
		return;

	cyc2ns_scale = (1000UL << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}

/*
 * Convert a 64-bit cycle count into nanoseconds:
 *	ns = cyc * cyc2ns_scale / 2^CYC2NS_SCALE_FACTOR
 *
 * The straightforward (cyc * cyc2ns_scale) >> SC wraps around 64 bits
 * once cyc gets large, even when the final result would fit.  Splitting
 * cyc into its upper part and its low SC bits gives exactly the same
 * value without the oversized intermediate product:
 *	cyc = quot * 2^SC + rem
 *	ns  = quot * scale + ((rem * scale) >> SC)
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	unsigned long long quot = cyc >> CYC2NS_SCALE_FACTOR;
	unsigned long long rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);

	return quot * cyc2ns_scale +
		((rem * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
}
72
73
/*
 * Cached *multiplier* to convert TSC counts to microseconds
 * (see the equation in get_offset_tsc()).
 * Equal to 2^32 * (1 / (clocks per usec) ).
 * Assigned in init_tsc() and rescaled by the cpufreq notifier.
 */
static unsigned long fast_gettimeoffset_quotient;

/* Older of the two remembered PIT readings used by mark_offset_tsc(). */
static int count2;
82
83 static unsigned long get_offset_tsc(void)
84 {
85         register unsigned long eax, edx;
86
87         /* Read the Time Stamp Counter */
88
89         rdtsc(eax,edx);
90
91         /* .. relative to previous jiffy (32 bits is enough) */
92         eax -= last_tsc_low;    /* tsc_low delta */
93
94         /*
95          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
96          *             = (tsc_low delta) * (usecs_per_clock)
97          *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
98          *
99          * Using a mull instead of a divl saves up to 31 clock cycles
100          * in the critical path.
101          */
102
103         __asm__("mull %2"
104                 :"=a" (eax), "=d" (edx)
105                 :"rm" (fast_gettimeoffset_quotient),
106                  "0" (eax));
107
108         /* our adjusted time offset in microseconds */
109         return delay_at_last_interrupt + edx;
110 }
111
112 static unsigned long long monotonic_clock_tsc(void)
113 {
114         unsigned long long last_offset, this_offset, base;
115         unsigned seq;
116         
117         /* atomically read monotonic base & last_offset */
118         do {
119                 seq = read_seqbegin(&monotonic_lock);
120                 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
121                 base = monotonic_base;
122         } while (read_seqretry(&monotonic_lock, seq));
123
124         /* Read the Time Stamp Counter */
125         rdtscll(this_offset);
126
127         /* return the value in ns */
128         return base + cycles_2_ns(this_offset - last_offset);
129 }
130
131 /*
132  * Scheduler clock - returns current time in nanosec units.
133  */
134 unsigned long long sched_clock(void)
135 {
136         unsigned long long this_offset;
137
138         /*
139          * In the NUMA case we dont use the TSC as they are not
140          * synchronized across all CPUs.
141          */
142 #ifndef CONFIG_NUMA
143         if (!use_tsc)
144 #endif
145                 /* no locking but a rare wrong value is not a big deal */
146                 return jiffies_64 * (1000000000 / HZ);
147
148         /* Read the Time Stamp Counter */
149         rdtscll(this_offset);
150
151         /* return the value in ns */
152         return cycles_2_ns(this_offset);
153 }
154
155
156 static void mark_offset_tsc(void)
157 {
158         unsigned long lost,delay;
159         unsigned long delta = last_tsc_low;
160         int count;
161         int countmp;
162         static int count1 = 0;
163         unsigned long long this_offset, last_offset;
164         static int lost_count = 0;
165         
166         write_seqlock(&monotonic_lock);
167         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
168         /*
169          * It is important that these two operations happen almost at
170          * the same time. We do the RDTSC stuff first, since it's
171          * faster. To avoid any inconsistencies, we need interrupts
172          * disabled locally.
173          */
174
175         /*
176          * Interrupts are just disabled locally since the timer irq
177          * has the SA_INTERRUPT flag set. -arca
178          */
179         
180         /* read Pentium cycle counter */
181
182         rdtsc(last_tsc_low, last_tsc_high);
183
184         spin_lock(&i8253_lock);
185         outb_p(0x00, PIT_MODE);     /* latch the count ASAP */
186
187         count = inb_p(PIT_CH0);    /* read the latched count */
188         count |= inb(PIT_CH0) << 8;
189
190         /*
191          * VIA686a test code... reset the latch if count > max + 1
192          * from timer_pit.c - cjb
193          */
194         if (count > LATCH) {
195                 outb_p(0x34, PIT_MODE);
196                 outb_p(LATCH & 0xff, PIT_CH0);
197                 outb(LATCH >> 8, PIT_CH0);
198                 count = LATCH - 1;
199         }
200
201         spin_unlock(&i8253_lock);
202
203         if (pit_latch_buggy) {
204                 /* get center value of last 3 time lutch */
205                 if ((count2 >= count && count >= count1)
206                     || (count1 >= count && count >= count2)) {
207                         count2 = count1; count1 = count;
208                 } else if ((count1 >= count2 && count2 >= count)
209                            || (count >= count2 && count2 >= count1)) {
210                         countmp = count;count = count2;
211                         count2 = count1;count1 = countmp;
212                 } else {
213                         count2 = count1; count1 = count; count = count1;
214                 }
215         }
216
217         /* lost tick compensation */
218         delta = last_tsc_low - delta;
219         {
220                 register unsigned long eax, edx;
221                 eax = delta;
222                 __asm__("mull %2"
223                 :"=a" (eax), "=d" (edx)
224                 :"rm" (fast_gettimeoffset_quotient),
225                  "0" (eax));
226                 delta = edx;
227         }
228         delta += delay_at_last_interrupt;
229         lost = delta/(1000000/HZ);
230         delay = delta%(1000000/HZ);
231         if (lost >= 2) {
232                 jiffies_64 += lost-1;
233
234                 /* sanity check to ensure we're not always losing ticks */
235                 if (lost_count++ > 100) {
236                         printk(KERN_WARNING "Losing too many ticks!\n");
237                         printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
238                         printk(KERN_WARNING "Possible reasons for this are:\n");
239                         printk(KERN_WARNING "  You're running with Speedstep,\n");
240                         printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
241                         printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
242                         printk(KERN_WARNING "Falling back to a sane timesource now.\n");
243
244                         clock_fallback();
245                 }
246                 /* ... but give the TSC a fair chance */
247                 if (lost_count > 25)
248                         cpufreq_delayed_get();
249         } else
250                 lost_count = 0;
251         /* update the monotonic base value */
252         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
253         monotonic_base += cycles_2_ns(this_offset - last_offset);
254         write_sequnlock(&monotonic_lock);
255
256         /* calculate delay_at_last_interrupt */
257         count = ((LATCH-1) - count) * TICK_SIZE;
258         delay_at_last_interrupt = (count + LATCH/2) / LATCH;
259
260         /* catch corner case where tick rollover occured 
261          * between tsc and pit reads (as noted when 
262          * usec delta is > 90% # of usecs/tick)
263          */
264         if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
265                 jiffies_64++;
266 }
267
/*
 * Busy-wait until at least 'loops' TSC cycles (low 32 bits) have
 * elapsed.  The unsigned subtraction keeps the comparison valid
 * across a wrap of the low word.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long tsc_start, tsc_now;

	rdtscl(tsc_start);
	for (;;) {
		rep_nop();
		rdtscl(tsc_now);
		if ((tsc_now - tsc_start) >= loops)
			break;
	}
}
279
280 #ifdef CONFIG_HPET_TIMER
281 static void mark_offset_tsc_hpet(void)
282 {
283         unsigned long long this_offset, last_offset;
284         unsigned long offset, temp, hpet_current;
285
286         write_seqlock(&monotonic_lock);
287         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
288         /*
289          * It is important that these two operations happen almost at
290          * the same time. We do the RDTSC stuff first, since it's
291          * faster. To avoid any inconsistencies, we need interrupts
292          * disabled locally.
293          */
294         /*
295          * Interrupts are just disabled locally since the timer irq
296          * has the SA_INTERRUPT flag set. -arca
297          */
298         /* read Pentium cycle counter */
299
300         hpet_current = hpet_readl(HPET_COUNTER);
301         rdtsc(last_tsc_low, last_tsc_high);
302
303         /* lost tick compensation */
304         offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
305         if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
306                 int lost_ticks = (offset - hpet_last) / hpet_tick;
307                 jiffies_64 += lost_ticks;
308         }
309         hpet_last = hpet_current;
310
311         /* update the monotonic base value */
312         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
313         monotonic_base += cycles_2_ns(this_offset - last_offset);
314         write_sequnlock(&monotonic_lock);
315
316         /* calculate delay_at_last_interrupt */
317         /*
318          * Time offset = (hpet delta) * ( usecs per HPET clock )
319          *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
320          *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
321          * Where,
322          * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
323          */
324         delay_at_last_interrupt = hpet_current - offset;
325         ASM_MUL64_REG(temp, delay_at_last_interrupt,
326                         hpet_usec_quotient, delay_at_last_interrupt);
327 }
328 #endif
329
330
331 #ifdef CONFIG_CPU_FREQ
332 #include <linux/workqueue.h>
333
334 static unsigned int cpufreq_delayed_issched = 0;
335 static unsigned int cpufreq_init = 0;
336 static struct work_struct cpufreq_delayed_get_work;
337
338 static void handle_cpufreq_delayed_get(void *v)
339 {
340         unsigned int cpu;
341         for_each_online_cpu(cpu) {
342                 cpufreq_get(cpu);
343         }
344         cpufreq_delayed_issched = 0;
345 }
346
347 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
348  * to verify the CPU frequency the timing core thinks the CPU is running
349  * at is still correct.
350  */
351 static inline void cpufreq_delayed_get(void) 
352 {
353         if (cpufreq_init && !cpufreq_delayed_issched) {
354                 cpufreq_delayed_issched = 1;
355                 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
356                 schedule_work(&cpufreq_delayed_get_work);
357         }
358 }
359
/*
 * If the CPU frequency is scaled, TSC-based delays will need a different
 * loops_per_jiffy value to function properly.  The values below are the
 * reference point captured on the first notifier call; later values are
 * scaled from them.
 */
static unsigned long loops_per_jiffy_ref;
static unsigned int  ref_freq;

#ifndef CONFIG_SMP
static unsigned long cpu_khz_ref;
static unsigned long fast_gettimeoffset_ref;
#endif
371
372 static int
373 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
374                        void *data)
375 {
376         struct cpufreq_freqs *freq = data;
377
378         write_seqlock_irq(&xtime_lock);
379         if (!ref_freq) {
380                 ref_freq = freq->old;
381                 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
382 #ifndef CONFIG_SMP
383                 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
384                 cpu_khz_ref = cpu_khz;
385 #endif
386         }
387
388         if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
389             (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
390             (val == CPUFREQ_RESUMECHANGE)) {
391                 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
392                         cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
393 #ifndef CONFIG_SMP
394                 if (cpu_khz)
395                         cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
396                 if (use_tsc) {
397                         if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
398                                 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
399                                 set_cyc2ns_scale(cpu_khz/1000);
400                         }
401                 }
402 #endif
403         }
404         write_sequnlock_irq(&xtime_lock);
405
406         return 0;
407 }
408
409 static struct notifier_block time_cpufreq_notifier_block = {
410         .notifier_call  = time_cpufreq_notifier
411 };
412
413
414 static int __init cpufreq_tsc(void)
415 {
416         int ret;
417         INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
418         ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
419         if (!ret)
420                 cpufreq_init = 1;
421         return ret;
422 }
423 core_initcall(cpufreq_tsc);
424
425 #else /* CONFIG_CPU_FREQ */
/* Without CONFIG_CPU_FREQ there is nothing to re-check: no-op stub. */
static inline void cpufreq_delayed_get(void)
{
}
427 #endif 
428
429
430 static int __init init_tsc(char* override)
431 {
432
433         /* check clock override */
434         if (override[0] && strncmp(override,"tsc",3)) {
435 #ifdef CONFIG_HPET_TIMER
436                 if (is_hpet_enabled()) {
437                         printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
438                 } else
439 #endif
440                 {
441                         return -ENODEV;
442                 }
443         }
444
445         /*
446          * If we have APM enabled or the CPU clock speed is variable
447          * (CPU stops clock on HLT or slows clock to save power)
448          * then the TSC timestamps may diverge by up to 1 jiffy from
449          * 'real time' but nothing will break.
450          * The most frequent case is that the CPU is "woken" from a halt
451          * state by the timer interrupt itself, so we get 0 error. In the
452          * rare cases where a driver would "wake" the CPU and request a
453          * timestamp, the maximum error is < 1 jiffy. But timestamps are
454          * still perfectly ordered.
455          * Note that the TSC counter will be reset if APM suspends
456          * to disk; this won't break the kernel, though, 'cuz we're
457          * smart.  See arch/i386/kernel/apm.c.
458          */
459         /*
460          *      Firstly we have to do a CPU check for chips with
461          *      a potentially buggy TSC. At this point we haven't run
462          *      the ident/bugs checks so we must run this hook as it
463          *      may turn off the TSC flag.
464          *
465          *      NOTE: this doesn't yet handle SMP 486 machines where only
466          *      some CPU's have a TSC. Thats never worked and nobody has
467          *      moaned if you have the only one in the world - you fix it!
468          */
469
470         count2 = LATCH; /* initialize counter for mark_offset_tsc() */
471
472         if (cpu_has_tsc) {
473                 unsigned long tsc_quotient;
474 #ifdef CONFIG_HPET_TIMER
475                 if (is_hpet_enabled()){
476                         unsigned long result, remain;
477                         printk("Using TSC for gettimeofday\n");
478                         tsc_quotient = calibrate_tsc_hpet(NULL);
479                         timer_tsc.mark_offset = &mark_offset_tsc_hpet;
480                         /*
481                          * Math to calculate hpet to usec multiplier
482                          * Look for the comments at get_offset_tsc_hpet()
483                          */
484                         ASM_DIV64_REG(result, remain, hpet_tick,
485                                         0, KERNEL_TICK_USEC);
486                         if (remain > (hpet_tick >> 1))
487                                 result++; /* rounding the result */
488
489                         hpet_usec_quotient = result;
490                 } else
491 #endif
492                 {
493                         tsc_quotient = calibrate_tsc();
494                 }
495
496                 if (tsc_quotient) {
497                         fast_gettimeoffset_quotient = tsc_quotient;
498                         use_tsc = 1;
499                         /*
500                          *      We could be more selective here I suspect
501                          *      and just enable this for the next intel chips ?
502                          */
503                         /* report CPU clock rate in Hz.
504                          * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
505                          * clock/second. Our precision is about 100 ppm.
506                          */
507                         {       unsigned long eax=0, edx=1000;
508                                 __asm__("divl %2"
509                                 :"=a" (cpu_khz), "=d" (edx)
510                                 :"r" (tsc_quotient),
511                                 "0" (eax), "1" (edx));
512                                 printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
513                         }
514                         set_cyc2ns_scale(cpu_khz/1000);
515                         return 0;
516                 }
517         }
518         return -ENODEV;
519 }
520
521 #ifndef CONFIG_X86_TSC
522 /* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
523  * in cpu/common.c */
524 static int __init tsc_setup(char *str)
525 {
526         tsc_disable = 1;
527         return 1;
528 }
529 #else
530 static int __init tsc_setup(char *str)
531 {
532         printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
533                                 "cannot disable TSC.\n");
534         return 1;
535 }
536 #endif
537 __setup("notsc", tsc_setup);
538
539
540
541 /************************************************************/
542
543 /* tsc timer_opts struct */
544 struct timer_opts timer_tsc = {
545         .name =         "tsc",
546         .init =         init_tsc,
547         .mark_offset =  mark_offset_tsc, 
548         .get_offset =   get_offset_tsc,
549         .monotonic_clock =      monotonic_clock_tsc,
550         .delay = delay_tsc,
551 };