2 * linux/arch/i386/kernel/time.c
4 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
6 * This file contains the PC-specific time handling details:
7 * reading the RTC at bootup, etc..
8 * 1994-07-02 Alan Modra
9 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
10 * 1995-03-26 Markus Kuhn
11 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
12 * precision CMOS clock update
13 * 1996-05-03 Ingo Molnar
14 * fixed time warps in do_[slow|fast]_gettimeoffset()
15 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
16 * "A Kernel Model for Precision Timekeeping" by Dave Mills
17 * 1998-09-05 (Various)
18 * More robust do_fast_gettimeoffset() algorithm implemented
19 * (works with APM, Cyrix 6x86MX and Centaur C6),
20 * monotonic gettimeofday() with fast_get_timeoffset(),
21 * drift-proof precision TSC calibration on boot
22 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
23 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
24 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
25 * 1998-12-16 Andrea Arcangeli
26 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
27 * because was not accounting lost_ticks.
28 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
29 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
30 * serialize accesses to xtime/lost_ticks).
33 #include <linux/errno.h>
34 #include <linux/sched.h>
35 #include <linux/kernel.h>
36 #include <linux/param.h>
37 #include <linux/string.h>
39 #include <linux/interrupt.h>
40 #include <linux/time.h>
41 #include <linux/delay.h>
42 #include <linux/init.h>
43 #include <linux/smp.h>
44 #include <linux/module.h>
45 #include <linux/sysdev.h>
46 #include <linux/bcd.h>
47 #include <linux/efi.h>
48 #include <linux/mca.h>
49 #include <linux/sysctl.h>
50 #include <linux/percpu.h>
51 #include <linux/kernel_stat.h>
52 #include <linux/posix-timers.h>
58 #include <asm/delay.h>
59 #include <asm/mpspec.h>
60 #include <asm/uaccess.h>
61 #include <asm/processor.h>
62 #include <asm/timer.h>
63 #include <asm/sections.h>
65 #include "mach_time.h"
67 #include <linux/timex.h>
71 #include <asm/arch_hooks.h>
73 #include <xen/evtchn.h>
74 #include <xen/interface/vcpu.h>
/* Globals shared with the PIT/vsyscall machinery and exported timers. */
76 int pit_latch_buggy; /* extern */
78 unsigned long vxtime_hz = PIT_TICK_RATE;
79 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
80 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
81 struct timespec __xtime __section_xtime;
82 struct timezone __sys_tz __section_sys_tz;
/* Length of one timer tick in various units. */
84 #define USEC_PER_TICK (USEC_PER_SEC / HZ)
85 #define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
86 #define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
88 #define NS_SCALE 10 /* 2^10, carefully chosen */
89 #define US_SCALE 32 /* 2^32, arbitrarily chosen */
91 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
92 EXPORT_SYMBOL(cpu_khz);
/* Serializes all CMOS/RTC register access (see rtc_cmos_read/write below). */
94 DEFINE_SPINLOCK(rtc_lock);
95 EXPORT_SYMBOL(rtc_lock);
97 extern struct init_timer_opts timer_tsc_init;
98 extern struct timer_opts timer_tsc;
99 #define timer_none timer_tsc
101 /* These are periodically updated in shared_info, and then copied here. */
102 struct shadow_time_info {
103 u64 tsc_timestamp; /* TSC at last update of time vals. */
104 u64 system_timestamp; /* Time, in nanosecs, since boot. */
/* NOTE(review): the remaining members used elsewhere in this file
 * (tsc_to_nsec_mul, tsc_to_usec_mul, tsc_shift, version) and the closing
 * brace appear to be missing from this extraction -- confirm against the
 * full source. */
110 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
/* Last wall-clock sample copied from Xen, plus its version stamp. */
111 static struct timespec shadow_tv;
112 static u32 shadow_tv_version;
114 /* Keep track of last time we did processing/updating of jiffies and xtime. */
115 static u64 processed_system_time; /* System time (ns) at last processing. */
116 static DEFINE_PER_CPU(u64, processed_system_time);
118 /* How much CPU time was spent blocked and how much was 'stolen'? */
119 static DEFINE_PER_CPU(u64, processed_stolen_time);
120 static DEFINE_PER_CPU(u64, processed_blocked_time);
122 /* Current runstate of each CPU (updated automatically by the hypervisor). */
123 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
125 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
126 #define NS_PER_TICK (1000000000LL/HZ)
/* Fold an out-of-range nanosecond count back into [0, NSEC_PER_SEC),
 * carrying whole seconds into *sec. */
128 static inline void __normalize_time(time_t *sec, s64 *nsec)
130 while (*nsec >= NSEC_PER_SEC) {
131 (*nsec) -= NSEC_PER_SEC;
/* NOTE(review): the (*sec) increment/decrement lines and the negative-nsec
 * while-loop header appear to be missing from this extraction. */
135 (*nsec) += NSEC_PER_SEC;
140 /* Does this guest OS track Xen time, or set its wall clock independently? */
141 static int independent_wallclock = 0;
/* Boot option "independent_wallclock": guest manages its own wall clock. */
142 static int __init __independent_wallclock(char *str)
144 independent_wallclock = 1;
147 __setup("independent_wallclock", __independent_wallclock);
149 /* Permitted clock jitter, in nsecs, beyond which a warning will be printed. */
150 static unsigned long permitted_clock_jitter = 10000000UL; /* 10ms */
/* Boot option "permitted_clock_jitter=<nsec>" overrides the default above. */
151 static int __init __permitted_clock_jitter(char *str)
153 permitted_clock_jitter = simple_strtoul(str, NULL, 0);
156 __setup("permitted_clock_jitter=", __permitted_clock_jitter);
159 int tsc_disable __devinitdata = 0;
/* Busy-wait until the TSC has advanced by at least 'loops' cycles.
 * NOTE(review): the TSC reads that set bclock/now are missing from this
 * extraction -- confirm against the full source. */
162 static void delay_tsc(unsigned long loops)
164 unsigned long bclock, now;
170 } while ((now - bclock) < loops);
/* TSC-based timer ops; initializer body not visible here. */
173 struct timer_opts timer_tsc = {
179 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
180 * yielding a 64-bit result.
182 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
/* 64-bit path: product = (delta * mul_frac) >> 32 via mul + shrd.
 * NOTE(review): the 'shift' pre-scaling and the 32-bit asm variant appear
 * to be missing from this extraction. */
192 "mul %%rdx ; shrd $32,%%rdx,%%rax"
193 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
/* Derive cpu_khz from Xen's TSC scaling parameters:
 * cpu_khz = (10^6 << 32) / tsc_to_system_mul, adjusted by tsc_shift. */
198 void init_cpu_khz(void)
200 u64 __cpu_khz = 1000000ULL << US_SCALE;
201 struct vcpu_time_info *info;
202 info = &HYPERVISOR_shared_info->vcpu_info[0].time;
203 do_div(__cpu_khz, info->tsc_to_system_mul);
204 if (info->tsc_shift < 0)
205 cpu_khz = __cpu_khz << -info->tsc_shift;
/* NOTE(review): an 'else' line appears to be missing before this line. */
207 cpu_khz = __cpu_khz >> info->tsc_shift;
/* Nanoseconds elapsed since shadow->tsc_timestamp, from the current TSC.
 * NOTE(review): the local declarations and the TSC read that sets 'now'
 * are missing from this extraction. */
210 static u64 get_nsec_offset(struct shadow_time_info *shadow)
214 delta = now - shadow->tsc_timestamp;
215 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
/* Same as above but scaled to microseconds via tsc_to_usec_mul. */
218 static unsigned long get_usec_offset(struct shadow_time_info *shadow)
222 delta = now - shadow->tsc_timestamp;
223 return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
/* Set xtime and wall_to_monotonic from a (sec, nsec) wall-clock sample,
 * rebased onto processed_system_time. Caller must hold xtime_lock. */
226 static void __update_wallclock(time_t sec, long nsec)
228 long wtm_nsec, xtime_nsec;
229 time_t wtm_sec, xtime_sec;
232 /* Adjust wall-clock time base based on jiffies ticks. */
233 wc_nsec = processed_system_time;
234 wc_nsec += sec * (u64)NSEC_PER_SEC;
/* NOTE(review): the declarations of wc_nsec/tmp and the nsec addition
 * appear to be missing from this extraction. */
237 /* Split wallclock base into seconds and nanoseconds. */
239 xtime_nsec = do_div(tmp, 1000000000);
240 xtime_sec = (time_t)tmp;
/* Preserve the monotonic clock: shift wall_to_monotonic by the same
 * amount the wall clock moves. */
242 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
243 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
245 set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
246 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
/* Copy Xen's wall-clock fields into shadow_tv using the wc_version
 * seqlock protocol: retry while the version is odd (update in progress)
 * or changed during the read. */
251 static void update_wallclock(void)
253 shared_info_t *s = HYPERVISOR_shared_info;
256 shadow_tv_version = s->wc_version;
258 shadow_tv.tv_sec = s->wc_sec;
259 shadow_tv.tv_nsec = s->wc_nsec;
261 } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
/* Only propagate into xtime if we track Xen's wall clock. */
263 if (!independent_wallclock)
264 __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
268 * Reads a consistent set of time-base values from Xen, into a shadow data
/* area, for this CPU. Retries until the per-vCPU version is even and
 * unchanged across the copy (Xen's seqlock convention). */
271 static void get_time_values_from_xen(void)
273 shared_info_t *s = HYPERVISOR_shared_info;
274 struct vcpu_time_info *src;
275 struct shadow_time_info *dst;
277 src = &s->vcpu_info[smp_processor_id()].time;
278 dst = &per_cpu(shadow_time, smp_processor_id());
281 dst->version = src->version;
283 dst->tsc_timestamp = src->tsc_timestamp;
284 dst->system_timestamp = src->system_time;
285 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
286 dst->tsc_shift = src->tsc_shift;
288 } while ((src->version & 1) | (dst->version ^ src->version));
/* Derive the usec multiplier once per refresh rather than per query. */
290 dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
/* Return nonzero iff this CPU's shadow copy matches Xen's current
 * per-vCPU time version (i.e. no refresh is needed). */
293 static inline int time_values_up_to_date(int cpu)
295 struct vcpu_time_info *src;
296 struct shadow_time_info *dst;
298 src = &HYPERVISOR_shared_info->vcpu_info[cpu].time;
299 dst = &per_cpu(shadow_time, cpu);
302 return (dst->version == src->version);
306 * This is a special lock that is owned by the CPU and holds the index
307 * register we are working with. It is required for NMI access to the
308 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
310 volatile unsigned long cmos_lock = 0;
311 EXPORT_SYMBOL(cmos_lock);
313 /* Routines for accessing the CMOS RAM/RTC. */
/* Read one CMOS register: select via port 0, read via port 1, bracketed
 * by the NMI-safe cmos_lock prefix/suffix. */
314 unsigned char rtc_cmos_read(unsigned char addr)
317 lock_cmos_prefix(addr);
318 outb_p(addr, RTC_PORT(0));
319 val = inb_p(RTC_PORT(1));
320 lock_cmos_suffix(addr);
/* NOTE(review): the 'val' declaration and 'return val;' appear to be
 * missing from this extraction. */
323 EXPORT_SYMBOL(rtc_cmos_read);
/* Write one CMOS register, same locking discipline as the read path. */
325 void rtc_cmos_write(unsigned char val, unsigned char addr)
327 lock_cmos_prefix(addr);
328 outb_p(addr, RTC_PORT(0));
329 outb_p(val, RTC_PORT(1));
330 lock_cmos_suffix(addr);
332 EXPORT_SYMBOL(rtc_cmos_write);
335 * This version of gettimeofday has microsecond resolution
336 * and better than microsecond precision on fast x86 machines with TSC.
338 void do_gettimeofday(struct timeval *tv)
341 unsigned long usec, sec;
342 unsigned long max_ntp_tick;
345 struct shadow_time_info *shadow;
346 u32 local_time_version;
349 shadow = &per_cpu(shadow_time, cpu);
/* Retry loop: re-read if either xtime_lock or this CPU's shadow time
 * changed underneath us. */
352 local_time_version = shadow->version;
353 seq = read_seqbegin(&xtime_lock);
355 usec = get_usec_offset(shadow);
358 * If time_adjust is negative then NTP is slowing the clock
359 * so make sure not to go into next possible interval.
360 * Better to lose some accuracy than have time go backwards..
362 if (unlikely(time_adjust < 0)) {
363 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
364 usec = min(usec, max_ntp_tick);
368 usec += (xtime.tv_nsec / NSEC_PER_USEC);
/* Add system time accumulated since xtime was last updated. */
370 nsec = shadow->system_timestamp - processed_system_time;
371 __normalize_time(&sec, &nsec);
372 usec += (long)nsec / NSEC_PER_USEC;
374 if (unlikely(!time_values_up_to_date(cpu))) {
376 * We may have blocked for a long time,
377 * rendering our calculations invalid
378 * (e.g. the time delta may have
379 * overflowed). Detect that and recalculate
382 get_time_values_from_xen();
385 } while (read_seqretry(&xtime_lock, seq) ||
386 (local_time_version != shadow->version));
/* Carry whole seconds out of the microsecond field. */
390 while (usec >= USEC_PER_SEC) {
391 usec -= USEC_PER_SEC;
/* NOTE(review): the sec increment, tv assignment, and 'sec' seeding from
 * xtime.tv_sec appear to be missing from this extraction. */
399 EXPORT_SYMBOL(do_gettimeofday);
/* Set the wall clock. In dom0 (tracking Xen time) the request is pushed
 * to the hypervisor; with independent_wallclock the guest updates its
 * own xtime locally. Returns 0 on success (error paths not all visible). */
401 int do_settimeofday(struct timespec *tv)
406 struct shadow_time_info *shadow;
/* Reject out-of-range nanoseconds up front. */
409 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
413 shadow = &per_cpu(shadow_time, cpu);
415 write_seqlock_irq(&xtime_lock);
418 * Ensure we don't get blocked for a long time so that our time delta
419 * overflows. If that were to happen then our shadow time values would
420 * be stale, so we can retry with fresh ones.
423 nsec = tv->tv_nsec - get_nsec_offset(shadow);
424 if (time_values_up_to_date(cpu))
426 get_time_values_from_xen();
429 __normalize_time(&sec, &nsec);
/* Privileged domain: hand the new time to Xen unless independent. */
431 if (is_initial_xendomain() && !independent_wallclock) {
432 op.cmd = DOM0_SETTIME;
433 op.u.settime.secs = sec;
434 op.u.settime.nsecs = nsec;
435 op.u.settime.system_time = shadow->system_timestamp;
436 HYPERVISOR_dom0_op(&op);
/* Independent wall clock: rebase onto boot and update xtime directly. */
438 } else if (independent_wallclock) {
439 nsec -= shadow->system_timestamp;
440 __normalize_time(&sec, &nsec);
441 __update_wallclock(sec, nsec);
444 write_sequnlock_irq(&xtime_lock);
452 EXPORT_SYMBOL(do_settimeofday);
454 static void sync_xen_wallclock(unsigned long dummy);
455 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
/* Periodic timer callback (dom0 only): push the NTP-synced wall clock
 * back to Xen once per minute. No-op unless NTP-synced, dom0, and
 * tracking Xen time. */
456 static void sync_xen_wallclock(unsigned long dummy)
462 if (!ntp_synced() || independent_wallclock || !is_initial_xendomain())
465 write_seqlock_irq(&xtime_lock);
468 nsec = xtime.tv_nsec;
469 __normalize_time(&sec, &nsec);
471 op.cmd = DOM0_SETTIME;
472 op.u.settime.secs = sec;
473 op.u.settime.nsecs = nsec;
474 op.u.settime.system_time = processed_system_time;
475 HYPERVISOR_dom0_op(&op);
479 write_sequnlock_irq(&xtime_lock);
481 /* Once per minute. */
482 mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
/* Update the hardware RTC minutes/seconds to 'nowtime'. Only meaningful
 * in dom0 tracking Xen time; uses the EFI or mach-specific backend. */
485 static int set_rtc_mmss(unsigned long nowtime)
490 if (independent_wallclock || !is_initial_xendomain())
493 /* gets recalled with irq locally disabled */
494 spin_lock_irqsave(&rtc_lock, flags)
496 retval = efi_set_rtc_mmss(nowtime);
498 retval = mach_set_rtc_mmss(nowtime);
499 spin_unlock_irqrestore(&rtc_lock, flags);
504 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
505 * Note: This function is required to return accurate
506 * time even in the absence of multiple timer ticks.
508 unsigned long long monotonic_clock(void)
511 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
513 u32 local_time_version;
/* Retry until the shadow snapshot was stable for the whole read. */
516 local_time_version = shadow->version;
518 time = shadow->system_timestamp + get_nsec_offset(shadow);
519 if (!time_values_up_to_date(cpu))
520 get_time_values_from_xen();
522 } while (local_time_version != shadow->version);
528 EXPORT_SYMBOL(monotonic_clock);
/* sched_clock: the scheduler's ns clock is just the monotonic clock. */
530 unsigned long long sched_clock(void)
532 return monotonic_clock();
/* Return the PC to charge a profiling hit to; if we interrupted a lock
 * function, attribute the hit to its caller found on the stack. */
535 unsigned long profile_pc(struct pt_regs *regs)
537 unsigned long pc = instruction_pointer(regs);
539 /* Assume the lock function has either no stack frame or a copy
541 Eflags always has bits 22 and up cleared unlike kernel addresses. */
542 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
/* NOTE(review): regs->rsp is the x86-64 field name despite the i386
 * header comment -- confirm which architecture this tree targets. */
543 unsigned long *sp = (unsigned long *)regs->rsp;
551 EXPORT_SYMBOL(profile_pc);
/* Per-CPU timer interrupt. Advances jiffies/xtime from Xen system time,
 * then accounts stolen, blocked, and user/system CPU time for this CPU.
 * Time is tracked as deltas against processed_system_time (global) and
 * per_cpu(processed_system_time) (local). */
553 irqreturn_t timer_interrupt(int irq, void *dev_id)
555 s64 delta, delta_cpu, stolen, blocked;
557 int i, cpu = smp_processor_id();
558 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
559 struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
561 write_seqlock(&xtime_lock);
564 get_time_values_from_xen();
566 /* Obtain a consistent snapshot of elapsed wallclock cycles. */
568 shadow->system_timestamp + get_nsec_offset(shadow);
569 delta -= processed_system_time;
570 delta_cpu -= per_cpu(processed_system_time, cpu);
573 * Obtain a consistent snapshot of stolen/blocked cycles. We
574 * can use state_entry_time to detect if we get preempted here.
577 sched_time = runstate->state_entry_time;
579 stolen = runstate->time[RUNSTATE_runnable] +
580 runstate->time[RUNSTATE_offline] -
581 per_cpu(processed_stolen_time, cpu);
582 blocked = runstate->time[RUNSTATE_blocked] -
583 per_cpu(processed_blocked_time, cpu);
585 } while (sched_time != runstate->state_entry_time);
586 } while (!time_values_up_to_date(cpu));
/* Warn (rate-limited) if time appears to have gone backwards by more
 * than the permitted jitter. */
588 if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
589 unlikely(delta_cpu < -(s64)permitted_clock_jitter))
590 && printk_ratelimit()) {
591 printk("Timer ISR/%d: Time went backwards: "
592 "delta=%lld delta_cpu=%lld shadow=%lld "
593 "off=%lld processed=%lld cpu_processed=%lld\n",
594 cpu, delta, delta_cpu, shadow->system_timestamp,
595 (s64)get_nsec_offset(shadow),
596 processed_system_time,
597 per_cpu(processed_system_time, cpu));
598 for (i = 0; i < num_online_cpus(); i++)
599 printk(" %d: %lld\n", i,
600 per_cpu(processed_system_time, i));
603 /* System-wide jiffy work. */
604 while (delta >= NS_PER_TICK) {
605 delta -= NS_PER_TICK;
606 processed_system_time += NS_PER_TICK;
/* Pick up any wall-clock change published by Xen. */
610 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
615 write_sequnlock(&xtime_lock);
618 * Account stolen ticks.
619 * HACK: Passing NULL to account_steal_time()
620 * ensures that the ticks are accounted as stolen.
622 if ((stolen > 0) && (delta_cpu > 0)) {
624 if (unlikely(delta_cpu < 0))
625 stolen += delta_cpu; /* clamp local-time progress */
626 do_div(stolen, NS_PER_TICK);
627 per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
628 per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
629 account_steal_time(NULL, (cputime_t)stolen);
633 * Account blocked ticks.
634 * HACK: Passing idle_task to account_steal_time()
635 * ensures that the ticks are accounted as idle/wait.
637 if ((blocked > 0) && (delta_cpu > 0)) {
638 delta_cpu -= blocked;
639 if (unlikely(delta_cpu < 0))
640 blocked += delta_cpu; /* clamp local-time progress */
641 do_div(blocked, NS_PER_TICK);
642 per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
643 per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
644 account_steal_time(idle_task(cpu), (cputime_t)blocked);
647 /* Account user/system ticks. */
649 do_div(delta_cpu, NS_PER_TICK);
650 per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
651 if (user_mode(get_irq_regs()))
652 account_user_time(current, (cputime_t)delta_cpu);
654 account_system_time(current, HARDIRQ_OFFSET,
655 (cputime_t)delta_cpu);
658 /* Local timer processing (see update_process_times()). */
660 if (rcu_pending(cpu))
661 rcu_check_callbacks(cpu, user_mode(get_irq_regs()));
663 run_posix_cpu_timers(current);
/* Register this CPU's runstate area with Xen and seed the per-CPU
 * stolen/blocked baselines so accounting starts from "now". */
668 static void init_missing_ticks_accounting(int cpu)
670 struct vcpu_register_runstate_memory_area area;
671 struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
673 memset(runstate, 0, sizeof(*runstate));
675 area.addr.v = runstate;
676 HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
678 per_cpu(processed_blocked_time, cpu) =
679 runstate->time[RUNSTATE_blocked];
680 per_cpu(processed_stolen_time, cpu) =
681 runstate->time[RUNSTATE_runnable] +
682 runstate->time[RUNSTATE_offline];
685 /* not static: needed by APM */
/* Read the current time from the hardware clock (EFI or CMOS backend),
 * serialized against other RTC users by rtc_lock. */
686 unsigned long get_cmos_time(void)
688 unsigned long retval;
691 spin_lock_irqsave(&rtc_lock, flags);
694 retval = efi_get_time();
696 retval = mach_get_cmos_time();
698 spin_unlock_irqrestore(&rtc_lock, flags);
702 EXPORT_SYMBOL(get_cmos_time);
704 static void sync_cmos_clock(unsigned long dummy);
706 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
/* Timer callback: copy the NTP-synced system time into the CMOS RTC,
 * timed to land ~500ms before a second boundary, then re-arm. */
708 static void sync_cmos_clock(unsigned long dummy)
710 struct timeval now, next;
714 * If we have an externally synchronized Linux clock, then update
715 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
716 * called as close as possible to 500 ms before the new second starts.
717 * This code is run on a timer. If the clock is set, that timer
718 * may not expire at the correct time. Thus, we adjust...
722 * Not synced, exit, do not restart a timer (if one is
723 * running, let it run out).
727 do_gettimeofday(&now);
728 if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
729 now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
730 fail = set_rtc_mmss(now.tv_sec);
/* Compute the delay until the next good write window. */
732 next.tv_usec = USEC_AFTER - now.tv_usec;
733 if (next.tv_usec <= 0)
734 next.tv_usec += USEC_PER_SEC;
741 if (next.tv_usec >= USEC_PER_SEC) {
743 next.tv_usec -= USEC_PER_SEC;
745 mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
/* Kick both the CMOS and the Xen wallclock sync timers soon. */
748 void notify_arch_cmos_timer(void)
750 mod_timer(&sync_cmos_timer, jiffies + 1);
751 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
/* Suspend/resume state: offset between system time and the CMOS clock,
 * and the CMOS time at which we went to sleep. */
754 static long clock_cmos_diff;
755 static unsigned long sleep_start;
/* Record the system-vs-CMOS offset so resume can restore wall time. */
757 static int timer_suspend(struct sys_device *dev, pm_message_t state)
760 * Estimate time zone so that set_time can update the clock
762 unsigned long ctime = get_cmos_time();
764 clock_cmos_diff = -ctime;
765 clock_cmos_diff += get_seconds();
/* Restore wall time and advance jiffies by the measured sleep length. */
770 static int timer_resume(struct sys_device *dev)
774 unsigned long ctime = get_cmos_time();
775 long sleep_length = (ctime - sleep_start) * HZ;
777 if (sleep_length < 0) {
778 printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
779 /* The time after the resume must not be earlier than the time
780 * before the suspend or some nasty things will happen
786 #ifdef CONFIG_HPET_TIMER
787 if (is_hpet_enabled())
790 sec = ctime + clock_cmos_diff;
791 write_seqlock_irqsave(&xtime_lock, flags);
794 jiffies_64 += sleep_length;
795 write_sequnlock_irqrestore(&xtime_lock, flags);
796 touch_softlockup_watchdog();
/* sysdev glue so the timer participates in suspend/resume. */
800 static struct sysdev_class timer_sysclass = {
801 .resume = timer_resume,
802 .suspend = timer_suspend,
803 set_kset_name("timer"),
807 /* XXX this driverfs stuff should probably go elsewhere later -john */
808 static struct sys_device device_timer = {
810 .cls = &timer_sysclass,
/* Register the timer sysdev class and device at device-init time. */
813 static int time_init_device(void)
815 int error = sysdev_class_register(&timer_sysclass);
817 error = sysdev_register(&device_timer);
821 device_initcall(time_init_device);
823 #ifdef CONFIG_HPET_TIMER
824 extern void (*late_time_init)(void);
825 /* Duplicate of time_init() below, with hpet_enable part added */
826 static void __init hpet_time_init(void)
/* Seed xtime from the CMOS clock and make wall_to_monotonic its negation. */
828 xtime.tv_sec = get_cmos_time();
829 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
830 set_normalized_timespec(&wall_to_monotonic,
831 -xtime.tv_sec, -xtime.tv_nsec);
833 if ((hpet_enable() >= 0) && hpet_use_timer) {
834 printk("Using HPET for base-timer\n");
841 /* Dynamically-mapped IRQ. */
842 DEFINE_PER_CPU(int, timer_irq);
844 extern void (*late_time_init)(void);
/* Bind CPU0's Xen VIRQ_TIMER to timer_interrupt; must run after kmem
 * init (see comment in time_init below). */
845 static void setup_cpu0_timer_irq(void)
847 per_cpu(timer_irq, 0) =
848 bind_virq_to_irqhandler(
855 BUG_ON(per_cpu(timer_irq, 0) < 0);
/* Boot-time clock initialization: snapshot Xen time, seed the processed
 * time baselines, set up vxtime for the TSC path, and defer IRQ binding
 * to late_time_init. */
858 void __init time_init(void)
860 #ifdef CONFIG_HPET_TIMER
861 if (is_hpet_capable()) {
863 * HPET initialization needs to do memory-mapped io. So, let
864 * us do a late initialization after mem_init().
866 late_time_init = hpet_time_init;
870 get_time_values_from_xen();
872 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
873 per_cpu(processed_system_time, 0) = processed_system_time;
874 init_missing_ticks_accounting(0);
879 printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
880 cpu_khz / 1000, cpu_khz % 1000);
882 vxtime.mode = VXTIME_TSC;
883 vxtime.quot = (1000000L << US_SCALE) / vxtime_hz;
884 vxtime.tsc_quot = (1000L << US_SCALE) / cpu_khz;
886 rdtscll(vxtime.last_tsc);
888 /* Cannot request_irq() until kmem is initialised. */
889 late_time_init = setup_cpu0_timer_irq;
892 /* Convert jiffies to system time. */
/* Translate an absolute jiffy value into Xen system time (ns), for
 * programming one-shot hypervisor timeouts. Returns 0 to mean "no
 * pending timer" for very long deltas. */
893 u64 jiffies_to_st(unsigned long j)
900 seq = read_seqbegin(&xtime_lock);
903 /* Triggers in some wrap-around cases, but that's okay:
904 * we just end up with a shorter timeout. */
905 st = processed_system_time + NS_PER_TICK;
906 } else if (((unsigned long)delta >> (BITS_PER_LONG-3)) != 0) {
907 /* Very long timeout means there is no pending timer.
908 * We indicate this to Xen by passing zero timeout. */
911 st = processed_system_time + delta * (u64)NS_PER_TICK;
913 } while (read_seqretry(&xtime_lock, seq));
917 EXPORT_SYMBOL(jiffies_to_st);
920 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
921 * These functions are based on implementations from arch/s390/kernel/time.c
923 static void stop_hz_timer(void)
925 unsigned int cpu = smp_processor_id();
928 cpu_set(cpu, nohz_cpu_mask);
930 /* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
931 /* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
932 /* value of rcp->cur that matches rdp->quiescbatch and allows us to */
933 /* stop the hz timer then the cpumasks created for subsequent values */
934 /* of cur in rcu_start_batch are guaranteed to pick up the updated */
935 /* nohz_cpu_mask and so will not depend on this cpu. */
939 /* Leave ourselves in tick mode if rcu or softirq or timer pending. */
940 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
941 (j = next_timer_interrupt(), time_before_eq(j, jiffies))) {
942 cpu_clear(cpu, nohz_cpu_mask);
/* Program a one-shot hypervisor timer for the next pending timeout. */
946 if (HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0)
/* Re-enter tick mode: simply drop ourselves from nohz_cpu_mask. */
950 static void start_hz_timer(void)
952 cpu_clear(smp_processor_id(), nohz_cpu_mask);
/* Idle halt: stop the periodic tick, block in the hypervisor, restart. */
955 void raw_safe_halt(void)
958 /* Blocking includes an implicit local_irq_enable(). */
962 EXPORT_SYMBOL(raw_safe_halt);
/* NOTE(review): this VCPUOP_down call belongs to a function whose header
 * is missing from this extraction (likely halt()/safe_halt). */
967 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
971 /* No locking required. We are only CPU running, and interrupts are off. */
/* Re-sync all time baselines from Xen after a domain save/restore. */
972 void time_resume(void)
976 get_time_values_from_xen();
978 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
979 per_cpu(processed_system_time, 0) = processed_system_time;
980 init_missing_ticks_accounting(0);
/* Per-CPU IRQ names must stay alive while the IRQ is bound. */
986 static char timer_name[NR_CPUS][15];
/* Secondary-CPU bringup: seed this CPU's processed time from CPU0's
 * shadow (this CPU's own shadow is not initialised yet), then bind its
 * VIRQ_TIMER. */
988 void local_setup_timer(unsigned int cpu)
995 seq = read_seqbegin(&xtime_lock);
996 /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
997 per_cpu(processed_system_time, cpu) =
998 per_cpu(shadow_time, 0).system_timestamp;
999 init_missing_ticks_accounting(cpu);
1000 } while (read_seqretry(&xtime_lock, seq));
1002 sprintf(timer_name[cpu], "timer%d", cpu);
1003 per_cpu(timer_irq, cpu) =
1004 bind_virq_to_irqhandler(
1011 BUG_ON(per_cpu(timer_irq, cpu) < 0);
/* CPU teardown: release the per-CPU timer IRQ binding. */
1014 void local_teardown_timer(unsigned int cpu)
1017 unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
1022 * /proc/sys/xen: This really belongs in another file. It can stay here for
/* Expose independent_wallclock and permitted_clock_jitter as runtime
 * tunables under /proc/sys/xen. */
1025 static ctl_table xen_subtable[] = {
1028 .procname = "independent_wallclock",
1029 .data = &independent_wallclock,
1030 .maxlen = sizeof(independent_wallclock),
1032 .proc_handler = proc_dointvec
1036 .procname = "permitted_clock_jitter",
1037 .data = &permitted_clock_jitter,
1038 .maxlen = sizeof(permitted_clock_jitter),
1040 .proc_handler = proc_doulongvec_minmax
1044 static ctl_table xen_table[] = {
1049 .child = xen_subtable},
/* Register the /proc/sys/xen tree at initcall time. */
1052 static int __init xen_sysctl_init(void)
1054 (void)register_sysctl_table(xen_table, 0);
1057 __initcall(xen_sysctl_init);