2 * linux/arch/i386/kernel/time.c
4 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
6 * This file contains the PC-specific time handling details:
7 * reading the RTC at bootup, etc..
8 * 1994-07-02 Alan Modra
9 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
10 * 1995-03-26 Markus Kuhn
11 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
12 * precision CMOS clock update
13 * 1996-05-03 Ingo Molnar
14 * fixed time warps in do_[slow|fast]_gettimeoffset()
15 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
16 * "A Kernel Model for Precision Timekeeping" by Dave Mills
17 * 1998-09-05 (Various)
18 * More robust do_fast_gettimeoffset() algorithm implemented
19 * (works with APM, Cyrix 6x86MX and Centaur C6),
20 * monotonic gettimeofday() with fast_get_timeoffset(),
21 * drift-proof precision TSC calibration on boot
22 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
23 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
24 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
25 * 1998-12-16 Andrea Arcangeli
26 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
27 * because was not accounting lost_ticks.
28 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
29 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
30 * serialize accesses to xtime/lost_ticks).
33 #include <linux/errno.h>
34 #include <linux/sched.h>
35 #include <linux/kernel.h>
36 #include <linux/param.h>
37 #include <linux/string.h>
39 #include <linux/interrupt.h>
40 #include <linux/time.h>
41 #include <linux/delay.h>
42 #include <linux/init.h>
43 #include <linux/smp.h>
44 #include <linux/module.h>
45 #include <linux/sysdev.h>
46 #include <linux/bcd.h>
47 #include <linux/efi.h>
48 #include <linux/mca.h>
49 #include <linux/sysctl.h>
50 #include <linux/percpu.h>
51 #include <linux/kernel_stat.h>
52 #include <linux/posix-timers.h>
58 #include <asm/delay.h>
59 #include <asm/mpspec.h>
60 #include <asm/uaccess.h>
61 #include <asm/processor.h>
62 #include <asm/timer.h>
63 #include <asm/sections.h>
65 #include "mach_time.h"
67 #include <linux/timex.h>
68 #include <linux/config.h>
72 #include <asm/arch_hooks.h>
74 #include <xen/evtchn.h>
75 #include <xen/interface/vcpu.h>
77 #if defined (__i386__)
78 #include <asm/i8259.h>
81 int pit_latch_buggy; /* extern */
83 #if defined(__x86_64__)
84 unsigned long vxtime_hz = PIT_TICK_RATE;
85 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
86 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
87 unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
88 struct timespec __xtime __section_xtime;
89 struct timezone __sys_tz __section_sys_tz;
92 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
93 EXPORT_SYMBOL(cpu_khz);
95 extern unsigned long wall_jiffies;
97 DEFINE_SPINLOCK(rtc_lock);
98 EXPORT_SYMBOL(rtc_lock);
100 #if defined (__i386__)
101 #include <asm/i8253.h>
104 DEFINE_SPINLOCK(i8253_lock);
105 EXPORT_SYMBOL(i8253_lock);
107 extern struct init_timer_opts timer_tsc_init;
108 extern struct timer_opts timer_tsc;
109 #define timer_none timer_tsc
110 struct timer_opts *cur_timer __read_mostly = &timer_tsc;
112 /* These are peridically updated in shared_info, and then copied here. */
113 struct shadow_time_info {
114 u64 tsc_timestamp; /* TSC at last update of time vals. */
115 u64 system_timestamp; /* Time, in nanosecs, since boot. */
121 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
122 static struct timespec shadow_tv;
123 static u32 shadow_tv_version;
125 /* Keep track of last time we did processing/updating of jiffies and xtime. */
126 static u64 processed_system_time; /* System time (ns) at last processing. */
127 static DEFINE_PER_CPU(u64, processed_system_time);
129 /* How much CPU time was spent blocked and how much was 'stolen'? */
130 static DEFINE_PER_CPU(u64, processed_stolen_time);
131 static DEFINE_PER_CPU(u64, processed_blocked_time);
133 /* Current runstate of each CPU (updated automatically by the hypervisor). */
134 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
136 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
137 #define NS_PER_TICK (1000000000LL/HZ)
139 static inline void __normalize_time(time_t *sec, s64 *nsec)
141 while (*nsec >= NSEC_PER_SEC) {
142 (*nsec) -= NSEC_PER_SEC;
146 (*nsec) += NSEC_PER_SEC;
151 /* Does this guest OS track Xen time, or set its wall clock independently? */
152 static int independent_wallclock = 0;
153 static int __init __independent_wallclock(char *str)
155 independent_wallclock = 1;
158 __setup("independent_wallclock", __independent_wallclock);
160 /* Permitted clock jitter, in nsecs, beyond which a warning will be printed. */
161 static unsigned long permitted_clock_jitter = 10000000UL; /* 10ms */
162 static int __init __permitted_clock_jitter(char *str)
164 permitted_clock_jitter = simple_strtoul(str, NULL, 0);
167 __setup("permitted_clock_jitter=", __permitted_clock_jitter);
169 int tsc_disable __devinitdata = 0;
/*
 * delay_tsc(): busy-wait until the TSC has advanced by 'loops' cycles.
 * NOTE(review): the rdtsc reads that initialise 'bclock' and refresh
 * 'now' inside the loop are missing from this extract -- the visible
 * do/while tail implies them; compare against the upstream file.
 */
171 static void delay_tsc(unsigned long loops)
173 	unsigned long bclock, now;
179 	} while ((now - bclock) < loops);
182 struct timer_opts timer_tsc = {
188 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
189 * yielding a 64-bit result.
/*
 * scale_delta(delta, mul_frac, shift): scale a 64-bit TSC delta by a
 * 32.32 fixed-point fraction (after applying 'shift'), yielding 64 bits.
 * Two inline-asm arms exist: a 32x32->64 sequence for i386 and a single
 * mul/shrd for x86_64.
 * NOTE(review): the #if/#else/#endif lines and most of the asm bodies
 * are missing from this extract; do not edit without the full source.
 */
191 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
212 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
213 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
216 "mul %%rdx ; shrd $32,%%rdx,%%rax"
217 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
223 #if defined (__i386__)
224 int read_current_timer(unsigned long *timer_val)
231 void init_cpu_khz(void)
233 u64 __cpu_khz = 1000000ULL << 32;
234 struct vcpu_time_info *info;
235 info = &HYPERVISOR_shared_info->vcpu_info[0].time;
236 do_div(__cpu_khz, info->tsc_to_system_mul);
237 if (info->tsc_shift < 0)
238 cpu_khz = __cpu_khz << -info->tsc_shift;
240 cpu_khz = __cpu_khz >> info->tsc_shift;
243 static u64 get_nsec_offset(struct shadow_time_info *shadow)
247 delta = now - shadow->tsc_timestamp;
248 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
251 static unsigned long get_usec_offset(struct shadow_time_info *shadow)
255 delta = now - shadow->tsc_timestamp;
256 return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
/*
 * __update_wallclock(): rebase xtime/wall_to_monotonic on a new wall
 * clock sample (sec, nsec), anchored at processed_system_time and
 * adjusted for ticks not yet folded into wall time.  Caller must hold
 * xtime_lock for writing.
 * NOTE(review): the declarations of the 64-bit accumulator(s)
 * ('wc_nsec', 'tmp') and several lines are missing from this extract;
 * compare against the upstream file before modifying.
 */
259 static void __update_wallclock(time_t sec, long nsec)
261 	long wtm_nsec, xtime_nsec;
262 	time_t wtm_sec, xtime_sec;
265 	/* Adjust wall-clock time base based on wall_jiffies ticks. */
266 	wc_nsec = processed_system_time;
267 	wc_nsec += sec * (u64)NSEC_PER_SEC;
269 	wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
271 	/* Split wallclock base into seconds and nanoseconds. */
273 	xtime_nsec = do_div(tmp, 1000000000);
274 	xtime_sec = (time_t)tmp;
276 	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
277 	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
279 	set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
280 	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
/*
 * update_wallclock(): copy a consistent wall-clock snapshot out of the
 * Xen shared-info page into shadow_tv, retrying while the version
 * counter is odd (update in progress) or changes underfoot, then apply
 * it unless the guest keeps an independent wallclock.
 * NOTE(review): the opening 'do {' and any barrier lines are missing
 * from this extract; the '} while' tail below implies them.
 */
285 static void update_wallclock(void)
287 	shared_info_t *s = HYPERVISOR_shared_info;
290 	shadow_tv_version = s->wc_version;
292 	shadow_tv.tv_sec = s->wc_sec;
293 	shadow_tv.tv_nsec = s->wc_nsec;
295 	} while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
297 	if (!independent_wallclock)
298 	__update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
302 * Reads a consistent set of time-base values from Xen, into a shadow data
/*
 * get_time_values_from_xen(): copy this CPU's vcpu_time_info record
 * (TSC timestamp, system time, scaling factors) from the shared-info
 * page into the per-CPU shadow, using the same odd/changed version
 * retry protocol as update_wallclock().  Also derives the usec
 * multiplier from the nsec one.
 * NOTE(review): the opening 'do {' and barrier lines are missing from
 * this extract; the '} while' tail implies them.
 */
305 static void get_time_values_from_xen(void)
307 	shared_info_t *s = HYPERVISOR_shared_info;
308 	struct vcpu_time_info *src;
309 	struct shadow_time_info *dst;
311 	src = &s->vcpu_info[smp_processor_id()].time;
312 	dst = &per_cpu(shadow_time, smp_processor_id());
315 	dst->version = src->version;
317 	dst->tsc_timestamp = src->tsc_timestamp;
318 	dst->system_timestamp = src->system_time;
319 	dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
320 	dst->tsc_shift = src->tsc_shift;
322 	} while ((src->version & 1) | (dst->version ^ src->version));
324 	dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
327 static inline int time_values_up_to_date(int cpu)
329 struct vcpu_time_info *src;
330 struct shadow_time_info *dst;
332 src = &HYPERVISOR_shared_info->vcpu_info[cpu].time;
333 dst = &per_cpu(shadow_time, cpu);
336 return (dst->version == src->version);
340 * This is a special lock that is owned by the CPU and holds the index
341 * register we are working with. It is required for NMI access to the
342 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
344 volatile unsigned long cmos_lock = 0;
345 EXPORT_SYMBOL(cmos_lock);
347 /* Routines for accessing the CMOS RAM/RTC. */
/*
 * Read one byte from CMOS RAM/RTC register 'addr'.  The cmos_lock
 * prefix/suffix pair makes the index/data access safe against NMI
 * handlers that also touch the CMOS index register.
 */
unsigned char rtc_cmos_read(unsigned char addr)
{
	unsigned char val;

	lock_cmos_prefix(addr);
	outb_p(addr, RTC_PORT(0));	/* select register */
	val = inb_p(RTC_PORT(1));	/* read its value  */
	lock_cmos_suffix(addr);

	return val;
}
EXPORT_SYMBOL(rtc_cmos_read);
/*
 * Write one byte to CMOS RAM/RTC register 'addr', with the same
 * NMI-safe index-register locking as rtc_cmos_read().
 */
void rtc_cmos_write(unsigned char val, unsigned char addr)
{
	lock_cmos_prefix(addr);
	outb_p(addr, RTC_PORT(0));	/* select register */
	outb_p(val, RTC_PORT(1));	/* write its value */
	lock_cmos_suffix(addr);
}
EXPORT_SYMBOL(rtc_cmos_write);
369 * This version of gettimeofday has microsecond resolution
370 * and better than microsecond precision on fast x86 machines with TSC.
/*
 * do_gettimeofday(): microsecond-resolution wall time.  Combines the
 * xtime base (under the xtime_lock seqlock) with the TSC-derived usec
 * offset from this CPU's shadow, accounts ticks not yet in wall time,
 * clamps forward progress while NTP is slewing the clock backwards,
 * and retries if either the seqlock or the shadow version changed.
 * NOTE(review): several lines (locals 'cpu'/'seq'/'lost'/'nsec', the
 * outer 'do {', closing braces, and the final tv store) are missing
 * from this extract; compare against the upstream file before editing.
 */
372 void do_gettimeofday(struct timeval *tv)
375 	unsigned long usec, sec;
376 	unsigned long max_ntp_tick;
379 	struct shadow_time_info *shadow;
380 	u32 local_time_version;
383 	shadow = &per_cpu(shadow_time, cpu);
388 	local_time_version = shadow->version;
389 	seq = read_seqbegin(&xtime_lock);
391 	usec = get_usec_offset(shadow);
392 	lost = jiffies - wall_jiffies;
395 	* If time_adjust is negative then NTP is slowing the clock
396 	* so make sure not to go into next possible interval.
397 	* Better to lose some accuracy than have time go backwards..
399 	if (unlikely(time_adjust < 0)) {
400 	max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
401 	usec = min(usec, max_ntp_tick);
404 	usec += lost * max_ntp_tick;
406 	else if (unlikely(lost))
407 	usec += lost * (USEC_PER_SEC / HZ);
410 	usec += (xtime.tv_nsec / NSEC_PER_USEC);
412 	nsec = shadow->system_timestamp - processed_system_time;
413 	__normalize_time(&sec, &nsec);
414 	usec += (long)nsec / NSEC_PER_USEC;
416 	if (unlikely(!time_values_up_to_date(cpu))) {
418 	* We may have blocked for a long time,
419 	* rendering our calculations invalid
420 	* (e.g. the time delta may have
421 	* overflowed). Detect that and recalculate
424 	get_time_values_from_xen();
427 	} while (read_seqretry(&xtime_lock, seq) ||
428 	(local_time_version != shadow->version));
432 	while (usec >= USEC_PER_SEC) {
433 	usec -= USEC_PER_SEC;
441 EXPORT_SYMBOL(do_gettimeofday);
/*
 * do_settimeofday(): set the wall clock.  Under xtime_lock, converts
 * the requested time to a system-time-relative value (subtracting the
 * current TSC offset, retrying with fresh shadow values if stale).
 * dom0 with a Xen-slaved wallclock pushes the time to the hypervisor
 * via DOM0_SETTIME; an independent-wallclock guest updates xtime
 * locally instead.  Returns 0 on success.
 * NOTE(review): locals ('cpu', 'sec', 'nsec', 'op'), the EINVAL return,
 * the retry loop framing, and the trailing clock_was_set()/return are
 * missing from this extract; compare against the upstream file.
 */
443 int do_settimeofday(struct timespec *tv)
448 	struct shadow_time_info *shadow;
451 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
455 	shadow = &per_cpu(shadow_time, cpu);
457 	write_seqlock_irq(&xtime_lock);
460 	* Ensure we don't get blocked for a long time so that our time delta
461 	* overflows. If that were to happen then our shadow time values would
462 	* be stale, so we can retry with fresh ones.
465 	nsec = tv->tv_nsec - get_nsec_offset(shadow);
466 	if (time_values_up_to_date(cpu))
468 	get_time_values_from_xen();
471 	__normalize_time(&sec, &nsec);
473 	if ((xen_start_info->flags & SIF_INITDOMAIN) &&
474 	!independent_wallclock) {
475 	op.cmd = DOM0_SETTIME;
476 	op.u.settime.secs = sec;
477 	op.u.settime.nsecs = nsec;
478 	op.u.settime.system_time = shadow->system_timestamp;
479 	HYPERVISOR_dom0_op(&op);
481 	} else if (independent_wallclock) {
482 	nsec -= shadow->system_timestamp;
483 	__normalize_time(&sec, &nsec);
484 	__update_wallclock(sec, nsec);
487 	write_sequnlock_irq(&xtime_lock);
495 EXPORT_SYMBOL(do_settimeofday);
497 static void sync_xen_wallclock(unsigned long dummy);
498 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
/*
 * sync_xen_wallclock(): periodic timer callback (see the DEFINE_TIMER
 * above) that pushes the NTP-disciplined wall clock back to Xen via
 * DOM0_SETTIME, once per minute.  Only runs in dom0 when NTP is synced
 * and the wallclock is not independent.
 * NOTE(review): locals ('sec', 'nsec', 'op'), the early return, and
 * the line seeding 'sec' from xtime.tv_sec are missing from this
 * extract; compare against the upstream file.
 */
499 static void sync_xen_wallclock(unsigned long dummy)
505 	if (!ntp_synced() || independent_wallclock ||
506 	!(xen_start_info->flags & SIF_INITDOMAIN))
509 	write_seqlock_irq(&xtime_lock);
512 	nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
513 	__normalize_time(&sec, &nsec);
515 	op.cmd = DOM0_SETTIME;
516 	op.u.settime.secs = sec;
517 	op.u.settime.nsecs = nsec;
518 	op.u.settime.system_time = processed_system_time;
519 	HYPERVISOR_dom0_op(&op);
523 	write_sequnlock_irq(&xtime_lock);
525 	/* Once per minute. */
526 	mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
529 static int set_rtc_mmss(unsigned long nowtime)
533 WARN_ON(irqs_disabled());
535 if (independent_wallclock || !(xen_start_info->flags & SIF_INITDOMAIN))
538 /* gets recalled with irq locally disabled */
539 spin_lock_irq(&rtc_lock);
541 retval = efi_set_rtc_mmss(nowtime);
543 retval = mach_set_rtc_mmss(nowtime);
544 spin_unlock_irq(&rtc_lock);
549 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
550 * Note: This function is required to return accurate
551 * time even in the absence of multiple timer ticks.
553 unsigned long long monotonic_clock(void)
556 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
558 u32 local_time_version;
561 local_time_version = shadow->version;
563 time = shadow->system_timestamp + get_nsec_offset(shadow);
564 if (!time_values_up_to_date(cpu))
565 get_time_values_from_xen();
567 } while (local_time_version != shadow->version);
573 EXPORT_SYMBOL(monotonic_clock);
/* Scheduler timestamps come straight from the monotonic clock. */
unsigned long long sched_clock(void)
{
	return monotonic_clock();
}
580 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
/*
 * profile_pc(): return the PC to charge a profiling tick to.  If the
 * interrupted PC is inside a spinlock function, walk one stack slot to
 * attribute the tick to the lock's caller instead.
 * NOTE(review): this extract interleaves what are normally two
 * #ifdef-separated arms -- an x86_64 body (regs->rsp, text-range
 * checks) and an i386 body (regs->ebp + 4) -- with the preprocessor
 * lines and the final 'return pc;' missing.  Do not edit without the
 * complete source.
 */
581 unsigned long profile_pc(struct pt_regs *regs)
583 	unsigned long pc = instruction_pointer(regs);
586 	/* Assume the lock function has either no stack frame or only a single word.
587 	This checks if the address on the stack looks like a kernel text address.
588 	There is a small window for false hits, but in that case the tick
589 	is just accounted to the spinlock function.
590 	Better would be to write these functions in assembler again
591 	and check exactly. */
592 	if (in_lock_functions(pc)) {
593 	char *v = *(char **)regs->rsp;
594 	if ((v >= _stext && v <= _etext) ||
595 	(v >= _sinittext && v <= _einittext) ||
596 	(v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
597 	return (unsigned long)v;
598 	return ((unsigned long *)regs->rsp)[1];
601 	if (in_lock_functions(pc))
602 	return *(unsigned long *)(regs->ebp + 4);
607 EXPORT_SYMBOL(profile_pc);
/*
 * timer_interrupt(): the Xen VIRQ_TIMER handler.  Under xtime_lock it
 * refreshes the shadow time, computes the global delta (for jiffies)
 * and the per-CPU delta, then -- outside the lock -- splits the
 * per-CPU delta into stolen, blocked (idle), and user/system ticks
 * using the hypervisor-updated runstate area.
 * NOTE(review): numerous lines are missing from this extract (locals
 * 'sched_time'/'regs' handling, the 'do {' loop openers, the 'delta ='
 * and 'delta_cpu =' seeds at line 624, do_timer()/update_wallclock()
 * calls, closing braces, and the IRQ_HANDLED return).  Compare against
 * the upstream file before modifying.
 */
610 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
612 	s64 delta, delta_cpu, stolen, blocked;
614 	int i, cpu = smp_processor_id();
615 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
616 	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
618 	write_seqlock(&xtime_lock);
621 	get_time_values_from_xen();
623 	/* Obtain a consistent snapshot of elapsed wallclock cycles. */
625 	shadow->system_timestamp + get_nsec_offset(shadow);
626 	delta -= processed_system_time;
627 	delta_cpu -= per_cpu(processed_system_time, cpu);
630 	* Obtain a consistent snapshot of stolen/blocked cycles. We
631 	* can use state_entry_time to detect if we get preempted here.
634 	sched_time = runstate->state_entry_time;
636 	stolen = runstate->time[RUNSTATE_runnable] +
637 	runstate->time[RUNSTATE_offline] -
638 	per_cpu(processed_stolen_time, cpu);
639 	blocked = runstate->time[RUNSTATE_blocked] -
640 	per_cpu(processed_blocked_time, cpu);
642 	} while (sched_time != runstate->state_entry_time);
643 	} while (!time_values_up_to_date(cpu));
645 	if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
646 	unlikely(delta_cpu < -(s64)permitted_clock_jitter))
647 	&& printk_ratelimit()) {
648 	printk("Timer ISR/%d: Time went backwards: "
649 	"delta=%lld delta_cpu=%lld shadow=%lld "
650 	"off=%lld processed=%lld cpu_processed=%lld\n",
651 	cpu, delta, delta_cpu, shadow->system_timestamp,
652 	(s64)get_nsec_offset(shadow),
653 	processed_system_time,
654 	per_cpu(processed_system_time, cpu));
655 	for (i = 0; i < num_online_cpus(); i++)
656 	printk(" %d: %lld\n", i,
657 	per_cpu(processed_system_time, i));
660 	/* System-wide jiffy work. */
661 	while (delta >= NS_PER_TICK) {
662 	delta -= NS_PER_TICK;
663 	processed_system_time += NS_PER_TICK;
667 	if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
672 	write_sequnlock(&xtime_lock);
675 	* Account stolen ticks.
676 	* HACK: Passing NULL to account_steal_time()
677 	* ensures that the ticks are accounted as stolen.
679 	if ((stolen > 0) && (delta_cpu > 0)) {
681 	if (unlikely(delta_cpu < 0))
682 	stolen += delta_cpu; /* clamp local-time progress */
683 	do_div(stolen, NS_PER_TICK);
684 	per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
685 	per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
686 	account_steal_time(NULL, (cputime_t)stolen);
690 	* Account blocked ticks.
691 	* HACK: Passing idle_task to account_steal_time()
692 	* ensures that the ticks are accounted as idle/wait.
694 	if ((blocked > 0) && (delta_cpu > 0)) {
695 	delta_cpu -= blocked;
696 	if (unlikely(delta_cpu < 0))
697 	blocked += delta_cpu; /* clamp local-time progress */
698 	do_div(blocked, NS_PER_TICK);
699 	per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
700 	per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
701 	account_steal_time(idle_task(cpu), (cputime_t)blocked);
704 	/* Account user/system ticks. */
706 	do_div(delta_cpu, NS_PER_TICK);
707 	per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
709 	account_user_time(current, (cputime_t)delta_cpu);
711 	account_system_time(current, HARDIRQ_OFFSET,
712 	(cputime_t)delta_cpu);
715 	/* Local timer processing (see update_process_times()). */
717 	if (rcu_pending(cpu))
718 	rcu_check_callbacks(cpu, user_mode(regs));
720 	run_posix_cpu_timers(current);
725 static void init_missing_ticks_accounting(int cpu)
727 struct vcpu_register_runstate_memory_area area;
728 struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
730 memset(runstate, 0, sizeof(*runstate));
732 area.addr.v = runstate;
733 HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
735 per_cpu(processed_blocked_time, cpu) =
736 runstate->time[RUNSTATE_blocked];
737 per_cpu(processed_stolen_time, cpu) =
738 runstate->time[RUNSTATE_runnable] +
739 runstate->time[RUNSTATE_offline];
742 /* not static: needed by APM */
743 unsigned long get_cmos_time(void)
745 unsigned long retval;
747 spin_lock(&rtc_lock);
750 retval = efi_get_time();
752 retval = mach_get_cmos_time();
754 spin_unlock(&rtc_lock);
758 EXPORT_SYMBOL(get_cmos_time);
760 static void sync_cmos_clock(unsigned long dummy);
762 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
/*
 * sync_cmos_clock(): timer callback (sync_cmos_timer above) that writes
 * the NTP-synced system time to the CMOS RTC roughly every 11 minutes,
 * timed to land close to 500 ms before a second boundary as the RTC
 * update protocol requires.
 * NOTE(review): locals ('fail'), the ntp_synced() early-return, the
 * ~11-minute next.tv_sec computation, and closing braces are missing
 * from this extract; compare against the upstream file.
 */
764 static void sync_cmos_clock(unsigned long dummy)
766 	struct timeval now, next;
770 	* If we have an externally synchronized Linux clock, then update
771 	* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
772 	* called as close as possible to 500 ms before the new second starts.
773 	* This code is run on a timer. If the clock is set, that timer
774 	* may not expire at the correct time. Thus, we adjust...
778 	* Not synced, exit, do not restart a timer (if one is
779 	* running, let it run out).
783 	do_gettimeofday(&now);
784 	if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
785 	now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
786 	fail = set_rtc_mmss(now.tv_sec);
788 	next.tv_usec = USEC_AFTER - now.tv_usec;
789 	if (next.tv_usec <= 0)
790 	next.tv_usec += USEC_PER_SEC;
797 	if (next.tv_usec >= USEC_PER_SEC) {
799 	next.tv_usec -= USEC_PER_SEC;
801 	mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
804 void notify_arch_cmos_timer(void)
806 mod_timer(&sync_cmos_timer, jiffies + 1);
807 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
810 static long clock_cmos_diff, sleep_start;
812 static struct timer_opts *last_timer;
813 static int timer_suspend(struct sys_device *dev, pm_message_t state)
816 * Estimate time zone so that set_time can update the clock
818 clock_cmos_diff = -get_cmos_time();
819 clock_cmos_diff += get_seconds();
820 sleep_start = get_cmos_time();
821 last_timer = cur_timer;
822 cur_timer = &timer_none;
823 if (last_timer->suspend)
824 last_timer->suspend(state);
/*
 * Resume hook for the timer sysdev: advance xtime and jiffies by the
 * wall time spent asleep (measured via the CMOS RTC against the
 * recorded sleep_start/clock_cmos_diff), then restore the pre-suspend
 * timesource.
 * NOTE(review): locals ('flags', 'sec'), the hpet_reenable() call under
 * CONFIG_HPET_TIMER, the xtime store from 'sec', the #endif, and the
 * 'return 0;' are missing from this extract; compare against the
 * upstream file.
 */
828 static int timer_resume(struct sys_device *dev)
832 	unsigned long sleep_length;
834 #ifdef CONFIG_HPET_TIMER
835 	if (is_hpet_enabled())
838 	sec = get_cmos_time() + clock_cmos_diff;
839 	sleep_length = (get_cmos_time() - sleep_start) * HZ;
840 	write_seqlock_irqsave(&xtime_lock, flags);
843 	jiffies_64 += sleep_length;
844 	wall_jiffies += sleep_length;
845 	write_sequnlock_irqrestore(&xtime_lock, flags);
846 	if (last_timer->resume)
847 	last_timer->resume();
848 	cur_timer = last_timer;
850 	touch_softlockup_watchdog();
854 static struct sysdev_class timer_sysclass = {
855 .resume = timer_resume,
856 .suspend = timer_suspend,
857 set_kset_name("timer"),
861 /* XXX this driverfs stuff should probably go elsewhere later -john */
862 static struct sys_device device_timer = {
864 .cls = &timer_sysclass,
867 static int time_init_device(void)
869 int error = sysdev_class_register(&timer_sysclass);
871 error = sysdev_register(&device_timer);
875 device_initcall(time_init_device);
877 #ifdef CONFIG_HPET_TIMER
878 extern void (*late_time_init)(void);
879 /* Duplicate of time_init() below, with hpet_enable part added */
/*
 * hpet_time_init(): late-init variant of time_init() used when an HPET
 * is present -- seeds xtime/wall_to_monotonic from the CMOS clock,
 * tries to enable the HPET as base timer, then selects the high-res
 * timesource.
 * NOTE(review): several lines (the hpet_enable() failure path, closing
 * braces, and the trailing time_init_hook()) are missing from this
 * extract; compare against the upstream file.
 */
880 static void __init hpet_time_init(void)
882 	xtime.tv_sec = get_cmos_time();
883 	xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
884 	set_normalized_timespec(&wall_to_monotonic,
885 	-xtime.tv_sec, -xtime.tv_nsec);
887 	if ((hpet_enable() >= 0) && hpet_use_timer) {
888 	printk("Using HPET for base-timer\n");
891 	cur_timer = select_timer();
892 	printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
898 /* Dynamically-mapped IRQ. */
899 DEFINE_PER_CPU(int, timer_irq);
901 extern void (*late_time_init)(void);
/*
 * setup_cpu0_timer_irq(): bind VIRQ_TIMER for CPU0 to timer_interrupt
 * via the event-channel layer.  Runs as late_time_init because
 * request_irq() needs kmem initialised (see time_init()).
 * NOTE(review): the bind_virq_to_irqhandler() argument list (virq,
 * cpu, handler, flags, name, dev_id) is missing from this extract;
 * compare against the upstream file.
 */
902 static void setup_cpu0_timer_irq(void)
904 	per_cpu(timer_irq, 0) =
905 	bind_virq_to_irqhandler(
912 	BUG_ON(per_cpu(timer_irq, 0) < 0);
/*
 * time_init(): boot-time clock setup.  Defers to hpet_time_init() when
 * an HPET is capable; otherwise seeds the processing baselines from
 * Xen's shadow time, initialises stolen/blocked accounting for CPU0,
 * reports the detected CPU frequency, sets up x86_64 vsyscall time
 * state, and defers IRQ binding to setup_cpu0_timer_irq().
 * NOTE(review): several lines (the early return inside the HPET branch,
 * xtime seeding from shadow_tv, init_cpu_khz() call, #endif lines, and
 * closing braces) are missing from this extract.
 */
915 void __init time_init(void)
917 #ifdef CONFIG_HPET_TIMER
918 	if (is_hpet_capable()) {
920 	* HPET initialization needs to do memory-mapped io. So, let
921 	* us do a late initialization after mem_init().
923 	late_time_init = hpet_time_init;
927 	get_time_values_from_xen();
929 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
930 	per_cpu(processed_system_time, 0) = processed_system_time;
931 	init_missing_ticks_accounting(0);
936 	printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
937 	cpu_khz / 1000, cpu_khz % 1000);
939 #if defined(__x86_64__)
940 	vxtime.mode = VXTIME_TSC;
941 	vxtime.quot = (1000000L << 32) / vxtime_hz;
942 	vxtime.tsc_quot = (1000L << 32) / cpu_khz;
944 	rdtscll(vxtime.last_tsc);
947 	/* Cannot request_irq() until kmem is initialised. */
948 	late_time_init = setup_cpu0_timer_irq;
951 /* Convert jiffies to system time. */
/*
 * jiffies_to_st(): convert an absolute jiffies value to Xen system
 * time (ns), for programming one-shot hypervisor timeouts.  A timeout
 * already in the past yields "one tick from now"; a huge delta yields
 * 0, which tells Xen "no timeout".  Read under the xtime_lock seqlock.
 * NOTE(review): locals ('seq', 'delta', 'st'), the 'delta = j - jiffies'
 * computation, the negative-delta branch opener, 'st = 0;', closing
 * braces, and 'return st;' are missing from this extract.
 */
952 u64 jiffies_to_st(unsigned long j)
959 	seq = read_seqbegin(&xtime_lock);
962 	/* Triggers in some wrap-around cases, but that's okay:
963 	* we just end up with a shorter timeout. */
964 	st = processed_system_time + NS_PER_TICK;
965 	} else if (((unsigned long)delta >> (BITS_PER_LONG-3)) != 0) {
966 	/* Very long timeout means there is no pending timer.
967 	* We indicate this to Xen by passing zero timeout. */
970 	st = processed_system_time + delta * (u64)NS_PER_TICK;
972 	} while (read_seqretry(&xtime_lock, seq));
976 EXPORT_SYMBOL(jiffies_to_st);
979 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
980 * These functions are based on implementations from arch/s390/kernel/time.c
/*
 * stop_hz_timer(): enter tickless mode on an idle CPU -- advertise
 * ourselves in nohz_cpu_mask, bail out (clearing the mask) if RCU,
 * softirqs, or an imminent timer still need ticks, otherwise program
 * Xen with a one-shot timeout at the next timer expiry.
 * NOTE(review): the local 'j' declaration, the smp_mb() the comment
 * block refers to, the early return, the BUG() on set_timer_op
 * failure, and closing braces are missing from this extract.
 */
982 static void stop_hz_timer(void)
984 	unsigned int cpu = smp_processor_id();
987 	cpu_set(cpu, nohz_cpu_mask);
989 	/* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
990 	/* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
991 	/* value of rcp->cur that matches rdp->quiescbatch and allows us to */
992 	/* stop the hz timer then the cpumasks created for subsequent values */
993 	/* of cur in rcu_start_batch are guaranteed to pick up the updated */
994 	/* nohz_cpu_mask and so will not depend on this cpu. */
998 	/* Leave ourselves in tick mode if rcu or softirq or timer pending. */
999 	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
1000 	(j = next_timer_interrupt(), time_before_eq(j, jiffies))) {
1001 	cpu_clear(cpu, nohz_cpu_mask);
1005 	if (HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0)
1009 static void start_hz_timer(void)
1011 cpu_clear(smp_processor_id(), nohz_cpu_mask);
/*
 * Idle this VCPU: drop the periodic tick and block in the hypervisor
 * until the next event, then resume ticking.
 */
void safe_halt(void)
{
	stop_hz_timer();
	/* Blocking includes an implicit local_irq_enable(). */
	HYPERVISOR_block();
	start_hz_timer();
}
EXPORT_SYMBOL(safe_halt);
1025 if (irqs_disabled())
1026 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
1028 EXPORT_SYMBOL(halt);
1030 /* No locking required. We are only CPU running, and interrupts are off. */
/* No locking required. We are only CPU running, and interrupts are off. */
/*
 * time_resume(): re-sync all time state with Xen after save/restore --
 * refresh the shadow, rebase the processing baselines, and re-register
 * CPU0's runstate accounting.
 * NOTE(review): lines (likely init_cpu_khz(), per-CPU loop, and the
 * closing brace) are missing from this extract; compare upstream.
 */
1031 void time_resume(void)
1035 	get_time_values_from_xen();
1037 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
1038 	per_cpu(processed_system_time, 0) = processed_system_time;
1039 	init_missing_ticks_accounting(0);
1045 static char timer_name[NR_CPUS][15];
/*
 * local_setup_timer(): bring-up path for a secondary CPU -- seed its
 * processing baseline from CPU0's shadow (its own shadow is not yet
 * initialised), start runstate accounting, and bind its VIRQ_TIMER.
 * NOTE(review): the 'do {' opener, the bind_virq_to_irqhandler()
 * argument list, and closing braces are missing from this extract.
 */
1047 void local_setup_timer(unsigned int cpu)
1054 	seq = read_seqbegin(&xtime_lock);
1055 	/* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
1056 	per_cpu(processed_system_time, cpu) =
1057 	per_cpu(shadow_time, 0).system_timestamp;
1058 	init_missing_ticks_accounting(cpu);
1059 	} while (read_seqretry(&xtime_lock, seq));
1061 	sprintf(timer_name[cpu], "timer%d", cpu);
1062 	per_cpu(timer_irq, cpu) =
1063 	bind_virq_to_irqhandler(
1070 	BUG_ON(per_cpu(timer_irq, cpu) < 0);
1073 void local_teardown_timer(unsigned int cpu)
1076 unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
1081 * /proc/sys/xen: This really belongs in another file. It can stay here for
1084 static ctl_table xen_subtable[] = {
1087 .procname = "independent_wallclock",
1088 .data = &independent_wallclock,
1089 .maxlen = sizeof(independent_wallclock),
1091 .proc_handler = proc_dointvec
1095 .procname = "permitted_clock_jitter",
1096 .data = &permitted_clock_jitter,
1097 .maxlen = sizeof(permitted_clock_jitter),
1099 .proc_handler = proc_doulongvec_minmax
1103 static ctl_table xen_table[] = {
1108 .child = xen_subtable},
1111 static int __init xen_sysctl_init(void)
1113 (void)register_sysctl_table(xen_table, 0);
1116 __initcall(xen_sysctl_init);