2 * linux/arch/i386/kernel/time.c
4 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
6 * This file contains the PC-specific time handling details:
7 * reading the RTC at bootup, etc..
8 * 1994-07-02 Alan Modra
9 * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
10 * 1995-03-26 Markus Kuhn
11 * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
12 * precision CMOS clock update
13 * 1996-05-03 Ingo Molnar
14 * fixed time warps in do_[slow|fast]_gettimeoffset()
15 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
16 * "A Kernel Model for Precision Timekeeping" by Dave Mills
17 * 1998-09-05 (Various)
18 * More robust do_fast_gettimeoffset() algorithm implemented
19 * (works with APM, Cyrix 6x86MX and Centaur C6),
20 * monotonic gettimeofday() with fast_get_timeoffset(),
21 * drift-proof precision TSC calibration on boot
22 * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
23 * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
24 * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
25 * 1998-12-16 Andrea Arcangeli
26 * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
27 * because it was not accounting for lost_ticks.
28 * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
29 * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
30 * serialize accesses to xtime/lost_ticks).
33 #include <linux/errno.h>
34 #include <linux/sched.h>
35 #include <linux/kernel.h>
36 #include <linux/param.h>
37 #include <linux/string.h>
39 #include <linux/interrupt.h>
40 #include <linux/time.h>
41 #include <linux/delay.h>
42 #include <linux/init.h>
43 #include <linux/smp.h>
44 #include <linux/module.h>
45 #include <linux/sysdev.h>
46 #include <linux/bcd.h>
47 #include <linux/efi.h>
48 #include <linux/mca.h>
49 #include <linux/sysctl.h>
50 #include <linux/percpu.h>
51 #include <linux/kernel_stat.h>
52 #include <linux/posix-timers.h>
58 #include <asm/delay.h>
59 #include <asm/mpspec.h>
60 #include <asm/uaccess.h>
61 #include <asm/processor.h>
62 #include <asm/timer.h>
63 #include <asm/sections.h>
65 #include "mach_time.h"
67 #include <linux/timex.h>
71 #include <asm/arch_hooks.h>
73 #include <xen/evtchn.h>
74 #include <xen/interface/vcpu.h>
76 #if defined (__i386__)
77 #include <asm/i8259.h>
80 int pit_latch_buggy; /* extern */
82 #if defined(__x86_64__)
83 unsigned long vxtime_hz = PIT_TICK_RATE;
84 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
85 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
86 unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
87 struct timespec __xtime __section_xtime;
88 struct timezone __sys_tz __section_sys_tz;
91 #define USEC_PER_TICK (USEC_PER_SEC / HZ)
92 #define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
93 #define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
95 #define NS_SCALE 10 /* 2^10, carefully chosen */
96 #define US_SCALE 32 /* 2^32, arbitralrily chosen */
98 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
99 EXPORT_SYMBOL(cpu_khz);
101 extern unsigned long wall_jiffies;
103 DEFINE_SPINLOCK(rtc_lock);
104 EXPORT_SYMBOL(rtc_lock);
106 extern struct init_timer_opts timer_tsc_init;
107 extern struct timer_opts timer_tsc;
108 #define timer_none timer_tsc
110 /* These are peridically updated in shared_info, and then copied here. */
111 struct shadow_time_info {
112 u64 tsc_timestamp; /* TSC at last update of time vals. */
113 u64 system_timestamp; /* Time, in nanosecs, since boot. */
119 static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
120 static struct timespec shadow_tv;
121 static u32 shadow_tv_version;
123 /* Keep track of last time we did processing/updating of jiffies and xtime. */
124 static u64 processed_system_time; /* System time (ns) at last processing. */
125 static DEFINE_PER_CPU(u64, processed_system_time);
127 /* How much CPU time was spent blocked and how much was 'stolen'? */
128 static DEFINE_PER_CPU(u64, processed_stolen_time);
129 static DEFINE_PER_CPU(u64, processed_blocked_time);
131 /* Current runstate of each CPU (updated automatically by the hypervisor). */
132 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
134 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
135 #define NS_PER_TICK (1000000000LL/HZ)
137 static inline void __normalize_time(time_t *sec, s64 *nsec)
139 while (*nsec >= NSEC_PER_SEC) {
140 (*nsec) -= NSEC_PER_SEC;
144 (*nsec) += NSEC_PER_SEC;
149 /* Does this guest OS track Xen time, or set its wall clock independently? */
150 static int independent_wallclock = 0;
151 static int __init __independent_wallclock(char *str)
153 independent_wallclock = 1;
156 __setup("independent_wallclock", __independent_wallclock);
158 /* Permitted clock jitter, in nsecs, beyond which a warning will be printed. */
159 static unsigned long permitted_clock_jitter = 10000000UL; /* 10ms */
160 static int __init __permitted_clock_jitter(char *str)
162 permitted_clock_jitter = simple_strtoul(str, NULL, 0);
165 __setup("permitted_clock_jitter=", __permitted_clock_jitter);
168 int tsc_disable __devinitdata = 0;
171 static void delay_tsc(unsigned long loops)
173 unsigned long bclock, now;
179 } while ((now - bclock) < loops);
182 struct timer_opts timer_tsc = {
188 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
189 * yielding a 64-bit result.
191 static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
212 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
213 : "a" ((u32)delta), "1" ((u32)(delta >> US_SCALE)), "2" (mul_frac) );
216 "mul %%rdx ; shrd $32,%%rdx,%%rax"
217 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
223 #if defined (__i386__)
224 int read_current_timer(unsigned long *timer_val)
231 void init_cpu_khz(void)
233 u64 __cpu_khz = 1000000ULL << US_SCALE;
234 struct vcpu_time_info *info;
235 info = &HYPERVISOR_shared_info->vcpu_info[0].time;
236 do_div(__cpu_khz, info->tsc_to_system_mul);
237 if (info->tsc_shift < 0)
238 cpu_khz = __cpu_khz << -info->tsc_shift;
240 cpu_khz = __cpu_khz >> info->tsc_shift;
243 static u64 get_nsec_offset(struct shadow_time_info *shadow)
247 delta = now - shadow->tsc_timestamp;
248 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
251 static unsigned long get_usec_offset(struct shadow_time_info *shadow)
255 delta = now - shadow->tsc_timestamp;
256 return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
259 static void __update_wallclock(time_t sec, long nsec)
261 long wtm_nsec, xtime_nsec;
262 time_t wtm_sec, xtime_sec;
265 /* Adjust wall-clock time base based on wall_jiffies ticks. */
266 wc_nsec = processed_system_time;
267 wc_nsec += sec * (u64)NSEC_PER_SEC;
269 wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
271 /* Split wallclock base into seconds and nanoseconds. */
273 xtime_nsec = do_div(tmp, 1000000000);
274 xtime_sec = (time_t)tmp;
276 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
277 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
279 set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
280 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
285 static void update_wallclock(void)
287 shared_info_t *s = HYPERVISOR_shared_info;
290 shadow_tv_version = s->wc_version;
292 shadow_tv.tv_sec = s->wc_sec;
293 shadow_tv.tv_nsec = s->wc_nsec;
295 } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
297 if (!independent_wallclock)
298 __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
302 * Reads a consistent set of time-base values from Xen, into a shadow data
305 static void get_time_values_from_xen(void)
307 shared_info_t *s = HYPERVISOR_shared_info;
308 struct vcpu_time_info *src;
309 struct shadow_time_info *dst;
311 src = &s->vcpu_info[smp_processor_id()].time;
312 dst = &per_cpu(shadow_time, smp_processor_id());
315 dst->version = src->version;
317 dst->tsc_timestamp = src->tsc_timestamp;
318 dst->system_timestamp = src->system_time;
319 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
320 dst->tsc_shift = src->tsc_shift;
322 } while ((src->version & 1) | (dst->version ^ src->version));
324 dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
327 static inline int time_values_up_to_date(int cpu)
329 struct vcpu_time_info *src;
330 struct shadow_time_info *dst;
332 src = &HYPERVISOR_shared_info->vcpu_info[cpu].time;
333 dst = &per_cpu(shadow_time, cpu);
336 return (dst->version == src->version);
340 * This is a special lock that is owned by the CPU and holds the index
341 * register we are working with. It is required for NMI access to the
342 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
344 volatile unsigned long cmos_lock = 0;
345 EXPORT_SYMBOL(cmos_lock);
347 /* Routines for accessing the CMOS RAM/RTC. */
348 unsigned char rtc_cmos_read(unsigned char addr)
351 lock_cmos_prefix(addr);
352 outb_p(addr, RTC_PORT(0));
353 val = inb_p(RTC_PORT(1));
354 lock_cmos_suffix(addr);
357 EXPORT_SYMBOL(rtc_cmos_read);
359 void rtc_cmos_write(unsigned char val, unsigned char addr)
361 lock_cmos_prefix(addr);
362 outb_p(addr, RTC_PORT(0));
363 outb_p(val, RTC_PORT(1));
364 lock_cmos_suffix(addr);
366 EXPORT_SYMBOL(rtc_cmos_write);
369 * This version of gettimeofday has microsecond resolution
370 * and better than microsecond precision on fast x86 machines with TSC.
372 void do_gettimeofday(struct timeval *tv)
375 unsigned long usec, sec;
376 unsigned long max_ntp_tick;
379 struct shadow_time_info *shadow;
380 u32 local_time_version;
383 shadow = &per_cpu(shadow_time, cpu);
388 local_time_version = shadow->version;
389 seq = read_seqbegin(&xtime_lock);
391 usec = get_usec_offset(shadow);
392 lost = jiffies - wall_jiffies;
395 * If time_adjust is negative then NTP is slowing the clock
396 * so make sure not to go into next possible interval.
397 * Better to lose some accuracy than have time go backwards..
399 if (unlikely(time_adjust < 0)) {
400 max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
401 usec = min(usec, max_ntp_tick);
404 usec += lost * max_ntp_tick;
406 else if (unlikely(lost))
407 usec += lost * (USEC_PER_SEC / HZ);
410 usec += (xtime.tv_nsec / NSEC_PER_USEC);
412 nsec = shadow->system_timestamp - processed_system_time;
413 __normalize_time(&sec, &nsec);
414 usec += (long)nsec / NSEC_PER_USEC;
416 if (unlikely(!time_values_up_to_date(cpu))) {
418 * We may have blocked for a long time,
419 * rendering our calculations invalid
420 * (e.g. the time delta may have
421 * overflowed). Detect that and recalculate
424 get_time_values_from_xen();
427 } while (read_seqretry(&xtime_lock, seq) ||
428 (local_time_version != shadow->version));
432 while (usec >= USEC_PER_SEC) {
433 usec -= USEC_PER_SEC;
441 EXPORT_SYMBOL(do_gettimeofday);
443 int do_settimeofday(struct timespec *tv)
448 struct shadow_time_info *shadow;
451 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
455 shadow = &per_cpu(shadow_time, cpu);
457 write_seqlock_irq(&xtime_lock);
460 * Ensure we don't get blocked for a long time so that our time delta
461 * overflows. If that were to happen then our shadow time values would
462 * be stale, so we can retry with fresh ones.
465 nsec = tv->tv_nsec - get_nsec_offset(shadow);
466 if (time_values_up_to_date(cpu))
468 get_time_values_from_xen();
471 __normalize_time(&sec, &nsec);
473 if (is_initial_xendomain() && !independent_wallclock) {
474 op.cmd = DOM0_SETTIME;
475 op.u.settime.secs = sec;
476 op.u.settime.nsecs = nsec;
477 op.u.settime.system_time = shadow->system_timestamp;
478 HYPERVISOR_dom0_op(&op);
480 } else if (independent_wallclock) {
481 nsec -= shadow->system_timestamp;
482 __normalize_time(&sec, &nsec);
483 __update_wallclock(sec, nsec);
486 write_sequnlock_irq(&xtime_lock);
494 EXPORT_SYMBOL(do_settimeofday);
496 static void sync_xen_wallclock(unsigned long dummy);
497 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
498 static void sync_xen_wallclock(unsigned long dummy)
504 if (!ntp_synced() || independent_wallclock || !is_initial_xendomain())
507 write_seqlock_irq(&xtime_lock);
510 nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
511 __normalize_time(&sec, &nsec);
513 op.cmd = DOM0_SETTIME;
514 op.u.settime.secs = sec;
515 op.u.settime.nsecs = nsec;
516 op.u.settime.system_time = processed_system_time;
517 HYPERVISOR_dom0_op(&op);
521 write_sequnlock_irq(&xtime_lock);
523 /* Once per minute. */
524 mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ);
527 static int set_rtc_mmss(unsigned long nowtime)
532 if (independent_wallclock || !is_initial_xendomain())
535 /* gets recalled with irq locally disabled */
536 spin_lock_irqsave(&rtc_lock, flags);
538 retval = efi_set_rtc_mmss(nowtime);
540 retval = mach_set_rtc_mmss(nowtime);
541 spin_unlock_irqrestore(&rtc_lock, flags);
546 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
547 * Note: This function is required to return accurate
548 * time even in the absence of multiple timer ticks.
550 unsigned long long monotonic_clock(void)
553 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
555 u32 local_time_version;
558 local_time_version = shadow->version;
560 time = shadow->system_timestamp + get_nsec_offset(shadow);
561 if (!time_values_up_to_date(cpu))
562 get_time_values_from_xen();
564 } while (local_time_version != shadow->version);
570 EXPORT_SYMBOL(monotonic_clock);
572 unsigned long long sched_clock(void)
574 return monotonic_clock();
577 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
578 unsigned long profile_pc(struct pt_regs *regs)
580 unsigned long pc = instruction_pointer(regs);
583 /* Assume the lock function has either no stack frame or only a single word.
584 This checks if the address on the stack looks like a kernel text address.
585 There is a small window for false hits, but in that case the tick
586 is just accounted to the spinlock function.
587 Better would be to write these functions in assembler again
588 and check exactly. */
589 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
590 char *v = *(char **)regs->rsp;
591 if ((v >= _stext && v <= _etext) ||
592 (v >= _sinittext && v <= _einittext) ||
593 (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
594 return (unsigned long)v;
595 return ((unsigned long *)regs->rsp)[1];
598 if (!user_mode_vm(regs) && in_lock_functions(pc))
599 return *(unsigned long *)(regs->ebp + 4);
604 EXPORT_SYMBOL(profile_pc);
607 irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
609 s64 delta, delta_cpu, stolen, blocked;
611 int i, cpu = smp_processor_id();
612 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
613 struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
615 write_seqlock(&xtime_lock);
618 get_time_values_from_xen();
620 /* Obtain a consistent snapshot of elapsed wallclock cycles. */
622 shadow->system_timestamp + get_nsec_offset(shadow);
623 delta -= processed_system_time;
624 delta_cpu -= per_cpu(processed_system_time, cpu);
627 * Obtain a consistent snapshot of stolen/blocked cycles. We
628 * can use state_entry_time to detect if we get preempted here.
631 sched_time = runstate->state_entry_time;
633 stolen = runstate->time[RUNSTATE_runnable] +
634 runstate->time[RUNSTATE_offline] -
635 per_cpu(processed_stolen_time, cpu);
636 blocked = runstate->time[RUNSTATE_blocked] -
637 per_cpu(processed_blocked_time, cpu);
639 } while (sched_time != runstate->state_entry_time);
640 } while (!time_values_up_to_date(cpu));
642 if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
643 unlikely(delta_cpu < -(s64)permitted_clock_jitter))
644 && printk_ratelimit()) {
645 printk("Timer ISR/%d: Time went backwards: "
646 "delta=%lld delta_cpu=%lld shadow=%lld "
647 "off=%lld processed=%lld cpu_processed=%lld\n",
648 cpu, delta, delta_cpu, shadow->system_timestamp,
649 (s64)get_nsec_offset(shadow),
650 processed_system_time,
651 per_cpu(processed_system_time, cpu));
652 for (i = 0; i < num_online_cpus(); i++)
653 printk(" %d: %lld\n", i,
654 per_cpu(processed_system_time, i));
657 /* System-wide jiffy work. */
658 while (delta >= NS_PER_TICK) {
659 delta -= NS_PER_TICK;
660 processed_system_time += NS_PER_TICK;
664 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
669 write_sequnlock(&xtime_lock);
672 * Account stolen ticks.
673 * HACK: Passing NULL to account_steal_time()
674 * ensures that the ticks are accounted as stolen.
676 if ((stolen > 0) && (delta_cpu > 0)) {
678 if (unlikely(delta_cpu < 0))
679 stolen += delta_cpu; /* clamp local-time progress */
680 do_div(stolen, NS_PER_TICK);
681 per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
682 per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
683 account_steal_time(NULL, (cputime_t)stolen);
687 * Account blocked ticks.
688 * HACK: Passing idle_task to account_steal_time()
689 * ensures that the ticks are accounted as idle/wait.
691 if ((blocked > 0) && (delta_cpu > 0)) {
692 delta_cpu -= blocked;
693 if (unlikely(delta_cpu < 0))
694 blocked += delta_cpu; /* clamp local-time progress */
695 do_div(blocked, NS_PER_TICK);
696 per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
697 per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
698 account_steal_time(idle_task(cpu), (cputime_t)blocked);
701 /* Account user/system ticks. */
703 do_div(delta_cpu, NS_PER_TICK);
704 per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
706 account_user_time(current, (cputime_t)delta_cpu);
708 account_system_time(current, HARDIRQ_OFFSET,
709 (cputime_t)delta_cpu);
712 /* Local timer processing (see update_process_times()). */
714 if (rcu_pending(cpu))
715 rcu_check_callbacks(cpu, user_mode(regs));
717 run_posix_cpu_timers(current);
722 static void init_missing_ticks_accounting(int cpu)
724 struct vcpu_register_runstate_memory_area area;
725 struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
727 memset(runstate, 0, sizeof(*runstate));
729 area.addr.v = runstate;
730 HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
732 per_cpu(processed_blocked_time, cpu) =
733 runstate->time[RUNSTATE_blocked];
734 per_cpu(processed_stolen_time, cpu) =
735 runstate->time[RUNSTATE_runnable] +
736 runstate->time[RUNSTATE_offline];
739 /* not static: needed by APM */
740 unsigned long get_cmos_time(void)
742 unsigned long retval;
745 spin_lock_irqsave(&rtc_lock, flags);
748 retval = efi_get_time();
750 retval = mach_get_cmos_time();
752 spin_unlock_irqrestore(&rtc_lock, flags);
756 EXPORT_SYMBOL(get_cmos_time);
758 static void sync_cmos_clock(unsigned long dummy);
760 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
762 static void sync_cmos_clock(unsigned long dummy)
764 struct timeval now, next;
768 * If we have an externally synchronized Linux clock, then update
769 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
770 * called as close as possible to 500 ms before the new second starts.
771 * This code is run on a timer. If the clock is set, that timer
772 * may not expire at the correct time. Thus, we adjust...
776 * Not synced, exit, do not restart a timer (if one is
777 * running, let it run out).
781 do_gettimeofday(&now);
782 if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
783 now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
784 fail = set_rtc_mmss(now.tv_sec);
786 next.tv_usec = USEC_AFTER - now.tv_usec;
787 if (next.tv_usec <= 0)
788 next.tv_usec += USEC_PER_SEC;
795 if (next.tv_usec >= USEC_PER_SEC) {
797 next.tv_usec -= USEC_PER_SEC;
799 mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
802 void notify_arch_cmos_timer(void)
804 mod_timer(&sync_cmos_timer, jiffies + 1);
805 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
808 static long clock_cmos_diff, sleep_start;
810 static int timer_suspend(struct sys_device *dev, pm_message_t state)
813 * Estimate time zone so that set_time can update the clock
815 clock_cmos_diff = -get_cmos_time();
816 clock_cmos_diff += get_seconds();
817 sleep_start = get_cmos_time();
821 static int timer_resume(struct sys_device *dev)
825 unsigned long sleep_length;
827 #ifdef CONFIG_HPET_TIMER
828 if (is_hpet_enabled())
831 sec = get_cmos_time() + clock_cmos_diff;
832 sleep_length = (get_cmos_time() - sleep_start) * HZ;
833 write_seqlock_irqsave(&xtime_lock, flags);
836 jiffies_64 += sleep_length;
837 wall_jiffies += sleep_length;
838 write_sequnlock_irqrestore(&xtime_lock, flags);
839 touch_softlockup_watchdog();
843 static struct sysdev_class timer_sysclass = {
844 .resume = timer_resume,
845 .suspend = timer_suspend,
846 set_kset_name("timer"),
850 /* XXX this driverfs stuff should probably go elsewhere later -john */
851 static struct sys_device device_timer = {
853 .cls = &timer_sysclass,
856 static int time_init_device(void)
858 int error = sysdev_class_register(&timer_sysclass);
860 error = sysdev_register(&device_timer);
864 device_initcall(time_init_device);
866 #ifdef CONFIG_HPET_TIMER
867 extern void (*late_time_init)(void);
868 /* Duplicate of time_init() below, with hpet_enable part added */
869 static void __init hpet_time_init(void)
871 xtime.tv_sec = get_cmos_time();
872 xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
873 set_normalized_timespec(&wall_to_monotonic,
874 -xtime.tv_sec, -xtime.tv_nsec);
876 if ((hpet_enable() >= 0) && hpet_use_timer) {
877 printk("Using HPET for base-timer\n");
883 /* Dynamically-mapped IRQ. */
884 DEFINE_PER_CPU(int, timer_irq);
886 extern void (*late_time_init)(void);
887 static void setup_cpu0_timer_irq(void)
889 per_cpu(timer_irq, 0) =
890 bind_virq_to_irqhandler(
897 BUG_ON(per_cpu(timer_irq, 0) < 0);
900 void __init time_init(void)
902 #ifdef CONFIG_HPET_TIMER
903 if (is_hpet_capable()) {
905 * HPET initialization needs to do memory-mapped io. So, let
906 * us do a late initialization after mem_init().
908 late_time_init = hpet_time_init;
912 get_time_values_from_xen();
914 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
915 per_cpu(processed_system_time, 0) = processed_system_time;
916 init_missing_ticks_accounting(0);
921 printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
922 cpu_khz / 1000, cpu_khz % 1000);
924 #if defined(__x86_64__)
925 vxtime.mode = VXTIME_TSC;
926 vxtime.quot = (1000000L << US_SCALE) / vxtime_hz;
927 vxtime.tsc_quot = (1000L << US_SCALE) / cpu_khz;
929 rdtscll(vxtime.last_tsc);
932 /* Cannot request_irq() until kmem is initialised. */
933 late_time_init = setup_cpu0_timer_irq;
936 /* Convert jiffies to system time. */
937 u64 jiffies_to_st(unsigned long j)
944 seq = read_seqbegin(&xtime_lock);
947 /* Triggers in some wrap-around cases, but that's okay:
948 * we just end up with a shorter timeout. */
949 st = processed_system_time + NS_PER_TICK;
950 } else if (((unsigned long)delta >> (BITS_PER_LONG-3)) != 0) {
951 /* Very long timeout means there is no pending timer.
952 * We indicate this to Xen by passing zero timeout. */
955 st = processed_system_time + delta * (u64)NS_PER_TICK;
957 } while (read_seqretry(&xtime_lock, seq));
961 EXPORT_SYMBOL(jiffies_to_st);
964 * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
965 * These functions are based on implementations from arch/s390/kernel/time.c
967 static void stop_hz_timer(void)
969 unsigned int cpu = smp_processor_id();
972 cpu_set(cpu, nohz_cpu_mask);
974 /* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
975 /* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
976 /* value of rcp->cur that matches rdp->quiescbatch and allows us to */
977 /* stop the hz timer then the cpumasks created for subsequent values */
978 /* of cur in rcu_start_batch are guaranteed to pick up the updated */
979 /* nohz_cpu_mask and so will not depend on this cpu. */
983 /* Leave ourselves in tick mode if rcu or softirq or timer pending. */
984 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
985 (j = next_timer_interrupt(), time_before_eq(j, jiffies))) {
986 cpu_clear(cpu, nohz_cpu_mask);
990 if (HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0)
994 static void start_hz_timer(void)
996 cpu_clear(smp_processor_id(), nohz_cpu_mask);
999 void raw_safe_halt(void)
1002 /* Blocking includes an implicit local_irq_enable(). */
1006 EXPORT_SYMBOL(raw_safe_halt);
1010 if (irqs_disabled())
1011 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
1013 EXPORT_SYMBOL(halt);
1015 /* No locking required. We are only CPU running, and interrupts are off. */
1016 void time_resume(void)
1020 get_time_values_from_xen();
1022 processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
1023 per_cpu(processed_system_time, 0) = processed_system_time;
1024 init_missing_ticks_accounting(0);
1030 static char timer_name[NR_CPUS][15];
1032 void local_setup_timer(unsigned int cpu)
1039 seq = read_seqbegin(&xtime_lock);
1040 /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */
1041 per_cpu(processed_system_time, cpu) =
1042 per_cpu(shadow_time, 0).system_timestamp;
1043 init_missing_ticks_accounting(cpu);
1044 } while (read_seqretry(&xtime_lock, seq));
1046 sprintf(timer_name[cpu], "timer%d", cpu);
1047 per_cpu(timer_irq, cpu) =
1048 bind_virq_to_irqhandler(
1055 BUG_ON(per_cpu(timer_irq, cpu) < 0);
1058 void local_teardown_timer(unsigned int cpu)
1061 unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL);
1066 * /proc/sys/xen: This really belongs in another file. It can stay here for
1069 static ctl_table xen_subtable[] = {
1072 .procname = "independent_wallclock",
1073 .data = &independent_wallclock,
1074 .maxlen = sizeof(independent_wallclock),
1076 .proc_handler = proc_dointvec
1080 .procname = "permitted_clock_jitter",
1081 .data = &permitted_clock_jitter,
1082 .maxlen = sizeof(permitted_clock_jitter),
1084 .proc_handler = proc_doulongvec_minmax
1088 static ctl_table xen_table[] = {
1093 .child = xen_subtable},
1096 static int __init xen_sysctl_init(void)
1098 (void)register_sysctl_table(xen_table, 0);
1101 __initcall(xen_sysctl_init);