*
* X86-64 port
* Andi Kleen.
- *
- * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
+ *
+ * CPU hotplug support - ashok.raj@intel.com
*/
/*
#include <stdarg.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/irq.h>
#include <linux/ptrace.h>
-#include <linux/version.h>
+#include <linux/utsname.h>
+#include <linux/random.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
+#include <asm/idle.h>
asmlinkage extern void ret_from_fork(void);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-atomic_t hlt_counter = ATOMIC_INIT(0);
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
/*
* Powermanagement idle function, if any..
*/
void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-void disable_hlt(void)
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
{
- atomic_inc(&hlt_counter);
+ atomic_notifier_chain_register(&idle_notifier, n);
}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL(idle_notifier_unregister);
+
+enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
+static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
-EXPORT_SYMBOL(disable_hlt);
+void enter_idle(void)
+{
+ __get_cpu_var(idle_state) = CPU_IDLE;
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+}
-void enable_hlt(void)
+static void __exit_idle(void)
{
- atomic_dec(&hlt_counter);
+ __get_cpu_var(idle_state) = CPU_NOT_IDLE;
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
-EXPORT_SYMBOL(enable_hlt);
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+ if (current->pid | read_pda(irqcount))
+ return;
+ __exit_idle();
+}
/*
* We use this if we don't have any better
* idle routine..
*/
-void default_idle(void)
+static void default_idle(void)
{
- if (!atomic_read(&hlt_counter)) {
+ local_irq_enable();
+
+ current_thread_info()->status &= ~TS_POLLING;
+ smp_mb__after_clear_bit();
+ while (!need_resched()) {
local_irq_disable();
if (!need_resched())
safe_halt();
else
local_irq_enable();
}
+ current_thread_info()->status |= TS_POLLING;
}
/*
*/
static void poll_idle (void)
{
- int oldval;
-
local_irq_enable();
- /*
- * Deal with another CPU just having chosen a thread to
- * run here:
- */
- oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
- if (!oldval) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- asm volatile(
- "2:"
- "testl %0,%1;"
- "rep; nop;"
- "je 2b;"
- : :
- "i" (_TIF_NEED_RESCHED),
- "m" (current_thread_info()->flags));
- } else {
- set_need_resched();
+ asm volatile(
+ "2:"
+ "testl %0,%1;"
+ "rep; nop;"
+ "je 2b;"
+ : :
+ "i" (_TIF_NEED_RESCHED),
+ "m" (current_thread_info()->flags));
+}
+
+void cpu_idle_wait(void)
+{
+ unsigned int cpu, this_cpu = get_cpu();
+ cpumask_t map;
+
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+
+ cpus_clear(map);
+ for_each_online_cpu(cpu) {
+ per_cpu(cpu_idle_state, cpu) = 1;
+ cpu_set(cpu, map);
}
+
+ __get_cpu_var(cpu_idle_state) = 0;
+
+ wmb();
+ do {
+ ssleep(1);
+ for_each_online_cpu(cpu) {
+ if (cpu_isset(cpu, map) &&
+ !per_cpu(cpu_idle_state, cpu))
+ cpu_clear(cpu, map);
+ }
+ cpus_and(map, map, cpu_online_map);
+ } while (!cpus_empty(map));
}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+#ifdef CONFIG_HOTPLUG_CPU
+DECLARE_PER_CPU(int, cpu_state);
+
+#include <asm/nmi.h>
+/* We halt the CPU with physical CPU hotplug */
+static inline void play_dead(void)
+{
+ idle_task_exit();
+ wbinvd();
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ local_irq_disable();
+ while (1)
+ halt();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
/*
* The idle thread. There's no useful work to be
*/
void cpu_idle (void)
{
+ current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
void (*idle)(void);
- /*
- * Mark this as an RCU critical section so that
- * synchronize_kernel() in the unload path waits
- * for our completion.
- */
- rcu_read_lock();
+
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
+
+ rmb();
idle = pm_idle;
if (!idle)
idle = default_idle;
+ if (cpu_is_offline(smp_processor_id()))
+ play_dead();
+ enter_idle();
idle();
- rcu_read_unlock();
+ __exit_idle();
}
+
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
{
local_irq_enable();
- if (!need_resched()) {
- set_thread_flag(TIF_POLLING_NRFLAG);
- do {
- __monitor((void *)¤t_thread_info()->flags, 0, 0);
- if (need_resched())
- break;
- __mwait(0, 0);
- } while (!need_resched());
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ while (!need_resched()) {
+ __monitor((void *)¤t_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (need_resched())
+ break;
+ __mwait(0, 0);
}
}
-void __init select_idle_routine(const struct cpuinfo_x86 *c)
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
static int printed;
if (cpu_has(c, X86_FEATURE_MWAIT)) {
pm_idle = poll_idle;
}
+ boot_option_idle_override = 1;
return 1;
}
printk("\n");
print_modules();
- printk("Pid: %d, comm: %.20s %s %s\n",
- current->pid, current->comm, print_tainted(), UTS_RELEASE);
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ system_utsname.release,
+ (int)strcspn(system_utsname.version, " "),
+ system_utsname.version);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
- printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+ printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
+ regs->eflags);
printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->rax, regs->rbx, regs->rcx);
printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
void show_regs(struct pt_regs *regs)
{
+ printk("CPU %d:", smp_processor_id());
__show_regs(regs);
- show_trace(®s->rsp);
+ show_trace(NULL, regs, (void *)(regs + 1));
}
/*
{
struct task_struct *me = current;
struct thread_struct *t = &me->thread;
+
if (me->thread.io_bitmap_ptr) {
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
struct task_struct *tsk = current;
struct thread_info *t = current_thread_info();
- if (t->flags & _TIF_ABI_PENDING)
+ if (t->flags & _TIF_ABI_PENDING) {
t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
+ if (t->flags & _TIF_IA32)
+ current_thread_info()->status |= TS_COMPAT;
+ }
tsk->thread.debugreg0 = 0;
tsk->thread.debugreg1 = 0;
* Forget coprocessor state..
*/
clear_fpu(tsk);
- tsk->used_math = 0;
+ clear_used_math();
}
void release_thread(struct task_struct *dead_task)
struct pt_regs * childregs;
struct task_struct *me = current;
- childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+ childregs = ((struct pt_regs *)
+ (THREAD_SIZE + task_stack_page(p))) - 1;
*childregs = *regs;
childregs->rax = 0;
childregs->rsp = rsp;
- if (rsp == ~0UL) {
+ if (rsp == ~0UL)
childregs->rsp = (unsigned long)childregs;
- }
- p->set_child_tid = p->clear_child_tid = NULL;
p->thread.rsp = (unsigned long) childregs;
p->thread.rsp0 = (unsigned long) (childregs+1);
p->thread.userrsp = me->thread.userrsp;
- set_ti_thread_flag(p->thread_info, TIF_FORK);
+ set_tsk_thread_flag(p, TIF_FORK);
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
- asm("movl %%gs,%0" : "=m" (p->thread.gsindex));
- asm("movl %%fs,%0" : "=m" (p->thread.fsindex));
- asm("movl %%es,%0" : "=m" (p->thread.es));
- asm("movl %%ds,%0" : "=m" (p->thread.ds));
+ asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
+ asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
+ asm("mov %%es,%0" : "=m" (p->thread.es));
+ asm("mov %%ds,%0" : "=m" (p->thread.ds));
if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
p->thread.io_bitmap_max = 0;
return -ENOMEM;
}
- memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+ memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+ IO_BITMAP_BYTES);
}
/*
/*
* This special macro can be used to load a debugging register
*/
-#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)
+#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
/*
* switch_to(x,y) should switch tasks from x to y.
* This could still be optimized:
* - fold all the options into a flag word and test it with a single test.
* - could test fs/gs bitsliced
+ *
+ * Kprobes not supported here. Set the probe on schedule instead.
*/
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+__kprobes struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
- unlazy_fpu(prev_p);
-
/*
* Reload esp0, LDT and the page table pointer:
*/
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
- asm volatile("movl %%es,%0" : "=m" (prev->es));
+ asm volatile("mov %%es,%0" : "=m" (prev->es));
if (unlikely(next->es | prev->es))
loadsegment(es, next->es);
- asm volatile ("movl %%ds,%0" : "=m" (prev->ds));
+ asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
*/
{
unsigned fsindex;
- asm volatile("movl %%fs,%0" : "=g" (fsindex));
+ asm volatile("movl %%fs,%0" : "=r" (fsindex));
/* segment register != 0 always requires a reload.
also reload when it has changed.
when prev process used 64bit base always reload
}
{
unsigned gsindex;
- asm volatile("movl %%gs,%0" : "=g" (gsindex));
+ asm volatile("movl %%gs,%0" : "=r" (gsindex));
if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex);
if (gsindex)
prev->gsindex = gsindex;
}
+ /* Must be after DS reload */
+ unlazy_fpu(prev_p);
+
/*
- * Switch the PDA context.
+ * Switch the PDA and FPU contexts.
*/
prev->userrsp = read_pda(oldrsp);
write_pda(oldrsp, next->userrsp);
write_pda(pcurrent, next_p);
- write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+
+ write_pda(kernelstack,
+ task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
/*
* Now maybe reload the debug registers
if (IS_ERR(filename))
return error;
error = do_execve(filename, argv, envp, ®s);
- if (error == 0)
+ if (error == 0) {
+ task_lock(current);
current->ptrace &= ~PT_DTRACE;
+ task_unlock(current);
+ }
putname(filename);
return error;
}
return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
if (!newsp)
newsp = regs->rsp;
if (!p || p == current || p->state==TASK_RUNNING)
return 0;
- stack = (unsigned long)p->thread_info;
+ stack = (unsigned long)task_stack_page(p);
if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
return 0;
fp = *(u64 *)(p->thread.rsp);
do {
- if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+ if (fp < (unsigned long)stack ||
+ fp > (unsigned long)stack+THREAD_SIZE)
return 0;
rip = *(u64 *)(fp+8);
if (!in_sched_functions(rip))
switch (code) {
case ARCH_SET_GS:
- if (addr >= TASK_SIZE)
+ if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
/* handle small bases via the GDT because that's faster to
task->thread.gsindex = 0;
task->thread.gs = addr;
if (doit) {
- load_gs_index(0);
- ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
+ load_gs_index(0);
+ ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
}
}
put_cpu();
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
- if (addr >= TASK_SIZE)
+ if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
/* handle small bases via the GDT because that's faster to
set_32bit_tls(task, FS_TLS, addr);
if (doit) {
load_TLS(&task->thread, cpu);
- asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+ asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
}
task->thread.fsindex = FS_TLS_SEL;
task->thread.fs = 0;
if (doit) {
/* set the selector to 0 to not confuse
__switch_to */
- asm volatile("movl %0,%%fs" :: "r" (0));
- ret = checking_wrmsrl(MSR_FS_BASE, addr);
+ asm volatile("movl %0,%%fs" :: "r" (0));
+ ret = checking_wrmsrl(MSR_FS_BASE, addr);
}
}
put_cpu();
unsigned long base;
if (task->thread.fsindex == FS_TLS_SEL)
base = read_32bit_tls(task, FS_TLS);
- else if (doit) {
+ else if (doit)
rdmsrl(MSR_FS_BASE, base);
- } else
+ else
base = task->thread.fs;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
case ARCH_GET_GS: {
unsigned long base;
+ unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
else if (doit) {
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- } else
+ asm("movl %%gs,%0" : "=r" (gsindex));
+ if (gsindex)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gs;
+ }
+ else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
break;
{
struct pt_regs *pp, ptregs;
- pp = (struct pt_regs *)(tsk->thread.rsp0);
- --pp;
+ pp = task_pt_regs(tsk);
ptregs = *pp;
ptregs.cs &= 0xffff;
return 1;
}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (randomize_va_space)
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+}