This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / arch/i386/kernel/process.c
index 0095fa1..96e3ea6 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -28,7 +28,7 @@
 #include <linux/a.out.h>
 #include <linux/interrupt.h>
 #include <linux/config.h>
-#include <linux/version.h>
+#include <linux/utsname.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/init.h>
@@ -36,6 +36,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
-int hlt_counter;
+static int hlt_counter;
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
 
 /*
  * Return saved PC of a blocked thread.
@@ -69,6 +73,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  * Power management idle function, if any..
  */
 void (*pm_idle)(void);
+static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 void disable_hlt(void)
 {
@@ -90,12 +95,14 @@ EXPORT_SYMBOL(enable_hlt);
  */
 void default_idle(void)
 {
-       if (!hlt_counter && current_cpu_data.hlt_works_ok) {
+       if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
                local_irq_disable();
                if (!need_resched())
                        safe_halt();
                else
                        local_irq_enable();
+       } else {
+               cpu_relax();
        }
 }
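
When HLT is unavailable, default_idle() now spins through cpu_relax() rather than a bare busy loop. For reference, a sketch of what that hint boils down to on i386, going by that era's <asm-i386/processor.h> rather than by anything in this diff:

	/* PAUSE ("rep; nop") tells the CPU this is a spin-wait, easing pressure
	 * on the pipeline and on a HyperThreading sibling. */
	static inline void rep_nop(void)
	{
		__asm__ __volatile__("rep; nop" : : : "memory");
	}
	#define cpu_relax()	rep_nop()
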
 
@@ -143,25 +150,51 @@ void cpu_idle (void)
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);
-                       /*
-                        * Mark this as an RCU critical section so that
-                        * synchronize_kernel() in the unload path waits
-                        * for our completion.
-                        */
-                       rcu_read_lock();
+
+                       if (__get_cpu_var(cpu_idle_state))
+                               __get_cpu_var(cpu_idle_state) = 0;
+
+                       rmb();
                        idle = pm_idle;
 
                        if (!idle)
                                idle = default_idle;
 
-                       irq_stat[smp_processor_id()].idle_timestamp = jiffies;
+                       __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        idle();
-                       rcu_read_unlock();
                }
                schedule();
        }
 }
 
+void cpu_idle_wait(void)
+{
+       unsigned int cpu, this_cpu = get_cpu();
+       cpumask_t map;
+
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+
+       cpus_clear(map);
+       for_each_online_cpu(cpu) {
+               per_cpu(cpu_idle_state, cpu) = 1;
+               cpu_set(cpu, map);
+       }
+
+       __get_cpu_var(cpu_idle_state) = 0;
+
+       wmb();
+       do {
+               ssleep(1);
+               for_each_online_cpu(cpu) {
+                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                               cpu_clear(cpu, map);
+               }
+               cpus_and(map, map, cpu_online_map);
+       } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
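
The per-CPU cpu_idle_state flag and cpu_idle_wait() replace the rcu_read_lock()/rcu_read_unlock() bracketing dropped from cpu_idle() above: every pass through the idle loop clears the flag, and cpu_idle_wait() sets it on all online CPUs, then sleeps in one-second steps until each CPU has cleared it again (re-intersecting the map with cpu_online_map, so an offlined CPU cannot stall the wait). A hedged sketch of the kind of caller this export is for, with hypothetical helper names; the real users are idle drivers such as ACPI that redirect pm_idle:

	static void (*old_idle)(void);

	void install_my_idle(void (*new_idle)(void))
	{
		old_idle = pm_idle;
		pm_idle  = new_idle;
		cpu_idle_wait();	/* no CPU is still inside the old handler */
	}

	void restore_old_idle(void)
	{
		pm_idle = old_idle;
		cpu_idle_wait();	/* new_idle's code may now be freed or unloaded */
	}
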
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
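
The mwait_idle() body that this comment introduces lies outside the hunk; as a simplified sketch of the idea, assuming the __monitor()/__mwait() helpers from <asm-i386/processor.h> and omitting details of the real function:

	static void mwait_idle_sketch(void)
	{
		local_irq_enable();
		while (!need_resched()) {
			/* Arm MONITOR on this thread's flags word... */
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			if (need_resched())
				break;
			/* ...then MWAIT: a write to the flags word (such as
			 * setting TIF_NEED_RESCHED) wakes the CPU, no IPI needed. */
			__mwait(0, 0);
		}
	}
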
@@ -214,6 +247,7 @@ static int __init idle_setup (char *str)
                pm_idle = default_idle;
        }
 
+       boot_option_idle_override = 1;
        return 1;
 }
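
The new boot_option_idle_override flag records that the idle routine was forced with the "idle=" boot parameter (idle=poll or idle=halt), so code that would otherwise install its own handler can back off. A hedged sketch of such a consumer; the real check is made by the ACPI processor driver, and my_driver_idle is a hypothetical name:

	static void my_driver_idle(void);	/* hypothetical replacement handler */

	static void my_driver_install_idle(void)
	{
		if (boot_option_idle_override)
			return;		/* user forced "idle=..." on the command line */
		pm_idle = my_driver_idle;
	}
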
 
@@ -230,7 +264,8 @@ void show_regs(struct pt_regs * regs)
 
        if (regs->xcs & 3)
                printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-       printk(" EFLAGS: %08lx    %s  (%s)\n",regs->eflags, print_tainted(),UTS_RELEASE);
+       printk(" EFLAGS: %08lx    %s  (%s)\n",
+              regs->eflags, print_tainted(), system_utsname.release);
        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->eax,regs->ebx,regs->ecx,regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
@@ -328,7 +363,7 @@ void flush_thread(void)
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
-       tsk->used_math = 0;
+       clear_used_math();
 }
 
 void release_thread(struct task_struct *dead_task)
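
The clear_used_math() call above replaces the open-coded tsk->used_math = 0; the flag now lives as a PF_USED_MATH bit in task->flags behind helpers in <linux/sched.h>. A simplified sketch of that family (the flag value is illustrative and the real header defines more variants):

	#define PF_USED_MATH		0x00002000	/* task has used the FPU */
	#define tsk_used_math(p)	((p)->flags & PF_USED_MATH)
	#define used_math()		tsk_used_math(current)
	#define set_used_math()		(current->flags |= PF_USED_MATH)
	#define clear_used_math()	(current->flags &= ~PF_USED_MATH)
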
@@ -344,7 +379,7 @@ void release_thread(struct task_struct *dead_task)
                }
        }
 
-       release_x86_irqs(dead_task);
+       release_vm86_irqs(dead_task);
 }
 
 /*
@@ -365,10 +400,20 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
        int err;
 
        childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+       /*
+        * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+        * This is necessary to guarantee that the entire "struct pt_regs"
+        * is accessible even if the CPU hasn't stored the SS/ESP registers
+        * on the stack (interrupt gate does not save these registers
+        * when switching to the same priv ring).
+        * Therefore beware: accessing the xss/esp fields of the
+        * "struct pt_regs" is possible, but they may contain
+        * completely wrong values.
+        */
+       childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
        *childregs = *regs;
        childregs->eax = 0;
        childregs->esp = esp;
-       p->set_child_tid = p->clear_child_tid = NULL;
 
        p->thread.esp = (unsigned long) childregs;
        p->thread.esp0 = (unsigned long) (childregs+1);
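
Laying out what the comment above describes, the child's kernel stack now looks roughly like this (sizes not to scale):

	p->thread_info                           p->thread_info + THREAD_SIZE
	|                                                                  |
	| ... kernel stack, grows down ... | struct pt_regs (childregs) |8B|
	                                                                ^
	                                     p->thread.esp0 == childregs + 1

The 8-byte pad at the very top is where the xss/esp slots of a same-privilege trap frame land, so reading them stays within the stack even though the values there are junk.
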
@@ -514,13 +559,6 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
         */
        tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
-               __asm__("movl %0,%%db" #register  \
-                       : /* no output */ \
-                       :"r" (thread->debugreg[register]))
 
 /*
  *     switch_to(x,y) should switch tasks from x to y.
@@ -574,8 +612,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
         * Save away %fs and %gs. No need to save %es and %ds, as
         * those are always kernel segments while inside the kernel.
         */
-       asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
-       asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+       asm volatile("mov %%fs,%0":"=m" (prev->fs));
+       asm volatile("mov %%gs,%0":"=m" (prev->gs));
 
        /*
         * Restore %fs and %gs if needed.
@@ -656,7 +694,9 @@ asmlinkage int sys_execve(struct pt_regs regs)
                        (char __user * __user *) regs.edx,
                        &regs);
        if (error == 0) {
+               task_lock(current);
                current->ptrace &= ~PT_DTRACE;
+               task_unlock(current);
                /* Make sure we don't return using sysenter.. */
                set_thread_flag(TIF_IRET);
        }
@@ -804,3 +844,9 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
        return 0;
 }
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+       if (randomize_va_space)
+               sp -= get_random_int() % 8192;
+       return sp & ~0xf;
+}
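
arch_align_stack() is the hook behind the per-exec stack randomization: with randomize_va_space enabled it knocks up to 8 KB off the incoming stack pointer, then rounds down to a 16-byte boundary, leaving roughly 512 distinct, still 16-byte-aligned starting points. A worked example with made-up numbers:

	sp                       = 0xbfffe7a3
	get_random_int() % 8192  = 0x1234	(4660 bytes)
	sp - 0x1234              = 0xbfffd56f
	(sp - 0x1234) & ~0xf     = 0xbfffd560	(returned, 16-byte aligned)
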