This commit was manufactured by cvs2svn to create tag
diff --git a/kernel/fork.c b/kernel/fork.c
index 68597bc..559e6bb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
 #include <linux/completion.h>
 #include <linux/namespace.h>
 #include <linux/personality.h>
+#include <linux/mempolicy.h>
 #include <linux/sem.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -33,6 +35,7 @@
 #include <linux/ptrace.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/rmap.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -60,7 +63,7 @@ int nr_processes(void)
        int cpu;
        int total = 0;
 
-       for_each_cpu(cpu)
+       for_each_online_cpu(cpu)
                total += per_cpu(process_counts, cpu);
 
        return total;
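
Switching from for_each_cpu() to for_each_online_cpu() means only CPUs that are actually online contribute their slot to the total, which keeps nr_processes() sane under CPU hotplug. For context, a minimal sketch of the writer side of this per-CPU counter pattern (the declaration mirrors fork.c; the helper name is illustrative):

	DEFINE_PER_CPU(unsigned long, process_counts) = 0;

	/* illustrative writer: bump the current CPU's slot */
	static void inc_process_count(void)
	{
		/* safe only with preemption off, e.g. under a spinlock */
		__get_cpu_var(process_counts)++;
	}
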
@@ -196,9 +199,9 @@ void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
 
 EXPORT_SYMBOL(finish_wait);
 
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync)
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-       int ret = default_wake_function(wait, mode, sync);
+       int ret = default_wake_function(wait, mode, sync, key);
 
        if (ret)
                list_del_init(&wait->task_list);
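
The extra `void *key' argument lets __wake_up() pass a key through every wait-queue callback, and autoremove_wake_function() simply forwards it to default_wake_function(). For context, this is the waiter-side idiom that relies on the autoremove behaviour: DEFINE_WAIT() installs autoremove_wake_function() as the callback, so a successful wakeup unlinks the entry before the sleeper resumes. A sketch, with a hypothetical queue and condition:

	static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* illustrative queue */

	static void wait_for_flag(int *flag)
	{
		DEFINE_WAIT(wait);	/* callback = autoremove_wake_function */

		for (;;) {
			prepare_to_wait(&my_wq, &wait, TASK_UNINTERRUPTIBLE);
			if (*flag)
				break;
			schedule();	/* a wakeup also unlinks `wait' */
		}
		finish_wait(&my_wq, &wait);	/* unlink is a no-op if the
						   wakeup already did it */
	}
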
@@ -215,11 +218,8 @@ void __init fork_init(unsigned long mempages)
 #endif
        /* create a slab on which task_structs can be allocated */
        task_struct_cachep =
-               kmem_cache_create("task_struct",
-                                 sizeof(struct task_struct),ARCH_MIN_TASKALIGN,
-                                 0, NULL, NULL);
-       if (!task_struct_cachep)
-               panic("fork_init(): cannot create task_struct SLAB cache");
+               kmem_cache_create("task_struct", sizeof(struct task_struct),
+                       ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
 #endif
 
        /*
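
The SLAB_PANIC flag moves the panic-on-failure policy into the slab allocator itself, letting every boot-time cache creation drop its open-coded NULL check. Roughly what the flag folds away, as a simplified sketch of the allocator side (not the literal mm/slab.c code):

	/* inside kmem_cache_create(), roughly (simplified sketch): */
	if (!cachep && (flags & SLAB_PANIC))
		panic("kmem_cache_create(): failed to create slab `%s'\n",
		      name);
	return cachep;
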
@@ -272,6 +272,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
        struct rb_node **rb_link, *rb_parent;
        int retval;
        unsigned long charge = 0;
+       struct mempolicy *pol;
 
        down_write(&oldmm->mmap_sem);
        flush_cache_mm(current->mm);
@@ -313,11 +314,17 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
+               pol = mpol_copy(vma_policy(mpnt));
+               retval = PTR_ERR(pol);
+               if (IS_ERR(pol))
+                       goto fail_nomem_policy;
+               vma_set_policy(tmp, pol);
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                tmp->vm_next = NULL;
+               anon_vma_link(tmp);
+               vma_prio_tree_init(tmp);
                file = tmp->vm_file;
-               INIT_LIST_HEAD(&tmp->shared);
                if (file) {
                        struct inode *inode = file->f_dentry->d_inode;
                        get_file(file);
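
mpol_copy() signals allocation failure by encoding an errno inside the returned pointer, which is why the hunk above checks IS_ERR() and recovers the code with PTR_ERR() before taking the new fail_nomem_policy exit. The trio works roughly like this (simplified sketch in the spirit of include/linux/err.h):

	static inline void *ERR_PTR(long error)
	{
		return (void *)error;		/* e.g. ERR_PTR(-ENOMEM) */
	}

	static inline long PTR_ERR(const void *ptr)
	{
		return (long)ptr;		/* recover the negative errno */
	}

	static inline long IS_ERR(const void *ptr)
	{
		/* the top of the address space never holds valid objects */
		return (unsigned long)ptr > (unsigned long)-1000L;
	}
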
@@ -325,9 +332,11 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
                                atomic_dec(&inode->i_writecount);
       
                        /* insert tmp into the share list, just after mpnt */
-                       down(&file->f_mapping->i_shared_sem);
-                       list_add(&tmp->shared, &mpnt->shared);
-                       up(&file->f_mapping->i_shared_sem);
+                       spin_lock(&file->f_mapping->i_mmap_lock);
+                       flush_dcache_mmap_lock(file->f_mapping);
+                       vma_prio_tree_add(tmp, mpnt);
+                       flush_dcache_mmap_unlock(file->f_mapping);
+                       spin_unlock(&file->f_mapping->i_mmap_lock);
                }
 
                /*
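
The old per-address_space shared list (guarded by the i_shared_sem semaphore) becomes a priority search tree guarded by the i_mmap_lock spinlock, with flush_dcache_mmap_lock() nested inside for virtually-indexed cache architectures; vma_prio_tree_add() files `tmp' alongside `mpnt', which covers the identical file range. The tree exists for the rmap reader side, which looks roughly like this (sketch; the callback is hypothetical):

	static void for_each_file_vma(struct address_space *mapping,
				      pgoff_t pgoff)
	{
		struct vm_area_struct *vma;
		struct prio_tree_iter iter;

		spin_lock(&mapping->i_mmap_lock);
		/* visit every VMA that maps file offset pgoff */
		vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
				      pgoff, pgoff)
			handle_one_mapping(vma);   /* hypothetical callback */
		spin_unlock(&mapping->i_mmap_lock);
	}
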
@@ -359,6 +368,8 @@ out:
        flush_tlb_mm(current->mm);
        up_write(&oldmm->mmap_sem);
        return retval;
+fail_nomem_policy:
+       kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
        retval = -ENOMEM;
 fail:
@@ -405,6 +416,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
+#ifdef __HAVE_ARCH_MMAP_TOP
+               mm->mmap_top = mmap_top();
+#endif
                return mm;
        }
        free_mm(mm);
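
When an architecture defines __HAVE_ARCH_MMAP_TOP, each fresh mm is seeded with the ceiling of its mmap area. The real mmap_top() is arch-specific; a purely hypothetical flavour, to show the kind of policy involved:

	/* hypothetical arch helper -- name exists per the hunk above,
	   but this policy is illustrative only */
	static inline unsigned long mmap_top(void)
	{
		/* leave the stack's rlimit worth of room below TASK_SIZE */
		unsigned long gap = current->rlim[RLIMIT_STACK].rlim_cur;

		return PAGE_ALIGN(TASK_SIZE - gap);
	}
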
@@ -421,9 +435,9 @@ struct mm_struct * mm_alloc(void)
        mm = allocate_mm();
        if (mm) {
                memset(mm, 0, sizeof(*mm));
-               return mm_init(mm);
+               mm = mm_init(mm);
        }
-       return NULL;
+       return mm;
 }
 
 /*
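
The mm_alloc() change is behaviour-preserving: mm_init() already returns NULL when pgd allocation fails (freeing the mm itself, per the previous hunk), so funnelling both outcomes through one return simply drops a branch. The resulting function, reconstructed from the hunk:

	struct mm_struct *mm_alloc(void)
	{
		struct mm_struct *mm;

		mm = allocate_mm();
		if (mm) {
			memset(mm, 0, sizeof(*mm));
			mm = mm_init(mm);   /* NULL if pgd allocation failed */
		}
		return mm;
	}
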
@@ -888,6 +902,7 @@ struct task_struct *copy_process(unsigned long clone_flags,
        p = dup_task_struct(current);
        if (!p)
                goto fork_out;
+       p->tux_info = NULL;
 
        retval = -EAGAIN;
        if (atomic_read(&p->user->processes) >=
@@ -953,10 +968,18 @@ struct task_struct *copy_process(unsigned long clone_flags,
        p->security = NULL;
        p->io_context = NULL;
        p->audit_context = NULL;
+#ifdef CONFIG_NUMA
+       p->mempolicy = mpol_copy(p->mempolicy);
+       if (IS_ERR(p->mempolicy)) {
+               retval = PTR_ERR(p->mempolicy);
+               p->mempolicy = NULL;
+               goto bad_fork_cleanup;
+       }
+#endif
 
        retval = -ENOMEM;
        if ((retval = security_task_alloc(p)))
-               goto bad_fork_cleanup;
+               goto bad_fork_cleanup_policy;
        if ((retval = audit_alloc(p)))
                goto bad_fork_cleanup_security;
        /* copy all the process information */
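
Since dup_task_struct() copies the whole task_struct, p->mempolicy initially aliases the parent's policy, so it must be replaced with a private copy (or NULLed) before any error path could free it. The shape in isolation, restating the new lines with the reasoning as comments (mpol_free() being NULL-safe is the assumption that makes the NULLing sufficient):

	p->mempolicy = mpol_copy(p->mempolicy);	/* was aliasing the parent's */
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;	/* the unwind's mpol_free(NULL)
					   is then a harmless no-op */
		goto bad_fork_cleanup;
	}
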
@@ -1102,6 +1125,10 @@ bad_fork_cleanup_audit:
        audit_free(p);
 bad_fork_cleanup_security:
        security_task_free(p);
+bad_fork_cleanup_policy:
+#ifdef CONFIG_NUMA
+       mpol_free(p->mempolicy);
+#endif
 bad_fork_cleanup:
        if (p->pid > 0)
                free_pidmap(p->pid);
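
Note the shape of the new unwind label: the label itself sits outside the #ifdef so that `goto bad_fork_cleanup_policy' compiles on !CONFIG_NUMA builds too, while only the body is conditional. Distilled (sketch):

	bad_fork_cleanup_policy:	/* label visible on every config */
	#ifdef CONFIG_NUMA
		mpol_free(p->mempolicy);	/* body compiled only for NUMA;
						   safe because the field is
						   NULL when no copy was taken */
	#endif
	bad_fork_cleanup:		/* falls through to the generic unwind */
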
@@ -1180,10 +1207,31 @@ long do_fork(unsigned long clone_flags,
                        set_tsk_thread_flag(p, TIF_SIGPENDING);
                }
 
-               if (!(clone_flags & CLONE_STOPPED))
-                       wake_up_forked_process(p);      /* do this last */
-               else
+               if (!(clone_flags & CLONE_STOPPED)) {
+                       /*
+                        * Do the wakeup last. On SMP we treat fork() and
+                        * CLONE_VM separately, because fork() has already
+                        * created cache footprint on this CPU (due to
+                        * copying the pagetables), hence migration would
+                        * probably be costly. Threads, on the other hand,
+                        * have less traction on the current CPU, and if
+                        * there's an imbalance then the scheduler can
+                        * migrate this fresh thread now, before it
+                        * accumulates a larger cache footprint:
+                        */
+                       if (clone_flags & CLONE_VM)
+                               wake_up_forked_thread(p);
+                       else
+                               wake_up_forked_process(p);
+               } else {
+                       int cpu = get_cpu();
+
                        p->state = TASK_STOPPED;
+                       if (cpu_is_offline(task_cpu(p)))
+                               set_task_cpu(p, cpu);
+
+                       put_cpu();
+               }
                ++total_forks;
 
                if (unlikely (trace)) {
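
Two changes land in this hunk. Fresh threads (CLONE_VM) now go through wake_up_forked_thread() so the scheduler may migrate them before they build up cache state, while plain fork() children keep wake_up_forked_process() and stay near the page tables just copied on this CPU. Separately, the CLONE_STOPPED path gains a hotplug repair, restated below with the reasoning as comments:

	int cpu = get_cpu();	/* disables preemption: `cpu' stays online */

	p->state = TASK_STOPPED;
	if (cpu_is_offline(task_cpu(p)))	/* inherited CPU unplugged? */
		set_task_cpu(p, cpu);		/* re-home to a live CPU */

	put_cpu();
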
@@ -1227,37 +1275,20 @@ void __init proc_caches_init(void)
 {
        sighand_cachep = kmem_cache_create("sighand_cache",
                        sizeof(struct sighand_struct), 0,
-                       SLAB_HWCACHE_ALIGN, NULL, NULL);
-       if (!sighand_cachep)
-               panic("Cannot create sighand SLAB cache");
-
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
        signal_cachep = kmem_cache_create("signal_cache",
                        sizeof(struct signal_struct), 0,
-                       SLAB_HWCACHE_ALIGN, NULL, NULL);
-       if (!signal_cachep)
-               panic("Cannot create signal SLAB cache");
-
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
        files_cachep = kmem_cache_create("files_cache", 
-                        sizeof(struct files_struct), 0, 
-                        SLAB_HWCACHE_ALIGN, NULL, NULL);
-       if (!files_cachep) 
-               panic("Cannot create files SLAB cache");
-
+                       sizeof(struct files_struct), 0,
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
        fs_cachep = kmem_cache_create("fs_cache", 
-                        sizeof(struct fs_struct), 0, 
-                        SLAB_HWCACHE_ALIGN, NULL, NULL);
-       if (!fs_cachep) 
-               panic("Cannot create fs_struct SLAB cache");
+                       sizeof(struct fs_struct), 0,
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
        vm_area_cachep = kmem_cache_create("vm_area_struct",
                        sizeof(struct vm_area_struct), 0,
-                       0, NULL, NULL);
-       if(!vm_area_cachep)
-               panic("vma_init: Cannot alloc vm_area_struct SLAB cache");
-
+                       SLAB_PANIC, NULL, NULL);
        mm_cachep = kmem_cache_create("mm_struct",
                        sizeof(struct mm_struct), 0,
-                       SLAB_HWCACHE_ALIGN, NULL, NULL);
-       if(!mm_cachep)
-               panic("vma_init: Cannot alloc mm_struct SLAB cache");
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 }
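
One caveat the SLAB_PANIC conversion leaves untouched: the flag only guards cache creation at boot. Individual allocations from these caches can still fail at runtime and must be checked, as dup_mmap() does above. An illustrative consumer:

	struct vm_area_struct *tmp;

	tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;		/* still possible despite SLAB_PANIC */
	/* ... use tmp ... */
	kmem_cache_free(vm_area_cachep, tmp);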