kernel/fork.c

   1 /*
   2  *  linux/kernel/fork.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 /*
   8  *  'fork.c' contains the help-routines for the 'fork' system call
   9  * (see also entry.S and others).
  10  * Fork is rather simple, once you get the hang of it, but the memory
  11  * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
  12  */
  13
  14 #include <linux/config.h>
  15 #include <linux/slab.h>
  16 #include <linux/init.h>
  17 #include <linux/unistd.h>
  18 #include <linux/smp_lock.h>
  19 #include <linux/module.h>
  20 #include <linux/vmalloc.h>
  21 #include <linux/completion.h>
  22 #include <linux/namespace.h>
  23 #include <linux/personality.h>
  24 #include <linux/mempolicy.h>
  25 #include <linux/sem.h>
  26 #include <linux/file.h>
  27 #include <linux/binfmts.h>
  28 #include <linux/mman.h>
  29 #include <linux/fs.h>
  30 #include <linux/cpu.h>
  31 #include <linux/security.h>
  32 #include <linux/swap.h>
  33 #include <linux/syscalls.h>
  34 #include <linux/jiffies.h>
  35 #include <linux/futex.h>
  36 #include <linux/ptrace.h>
  37 #include <linux/mount.h>
  38 #include <linux/audit.h>
  39 #include <linux/rmap.h>
  40 #include <linux/vs_network.h>
  41 #include <linux/vs_limit.h>
  42 #include <linux/vs_memory.h>
  43 #include <linux/ckrm.h>
  44 #include <linux/ckrm_tsk.h>
  45 #include <linux/ckrm_mem_inline.h>
  46
  47 #include <asm/pgtable.h>
  48 #include <asm/pgalloc.h>
  49 #include <asm/uaccess.h>
  50 #include <asm/mmu_context.h>
  51 #include <asm/cacheflush.h>
  52 #include <asm/tlbflush.h>
  53
  54 /* The idle threads do not count..
  55  * Protected by write_lock_irq(&tasklist_lock)
  56  */
  57 int nr_threads;
  58
  59 int max_threads;
  60 unsigned long total_forks;      /* Handle normal Linux uptimes. */
  61
  62 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
  63
  64 rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;  /* outer */
  65
  66 EXPORT_SYMBOL(tasklist_lock);
  67
  68 int nr_processes(void)
  69 {
  70         int cpu;
  71         int total = 0;
  72
  73         for_each_online_cpu(cpu)
  74                 total += per_cpu(process_counts, cpu);
  75
  76         return total;
  77 }
  78
  79 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
  80 # define alloc_task_struct()    kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
  81 # define free_task_struct(tsk)  kmem_cache_free(task_struct_cachep, (tsk))
  82 static kmem_cache_t *task_struct_cachep;
  83 #endif
  84
  85 static void free_task(struct task_struct *tsk)
  86 {
  87         free_thread_info(tsk->thread_info);
  88         clr_vx_info(&tsk->vx_info);
  89         clr_nx_info(&tsk->nx_info);
  90         free_task_struct(tsk);
  91 }
  92
  93 void __put_task_struct(struct task_struct *tsk)
  94 {
  95         WARN_ON(!(tsk->state & (TASK_DEAD | TASK_ZOMBIE)));
  96         WARN_ON(atomic_read(&tsk->usage));
  97         WARN_ON(tsk == current);
  98
  99         if (unlikely(tsk->audit_context))
 100                 audit_free(tsk);
 101         security_task_free(tsk);
 102         free_uid(tsk->user);
 103         put_group_info(tsk->group_info);
 104         free_task(tsk);
 105 }
 106
 107 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
 108 {
 109         unsigned long flags;
 110
 111         wait->flags &= ~WQ_FLAG_EXCLUSIVE;
 112         spin_lock_irqsave(&q->lock, flags);
 113         __add_wait_queue(q, wait);
 114         spin_unlock_irqrestore(&q->lock, flags);
 115 }
 116
 117 EXPORT_SYMBOL(add_wait_queue);
 118
 119 void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
 120 {
 121         unsigned long flags;
 122
 123         wait->flags |= WQ_FLAG_EXCLUSIVE;
 124         spin_lock_irqsave(&q->lock, flags);
 125         __add_wait_queue_tail(q, wait);
 126         spin_unlock_irqrestore(&q->lock, flags);
 127 }
 128
 129 EXPORT_SYMBOL(add_wait_queue_exclusive);
 130
 131 void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
 132 {
 133         unsigned long flags;
 134
 135         spin_lock_irqsave(&q->lock, flags);
 136         __remove_wait_queue(q, wait);
 137         spin_unlock_irqrestore(&q->lock, flags);
 138 }
 139
 140 EXPORT_SYMBOL(remove_wait_queue);
 141
 142
 143 /*
 144  * Note: we use "set_current_state()" _after_ the wait-queue add,
 145  * because we need a memory barrier there on SMP, so that any
 146  * wake-function that tests for the wait-queue being active
 147  * will be guaranteed to see waitqueue addition _or_ subsequent
 148  * tests in this thread will see the wakeup having taken place.
 149  *
 150  * The spin_unlock() itself is semi-permeable and only protects
 151  * one way (it only protects stuff inside the critical region and
 152  * stops them from bleeding out - it would still allow subsequent
 153  * loads to move into the the critical region).
 154  */
 155 void fastcall prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
 156 {
 157         unsigned long flags;
 158
 159         wait->flags &= ~WQ_FLAG_EXCLUSIVE;
 160         spin_lock_irqsave(&q->lock, flags);
 161         if (list_empty(&wait->task_list))
 162                 __add_wait_queue(q, wait);
 163         set_current_state(state);
 164         spin_unlock_irqrestore(&q->lock, flags);
 165 }
 166
 167 EXPORT_SYMBOL(prepare_to_wait);
 168
 169 void fastcall
 170 prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
 171 {
 172         unsigned long flags;
 173
 174         wait->flags |= WQ_FLAG_EXCLUSIVE;
 175         spin_lock_irqsave(&q->lock, flags);
 176         if (list_empty(&wait->task_list))
 177                 __add_wait_queue_tail(q, wait);
 178         set_current_state(state);
 179         spin_unlock_irqrestore(&q->lock, flags);
 180 }
 181
 182 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 183
 184 void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
 185 {
 186         unsigned long flags;
 187
 188         __set_current_state(TASK_RUNNING);
 189         /*
 190          * We can check for list emptiness outside the lock
 191          * IFF:
 192          *  - we use the "careful" check that verifies both
 193          *    the next and prev pointers, so that there cannot
 194          *    be any half-pending updates in progress on other
 195          *    CPU's that we haven't seen yet (and that might
 196          *    still change the stack area.
 197          * and
 198          *  - all other users take the lock (ie we can only
 199          *    have _one_ other CPU that looks at or modifies
 200          *    the list).
 201          */
 202         if (!list_empty_careful(&wait->task_list)) {
 203                 spin_lock_irqsave(&q->lock, flags);
 204                 list_del_init(&wait->task_list);
 205                 spin_unlock_irqrestore(&q->lock, flags);
 206         }
 207 }
 208
 209 EXPORT_SYMBOL(finish_wait);
 210
 211 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 212 {
 213         int ret = default_wake_function(wait, mode, sync, key);
 214
 215         if (ret)
 216                 list_del_init(&wait->task_list);
 217         return ret;
 218 }
 219
 220 EXPORT_SYMBOL(autoremove_wake_function);
 221
 222 void __init fork_init(unsigned long mempages)
 223 {
 224 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
 225 #ifndef ARCH_MIN_TASKALIGN
 226 #define ARCH_MIN_TASKALIGN      L1_CACHE_BYTES
 227 #endif
 228         /* create a slab on which task_structs can be allocated */
 229         task_struct_cachep =
 230                 kmem_cache_create("task_struct", sizeof(struct task_struct),
 231                         ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
 232 #endif
 233
 234         /*
 235          * The default maximum number of threads is set to a safe
 236          * value: the thread structures can take up at most half
 237          * of memory.
 238          */
 239         max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
 240         /*
 241          * we need to allow at least 20 threads to boot a system
 242          */
 243         if(max_threads < 20)
 244                 max_threads = 20;
 245
 246         init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
 247         init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 248 }
 249
 250 static struct task_struct *dup_task_struct(struct task_struct *orig)
 251 {
 252         struct task_struct *tsk;
 253         struct thread_info *ti;
 254
 255         prepare_to_copy(orig);
 256
 257         tsk = alloc_task_struct();
 258         if (!tsk)
 259                 return NULL;
 260
 261         ti = alloc_thread_info(tsk);
 262         if (!ti) {
 263                 free_task_struct(tsk);
 264                 return NULL;
 265         }
 266
 267         *ti = *orig->thread_info;
 268         *tsk = *orig;
 269         tsk->thread_info = ti;
 270         ti->task = tsk;
 271
 272         ckrm_cb_newtask(tsk);
 273         /* One for us, one for whoever does the "release_task()" (usually parent) */
 274         atomic_set(&tsk->usage,2);
 275 #ifdef CONFIG_CKRM_RES_MEM
 276         INIT_LIST_HEAD(&tsk->mm_peers);
 277 #endif
 278         return tsk;
 279 }
 280
 281 #ifdef CONFIG_MMU
 282 static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
 283 {
 284         struct vm_area_struct * mpnt, *tmp, **pprev;
 285         struct rb_node **rb_link, *rb_parent;
 286         int retval;
 287         unsigned long charge;
 288         struct mempolicy *pol;
 289
 290         down_write(&oldmm->mmap_sem);
 291         flush_cache_mm(current->mm);
 292         mm->locked_vm = 0;
 293         mm->mmap = NULL;
 294         mm->mmap_cache = NULL;
 295         mm->free_area_cache = oldmm->mmap_base;
 296         mm->map_count = 0;
 297         mm->rss = 0;
 298         cpus_clear(mm->cpu_vm_mask);
 299         mm->mm_rb = RB_ROOT;
 300         rb_link = &mm->mm_rb.rb_node;
 301         rb_parent = NULL;
 302         pprev = &mm->mmap;
 303
 304         /*
 305          * Add it to the mmlist after the parent.
 306          * Doing it this way means that we can order the list,
 307          * and fork() won't mess up the ordering significantly.
 308          * Add it first so that swapoff can see any swap entries.
 309          */
 310         spin_lock(&mmlist_lock);
 311         list_add(&mm->mmlist, &current->mm->mmlist);
 312         mmlist_nr++;
 313         spin_unlock(&mmlist_lock);
 314
 315         for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
 316                 struct file *file;
 317
 318                 if(mpnt->vm_flags & VM_DONTCOPY)
 319                         continue;
 320                 charge = 0;
 321                 if (mpnt->vm_flags & VM_ACCOUNT) {
 322                         unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 323                         if (security_vm_enough_memory(len))
 324                                 goto fail_nomem;
 325                         charge = len;
 326                 }
 327                 tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 328                 if (!tmp)
 329                         goto fail_nomem;
 330                 *tmp = *mpnt;
 331                 pol = mpol_copy(vma_policy(mpnt));
 332                 retval = PTR_ERR(pol);
 333                 if (IS_ERR(pol))
 334                         goto fail_nomem_policy;
 335                 vma_set_policy(tmp, pol);
 336                 tmp->vm_flags &= ~VM_LOCKED;
 337                 tmp->vm_mm = mm;
 338                 tmp->vm_next = NULL;
 339                 anon_vma_link(tmp);
 340                 vma_prio_tree_init(tmp);
 341                 file = tmp->vm_file;
 342                 if (file) {
 343                         struct inode *inode = file->f_dentry->d_inode;
 344                         get_file(file);
 345                         if (tmp->vm_flags & VM_DENYWRITE)
 346                                 atomic_dec(&inode->i_writecount);
 347
 348                         /* insert tmp into the share list, just after mpnt */
 349                         spin_lock(&file->f_mapping->i_mmap_lock);
 350                         flush_dcache_mmap_lock(file->f_mapping);
 351                         vma_prio_tree_add(tmp, mpnt);
 352                         flush_dcache_mmap_unlock(file->f_mapping);
 353                         spin_unlock(&file->f_mapping->i_mmap_lock);
 354                 }
 355
 356                 /*
 357                  * Link in the new vma and copy the page table entries:
 358                  * link in first so that swapoff can see swap entries,
 359                  * and try_to_unmap_one's find_vma find the new vma.
 360                  */
 361                 spin_lock(&mm->page_table_lock);
 362                 *pprev = tmp;
 363                 pprev = &tmp->vm_next;
 364
 365                 __vma_link_rb(mm, tmp, rb_link, rb_parent);
 366                 rb_link = &tmp->vm_rb.rb_right;
 367                 rb_parent = &tmp->vm_rb;
 368
 369                 mm->map_count++;
 370                 retval = copy_page_range(mm, current->mm, tmp);
 371                 spin_unlock(&mm->page_table_lock);
 372
 373                 if (tmp->vm_ops && tmp->vm_ops->open)
 374                         tmp->vm_ops->open(tmp);
 375
 376                 if (retval)
 377                         goto out;
 378         }
 379         retval = 0;
 380
 381 out:
 382         flush_tlb_mm(current->mm);
 383         up_write(&oldmm->mmap_sem);
 384         return retval;
 385 fail_nomem_policy:
 386         kmem_cache_free(vm_area_cachep, tmp);
 387 fail_nomem:
 388         retval = -ENOMEM;
 389         vm_unacct_memory(charge);
 390         goto out;
 391 }
 392
 393 static inline int mm_alloc_pgd(struct mm_struct * mm)
 394 {
 395         mm->pgd = pgd_alloc(mm);
 396         if (unlikely(!mm->pgd))
 397                 return -ENOMEM;
 398         return 0;
 399 }
 400
 401 static inline void mm_free_pgd(struct mm_struct * mm)
 402 {
 403         pgd_free(mm->pgd);
 404 }
 405 #else
 406 #define dup_mmap(mm, oldmm)     (0)
 407 #define mm_alloc_pgd(mm)        (0)
 408 #define mm_free_pgd(mm)
 409 #endif /* CONFIG_MMU */
 410
 411 spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 412 int mmlist_nr;
 413
 414 #define allocate_mm()   (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
 415 #define free_mm(mm)     (kmem_cache_free(mm_cachep, (mm)))
 416
 417 #include <linux/init_task.h>
 418
 419 static struct mm_struct * mm_init(struct mm_struct * mm)
 420 {
 421         atomic_set(&mm->mm_users, 1);
 422         atomic_set(&mm->mm_count, 1);
 423         init_rwsem(&mm->mmap_sem);
 424         mm->core_waiters = 0;
 425         mm->page_table_lock = SPIN_LOCK_UNLOCKED;
 426         mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
 427         mm->ioctx_list = NULL;
 428         mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 429         mm->free_area_cache = TASK_UNMAPPED_BASE;
 430 #ifdef CONFIG_CKRM_RES_MEM
 431         INIT_LIST_HEAD(&mm->tasklist);
 432         mm->peertask_lock = SPIN_LOCK_UNLOCKED;
 433 #endif
 434
 435         if (likely(!mm_alloc_pgd(mm))) {
 436                 mm->def_flags = 0;
 437                 set_vx_info(&mm->mm_vx_info, current->vx_info);
 438                 return mm;
 439         }
 440         free_mm(mm);
 441         return NULL;
 442 }
 443
 444 /*
 445  * Allocate and initialize an mm_struct.
 446  */
 447 struct mm_struct * mm_alloc(void)
 448 {
 449         struct mm_struct * mm;
 450
 451         mm = allocate_mm();
 452         if (mm) {
 453                 memset(mm, 0, sizeof(*mm));
 454                 mm = mm_init(mm);
 455 #ifdef CONFIG_CKRM_RES_MEM
 456                 mm->memclass = GET_MEM_CLASS(current);
 457                 mem_class_get(mm->memclass);
 458 #endif
 459         }
 460         return mm;
 461 }
 462
 463 /*
 464  * Called when the last reference to the mm
 465  * is dropped: either by a lazy thread or by
 466  * mmput. Free the page directory and the mm.
 467  */
 468 void fastcall __mmdrop(struct mm_struct *mm)
 469 {
 470         BUG_ON(mm == &init_mm);
 471         mm_free_pgd(mm);
 472         destroy_context(mm);
 473         clr_vx_info(&mm->mm_vx_info);
 474 #ifdef CONFIG_CKRM_RES_MEM
 475         /* class can be null and mm's tasklist can be empty here */
 476         if (mm->memclass) {
 477                 mem_class_put(mm->memclass);
 478                 mm->memclass = NULL;
 479         }
 480 #endif
 481         free_mm(mm);
 482 }
 483
 484 /*
 485  * Decrement the use count and release all resources for an mm.
 486  */
 487 void mmput(struct mm_struct *mm)
 488 {
 489         if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
 490                 list_del(&mm->mmlist);
 491                 mmlist_nr--;
 492                 spin_unlock(&mmlist_lock);
 493                 exit_aio(mm);
 494                 exit_mmap(mm);
 495                 put_swap_token(mm);
 496                 mmdrop(mm);
 497         }
 498 }
 499
 500 /*
 501  * Checks if the use count of an mm is non-zero and if so
 502  * returns a reference to it after bumping up the use count.
 503  * If the use count is zero, it means this mm is going away,
 504  * so return NULL.
 505  */
 506 struct mm_struct *mmgrab(struct mm_struct *mm)
 507 {
 508         spin_lock(&mmlist_lock);
 509         if (!atomic_read(&mm->mm_users))
 510                 mm = NULL;
 511         else
 512                 atomic_inc(&mm->mm_users);
 513         spin_unlock(&mmlist_lock);
 514         return mm;
 515 }
 516
 517 /* Please note the differences between mmput and mm_release.
 518  * mmput is called whenever we stop holding onto a mm_struct,
 519  * error success whatever.
 520  *
 521  * mm_release is called after a mm_struct has been removed
 522  * from the current process.
 523  *
 524  * This difference is important for error handling, when we
 525  * only half set up a mm_struct for a new process and need to restore
 526  * the old one.  Because we mmput the new mm_struct before
 527  * restoring the old one. . .
 528  * Eric Biederman 10 January 1998
 529  */
 530 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 531 {
 532         struct completion *vfork_done = tsk->vfork_done;
 533
 534         /* Get rid of any cached register state */
 535         deactivate_mm(tsk, mm);
 536
 537         /* notify parent sleeping on vfork() */
 538         if (vfork_done) {
 539                 tsk->vfork_done = NULL;
 540                 complete(vfork_done);
 541         }
 542         if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
 543                 u32 __user * tidptr = tsk->clear_child_tid;
 544                 tsk->clear_child_tid = NULL;
 545
 546                 /*
 547                  * We don't check the error code - if userspace has
 548                  * not set up a proper pointer then tough luck.
 549                  */
 550                 put_user(0, tidptr);
 551                 sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
 552         }
 553 }
 554
 555 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 556 {
 557         struct mm_struct * mm, *oldmm;
 558         int retval;
 559
 560         tsk->min_flt = tsk->maj_flt = 0;
 561         tsk->cmin_flt = tsk->cmaj_flt = 0;
 562         tsk->nvcsw = tsk->nivcsw = tsk->cnvcsw = tsk->cnivcsw = 0;
 563
 564         tsk->mm = NULL;
 565         tsk->active_mm = NULL;
 566
 567         /*
 568          * Are we cloning a kernel thread?
 569          *
 570          * We need to steal a active VM for that..
 571          */
 572         oldmm = current->mm;
 573         if (!oldmm)
 574                 return 0;
 575
 576         if (clone_flags & CLONE_VM) {
 577                 atomic_inc(&oldmm->mm_users);
 578                 mm = oldmm;
 579                 /*
 580                  * There are cases where the PTL is held to ensure no
 581                  * new threads start up in user mode using an mm, which
 582                  * allows optimizing out ipis; the tlb_gather_mmu code
 583                  * is an example.
 584                  */
 585                 spin_unlock_wait(&oldmm->page_table_lock);
 586                 goto good_mm;
 587         }
 588
 589         retval = -ENOMEM;
 590         mm = allocate_mm();
 591         if (!mm)
 592                 goto fail_nomem;
 593
 594         /* Copy the current MM stuff.. */
 595         memcpy(mm, oldmm, sizeof(*mm));
 596         mm->mm_vx_info = NULL;
 597         if (!mm_init(mm))
 598                 goto fail_nomem;
 599
 600         if (init_new_context(tsk,mm))
 601                 goto fail_nocontext;
 602
 603         retval = dup_mmap(mm, oldmm);
 604         if (retval)
 605                 goto free_pt;
 606
 607 good_mm:
 608         tsk->mm = mm;
 609         tsk->active_mm = mm;
 610         ckrm_init_mm_to_task(mm, tsk);
 611         return 0;
 612
 613 free_pt:
 614         mmput(mm);
 615 fail_nomem:
 616         return retval;
 617
 618 fail_nocontext:
 619         /*
 620          * If init_new_context() failed, we cannot use mmput() to free the mm
 621          * because it calls destroy_context()
 622          */
 623         mm_free_pgd(mm);
 624         free_mm(mm);
 625         return retval;
 626 }
 627
 628 static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 629 {
 630         struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
 631         /* We don't need to lock fs - think why ;-) */
 632         if (fs) {
 633                 atomic_set(&fs->count, 1);
 634                 fs->lock = RW_LOCK_UNLOCKED;
 635                 fs->umask = old->umask;
 636                 read_lock(&old->lock);
 637                 fs->rootmnt = mntget(old->rootmnt);
 638                 fs->root = dget(old->root);
 639                 fs->pwdmnt = mntget(old->pwdmnt);
 640                 fs->pwd = dget(old->pwd);
 641                 if (old->altroot) {
 642                         fs->altrootmnt = mntget(old->altrootmnt);
 643                         fs->altroot = dget(old->altroot);
 644                 } else {
 645                         fs->altrootmnt = NULL;
 646                         fs->altroot = NULL;
 647                 }
 648                 read_unlock(&old->lock);
 649         }
 650         return fs;
 651 }
 652
 653 struct fs_struct *copy_fs_struct(struct fs_struct *old)
 654 {
 655         return __copy_fs_struct(old);
 656 }
 657
 658 EXPORT_SYMBOL_GPL(copy_fs_struct);
 659
 660 static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
 661 {
 662         if (clone_flags & CLONE_FS) {
 663                 atomic_inc(&current->fs->count);
 664                 return 0;
 665         }
 666         tsk->fs = __copy_fs_struct(current->fs);
 667         if (!tsk->fs)
 668                 return -ENOMEM;
 669         return 0;
 670 }
 671
 672 static int count_open_files(struct files_struct *files, int size)
 673 {
 674         int i;
 675
 676         /* Find the last open fd */
 677         for (i = size/(8*sizeof(long)); i > 0; ) {
 678                 if (files->open_fds->fds_bits[--i])
 679                         break;
 680         }
 681         i = (i+1) * 8 * sizeof(long);
 682         return i;
 683 }
 684
 685 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 686 {
 687         struct files_struct *oldf, *newf;
 688         struct file **old_fds, **new_fds;
 689         int open_files, nfds, size, i, error = 0;
 690
 691         /*
 692          * A background process may not have any files ...
 693          */
 694         oldf = current->files;
 695         if (!oldf)
 696                 goto out;
 697
 698         if (clone_flags & CLONE_FILES) {
 699                 atomic_inc(&oldf->count);
 700                 goto out;
 701         }
 702
 703         /*
 704          * Note: we may be using current for both targets (See exec.c)
 705          * This works because we cache current->files (old) as oldf. Don't
 706          * break this.
 707          */
 708         tsk->files = NULL;
 709         error = -ENOMEM;
 710         newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
 711         if (!newf)
 712                 goto out;
 713
 714         atomic_set(&newf->count, 1);
 715
 716         newf->file_lock     = SPIN_LOCK_UNLOCKED;
 717         newf->next_fd       = 0;
 718         newf->max_fds       = NR_OPEN_DEFAULT;
 719         newf->max_fdset     = __FD_SETSIZE;
 720         newf->close_on_exec = &newf->close_on_exec_init;
 721         newf->open_fds      = &newf->open_fds_init;
 722         newf->fd            = &newf->fd_array[0];
 723
 724         /* We don't yet have the oldf readlock, but even if the old
 725            fdset gets grown now, we'll only copy up to "size" fds */
 726         size = oldf->max_fdset;
 727         if (size > __FD_SETSIZE) {
 728                 newf->max_fdset = 0;
 729                 spin_lock(&newf->file_lock);
 730                 error = expand_fdset(newf, size-1);
 731                 spin_unlock(&newf->file_lock);
 732                 if (error)
 733                         goto out_release;
 734         }
 735         spin_lock(&oldf->file_lock);
 736
 737         open_files = count_open_files(oldf, size);
 738
 739         /*
 740          * Check whether we need to allocate a larger fd array.
 741          * Note: we're not a clone task, so the open count won't
 742          * change.
 743          */
 744         nfds = NR_OPEN_DEFAULT;
 745         if (open_files > nfds) {
 746                 spin_unlock(&oldf->file_lock);
 747                 newf->max_fds = 0;
 748                 spin_lock(&newf->file_lock);
 749                 error = expand_fd_array(newf, open_files-1);
 750                 spin_unlock(&newf->file_lock);
 751                 if (error)
 752                         goto out_release;
 753                 nfds = newf->max_fds;
 754                 spin_lock(&oldf->file_lock);
 755         }
 756
 757         old_fds = oldf->fd;
 758         new_fds = newf->fd;
 759
 760         memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
 761         memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
 762
 763         for (i = open_files; i != 0; i--) {
 764                 struct file *f = *old_fds++;
 765                 if (f)
 766                         get_file(f);
 767                 *new_fds++ = f;
 768         }
 769         spin_unlock(&oldf->file_lock);
 770
 771         /* compute the remainder to be cleared */
 772         size = (newf->max_fds - open_files) * sizeof(struct file *);
 773
 774         /* This is long word aligned thus could use a optimized version */
 775         memset(new_fds, 0, size);
 776
 777         if (newf->max_fdset > open_files) {
 778                 int left = (newf->max_fdset-open_files)/8;
 779                 int start = open_files / (8 * sizeof(unsigned long));
 780
 781                 memset(&newf->open_fds->fds_bits[start], 0, left);
 782                 memset(&newf->close_on_exec->fds_bits[start], 0, left);
 783         }
 784
 785         tsk->files = newf;
 786         error = 0;
 787 out:
 788         return error;
 789
 790 out_release:
 791         free_fdset (newf->close_on_exec, newf->max_fdset);
 792         free_fdset (newf->open_fds, newf->max_fdset);
 793         kmem_cache_free(files_cachep, newf);
 794         goto out;
 795 }
 796
 797 /*
 798  *      Helper to unshare the files of the current task.
 799  *      We don't want to expose copy_files internals to
 800  *      the exec layer of the kernel.
 801  */
 802
 803 int unshare_files(void)
 804 {
 805         struct files_struct *files  = current->files;
 806         int rc;
 807
 808         if(!files)
 809                 BUG();
 810
 811         /* This can race but the race causes us to copy when we don't
 812            need to and drop the copy */
 813         if(atomic_read(&files->count) == 1)
 814         {
 815                 atomic_inc(&files->count);
 816                 return 0;
 817         }
 818         rc = copy_files(0, current);
 819         if(rc)
 820                 current->files = files;
 821         return rc;
 822 }
 823
 824 EXPORT_SYMBOL(unshare_files);
 825
 826 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 827 {
 828         struct sighand_struct *sig;
 829
 830         if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
 831                 atomic_inc(&current->sighand->count);
 832                 return 0;
 833         }
 834         sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
 835         tsk->sighand = sig;
 836         if (!sig)
 837                 return -ENOMEM;
 838         spin_lock_init(&sig->siglock);
 839         atomic_set(&sig->count, 1);
 840         memcpy(sig->action, current->sighand->action, sizeof(sig->action));
 841         return 0;
 842 }
 843
 844 static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
 845 {
 846         struct signal_struct *sig;
 847
 848         if (clone_flags & CLONE_THREAD) {
 849                 atomic_inc(&current->signal->count);
 850                 return 0;
 851         }
 852         sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 853         tsk->signal = sig;
 854         if (!sig)
 855                 return -ENOMEM;
 856         atomic_set(&sig->count, 1);
 857         sig->group_exit = 0;
 858         sig->group_exit_code = 0;
 859         sig->group_exit_task = NULL;
 860         sig->group_stop_count = 0;
 861         sig->curr_target = NULL;
 862         init_sigpending(&sig->shared_pending);
 863         INIT_LIST_HEAD(&sig->posix_timers);
 864
 865         sig->tty = current->signal->tty;
 866         sig->pgrp = process_group(current);
 867         sig->session = current->signal->session;
 868         sig->leader = 0;        /* session leadership doesn't inherit */
 869         sig->tty_old_pgrp = 0;
 870
 871         return 0;
 872 }
 873
 874 static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 875 {
 876         unsigned long new_flags = p->flags;
 877
 878         new_flags &= ~PF_SUPERPRIV;
 879         new_flags |= PF_FORKNOEXEC;
 880         if (!(clone_flags & CLONE_PTRACE))
 881                 p->ptrace = 0;
 882         p->flags = new_flags;
 883 }
 884
 885 asmlinkage long sys_set_tid_address(int __user *tidptr)
 886 {
 887         current->clear_child_tid = tidptr;
 888
 889         return current->pid;
 890 }
 891
 892 /*
 893  * This creates a new process as a copy of the old one,
 894  * but does not actually start it yet.
 895  *
 896  * It copies the registers, and all the appropriate
 897  * parts of the process environment (as per the clone
 898  * flags). The actual kick-off is left to the caller.
 899  */
 900 struct task_struct *copy_process(unsigned long clone_flags,
 901                                  unsigned long stack_start,
 902                                  struct pt_regs *regs,
 903                                  unsigned long stack_size,
 904                                  int __user *parent_tidptr,
 905                                  int __user *child_tidptr)
 906 {
 907         int retval;
 908         struct task_struct *p = NULL;
 909         struct vx_info *vxi;
 910
 911         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 912                 return ERR_PTR(-EINVAL);
 913
 914         /*
 915          * Thread groups must share signals as well, and detached threads
 916          * can only be started up within the thread group.
 917          */
 918         if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
 919                 return ERR_PTR(-EINVAL);
 920
 921         /*
 922          * Shared signal handlers imply shared VM. By way of the above,
 923          * thread groups also imply shared VM. Blocking this case allows
 924          * for various simplifications in other code.
 925          */
 926         if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 927                 return ERR_PTR(-EINVAL);
 928
 929         retval = security_task_create(clone_flags);
 930         if (retval)
 931                 goto fork_out;
 932
 933         retval = -ENOMEM;
 934         p = dup_task_struct(current);
 935         if (!p)
 936                 goto fork_out;
 937         p->tux_info = NULL;
 938
 939         p->vx_info = NULL;
 940         set_vx_info(&p->vx_info, current->vx_info);
 941         p->nx_info = NULL;
 942         set_nx_info(&p->nx_info, current->nx_info);
 943
 944         /* check vserver memory */
 945         if (p->mm && !(clone_flags & CLONE_VM)) {
 946                 if (vx_vmpages_avail(p->mm, p->mm->total_vm))
 947                         vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
 948                 else
 949                         goto bad_fork_free;
 950         }
 951         if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
 952                 if (!vx_rsspages_avail(p->mm, p->mm->rss))
 953                         goto bad_fork_cleanup_vm;
 954         }
 955
 956         retval = -EAGAIN;
 957         if (!vx_nproc_avail(1))
 958                 goto bad_fork_cleanup_vm;
 959
 960         if (atomic_read(&p->user->processes) >=
 961                         p->rlim[RLIMIT_NPROC].rlim_cur) {
 962                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
 963                                 p->user != &root_user)
 964                         goto bad_fork_cleanup_vm;
 965         }
 966
 967         atomic_inc(&p->user->__count);
 968         atomic_inc(&p->user->processes);
 969         get_group_info(p->group_info);
 970
 971         /*
 972          * If multiple threads are within copy_process(), then this check
 973          * triggers too late. This doesn't hurt, the check is only there
 974          * to stop root fork bombs.
 975          */
 976         if (nr_threads >= max_threads)
 977                 goto bad_fork_cleanup_count;
 978
 979         if (!try_module_get(p->thread_info->exec_domain->module))
 980                 goto bad_fork_cleanup_count;
 981
 982         if (p->binfmt && !try_module_get(p->binfmt->module))
 983                 goto bad_fork_cleanup_put_domain;
 984
 985         init_delays(p);
 986         p->did_exec = 0;
 987         copy_flags(clone_flags, p);
 988         if (clone_flags & CLONE_IDLETASK)
 989                 p->pid = 0;
 990         else {
 991                 p->pid = alloc_pidmap();
 992                 if (p->pid == -1)
 993                         goto bad_fork_cleanup;
 994         }
 995         retval = -EFAULT;
 996         if (clone_flags & CLONE_PARENT_SETTID)
 997                 if (put_user(p->pid, parent_tidptr))
 998                         goto bad_fork_cleanup;
 999
1000         p->proc_dentry = NULL;
1001
1002         INIT_LIST_HEAD(&p->children);
1003         INIT_LIST_HEAD(&p->sibling);
1004         init_waitqueue_head(&p->wait_chldexit);
1005         p->vfork_done = NULL;
1006         spin_lock_init(&p->alloc_lock);
1007         spin_lock_init(&p->proc_lock);
1008
1009         clear_tsk_thread_flag(p, TIF_SIGPENDING);
1010         init_sigpending(&p->pending);
1011
1012         p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
1013         p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
1014         init_timer(&p->real_timer);
1015         p->real_timer.data = (unsigned long) p;
1016
1017         p->utime = p->stime = 0;
1018         p->cutime = p->cstime = 0;
1019         p->lock_depth = -1;             /* -1 = no lock */
1020         p->start_time = get_jiffies_64();
1021         p->security = NULL;
1022         p->io_context = NULL;
1023         p->audit_context = NULL;
1024 #ifdef CONFIG_NUMA
1025         p->mempolicy = mpol_copy(p->mempolicy);
1026         if (IS_ERR(p->mempolicy)) {
1027                 retval = PTR_ERR(p->mempolicy);
1028                 p->mempolicy = NULL;
1029                 goto bad_fork_cleanup;
1030         }
1031 #endif
1032
1033         if ((retval = security_task_alloc(p)))
1034                 goto bad_fork_cleanup_policy;
1035         if ((retval = audit_alloc(p)))
1036                 goto bad_fork_cleanup_security;
1037         /* copy all the process information */
1038         if ((retval = copy_semundo(clone_flags, p)))
1039                 goto bad_fork_cleanup_audit;
1040         if ((retval = copy_files(clone_flags, p)))
1041                 goto bad_fork_cleanup_semundo;
1042         if ((retval = copy_fs(clone_flags, p)))
1043                 goto bad_fork_cleanup_files;
1044         if ((retval = copy_sighand(clone_flags, p)))
1045                 goto bad_fork_cleanup_fs;
1046         if ((retval = copy_signal(clone_flags, p)))
1047                 goto bad_fork_cleanup_sighand;
1048         if ((retval = copy_mm(clone_flags, p)))
1049                 goto bad_fork_cleanup_signal;
1050         if ((retval = copy_namespace(clone_flags, p)))
1051                 goto bad_fork_cleanup_mm;
1052         retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
1053         if (retval)
1054                 goto bad_fork_cleanup_namespace;
1055
1056         p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1057         /*
1058          * Clear TID on mm_release()?
1059          */
1060         p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1061
1062         /*
1063          * Syscall tracing should be turned off in the child regardless
1064          * of CLONE_PTRACE.
1065          */
1066         clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1067
1068         /* Our parent execution domain becomes current domain
1069            These must match for thread signalling to apply */
1070
1071         p->parent_exec_id = p->self_exec_id;
1072
1073         /* ok, now we should be set up.. */
1074         p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
1075         p->pdeath_signal = 0;
1076
1077         /* Perform scheduler related setup */
1078         sched_fork(p);
1079
1080         /*
1081          * Ok, make it visible to the rest of the system.
1082          * We dont wake it up yet.
1083          */
1084         p->tgid = p->pid;
1085         p->group_leader = p;
1086         INIT_LIST_HEAD(&p->ptrace_children);
1087         INIT_LIST_HEAD(&p->ptrace_list);
1088
1089         /* Need tasklist lock for parent etc handling! */
1090         write_lock_irq(&tasklist_lock);
1091         /*
1092          * Check for pending SIGKILL! The new thread should not be allowed
1093          * to slip out of an OOM kill. (or normal SIGKILL.)
1094          */
1095         if (sigismember(&current->pending.signal, SIGKILL)) {
1096                 write_unlock_irq(&tasklist_lock);
1097                 retval = -EINTR;
1098                 goto bad_fork_cleanup_namespace;
1099         }
1100
1101         /* CLONE_PARENT re-uses the old parent */
1102         if (clone_flags & CLONE_PARENT)
1103                 p->real_parent = current->real_parent;
1104         else
1105                 p->real_parent = current;
1106         p->parent = p->real_parent;
1107
1108         if (clone_flags & CLONE_THREAD) {
1109                 spin_lock(&current->sighand->siglock);
1110                 /*
1111                  * Important: if an exit-all has been started then
1112                  * do not create this new thread - the whole thread
1113                  * group is supposed to exit anyway.
1114                  */
1115                 if (current->signal->group_exit) {
1116                         spin_unlock(&current->sighand->siglock);
1117                         write_unlock_irq(&tasklist_lock);
1118                         retval = -EAGAIN;
1119                         goto bad_fork_cleanup_namespace;
1120                 }
1121                 p->tgid = current->tgid;
1122                 p->group_leader = current->group_leader;
1123
1124                 if (current->signal->group_stop_count > 0) {
1125                         /*
1126                          * There is an all-stop in progress for the group.
1127                          * We ourselves will stop as soon as we check signals.
1128                          * Make the new thread part of that group stop too.
1129                          */
1130                         current->signal->group_stop_count++;
1131                         set_tsk_thread_flag(p, TIF_SIGPENDING);
1132                 }
1133
1134                 spin_unlock(&current->sighand->siglock);
1135         }
1136
1137         SET_LINKS(p);
1138         if (p->ptrace & PT_PTRACED)
1139                 __ptrace_link(p, current->parent);
1140
1141         attach_pid(p, PIDTYPE_PID, p->pid);
1142         if (thread_group_leader(p)) {
1143                 attach_pid(p, PIDTYPE_TGID, p->tgid);
1144                 attach_pid(p, PIDTYPE_PGID, process_group(p));
1145                 attach_pid(p, PIDTYPE_SID, p->signal->session);
1146                 if (p->pid)
1147                         __get_cpu_var(process_counts)++;
1148         } else
1149                 link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
1150
1151         p->ioprio = current->ioprio;
1152         nr_threads++;
1153         /* p is copy of current */
1154         vxi = p->vx_info;
1155         if (vxi) {
1156                 atomic_inc(&vxi->cacct.nr_threads);
1157                 atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]);
1158         }
1159         write_unlock_irq(&tasklist_lock);
1160         retval = 0;
1161
1162 fork_out:
1163         if (retval)
1164                 return ERR_PTR(retval);
1165         return p;
1166
1167 bad_fork_cleanup_namespace:
1168         exit_namespace(p);
1169 bad_fork_cleanup_mm:
1170         exit_mm(p);
1171         if (p->active_mm)
1172                 mmdrop(p->active_mm);
1173 bad_fork_cleanup_signal:
1174         exit_signal(p);
1175 bad_fork_cleanup_sighand:
1176         exit_sighand(p);
1177 bad_fork_cleanup_fs:
1178         exit_fs(p); /* blocking */
1179 bad_fork_cleanup_files:
1180         exit_files(p); /* blocking */
1181 bad_fork_cleanup_semundo:
1182         exit_sem(p);
1183 bad_fork_cleanup_audit:
1184         audit_free(p);
1185 bad_fork_cleanup_security:
1186         security_task_free(p);
1187 bad_fork_cleanup_policy:
1188 #ifdef CONFIG_NUMA
1189         mpol_free(p->mempolicy);
1190 #endif
1191 bad_fork_cleanup:
1192         if (p->pid > 0)
1193                 free_pidmap(p->pid);
1194         if (p->binfmt)
1195                 module_put(p->binfmt->module);
1196 bad_fork_cleanup_put_domain:
1197         module_put(p->thread_info->exec_domain->module);
1198 bad_fork_cleanup_count:
1199         put_group_info(p->group_info);
1200         atomic_dec(&p->user->processes);
1201         free_uid(p->user);
1202 bad_fork_cleanup_vm:
1203         if (p->mm && !(clone_flags & CLONE_VM))
1204                 vx_pages_sub(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
1205 bad_fork_free:
1206         free_task(p);
1207         goto fork_out;
1208 }
1209
1210 static inline int fork_traceflag (unsigned clone_flags)
1211 {
1212         if (clone_flags & (CLONE_UNTRACED | CLONE_IDLETASK))
1213                 return 0;
1214         else if (clone_flags & CLONE_VFORK) {
1215                 if (current->ptrace & PT_TRACE_VFORK)
1216                         return PTRACE_EVENT_VFORK;
1217         } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1218                 if (current->ptrace & PT_TRACE_CLONE)
1219                         return PTRACE_EVENT_CLONE;
1220         } else if (current->ptrace & PT_TRACE_FORK)
1221                 return PTRACE_EVENT_FORK;
1222
1223         return 0;
1224 }
1225
1226 /*
1227  *  Ok, this is the main fork-routine.
1228  *
1229  * It copies the process, and if successful kick-starts
1230  * it and waits for it to finish using the VM if required.
1231  */
1232 long do_fork(unsigned long clone_flags,
1233               unsigned long stack_start,
1234               struct pt_regs *regs,
1235               unsigned long stack_size,
1236               int __user *parent_tidptr,
1237               int __user *child_tidptr)
1238 {
1239         struct task_struct *p;
1240         int trace = 0;
1241         long pid;
1242
1243         if (unlikely(current->ptrace)) {
1244                 trace = fork_traceflag (clone_flags);
1245                 if (trace)
1246                         clone_flags |= CLONE_PTRACE;
1247         }
1248
1249 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
1250         if (numtasks_get_ref(current->taskclass, 0) == 0) {
1251                 return -ENOMEM;
1252         }
1253 #endif
1254
1255         p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr);
1256         /*
1257          * Do this prior waking up the new thread - the thread pointer
1258          * might get invalid after that point, if the thread exits quickly.
1259          */
1260         pid = IS_ERR(p) ? PTR_ERR(p) : p->pid;
1261
1262         if (!IS_ERR(p)) {
1263                 struct completion vfork;
1264
1265                 ckrm_cb_fork(p);
1266
1267                 if (clone_flags & CLONE_VFORK) {
1268                         p->vfork_done = &vfork;
1269                         init_completion(&vfork);
1270                 }
1271
1272                 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
1273                         /*
1274                          * We'll start up with an immediate SIGSTOP.
1275                          */
1276                         sigaddset(&p->pending.signal, SIGSTOP);
1277                         set_tsk_thread_flag(p, TIF_SIGPENDING);
1278                 }
1279
1280                 if (!(clone_flags & CLONE_STOPPED)) {
1281                         /*
1282                          * Do the wakeup last. On SMP we treat fork() and
1283                          * CLONE_VM separately, because fork() has already
1284                          * created cache footprint on this CPU (due to
1285                          * copying the pagetables), hence migration would
1286                          * probably be costy. Threads on the other hand
1287                          * have less traction to the current CPU, and if
1288                          * there's an imbalance then the scheduler can
1289                          * migrate this fresh thread now, before it
1290                          * accumulates a larger cache footprint:
1291                          */
1292                         if (clone_flags & CLONE_VM)
1293                                 wake_up_forked_thread(p);
1294                         else
1295                                 wake_up_forked_process(p);
1296                 } else {
1297                         int cpu = get_cpu();
1298
1299                         p->state = TASK_STOPPED;
1300                         if (cpu_is_offline(task_cpu(p)))
1301                                 set_task_cpu(p, cpu);
1302
1303                         put_cpu();
1304                 }
1305                 ++total_forks;
1306
1307                 if (unlikely (trace)) {
1308                         current->ptrace_message = pid;
1309                         ptrace_notify ((trace << 8) | SIGTRAP);
1310                 }
1311
1312                 if (clone_flags & CLONE_VFORK) {
1313                         wait_for_completion(&vfork);
1314                         if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
1315                                 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1316                 } else
1317                         /*
1318                          * Let the child process run first, to avoid most of the
1319                          * COW overhead when the child exec()s afterwards.
1320                          */
1321                         set_need_resched();
1322         } else {
1323 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
1324                 numtasks_put_ref(current->taskclass);
1325 #endif
1326         }
1327         return pid;
1328 }
1329
1330 /* SLAB cache for signal_struct structures (tsk->signal) */
1331 kmem_cache_t *signal_cachep;
1332
1333 /* SLAB cache for sighand_struct structures (tsk->sighand) */
1334 kmem_cache_t *sighand_cachep;
1335
1336 /* SLAB cache for files_struct structures (tsk->files) */
1337 kmem_cache_t *files_cachep;
1338
1339 /* SLAB cache for fs_struct structures (tsk->fs) */
1340 kmem_cache_t *fs_cachep;
1341
1342 /* SLAB cache for vm_area_struct structures */
1343 kmem_cache_t *vm_area_cachep;
1344
1345 /* SLAB cache for mm_struct structures (tsk->mm) */
1346 kmem_cache_t *mm_cachep;
1347
1348 void __init proc_caches_init(void)
1349 {
1350         sighand_cachep = kmem_cache_create("sighand_cache",
1351                         sizeof(struct sighand_struct), 0,
1352                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1353         signal_cachep = kmem_cache_create("signal_cache",
1354                         sizeof(struct signal_struct), 0,
1355                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1356         files_cachep = kmem_cache_create("files_cache",
1357                         sizeof(struct files_struct), 0,
1358                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1359         fs_cachep = kmem_cache_create("fs_cache",
1360                         sizeof(struct fs_struct), 0,
1361                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1362         vm_area_cachep = kmem_cache_create("vm_area_struct",
1363                         sizeof(struct vm_area_struct), 0,
1364                         SLAB_PANIC, NULL, NULL);
1365         mm_cachep = kmem_cache_create("mm_struct",
1366                         sizeof(struct mm_struct), 0,
1367                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1368 }