#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/cpu.h>
+#include <linux/cpuset.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/profile.h>
#include <linux/rmap.h>
+#include <linux/acct.h>
#include <linux/vs_network.h>
#include <linux/vs_limit.h>
#include <linux/vs_memory.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-/* The idle threads do not count..
- * Protected by write_lock_irq(&tasklist_lock)
+/*
+ * Protected counters by write_lock_irq(&tasklist_lock)
*/
-int nr_threads;
-
-int max_threads;
unsigned long total_forks; /* Handle normal Linux uptimes. */
+int nr_threads; /* The idle threads do not count.. */
+
+int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
+ __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
EXPORT_SYMBOL(tasklist_lock);
static kmem_cache_t *task_struct_cachep;
#endif
+/* SLAB cache for signal_struct structures (tsk->signal) */
+kmem_cache_t *signal_cachep;
+
+/* SLAB cache for sighand_struct structures (tsk->sighand) */
+kmem_cache_t *sighand_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+static kmem_cache_t *mm_cachep;
+
void free_task(struct task_struct *tsk)
{
free_thread_info(tsk->thread_info);
init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
+ init_task.signal->rlim[RLIMIT_SIGPENDING] =
+ init_task.signal->rlim[RLIMIT_NPROC];
}
static struct task_struct *dup_task_struct(struct task_struct *orig)
mm->mmap_cache = NULL;
mm->free_area_cache = oldmm->mmap_base;
mm->map_count = 0;
- mm->rss = 0;
- mm->anon_rss = 0;
+ __set_mm_counter(mm, rss, 0);
+ __set_mm_counter(mm, anon_rss, 0);
cpus_clear(mm->cpu_vm_mask);
mm->mm_rb = RB_ROOT;
rb_link = &mm->mm_rb.rb_node;
/* insert tmp into the share list, just after mpnt */
spin_lock(&file->f_mapping->i_mmap_lock);
+ tmp->vm_truncate_count = mpnt->vm_truncate_count;
flush_dcache_mmap_lock(file->f_mapping);
vma_prio_tree_add(tmp, mpnt);
flush_dcache_mmap_unlock(file->f_mapping);
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
-spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
if (retval)
goto free_pt;
+ mm->hiwater_rss = get_mm_counter(mm,rss);
+ mm->hiwater_vm = mm->total_vm;
+
good_mm:
tsk->mm = mm;
tsk->active_mm = mm;
* If init_new_context() failed, we cannot use mmput() to free the mm
* because it calls destroy_context()
*/
+ clr_vx_info(&mm->mm_vx_info);
mm_free_pgd(mm);
free_mm(mm);
return retval;
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
- int open_files, nfds, size, i, error = 0;
+ int open_files, size, i, error = 0, expand;
/*
* A background process may not have any files ...
newf->open_fds = &newf->open_fds_init;
newf->fd = &newf->fd_array[0];
- /* We don't yet have the oldf readlock, but even if the old
- fdset gets grown now, we'll only copy up to "size" fds */
- size = oldf->max_fdset;
- if (size > __FD_SETSIZE) {
- newf->max_fdset = 0;
- spin_lock(&newf->file_lock);
- error = expand_fdset(newf, size-1);
- spin_unlock(&newf->file_lock);
- if (error)
- goto out_release;
- }
spin_lock(&oldf->file_lock);
- open_files = count_open_files(oldf, size);
+ open_files = count_open_files(oldf, oldf->max_fdset);
+ expand = 0;
/*
- * Check whether we need to allocate a larger fd array.
- * Note: we're not a clone task, so the open count won't
- * change.
+ * Check whether we need to allocate a larger fd array or fd set.
+ * Note: we're not a clone task, so the open count won't change.
*/
- nfds = NR_OPEN_DEFAULT;
- if (open_files > nfds) {
- spin_unlock(&oldf->file_lock);
+ if (open_files > newf->max_fdset) {
+ newf->max_fdset = 0;
+ expand = 1;
+ }
+ if (open_files > newf->max_fds) {
newf->max_fds = 0;
+ expand = 1;
+ }
+
+ /* if the old fdset gets grown now, we'll only copy up to "size" fds */
+ if (expand) {
+ spin_unlock(&oldf->file_lock);
spin_lock(&newf->file_lock);
- error = expand_fd_array(newf, open_files-1);
+ error = expand_files(newf, open_files-1);
spin_unlock(&newf->file_lock);
- if (error)
+ if (error < 0)
goto out_release;
- nfds = newf->max_fds;
spin_lock(&oldf->file_lock);
}
struct file *f = *old_fds++;
if (f) {
get_file(f);
+ /* FIXME sum it first for check and performance */
+ vx_openfd_inc(open_files - i);
} else {
/*
* The fd may be claimed in the fd bitmap but not yet
out_release:
free_fdset (newf->close_on_exec, newf->max_fdset);
free_fdset (newf->open_fds, newf->max_fdset);
+ free_fd_array(newf->fd, newf->max_fds);
kmem_cache_free(files_cachep, newf);
goto out;
}
static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
struct signal_struct *sig;
+ int ret;
if (clone_flags & CLONE_THREAD) {
atomic_inc(¤t->signal->count);
tsk->signal = sig;
if (!sig)
return -ENOMEM;
+
+ ret = copy_thread_group_keys(tsk);
+ if (ret < 0) {
+ kmem_cache_free(signal_cachep, sig);
+ return ret;
+ }
+
atomic_set(&sig->count, 1);
atomic_set(&sig->live, 1);
- sig->group_exit = 0;
+ init_waitqueue_head(&sig->wait_chldexit);
+ sig->flags = 0;
sig->group_exit_code = 0;
sig->group_exit_task = NULL;
sig->group_stop_count = 0;
- sig->stop_state = 0;
sig->curr_target = NULL;
init_sigpending(&sig->shared_pending);
INIT_LIST_HEAD(&sig->posix_timers);
+ sig->it_real_value = sig->it_real_incr = 0;
+ sig->real_timer.function = it_real_fn;
+ sig->real_timer.data = (unsigned long) tsk;
+ init_timer(&sig->real_timer);
+
+ sig->it_virt_expires = cputime_zero;
+ sig->it_virt_incr = cputime_zero;
+ sig->it_prof_expires = cputime_zero;
+ sig->it_prof_incr = cputime_zero;
+
sig->tty = current->signal->tty;
sig->pgrp = process_group(current);
sig->session = current->signal->session;
sig->leader = 0; /* session leadership doesn't inherit */
sig->tty_old_pgrp = 0;
- sig->utime = sig->stime = sig->cutime = sig->cstime = 0;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+ sig->sched_time = 0;
+ INIT_LIST_HEAD(&sig->cpu_timers[0]);
+ INIT_LIST_HEAD(&sig->cpu_timers[1]);
+ INIT_LIST_HEAD(&sig->cpu_timers[2]);
task_lock(current->group_leader);
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
task_unlock(current->group_leader);
+ if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+ /*
+ * New sole thread in the process gets an expiry time
+ * of the whole CPU time limit.
+ */
+ tsk->it_prof_expires =
+ secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+ }
+
return 0;
}
int retval;
struct task_struct *p = NULL;
struct vx_info *vxi;
+ struct nx_info *nxi;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
goto bad_fork_free;
}
if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
- if (!vx_rsspages_avail(p->mm, p->mm->rss))
+ if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, rss)))
goto bad_fork_cleanup_vm;
}
goto bad_fork_free;
}
if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
- if (!vx_rsspages_avail(p->mm, p->mm->rss))
+ if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, rss)))
+ goto bad_fork_cleanup_vm;
+ }
+
+ init_vx_info(&p->vx_info, current->vx_info);
+ p->nx_info = NULL;
+ set_nx_info(&p->nx_info, current->nx_info);
+
+ /* check vserver memory */
+ if (p->mm && !(clone_flags & CLONE_VM)) {
+ if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+ vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+ else
+ goto bad_fork_free;
+ }
+ if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+ if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, rss)))
+ goto bad_fork_cleanup_vm;
+ }
+
+ init_vx_info(&p->vx_info, current->vx_info);
+ init_nx_info(&p->nx_info, current->nx_info);
+
+ /* check vserver memory */
+ if (p->mm && !(clone_flags & CLONE_VM)) {
+ if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+ vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+ else
+ goto bad_fork_free;
+ }
+ if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+ if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, rss)))
+ goto bad_fork_cleanup_vm;
+ }
+
+ init_vx_info(&p->vx_info, current->vx_info);
+ init_nx_info(&p->nx_info, current->nx_info);
+
+ /* check vserver memory */
+ if (p->mm && !(clone_flags & CLONE_VM)) {
+ if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+ vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+ else
+ goto bad_fork_free;
+ }
+ if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+ if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, rss)))
goto bad_fork_cleanup_vm;
}
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
- init_waitqueue_head(&p->wait_chldexit);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->proc_lock);
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
- p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
- p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
- init_timer(&p->real_timer);
- p->real_timer.data = (unsigned long) p;
+ p->utime = cputime_zero;
+ p->stime = cputime_zero;
+ p->sched_time = 0;
+ p->rchar = 0; /* I/O counter: bytes read */
+ p->wchar = 0; /* I/O counter: bytes written */
+ p->syscr = 0; /* I/O counter: read syscalls */
+ p->syscw = 0; /* I/O counter: write syscalls */
+ acct_clear_integrals(p);
+
+ p->it_virt_expires = cputime_zero;
+ p->it_prof_expires = cputime_zero;
+ p->it_sched_expires = 0;
+ INIT_LIST_HEAD(&p->cpu_timers[0]);
+ INIT_LIST_HEAD(&p->cpu_timers[1]);
+ INIT_LIST_HEAD(&p->cpu_timers[2]);
- p->utime = p->stime = 0;
p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
* do not create this new thread - the whole thread
* group is supposed to exit anyway.
*/
- if (current->signal->group_exit) {
+ if (current->signal->flags & SIGNAL_GROUP_EXIT) {
spin_unlock(¤t->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -EAGAIN;
set_tsk_thread_flag(p, TIF_SIGPENDING);
}
+ if (!cputime_eq(current->signal->it_virt_expires,
+ cputime_zero) ||
+ !cputime_eq(current->signal->it_prof_expires,
+ cputime_zero) ||
+ current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
+ !list_empty(¤t->signal->cpu_timers[0]) ||
+ !list_empty(¤t->signal->cpu_timers[1]) ||
+ !list_empty(¤t->signal->cpu_timers[2])) {
+ /*
+ * Have child wake up on its first tick to check
+ * for process CPU timers.
+ */
+ p->it_prof_expires = jiffies_to_cputime(1);
+ }
+
spin_unlock(¤t->sighand->siglock);
}
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);
+ cpuset_fork(p);
+
attach_pid(p, PIDTYPE_PID, p->pid);
attach_pid(p, PIDTYPE_TGID, p->tgid);
if (thread_group_leader(p)) {
p->ioprio = current->ioprio;
nr_threads++;
+ total_forks++;
+
/* p is copy of current */
vxi = p->vx_info;
if (vxi) {
+ claim_vx_info(vxi, p);
atomic_inc(&vxi->cvirt.nr_threads);
+ atomic_inc(&vxi->cvirt.total_forks);
vx_nproc_inc(p);
}
+ nxi = p->nx_info;
+ if (nxi)
+ claim_nx_info(nxi, p);
write_unlock_irq(&tasklist_lock);
retval = 0;
wake_up_new_task(p, clone_flags);
else
p->state = TASK_STOPPED;
- ++total_forks;
if (unlikely (trace)) {
current->ptrace_message = pid;
return pid;
}
-/* SLAB cache for signal_struct structures (tsk->signal) */
-kmem_cache_t *signal_cachep;
-
-/* SLAB cache for sighand_struct structures (tsk->sighand) */
-kmem_cache_t *sighand_cachep;
-
-/* SLAB cache for files_struct structures (tsk->files) */
-kmem_cache_t *files_cachep;
-
-/* SLAB cache for fs_struct structures (tsk->fs) */
-kmem_cache_t *fs_cachep;
-
-/* SLAB cache for vm_area_struct structures */
-kmem_cache_t *vm_area_cachep;
-
-/* SLAB cache for mm_struct structures (tsk->mm) */
-kmem_cache_t *mm_cachep;
-
void __init proc_caches_init(void)
{
sighand_cachep = kmem_cache_create("sighand_cache",