#include <linux/timer.h>
#include <asm/processor.h>
+#include <linux/vserver/context.h>
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_STOPPED 4
#define TASK_ZOMBIE 8
#define TASK_DEAD 16
+#define TASK_ONHOLD 32
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
typedef struct task_struct task_t;
extern void sched_init(void);
+extern void sched_init_smp(void);
extern void init_idle(task_t *idle, int cpu);
-extern cpumask_t idle_cpu_mask;
+extern cpumask_t nohz_cpu_mask;
extern void show_state(void);
extern void show_regs(struct pt_regs *);
extern void cpu_init (void);
extern void trap_init(void);
extern void update_process_times(int user);
-extern void update_one_process(struct task_struct *p, unsigned long user,
- unsigned long system, int cpu);
extern void scheduler_tick(int user_tick, int system);
extern unsigned long cache_decay_ticks;
-extern const unsigned long scheduling_functions_start_here;
-extern const unsigned long scheduling_functions_end_here;
+/*
+ * Attach to any function that should be ignored in wchan output.
+ * The attribute places the function in the ".sched.text" section.
+ */
+#define __sched __attribute__((__section__(".sched.text")))
+/* Is this address inside one of the __sched functions? */
+extern int in_sched_functions(unsigned long addr);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
extern signed long FASTCALL(schedule_timeout(signed long timeout));
unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
unsigned dumpable:1;
-#ifdef CONFIG_HUGETLB_PAGE
- int used_hugetlb;
-#endif
cpumask_t cpu_vm_mask;
/* Architecture-specific MM context */
mm_context_t context;
+ struct vx_info *mm_vx_info;
/* coredumping support */
int core_waiters;
* in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
* are inverted: lower p->prio value means higher priority.
*
- * The MAX_RT_USER_PRIO value allows the actual maximum
+ * The MAX_USER_RT_PRIO value allows the actual maximum
* RT priority to be separate from the value exported to
* user-space. This allows kernel threads to set their
* priority to a value higher than any user task. Note:
atomic_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t files; /* How many open files does this user have? */
+ atomic_t sigpending; /* How many pending signals does this user have? */
+ /* protected by mq_lock */
+ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
/* Hash table maintenance information */
struct list_head uidhash_list;
uid_t uid;
+ xid_t xid;
};
-extern struct user_struct *find_user(uid_t);
+extern struct user_struct *find_user(xid_t, uid_t);
extern struct user_struct root_user;
#define INIT_USER (&root_user)
struct task_struct *it_process; /* process to send signal to */
struct timer_list it_timer;
struct sigqueue *sigq; /* signal queue entry. */
+ struct list_head abs_timer_entry; /* clock abs_timer_list */
+ struct timespec wall_to_prev; /* wall_to_monotonic used when set */
};
void exit_io_context(void);
#define NGROUPS_SMALL 32
-#define NGROUPS_PER_BLOCK ((int)(EXEC_PAGESIZE / sizeof(gid_t)))
+#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))
struct group_info {
int ngroups;
atomic_t usage;
gid_t *blocks[0];
};
+/*
+ * get_group_info() takes a reference on a group_info by atomically
+ * incrementing its usage count.
+ *
+ * It must be called with the owning task locked (via task_lock()) when
+ * task != current. This is because the vast majority of callers look at
+ * current->group_info, which cannot be changed except by the current task.
+ * Changing current->group_info requires the task lock, too.
+ */
#define get_group_info(group_info) do { \
atomic_inc(&(group_info)->usage); \
} while (0)
struct audit_context; /* See audit.c */
+struct mempolicy;
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
unsigned int time_slice, first_time_slice;
struct list_head tasks;
+ /*
+ * ptrace_list/ptrace_children form the list of my children
+ * that were stolen by a ptracer.
+ */
struct list_head ptrace_children;
struct list_head ptrace_list;
*/
struct task_struct *real_parent; /* real parent process (when being debugged) */
struct task_struct *parent; /* parent process */
+ /*
+ * children/sibling form the list of my children plus the
+ * tasks I'm ptracing.
+ */
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader; /* threadgroup leader */
void *security;
struct audit_context *audit_context;
+/* vserver context data */
+ xid_t xid;
+ struct vx_info *vx_info;
+
+/* vserver network data */
+ nid_t nid;
+ struct nx_info *nx_info;
+
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
+
+#ifdef CONFIG_NUMA
+ struct mempolicy *mempolicy;
+ short il_next; /* could be shared with used_math */
+#endif
};
static inline pid_t process_group(struct task_struct *tsk)
#define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */
#ifdef CONFIG_SMP
+#define SCHED_LOAD_SCALE 128UL /* increase resolution of load */
+
+#define SD_BALANCE_NEWIDLE 1 /* Balance when about to become idle */
+#define SD_BALANCE_EXEC 2 /* Balance on exec */
+#define SD_BALANCE_CLONE 4 /* Balance on clone */
+#define SD_WAKE_IDLE 8 /* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE 16 /* Wake task to waking CPU */
+#define SD_WAKE_BALANCE 32 /* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER 64 /* Domain members share cpu power */
+
+struct sched_group {
+ struct sched_group *next; /* Next group; the list must be circular */
+ cpumask_t cpumask; /* presumably the CPUs belonging to this group -- TODO confirm */
+
+ /*
+ * CPU power of this group, where SCHED_LOAD_SCALE is the maximum
+ * power of a single CPU. This should be treated as read-only except
+ * during setup. It will still need to be written at cpu hot(un)plug
+ * time; open question: perhaps the cpucontrol semaphore will provide
+ * enough exclusion for that?
+ */
+ unsigned long cpu_power;
+};
+
+struct sched_domain {
+ /* These fields must be set up */
+ struct sched_domain *parent; /* top domain must be NULL-terminated (parent == NULL) */
+ struct sched_group *groups; /* the balancing groups of the domain */
+ cpumask_t span; /* span of all CPUs in this domain */
+ unsigned long min_interval; /* Minimum balance interval, in ms */
+ unsigned long max_interval; /* Maximum balance interval, in ms */
+ unsigned int busy_factor; /* less balancing by this factor if busy */
+ unsigned int imbalance_pct; /* No balance until over watermark */
+ unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
+ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
+ unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */
+ int flags; /* Bitmask of SD_* flags */
+
+ /* Runtime fields. */
+ unsigned long last_balance; /* initialise to jiffies; units in jiffies */
+ unsigned int balance_interval; /* initialise to 1; units in ms */
+ unsigned int nr_balance_failed; /* initialise to 0 */
+};
+
+/*
+ * Common values for SMT siblings, written as a struct sched_domain
+ * compound literal. span, parent and groups start out empty
+ * (CPU_MASK_NONE / NULL). Balance interval is 1..2, cache_hot_time is 0,
+ * and the flags include SD_WAKE_IDLE and SD_SHARE_CPUPOWER.
+ */
+#define SD_SIBLING_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 1, \
+ .max_interval = 2, \
+ .busy_factor = 8, \
+ .imbalance_pct = 110, \
+ .cache_hot_time = 0, \
+ .cache_nice_tries = 0, \
+ .per_cpu_gain = 15, \
+ .flags = SD_BALANCE_NEWIDLE \
+ | SD_BALANCE_EXEC \
+ | SD_BALANCE_CLONE \
+ | SD_WAKE_AFFINE \
+ | SD_WAKE_IDLE \
+ | SD_SHARE_CPUPOWER, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+
+/*
+ * Common values for CPUs, written as a struct sched_domain compound
+ * literal. span, parent and groups start out empty (CPU_MASK_NONE / NULL).
+ * Balance interval is 1..4 and cache_hot_time is 5*1000000/2, i.e. 2.5 ms
+ * given that the field is in ns. The flags include SD_WAKE_BALANCE.
+ */
+#define SD_CPU_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (5*1000000/2), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_BALANCE_NEWIDLE \
+ | SD_BALANCE_EXEC \
+ | SD_BALANCE_CLONE \
+ | SD_WAKE_AFFINE \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+
+#ifdef CONFIG_NUMA
+/*
+ * Common values for NUMA nodes, written as a struct sched_domain compound
+ * literal. span, parent and groups start out empty (CPU_MASK_NONE / NULL).
+ * Balance interval is 8..32 and cache_hot_time is 10*1000000, i.e. 10 ms
+ * given that the field is in ns. Unlike the SMT and CPU initializers, the
+ * flags omit SD_BALANCE_NEWIDLE and SD_WAKE_AFFINE.
+ */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_BALANCE_EXEC \
+ | SD_BALANCE_CLONE \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+#endif
+
+extern void cpu_attach_domain(struct sched_domain *sd, int cpu);
+
extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
#else
static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
extern unsigned long long sched_clock(void);
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SMP
extern void sched_balance_exec(void);
-extern void node_nr_running_init(void);
#else
#define sched_balance_exec() {}
-#define node_nr_running_init() {}
#endif
-/* Move tasks off this (offline) CPU onto another. */
-extern void migrate_all_tasks(void);
+extern void sched_idle_next(void);
extern void set_user_nice(task_t *p, long nice);
-extern int task_prio(task_t *p);
-extern int task_nice(task_t *p);
-extern int task_curr(task_t *p);
+extern int task_prio(const task_t *p);
+extern int task_nice(const task_t *p);
+extern int task_curr(const task_t *p);
extern int idle_cpu(int cpu);
void yield(void);
extern void __set_special_pids(pid_t session, pid_t pgrp);
/* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(xid_t, uid_t);
+static inline struct user_struct *get_uid(struct user_struct *u)
+{ /* Take a reference on a user_struct and return the same pointer. */
+ atomic_inc(&u->__count); /* u is dereferenced unconditionally, so it must not be NULL */
+ return u; /* returning u lets the call be used inline in an assignment */
+}
extern void free_uid(struct user_struct *);
extern void switch_uid(struct user_struct *);
extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
#ifdef CONFIG_SMP
extern void kick_process(struct task_struct *tsk);
+ extern void FASTCALL(wake_up_forked_thread(struct task_struct * tsk));
#else
static inline void kick_process(struct task_struct *tsk) { }
+ static inline void wake_up_forked_thread(struct task_struct * tsk)
+ { /* !CONFIG_SMP variant: simply forwards to wake_up_forked_process() */
+ wake_up_forked_process(tsk);
+ }
#endif
-extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
extern void FASTCALL(sched_fork(task_t * p));
extern void FASTCALL(sched_exit(task_t * p));
#define while_each_thread(g, t) \
while ((t = next_thread(t)) != g)
-extern task_t * FASTCALL(next_thread(task_t *p));
+extern task_t * FASTCALL(next_thread(const task_t *p));
#define thread_group_leader(p) (p->pid == p->tgid)
extern void unhash_process(struct task_struct *p);
/*
- * Protects ->fs, ->files, ->mm, ->ptrace and synchronises with wait4().
+ * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info and synchronises with
+ * wait4().
+ *
* Nests both inside and outside of read_lock(&tasklist_lock).
* It must not be nested with write_lock_irq(&tasklist_lock),
* neither inside nor outside.
*/
#ifdef CONFIG_SMP
-static inline unsigned int task_cpu(struct task_struct *p)
+static inline unsigned int task_cpu(const struct task_struct *p)
{
return p->thread_info->cpu;
}
#else
-static inline unsigned int task_cpu(struct task_struct *p)
+static inline unsigned int task_cpu(const struct task_struct *p)
{
return 0;
}