X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Flinux%2Fsched.h;h=7fc52d7b5f5fa094dc45cea4165aca98e94f52d9;hb=9bf4aaab3e101692164d49b7ca357651eb691cb6;hp=73d7127e3f282e22f9685dacf0eaab5ff99180db;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 73d7127e3..7fc52d7b5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -102,6 +102,7 @@ extern unsigned long nr_iowait(void);
 
 #include 
 #include 
+#include 
 
 #define TASK_RUNNING		0
 #define TASK_INTERRUPTIBLE	1
@@ -109,6 +110,7 @@ extern unsigned long nr_iowait(void);
 #define TASK_STOPPED		4
 #define TASK_ZOMBIE		8
 #define TASK_DEAD		16
+#define TASK_ONHOLD		32
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
@@ -147,9 +149,10 @@ extern spinlock_t mmlist_lock;
 typedef struct task_struct task_t;
 
 extern void sched_init(void);
+extern void sched_init_smp(void);
 extern void init_idle(task_t *idle, int cpu);
 
-extern cpumask_t idle_cpu_mask;
+extern cpumask_t nohz_cpu_mask;
 
 extern void show_state(void);
 extern void show_regs(struct pt_regs *);
@@ -167,13 +170,13 @@ long io_schedule_timeout(long timeout);
 extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
-extern void update_one_process(struct task_struct *p, unsigned long user,
-			       unsigned long system, int cpu);
 extern void scheduler_tick(int user_tick, int system);
 extern unsigned long cache_decay_ticks;
 
-extern const unsigned long scheduling_functions_start_here;
-extern const unsigned long scheduling_functions_end_here;
+/* Attach to any functions which should be ignored in wchan output. */
+#define __sched		__attribute__((__section__(".sched.text")))
+/* Is this address in the __sched functions? */
+extern int in_sched_functions(unsigned long addr);
 
 #define MAX_SCHEDULE_TIMEOUT	LONG_MAX
 extern signed long FASTCALL(schedule_timeout(signed long timeout));
@@ -214,13 +217,11 @@ struct mm_struct {
 	unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
 
 	unsigned dumpable:1;
-#ifdef CONFIG_HUGETLB_PAGE
-	int used_hugetlb;
-#endif
 	cpumask_t cpu_vm_mask;
 
 	/* Architecture-specific MM context */
 	mm_context_t context;
+	struct vx_info *mm_vx_info;
 
 	/* coredumping support */
 	int core_waiters;
@@ -290,7 +291,7 @@ struct signal_struct {
  * in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values
  * are inverted: lower p->prio value means higher priority.
  *
- * The MAX_RT_USER_PRIO value allows the actual maximum
+ * The MAX_USER_RT_PRIO value allows the actual maximum
  * RT priority to be separate from the value exported to
  * user-space. This allows kernel threads to set their
  * priority to a value higher than any user task. Note:
@@ -311,13 +312,17 @@ struct user_struct {
 	atomic_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t files;		/* How many open files does this user have? */
+	atomic_t sigpending;	/* How many pending signals does this user have? */
+	/* protected by mq_lock	*/
+	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 
 	/* Hash table maintenance information */
 	struct list_head uidhash_list;
 	uid_t uid;
+	xid_t xid;
 };
 
-extern struct user_struct *find_user(uid_t);
+extern struct user_struct *find_user(xid_t, uid_t);
 
 extern struct user_struct root_user;
 #define INIT_USER (&root_user)
@@ -342,6 +347,8 @@ struct k_itimer {
 	struct task_struct *it_process;	/* process to send signal to */
 	struct timer_list it_timer;
 	struct sigqueue *sigq;		/* signal queue entry. */
+	struct list_head abs_timer_entry; /* clock abs_timer_list */
+	struct timespec wall_to_prev;	/* wall_to_monotonic used when set */
 };
 
 
@@ -349,7 +356,7 @@ struct io_context;			/* See blkdev.h */
 void exit_io_context(void);
 
 #define NGROUPS_SMALL		32
-#define NGROUPS_PER_BLOCK	((int)(EXEC_PAGESIZE / sizeof(gid_t)))
+#define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
 struct group_info {
 	int ngroups;
 	atomic_t usage;
@@ -358,6 +365,12 @@ struct group_info {
 	gid_t *blocks[0];
 };
 
+/*
+ * get_group_info() must be called with the owning task locked (via task_lock())
+ * when task != current. The reason being that the vast majority of callers are
+ * looking at current->group_info, which can not be changed except by the
+ * current task. Changing current->group_info requires the task lock, too.
+ */
 #define get_group_info(group_info) do { \
 	atomic_inc(&(group_info)->usage); \
 } while (0)
@@ -376,6 +389,7 @@ int set_current_groups(struct group_info *group_info);
 
 struct audit_context;		/* See audit.c */
+struct mempolicy;
 
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
@@ -400,6 +414,10 @@ struct task_struct {
 	unsigned int time_slice, first_time_slice;
 
 	struct list_head tasks;
+	/*
+	 * ptrace_list/ptrace_children forms the list of my children
+	 * that were stolen by a ptracer.
+	 */
 	struct list_head ptrace_children;
 	struct list_head ptrace_list;
 
@@ -421,6 +439,10 @@ struct task_struct {
 	 */
 	struct task_struct *real_parent; /* real parent process (when being debugged) */
 	struct task_struct *parent;	/* parent process */
+	/*
+	 * children/sibling forms the list of my children plus the
+	 * tasks I'm ptracing.
+	 */
 	struct list_head children;	/* list of my children */
 	struct list_head sibling;	/* linkage in my parent's children list */
 	struct task_struct *group_leader;	/* threadgroup leader */
@@ -481,6 +503,14 @@ struct task_struct {
 	void *security;
 	struct audit_context *audit_context;
 
+/* vserver context data */
+	xid_t xid;
+	struct vx_info *vx_info;
+
+/* vserver network data */
+	nid_t nid;
+	struct nx_info *nx_info;
+
 /* Thread group tracking */
 	u32 parent_exec_id;
 	u32 self_exec_id;
@@ -504,6 +534,11 @@ struct task_struct {
 
 	unsigned long ptrace_message;
 	siginfo_t *last_siginfo; /* For ptrace use.  */
+
+#ifdef CONFIG_NUMA
+	struct mempolicy *mempolicy;
+	short il_next;		/* could be shared with used_math */
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -542,6 +577,118 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
 #define PF_SYNCWRITE	0x00200000	/* I am doing a sync write */
 
 #ifdef CONFIG_SMP
+#define SCHED_LOAD_SCALE	128UL	/* increase resolution of load */
+
+#define SD_BALANCE_NEWIDLE	1	/* Balance when about to become idle */
+#define SD_BALANCE_EXEC		2	/* Balance on exec */
+#define SD_BALANCE_CLONE	4	/* Balance on clone */
+#define SD_WAKE_IDLE		8	/* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE		16	/* Wake task to waking CPU */
+#define SD_WAKE_BALANCE		32	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	64	/* Domain members share cpu power */
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	cpumask_t cpumask;
+
+	/*
+	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
+	 * single CPU. This should be read only (except for setup). Although
+	 * it will need to be written to at cpu hot(un)plug time, perhaps the
+	 * cpucontrol semaphore will provide enough exclusion?
+	 */
+	unsigned long cpu_power;
+};
+
+struct sched_domain {
+	/* These fields must be setup */
+	struct sched_domain *parent;	/* top domain must be null terminated */
+	struct sched_group *groups;	/* the balancing groups of the domain */
+	cpumask_t span;			/* span of all CPUs in this domain */
+	unsigned long min_interval;	/* Minimum balance interval ms */
+	unsigned long max_interval;	/* Maximum balance interval ms */
+	unsigned int busy_factor;	/* less balancing by factor if busy */
+	unsigned int imbalance_pct;	/* No balance until over watermark */
+	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
+	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
+	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
+	int flags;			/* See SD_* */
+
+	/* Runtime fields. */
+	unsigned long last_balance;	/* init to jiffies. units in jiffies */
+	unsigned int balance_interval;	/* initialise to 1. units in ms. */
+	unsigned int nr_balance_failed; /* initialise to 0 */
+};
+
+/* Common values for SMT siblings */
+#define SD_SIBLING_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 1,			\
+	.max_interval		= 2,			\
+	.busy_factor		= 8,			\
+	.imbalance_pct		= 110,			\
+	.cache_hot_time		= 0,			\
+	.cache_nice_tries	= 0,			\
+	.per_cpu_gain		= 15,			\
+	.flags			= SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_CLONE	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_IDLE		\
+				| SD_SHARE_CPUPOWER,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+/* Common values for CPUs */
+#define SD_CPU_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 1,			\
+	.max_interval		= 4,			\
+	.busy_factor		= 64,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (5*1000000/2),	\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_BALANCE_NEWIDLE	\
+				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_CLONE	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+#ifdef CONFIG_NUMA
+/* Common values for NUMA nodes */
+#define SD_NODE_INIT (struct sched_domain) {		\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 8,			\
+	.max_interval		= 32,			\
+	.busy_factor		= 32,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000000),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_BALANCE_EXEC	\
+				| SD_BALANCE_CLONE	\
+				| SD_WAKE_BALANCE,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+#endif
+
+extern void cpu_attach_domain(struct sched_domain *sd, int cpu);
+
 extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
 #else
 static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
@@ -552,20 +699,17 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
 
 extern unsigned long long sched_clock(void);
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SMP
 extern void sched_balance_exec(void);
-extern void node_nr_running_init(void);
 #else
 #define sched_balance_exec() {}
-#define node_nr_running_init() {}
 #endif
 
-/* Move tasks off this (offline) CPU onto another. */
-extern void migrate_all_tasks(void);
+extern void sched_idle_next(void);
 extern void set_user_nice(task_t *p, long nice);
-extern int task_prio(task_t *p);
-extern int task_nice(task_t *p);
-extern int task_curr(task_t *p);
+extern int task_prio(const task_t *p);
+extern int task_nice(const task_t *p);
+extern int task_curr(const task_t *p);
 extern int idle_cpu(int cpu);
 
 void yield(void);
@@ -600,7 +744,12 @@ extern void set_special_pids(pid_t session, pid_t pgrp);
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(xid_t, uid_t);
+static inline struct user_struct *get_uid(struct user_struct *u)
+{
+	atomic_inc(&u->__count);
+	return u;
+}
 extern void free_uid(struct user_struct *);
 extern void switch_uid(struct user_struct *);
 
@@ -612,12 +761,17 @@ extern void do_timer(struct pt_regs *);
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
+ extern void FASTCALL(wake_up_forked_thread(struct task_struct * tsk));
 #else
  static inline void kick_process(struct task_struct *tsk) { }
+ static inline void wake_up_forked_thread(struct task_struct * tsk)
+ {
+	wake_up_forked_process(tsk);
+ }
 #endif
-extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_fork(task_t * p));
 extern void FASTCALL(sched_exit(task_t * p));
 
@@ -782,7 +936,7 @@ extern void wait_task_inactive(task_t * p);
 #define while_each_thread(g, t) \
 	while ((t = next_thread(t)) != g)
 
-extern task_t * FASTCALL(next_thread(task_t *p));
+extern task_t * FASTCALL(next_thread(const task_t *p));
 
 #define thread_group_leader(p)	(p->pid == p->tgid)
 
@@ -799,7 +953,9 @@ static inline int thread_group_empty(task_t *p)
 extern void unhash_process(struct task_struct *p);
 
 /*
- * Protects ->fs, ->files, ->mm, ->ptrace and synchronises with wait4().
+ * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info and synchronises with
+ * wait4().
+ *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
  * neither inside nor outside.
@@ -921,7 +1077,7 @@ extern void signal_wake_up(struct task_struct *t, int resume_stopped);
  */
 #ifdef CONFIG_SMP
 
-static inline unsigned int task_cpu(struct task_struct *p)
+static inline unsigned int task_cpu(const struct task_struct *p)
 {
 	return p->thread_info->cpu;
 }
@@ -933,7 +1089,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #else
 
-static inline unsigned int task_cpu(struct task_struct *p)
+static inline unsigned int task_cpu(const struct task_struct *p)
 {
 	return 0;
 }
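
For context on the wchan-related hunk above: the patch drops the scheduling_functions_start_here/_end_here address bounds and instead tags scheduler functions with __sched, which places them in the .sched.text section, and exposes in_sched_functions() to test whether an address falls inside that section. The sketch below shows how the two pieces are meant to fit together; my_wait_for_event() and report_wchan() are illustrative names invented for this sketch and are not part of the patch or of the kernel.

#include <linux/sched.h>

/* Sketch only: a sleeping helper tagged so that wchan output skips it. */
static void __sched my_wait_for_event(void)
{
	/* calls schedule() here; the function is linked into .sched.text */
}

/* A get_wchan()-style walker can then filter return addresses with
 * in_sched_functions() instead of comparing them against the old
 * scheduling_functions_start_here/_end_here bounds.
 */
static unsigned long report_wchan(unsigned long pc)
{
	return in_sched_functions(pc) ? 0 : pc;
}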