/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 *
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vserver/context.h>
#include <linux/vserver/sched.h>

#include <asm/errno.h>
#include <asm/uaccess.h>


/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 *
 */
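/*
 * Example (illustrative values): with fill_rate = 10 and
 * interval = HZ, a context earns 10 tokens per second, capped
 * at tokens_max.  If 2*HZ jiffies have passed since the last
 * update, 2*10 = 20 tokens are added in one step and
 * sched.jiffies advances by exactly 2*HZ, so any remainder
 * carries over to the next recalculation.
 */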
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta, tokens = 0;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	delta = jiffies - vxi->sched.jiffies;

	if (delta >= vxi->sched.interval) {
		/* lockdown scheduler info */
		spin_lock(&vxi->sched.tokens_lock);

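		/* jiffies (or sched.jiffies) may have changed while
		 * we waited for the lock, so recompute delta under it */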
		/* calc integral token part */
		delta = jiffies - vxi->sched.jiffies;
		tokens = delta / vxi->sched.interval;
		delta = tokens * vxi->sched.interval;
		tokens *= vxi->sched.fill_rate;

		atomic_add(tokens, &vxi->sched.tokens);
		vxi->sched.jiffies += delta;
		tokens = atomic_read(&vxi->sched.tokens);

		if (tokens > vxi->sched.tokens_max) {
			tokens = vxi->sched.tokens_max;
			atomic_set(&vxi->sched.tokens, tokens);
		}
		spin_unlock(&vxi->sched.tokens_lock);
	} else {
		/* no new tokens */
		tokens = vx_tokens_avail(vxi);
		if (tokens <= 0)
			vxi->vx_state |= VXS_ONHOLD;
		if (tokens < vxi->sched.tokens_min) {
			/* enough tokens will be available in -ret jiffies */
			if (vxi->sched.tokens_min == 0)
				return delta - vxi->sched.interval;
			return delta - vxi->sched.interval *
				vxi->sched.tokens_min / vxi->sched.fill_rate;
		}
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}

/*
 * effective_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into a -4 ... 0 ... +4 bonus/penalty range.
 *
 * Additionally, we scale another amount based on the number of
 * CPU tokens currently held by the context, if the process is
 * part of a context (and the appropriate SCHED flag is set).
 * This ranges from -5 ... 0 ... +15, quadratically.
 *
 * So, the total bonus is -9 .. 0 .. +19
 * We use ~50% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
 *    unless that context is far exceeding its CPU allocation.
 *
 * Both properties are important to certain workloads.
 */
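/*
 * Example (illustrative values): if max_prio * VAVAVOOM_RATIO / 100
 * evaluates to 20, the quadratic mapping below yields -20/4 = -5
 * for a full token bucket (tokens == tokens_max) and +3*20/4 = +15
 * for an empty one, i.e. the -5 ... +15 range described above.
 */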
int effective_vavavoom(task_t *p, int max_prio)
{
	struct vx_info *vxi = p->vx_info;
	int vavavoom, max;

	/* lots of tokens = lots of vavavoom
	 *      no tokens = no vavavoom      */
	if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
		max = vxi->sched.tokens_max;
		vavavoom = max - vavavoom;
		max = max * max;
		vavavoom = max_prio * VAVAVOOM_RATIO / 100
			* (vavavoom*vavavoom - (max >> 2)) / max;
		/*  alternative, geometric mapping
		vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom
			/ vxi->sched.tokens_max -
			MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
	} else
		vavavoom = 0;
	/* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) -
		MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */

	return vavavoom;
}


int vc_set_sched_v2(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v2 vc_data;
	struct vx_info *vxi;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -EINVAL;

	spin_lock(&vxi->sched.tokens_lock);

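	/* fields sent as SCHED_KEEP leave the current value unchanged */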
	if (vc_data.interval != SCHED_KEEP)
		vxi->sched.interval = vc_data.interval;
	if (vc_data.fill_rate != SCHED_KEEP)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (vc_data.tokens_min != SCHED_KEEP)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (vc_data.tokens_max != SCHED_KEEP)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (vc_data.tokens != SCHED_KEEP)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}


int vc_set_sched(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v3 vc_data;
	struct vx_info *vxi;
	unsigned int set_mask;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -EINVAL;

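	/* each VXSM_* bit in set_mask selects one vc_data field
	 * to be applied to the context's scheduler settings */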
	set_mask = vc_data.set_mask;

	spin_lock(&vxi->sched.tokens_lock);

	if (set_mask & VXSM_FILL_RATE)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (set_mask & VXSM_INTERVAL)
		vxi->sched.interval = vc_data.interval;
	if (set_mask & VXSM_TOKENS)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);
	if (set_mask & VXSM_TOKENS_MIN)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		vxi->sched.priority_bias = vc_data.priority_bias;

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;
	if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
		vxi->sched.priority_bias = MAX_PRIO_BIAS;
	if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
		vxi->sched.priority_bias = MIN_PRIO_BIAS;

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}