/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004-2005  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/vs_context.h>
#include <linux/vs_sched.h>
#include <linux/vserver/sched_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>

#ifdef CONFIG_VSERVER_ACB_SCHED

#define TICK_SCALE 1000
#define TICKS_PER_TOKEN(vxi) \
	((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
#define CLASS(vxi) \
	(IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
#define GLOBAL_TICKS(vxi) \
	(IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
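
/*
 * Worked example (illustrative numbers, not defaults): with
 * interval = 4 and fill_rate = 1, TICKS_PER_TOKEN = (4 * 1000) / 1
 * = 4000 scaled ticks per token.  Since vx_scheduler_tick() below
 * advances the global counters by TICK_SCALE per tick, that means
 * one token is earned every 4 scheduler ticks; fill_rate = 2 over
 * the same interval would halve that to one token per 2 ticks.
 */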

uint64_t vx_guaranteed_ticks = 0;
uint64_t vx_best_effort_ticks = 0;

void vx_tokens_set(struct vx_info *vxi, int tokens) {
	int class = CLASS(vxi);

	/* record the global tick time at which this context would
	 * have held zero tokens; vx_tokens_avail() inverts this */
	vxi->sched.ticks[class] = GLOBAL_TICKS(vxi);
	vxi->sched.ticks[class] -= tokens * TICKS_PER_TOKEN(vxi);
}
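
/*
 * Sketch of the round trip (hypothetical values): with
 * TICKS_PER_TOKEN(vxi) == 4000 and GLOBAL_TICKS(vxi) == 100000,
 * vx_tokens_set(vxi, 10) stores ticks[class] = 100000 - 40000
 * = 60000, and vx_tokens_avail() below recovers
 * (100000 - 60000) / 4000 == 10 tokens.
 */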

void vx_scheduler_tick(void) {
	vx_guaranteed_ticks += TICK_SCALE;
	vx_best_effort_ticks += TICK_SCALE;
}

void vx_advance_best_effort_ticks(int ticks) {
	vx_best_effort_ticks += TICK_SCALE * ticks;
}

void vx_advance_guaranteed_ticks(int ticks) {
	vx_guaranteed_ticks += TICK_SCALE * ticks;
}

int vx_tokens_avail(struct vx_info *vxi)
{
	uint64_t diff;
	int tokens;
	long rem;
	int class = CLASS(vxi);

	if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
		/* Set the "real" token count */
		tokens = atomic_read(&vxi->sched.tokens);
		vx_tokens_set(vxi, tokens);
		vxi->sched.state[class] = SCH_INITIALIZED;
		goto out;
	}

	if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
		tokens = atomic_read(&vxi->sched.tokens);
		goto out;
	}

	/* Use of fixed-point arithmetic in these calculations leads to
	 * some limitations.  These should be made explicit. */
	diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
	tokens = div_long_long_rem(diff, TICKS_PER_TOKEN(vxi), &rem);

	if (tokens > vxi->sched.tokens_max) {
		/* cap at bucket size, resync the tick baseline */
		vx_tokens_set(vxi, vxi->sched.tokens_max);
		tokens = vxi->sched.tokens_max;
	}

	atomic_set(&vxi->sched.tokens, tokens);
out:
	vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
	return tokens;
}
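
/*
 * One such limitation, made explicit (an observation, not from the
 * original source): TICKS_PER_TOKEN() truncates towards zero, so a
 * fill_rate larger than interval * TICK_SCALE yields zero ticks per
 * token and would divide by zero in the calculation above;
 * practical fill rates must stay well below that bound.
 */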

void vx_consume_token(struct vx_info *vxi)
{
	int class = CLASS(vxi);

	vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
}

/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
	int tokens, delta;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	tokens = vx_tokens_avail(vxi);
	if (tokens <= 0)
		vxi->vx_state |= VXS_ONHOLD;
	if (tokens < vxi->sched.tokens_min) {
		delta = tokens - vxi->sched.tokens_min;
		/* enough tokens will be available in */
		return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
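
/*
 * Hedged usage sketch (illustrative only, not compiled in): how a
 * caller might act on the three-way return convention documented
 * above.
 */
#if 0
	int ret = vx_tokens_recalc(vxi);

	if (ret > 0) {
		/* ret tokens available: context may keep running */
	} else if (ret == 0) {
		/* context is paused via VXF_SCHED_PAUSE */
	} else {
		/* on hold: new tokens are due in -ret jiffies */
	}
#endif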

#else

/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta, tokens = 0;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	delta = jiffies - vxi->sched.jiffies;

	if (delta >= vxi->sched.interval) {
		/* lockdown scheduler info */
		spin_lock(&vxi->sched.tokens_lock);

		/* calc integral token part */
		delta = jiffies - vxi->sched.jiffies;
		tokens = delta / vxi->sched.interval;
		delta = tokens * vxi->sched.interval;
		tokens *= vxi->sched.fill_rate;

		atomic_add(tokens, &vxi->sched.tokens);
		vxi->sched.jiffies += delta;
		tokens = atomic_read(&vxi->sched.tokens);

		if (tokens > vxi->sched.tokens_max) {
			tokens = vxi->sched.tokens_max;
			atomic_set(&vxi->sched.tokens, tokens);
		}
		spin_unlock(&vxi->sched.tokens_lock);
	} else {
		/* no new tokens */
		tokens = vx_tokens_avail(vxi);
		if (tokens <= 0)
			vxi->vx_state |= VXS_ONHOLD;
		if (tokens < vxi->sched.tokens_min) {
			/* enough tokens will be available in */
			if (vxi->sched.tokens_min == 0)
				return delta - vxi->sched.interval;
			return delta - vxi->sched.interval *
				vxi->sched.tokens_min / vxi->sched.fill_rate;
		}
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
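
/*
 * Worked example (illustrative numbers only): with interval = 100
 * jiffies, fill_rate = 5 and delta = 250, the integral part is
 * tokens = 250 / 100 = 2 intervals, so delta becomes 200 and
 * 2 * 5 = 10 tokens are added; sched.jiffies advances by only 200,
 * leaving the odd 50 jiffies to be credited by a later recalc.
 */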

#endif /* CONFIG_VSERVER_ACB_SCHED */

/*
 * vx_effective_vavavoom - the token-based bonus/penalty that feeds
 * the effective priority, which is based on the static priority
 * but modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into a -4 ... 0 ... +4 bonus/penalty range.
 *
 * Additionally, we scale another amount based on the number of
 * CPU tokens currently held by the context, if the process is
 * part of a context (and the appropriate SCHED flag is set).
 * This ranges from -5 ... 0 ... +15, quadratically.
 *
 * So, the total bonus is -9 .. 0 .. +19.
 * We use ~50% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
 *    unless that context is far exceeding its CPU allocation.
 *
 * Both properties are important to certain workloads.
 */
int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
{
	int vavavoom, max;

	/* lots of tokens = lots of vavavoom
	 *      no tokens = no vavavoom      */
	if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
		max = vxi->sched.tokens_max;
		vavavoom = max - vavavoom;
		max = max * max;
		vavavoom = max_prio * VAVAVOOM_RATIO / 100
			* (vavavoom*vavavoom - (max >> 2)) / max;
	} else
		vavavoom = 0;

	vxi->sched.vavavoom = vavavoom;
	return vavavoom + vxi->sched.priority_bias;
}
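
/*
 * Worked example (hypothetical numbers; VAVAVOOM_RATIO assumed to
 * be 50 here purely for illustration): with tokens_max = 100 and
 * max_prio = 40, a context holding all 100 tokens gets
 * vavavoom = 40 * 50/100 * (0 - 10000/4) / 10000 = -5 (a boost),
 * while one that is out of tokens gets
 * 40 * 50/100 * (10000 - 2500) / 10000 = +15 (a penalty),
 * matching the -5 ... +15 range described above.
 */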

int vc_set_sched_v2(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v2 vc_data;
	struct vx_info *vxi;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -EINVAL;

	spin_lock(&vxi->sched.tokens_lock);

	if (vc_data.interval != SCHED_KEEP)
		vxi->sched.interval = vc_data.interval;
	if (vc_data.fill_rate != SCHED_KEEP)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (vc_data.tokens_min != SCHED_KEEP)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (vc_data.tokens_max != SCHED_KEEP)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (vc_data.tokens != SCHED_KEEP)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;

#ifdef CONFIG_VSERVER_ACB_SCHED
	/* reseed the tick baseline from the (possibly clamped) count */
	vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
#endif

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}
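
/*
 * Hedged usage sketch (illustrative, not compiled in): to change
 * only the fill rate through this v2 interface, every other field
 * is set to SCHED_KEEP so its current value is preserved:
 */
#if 0
	struct vcmd_set_sched_v2 vc_data = {
		.fill_rate  = 2,	/* hypothetical new rate */
		.interval   = SCHED_KEEP,
		.tokens     = SCHED_KEEP,
		.tokens_min = SCHED_KEEP,
		.tokens_max = SCHED_KEEP,
	};
#endif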

int vc_set_sched(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v3 vc_data;
	struct vx_info *vxi;
	unsigned int set_mask;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -EINVAL;

	set_mask = vc_data.set_mask;

	spin_lock(&vxi->sched.tokens_lock);

	if (set_mask & VXSM_FILL_RATE)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (set_mask & VXSM_INTERVAL)
		vxi->sched.interval = vc_data.interval;
	if (set_mask & VXSM_TOKENS)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);
	if (set_mask & VXSM_TOKENS_MIN)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		vxi->sched.priority_bias = vc_data.priority_bias;

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;
	if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
		vxi->sched.priority_bias = MAX_PRIO_BIAS;
	if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
		vxi->sched.priority_bias = MIN_PRIO_BIAS;

#ifdef CONFIG_VSERVER_ACB_SCHED
	/* reseed the tick baseline from the (possibly clamped) count */
	vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
#endif

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}
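
/*
 * Hedged usage sketch (illustrative, not compiled in): the v3
 * interface selects fields via set_mask instead of SCHED_KEEP;
 * with the hypothetical values below only the fill rate and
 * interval would be updated, giving one token per second:
 */
#if 0
	struct vcmd_set_sched_v3 vc_data = {
		.set_mask  = VXSM_FILL_RATE | VXSM_INTERVAL,
		.fill_rate = 1,
		.interval  = HZ,	/* fill_rate tokens per second */
	};
#endif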