/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004-2005  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 *
 */

#include <linux/sched.h>
#include <linux/vs_context.h>
#include <linux/vs_sched.h>
#include <linux/vserver/sched_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>

#ifdef CONFIG_VSERVER_ACB_SCHED

#define TICK_SCALE 1000
#define TICKS_PER_TOKEN(vxi) \
	((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
#define CLASS(vxi) \
	(IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
#define GLOBAL_TICKS(vxi) \
	(IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
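
/*
 * Worked example of the fixed-point scaling above (values are
 * illustrative, not from the source): with HZ = 1000, a context
 * configured with interval = HZ and fill_rate = 100 gets
 * TICKS_PER_TOKEN = (1000 * 1000) / 100 = 10000 scaled ticks per
 * token.  Since every scheduler tick advances the global counters
 * by TICK_SCALE = 1000, such a context accrues one token every
 * interval / fill_rate = 10 jiffies.
 */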

uint64_t vx_guaranteed_ticks = 0;
uint64_t vx_best_effort_ticks = 0;

void vx_tokens_set(struct vx_info *vxi, int tokens) {
	int class = CLASS(vxi);
	uint64_t tmp;

	tmp = GLOBAL_TICKS(vxi);
	tmp -= tokens * TICKS_PER_TOKEN(vxi);

	vxi->sched.ticks[class] = tmp;
}
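
/*
 * Note (added for clarity): the token count is not stored directly
 * here; it is encoded as a tick offset, so that at any later time
 * the tokens can be recovered as
 *   (GLOBAL_TICKS(vxi) - vxi->sched.ticks[class]) / TICKS_PER_TOKEN(vxi)
 * which is exactly what vx_tokens_avail() computes below.
 */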

void vx_scheduler_tick(void) {
	vx_guaranteed_ticks += TICK_SCALE;
	vx_best_effort_ticks += TICK_SCALE;
}

void vx_advance_best_effort_ticks(int ticks) {
	vx_best_effort_ticks += TICK_SCALE * ticks;
}

void vx_advance_guaranteed_ticks(int ticks) {
	vx_guaranteed_ticks += TICK_SCALE * ticks;
}

int vx_tokens_avail(struct vx_info *vxi)
{
	uint64_t diff, max_ticks;
	int tokens;
	long rem, tpt;
	int class = CLASS(vxi);

	if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
		/* Set the "real" token count */
		tokens = atomic_read(&vxi->sched.tokens);
		vx_tokens_set(vxi, tokens);
		vxi->sched.state[class] = SCH_INITIALIZED;
		goto out;
	}

	if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
		tokens = atomic_read(&vxi->sched.tokens);
		goto out;
	}

	/* Use of fixed-point arithmetic in these calculations leads to
	 * some limitations.  These should be made explicit.
	 */
	max_ticks = (tpt = TICKS_PER_TOKEN(vxi));
	max_ticks *= vxi->sched.tokens_max;
	diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];

	/* Avoid an overflow from div_long_long_rem */
	if (diff >= max_ticks) {
		vx_tokens_set(vxi, vxi->sched.tokens_max);
		tokens = vxi->sched.tokens_max;
	} else {
		/* Divide ticks by ticks per token to get tokens */
		tokens = div_long_long_rem(diff, tpt, &rem);
	}

	atomic_set(&vxi->sched.tokens, tokens);
out:
	vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
	return tokens;
}
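
/*
 * Worked example (illustrative values): with TICKS_PER_TOKEN = 10000,
 * diff = GLOBAL_TICKS - ticks[class] = 35000 yields
 * div_long_long_rem(35000, 10000, &rem) = 3 tokens with rem = 5000,
 * i.e. three whole tokens have accrued plus half of the next one.
 */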

void vx_consume_token(struct vx_info *vxi)
{
	int class = CLASS(vxi);

	vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
	atomic_dec(&vxi->sched.tokens);
}

/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta;
	int tokens;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	tokens = vx_tokens_avail(vxi);
	if (tokens <= 0)
		vxi->vx_state |= VXS_ONHOLD;
	if (tokens < vxi->sched.tokens_min) {
		delta = tokens - vxi->sched.tokens_min;
		/* enough tokens will be available in */
		return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
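
/*
 * Example of the negative return path (illustrative values): with
 * tokens = 2, tokens_min = 5, interval = 1000 and fill_rate = 100,
 * delta = -3 and the function returns (-3 * 1000) / 100 = -30,
 * i.e. roughly 30 jiffies until tokens_min is reached again.
 */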

#else

/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta, tokens = 0;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	delta = jiffies - vxi->sched.jiffies;

	if (delta >= vxi->sched.interval) {
		/* lockdown scheduler info */
		spin_lock(&vxi->sched.tokens_lock);

		/* calc integral token part */
		delta = jiffies - vxi->sched.jiffies;
		tokens = delta / vxi->sched.interval;
		delta = tokens * vxi->sched.interval;
		tokens *= vxi->sched.fill_rate;

		atomic_add(tokens, &vxi->sched.tokens);
		vxi->sched.jiffies += delta;
		tokens = atomic_read(&vxi->sched.tokens);

		if (tokens > vxi->sched.tokens_max) {
			tokens = vxi->sched.tokens_max;
			atomic_set(&vxi->sched.tokens, tokens);
		}
		spin_unlock(&vxi->sched.tokens_lock);
	} else {
		/* no new tokens */
		tokens = vx_tokens_avail(vxi);
		if (tokens <= 0)
			vxi->vx_state |= VXS_ONHOLD;
		if (tokens < vxi->sched.tokens_min) {
			/* enough tokens will be available in */
			if (vxi->sched.tokens_min == 0)
				return delta - vxi->sched.interval;
			return delta - vxi->sched.interval *
				vxi->sched.tokens_min / vxi->sched.fill_rate;
		}
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
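
/*
 * Worked refill example (illustrative values): with interval = 1000
 * and fill_rate = 100, a delta of 2500 jiffies yields
 * tokens = 2500 / 1000 = 2 whole intervals, so 2 * 100 = 200 tokens
 * are added and sched.jiffies advances by 2 * 1000 = 2000; the
 * remaining 500 jiffies carry over to the next recalculation.
 */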

#endif /* CONFIG_VSERVER_ACB_SCHED */

/*
 * effective_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into a -4 ... 0 ... +4 bonus/penalty range.
 *
 * Additionally, we scale another amount based on the number of
 * CPU tokens currently held by the context, if the process is
 * part of a context (and the appropriate SCHED flag is set).
 * This ranges from -5 ... 0 ... +15, quadratically.
 *
 * So, the total bonus is -9 .. 0 .. +19
 * We use ~50% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
 *    unless that context is far exceeding its CPU allocation.
 *
 * Both properties are important to certain workloads.
 */
int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
{
	int vavavoom, max;

	/* lots of tokens = lots of vavavoom
	 *      no tokens = no vavavoom      */
	if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
		max = vxi->sched.tokens_max;
		vavavoom = max - vavavoom;
		max = max * max;
		vavavoom = max_prio * VAVAVOOM_RATIO / 100
			* (vavavoom*vavavoom - (max >> 2)) / max;
	} else
		vavavoom = 0;

	vxi->sched.vavavoom = vavavoom;
	return vavavoom + vxi->sched.priority_bias;
}
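
/*
 * Sketch of the resulting range (derived from the formula above):
 * writing B = max_prio * VAVAVOOM_RATIO / 100, a context holding all
 * of its tokens (vavavoom = 0) gets roughly -B/4, while a context
 * that has run dry (vavavoom = tokens_max) gets roughly +3B/4.
 * That is the quadratic -5 ... +15 span the comment block above
 * refers to when B is about 20.
 */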

int vc_set_sched_v2(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v2 vc_data;
	struct vx_info *vxi;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = lookup_vx_info(xid);
	if (!vxi)
		return -ESRCH;

	spin_lock(&vxi->sched.tokens_lock);

	if (vc_data.interval != SCHED_KEEP)
		vxi->sched.interval = vc_data.interval;
	if (vc_data.fill_rate != SCHED_KEEP)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (vc_data.tokens_min != SCHED_KEEP)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (vc_data.tokens_max != SCHED_KEEP)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (vc_data.tokens != SCHED_KEEP)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;

#ifdef CONFIG_VSERVER_ACB_SCHED
	vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
#endif

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}
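
/*
 * Note (added for clarity): the v2 command above uses the SCHED_KEEP
 * sentinel per field to mean "leave unchanged", whereas the v3
 * command below selects the fields to update explicitly via set_mask
 * bits (VXSM_FILL_RATE, VXSM_INTERVAL, ...), which also allows
 * priority_bias to be set.
 */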

int vc_set_sched(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v3 vc_data;
	struct vx_info *vxi;
	unsigned int set_mask;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = lookup_vx_info(xid);
	if (!vxi)
		return -ESRCH;

	set_mask = vc_data.set_mask;

	spin_lock(&vxi->sched.tokens_lock);

	if (set_mask & VXSM_FILL_RATE)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (set_mask & VXSM_INTERVAL)
		vxi->sched.interval = vc_data.interval;
	if (set_mask & VXSM_TOKENS)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);
	if (set_mask & VXSM_TOKENS_MIN)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		vxi->sched.priority_bias = vc_data.priority_bias;

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;
	if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
		vxi->sched.priority_bias = MAX_PRIO_BIAS;
	if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
		vxi->sched.priority_bias = MIN_PRIO_BIAS;

#ifdef CONFIG_VSERVER_ACB_SCHED
	vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
#endif

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}