/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004-2005  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 *
 */

#include <linux/sched.h>
#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_sched.h>
#include <linux/vserver/sched_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>

#ifdef CONFIG_VSERVER_ACB_SCHED

#define TICK_SCALE 1000
#define TICKS_PER_TOKEN(vxi) \
        ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
#define CLASS(vxi) \
        (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
#define GLOBAL_TICKS(vxi) \
        (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)

uint64_t vx_guaranteed_ticks = 0;
uint64_t vx_best_effort_ticks = 0;
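
/*
 * Accounting sketch (illustrative numbers, not from this file):
 * with interval = 100 and fill_rate = 4, TICKS_PER_TOKEN() yields
 * (100 * 1000) / 4 = 25000 scaled ticks per token, i.e. one token
 * is earned every 25 scheduler ticks.  A context's available
 * tokens are then (GLOBAL_TICKS(vxi) - ticks[class]) / 25000.
 */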

void vx_tokens_set(struct vx_info *vxi, int tokens) {
        int class = CLASS(vxi);
        uint64_t tmp;

        /* rewind the class tick baseline so that exactly
         * 'tokens' tokens are currently accumulated */
        tmp = GLOBAL_TICKS(vxi);
        tmp -= tokens * TICKS_PER_TOKEN(vxi);

        vxi->sched.ticks[class] = tmp;
}

void vx_scheduler_tick(void) {
        vx_guaranteed_ticks += TICK_SCALE;
        vx_best_effort_ticks += TICK_SCALE;
}

void vx_advance_best_effort_ticks(int ticks) {
        vx_best_effort_ticks += TICK_SCALE * ticks;
}

void vx_advance_guaranteed_ticks(int ticks) {
        vx_guaranteed_ticks += TICK_SCALE * ticks;
}
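
/*
 * Reading of the above (not from the original source): the global
 * counters advance by TICK_SCALE per scheduler tick, and the two
 * vx_advance_*_ticks() helpers let callers credit several ticks to
 * a single class at once, e.g. after a period in which that class
 * was deliberately not advanced.
 */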

int vx_tokens_avail(struct vx_info *vxi)
{
        uint64_t diff, max_ticks;
        int tokens;
        long tpt, rem;
        int class = CLASS(vxi);

        if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
                /* Set the "real" token count */
                tokens = atomic_read(&vxi->sched.tokens);
                vx_tokens_set(vxi, tokens);
                vxi->sched.state[class] = SCH_INITIALIZED;
                goto out;
        }

        if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
                tokens = atomic_read(&vxi->sched.tokens);
                goto out;
        }

        /* Use of fixed-point arithmetic in these calculations leads to
         * some limitations.  These should be made explicit.
         */
        max_ticks = (tpt = TICKS_PER_TOKEN(vxi));
        max_ticks *= vxi->sched.tokens_max;
        diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];

        /* Avoid an overflow from div_long_long_rem */
        if (diff >= max_ticks) {
                vx_tokens_set(vxi, vxi->sched.tokens_max);
                tokens = vxi->sched.tokens_max;
        } else {
                /* Divide ticks by ticks per token to get tokens */
                tokens = div_long_long_rem(diff, tpt, &rem);
        }

        atomic_set(&vxi->sched.tokens, tokens);
out:
        vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
        return tokens;
}
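
/*
 * Worked example for vx_tokens_avail() (illustrative numbers): if
 * tpt = 25000 and diff = 80000, div_long_long_rem() yields 3 tokens
 * with a remainder of 5000 scaled ticks; that fractional progress is
 * preserved, since ticks[class] is left untouched on this path and
 * only the cached atomic counter is refreshed.
 */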

void vx_consume_token(struct vx_info *vxi)
{
        int class = CLASS(vxi);

        vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
        atomic_dec(&vxi->sched.tokens);
}
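
/*
 * Note (a reading of the code): advancing ticks[class] by one
 * TICKS_PER_TOKEN() is what actually debits the token in the
 * GLOBAL_TICKS() - ticks[class] computation above; the atomic_dec()
 * only keeps the cached counter in vxi->sched.tokens in step.
 */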

/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
        long delta, tokens;

        if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
                /* context is paused */
                return 0;

        tokens = vx_tokens_avail(vxi);
        if (tokens <= 0)
                vxi->vx_state |= VXS_ONHOLD;
        if (tokens < vxi->sched.tokens_min) {
                delta = tokens - vxi->sched.tokens_min;
                /* enough tokens will be available in */
                return (delta * vxi->sched.interval) /
                        vxi->sched.fill_rate;
        }

        /* we have some tokens left */
        if (vx_info_state(vxi, VXS_ONHOLD) &&
                (tokens >= vxi->sched.tokens_min))
                vxi->vx_state &= ~VXS_ONHOLD;
        if (vx_info_state(vxi, VXS_ONHOLD))
                tokens -= vxi->sched.tokens_min;

        return tokens;
}
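
/*
 * Example of the negative return path (illustrative numbers): with
 * tokens = 2, tokens_min = 10, interval = 100 and fill_rate = 4,
 * delta = -8 and the function returns (-8 * 100) / 4 = -200, i.e.
 * roughly 200 jiffies until enough tokens have accumulated.
 */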

#else /* !CONFIG_VSERVER_ACB_SCHED */

/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
        long delta, tokens = 0;

        if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
                /* context is paused */
                return 0;

        delta = jiffies - vxi->sched.jiffies;

        if (delta >= vxi->sched.interval) {
                /* lockdown scheduler info */
                spin_lock(&vxi->sched.tokens_lock);

                /* calc integral token part */
                delta = jiffies - vxi->sched.jiffies;
                tokens = delta / vxi->sched.interval;
                delta = tokens * vxi->sched.interval;
                tokens *= vxi->sched.fill_rate;

                atomic_add(tokens, &vxi->sched.tokens);
                vxi->sched.jiffies += delta;
                tokens = atomic_read(&vxi->sched.tokens);

                if (tokens > vxi->sched.tokens_max) {
                        tokens = vxi->sched.tokens_max;
                        atomic_set(&vxi->sched.tokens, tokens);
                }
                spin_unlock(&vxi->sched.tokens_lock);
        } else {
                /* no new tokens */
                tokens = vx_tokens_avail(vxi);
                if (tokens <= 0)
                        vxi->vx_state |= VXS_ONHOLD;
                if (tokens < vxi->sched.tokens_min) {
                        /* enough tokens will be available in */
                        if (vxi->sched.tokens_min == 0)
                                return delta - vxi->sched.interval;
                        return delta - vxi->sched.interval *
                                vxi->sched.tokens_min / vxi->sched.fill_rate;
                }
        }

        /* we have some tokens left */
        if (vx_info_state(vxi, VXS_ONHOLD) &&
                (tokens >= vxi->sched.tokens_min))
                vxi->vx_state &= ~VXS_ONHOLD;
        if (vx_info_state(vxi, VXS_ONHOLD))
                tokens -= vxi->sched.tokens_min;

        return tokens;
}
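
/*
 * Refill example for this non-ACB path (illustrative numbers): with
 * interval = 100 and fill_rate = 4, if 350 jiffies have elapsed then
 * tokens = 350 / 100 = 3 whole intervals are credited, 3 * 4 = 12
 * tokens are added, and sched.jiffies advances by 300, so the
 * leftover 50 jiffies count toward the next interval.
 */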

#endif /* CONFIG_VSERVER_ACB_SCHED */

/*
 * effective_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into a -4 ... 0 ... +4 bonus/penalty range.
 *
 * Additionally, we scale another amount based on the number of
 * CPU tokens currently held by the context, if the process is
 * part of a context (and the appropriate SCHED flag is set).
 * This ranges from -5 ... 0 ... +15, quadratically.
 *
 * So, the total bonus is -9 .. 0 .. +19
 * We use ~50% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
 *    unless that context is far exceeding its CPU allocation.
 *
 * Both properties are important to certain workloads.
 */

int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
{
        int vavavoom, max;

        /* lots of tokens = lots of vavavoom
         *      no tokens = no vavavoom      */
        if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
                max = vxi->sched.tokens_max;
                vavavoom = max - vavavoom;
                max = max * max;
                vavavoom = max_prio * VAVAVOOM_RATIO / 100
                        * (vavavoom*vavavoom - (max >> 2)) / max;
        } else
                vavavoom = 0;

        vxi->sched.vavavoom = vavavoom;
        return vavavoom + vxi->sched.priority_bias;
}
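
/*
 * Quadratic bonus sketch (illustrative values, not taken from this
 * file): write v = tokens_max - tokens and m = tokens_max.  The
 * expression above evaluates to
 *
 *      max_prio * VAVAVOOM_RATIO / 100 * (v*v - m*m/4) / (m*m)
 *
 * which spans -1/4 .. +3/4 of max_prio * VAVAVOOM_RATIO / 100.
 * Assuming VAVAVOOM_RATIO == 50 and max_prio == 40, that gives the
 * -5 ... 0 ... +15 range quoted in the comment above.
 */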

int vc_set_sched_v2(uint32_t xid, void __user *data)
{
        struct vcmd_set_sched_v2 vc_data;
        struct vx_info *vxi;

        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        vxi = lookup_vx_info(xid);
        if (!vxi)
                return -ESRCH;

        spin_lock(&vxi->sched.tokens_lock);

        if (vc_data.interval != SCHED_KEEP)
                vxi->sched.interval = vc_data.interval;
        if (vc_data.fill_rate != SCHED_KEEP)
                vxi->sched.fill_rate = vc_data.fill_rate;
        if (vc_data.tokens_min != SCHED_KEEP)
                vxi->sched.tokens_min = vc_data.tokens_min;
        if (vc_data.tokens_max != SCHED_KEEP)
                vxi->sched.tokens_max = vc_data.tokens_max;
        if (vc_data.tokens != SCHED_KEEP)
                atomic_set(&vxi->sched.tokens, vc_data.tokens);

        /* Sanity check the resultant values */
        if (vxi->sched.fill_rate <= 0)
                vxi->sched.fill_rate = 1;
        if (vxi->sched.interval <= 0)
                vxi->sched.interval = HZ;
        if (vxi->sched.tokens_max == 0)
                vxi->sched.tokens_max = 1;
        if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
                atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
        if (vxi->sched.tokens_min > vxi->sched.tokens_max)
                vxi->sched.tokens_min = vxi->sched.tokens_max;

#ifdef CONFIG_VSERVER_ACB_SCHED
        vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
#endif

        spin_unlock(&vxi->sched.tokens_lock);
        put_vx_info(vxi);
        return 0;
}
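
/*
 * Usage sketch for the v2 command (hypothetical values): a caller
 * that wants to change only the fill rate sets every other field of
 * struct vcmd_set_sched_v2 to SCHED_KEEP, e.g. fill_rate = 2 with
 * interval, tokens, tokens_min and tokens_max all SCHED_KEEP, so
 * the update above leaves those settings untouched.
 */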

int vc_set_sched(uint32_t xid, void __user *data)
{
        struct vcmd_set_sched_v3 vc_data;
        struct vx_info *vxi;
        unsigned int set_mask;

        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        vxi = lookup_vx_info(xid);
        if (!vxi)
                return -ESRCH;

        set_mask = vc_data.set_mask;

        spin_lock(&vxi->sched.tokens_lock);

        if (set_mask & VXSM_FILL_RATE)
                vxi->sched.fill_rate = vc_data.fill_rate;
        if (set_mask & VXSM_INTERVAL)
                vxi->sched.interval = vc_data.interval;
        if (set_mask & VXSM_TOKENS)
                atomic_set(&vxi->sched.tokens, vc_data.tokens);
        if (set_mask & VXSM_TOKENS_MIN)
                vxi->sched.tokens_min = vc_data.tokens_min;
        if (set_mask & VXSM_TOKENS_MAX)
                vxi->sched.tokens_max = vc_data.tokens_max;
        if (set_mask & VXSM_PRIO_BIAS)
                vxi->sched.priority_bias = vc_data.priority_bias;

        /* Sanity check the resultant values */
        if (vxi->sched.fill_rate <= 0)
                vxi->sched.fill_rate = 1;
        if (vxi->sched.interval <= 0)
                vxi->sched.interval = HZ;
        if (vxi->sched.tokens_max == 0)
                vxi->sched.tokens_max = 1;
        if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
                atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
        if (vxi->sched.tokens_min > vxi->sched.tokens_max)
                vxi->sched.tokens_min = vxi->sched.tokens_max;
        if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
                vxi->sched.priority_bias = MAX_PRIO_BIAS;
        if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
                vxi->sched.priority_bias = MIN_PRIO_BIAS;

#ifdef CONFIG_VSERVER_ACB_SCHED
        vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
#endif

        spin_unlock(&vxi->sched.tokens_lock);
        put_vx_info(vxi);
        return 0;
}
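
/*
 * Usage sketch for the v3 command (hypothetical values): to raise
 * only the priority bias, a caller passes set_mask = VXSM_PRIO_BIAS
 * with priority_bias filled in; fields whose bit is clear in
 * set_mask are ignored, so no SCHED_KEEP sentinel is needed here.
 */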