/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004-2005  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 *
 */
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/vs_context.h>
#include <linux/vs_sched.h>
#include <linux/vserver/sched_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>
/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta, tokens = 0;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	delta = jiffies - vxi->sched.jiffies;

	if (delta >= vxi->sched.interval) {
		/* lockdown scheduler info */
		spin_lock(&vxi->sched.tokens_lock);

		/* calc integral token part */
		delta = jiffies - vxi->sched.jiffies;
		tokens = delta / vxi->sched.interval;
		delta = tokens * vxi->sched.interval;
		tokens *= vxi->sched.fill_rate;

		atomic_add(tokens, &vxi->sched.tokens);
		vxi->sched.jiffies += delta;
		tokens = atomic_read(&vxi->sched.tokens);

		if (tokens > vxi->sched.tokens_max) {
			tokens = vxi->sched.tokens_max;
			atomic_set(&vxi->sched.tokens, tokens);
		}
		spin_unlock(&vxi->sched.tokens_lock);
	} else {
		/* no new tokens */
		tokens = vx_tokens_avail(vxi);
		if (tokens <= 0)
			vxi->vx_state |= VXS_ONHOLD;
		if (tokens < vxi->sched.tokens_min) {
			/* enough tokens will be available in */
			if (vxi->sched.tokens_min == 0)
				return delta - vxi->sched.interval;
			return delta - vxi->sched.interval *
				vxi->sched.tokens_min / vxi->sched.fill_rate;
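			/*
			 * Worked example (illustrative numbers, not from
			 * the original file): with fill_rate = 1,
			 * interval = 4, tokens_min = 2 and delta = 1,
			 * the return above yields 1 - 4 * 2 / 1 = -7,
			 * i.e. roughly 7 jiffies until tokens_min tokens
			 * have accumulated again.
			 */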
		}
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
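
/*
 * Illustrative sketch (not part of the original file): roughly how a
 * per-tick consumer in the scheduler could use vx_tokens_recalc().
 * It relies on vx_tokens_dec() from <linux/vs_sched.h> and on the
 * vserver-patched task_struct carrying a vx_info pointer; the helper
 * name vx_sched_tick_example() is an assumption for this sketch.
 */
static inline int vx_sched_tick_example(struct task_struct *p)
{
	struct vx_info *vxi = p->vx_info;
	int tokens;

	if (!vxi)
		return 0;

	/* burn one token for this tick ... */
	tokens = vx_tokens_dec(vxi);
	/* ... and refill/recalculate once the bucket runs dry */
	if (tokens <= 0)
		vx_tokens_recalc(vxi);
	return (tokens <= 0);	/* context should be put on hold */
}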
/*
 * effective_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into a -4 ... 0 ... +4 bonus/penalty range.
 *
 * Additionally, we scale another amount based on the number of
 * CPU tokens currently held by the context, if the process is
 * part of a context (and the appropriate SCHED flag is set).
 * This ranges from -5 ... 0 ... +15, quadratically.
 *
 * So, the total bonus is -9 .. 0 .. +19
 * We use ~50% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs,
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
 *    unless that context is far exceeding its CPU allocation.
 *
 * Both properties are important to certain workloads.
 */
int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
{
	int vavavoom, max;

	/* lots of tokens = lots of vavavoom
	 *      no tokens = no vavavoom      */
	if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
		max = vxi->sched.tokens_max;
		vavavoom = max - vavavoom;
		max = max * max;
		vavavoom = max_prio * VAVAVOOM_RATIO / 100
			* (vavavoom*vavavoom - (max >> 2)) / max;
		/* alternative, geometric mapping
		vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom
			/ vxi->sched.tokens_max -
			MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
	} else
		vavavoom = 0;
	/* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) -
		MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */

	vxi->sched.vavavoom = vavavoom;
	return vavavoom;
}
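
/*
 * Worked example (illustrative, assuming max_prio == 40 and
 * VAVAVOOM_RATIO == 50, so max_prio * VAVAVOOM_RATIO / 100 == 20):
 * with tokens_max = 100, max = 100 * 100 = 10000 and (max >> 2) = 2500,
 * and writing used = tokens_max - tokens for the squared term:
 *
 *   tokens == 100 (full bucket):  used =   0 -> 20 * (0     - 2500) / 10000 = -5
 *   tokens ==  50 (half bucket):  used =  50 -> 20 * (2500  - 2500) / 10000 =  0
 *   tokens ==   0 (empty bucket): used = 100 -> 20 * (10000 - 2500) / 10000 = 15
 *
 * which reproduces the -5 ... 0 ... +15 quadratic range described in
 * the comment above.
 */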
int vc_set_sched_v2(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v2 vc_data;
	struct vx_info *vxi;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -ESRCH;

	spin_lock(&vxi->sched.tokens_lock);

	if (vc_data.interval != SCHED_KEEP)
		vxi->sched.interval = vc_data.interval;
	if (vc_data.fill_rate != SCHED_KEEP)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (vc_data.tokens_min != SCHED_KEEP)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (vc_data.tokens_max != SCHED_KEEP)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (vc_data.tokens != SCHED_KEEP)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}
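
/*
 * Illustrative userspace sketch (not part of the original file, hence
 * compiled out): configuring a context's token bucket through the
 * vserver syscall.  struct vcmd_set_sched_v2 and VCMD_set_sched_v2
 * come from <linux/vserver/sched_cmd.h>; the helper name and the
 * concrete numbers are assumptions.  Any field set to SCHED_KEEP is
 * left untouched by vc_set_sched_v2() above.
 */
#if 0
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/vserver/sched_cmd.h>

static int set_bucket_example(uint32_t xid)
{
	struct vcmd_set_sched_v2 vc_data = {
		.fill_rate  = 1,	/* 1 token ...			  */
		.interval   = 4,	/* ... every 4 jiffies (~25% CPU) */
		.tokens     = 100,	/* start with a full bucket	  */
		.tokens_min = 50,	/* resume with half a bucket	  */
		.tokens_max = 100,	/* bucket capacity		  */
	};

	return syscall(__NR_vserver, VCMD_set_sched_v2, xid, &vc_data);
}
#endif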
int vc_set_sched(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v3 vc_data;
	struct vx_info *vxi;
	unsigned int set_mask;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -ESRCH;

	set_mask = vc_data.set_mask;

	spin_lock(&vxi->sched.tokens_lock);

	if (set_mask & VXSM_FILL_RATE)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (set_mask & VXSM_INTERVAL)
		vxi->sched.interval = vc_data.interval;
	if (set_mask & VXSM_TOKENS)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);
	if (set_mask & VXSM_TOKENS_MIN)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		vxi->sched.priority_bias = vc_data.priority_bias;

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;
	if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
		vxi->sched.priority_bias = MAX_PRIO_BIAS;
	if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
		vxi->sched.priority_bias = MIN_PRIO_BIAS;

	spin_unlock(&vxi->sched.tokens_lock);
	put_vx_info(vxi);
	return 0;
}
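
/*
 * Illustrative userspace sketch (compiled out, same caveats as the v2
 * example above): the v3 command replaces the SCHED_KEEP sentinel with
 * an explicit set_mask, so only the fields named in the mask are
 * touched.  The command word VCMD_set_sched and the helper name are
 * assumptions.  Here only the fill rate and the priority bias change.
 */
#if 0
static int bump_share_example(uint32_t xid)
{
	struct vcmd_set_sched_v3 vc_data = {
		.set_mask      = VXSM_FILL_RATE | VXSM_PRIO_BIAS,
		.fill_rate     = 2,	/* 2 tokens per interval */
		.priority_bias = 0,	/* neutral bias		 */
	};

	return syscall(__NR_vserver, VCMD_set_sched, xid, &vc_data);
}
#endif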