/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004-2007  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 *  V0.03  changed vcmds to vxi arg
 *
 */
#include <linux/sched.h>
#include <linux/vs_context.h>
#include <linux/vs_sched.h>
#include <linux/vserver/sched_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>
#define vxd_check_range(val, min, max) do {		\
	vxlprintk((val < min) || (val > max),		\
		"check_range(%ld,%ld,%ld)",		\
		(long)val, (long)min, (long)max,	\
		__FILE__, __LINE__);			\
	} while (0)
void vx_update_sched_param(struct _vx_sched *sched,
	struct _vx_sched_pc *sched_pc)
{
	unsigned int set_mask = sched->update_mask;

	if (set_mask & VXSM_FILL_RATE)
		sched_pc->fill_rate[0] = sched->fill_rate[0];
	if (set_mask & VXSM_INTERVAL)
		sched_pc->interval[0] = sched->interval[0];
	if (set_mask & VXSM_FILL_RATE2)
		sched_pc->fill_rate[1] = sched->fill_rate[1];
	if (set_mask & VXSM_INTERVAL2)
		sched_pc->interval[1] = sched->interval[1];
	if (set_mask & VXSM_TOKENS)
		sched_pc->tokens = sched->tokens;
	if (set_mask & VXSM_TOKENS_MIN)
		sched_pc->tokens_min = sched->tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		sched_pc->tokens_max = sched->tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		sched_pc->prio_bias = sched->prio_bias;

	if (set_mask & VXSM_IDLE_TIME)
		sched_pc->flags |= VXSF_IDLE_TIME;
	else
		sched_pc->flags &= ~VXSF_IDLE_TIME;

	/* reset time */
	sched_pc->norm_time = jiffies;
}
/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret < 0 : on hold, check delta_min[]
 *	     -1 : only delta_min[0] is valid
 *	     -2 : delta_min[1] is valid as well
 */
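/*
 * Worked example (editorial illustration): with fill_rate[0] = 1 and
 * interval[0] = 4, a context earns one token every four ticks.  After
 * delta = 10 ticks, tokens = 10 / 4 = 2 and integral = 2 * 4 = 8, so
 * two tokens are added and (under CONFIG_VSERVER_HARDCPU) the
 * remainder delta_min[0] = 10 - 8 = 2 carries over to the next
 * refill.  Roughly, fill_rate / interval is the share of CPU granted
 * to the context.
 */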
int vx_tokens_recalc(struct _vx_sched_pc *sched_pc,
	unsigned long *norm_time, unsigned long *idle_time, int delta_min[2])
{
	long delta, tokens = 0;
	int flags = sched_pc->flags;

	/* how much time did pass? */
	delta = *norm_time - sched_pc->norm_time;
	vxd_check_range(delta, 0, INT_MAX);

	if (delta >= sched_pc->interval[0]) {
		long tokens, integral;

		/* calc integral token part */
		tokens = delta / sched_pc->interval[0];
		integral = tokens * sched_pc->interval[0];
		tokens *= sched_pc->fill_rate[0];
#ifdef	CONFIG_VSERVER_HARDCPU
		delta_min[0] = delta - integral;
		vxd_check_range(delta_min[0], 0, sched_pc->interval[0]);
#endif
		/* advance time */
		sched_pc->norm_time += delta;

		/* add tokens */
		sched_pc->tokens += tokens;
		sched_pc->token_time += tokens;
	} else
		delta_min[0] = delta;
#ifdef	CONFIG_VSERVER_IDLETIME
	if (!(flags & VXSF_IDLE_TIME))
		goto skip_idle;

	/* how much was the idle skip? */
	delta = *idle_time - sched_pc->idle_time;
	vxd_check_range(delta, 0, INT_MAX);

	if (delta >= sched_pc->interval[1]) {
		long tokens, integral;

		/* calc fair share token part */
		tokens = delta / sched_pc->interval[1];
		integral = tokens * sched_pc->interval[1];
		tokens *= sched_pc->fill_rate[1];
		delta_min[1] = delta - integral;
		vxd_check_range(delta_min[1], 0, sched_pc->interval[1]);

		/* advance idle time */
		sched_pc->idle_time += integral;

		/* add tokens */
		sched_pc->tokens += tokens;
		sched_pc->token_time += tokens;
	} else
		delta_min[1] = delta;

skip_idle:
#endif
	/* clip at maximum */
	if (sched_pc->tokens > sched_pc->tokens_max)
		sched_pc->tokens = sched_pc->tokens_max;
	tokens = sched_pc->tokens;

	if ((flags & VXSF_ONHOLD)) {
		/* on hold: can we unhold? */
		if (tokens >= sched_pc->tokens_min) {
			flags &= ~VXSF_ONHOLD;
			sched_pc->hold_ticks +=
				*norm_time - sched_pc->onhold;
		} else
			goto on_hold;
	} else {
		/* put on hold? */
		if (tokens <= 0) {
			flags |= VXSF_ONHOLD;
			sched_pc->onhold = *norm_time;
			goto on_hold;
		}
	}
	sched_pc->flags = flags;
	return tokens;

on_hold:
	tokens = sched_pc->tokens_min - tokens;
	sched_pc->flags = flags;
#ifdef	CONFIG_VSERVER_HARDCPU
	/* next interval? */
	if (!sched_pc->fill_rate[0])
		delta_min[0] = HZ;
	else if (tokens > sched_pc->fill_rate[0])
		delta_min[0] += sched_pc->interval[0] *
			tokens / sched_pc->fill_rate[0];
	else
		delta_min[0] = sched_pc->interval[0] - delta_min[0];
	vxd_check_range(delta_min[0], 0, INT_MAX);

#ifdef	CONFIG_VSERVER_IDLETIME
	if (!(flags & VXSF_IDLE_TIME))
		return -1;

	/* next interval? */
	if (!sched_pc->fill_rate[1])
		delta_min[1] = HZ;
	else if (tokens > sched_pc->fill_rate[1])
		delta_min[1] += sched_pc->interval[1] *
			tokens / sched_pc->fill_rate[1];
	else
		delta_min[1] = sched_pc->interval[1] - delta_min[1];
	vxd_check_range(delta_min[1], 0, INT_MAX);

	return -2;
#else
	return -1;
#endif /* CONFIG_VSERVER_IDLETIME */
#else
	return 0;
#endif /* CONFIG_VSERVER_HARDCPU */
}
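/*
 * Usage sketch (editorial, hypothetical caller): a scheduler hook
 * would typically feed in the current clocks and park the context
 * when the result is not positive, e.g.
 *
 *	int delta_min[2];
 *	int tokens = vx_tokens_recalc(sched_pc,
 *		&norm_time, &idle_time, delta_min);
 *
 *	if (tokens <= 0)
 *		;	// on hold: delta_min[] gives the ticks
 *			// until the next refill of each bucket
 */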
static inline unsigned long msec_to_ticks(unsigned long msec)
{
	return msecs_to_jiffies(msec);
}

static inline unsigned long ticks_to_msec(unsigned long ticks)
{
	return jiffies_to_msecs(ticks);
}

static inline unsigned long ticks_to_usec(unsigned long ticks)
{
	return jiffies_to_usecs(ticks);
}
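/*
 * Example (editorial illustration): with HZ = 250, msec_to_ticks(1000)
 * yields 250 ticks and ticks_to_msec(250) yields 1000 msec; the
 * token_usec value reported by vc_sched_info() below is
 * ticks_to_usec(1), i.e. the length of one tick in microseconds
 * (4000 at HZ = 250).
 */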
static int do_set_sched(struct vx_info *vxi, struct vcmd_sched_v5 *data)
{
	unsigned int set_mask = data->mask;
	unsigned int update_mask;
	int i, cpu;

	/* Sanity check data values */
	if (data->tokens_max <= 0)
		data->tokens_max = HZ;
	if (data->tokens_min < 0)
		data->tokens_min = HZ / 3;
	if (data->tokens_min >= data->tokens_max)
		data->tokens_min = data->tokens_max;

	if (data->prio_bias > MAX_PRIO_BIAS)
		data->prio_bias = MAX_PRIO_BIAS;
	if (data->prio_bias < MIN_PRIO_BIAS)
		data->prio_bias = MIN_PRIO_BIAS;

	spin_lock(&vxi->sched.tokens_lock);
	/* sync up on delayed updates */
	for_each_cpu_mask(cpu, vxi->sched.update)
		vx_update_sched_param(&vxi->sched,
			&vx_per_cpu(vxi, sched_pc, cpu));

	if (set_mask & VXSM_FILL_RATE)
		vxi->sched.fill_rate[0] = data->fill_rate[0];
	if (set_mask & VXSM_FILL_RATE2)
		vxi->sched.fill_rate[1] = data->fill_rate[1];
	if (set_mask & VXSM_INTERVAL)
		vxi->sched.interval[0] = (set_mask & VXSM_MSEC) ?
			msec_to_ticks(data->interval[0]) : data->interval[0];
	if (set_mask & VXSM_INTERVAL2)
		vxi->sched.interval[1] = (set_mask & VXSM_MSEC) ?
			msec_to_ticks(data->interval[1]) : data->interval[1];
	if (set_mask & VXSM_TOKENS)
		vxi->sched.tokens = data->tokens;
	if (set_mask & VXSM_TOKENS_MIN)
		vxi->sched.tokens_min = data->tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		vxi->sched.tokens_max = data->tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		vxi->sched.prio_bias = data->prio_bias;

	/* Sanity check rate/interval */
	for (i = 0; i < 2; i++) {
		if (data->fill_rate[i] < 0)
			data->fill_rate[i] = 0;
		if (data->interval[i] <= 0)
			data->interval[i] = HZ;
	}
	update_mask = vxi->sched.update_mask & VXSM_SET_MASK;
	update_mask |= (set_mask & (VXSM_SET_MASK | VXSM_IDLE_TIME));
	vxi->sched.update_mask = update_mask;

#ifdef	CONFIG_SMP
	if (set_mask & VXSM_CPU_ID) {
		vxi->sched.update = cpumask_of_cpu(data->cpu_id);
		cpus_and(vxi->sched.update, cpu_online_map,
			vxi->sched.update);
	} else
		vxi->sched.update = cpu_online_map;

	/* forced reload? */
	if (set_mask & VXSM_FORCE) {
		for_each_cpu_mask(cpu, vxi->sched.update)
			vx_update_sched_param(&vxi->sched,
				&vx_per_cpu(vxi, sched_pc, cpu));
		vxi->sched.update = CPU_MASK_NONE;
	}
#else
	/* on UP we update immediately */
	vx_update_sched_param(&vxi->sched,
		&vx_per_cpu(vxi, sched_pc, 0));
#endif

	spin_unlock(&vxi->sched.tokens_lock);
	return 0;
}
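/*
 * Example (editorial illustration, values are hypothetical): a caller
 * holding a valid vxi could cap a context at roughly half of one CPU
 * with the v5 command layout, granting one token per two ticks:
 *
 *	struct vcmd_sched_v5 cmd = {
 *		.mask = VXSM_FILL_RATE | VXSM_INTERVAL | VXSM_TOKENS_MAX,
 *		.fill_rate = { 1, 0 },
 *		.interval = { 2, 0 },
 *		.tokens_max = HZ,
 *	};
 *
 *	do_set_sched(vxi, &cmd);
 */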
#define COPY_IDS(C) C(cpu_id); C(bucket_id)
#define COPY_PRI(C) C(prio_bias)
#define COPY_TOK(C) C(tokens); C(tokens_min); C(tokens_max)
#define COPY_FRI(C) C(fill_rate[0]); C(interval[0]);	\
	C(fill_rate[1]); C(interval[1]);

#define COPY_VALUE(name) vc_data.name = data->name
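/*
 * Illustration (editorial): with the COPY_VALUE callback,
 * COPY_TOK(COPY_VALUE) expands to
 *
 *	vc_data.tokens = data->tokens;
 *	vc_data.tokens_min = data->tokens_min;
 *	vc_data.tokens_max = data->tokens_max;
 *
 * so each COPY_* macro copies one group of fields between whatever
 * `vc_data' and `data' are in scope at the expansion site.
 */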
static int do_set_sched_v4(struct vx_info *vxi, struct vcmd_set_sched_v4 *data)
{
	struct vcmd_sched_v5 vc_data;

	vc_data.mask = data->set_mask;
	COPY_IDS(COPY_VALUE);
	COPY_PRI(COPY_VALUE);
	COPY_TOK(COPY_VALUE);
	vc_data.fill_rate[0] = vc_data.fill_rate[1] = data->fill_rate;
	vc_data.interval[0] = vc_data.interval[1] = data->interval;
	return do_set_sched(vxi, &vc_data);
}
#ifdef	CONFIG_VSERVER_LEGACY

#define COPY_MASK_V2(name, mask)			\
	if (vc_data.name != SCHED_KEEP) {		\
		vc_data_v4.name = vc_data.name;		\
		vc_data_v4.set_mask |= mask;		\
	}

int vc_set_sched_v2(struct vx_info *vxi, void __user *data)
{
	struct vcmd_set_sched_v2 vc_data;
	struct vcmd_set_sched_v4 vc_data_v4 = { .set_mask = 0 };

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	COPY_MASK_V2(fill_rate, VXSM_FILL_RATE);
	COPY_MASK_V2(interval, VXSM_INTERVAL);
	COPY_MASK_V2(tokens, VXSM_TOKENS);
	COPY_MASK_V2(tokens_min, VXSM_TOKENS_MIN);
	COPY_MASK_V2(tokens_max, VXSM_TOKENS_MAX);
	vc_data_v4.bucket_id = 0;

	do_set_sched_v4(vxi, &vc_data_v4);
	return 0;
}

#endif /* CONFIG_VSERVER_LEGACY */
int vc_set_sched_v3(struct vx_info *vxi, void __user *data)
{
	struct vcmd_set_sched_v3 vc_data;
	struct vcmd_set_sched_v4 vc_data_v4;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	/* structures are binary compatible */
	memcpy(&vc_data_v4, &vc_data, sizeof(vc_data));
	vc_data_v4.set_mask &= VXSM_V3_MASK;
	vc_data_v4.bucket_id = 0;

	return do_set_sched_v4(vxi, &vc_data_v4);
}
int vc_set_sched_v4(struct vx_info *vxi, void __user *data)
{
	struct vcmd_set_sched_v4 vc_data;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	return do_set_sched_v4(vxi, &vc_data);
}
/* latest interface is v5 */

int vc_set_sched(struct vx_info *vxi, void __user *data)
{
	struct vcmd_sched_v5 vc_data;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	return do_set_sched(vxi, &vc_data);
}
int vc_get_sched(struct vx_info *vxi, void __user *data)
{
	struct vcmd_sched_v5 vc_data;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	if (vc_data.mask & VXSM_CPU_ID) {
		int cpu = vc_data.cpu_id;
		struct _vx_sched_pc *data;

		if (!cpu_possible(cpu))
			return -EINVAL;

		data = &vx_per_cpu(vxi, sched_pc, cpu);
		COPY_TOK(COPY_VALUE);
		COPY_PRI(COPY_VALUE);
		COPY_FRI(COPY_VALUE);

		if (data->flags & VXSF_IDLE_TIME)
			vc_data.mask |= VXSM_IDLE_TIME;
	} else {
		struct _vx_sched *data = &vxi->sched;

		COPY_TOK(COPY_VALUE);
		COPY_PRI(COPY_VALUE);
		COPY_FRI(COPY_VALUE);
	}

	if (vc_data.mask & VXSM_MSEC) {
		vc_data.interval[0] = ticks_to_msec(vc_data.interval[0]);
		vc_data.interval[1] = ticks_to_msec(vc_data.interval[1]);
	}

	if (copy_to_user(data, &vc_data, sizeof(vc_data)))
		return -EFAULT;
	return 0;
}
int vc_sched_info(struct vx_info *vxi, void __user *data)
{
	struct vcmd_sched_info vc_data;
	int cpu;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	cpu = vc_data.cpu_id;
	if (!cpu_possible(cpu))
		return -EINVAL;

	if (vxi) {
		struct _vx_sched_pc *sched_pc =
			&vx_per_cpu(vxi, sched_pc, cpu);

		vc_data.user_msec = ticks_to_msec(sched_pc->user_ticks);
		vc_data.sys_msec = ticks_to_msec(sched_pc->sys_ticks);
		vc_data.hold_msec = ticks_to_msec(sched_pc->hold_ticks);
		vc_data.vavavoom = sched_pc->vavavoom;
	}
	vc_data.token_usec = ticks_to_usec(1);

	if (copy_to_user(data, &vc_data, sizeof(vc_data)))
		return -EFAULT;
	return 0;
}