/*
 *  linux/kernel/vserver/sched.c
 *
 *  Virtual Server: Scheduler Support
 *
 *  Copyright (C) 2004  Herbert Pötzl
 *
 *  V0.01  adapted Sam Vilain's version to 2.6.3
 *  V0.02  removed legacy interface
 *
 */
#include <linux/config.h>
#include <linux/sched.h>
// #include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_sched.h>
#include <linux/vserver/sched_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>
/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 */
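/*
 * Worked example of the return convention (values assumed for
 * illustration, and assuming the usual token-bucket semantics where
 * a running context consumes one token per timer tick): with
 * fill_rate = 2 and interval = 10, a context earns 2 tokens every
 * 10 jiffies, i.e. roughly a 20% CPU share under hard scheduling.
 * If tokens_min = 4 and only delta = 5 jiffies have passed since the
 * last refill, the negative branch below returns
 * 5 - 10 * 4 / 2 = -15, whose magnitude is the number of jiffies
 * until tokens_min tokens have accumulated.
 */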
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta, tokens = 0;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	delta = jiffies - vxi->sched.jiffies;

	if (delta >= vxi->sched.interval) {
		/* lockdown scheduler info */
		spin_lock(&vxi->sched.tokens_lock);

		/* calc integral token part */
		delta = jiffies - vxi->sched.jiffies;
		tokens = delta / vxi->sched.interval;
		delta = tokens * vxi->sched.interval;
		tokens *= vxi->sched.fill_rate;

		atomic_add(tokens, &vxi->sched.tokens);
		vxi->sched.jiffies += delta;
		tokens = atomic_read(&vxi->sched.tokens);

		if (tokens > vxi->sched.tokens_max) {
			tokens = vxi->sched.tokens_max;
			atomic_set(&vxi->sched.tokens, tokens);
		}
		spin_unlock(&vxi->sched.tokens_lock);
	} else {
		/* no new tokens */
		tokens = vx_tokens_avail(vxi);
		if (tokens <= 0)
			vxi->vx_state |= VXS_ONHOLD;
		if (tokens < vxi->sched.tokens_min) {
			/* enough tokens will be available in */
			if (vxi->sched.tokens_min == 0)
				return delta - vxi->sched.interval;
			return delta - vxi->sched.interval *
				vxi->sched.tokens_min / vxi->sched.fill_rate;
		}
	}

	/* we have some tokens left */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
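/*
 * Illustration of the integral refill above (numbers assumed): with
 * interval = 10 and fill_rate = 3, a delta of 25 jiffies yields
 * 25 / 10 = 2 whole intervals, so 2 * 3 = 6 tokens are added and
 * sched.jiffies advances by exactly 2 * 10 = 20 jiffies; the
 * remaining 5 jiffies stay pending for the next recalculation, so no
 * fractional tokens are ever lost.
 */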
/*
 * effective_prio - return the priority that is based on the static
 * priority but is modified by bonuses/penalties.
 *
 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 * into a -4 ... 0 ... +4 bonus/penalty range.
 *
 * Additionally, we scale another amount based on the number of
 * CPU tokens currently held by the context, if the process is
 * part of a context (and the appropriate SCHED flag is set).
 * This ranges from -5 ... 0 ... +15, quadratically.
 *
 * So, the total bonus is -9 .. 0 .. +19
 * We use ~50% of the full 0...39 priority range so that:
 *
 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks, unless
 *    that context is far exceeding its CPU allocation.
 *
 * Both properties are important to certain workloads.
 */
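/*
 * Sketch of where the -5 ... 0 ... +15 range comes from, assuming
 * VAVAVOOM_RATIO = 50 and max_prio = MAX_USER_PRIO = 40: after the
 * "max = max * max" step below, (vavavoom^2 - max/4) / max runs from
 * about -1/4 (full bucket, vavavoom = 0) up to about +3/4 (empty
 * bucket, vavavoom = tokens_max), so the bonus spans roughly
 * 40 * 50/100 * [-1/4 ... +3/4] = -5 ... +15, matching the comment
 * above.
 */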
int effective_vavavoom(task_t *p, int max_prio)
{
	struct vx_info *vxi = p->vx_info;
	int vavavoom, max;

	/* lots of tokens = lots of vavavoom
	 *      no tokens = no vavavoom      */
	if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
		max = vxi->sched.tokens_max;
		vavavoom = max - vavavoom;
		max = max * max;
		vavavoom = max_prio * VAVAVOOM_RATIO / 100
			* (vavavoom*vavavoom - (max >> 2)) / max;
		/* alternative, geometric mapping
		vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom
			/ vxi->sched.tokens_max -
			MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
	} else
		vavavoom = 0;
	/* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) -
		MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */

	return vavavoom;
}
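/*
 * A minimal usage sketch (assumed, not part of this file): the
 * vserver patch to effective_prio() in kernel/sched.c would fold
 * this bonus into the dynamic priority along the lines of
 *
 *	if (vx_flags(VXF_SCHED_PRIO, 0))
 *		prio += effective_vavavoom(p, MAX_USER_PRIO);
 *
 * where VXF_SCHED_PRIO is the "appropriate SCHED flag" mentioned in
 * the comment above.
 */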
int vc_set_sched_v2(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v2 vc_data;
	struct vx_info *vxi;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -EINVAL;

	spin_lock(&vxi->sched.tokens_lock);

	if (vc_data.interval != SCHED_KEEP)
		vxi->sched.interval = vc_data.interval;
	if (vc_data.fill_rate != SCHED_KEEP)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (vc_data.tokens_min != SCHED_KEEP)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (vc_data.tokens_max != SCHED_KEEP)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (vc_data.tokens != SCHED_KEEP)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;

	spin_unlock(&vxi->sched.tokens_lock);

	put_vx_info(vxi);
	return 0;
}
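/*
 * Illustrative only (caller-side values assumed): with the v2
 * interface a caller fills in the fields it wants changed and passes
 * SCHED_KEEP for everything it wants left alone, e.g. to change only
 * the fill rate:
 *
 *	struct vcmd_set_sched_v2 vc_data = {
 *		.fill_rate  = 30,
 *		.interval   = SCHED_KEEP,
 *		.tokens     = SCHED_KEEP,
 *		.tokens_min = SCHED_KEEP,
 *		.tokens_max = SCHED_KEEP,
 *	};
 *
 * Only the fields this function actually reads are shown.
 */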
int vc_set_sched(uint32_t xid, void __user *data)
{
	struct vcmd_set_sched_v3 vc_data;
	struct vx_info *vxi;
	unsigned int set_mask;

	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	vxi = locate_vx_info(xid);
	if (!vxi)
		return -EINVAL;

	set_mask = vc_data.set_mask;

	spin_lock(&vxi->sched.tokens_lock);

	if (set_mask & VXSM_FILL_RATE)
		vxi->sched.fill_rate = vc_data.fill_rate;
	if (set_mask & VXSM_INTERVAL)
		vxi->sched.interval = vc_data.interval;
	if (set_mask & VXSM_TOKENS)
		atomic_set(&vxi->sched.tokens, vc_data.tokens);
	if (set_mask & VXSM_TOKENS_MIN)
		vxi->sched.tokens_min = vc_data.tokens_min;
	if (set_mask & VXSM_TOKENS_MAX)
		vxi->sched.tokens_max = vc_data.tokens_max;
	if (set_mask & VXSM_PRIO_BIAS)
		vxi->sched.priority_bias = vc_data.priority_bias;

	/* Sanity check the resultant values */
	if (vxi->sched.fill_rate <= 0)
		vxi->sched.fill_rate = 1;
	if (vxi->sched.interval <= 0)
		vxi->sched.interval = HZ;
	if (vxi->sched.tokens_max == 0)
		vxi->sched.tokens_max = 1;
	if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
		atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
	if (vxi->sched.tokens_min > vxi->sched.tokens_max)
		vxi->sched.tokens_min = vxi->sched.tokens_max;
	if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
		vxi->sched.priority_bias = MAX_PRIO_BIAS;
	if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
		vxi->sched.priority_bias = MIN_PRIO_BIAS;

	spin_unlock(&vxi->sched.tokens_lock);

	put_vx_info(vxi);
	return 0;
}
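/*
 * Illustrative only (caller-side values assumed): to grant a context
 * roughly 25% CPU with the v3 interface, set fill_rate = 1 token per
 * interval = 4 jiffies and leave all other fields untouched:
 *
 *	struct vcmd_set_sched_v3 vc_data = {
 *		.set_mask  = VXSM_FILL_RATE | VXSM_INTERVAL,
 *		.fill_rate = 1,
 *		.interval  = 4,
 *	};
 *
 * Fields whose bit is absent from set_mask are simply ignored above,
 * which replaces the per-field SCHED_KEEP convention of the v2
 * command.
 */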