2 * linux/kernel/vserver/sched.c
4 * Virtual Server: Scheduler Support
6 * Copyright (C) 2004-2005 Herbert Pötzl
8 * V0.01 adapted Sam Vilain's version to 2.6.3
9 * V0.02 removed legacy interface
13 #include <linux/sched.h>
14 #include <linux/vs_context.h>
15 #include <linux/vs_sched.h>
16 #include <linux/vserver/sched_cmd.h>
18 #include <asm/errno.h>
19 #include <asm/uaccess.h>
23 * recalculate the context's scheduling tokens
25 * ret > 0 : number of tokens available
26 * ret = 0 : context is paused
27 * ret < 0 : number of jiffies until new tokens arrive
/*
 * vx_tokens_recalc - refill the context's token bucket from elapsed jiffies
 *
 * Per the header comment above: ret > 0 = tokens available, ret = 0 =
 * context paused, ret < 0 = jiffies until new tokens arrive.
 *
 * NOTE(review): this excerpt is missing several structural lines (braces,
 * early returns, the final return statement); only comments were added here.
 */
30 int vx_tokens_recalc(struct vx_info *vxi)
32 long delta, tokens = 0;
/* a paused context (VXF_SCHED_PAUSE) gets no refill; the early
 * "return 0" body of this if appears to be missing from this excerpt */
34 if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
/* jiffies elapsed since the last refill timestamp */
38 delta = jiffies - vxi->sched.jiffies;
/* at least one whole interval has elapsed: grant new tokens */
40 if (delta >= vxi->sched.interval) {
41 /* lockdown scheduler info */
42 spin_lock(&vxi->sched.tokens_lock);
44 /* calc integral token part */
/* re-read delta under the lock, then round down to whole intervals
 * so the fractional remainder carries over to the next recalc */
45 delta = jiffies - vxi->sched.jiffies;
46 tokens = delta / vxi->sched.interval;
47 delta = tokens * vxi->sched.interval;
/* fill_rate tokens are granted per elapsed interval */
48 tokens *= vxi->sched.fill_rate;
50 atomic_add(tokens, &vxi->sched.tokens);
/* advance the refill timestamp by the consumed whole intervals only */
51 vxi->sched.jiffies += delta;
52 tokens = atomic_read(&vxi->sched.tokens);
/* clamp the bucket at tokens_max */
54 if (tokens > vxi->sched.tokens_max) {
55 tokens = vxi->sched.tokens_max;
56 atomic_set(&vxi->sched.tokens, tokens);
58 spin_unlock(&vxi->sched.tokens_lock);
/* presumably the "not enough tokens" path starts here — the
 * surrounding branch structure is missing from this excerpt */
61 tokens = vx_tokens_avail(vxi);
/* put the context on hold while it is short of tokens */
63 vxi->vx_state |= VXS_ONHOLD;
64 if (tokens < vxi->sched.tokens_min) {
65 /* enough tokens will be available in */
66 if (vxi->sched.tokens_min == 0)
67 return delta - vxi->sched.interval;
/* negative result: jiffies remaining until tokens_min is reached,
 * estimated from the fill rate */
68 return delta - vxi->sched.interval *
69 vxi->sched.tokens_min / vxi->sched.fill_rate;
73 /* we have some tokens left */
/* leave the ONHOLD state once the bucket is back above tokens_min */
74 if (vx_info_state(vxi, VXS_ONHOLD) &&
75 (tokens >= vxi->sched.tokens_min))
76 vxi->vx_state &= ~VXS_ONHOLD;
/* while still on hold, report only the surplus above tokens_min */
77 if (vx_info_state(vxi, VXS_ONHOLD))
78 tokens -= vxi->sched.tokens_min;
84 * effective_prio - return the priority that is based on the static
85 * priority but is modified by bonuses/penalties.
87 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
88 * into a -4 ... 0 ... +4 bonus/penalty range.
90 * Additionally, we scale another amount based on the number of
91 * CPU tokens currently held by the context, if the process is
92 * part of a context (and the appropriate SCHED flag is set).
93 * This ranges from -5 ... 0 ... +15, quadratically.
95 * So, the total bonus is -9 .. 0 .. +19
96 * We use ~50% of the full 0...39 priority range so that:
98 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
99 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks,
100 * unless that context is far exceeding its CPU allocation.
102 * Both properties are important to certain workloads.
/*
 * vx_effective_vavavoom - compute the context's priority bonus ("vavavoom")
 * from its current token count, scaled into the range described in the
 * comment block above (quadratic in the token deficit).
 *
 * NOTE(review): the local declarations (int vavavoom, max;) and some
 * braces/else lines are missing from this excerpt; only comments added.
 */
104 int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
108 /* lots of tokens = lots of vavavoom
109 * no tokens = no vavavoom */
110 if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
111 max = vxi->sched.tokens_max;
/* vavavoom becomes the token DEFICIT: full bucket -> 0 bonus */
112 vavavoom = max - vavavoom;
/* quadratic scaling of the deficit into ~VAVAVOOM_RATIO percent of
 * max_prio; (max >> 2) shifts the zero point so a mostly-full bucket
 * yields a small negative (i.e. favourable) value */
114 vavavoom = max_prio * VAVAVOOM_RATIO / 100
115 * (vavavoom*vavavoom - (max >> 2)) / max;
/* cache the computed bonus and apply the static per-context bias */
119 vxi->sched.vavavoom = vavavoom;
120 return vavavoom + vxi->sched.priority_bias;
/*
 * vc_set_sched_v2 - legacy (v2) vserver command: update the scheduler
 * parameters of context @xid from a user-supplied vcmd_set_sched_v2.
 * Fields equal to SCHED_KEEP are left untouched; the results are then
 * sanity-clamped under the token lock.
 *
 * NOTE(review): the error-return lines after copy_from_user() and
 * lookup_vx_info(), and the trailing put/return, are missing from this
 * excerpt; only comments were added.
 */
124 int vc_set_sched_v2(uint32_t xid, void __user *data)
126 struct vcmd_set_sched_v2 vc_data;
/* copy the whole command struct from userspace (-EFAULT path not
 * visible in this excerpt) */
129 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
/* take a reference on the target context (NULL check presumably
 * follows — missing from this excerpt) */
132 vxi = lookup_vx_info(xid);
/* serialize against vx_tokens_recalc() and other setters */
136 spin_lock(&vxi->sched.tokens_lock);
/* SCHED_KEEP is the "don't change this field" sentinel */
138 if (vc_data.interval != SCHED_KEEP)
139 vxi->sched.interval = vc_data.interval;
140 if (vc_data.fill_rate != SCHED_KEEP)
141 vxi->sched.fill_rate = vc_data.fill_rate;
142 if (vc_data.tokens_min != SCHED_KEEP)
143 vxi->sched.tokens_min = vc_data.tokens_min;
144 if (vc_data.tokens_max != SCHED_KEEP)
145 vxi->sched.tokens_max = vc_data.tokens_max;
146 if (vc_data.tokens != SCHED_KEEP)
147 atomic_set(&vxi->sched.tokens, vc_data.tokens);
149 /* Sanity check the resultant values */
/* avoid divide-by-zero in the token math */
150 if (vxi->sched.fill_rate <= 0)
151 vxi->sched.fill_rate = 1;
152 if (vxi->sched.interval <= 0)
153 vxi->sched.interval = HZ;
154 if (vxi->sched.tokens_max == 0)
155 vxi->sched.tokens_max = 1;
/* keep current tokens and tokens_min within [0, tokens_max] */
156 if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
157 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max)
158 if (vxi->sched.tokens_min > vxi->sched.tokens_max)
159 vxi->sched.tokens_min = vxi->sched.tokens_max;
161 spin_unlock(&vxi->sched.tokens_lock);
/*
 * vc_set_sched - v3 vserver command: update scheduler parameters of
 * context @xid. Unlike the v2 variant above, fields are selected by an
 * explicit bitmask (set_mask) rather than the SCHED_KEEP sentinel, and
 * it additionally supports a priority bias.
 *
 * NOTE(review): this excerpt ends at the spin_unlock; the error-return
 * lines and the function tail (put/return/closing brace) are not visible
 * here; only comments were added to the visible lines.
 */
167 int vc_set_sched(uint32_t xid, void __user *data)
169 struct vcmd_set_sched_v3 vc_data;
171 unsigned int set_mask;
/* copy the whole command struct from userspace (-EFAULT path not
 * visible in this excerpt) */
173 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
/* take a reference on the target context (NULL check presumably
 * follows — missing from this excerpt) */
176 vxi = lookup_vx_info(xid);
180 set_mask = vc_data.set_mask;
/* serialize against vx_tokens_recalc() and other setters */
182 spin_lock(&vxi->sched.tokens_lock);
/* apply only the fields selected by the caller's bitmask */
184 if (set_mask & VXSM_FILL_RATE)
185 vxi->sched.fill_rate = vc_data.fill_rate;
186 if (set_mask & VXSM_INTERVAL)
187 vxi->sched.interval = vc_data.interval;
188 if (set_mask & VXSM_TOKENS)
189 atomic_set(&vxi->sched.tokens, vc_data.tokens);
190 if (set_mask & VXSM_TOKENS_MIN)
191 vxi->sched.tokens_min = vc_data.tokens_min;
192 if (set_mask & VXSM_TOKENS_MAX)
193 vxi->sched.tokens_max = vc_data.tokens_max;
194 if (set_mask & VXSM_PRIO_BIAS)
195 vxi->sched.priority_bias = vc_data.priority_bias;
197 /* Sanity check the resultant values */
/* avoid divide-by-zero in the token math */
198 if (vxi->sched.fill_rate <= 0)
199 vxi->sched.fill_rate = 1;
200 if (vxi->sched.interval <= 0)
201 vxi->sched.interval = HZ;
202 if (vxi->sched.tokens_max == 0)
203 vxi->sched.tokens_max = 1;
/* keep current tokens and tokens_min within [0, tokens_max] */
204 if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
205 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
206 if (vxi->sched.tokens_min > vxi->sched.tokens_max)
207 vxi->sched.tokens_min = vxi->sched.tokens_max;
/* clamp the bias to the range used by vx_effective_vavavoom() */
208 if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
209 vxi->sched.priority_bias = MAX_PRIO_BIAS;
210 if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
211 vxi->sched.priority_bias = MIN_PRIO_BIAS;
213 spin_unlock(&vxi->sched.tokens_lock);