2 * linux/kernel/vserver/sched.c
4 * Virtual Server: Scheduler Support
6 * Copyright (C) 2004-2005 Herbert Pötzl
8 * V0.01 adapted Sam Vilain's version to 2.6.3
9 * V0.02 removed legacy interface
13 #include <linux/config.h>
14 #include <linux/sched.h>
15 #include <linux/vs_context.h>
16 #include <linux/vs_sched.h>
17 #include <linux/vserver/sched_cmd.h>
19 #include <asm/errno.h>
20 #include <asm/uaccess.h>
24 * recalculate the context's scheduling tokens
26 * ret > 0 : number of tokens available
27 * ret = 0 : context is paused
28 * ret < 0 : number of jiffies until new tokens arrive
31 int vx_tokens_recalc(struct vx_info *vxi)
/*
 * Recalculate the context's scheduler tokens (token-bucket refill).
 * Per the header comment above: ret > 0 = tokens available,
 * ret = 0 = context paused, ret < 0 = jiffies until new tokens arrive.
 *
 * NOTE(review): the original line numbering jumps in this extract, so
 * braces, early returns and else-arms between the visible lines are
 * missing here — confirm structure against the full file before editing.
 */
33 long delta, tokens = 0;
/* a paused context accrues nothing (early-out; return not visible here) */
35 if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
/* jiffies elapsed since the last bucket refill */
39 delta = jiffies - vxi->sched.jiffies;
/* at least one full interval elapsed: refill the bucket */
41 if (delta >= vxi->sched.interval) {
42 /* lockdown scheduler info */
43 spin_lock(&vxi->sched.tokens_lock);
45 /* calc integral token part */
/* re-read delta under the lock, then keep only whole intervals */
46 delta = jiffies - vxi->sched.jiffies;
47 tokens = delta / vxi->sched.interval;
48 delta = tokens * vxi->sched.interval;
/* fill_rate tokens granted per whole interval */
49 tokens *= vxi->sched.fill_rate;
51 atomic_add(tokens, &vxi->sched.tokens);
/* advance the refill timestamp by the consumed (whole-interval) delta */
52 vxi->sched.jiffies += delta;
53 tokens = atomic_read(&vxi->sched.tokens);
/* clamp the bucket at tokens_max */
55 if (tokens > vxi->sched.tokens_max) {
56 tokens = vxi->sched.tokens_max;
57 atomic_set(&vxi->sched.tokens, tokens);
59 spin_unlock(&vxi->sched.tokens_lock);
/* no refill this pass: just read the currently available tokens
 * (presumably an else-branch of the interval check — verify) */
62 tokens = vx_tokens_avail(vxi);
/* mark the context on hold (condition guarding this is not visible) */
64 vxi->vx_state |= VXS_ONHOLD;
65 if (tokens < vxi->sched.tokens_min) {
66 /* enough tokens will be available in */
/* tokens_min == 0: next refill is simply one interval after 'delta' */
67 if (vxi->sched.tokens_min == 0)
68 return delta - vxi->sched.interval;
/* negative result: jiffies until tokens_min tokens have accumulated */
69 return delta - vxi->sched.interval *
70 vxi->sched.tokens_min / vxi->sched.fill_rate;
74 /* we have some tokens left */
/* leave the hold state once we are back at/above tokens_min */
75 if (vx_info_state(vxi, VXS_ONHOLD) &&
76 (tokens >= vxi->sched.tokens_min))
77 vxi->vx_state &= ~VXS_ONHOLD;
/* while still on hold, report only the surplus above tokens_min */
78 if (vx_info_state(vxi, VXS_ONHOLD))
79 tokens -= vxi->sched.tokens_min;
85 * effective_prio - return the priority that is based on the static
86 * priority but is modified by bonuses/penalties.
88 * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
89 * into a -4 ... 0 ... +4 bonus/penalty range.
91 * Additionally, we scale another amount based on the number of
92 * CPU tokens currently held by the context, if the process is
93 * part of a context (and the appropriate SCHED flag is set).
94 * This ranges from -5 ... 0 ... +15, quadratically.
96 * So, the total bonus is -9 .. 0 .. +19
97 * We use ~50% of the full 0...39 priority range so that:
99 * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
100 * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
101 * unless that context is far exceeding its CPU allocation.
103 * Both properties are important to certain workloads.
105 int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
/*
 * Compute the context's "vavavoom" priority adjustment from its current
 * token level; see the comment block above for the intended quadratic
 * -5..+15 scaling within max_prio.
 *
 * NOTE(review): the declarations of 'vavavoom'/'max', part of the body
 * and the final return are not visible in this extract (original line
 * numbering jumps) — confirm against the full file.
 */
109 /* lots of tokens = lots of vavavoom
110 * no tokens = no vavavoom */
111 if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
112 max = vxi->sched.tokens_max;
/* invert: the fewer tokens held, the larger the deficit fed in below */
113 vavavoom = max - vavavoom;
/* quadratic scaling of the token deficit into the priority range;
 * (max >> 2) shifts the curve so a full bucket yields a negative bonus */
115 vavavoom = max_prio * VAVAVOOM_RATIO / 100
116 * (vavavoom*vavavoom - (max >> 2)) / max;
/* cache the computed adjustment on the context */
120 vxi->sched.vavavoom = vavavoom;
125 int vc_set_sched_v2(uint32_t xid, void __user *data)
/*
 * Legacy v2 vserver command: update the scheduler parameters of context
 * 'xid' from a userspace vcmd_set_sched_v2 structure. Fields set to
 * SCHED_KEEP are left unchanged; the resulting values are then clamped
 * to sane ranges, all under sched.tokens_lock.
 *
 * NOTE(review): the error paths (copy_from_user failure, xid not found)
 * and the trailing put_vx_info()/return are not visible in this extract
 * (original line numbering jumps).
 */
127 struct vcmd_set_sched_v2 vc_data;
130 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
/* take a reference on the target context (release path not visible) */
133 vxi = locate_vx_info(xid);
/* serialize against vx_tokens_recalc() and concurrent setters */
137 spin_lock(&vxi->sched.tokens_lock);
/* apply only the fields the caller did not mark SCHED_KEEP */
139 if (vc_data.interval != SCHED_KEEP)
140 vxi->sched.interval = vc_data.interval;
141 if (vc_data.fill_rate != SCHED_KEEP)
142 vxi->sched.fill_rate = vc_data.fill_rate;
143 if (vc_data.tokens_min != SCHED_KEEP)
144 vxi->sched.tokens_min = vc_data.tokens_min;
145 if (vc_data.tokens_max != SCHED_KEEP)
146 vxi->sched.tokens_max = vc_data.tokens_max;
147 if (vc_data.tokens != SCHED_KEEP)
148 atomic_set(&vxi->sched.tokens, vc_data.tokens);
150 /* Sanity check the resultant values */
/* fill_rate and interval must stay positive (both are divisors in
 * vx_tokens_recalc); tokens_max of 0 would make the bucket useless */
151 if (vxi->sched.fill_rate <= 0)
152 vxi->sched.fill_rate = 1;
153 if (vxi->sched.interval <= 0)
154 vxi->sched.interval = HZ;
155 if (vxi->sched.tokens_max == 0)
156 vxi->sched.tokens_max = 1;
/* current tokens and tokens_min may not exceed tokens_max */
157 if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
158 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
159 if (vxi->sched.tokens_min > vxi->sched.tokens_max)
160 vxi->sched.tokens_min = vxi->sched.tokens_max;
162 spin_unlock(&vxi->sched.tokens_lock);
168 int vc_set_sched(uint32_t xid, void __user *data)
/*
 * Current (v3) vserver command: update the scheduler parameters of
 * context 'xid' from a userspace vcmd_set_sched_v3 structure. Unlike
 * the v2 variant above, fields are selected explicitly via set_mask
 * bits rather than SCHED_KEEP sentinels, and a priority bias can also
 * be set. Resulting values are clamped under sched.tokens_lock.
 *
 * NOTE(review): the error paths (copy_from_user failure, xid not found)
 * and the trailing put_vx_info()/return are not visible in this extract
 * (original line numbering jumps).
 */
170 struct vcmd_set_sched_v3 vc_data;
172 unsigned int set_mask;
174 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
/* take a reference on the target context (release path not visible) */
177 vxi = locate_vx_info(xid);
181 set_mask = vc_data.set_mask;
/* serialize against vx_tokens_recalc() and concurrent setters */
183 spin_lock(&vxi->sched.tokens_lock);
/* apply only the fields selected by set_mask */
185 if (set_mask & VXSM_FILL_RATE)
186 vxi->sched.fill_rate = vc_data.fill_rate;
187 if (set_mask & VXSM_INTERVAL)
188 vxi->sched.interval = vc_data.interval;
189 if (set_mask & VXSM_TOKENS)
190 atomic_set(&vxi->sched.tokens, vc_data.tokens);
191 if (set_mask & VXSM_TOKENS_MIN)
192 vxi->sched.tokens_min = vc_data.tokens_min;
193 if (set_mask & VXSM_TOKENS_MAX)
194 vxi->sched.tokens_max = vc_data.tokens_max;
195 if (set_mask & VXSM_PRIO_BIAS)
196 vxi->sched.priority_bias = vc_data.priority_bias;
198 /* Sanity check the resultant values */
/* fill_rate and interval must stay positive (both are divisors in
 * vx_tokens_recalc); tokens_max of 0 would make the bucket useless */
199 if (vxi->sched.fill_rate <= 0)
200 vxi->sched.fill_rate = 1;
201 if (vxi->sched.interval <= 0)
202 vxi->sched.interval = HZ;
203 if (vxi->sched.tokens_max == 0)
204 vxi->sched.tokens_max = 1;
/* current tokens and tokens_min may not exceed tokens_max */
205 if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
206 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
207 if (vxi->sched.tokens_min > vxi->sched.tokens_max)
208 vxi->sched.tokens_min = vxi->sched.tokens_max;
/* clamp the priority bias into its legal range */
209 if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
210 vxi->sched.priority_bias = MAX_PRIO_BIAS;
211 if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
212 vxi->sched.priority_bias = MIN_PRIO_BIAS;
214 spin_unlock(&vxi->sched.tokens_lock);