Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / kernel / vserver / sched.c
1 /*
2  *  linux/kernel/vserver/sched.c
3  *
4  *  Virtual Server: Scheduler Support
5  *
 6  *  Copyright (C) 2004-2005  Herbert Pötzl
7  *
8  *  V0.01  adapted Sam Vilains version to 2.6.3
9  *  V0.02  removed legacy interface
10  *
11  */
12
13 #include <linux/sched.h>
14 #include <linux/vs_context.h>
15 #include <linux/vs_sched.h>
16 #include <linux/vserver/sched_cmd.h>
17
18 #include <asm/errno.h>
19 #include <asm/uaccess.h>
20
21
/*
 * recalculate the context's scheduling tokens
 *
 * ret > 0 : number of tokens available
 * ret = 0 : context is paused
 * ret < 0 : number of jiffies until new tokens arrive
 *
 */
int vx_tokens_recalc(struct vx_info *vxi)
{
	long delta, tokens = 0;

	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
		/* we are paused */
		return 0;

	/* jiffies elapsed since tokens were last refilled */
	delta = jiffies - vxi->sched.jiffies;

	if (delta >= vxi->sched.interval) {
		/* at least one full interval has passed: refill.
		 * lockdown scheduler info */
		spin_lock(&vxi->sched.tokens_lock);

		/* calc integral token part; delta is re-read here
		 * because jiffies may have advanced while we were
		 * waiting for the lock */
		delta = jiffies - vxi->sched.jiffies;
		tokens = delta / vxi->sched.interval;
		/* consume only whole intervals, so the remainder
		 * carries over to the next recalc */
		delta = tokens * vxi->sched.interval;
		tokens *= vxi->sched.fill_rate;

		atomic_add(tokens, &vxi->sched.tokens);
		vxi->sched.jiffies += delta;
		tokens = atomic_read(&vxi->sched.tokens);

		/* clamp the bucket to its configured maximum */
		if (tokens > vxi->sched.tokens_max) {
			tokens = vxi->sched.tokens_max;
			atomic_set(&vxi->sched.tokens, tokens);
		}
		spin_unlock(&vxi->sched.tokens_lock);
	} else {
		/* no new tokens: fast path, no lock taken */
		tokens = vx_tokens_avail(vxi);
		if (tokens <= 0)
			vxi->vx_state |= VXS_ONHOLD;
		if (tokens < vxi->sched.tokens_min) {
			/* enough tokens will be available in
			 * (negative return = jiffies to wait) */
			if (vxi->sched.tokens_min == 0)
				return delta - vxi->sched.interval;
			return delta - vxi->sched.interval *
				vxi->sched.tokens_min / vxi->sched.fill_rate;
		}
	}

	/* we have some tokens left; leave hold state once the
	 * bucket has recovered to at least tokens_min */
	if (vx_info_state(vxi, VXS_ONHOLD) &&
		(tokens >= vxi->sched.tokens_min))
		vxi->vx_state &= ~VXS_ONHOLD;
	if (vx_info_state(vxi, VXS_ONHOLD))
		tokens -= vxi->sched.tokens_min;

	return tokens;
}
82
83 /*
84  * effective_prio - return the priority that is based on the static
85  * priority but is modified by bonuses/penalties.
86  *
87  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
88  * into a -4 ... 0 ... +4 bonus/penalty range.
89  *
90  * Additionally, we scale another amount based on the number of
91  * CPU tokens currently held by the context, if the process is
92  * part of a context (and the appropriate SCHED flag is set).
93  * This ranges from -5 ... 0 ... +15, quadratically.
94  *
95  * So, the total bonus is -9 .. 0 .. +19
96  * We use ~50% of the full 0...39 priority range so that:
97  *
98  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
99  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
100  *    unless that context is far exceeding its CPU allocation.
101  *
102  * Both properties are important to certain workloads.
103  */
104 int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
105 {
106         int vavavoom, max;
107
108         /* lots of tokens = lots of vavavoom
109          *      no tokens = no vavavoom      */
110         if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
111                 max = vxi->sched.tokens_max;
112                 vavavoom = max - vavavoom;
113                 max = max * max;
114                 vavavoom = max_prio * VAVAVOOM_RATIO / 100
115                         * (vavavoom*vavavoom - (max >> 2)) / max;
116         } else
117                 vavavoom = 0;
118
119         vxi->sched.vavavoom = vavavoom;
120         return vavavoom + vxi->sched.priority_bias;
121 }
122
123
124 int vc_set_sched_v2(uint32_t xid, void __user *data)
125 {
126         struct vcmd_set_sched_v2 vc_data;
127         struct vx_info *vxi;
128
129         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
130                 return -EFAULT;
131
132         vxi = lookup_vx_info(xid);
133         if (!vxi)
134                 return -EINVAL;
135
136         spin_lock(&vxi->sched.tokens_lock);
137
138         if (vc_data.interval != SCHED_KEEP)
139                 vxi->sched.interval = vc_data.interval;
140         if (vc_data.fill_rate != SCHED_KEEP)
141                 vxi->sched.fill_rate = vc_data.fill_rate;
142         if (vc_data.tokens_min != SCHED_KEEP)
143                 vxi->sched.tokens_min = vc_data.tokens_min;
144         if (vc_data.tokens_max != SCHED_KEEP)
145                 vxi->sched.tokens_max = vc_data.tokens_max;
146         if (vc_data.tokens != SCHED_KEEP)
147                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
148
149         /* Sanity check the resultant values */
150         if (vxi->sched.fill_rate <= 0)
151                 vxi->sched.fill_rate = 1;
152         if (vxi->sched.interval <= 0)
153                 vxi->sched.interval = HZ;
154         if (vxi->sched.tokens_max == 0)
155                 vxi->sched.tokens_max = 1;
156         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
157                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
158         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
159                 vxi->sched.tokens_min = vxi->sched.tokens_max;
160
161         spin_unlock(&vxi->sched.tokens_lock);
162         put_vx_info(vxi);
163         return 0;
164 }
165
166
167 int vc_set_sched(uint32_t xid, void __user *data)
168 {
169         struct vcmd_set_sched_v3 vc_data;
170         struct vx_info *vxi;
171         unsigned int set_mask;
172
173         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
174                 return -EFAULT;
175
176         vxi = lookup_vx_info(xid);
177         if (!vxi)
178                 return -EINVAL;
179
180         set_mask = vc_data.set_mask;
181
182         spin_lock(&vxi->sched.tokens_lock);
183
184         if (set_mask & VXSM_FILL_RATE)
185                 vxi->sched.fill_rate = vc_data.fill_rate;
186         if (set_mask & VXSM_INTERVAL)
187                 vxi->sched.interval = vc_data.interval;
188         if (set_mask & VXSM_TOKENS)
189                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
190         if (set_mask & VXSM_TOKENS_MIN)
191                 vxi->sched.tokens_min = vc_data.tokens_min;
192         if (set_mask & VXSM_TOKENS_MAX)
193                 vxi->sched.tokens_max = vc_data.tokens_max;
194         if (set_mask & VXSM_PRIO_BIAS)
195                 vxi->sched.priority_bias = vc_data.priority_bias;
196
197         /* Sanity check the resultant values */
198         if (vxi->sched.fill_rate <= 0)
199                 vxi->sched.fill_rate = 1;
200         if (vxi->sched.interval <= 0)
201                 vxi->sched.interval = HZ;
202         if (vxi->sched.tokens_max == 0)
203                 vxi->sched.tokens_max = 1;
204         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
205                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
206         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
207                 vxi->sched.tokens_min = vxi->sched.tokens_max;
208         if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
209                 vxi->sched.priority_bias = MAX_PRIO_BIAS;
210         if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
211                 vxi->sched.priority_bias = MIN_PRIO_BIAS;
212
213         spin_unlock(&vxi->sched.tokens_lock);
214         put_vx_info(vxi);
215         return 0;
216 }
217