/*
 * VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
 * [linux-2.6.git] / arch / ppc64 / kernel / idle.c
 */
1 /*
2  * Idle daemon for PowerPC.  Idle daemon will handle any action
3  * that needs to be taken when the system becomes idle.
4  *
5  * Originally Written by Cort Dougan (cort@cs.nmt.edu)
6  *
7  * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com>
8  *
9  * Additional shared processor, SMT, and firmware support
10  *    Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License
14  * as published by the Free Software Foundation; either version
15  * 2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
21 #include <linux/kernel.h>
22 #include <linux/mm.h>
23 #include <linux/smp.h>
24 #include <linux/smp_lock.h>
25 #include <linux/stddef.h>
26 #include <linux/unistd.h>
27 #include <linux/slab.h>
28 #include <linux/interrupt.h>
29 #include <linux/cpu.h>
30
31 #include <asm/pgtable.h>
32 #include <asm/uaccess.h>
33 #include <asm/system.h>
34 #include <asm/io.h>
35 #include <asm/processor.h>
36 #include <asm/mmu.h>
37 #include <asm/cache.h>
38 #include <asm/cputable.h>
39 #include <asm/time.h>
40 #include <asm/iSeries/LparData.h>
41 #include <asm/iSeries/HvCall.h>
42 #include <asm/iSeries/ItLpQueue.h>
43
/* Hypervisor yield/poll primitives and the POWER4 nap entry point;
 * implemented elsewhere (assembly / platform code). */
extern long cede_processor(void);
extern long poll_pending(void);
extern void power4_idle(void);

/* Idle-loop implementation for this platform, chosen once at boot by
 * idle_setup() and invoked forever by cpu_idle(). */
int (*idle_loop)(void);
49
50 #ifdef CONFIG_PPC_ISERIES
/* Extremes of observed yield durations, in timebase ticks.  Updated
 * without locking, so on SMP these are statistics only, not exact. */
unsigned long maxYieldTime = 0;
unsigned long minYieldTime = 0xffffffffffffffffUL;

/*
 * Yield this shared processor back to the hypervisor for up to one
 * jiffy.  Called from the iSeries idle loop when there is no runnable
 * work; returns when the yield expires or an enabled event wakes us.
 */
static void yield_shared_processor(void)
{
	unsigned long tb;
	unsigned long yieldTime;

	/* Allow any of these event classes to end the yield early. */
	HvCall_setEnabledInterrupts(HvCall_MaskIPI |
				    HvCall_MaskLpEvent |
				    HvCall_MaskLpProd |
				    HvCall_MaskTimeout);

	tb = get_tb();
	/* Compute future tb value when yield should expire */
	HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);

	/* Record how long the hypervisor actually held us yielded. */
	yieldTime = get_tb() - tb;
	if (yieldTime > maxYieldTime)
		maxYieldTime = yieldTime;

	if (yieldTime < minYieldTime)
		minYieldTime = yieldTime;

	/*
	 * The decrementer stops during the yield.  Force a fake decrementer
	 * here and let the timer_interrupt code sort out the actual time.
	 */
	get_paca()->lppaca.xIntDword.xFields.xDecrInt = 1;
	process_iSeries_events();
}
82
83 int iSeries_idle(void)
84 {
85         struct paca_struct *lpaca;
86         long oldval;
87         unsigned long CTRL;
88
89         /* ensure iSeries run light will be out when idle */
90         clear_thread_flag(TIF_RUN_LIGHT);
91         CTRL = mfspr(CTRLF);
92         CTRL &= ~RUNLATCH;
93         mtspr(CTRLT, CTRL);
94 #if 0
95         init_idle();    
96 #endif
97
98         lpaca = get_paca();
99
100         for (;;) {
101                 if (lpaca->lppaca.xSharedProc) {
102                         if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
103                                 process_iSeries_events();
104                         if (!need_resched())
105                                 yield_shared_processor();
106                 } else {
107                         oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
108
109                         if (!oldval) {
110                                 set_thread_flag(TIF_POLLING_NRFLAG);
111
112                                 while (!need_resched()) {
113                                         HMT_medium();
114                                         if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
115                                                 process_iSeries_events();
116                                         HMT_low();
117                                 }
118
119                                 HMT_medium();
120                                 clear_thread_flag(TIF_POLLING_NRFLAG);
121                         } else {
122                                 set_need_resched();
123                         }
124                 }
125
126                 schedule();
127         }
128         return 0;
129 }
130 #endif
131
/*
 * Generic ppc64 idle loop: spin polling need_resched() at low SMT
 * thread priority, with no hypervisor assistance.  Also where offlined
 * cpus go to die once the system is fully up.  Loops forever; the
 * trailing return is never reached.
 */
int default_idle(void)
{
	long oldval;

	while (1) {
		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

		if (!oldval) {
			/* Advertise that we poll need_resched() so remote
			 * cpus may skip sending a resched IPI. */
			set_thread_flag(TIF_POLLING_NRFLAG);

			while (!need_resched()) {
				barrier();	/* force need_resched() to reload */
				HMT_low();	/* drop SMT thread priority while spinning */
			}

			HMT_medium();
			clear_thread_flag(TIF_POLLING_NRFLAG);
		} else {
			/* Flag was set and we just cleared it; put it back
			 * so schedule() runs the pending request. */
			set_need_resched();
		}

		schedule();
		if (cpu_is_offline(smp_processor_id()) &&
				system_state == SYSTEM_RUNNING)
			cpu_die();
	}

	return 0;
}
161
162 #ifdef CONFIG_PPC_PSERIES
163
164 DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
165
/*
 * Idle loop for dedicated-processor SPLPAR pSeries partitions with SMT.
 *
 * Polls need_resched() at reduced thread priority for a per-cpu
 * "snooze" window; after it expires, either cedes this hardware thread
 * to the hypervisor (dynamic SMT with a busy sibling) or polls the
 * hypervisor for pending work.  Loops forever.
 */
int dedicated_idle(void)
{
	long oldval;
	struct paca_struct *lpaca = get_paca(), *ppaca;
	unsigned long start_snooze;
	unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);

	/* paca of the SMT sibling thread; assumes sibling threads are
	 * numbered as even/odd cpu pairs -- TODO confirm. */
	ppaca = &paca[smp_processor_id() ^ 1];

	while (1) {
		/* Indicate to the HV that we are idle.  Now would be
		 * a good time to find other work to dispatch. */
		lpaca->lppaca.xIdle = 1;

		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
		if (!oldval) {
			set_thread_flag(TIF_POLLING_NRFLAG);
			/* Timebase value at which the snooze window ends
			 * and we may cede/poll instead of just spinning. */
			start_snooze = __get_tb() +
				*smt_snooze_delay * tb_ticks_per_usec;
			while (!need_resched()) {
				/* need_resched could be 1 or 0 at this 
				 * point.  If it is 0, set it to 0, so
				 * an IPI/Prod is sent.  If it is 1, keep
				 * it that way & schedule work.
				 */
				if (*smt_snooze_delay == 0 ||
				    __get_tb() < start_snooze) {
					HMT_low(); /* Low thread priority */
					continue;
				}

				HMT_very_low(); /* Low power mode */

				/* If the SMT mode is system controlled & the 
				 * partner thread is doing work, switch into
				 * ST mode.
				 */
				if((naca->smt_state == SMT_DYNAMIC) &&
				   (!(ppaca->lppaca.xIdle))) {
					/* Indicate we are no longer polling for
					 * work, and then clear need_resched.  If
					 * need_resched was 1, set it back to 1
					 * and schedule work
					 */
					clear_thread_flag(TIF_POLLING_NRFLAG);
					oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
					if(oldval == 1) {
						set_need_resched();
						break;
					}

					/* DRENG: Go HMT_medium here ? */
					local_irq_disable(); 

					/* SMT dynamic mode.  Cede will result 
					 * in this thread going dormant, if the
					 * partner thread is still doing work.
					 * Thread wakes up if partner goes idle,
					 * an interrupt is presented, or a prod
					 * occurs.  Returning from the cede
					 * enables external interrupts.
					 */
					cede_processor();
				} else {
					/* Give the HV an opportunity at the
					 * processor, since we are not doing
					 * any work.
					 */
					poll_pending();
				}
			}
		} else {
			/* A resched was already pending; restore the flag so
			 * schedule() handles it. */
			set_need_resched();
		}

		HMT_medium();
		lpaca->lppaca.xIdle = 0;
		schedule();
		/* Offlined cpus die here once the system is fully up. */
		if (cpu_is_offline(smp_processor_id()) &&
				system_state == SYSTEM_RUNNING)
			cpu_die();
	}
	return 0;
}
250
/*
 * Idle loop for shared-processor SPLPAR pSeries partitions: cede the
 * virtual processor back to the hypervisor whenever there is no
 * runnable work.  Loops forever.
 */
int shared_idle(void)
{
	struct paca_struct *lpaca = get_paca();

	while (1) {
		/* Offlined cpus die here once the system is fully up. */
		if (cpu_is_offline(smp_processor_id()) &&
				system_state == SYSTEM_RUNNING)
			cpu_die();

		/* Indicate to the HV that we are idle.  Now would be
		 * a good time to find other work to dispatch. */
		lpaca->lppaca.xIdle = 1;

		if (!need_resched()) {
			local_irq_disable(); 
			
			/* 
			 * Yield the processor to the hypervisor.  We return if
			 * an external interrupt occurs (which are driven prior
			 * to returning here) or if a prod occurs from another 
			 * processor.  When returning here, external interrupts 
			 * are enabled.
			 *
			 * NOTE(review): a resched request arriving between the
			 * need_resched() test above and the cede appears to
			 * rely on a prod/interrupt to wake this vcpu -- verify
			 * that all wakers send one.
			 */
			cede_processor();
		}

		HMT_medium();
		lpaca->lppaca.xIdle = 0;
		schedule();
	}

	return 0;
}
284 #endif
285
/*
 * Architecture entry point for the per-cpu idle task: run whichever
 * idle loop idle_setup() selected.  The loop is not expected to return.
 */
int cpu_idle(void)
{
	(void) idle_loop();
	return 0; 
}
291
/*
 * Idle loop for native (non-hypervisor) POWER4-class machines, e.g.
 * PowerMac: nap in hardware until there is work, then schedule it.
 * Loops forever; the trailing return is never reached.
 */
int native_idle(void)
{
	for (;;) {
		if (!need_resched())
			power4_idle();	/* enter low-power nap/doze */
		if (need_resched())
			schedule();
	}
	return 0;
}
302
/*
 * Choose the idle loop implementation for this platform and store it in
 * idle_loop for cpu_idle() to call.  Always returns 1.
 */
int idle_setup(void)
{
#ifdef CONFIG_PPC_ISERIES
	/* iSeries kernels support only the iSeries loop. */
	idle_loop = iSeries_idle;
#else
	if (systemcfg->platform & PLATFORM_PSERIES) {
		if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
			/* Shared-processor LPAR firmware: pick the variant
			 * matching this partition's processor mode. */
			if (get_paca()->lppaca.xSharedProc) {
				printk("idle = shared_idle\n");
				idle_loop = shared_idle;
			} else {
				printk("idle = dedicated_idle\n");
				idle_loop = dedicated_idle;
			}
		} else {
			printk("idle = default_idle\n");
			idle_loop = default_idle;
		}
	} else if (systemcfg->platform == PLATFORM_POWERMAC) {
		printk("idle = native_idle\n");
		idle_loop = native_idle;
	} else {
		/* Unknown platform: fall back to the generic spin loop. */
		printk("idle_setup: unknown platform, use default_idle\n");
		idle_loop = default_idle;
	}
#endif

	return 1;
}
332