patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / lib / rwsem.c
1 /* rwsem.c: R/W semaphores: contention handling functions
2  *
3  * Written by David Howells (dhowells@redhat.com).
4  * Derived from arch/i386/kernel/semaphore.c
5  */
6 #include <linux/rwsem.h>
7 #include <linux/sched.h>
8 #include <linux/init.h>
9 #include <linux/module.h>
10
11 struct rwsem_waiter {
12         struct list_head        list;
13         struct task_struct      *task;
14         unsigned int            flags;
15 #define RWSEM_WAITING_FOR_READ  0x00000001
16 #define RWSEM_WAITING_FOR_WRITE 0x00000002
17 };
18
#if RWSEM_DEBUG
#undef rwsemtrace
/*
 * debugging aid: dump the state of a semaphore
 * - always prints the semaphore's address and its count
 * - additionally prints the current pid, the caller's tag string and the count
 *   when the semaphore's debug flag is set
 */
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
        printk("sem=%p\n",sem);
        printk("(sem)=%08lx\n",sem->count);

        /* the tagged line is only wanted for semaphores marked for debugging */
        if (!sem->debug)
                return;

        printk("[%d] %s({%08lx})\n",current->pid,str,sem->count);
}
#endif
29
30 /*
31  * handle the lock being released whilst there are processes blocked on it that can now run
32  * - if we come here from up_xxxx(), then:
33  *   - the 'active part' of the count (&0x0000ffff) had reached zero (but may have changed)
34  *   - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so)
35  *   - there must be someone on the queue
36  * - the spinlock must be held by the caller
37  * - woken process blocks are discarded from the list after having task zeroed
38  * - writers are only woken if downgrading is false
39  */
40 static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
41 {
42         struct rwsem_waiter *waiter;
43         struct task_struct *tsk;
44         struct list_head *next;
45         signed long oldcount, woken, loop;
46
47         rwsemtrace(sem,"Entering __rwsem_do_wake");
48
49         if (downgrading)
50                 goto dont_wake_writers;
51
52         /* if we came through an up_xxxx() call, we only only wake someone up
53          * if we can transition the active part of the count from 0 -> 1
54          */
55  try_again:
56         oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS,sem) - RWSEM_ACTIVE_BIAS;
57         if (oldcount & RWSEM_ACTIVE_MASK)
58                 goto undo;
59
60         waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
61
62         /* try to grant a single write lock if there's a writer at the front of the queue
63          * - note we leave the 'active part' of the count incremented by 1 and the waiting part
64          *   incremented by 0x00010000
65          */
66         if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
67                 goto readers_only;
68
69         list_del(&waiter->list);
70         tsk = waiter->task;
71         mb();
72         waiter->task = NULL;
73         wake_up_process(tsk);
74         put_task_struct(tsk);
75         goto out;
76
77         /* don't want to wake any writers */
78  dont_wake_writers:
79         waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
80         if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
81                 goto out;
82
83         /* grant an infinite number of read locks to the readers at the front
84          * of the queue
85          * - note we increment the 'active part' of the count by the number of
86          *   readers before waking any processes up
87          */
88  readers_only:
89         woken = 0;
90         do {
91                 woken++;
92
93                 if (waiter->list.next==&sem->wait_list)
94                         break;
95
96                 waiter = list_entry(waiter->list.next,struct rwsem_waiter,list);
97
98         } while (waiter->flags & RWSEM_WAITING_FOR_READ);
99
100         loop = woken;
101         woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
102         if (!downgrading)
103                 woken -= RWSEM_ACTIVE_BIAS; /* we'd already done one increment
104                                              * earlier */
105         rwsem_atomic_add(woken,sem);
106
107         next = sem->wait_list.next;
108         for (; loop>0; loop--) {
109                 waiter = list_entry(next,struct rwsem_waiter,list);
110                 next = waiter->list.next;
111                 tsk = waiter->task;
112                 mb();
113                 waiter->task = NULL;
114                 wake_up_process(tsk);
115                 put_task_struct(tsk);
116         }
117
118         sem->wait_list.next = next;
119         next->prev = &sem->wait_list;
120
121  out:
122         rwsemtrace(sem,"Leaving __rwsem_do_wake");
123         return sem;
124
125         /* undo the change to count, but check for a transition 1->0 */
126  undo:
127         if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS,sem)!=0)
128                 goto out;
129         goto try_again;
130 }
131
132 /*
133  * wait for a lock to be granted
134  */
135 static inline struct rw_semaphore *rwsem_down_failed_common(struct rw_semaphore *sem,
136                                                                  struct rwsem_waiter *waiter,
137                                                                  signed long adjustment)
138 {
139         struct task_struct *tsk = current;
140         signed long count;
141
142         set_task_state(tsk,TASK_UNINTERRUPTIBLE);
143
144         /* set up my own style of waitqueue */
145         spin_lock(&sem->wait_lock);
146         waiter->task = tsk;
147         get_task_struct(tsk);
148
149         list_add_tail(&waiter->list,&sem->wait_list);
150
151         /* note that we're now waiting on the lock, but no longer actively read-locking */
152         count = rwsem_atomic_update(adjustment,sem);
153
154         /* if there are no longer active locks, wake the front queued process(es) up
155          * - it might even be this process, since the waker takes a more active part
156          */
157         if (!(count & RWSEM_ACTIVE_MASK))
158                 sem = __rwsem_do_wake(sem, 0);
159
160         spin_unlock(&sem->wait_lock);
161
162         /* wait to be given the lock */
163         for (;;) {
164                 if (!waiter->task)
165                         break;
166                 schedule();
167                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
168         }
169
170         tsk->state = TASK_RUNNING;
171
172         return sem;
173 }
174
175 /*
176  * wait for the read lock to be granted
177  */
178 struct rw_semaphore fastcall __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
179 {
180         struct rwsem_waiter waiter;
181
182         rwsemtrace(sem,"Entering rwsem_down_read_failed");
183
184         waiter.flags = RWSEM_WAITING_FOR_READ;
185         rwsem_down_failed_common(sem,&waiter,RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS);
186
187         rwsemtrace(sem,"Leaving rwsem_down_read_failed");
188         return sem;
189 }
190
191 /*
192  * wait for the write lock to be granted
193  */
194 struct rw_semaphore fastcall __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
195 {
196         struct rwsem_waiter waiter;
197
198         rwsemtrace(sem,"Entering rwsem_down_write_failed");
199
200         waiter.flags = RWSEM_WAITING_FOR_WRITE;
201         rwsem_down_failed_common(sem,&waiter,-RWSEM_ACTIVE_BIAS);
202
203         rwsemtrace(sem,"Leaving rwsem_down_write_failed");
204         return sem;
205 }
206
207 /*
208  * handle waking up a waiter on the semaphore
209  * - up_read/up_write has decremented the active part of the count if we come here
210  */
211 struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
212 {
213         rwsemtrace(sem,"Entering rwsem_wake");
214
215         spin_lock(&sem->wait_lock);
216
217         /* do nothing if list empty */
218         if (!list_empty(&sem->wait_list))
219                 sem = __rwsem_do_wake(sem, 0);
220
221         spin_unlock(&sem->wait_lock);
222
223         rwsemtrace(sem,"Leaving rwsem_wake");
224
225         return sem;
226 }
227
228 /*
229  * downgrade a write lock into a read lock
230  * - caller incremented waiting part of count, and discovered it to be still negative
231  * - just wake up any readers at the front of the queue
232  */
233 struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
234 {
235         rwsemtrace(sem,"Entering rwsem_downgrade_wake");
236
237         spin_lock(&sem->wait_lock);
238
239         /* do nothing if list empty */
240         if (!list_empty(&sem->wait_list))
241                 sem = __rwsem_do_wake(sem, 1);
242
243         spin_unlock(&sem->wait_lock);
244
245         rwsemtrace(sem,"Leaving rwsem_downgrade_wake");
246         return sem;
247 }
248
/* contention-handling entry points: the two wait-for-grant routines ... */
EXPORT_SYMBOL_NOVERS(rwsem_down_read_failed);
EXPORT_SYMBOL_NOVERS(rwsem_down_write_failed);
/* ... and the two wakeup routines */
EXPORT_SYMBOL_NOVERS(rwsem_wake);
EXPORT_SYMBOL_NOVERS(rwsem_downgrade_wake);
/* the trace helper only exists as a function in debug builds */
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif