ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / lib / rwsem.c
1 /* rwsem.c: R/W semaphores: contention handling functions
2  *
3  * Written by David Howells (dhowells@redhat.com).
4  * Derived from arch/i386/kernel/semaphore.c
5  */
6 #include <linux/rwsem.h>
7 #include <linux/sched.h>
8 #include <linux/init.h>
9 #include <linux/module.h>
10
11 struct rwsem_waiter {
12         struct list_head        list;
13         struct task_struct      *task;
14         unsigned int            flags;
15 #define RWSEM_WAITING_FOR_READ  0x00000001
16 #define RWSEM_WAITING_FOR_WRITE 0x00000002
17 };
18
#if RWSEM_DEBUG
#undef rwsemtrace
/*
 * debugging aid, only built when RWSEM_DEBUG is non-zero
 * - always prints the semaphore's address and its current count
 * - additionally prints the current pid, the caller-supplied message and the
 *   count again, but only for semaphores that have their debug flag set
 */
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
	printk("sem=%p\n", sem);
	printk("(sem)=%08lx\n", sem->count);

	/* per-semaphore tracing not switched on: nothing more to report */
	if (!sem->debug)
		return;

	printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
}
#endif
29
30 /*
31  * handle the lock being released whilst there are processes blocked on it that can now run
32  * - if we come here, then:
33  *   - the 'active part' of the count (&0x0000ffff) reached zero but has been re-incremented
34  *   - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so)
35  *   - there must be someone on the queue
36  * - the spinlock must be held by the caller
37  * - woken process blocks are discarded from the list after having task zeroed
38  * - writers are only woken if wakewrite is non-zero
39  */
40 static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
41 {
42         struct rwsem_waiter *waiter;
43         struct task_struct *tsk;
44         struct list_head *next;
45         signed long oldcount, woken, loop;
46
47         rwsemtrace(sem,"Entering __rwsem_do_wake");
48
49         if (!wakewrite)
50                 goto dont_wake_writers;
51
52         /* only wake someone up if we can transition the active part of the count from 0 -> 1 */
53  try_again:
54         oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS,sem) - RWSEM_ACTIVE_BIAS;
55         if (oldcount & RWSEM_ACTIVE_MASK)
56                 goto undo;
57
58         waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
59
60         /* try to grant a single write lock if there's a writer at the front of the queue
61          * - note we leave the 'active part' of the count incremented by 1 and the waiting part
62          *   incremented by 0x00010000
63          */
64         if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
65                 goto readers_only;
66
67         list_del(&waiter->list);
68         tsk = waiter->task;
69         mb();
70         waiter->task = NULL;
71         wake_up_process(tsk);
72         put_task_struct(tsk);
73         goto out;
74
75         /* don't want to wake any writers */
76  dont_wake_writers:
77         waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
78         if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
79                 goto out;
80
81         /* grant an infinite number of read locks to the readers at the front of the queue
82          * - note we increment the 'active part' of the count by the number of readers (less one
83          *   for the activity decrement we've already done) before waking any processes up
84          */
85  readers_only:
86         woken = 0;
87         do {
88                 woken++;
89
90                 if (waiter->list.next==&sem->wait_list)
91                         break;
92
93                 waiter = list_entry(waiter->list.next,struct rwsem_waiter,list);
94
95         } while (waiter->flags & RWSEM_WAITING_FOR_READ);
96
97         loop = woken;
98         woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS;
99         woken -= RWSEM_ACTIVE_BIAS;
100         rwsem_atomic_add(woken,sem);
101
102         next = sem->wait_list.next;
103         for (; loop>0; loop--) {
104                 waiter = list_entry(next,struct rwsem_waiter,list);
105                 next = waiter->list.next;
106                 tsk = waiter->task;
107                 mb();
108                 waiter->task = NULL;
109                 wake_up_process(tsk);
110                 put_task_struct(tsk);
111         }
112
113         sem->wait_list.next = next;
114         next->prev = &sem->wait_list;
115
116  out:
117         rwsemtrace(sem,"Leaving __rwsem_do_wake");
118         return sem;
119
120         /* undo the change to count, but check for a transition 1->0 */
121  undo:
122         if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS,sem)!=0)
123                 goto out;
124         goto try_again;
125 }
126
127 /*
128  * wait for a lock to be granted
129  */
130 static inline struct rw_semaphore *rwsem_down_failed_common(struct rw_semaphore *sem,
131                                                                  struct rwsem_waiter *waiter,
132                                                                  signed long adjustment)
133 {
134         struct task_struct *tsk = current;
135         signed long count;
136
137         set_task_state(tsk,TASK_UNINTERRUPTIBLE);
138
139         /* set up my own style of waitqueue */
140         spin_lock(&sem->wait_lock);
141         waiter->task = tsk;
142         get_task_struct(tsk);
143
144         list_add_tail(&waiter->list,&sem->wait_list);
145
146         /* note that we're now waiting on the lock, but no longer actively read-locking */
147         count = rwsem_atomic_update(adjustment,sem);
148
149         /* if there are no longer active locks, wake the front queued process(es) up
150          * - it might even be this process, since the waker takes a more active part
151          */
152         if (!(count & RWSEM_ACTIVE_MASK))
153                 sem = __rwsem_do_wake(sem,1);
154
155         spin_unlock(&sem->wait_lock);
156
157         /* wait to be given the lock */
158         for (;;) {
159                 if (!waiter->task)
160                         break;
161                 schedule();
162                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
163         }
164
165         tsk->state = TASK_RUNNING;
166
167         return sem;
168 }
169
170 /*
171  * wait for the read lock to be granted
172  */
173 struct rw_semaphore fastcall __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
174 {
175         struct rwsem_waiter waiter;
176
177         rwsemtrace(sem,"Entering rwsem_down_read_failed");
178
179         waiter.flags = RWSEM_WAITING_FOR_READ;
180         rwsem_down_failed_common(sem,&waiter,RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS);
181
182         rwsemtrace(sem,"Leaving rwsem_down_read_failed");
183         return sem;
184 }
185
186 /*
187  * wait for the write lock to be granted
188  */
189 struct rw_semaphore fastcall __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
190 {
191         struct rwsem_waiter waiter;
192
193         rwsemtrace(sem,"Entering rwsem_down_write_failed");
194
195         waiter.flags = RWSEM_WAITING_FOR_WRITE;
196         rwsem_down_failed_common(sem,&waiter,-RWSEM_ACTIVE_BIAS);
197
198         rwsemtrace(sem,"Leaving rwsem_down_write_failed");
199         return sem;
200 }
201
202 /*
203  * handle waking up a waiter on the semaphore
204  * - up_read has decremented the active part of the count if we come here
205  */
206 struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
207 {
208         rwsemtrace(sem,"Entering rwsem_wake");
209
210         spin_lock(&sem->wait_lock);
211
212         /* do nothing if list empty */
213         if (!list_empty(&sem->wait_list))
214                 sem = __rwsem_do_wake(sem,1);
215
216         spin_unlock(&sem->wait_lock);
217
218         rwsemtrace(sem,"Leaving rwsem_wake");
219
220         return sem;
221 }
222
223 /*
224  * downgrade a write lock into a read lock
225  * - caller incremented waiting part of count, and discovered it to be still negative
226  * - just wake up any readers at the front of the queue
227  */
228 struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
229 {
230         rwsemtrace(sem,"Entering rwsem_downgrade_wake");
231
232         spin_lock(&sem->wait_lock);
233
234         /* do nothing if list empty */
235         if (!list_empty(&sem->wait_list))
236                 sem = __rwsem_do_wake(sem,0);
237
238         spin_unlock(&sem->wait_lock);
239
240         rwsemtrace(sem,"Leaving rwsem_downgrade_wake");
241         return sem;
242 }
243
244 EXPORT_SYMBOL_NOVERS(rwsem_down_read_failed); /* contended read-lock slow path */
245 EXPORT_SYMBOL_NOVERS(rwsem_down_write_failed); /* contended write-lock slow path */
246 EXPORT_SYMBOL_NOVERS(rwsem_wake); /* wake queued waiters after a release */
247 EXPORT_SYMBOL_NOVERS(rwsem_downgrade_wake); /* wake queued readers after a write->read downgrade */
248 #if RWSEM_DEBUG /* the trace helper only exists, and is only exported, in debug builds */
249 EXPORT_SYMBOL(rwsemtrace);
250 #endif