Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by the above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge number
40  *                                      of socks hashed (this is for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/config.h>
87 #include <linux/kernel.h>
88 #include <linux/signal.h>
89 #include <linux/sched.h>
90 #include <linux/errno.h>
91 #include <linux/string.h>
92 #include <linux/stat.h>
93 #include <linux/dcache.h>
94 #include <linux/namei.h>
95 #include <linux/socket.h>
96 #include <linux/un.h>
97 #include <linux/fcntl.h>
98 #include <linux/termios.h>
99 #include <linux/sockios.h>
100 #include <linux/net.h>
101 #include <linux/in.h>
102 #include <linux/fs.h>
103 #include <linux/slab.h>
104 #include <asm/uaccess.h>
105 #include <linux/skbuff.h>
106 #include <linux/netdevice.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/smp_lock.h>
116 #include <linux/rtnetlink.h>
117 #include <linux/mount.h>
118 #include <net/checksum.h>
119 #include <linux/security.h>
120 #include <linux/vs_context.h>
121 #include <linux/vs_network.h>
122 #include <linux/vs_limit.h>
123
/* Default queue limit; unix_create1() copies it into sk_max_ack_backlog. */
int sysctl_unix_max_dgram_qlen = 10;

/*
 * Socket hash table.  The extra bucket at index UNIX_HASH_SIZE is used for
 * sockets that are not bound yet (see unix_sockets_unbound below).
 */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
DEFINE_SPINLOCK(unix_table_lock);
/* Incremented in unix_create1(), decremented in unix_sock_destructor(). */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Bucket that newly created, still unbound sockets are inserted into. */
#define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])

/*
 * unix_bind() stores UNIX_HASH_SIZE as the hash of filesystem names, so any
 * other hash value marks an abstract name.  Dereferences addr unconditionally.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
133
134 /*
135  *  SMP locking strategy:
136  *    hash table is protected with spinlock unix_table_lock
137  *    each socket state is protected by separate rwlock.
138  */
139
140 static inline unsigned unix_hash_fold(unsigned hash)
141 {
142         hash ^= hash>>16;
143         hash ^= hash>>8;
144         return hash&(UNIX_HASH_SIZE-1);
145 }
146
147 #define unix_peer(sk) (unix_sk(sk)->peer)
148
149 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
150 {
151         return unix_peer(osk) == sk;
152 }
153
154 static inline int unix_may_send(struct sock *sk, struct sock *osk)
155 {
156         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
157 }
158
159 static struct sock *unix_peer_get(struct sock *s)
160 {
161         struct sock *peer;
162
163         unix_state_rlock(s);
164         peer = unix_peer(s);
165         if (peer)
166                 sock_hold(peer);
167         unix_state_runlock(s);
168         return peer;
169 }
170
171 static inline void unix_release_addr(struct unix_address *addr)
172 {
173         if (atomic_dec_and_test(&addr->refcnt))
174                 kfree(addr);
175 }
176
177 /*
178  *      Check unix socket name:
179  *              - should be not zero length.
180  *              - if started by not zero, should be NULL terminated (FS object)
181  *              - if started by zero, it is abstract name.
182  */
183  
184 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
185 {
186         if (len <= sizeof(short) || len > sizeof(*sunaddr))
187                 return -EINVAL;
188         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
189                 return -EINVAL;
190         if (sunaddr->sun_path[0]) {
191                 /*
192                  * This may look like an off by one error but it is a bit more
193                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
194                  * sun_path[108] doesnt as such exist.  However in kernel space
195                  * we are guaranteed that it is a valid memory location in our
196                  * kernel address buffer.
197                  */
198                 ((char *)sunaddr)[len]=0;
199                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
200                 return len;
201         }
202
203         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
204         return len;
205 }
206
/*
 * Take sk off whatever hash chain it is on.  No locking is done here; the
 * locked wrapper is unix_remove_socket().
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
211
/*
 * Add sk to the given hash chain.  The socket is expected to be unhashed;
 * BUG_TRAP() checks that.  No locking is done here; the locked wrapper is
 * unix_insert_socket().
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        BUG_TRAP(sk_unhashed(sk));
        sk_add_node(sk, list);
}
217
218 static inline void unix_remove_socket(struct sock *sk)
219 {
220         spin_lock(&unix_table_lock);
221         __unix_remove_socket(sk);
222         spin_unlock(&unix_table_lock);
223 }
224
225 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
226 {
227         spin_lock(&unix_table_lock);
228         __unix_insert_socket(list, sk);
229         spin_unlock(&unix_table_lock);
230 }
231
232 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
233                                               int len, int type, unsigned hash)
234 {
235         struct sock *s;
236         struct hlist_node *node;
237
238         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
239                 struct unix_sock *u = unix_sk(s);
240
241                 if (!vx_check(s->sk_xid, VX_IDENT|VX_WATCH))
242                         continue;
243                 if (u->addr->len == len &&
244                     !memcmp(u->addr->name, sunname, len))
245                         goto found;
246         }
247         s = NULL;
248 found:
249         return s;
250 }
251
252 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
253                                                    int len, int type,
254                                                    unsigned hash)
255 {
256         struct sock *s;
257
258         spin_lock(&unix_table_lock);
259         s = __unix_find_socket_byname(sunname, len, type, hash);
260         if (s)
261                 sock_hold(s);
262         spin_unlock(&unix_table_lock);
263         return s;
264 }
265
266 static struct sock *unix_find_socket_byinode(struct inode *i)
267 {
268         struct sock *s;
269         struct hlist_node *node;
270
271         spin_lock(&unix_table_lock);
272         sk_for_each(s, node,
273                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
274                 struct dentry *dentry = unix_sk(s)->dentry;
275
276                 if(dentry && dentry->d_inode == i)
277                 {
278                         sock_hold(s);
279                         goto found;
280                 }
281         }
282         s = NULL;
283 found:
284         spin_unlock(&unix_table_lock);
285         return s;
286 }
287
288 static inline int unix_writable(struct sock *sk)
289 {
290         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
291 }
292
293 static void unix_write_space(struct sock *sk)
294 {
295         read_lock(&sk->sk_callback_lock);
296         if (unix_writable(sk)) {
297                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
298                         wake_up_interruptible(sk->sk_sleep);
299                 sk_wake_async(sk, 2, POLL_OUT);
300         }
301         read_unlock(&sk->sk_callback_lock);
302 }
303
304 /* When dgram socket disconnects (or changes its peer), we clear its receive
305  * queue of packets arrived from previous peer. First, it allows to do
306  * flow control based only on wmem_alloc; second, sk connected to peer
307  * may receive messages only from that peer. */
308 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
309 {
310         if (!skb_queue_empty(&sk->sk_receive_queue)) {
311                 skb_queue_purge(&sk->sk_receive_queue);
312                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
313
314                 /* If one link of bidirectional dgram pipe is disconnected,
315                  * we signal error. Messages are lost. Do not make this,
316                  * when peer was not connected to us.
317                  */
318                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
319                         other->sk_err = ECONNRESET;
320                         other->sk_error_report(other);
321                 }
322         }
323 }
324
325 static void unix_sock_destructor(struct sock *sk)
326 {
327         struct unix_sock *u = unix_sk(sk);
328
329         skb_queue_purge(&sk->sk_receive_queue);
330
331         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
332         BUG_TRAP(sk_unhashed(sk));
333         BUG_TRAP(!sk->sk_socket);
334         if (!sock_flag(sk, SOCK_DEAD)) {
335                 printk("Attempt to release alive unix socket: %p\n", sk);
336                 return;
337         }
338
339         if (u->addr)
340                 unix_release_addr(u->addr);
341
342         atomic_dec(&unix_nr_socks);
343 #ifdef UNIX_REFCNT_DEBUG
344         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
345 #endif
346 }
347
/*
 * Tear down a unix socket.
 *
 * sk      - socket to release; it is unhashed, orphaned and marked closed.
 * embrion - non-zero when called for a not yet accepted connection found on
 *           a listening socket's receive queue (see the recursive call
 *           below); it forces ECONNRESET on the peer.
 *
 * Always returns 0.
 */
static int unix_release_sock (struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct dentry *dentry;
        struct vfsmount *mnt;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_wlock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        /* Detach dentry/mnt under the lock; they are released further down. */
        dentry       = u->dentry;
        u->dentry    = NULL;
        mnt          = u->mnt;
        u->mnt       = NULL;
        /* Remember the old state: a listener needs its queue handled specially. */
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_wunlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair=unix_peer(sk);

        if (skpair!=NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_wlock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data on our side (or an embryo) means the peer sees a reset. */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_wunlock(skpair);
                        skpair->sk_state_change(skpair);
                        read_lock(&skpair->sk_callback_lock);
                        sk_wake_async(skpair,1,POLL_HUP);
                        read_unlock(&skpair->sk_callback_lock);
                }
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /* On a listener each queued skb stands for a pending connection: release its socket too. */
                if (state==TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                kfree_skb(skb);
        }

        if (dentry) {
                dput(dentry);
                mntput(mnt);
        }

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (atomic_read(&unix_tot_inflight))
                unix_gc();              /* Garbage collect fds */

        return 0;
}
426
427 static int unix_listen(struct socket *sock, int backlog)
428 {
429         int err;
430         struct sock *sk = sock->sk;
431         struct unix_sock *u = unix_sk(sk);
432
433         err = -EOPNOTSUPP;
434         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
435                 goto out;                       /* Only stream/seqpacket sockets accept */
436         err = -EINVAL;
437         if (!u->addr)
438                 goto out;                       /* No listens on an unbound socket */
439         unix_state_wlock(sk);
440         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
441                 goto out_unlock;
442         if (backlog > sk->sk_max_ack_backlog)
443                 wake_up_interruptible_all(&u->peer_wait);
444         sk->sk_max_ack_backlog  = backlog;
445         sk->sk_state            = TCP_LISTEN;
446         /* set credentials so connect can copy them */
447         sk->sk_peercred.pid     = current->tgid;
448         sk->sk_peercred.uid     = current->euid;
449         sk->sk_peercred.gid     = current->egid;
450         err = 0;
451
452 out_unlock:
453         unix_state_wunlock(sk);
454 out:
455         return err;
456 }
457
/*
 * Forward declarations of the handlers referenced by the proto_ops tables
 * that follow.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
                               struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
                               struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
                              struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
                              struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
                                  struct msghdr *, size_t);
480
/*
 * Operations for SOCK_STREAM sockets (selected in unix_create()).
 * setsockopt/getsockopt, mmap and sendpage use the generic sock_no_* stubs.
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
501
/*
 * Operations for SOCK_DGRAM sockets (also used for SOCK_RAW, which
 * unix_create() converts to SOCK_DGRAM).  accept and listen are the
 * sock_no_* stubs; poll is the generic datagram_poll.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         datagram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
522
/*
 * Operations for SOCK_SEQPACKET sockets (selected in unix_create()).
 * Connection handling (connect/accept/listen) reuses the stream handlers,
 * while poll and recvmsg reuse the datagram ones; sendmsg has its own
 * handler.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         datagram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
543
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size is
 * the size of struct unix_sock.
 */
static struct proto unix_proto = {
        .name     = "UNIX",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct unix_sock),
};
549
550 static struct sock * unix_create1(struct socket *sock)
551 {
552         struct sock *sk = NULL;
553         struct unix_sock *u;
554
555         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
556                 goto out;
557
558         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
559         if (!sk)
560                 goto out;
561
562         atomic_inc(&unix_nr_socks);
563
564         sock_init_data(sock,sk);
565
566         sk->sk_write_space      = unix_write_space;
567         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
568         sk->sk_destruct         = unix_sock_destructor;
569         u         = unix_sk(sk);
570         u->dentry = NULL;
571         u->mnt    = NULL;
572         spin_lock_init(&u->lock);
573         atomic_set(&u->inflight, sock ? 0 : -1);
574         mutex_init(&u->readlock); /* single task reading lock */
575         init_waitqueue_head(&u->peer_wait);
576         unix_insert_socket(unix_sockets_unbound, sk);
577 out:
578         return sk;
579 }
580
581 static int unix_create(struct socket *sock, int protocol)
582 {
583         if (protocol && protocol != PF_UNIX)
584                 return -EPROTONOSUPPORT;
585
586         sock->state = SS_UNCONNECTED;
587
588         switch (sock->type) {
589         case SOCK_STREAM:
590                 sock->ops = &unix_stream_ops;
591                 break;
592                 /*
593                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
594                  *      nothing uses it.
595                  */
596         case SOCK_RAW:
597                 sock->type=SOCK_DGRAM;
598         case SOCK_DGRAM:
599                 sock->ops = &unix_dgram_ops;
600                 break;
601         case SOCK_SEQPACKET:
602                 sock->ops = &unix_seqpacket_ops;
603                 break;
604         default:
605                 return -ESOCKTNOSUPPORT;
606         }
607
608         return unix_create1(sock) ? 0 : -ENOMEM;
609 }
610
611 static int unix_release(struct socket *sock)
612 {
613         struct sock *sk = sock->sk;
614
615         if (!sk)
616                 return 0;
617
618         sock->sk = NULL;
619
620         return unix_release_sock (sk, 0);
621 }
622
623 static int unix_autobind(struct socket *sock)
624 {
625         struct sock *sk = sock->sk;
626         struct unix_sock *u = unix_sk(sk);
627         static u32 ordernum = 1;
628         struct unix_address * addr;
629         int err;
630
631         mutex_lock(&u->readlock);
632
633         err = 0;
634         if (u->addr)
635                 goto out;
636
637         err = -ENOMEM;
638         addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
639         if (!addr)
640                 goto out;
641
642         memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
643         addr->name->sun_family = AF_UNIX;
644         atomic_set(&addr->refcnt, 1);
645
646 retry:
647         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
648         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
649
650         spin_lock(&unix_table_lock);
651         ordernum = (ordernum+1)&0xFFFFF;
652
653         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
654                                       addr->hash)) {
655                 spin_unlock(&unix_table_lock);
656                 /* Sanity yield. It is unusual case, but yet... */
657                 if (!(ordernum&0xFF))
658                         yield();
659                 goto retry;
660         }
661         addr->hash ^= sk->sk_type;
662
663         __unix_remove_socket(sk);
664         u->addr = addr;
665         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
666         spin_unlock(&unix_table_lock);
667         err = 0;
668
669 out:    mutex_unlock(&u->readlock);
670         return err;
671 }
672
/*
 * Look up the socket an address refers to.
 *
 * A name starting with a non-zero byte is resolved as a filesystem path:
 * the caller needs write permission on it, it must be a socket inode and a
 * socket must be bound to that inode.  Any other name is looked up in the
 * abstract name table using len/type/hash.
 *
 * Returns the socket with a reference held.  On failure returns NULL and
 * stores the error in *error: the path_lookup()/vfs_permission() result,
 * -ECONNREFUSED when no socket is found, or -EPROTOTYPE when the socket
 * found by path has a different type.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
                                    int type, unsigned hash, int *error)
{
        struct sock *u;
        struct nameidata nd;
        int err = 0;

        if (sunname->sun_path[0]) {
                err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
                if (err)
                        goto fail;
                err = vfs_permission(&nd, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
                        goto put_fail;
                u=unix_find_socket_byinode(nd.dentry->d_inode);
                if (!u)
                        goto put_fail;

                /* Only touch atime when the lookup is going to succeed. */
                if (u->sk_type == type)
                        touch_atime(nd.mnt, nd.dentry);

                path_release(&nd);

                err=-EPROTOTYPE;
                if (u->sk_type != type) {
                        /* nd was already released above, so skip put_fail. */
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u=unix_find_socket_byname(sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->dentry;
                        if (dentry)
                                touch_atime(unix_sk(u)->mnt, dentry);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_release(&nd);
fail:
        *error=err;
        return NULL;
}
724
725
726 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
727 {
728         struct sock *sk = sock->sk;
729         struct unix_sock *u = unix_sk(sk);
730         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
731         struct dentry * dentry = NULL;
732         struct nameidata nd;
733         int err;
734         unsigned hash;
735         struct unix_address *addr;
736         struct hlist_head *list;
737
738         err = -EINVAL;
739         if (sunaddr->sun_family != AF_UNIX)
740                 goto out;
741
742         if (addr_len==sizeof(short)) {
743                 err = unix_autobind(sock);
744                 goto out;
745         }
746
747         err = unix_mkname(sunaddr, addr_len, &hash);
748         if (err < 0)
749                 goto out;
750         addr_len = err;
751
752         mutex_lock(&u->readlock);
753
754         err = -EINVAL;
755         if (u->addr)
756                 goto out_up;
757
758         err = -ENOMEM;
759         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
760         if (!addr)
761                 goto out_up;
762
763         memcpy(addr->name, sunaddr, addr_len);
764         addr->len = addr_len;
765         addr->hash = hash ^ sk->sk_type;
766         atomic_set(&addr->refcnt, 1);
767
768         if (sunaddr->sun_path[0]) {
769                 unsigned int mode;
770                 err = 0;
771                 /*
772                  * Get the parent directory, calculate the hash for last
773                  * component.
774                  */
775                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
776                 if (err)
777                         goto out_mknod_parent;
778
779                 dentry = lookup_create(&nd, 0);
780                 err = PTR_ERR(dentry);
781                 if (IS_ERR(dentry))
782                         goto out_mknod_unlock;
783
784                 /*
785                  * All right, let's create it.
786                  */
787                 mode = S_IFSOCK |
788                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
789                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL);
790                 if (err)
791                         goto out_mknod_dput;
792                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
793                 dput(nd.dentry);
794                 nd.dentry = dentry;
795
796                 addr->hash = UNIX_HASH_SIZE;
797         }
798
799         spin_lock(&unix_table_lock);
800
801         if (!sunaddr->sun_path[0]) {
802                 err = -EADDRINUSE;
803                 if (__unix_find_socket_byname(sunaddr, addr_len,
804                                               sk->sk_type, hash)) {
805                         unix_release_addr(addr);
806                         goto out_unlock;
807                 }
808
809                 list = &unix_socket_table[addr->hash];
810         } else {
811                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
812                 u->dentry = nd.dentry;
813                 u->mnt    = nd.mnt;
814         }
815
816         err = 0;
817         __unix_remove_socket(sk);
818         u->addr = addr;
819         __unix_insert_socket(list, sk);
820
821 out_unlock:
822         spin_unlock(&unix_table_lock);
823 out_up:
824         mutex_unlock(&u->readlock);
825 out:
826         return err;
827
828 out_mknod_dput:
829         dput(dentry);
830 out_mknod_unlock:
831         mutex_unlock(&nd.dentry->d_inode->i_mutex);
832         path_release(&nd);
833 out_mknod_parent:
834         if (err==-EEXIST)
835                 err=-EADDRINUSE;
836         unix_release_addr(addr);
837         goto out_up;
838 }
839
840 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
841                               int alen, int flags)
842 {
843         struct sock *sk = sock->sk;
844         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
845         struct sock *other;
846         unsigned hash;
847         int err;
848
849         if (addr->sa_family != AF_UNSPEC) {
850                 err = unix_mkname(sunaddr, alen, &hash);
851                 if (err < 0)
852                         goto out;
853                 alen = err;
854
855                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
856                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
857                         goto out;
858
859                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
860                 if (!other)
861                         goto out;
862
863                 unix_state_wlock(sk);
864
865                 err = -EPERM;
866                 if (!unix_may_send(sk, other))
867                         goto out_unlock;
868
869                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
870                 if (err)
871                         goto out_unlock;
872
873         } else {
874                 /*
875                  *      1003.1g breaking connected state with AF_UNSPEC
876                  */
877                 other = NULL;
878                 unix_state_wlock(sk);
879         }
880
881         /*
882          * If it was connected, reconnect.
883          */
884         if (unix_peer(sk)) {
885                 struct sock *old_peer = unix_peer(sk);
886                 unix_peer(sk)=other;
887                 unix_state_wunlock(sk);
888
889                 if (other != old_peer)
890                         unix_dgram_disconnected(sk, old_peer);
891                 sock_put(old_peer);
892         } else {
893                 unix_peer(sk)=other;
894                 unix_state_wunlock(sk);
895         }
896         return 0;
897
898 out_unlock:
899         unix_state_wunlock(sk);
900         sock_put(other);
901 out:
902         return err;
903 }
904
905 static long unix_wait_for_peer(struct sock *other, long timeo)
906 {
907         struct unix_sock *u = unix_sk(other);
908         int sched;
909         DEFINE_WAIT(wait);
910
911         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
912
913         sched = !sock_flag(other, SOCK_DEAD) &&
914                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
915                 (skb_queue_len(&other->sk_receive_queue) >
916                  other->sk_max_ack_backlog);
917
918         unix_state_runlock(other);
919
920         if (sched)
921                 timeo = schedule_timeout(timeo);
922
923         finish_wait(&u->peer_wait, &wait);
924         return timeo;
925 }
926
/*
 *	Connect a stream/seqpacket socket to a listening socket.
 *
 *	A fresh sock (newsk) is created to be the server-side end of the
 *	connection and a one-byte skb charged to it is queued on the
 *	listener's receive queue.  On success sk is TCP_ESTABLISHED with
 *	newsk as its peer.
 *
 *	Returns 0 on success, a negative errno otherwise.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct unix_sock *newu;
	struct unix_sock *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SOCK_PASSCRED set but no address yet: autobind first */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate all resources first.  Doing it after the state is
	 * locked would force us to recheck everything again anyway.
	 */
	err = -ENOMEM;

	/* The new sock that will complete the connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* The skb that gets sent to the listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find the listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch the state of the peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* unix_wait_for_peer() releases the read lock on other */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*
	 * Latch our own state.
	 *
	 * This is a tricky place: we need the write lock on sk and cannot
	 * drop the lock on the peer, so a deadlock is possible.  Connecting
	 * to ourselves and simultaneous connect attempts are ruled out by
	 * looking at the socket state: other is TCP_LISTEN, and if sk were
	 * TCP_LISTEN too we bail out before trying to take the lock.
	 *
	 * The state has to be rechecked once the socket is locked.
	 */
	st = sk->sk_state;

	if (st == TCP_ESTABLISHED) {
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	}
	if (st != TCP_CLOSE) {
		/* Only TCP_CLOSE may go on with the connect */
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock(sk);

	if (sk->sk_state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open: quickly fill in all the necessary fields. */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* Copy the address information from the listener to the new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Our peer credentials are those recorded on the listener */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_wunlock(sk);

	/* Hand the skb over to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo the artificially decreased inflight count now that the
	 * embryo is installed on the listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1105
1106 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1107 {
1108         struct sock *ska=socka->sk, *skb = sockb->sk;
1109
1110         /* Join our sockets back to back */
1111         sock_hold(ska);
1112         sock_hold(skb);
1113         unix_peer(ska)=skb;
1114         unix_peer(skb)=ska;
1115         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1116         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1117         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1118
1119         if (ska->sk_type != SOCK_DGRAM) {
1120                 ska->sk_state = TCP_ESTABLISHED;
1121                 skb->sk_state = TCP_ESTABLISHED;
1122                 socka->state  = SS_CONNECTED;
1123                 sockb->state  = SS_CONNECTED;
1124         }
1125         return 0;
1126 }
1127
1128 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1129 {
1130         struct sock *sk = sock->sk;
1131         struct sock *tsk;
1132         struct sk_buff *skb;
1133         int err;
1134
1135         err = -EOPNOTSUPP;
1136         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1137                 goto out;
1138
1139         err = -EINVAL;
1140         if (sk->sk_state != TCP_LISTEN)
1141                 goto out;
1142
1143         /* If socket state is TCP_LISTEN it cannot change (for now...),
1144          * so that no locks are necessary.
1145          */
1146
1147         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1148         if (!skb) {
1149                 /* This means receive shutdown. */
1150                 if (err == 0)
1151                         err = -EINVAL;
1152                 goto out;
1153         }
1154
1155         tsk = skb->sk;
1156         skb_free_datagram(sk, skb);
1157         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1158
1159         /* attach accepted sock to socket */
1160         unix_state_wlock(tsk);
1161         newsock->state = SS_CONNECTED;
1162         sock_graft(tsk, newsock);
1163         unix_state_wunlock(tsk);
1164         return 0;
1165
1166 out:
1167         return err;
1168 }
1169
1170
1171 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1172 {
1173         struct sock *sk = sock->sk;
1174         struct unix_sock *u;
1175         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1176         int err = 0;
1177
1178         if (peer) {
1179                 sk = unix_peer_get(sk);
1180
1181                 err = -ENOTCONN;
1182                 if (!sk)
1183                         goto out;
1184                 err = 0;
1185         } else {
1186                 sock_hold(sk);
1187         }
1188
1189         u = unix_sk(sk);
1190         unix_state_rlock(sk);
1191         if (!u->addr) {
1192                 sunaddr->sun_family = AF_UNIX;
1193                 sunaddr->sun_path[0] = 0;
1194                 *uaddr_len = sizeof(short);
1195         } else {
1196                 struct unix_address *addr = u->addr;
1197
1198                 *uaddr_len = addr->len;
1199                 memcpy(sunaddr, addr->name, *uaddr_len);
1200         }
1201         unix_state_runlock(sk);
1202         sock_put(sk);
1203 out:
1204         return err;
1205 }
1206
1207 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1208 {
1209         int i;
1210
1211         scm->fp = UNIXCB(skb).fp;
1212         skb->destructor = sock_wfree;
1213         UNIXCB(skb).fp = NULL;
1214
1215         for (i=scm->fp->count-1; i>=0; i--)
1216                 unix_notinflight(scm->fp->fp[i]);
1217 }
1218
1219 static void unix_destruct_fds(struct sk_buff *skb)
1220 {
1221         struct scm_cookie scm;
1222         memset(&scm, 0, sizeof(scm));
1223         unix_detach_fds(&scm, skb);
1224
1225         /* Alas, it calls VFS */
1226         /* So fscking what? fput() had been SMP-safe since the last Summer */
1227         scm_destroy(&scm);
1228         sock_wfree(skb);
1229 }
1230
1231 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1232 {
1233         int i;
1234         for (i=scm->fp->count-1; i>=0; i--)
1235                 unix_inflight(scm->fp->fp[i]);
1236         UNIXCB(skb).fp = scm->fp;
1237         skb->destructor = unix_destruct_fds;
1238         scm->fp = NULL;
1239 }
1240
1241 /*
1242  *      Send AF_UNIX data.
1243  */
1244
1245 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1246                               struct msghdr *msg, size_t len)
1247 {
1248         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1249         struct sock *sk = sock->sk;
1250         struct unix_sock *u = unix_sk(sk);
1251         struct sockaddr_un *sunaddr=msg->msg_name;
1252         struct sock *other = NULL;
1253         int namelen = 0; /* fake GCC */
1254         int err;
1255         unsigned hash;
1256         struct sk_buff *skb;
1257         long timeo;
1258         struct scm_cookie tmp_scm;
1259
1260         if (NULL == siocb->scm)
1261                 siocb->scm = &tmp_scm;
1262         err = scm_send(sock, msg, siocb->scm);
1263         if (err < 0)
1264                 return err;
1265
1266         err = -EOPNOTSUPP;
1267         if (msg->msg_flags&MSG_OOB)
1268                 goto out;
1269
1270         if (msg->msg_namelen) {
1271                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1272                 if (err < 0)
1273                         goto out;
1274                 namelen = err;
1275         } else {
1276                 sunaddr = NULL;
1277                 err = -ENOTCONN;
1278                 other = unix_peer_get(sk);
1279                 if (!other)
1280                         goto out;
1281         }
1282
1283         if (test_bit(SOCK_PASSCRED, &sock->flags)
1284                 && !u->addr && (err = unix_autobind(sock)) != 0)
1285                 goto out;
1286
1287         err = -EMSGSIZE;
1288         if (len > sk->sk_sndbuf - 32)
1289                 goto out;
1290
1291         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1292         if (skb==NULL)
1293                 goto out;
1294
1295         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1296         if (siocb->scm->fp)
1297                 unix_attach_fds(siocb->scm, skb);
1298
1299         skb->h.raw = skb->data;
1300         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1301         if (err)
1302                 goto out_free;
1303
1304         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1305
1306 restart:
1307         if (!other) {
1308                 err = -ECONNRESET;
1309                 if (sunaddr == NULL)
1310                         goto out_free;
1311
1312                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1313                                         hash, &err);
1314                 if (other==NULL)
1315                         goto out_free;
1316         }
1317
1318         unix_state_rlock(other);
1319         err = -EPERM;
1320         if (!unix_may_send(sk, other))
1321                 goto out_unlock;
1322
1323         if (sock_flag(other, SOCK_DEAD)) {
1324                 /*
1325                  *      Check with 1003.1g - what should
1326                  *      datagram error
1327                  */
1328                 unix_state_runlock(other);
1329                 sock_put(other);
1330
1331                 err = 0;
1332                 unix_state_wlock(sk);
1333                 if (unix_peer(sk) == other) {
1334                         unix_peer(sk)=NULL;
1335                         unix_state_wunlock(sk);
1336
1337                         unix_dgram_disconnected(sk, other);
1338                         sock_put(other);
1339                         err = -ECONNREFUSED;
1340                 } else {
1341                         unix_state_wunlock(sk);
1342                 }
1343
1344                 other = NULL;
1345                 if (err)
1346                         goto out_free;
1347                 goto restart;
1348         }
1349
1350         err = -EPIPE;
1351         if (other->sk_shutdown & RCV_SHUTDOWN)
1352                 goto out_unlock;
1353
1354         if (sk->sk_type != SOCK_SEQPACKET) {
1355                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1356                 if (err)
1357                         goto out_unlock;
1358         }
1359
1360         if (unix_peer(other) != sk &&
1361             (skb_queue_len(&other->sk_receive_queue) >
1362              other->sk_max_ack_backlog)) {
1363                 if (!timeo) {
1364                         err = -EAGAIN;
1365                         goto out_unlock;
1366                 }
1367
1368                 timeo = unix_wait_for_peer(other, timeo);
1369
1370                 err = sock_intr_errno(timeo);
1371                 if (signal_pending(current))
1372                         goto out_free;
1373
1374                 goto restart;
1375         }
1376
1377         skb_queue_tail(&other->sk_receive_queue, skb);
1378         unix_state_runlock(other);
1379         other->sk_data_ready(other, len);
1380         sock_put(other);
1381         scm_destroy(siocb->scm);
1382         return len;
1383
1384 out_unlock:
1385         unix_state_runlock(other);
1386 out_free:
1387         kfree_skb(skb);
1388 out:
1389         if (other)
1390                 sock_put(other);
1391         scm_destroy(siocb->scm);
1392         return err;
1393 }
1394
1395                 
1396 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1397                                struct msghdr *msg, size_t len)
1398 {
1399         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1400         struct sock *sk = sock->sk;
1401         struct sock *other = NULL;
1402         struct sockaddr_un *sunaddr=msg->msg_name;
1403         int err,size;
1404         struct sk_buff *skb;
1405         int sent=0;
1406         struct scm_cookie tmp_scm;
1407
1408         if (NULL == siocb->scm)
1409                 siocb->scm = &tmp_scm;
1410         err = scm_send(sock, msg, siocb->scm);
1411         if (err < 0)
1412                 return err;
1413
1414         err = -EOPNOTSUPP;
1415         if (msg->msg_flags&MSG_OOB)
1416                 goto out_err;
1417
1418         if (msg->msg_namelen) {
1419                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1420                 goto out_err;
1421         } else {
1422                 sunaddr = NULL;
1423                 err = -ENOTCONN;
1424                 other = unix_peer(sk);
1425                 if (!other)
1426                         goto out_err;
1427         }
1428
1429         if (sk->sk_shutdown & SEND_SHUTDOWN)
1430                 goto pipe_err;
1431
1432         while(sent < len)
1433         {
1434                 /*
1435                  *      Optimisation for the fact that under 0.01% of X
1436                  *      messages typically need breaking up.
1437                  */
1438
1439                 size = len-sent;
1440
1441                 /* Keep two messages in the pipe so it schedules better */
1442                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1443                         size = (sk->sk_sndbuf >> 1) - 64;
1444
1445                 if (size > SKB_MAX_ALLOC)
1446                         size = SKB_MAX_ALLOC;
1447                         
1448                 /*
1449                  *      Grab a buffer
1450                  */
1451                  
1452                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1453
1454                 if (skb==NULL)
1455                         goto out_err;
1456
1457                 /*
1458                  *      If you pass two values to the sock_alloc_send_skb
1459                  *      it tries to grab the large buffer with GFP_NOFS
1460                  *      (which can fail easily), and if it fails grab the
1461                  *      fallback size buffer which is under a page and will
1462                  *      succeed. [Alan]
1463                  */
1464                 size = min_t(int, size, skb_tailroom(skb));
1465
1466                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1467                 if (siocb->scm->fp)
1468                         unix_attach_fds(siocb->scm, skb);
1469
1470                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1471                         kfree_skb(skb);
1472                         goto out_err;
1473                 }
1474
1475                 unix_state_rlock(other);
1476
1477                 if (sock_flag(other, SOCK_DEAD) ||
1478                     (other->sk_shutdown & RCV_SHUTDOWN))
1479                         goto pipe_err_free;
1480
1481                 skb_queue_tail(&other->sk_receive_queue, skb);
1482                 unix_state_runlock(other);
1483                 other->sk_data_ready(other, size);
1484                 sent+=size;
1485         }
1486
1487         scm_destroy(siocb->scm);
1488         siocb->scm = NULL;
1489
1490         return sent;
1491
1492 pipe_err_free:
1493         unix_state_runlock(other);
1494         kfree_skb(skb);
1495 pipe_err:
1496         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1497                 send_sig(SIGPIPE,current,0);
1498         err = -EPIPE;
1499 out_err:
1500         scm_destroy(siocb->scm);
1501         siocb->scm = NULL;
1502         return sent ? : err;
1503 }
1504
1505 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1506                                   struct msghdr *msg, size_t len)
1507 {
1508         int err;
1509         struct sock *sk = sock->sk;
1510         
1511         err = sock_error(sk);
1512         if (err)
1513                 return err;
1514
1515         if (sk->sk_state != TCP_ESTABLISHED)
1516                 return -ENOTCONN;
1517
1518         if (msg->msg_namelen)
1519                 msg->msg_namelen = 0;
1520
1521         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1522 }
1523                                                                                             
1524 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1525 {
1526         struct unix_sock *u = unix_sk(sk);
1527
1528         msg->msg_namelen = 0;
1529         if (u->addr) {
1530                 msg->msg_namelen = u->addr->len;
1531                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1532         }
1533 }
1534
1535 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1536                               struct msghdr *msg, size_t size,
1537                               int flags)
1538 {
1539         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1540         struct scm_cookie tmp_scm;
1541         struct sock *sk = sock->sk;
1542         struct unix_sock *u = unix_sk(sk);
1543         int noblock = flags & MSG_DONTWAIT;
1544         struct sk_buff *skb;
1545         int err;
1546
1547         err = -EOPNOTSUPP;
1548         if (flags&MSG_OOB)
1549                 goto out;
1550
1551         msg->msg_namelen = 0;
1552
1553         mutex_lock(&u->readlock);
1554
1555         skb = skb_recv_datagram(sk, flags, noblock, &err);
1556         if (!skb)
1557                 goto out_unlock;
1558
1559         wake_up_interruptible(&u->peer_wait);
1560
1561         if (msg->msg_name)
1562                 unix_copy_addr(msg, skb->sk);
1563
1564         if (size > skb->len)
1565                 size = skb->len;
1566         else if (size < skb->len)
1567                 msg->msg_flags |= MSG_TRUNC;
1568
1569         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1570         if (err)
1571                 goto out_free;
1572
1573         if (!siocb->scm) {
1574                 siocb->scm = &tmp_scm;
1575                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1576         }
1577         siocb->scm->creds = *UNIXCREDS(skb);
1578
1579         if (!(flags & MSG_PEEK))
1580         {
1581                 if (UNIXCB(skb).fp)
1582                         unix_detach_fds(siocb->scm, skb);
1583         }
1584         else 
1585         {
1586                 /* It is questionable: on PEEK we could:
1587                    - do not return fds - good, but too simple 8)
1588                    - return fds, and do not return them on read (old strategy,
1589                      apparently wrong)
1590                    - clone fds (I chose it for now, it is the most universal
1591                      solution)
1592                 
1593                    POSIX 1003.1g does not actually define this clearly
1594                    at all. POSIX 1003.1g doesn't define a lot of things
1595                    clearly however!                  
1596                    
1597                 */
1598                 if (UNIXCB(skb).fp)
1599                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1600         }
1601         err = size;
1602
1603         scm_recv(sock, msg, siocb->scm, flags);
1604
1605 out_free:
1606         skb_free_datagram(sk,skb);
1607 out_unlock:
1608         mutex_unlock(&u->readlock);
1609 out:
1610         return err;
1611 }
1612
1613 /*
1614  *      Sleep until data has arrive. But check for races..
1615  */
1616  
1617 static long unix_stream_data_wait(struct sock * sk, long timeo)
1618 {
1619         DEFINE_WAIT(wait);
1620
1621         unix_state_rlock(sk);
1622
1623         for (;;) {
1624                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1625
1626                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1627                     sk->sk_err ||
1628                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1629                     signal_pending(current) ||
1630                     !timeo)
1631                         break;
1632
1633                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1634                 unix_state_runlock(sk);
1635                 timeo = schedule_timeout(timeo);
1636                 unix_state_rlock(sk);
1637                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1638         }
1639
1640         finish_wait(sk->sk_sleep, &wait);
1641         unix_state_runlock(sk);
1642         return timeo;
1643 }
1644
1645
1646
1647 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1648                                struct msghdr *msg, size_t size,
1649                                int flags)
1650 {
1651         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1652         struct scm_cookie tmp_scm;
1653         struct sock *sk = sock->sk;
1654         struct unix_sock *u = unix_sk(sk);
1655         struct sockaddr_un *sunaddr=msg->msg_name;
1656         int copied = 0;
1657         int check_creds = 0;
1658         int target;
1659         int err = 0;
1660         long timeo;
1661
1662         err = -EINVAL;
1663         if (sk->sk_state != TCP_ESTABLISHED)
1664                 goto out;
1665
1666         err = -EOPNOTSUPP;
1667         if (flags&MSG_OOB)
1668                 goto out;
1669
1670         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1671         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1672
1673         msg->msg_namelen = 0;
1674
1675         /* Lock the socket to prevent queue disordering
1676          * while sleeps in memcpy_tomsg
1677          */
1678
1679         if (!siocb->scm) {
1680                 siocb->scm = &tmp_scm;
1681                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1682         }
1683
1684         mutex_lock(&u->readlock);
1685
1686         do
1687         {
1688                 int chunk;
1689                 struct sk_buff *skb;
1690
1691                 skb = skb_dequeue(&sk->sk_receive_queue);
1692                 if (skb==NULL)
1693                 {
1694                         if (copied >= target)
1695                                 break;
1696
1697                         /*
1698                          *      POSIX 1003.1g mandates this order.
1699                          */
1700                          
1701                         if ((err = sock_error(sk)) != 0)
1702                                 break;
1703                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1704                                 break;
1705                         err = -EAGAIN;
1706                         if (!timeo)
1707                                 break;
1708                         mutex_unlock(&u->readlock);
1709
1710                         timeo = unix_stream_data_wait(sk, timeo);
1711
1712                         if (signal_pending(current)) {
1713                                 err = sock_intr_errno(timeo);
1714                                 goto out;
1715                         }
1716                         mutex_lock(&u->readlock);
1717                         continue;
1718                 }
1719
1720                 if (check_creds) {
1721                         /* Never glue messages from different writers */
1722                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1723                                 skb_queue_head(&sk->sk_receive_queue, skb);
1724                                 break;
1725                         }
1726                 } else {
1727                         /* Copy credentials */
1728                         siocb->scm->creds = *UNIXCREDS(skb);
1729                         check_creds = 1;
1730                 }
1731
1732                 /* Copy address just once */
1733                 if (sunaddr)
1734                 {
1735                         unix_copy_addr(msg, skb->sk);
1736                         sunaddr = NULL;
1737                 }
1738
1739                 chunk = min_t(unsigned int, skb->len, size);
1740                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1741                         skb_queue_head(&sk->sk_receive_queue, skb);
1742                         if (copied == 0)
1743                                 copied = -EFAULT;
1744                         break;
1745                 }
1746                 copied += chunk;
1747                 size -= chunk;
1748
1749                 /* Mark read part of skb as used */
1750                 if (!(flags & MSG_PEEK))
1751                 {
1752                         skb_pull(skb, chunk);
1753
1754                         if (UNIXCB(skb).fp)
1755                                 unix_detach_fds(siocb->scm, skb);
1756
1757                         /* put the skb back if we didn't use it up.. */
1758                         if (skb->len)
1759                         {
1760                                 skb_queue_head(&sk->sk_receive_queue, skb);
1761                                 break;
1762                         }
1763
1764                         kfree_skb(skb);
1765
1766                         if (siocb->scm->fp)
1767                                 break;
1768                 }
1769                 else
1770                 {
1771                         /* It is questionable, see note in unix_dgram_recvmsg.
1772                          */
1773                         if (UNIXCB(skb).fp)
1774                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1775
1776                         /* put message back and return */
1777                         skb_queue_head(&sk->sk_receive_queue, skb);
1778                         break;
1779                 }
1780         } while (size);
1781
1782         mutex_unlock(&u->readlock);
1783         scm_recv(sock, msg, siocb->scm, flags);
1784 out:
1785         return copied ? : err;
1786 }
1787
1788 static int unix_shutdown(struct socket *sock, int mode)
1789 {
1790         struct sock *sk = sock->sk;
1791         struct sock *other;
1792
1793         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1794
1795         if (mode) {
1796                 unix_state_wlock(sk);
1797                 sk->sk_shutdown |= mode;
1798                 other=unix_peer(sk);
1799                 if (other)
1800                         sock_hold(other);
1801                 unix_state_wunlock(sk);
1802                 sk->sk_state_change(sk);
1803
1804                 if (other &&
1805                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1806
1807                         int peer_mode = 0;
1808
1809                         if (mode&RCV_SHUTDOWN)
1810                                 peer_mode |= SEND_SHUTDOWN;
1811                         if (mode&SEND_SHUTDOWN)
1812                                 peer_mode |= RCV_SHUTDOWN;
1813                         unix_state_wlock(other);
1814                         other->sk_shutdown |= peer_mode;
1815                         unix_state_wunlock(other);
1816                         other->sk_state_change(other);
1817                         read_lock(&other->sk_callback_lock);
1818                         if (peer_mode == SHUTDOWN_MASK)
1819                                 sk_wake_async(other,1,POLL_HUP);
1820                         else if (peer_mode & RCV_SHUTDOWN)
1821                                 sk_wake_async(other,1,POLL_IN);
1822                         read_unlock(&other->sk_callback_lock);
1823                 }
1824                 if (other)
1825                         sock_put(other);
1826         }
1827         return 0;
1828 }
1829
1830 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1831 {
1832         struct sock *sk = sock->sk;
1833         long amount=0;
1834         int err;
1835
1836         switch(cmd)
1837         {
1838                 case SIOCOUTQ:
1839                         amount = atomic_read(&sk->sk_wmem_alloc);
1840                         err = put_user(amount, (int __user *)arg);
1841                         break;
1842                 case SIOCINQ:
1843                 {
1844                         struct sk_buff *skb;
1845
1846                         if (sk->sk_state == TCP_LISTEN) {
1847                                 err = -EINVAL;
1848                                 break;
1849                         }
1850
1851                         spin_lock(&sk->sk_receive_queue.lock);
1852                         if (sk->sk_type == SOCK_STREAM ||
1853                             sk->sk_type == SOCK_SEQPACKET) {
1854                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1855                                         amount += skb->len;
1856                         } else {
1857                                 skb = skb_peek(&sk->sk_receive_queue);
1858                                 if (skb)
1859                                         amount=skb->len;
1860                         }
1861                         spin_unlock(&sk->sk_receive_queue.lock);
1862                         err = put_user(amount, (int __user *)arg);
1863                         break;
1864                 }
1865
1866                 default:
1867                         err = -ENOIOCTLCMD;
1868                         break;
1869         }
1870         return err;
1871 }
1872
1873 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1874 {
1875         struct sock *sk = sock->sk;
1876         unsigned int mask;
1877
1878         poll_wait(file, sk->sk_sleep, wait);
1879         mask = 0;
1880
1881         /* exceptional events? */
1882         if (sk->sk_err)
1883                 mask |= POLLERR;
1884         if (sk->sk_shutdown == SHUTDOWN_MASK)
1885                 mask |= POLLHUP;
1886         if (sk->sk_shutdown & RCV_SHUTDOWN)
1887                 mask |= POLLRDHUP;
1888
1889         /* readable? */
1890         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1891             (sk->sk_shutdown & RCV_SHUTDOWN))
1892                 mask |= POLLIN | POLLRDNORM;
1893
1894         /* Connection-based need to check for termination and startup */
1895         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1896                 mask |= POLLHUP;
1897
1898         /*
1899          * we set writable also when the other side has shut down the
1900          * connection. This prevents stuck sockets.
1901          */
1902         if (unix_writable(sk))
1903                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1904
1905         return mask;
1906 }
1907
1908
1909 #ifdef CONFIG_PROC_FS
1910 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1911 {
1912         loff_t off = 0;
1913         struct sock *s;
1914
1915         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1916                 if (off == pos) 
1917                         return s;
1918                 ++off;
1919         }
1920         return NULL;
1921 }
1922
1923
1924 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1925 {
1926         spin_lock(&unix_table_lock);
1927         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1928 }
1929
1930 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1931 {
1932         ++*pos;
1933
1934         if (v == (void *)1) 
1935                 return first_unix_socket(seq->private);
1936         return next_unix_socket(seq->private, v);
1937 }
1938
1939 static void unix_seq_stop(struct seq_file *seq, void *v)
1940 {
1941         spin_unlock(&unix_table_lock);
1942 }
1943
1944 static int unix_seq_show(struct seq_file *seq, void *v)
1945 {
1946         
1947         if (v == (void *)1)
1948                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1949                          "Inode Path\n");
1950         else {
1951                 struct sock *s = v;
1952                 struct unix_sock *u = unix_sk(s);
1953                 unix_state_rlock(s);
1954
1955                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1956                         s,
1957                         atomic_read(&s->sk_refcnt),
1958                         0,
1959                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1960                         s->sk_type,
1961                         s->sk_socket ?
1962                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1963                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1964                         sock_i_ino(s));
1965
1966                 if (u->addr) {
1967                         int i, len;
1968                         seq_putc(seq, ' ');
1969
1970                         i = 0;
1971                         len = u->addr->len - sizeof(short);
1972                         if (!UNIX_ABSTRACT(s))
1973                                 len--;
1974                         else {
1975                                 seq_putc(seq, '@');
1976                                 i++;
1977                         }
1978                         for ( ; i < len; i++)
1979                                 seq_putc(seq, u->addr->name->sun_path[i]);
1980                 }
1981                 unix_state_runlock(s);
1982                 seq_putc(seq, '\n');
1983         }
1984
1985         return 0;
1986 }
1987
1988 static struct seq_operations unix_seq_ops = {
1989         .start  = unix_seq_start,
1990         .next   = unix_seq_next,
1991         .stop   = unix_seq_stop,
1992         .show   = unix_seq_show,
1993 };
1994
1995
1996 static int unix_seq_open(struct inode *inode, struct file *file)
1997 {
1998         struct seq_file *seq;
1999         int rc = -ENOMEM;
2000         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2001
2002         if (!iter)
2003                 goto out;
2004
2005         rc = seq_open(file, &unix_seq_ops);
2006         if (rc)
2007                 goto out_kfree;
2008
2009         seq          = file->private_data;
2010         seq->private = iter;
2011         *iter = 0;
2012 out:
2013         return rc;
2014 out_kfree:
2015         kfree(iter);
2016         goto out;
2017 }
2018
2019 static struct file_operations unix_seq_fops = {
2020         .owner          = THIS_MODULE,
2021         .open           = unix_seq_open,
2022         .read           = seq_read,
2023         .llseek         = seq_lseek,
2024         .release        = seq_release_private,
2025 };
2026
2027 #endif
2028
2029 static struct net_proto_family unix_family_ops = {
2030         .family = PF_UNIX,
2031         .create = unix_create,
2032         .owner  = THIS_MODULE,
2033 };
2034
2035 static int __init af_unix_init(void)
2036 {
2037         int rc = -1;
2038         struct sk_buff *dummy_skb;
2039
2040         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2041                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2042                 goto out;
2043         }
2044
2045         rc = proto_register(&unix_proto, 1);
2046         if (rc != 0) {
2047                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2048                        __FUNCTION__);
2049                 goto out;
2050         }
2051
2052         sock_register(&unix_family_ops);
2053 #ifdef CONFIG_PROC_FS
2054         proc_net_fops_create("unix", 0, &unix_seq_fops);
2055 #endif
2056         unix_sysctl_register();
2057 out:
2058         return rc;
2059 }
2060
2061 static void __exit af_unix_exit(void)
2062 {
2063         sock_unregister(PF_UNIX);
2064         unix_sysctl_unregister();
2065         proc_net_remove("unix");
2066         proto_unregister(&unix_proto);
2067 }
2068
2069 module_init(af_unix_init);
2070 module_exit(af_unix_exit);
2071
2072 MODULE_LICENSE("GPL");
2073 MODULE_ALIAS_NETPROTO(PF_UNIX);