net/unix/af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko Eißfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by the above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge amount
40  *                                      of sockets being hashed (for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of sockets to 2*max_files and
44  *                                      the number of skbs queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and gives the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
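/*
 * Illustrative userspace sketch (not part of af_unix.c): binding a socket in
 * the abstract namespace described above.  The name starts with a zero byte
 * and is a plain byte sequence, so it can never collide with a filesystem
 * path.  The name "example" used here is an arbitrary choice.
 */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static int bind_abstract_example(void)
{
        struct sockaddr_un addr;
        socklen_t len;
        int fd = socket(AF_UNIX, SOCK_STREAM, 0);

        if (fd < 0)
                return -1;

        memset(&addr, 0, sizeof(addr));
        addr.sun_family = AF_UNIX;
        /* sun_path[0] == 0 selects the abstract namespace */
        memcpy(addr.sun_path + 1, "example", 7);
        /* the length covers the family plus the name bytes, with no NUL */
        len = offsetof(struct sockaddr_un, sun_path) + 1 + 7;

        if (bind(fd, (struct sockaddr *)&addr, len) < 0) {
                close(fd);
                return -1;
        }
        return fd;
}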
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/smp_lock.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/vs_context.h>
120 #include <linux/vs_network.h>
121 #include <linux/vs_limit.h>
122
123 int sysctl_unix_max_dgram_qlen = 10;
124
125 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
126 DEFINE_SPINLOCK(unix_table_lock);
127 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
128
129 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
130
131 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
132
133 #ifdef CONFIG_SECURITY_NETWORK
134 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
135 {
136         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
137 }
138
139 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 {
141         scm->secid = *UNIXSID(skb);
142 }
143 #else
144 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 { }
146
147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148 { }
149 #endif /* CONFIG_SECURITY_NETWORK */
150
151 /*
152  *  SMP locking strategy:
153  *    hash table is protected with spinlock unix_table_lock
154  *    each socket state is protected by separate rwlock.
155  */
156
157 static inline unsigned unix_hash_fold(unsigned hash)
158 {
159         hash ^= hash>>16;
160         hash ^= hash>>8;
161         return hash&(UNIX_HASH_SIZE-1);
162 }
163
164 #define unix_peer(sk) (unix_sk(sk)->peer)
165
166 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
167 {
168         return unix_peer(osk) == sk;
169 }
170
171 static inline int unix_may_send(struct sock *sk, struct sock *osk)
172 {
173         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
174 }
175
176 static struct sock *unix_peer_get(struct sock *s)
177 {
178         struct sock *peer;
179
180         unix_state_rlock(s);
181         peer = unix_peer(s);
182         if (peer)
183                 sock_hold(peer);
184         unix_state_runlock(s);
185         return peer;
186 }
187
188 static inline void unix_release_addr(struct unix_address *addr)
189 {
190         if (atomic_dec_and_test(&addr->refcnt))
191                 kfree(addr);
192 }
193
194 /*
195  *      Check unix socket name:
196  *              - it must not be zero length.
197  *              - if it does not start with a zero byte, it must be NUL terminated (an FS object)
198  *              - if it starts with a zero byte, it is an abstract name (see the userspace sketch after this function).
199  */
200  
201 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
202 {
203         if (len <= sizeof(short) || len > sizeof(*sunaddr))
204                 return -EINVAL;
205         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
206                 return -EINVAL;
207         if (sunaddr->sun_path[0]) {
208                 /*
209                  * This may look like an off-by-one error but it is a bit more
210                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
211                  * sun_path[108] doesn't as such exist.  However, in kernel space
212                  * we are guaranteed that it is a valid memory location in our
213                  * kernel address buffer.
214                  */
215                 ((char *)sunaddr)[len]=0;
216                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
217                 return len;
218         }
219
220         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
221         return len;
222 }
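/*
 * Illustrative userspace sketch (not part of af_unix.c): building the two
 * address forms that unix_mkname() accepts.  For a filesystem name the
 * kernel re-derives the length from strlen(sun_path), so the path must be
 * NUL terminated; for an abstract name every byte of the passed-in length
 * is significant.  Both helper names are hypothetical.
 */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static socklen_t fill_fs_addr(struct sockaddr_un *a, const char *path)
{
        memset(a, 0, sizeof(*a));
        a->sun_family = AF_UNIX;
        strncpy(a->sun_path, path, sizeof(a->sun_path) - 1);
        return offsetof(struct sockaddr_un, sun_path) + strlen(a->sun_path) + 1;
}

static socklen_t fill_abstract_addr(struct sockaddr_un *a,
                                    const char *name, size_t namelen)
{
        /* namelen must fit in sizeof(a->sun_path) - 1 */
        memset(a, 0, sizeof(*a));
        a->sun_family = AF_UNIX;
        a->sun_path[0] = '\0';                  /* abstract namespace marker */
        memcpy(a->sun_path + 1, name, namelen);
        return offsetof(struct sockaddr_un, sun_path) + 1 + namelen;
}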
223
224 static void __unix_remove_socket(struct sock *sk)
225 {
226         sk_del_node_init(sk);
227 }
228
229 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
230 {
231         BUG_TRAP(sk_unhashed(sk));
232         sk_add_node(sk, list);
233 }
234
235 static inline void unix_remove_socket(struct sock *sk)
236 {
237         spin_lock(&unix_table_lock);
238         __unix_remove_socket(sk);
239         spin_unlock(&unix_table_lock);
240 }
241
242 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
243 {
244         spin_lock(&unix_table_lock);
245         __unix_insert_socket(list, sk);
246         spin_unlock(&unix_table_lock);
247 }
248
249 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
250                                               int len, int type, unsigned hash)
251 {
252         struct sock *s;
253         struct hlist_node *node;
254
255         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256                 struct unix_sock *u = unix_sk(s);
257
258                 if (!vx_check(s->sk_xid, VX_IDENT|VX_WATCH))
259                         continue;
260                 if (u->addr->len == len &&
261                     !memcmp(u->addr->name, sunname, len))
262                         goto found;
263         }
264         s = NULL;
265 found:
266         return s;
267 }
268
269 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
270                                                    int len, int type,
271                                                    unsigned hash)
272 {
273         struct sock *s;
274
275         spin_lock(&unix_table_lock);
276         s = __unix_find_socket_byname(sunname, len, type, hash);
277         if (s)
278                 sock_hold(s);
279         spin_unlock(&unix_table_lock);
280         return s;
281 }
282
283 static struct sock *unix_find_socket_byinode(struct inode *i)
284 {
285         struct sock *s;
286         struct hlist_node *node;
287
288         spin_lock(&unix_table_lock);
289         sk_for_each(s, node,
290                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
291                 struct dentry *dentry = unix_sk(s)->dentry;
292
293                 if(dentry && dentry->d_inode == i)
294                 {
295                         sock_hold(s);
296                         goto found;
297                 }
298         }
299         s = NULL;
300 found:
301         spin_unlock(&unix_table_lock);
302         return s;
303 }
304
305 static inline int unix_writable(struct sock *sk)
306 {
307         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
308 }
309
310 static void unix_write_space(struct sock *sk)
311 {
312         read_lock(&sk->sk_callback_lock);
313         if (unix_writable(sk)) {
314                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
315                         wake_up_interruptible(sk->sk_sleep);
316                 sk_wake_async(sk, 2, POLL_OUT);
317         }
318         read_unlock(&sk->sk_callback_lock);
319 }
320
321 /* When a dgram socket disconnects (or changes its peer), we clear its receive
322  * queue of packets that arrived from the previous peer. First, this allows
323  * flow control based only on wmem_alloc; second, an sk connected to a peer
324  * may receive messages only from that peer. */
325 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
326 {
327         if (!skb_queue_empty(&sk->sk_receive_queue)) {
328                 skb_queue_purge(&sk->sk_receive_queue);
329                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
330
331                 /* If one link of a bidirectional dgram pipe is disconnected,
332                  * we signal the error. Messages are lost. Do not do this
333                  * when the peer was not connected to us.
334                  */
335                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
336                         other->sk_err = ECONNRESET;
337                         other->sk_error_report(other);
338                 }
339         }
340 }
341
342 static void unix_sock_destructor(struct sock *sk)
343 {
344         struct unix_sock *u = unix_sk(sk);
345
346         skb_queue_purge(&sk->sk_receive_queue);
347
348         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
349         BUG_TRAP(sk_unhashed(sk));
350         BUG_TRAP(!sk->sk_socket);
351         if (!sock_flag(sk, SOCK_DEAD)) {
352                 printk("Attempt to release alive unix socket: %p\n", sk);
353                 return;
354         }
355
356         if (u->addr)
357                 unix_release_addr(u->addr);
358
359         atomic_dec(&unix_nr_socks);
360 #ifdef UNIX_REFCNT_DEBUG
361         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
362 #endif
363 }
364
365 static int unix_release_sock (struct sock *sk, int embrion)
366 {
367         struct unix_sock *u = unix_sk(sk);
368         struct dentry *dentry;
369         struct vfsmount *mnt;
370         struct sock *skpair;
371         struct sk_buff *skb;
372         int state;
373
374         unix_remove_socket(sk);
375
376         /* Clear state */
377         unix_state_wlock(sk);
378         sock_orphan(sk);
379         sk->sk_shutdown = SHUTDOWN_MASK;
380         dentry       = u->dentry;
381         u->dentry    = NULL;
382         mnt          = u->mnt;
383         u->mnt       = NULL;
384         state = sk->sk_state;
385         sk->sk_state = TCP_CLOSE;
386         unix_state_wunlock(sk);
387
388         wake_up_interruptible_all(&u->peer_wait);
389
390         skpair=unix_peer(sk);
391
392         if (skpair!=NULL) {
393                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
394                         unix_state_wlock(skpair);
395                         /* No more writes */
396                         skpair->sk_shutdown = SHUTDOWN_MASK;
397                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
398                                 skpair->sk_err = ECONNRESET;
399                         unix_state_wunlock(skpair);
400                         skpair->sk_state_change(skpair);
401                         read_lock(&skpair->sk_callback_lock);
402                         sk_wake_async(skpair,1,POLL_HUP);
403                         read_unlock(&skpair->sk_callback_lock);
404                 }
405                 sock_put(skpair); /* It may now die */
406                 unix_peer(sk) = NULL;
407         }
408
409         /* Try to flush out this socket. Throw out buffers at least */
410
411         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
412                 if (state==TCP_LISTEN)
413                         unix_release_sock(skb->sk, 1);
414                 /* passed fds are erased in the kfree_skb hook        */
415                 kfree_skb(skb);
416         }
417
418         if (dentry) {
419                 dput(dentry);
420                 mntput(mnt);
421         }
422
423         sock_put(sk);
424
425         /* ---- Socket is dead now and most probably destroyed ---- */
426
427         /*
428          * Fixme: BSD difference: In BSD all sockets connected to us get
429          *        ECONNRESET and we die on the spot. In Linux we behave
430          *        like files and pipes do and wait for the last
431          *        dereference.
432          *
433          * Can't we simply set sock->err?
434          *
435          *        What does the above comment talk about? --ANK(980817)
436          */
437
438         if (atomic_read(&unix_tot_inflight))
439                 unix_gc();              /* Garbage collect fds */       
440
441         return 0;
442 }
443
444 static int unix_listen(struct socket *sock, int backlog)
445 {
446         int err;
447         struct sock *sk = sock->sk;
448         struct unix_sock *u = unix_sk(sk);
449
450         err = -EOPNOTSUPP;
451         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
452                 goto out;                       /* Only stream/seqpacket sockets accept */
453         err = -EINVAL;
454         if (!u->addr)
455                 goto out;                       /* No listens on an unbound socket */
456         unix_state_wlock(sk);
457         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
458                 goto out_unlock;
459         if (backlog > sk->sk_max_ack_backlog)
460                 wake_up_interruptible_all(&u->peer_wait);
461         sk->sk_max_ack_backlog  = backlog;
462         sk->sk_state            = TCP_LISTEN;
463         /* set credentials so connect can copy them */
464         sk->sk_peercred.pid     = current->tgid;
465         sk->sk_peercred.uid     = current->euid;
466         sk->sk_peercred.gid     = current->egid;
467         err = 0;
468
469 out_unlock:
470         unix_state_wunlock(sk);
471 out:
472         return err;
473 }
474
475 static int unix_release(struct socket *);
476 static int unix_bind(struct socket *, struct sockaddr *, int);
477 static int unix_stream_connect(struct socket *, struct sockaddr *,
478                                int addr_len, int flags);
479 static int unix_socketpair(struct socket *, struct socket *);
480 static int unix_accept(struct socket *, struct socket *, int);
481 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
482 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
483 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
484 static int unix_shutdown(struct socket *, int);
485 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
486                                struct msghdr *, size_t);
487 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
488                                struct msghdr *, size_t, int);
489 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
490                               struct msghdr *, size_t);
491 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
492                               struct msghdr *, size_t, int);
493 static int unix_dgram_connect(struct socket *, struct sockaddr *,
494                               int, int);
495 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
496                                   struct msghdr *, size_t);
497
498 static const struct proto_ops unix_stream_ops = {
499         .family =       PF_UNIX,
500         .owner =        THIS_MODULE,
501         .release =      unix_release,
502         .bind =         unix_bind,
503         .connect =      unix_stream_connect,
504         .socketpair =   unix_socketpair,
505         .accept =       unix_accept,
506         .getname =      unix_getname,
507         .poll =         unix_poll,
508         .ioctl =        unix_ioctl,
509         .listen =       unix_listen,
510         .shutdown =     unix_shutdown,
511         .setsockopt =   sock_no_setsockopt,
512         .getsockopt =   sock_no_getsockopt,
513         .sendmsg =      unix_stream_sendmsg,
514         .recvmsg =      unix_stream_recvmsg,
515         .mmap =         sock_no_mmap,
516         .sendpage =     sock_no_sendpage,
517 };
518
519 static const struct proto_ops unix_dgram_ops = {
520         .family =       PF_UNIX,
521         .owner =        THIS_MODULE,
522         .release =      unix_release,
523         .bind =         unix_bind,
524         .connect =      unix_dgram_connect,
525         .socketpair =   unix_socketpair,
526         .accept =       sock_no_accept,
527         .getname =      unix_getname,
528         .poll =         datagram_poll,
529         .ioctl =        unix_ioctl,
530         .listen =       sock_no_listen,
531         .shutdown =     unix_shutdown,
532         .setsockopt =   sock_no_setsockopt,
533         .getsockopt =   sock_no_getsockopt,
534         .sendmsg =      unix_dgram_sendmsg,
535         .recvmsg =      unix_dgram_recvmsg,
536         .mmap =         sock_no_mmap,
537         .sendpage =     sock_no_sendpage,
538 };
539
540 static const struct proto_ops unix_seqpacket_ops = {
541         .family =       PF_UNIX,
542         .owner =        THIS_MODULE,
543         .release =      unix_release,
544         .bind =         unix_bind,
545         .connect =      unix_stream_connect,
546         .socketpair =   unix_socketpair,
547         .accept =       unix_accept,
548         .getname =      unix_getname,
549         .poll =         datagram_poll,
550         .ioctl =        unix_ioctl,
551         .listen =       unix_listen,
552         .shutdown =     unix_shutdown,
553         .setsockopt =   sock_no_setsockopt,
554         .getsockopt =   sock_no_getsockopt,
555         .sendmsg =      unix_seqpacket_sendmsg,
556         .recvmsg =      unix_dgram_recvmsg,
557         .mmap =         sock_no_mmap,
558         .sendpage =     sock_no_sendpage,
559 };
560
561 static struct proto unix_proto = {
562         .name     = "UNIX",
563         .owner    = THIS_MODULE,
564         .obj_size = sizeof(struct unix_sock),
565 };
566
567 /*
568  * AF_UNIX sockets do not interact with hardware, hence they
569  * don't trigger interrupts - so it's safe for them to have
570  * bh-unsafe locking for their sk_receive_queue.lock. Split off
571  * this special lock-class by reinitializing the spinlock key:
572  */
573 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
574
575 static struct sock * unix_create1(struct socket *sock)
576 {
577         struct sock *sk = NULL;
578         struct unix_sock *u;
579
580         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
581                 goto out;
582
583         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
584         if (!sk)
585                 goto out;
586
587         atomic_inc(&unix_nr_socks);
588
589         sock_init_data(sock,sk);
590         lockdep_set_class(&sk->sk_receive_queue.lock,
591                                 &af_unix_sk_receive_queue_lock_key);
592
593         sk->sk_write_space      = unix_write_space;
594         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
595         sk->sk_destruct         = unix_sock_destructor;
596         u         = unix_sk(sk);
597         u->dentry = NULL;
598         u->mnt    = NULL;
599         spin_lock_init(&u->lock);
600         atomic_set(&u->inflight, sock ? 0 : -1);
601         mutex_init(&u->readlock); /* single task reading lock */
602         init_waitqueue_head(&u->peer_wait);
603         unix_insert_socket(unix_sockets_unbound, sk);
604 out:
605         return sk;
606 }
607
608 static int unix_create(struct socket *sock, int protocol)
609 {
610         if (protocol && protocol != PF_UNIX)
611                 return -EPROTONOSUPPORT;
612
613         sock->state = SS_UNCONNECTED;
614
615         switch (sock->type) {
616         case SOCK_STREAM:
617                 sock->ops = &unix_stream_ops;
618                 break;
619                 /*
620                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
621                  *      nothing uses it.
622                  */
623         case SOCK_RAW:
624                 sock->type=SOCK_DGRAM;
625         case SOCK_DGRAM:
626                 sock->ops = &unix_dgram_ops;
627                 break;
628         case SOCK_SEQPACKET:
629                 sock->ops = &unix_seqpacket_ops;
630                 break;
631         default:
632                 return -ESOCKTNOSUPPORT;
633         }
634
635         return unix_create1(sock) ? 0 : -ENOMEM;
636 }
637
638 static int unix_release(struct socket *sock)
639 {
640         struct sock *sk = sock->sk;
641
642         if (!sk)
643                 return 0;
644
645         sock->sk = NULL;
646
647         return unix_release_sock (sk, 0);
648 }
649
650 static int unix_autobind(struct socket *sock)
651 {
652         struct sock *sk = sock->sk;
653         struct unix_sock *u = unix_sk(sk);
654         static u32 ordernum = 1;
655         struct unix_address * addr;
656         int err;
657
658         mutex_lock(&u->readlock);
659
660         err = 0;
661         if (u->addr)
662                 goto out;
663
664         err = -ENOMEM;
665         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
666         if (!addr)
667                 goto out;
668
669         addr->name->sun_family = AF_UNIX;
670         atomic_set(&addr->refcnt, 1);
671
672 retry:
673         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
674         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
675
676         spin_lock(&unix_table_lock);
677         ordernum = (ordernum+1)&0xFFFFF;
678
679         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
680                                       addr->hash)) {
681                 spin_unlock(&unix_table_lock);
682                 /* Sanity yield. It is an unusual case, but yet... */
683                 if (!(ordernum&0xFF))
684                         yield();
685                 goto retry;
686         }
687         addr->hash ^= sk->sk_type;
688
689         __unix_remove_socket(sk);
690         u->addr = addr;
691         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
692         spin_unlock(&unix_table_lock);
693         err = 0;
694
695 out:    mutex_unlock(&u->readlock);
696         return err;
697 }
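/*
 * Illustrative userspace sketch (not part of af_unix.c): triggering the
 * autobind path above.  Calling bind() with only the address family (an
 * address length of sizeof(sa_family_t)) makes unix_bind() fall through to
 * unix_autobind(), which assigns a free five-hex-digit abstract name that
 * getsockname() then reports.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static void autobind_example(void)
{
        struct sockaddr_un addr;
        socklen_t len = sizeof(addr);
        int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

        if (fd < 0)
                return;

        memset(&addr, 0, sizeof(addr));
        addr.sun_family = AF_UNIX;

        /* addr_len == sizeof(sa_family_t): ask the kernel to pick a name */
        if (bind(fd, (struct sockaddr *)&addr, sizeof(sa_family_t)) == 0 &&
            getsockname(fd, (struct sockaddr *)&addr, &len) == 0)
                /* sun_path[0] is 0; the generated hex name follows it */
                printf("autobound to abstract name %.5s\n", addr.sun_path + 1);

        close(fd);
}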
698
699 static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
700                                     int type, unsigned hash, int *error)
701 {
702         struct sock *u;
703         struct nameidata nd;
704         int err = 0;
705         
706         if (sunname->sun_path[0]) {
707                 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
708                 if (err)
709                         goto fail;
710                 err = vfs_permission(&nd, MAY_WRITE);
711                 if (err)
712                         goto put_fail;
713
714                 err = -ECONNREFUSED;
715                 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
716                         goto put_fail;
717                 u=unix_find_socket_byinode(nd.dentry->d_inode);
718                 if (!u)
719                         goto put_fail;
720
721                 if (u->sk_type == type)
722                         touch_atime(nd.mnt, nd.dentry);
723
724                 path_release(&nd);
725
726                 err=-EPROTOTYPE;
727                 if (u->sk_type != type) {
728                         sock_put(u);
729                         goto fail;
730                 }
731         } else {
732                 err = -ECONNREFUSED;
733                 u=unix_find_socket_byname(sunname, len, type, hash);
734                 if (u) {
735                         struct dentry *dentry;
736                         dentry = unix_sk(u)->dentry;
737                         if (dentry)
738                                 touch_atime(unix_sk(u)->mnt, dentry);
739                 } else
740                         goto fail;
741         }
742         return u;
743
744 put_fail:
745         path_release(&nd);
746 fail:
747         *error=err;
748         return NULL;
749 }
750
751
752 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
753 {
754         struct sock *sk = sock->sk;
755         struct unix_sock *u = unix_sk(sk);
756         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
757         struct dentry * dentry = NULL;
758         struct nameidata nd;
759         int err;
760         unsigned hash;
761         struct unix_address *addr;
762         struct hlist_head *list;
763
764         err = -EINVAL;
765         if (sunaddr->sun_family != AF_UNIX)
766                 goto out;
767
768         if (addr_len==sizeof(short)) {
769                 err = unix_autobind(sock);
770                 goto out;
771         }
772
773         err = unix_mkname(sunaddr, addr_len, &hash);
774         if (err < 0)
775                 goto out;
776         addr_len = err;
777
778         mutex_lock(&u->readlock);
779
780         err = -EINVAL;
781         if (u->addr)
782                 goto out_up;
783
784         err = -ENOMEM;
785         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
786         if (!addr)
787                 goto out_up;
788
789         memcpy(addr->name, sunaddr, addr_len);
790         addr->len = addr_len;
791         addr->hash = hash ^ sk->sk_type;
792         atomic_set(&addr->refcnt, 1);
793
794         if (sunaddr->sun_path[0]) {
795                 unsigned int mode;
796                 err = 0;
797                 /*
798                  * Get the parent directory, calculate the hash for last
799                  * component.
800                  */
801                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
802                 if (err)
803                         goto out_mknod_parent;
804
805                 dentry = lookup_create(&nd, 0);
806                 err = PTR_ERR(dentry);
807                 if (IS_ERR(dentry))
808                         goto out_mknod_unlock;
809
810                 /*
811                  * All right, let's create it.
812                  */
813                 mode = S_IFSOCK |
814                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
815                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL);
816                 if (err)
817                         goto out_mknod_dput;
818                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
819                 dput(nd.dentry);
820                 nd.dentry = dentry;
821
822                 addr->hash = UNIX_HASH_SIZE;
823         }
824
825         spin_lock(&unix_table_lock);
826
827         if (!sunaddr->sun_path[0]) {
828                 err = -EADDRINUSE;
829                 if (__unix_find_socket_byname(sunaddr, addr_len,
830                                               sk->sk_type, hash)) {
831                         unix_release_addr(addr);
832                         goto out_unlock;
833                 }
834
835                 list = &unix_socket_table[addr->hash];
836         } else {
837                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
838                 u->dentry = nd.dentry;
839                 u->mnt    = nd.mnt;
840         }
841
842         err = 0;
843         __unix_remove_socket(sk);
844         u->addr = addr;
845         __unix_insert_socket(list, sk);
846
847 out_unlock:
848         spin_unlock(&unix_table_lock);
849 out_up:
850         mutex_unlock(&u->readlock);
851 out:
852         return err;
853
854 out_mknod_dput:
855         dput(dentry);
856 out_mknod_unlock:
857         mutex_unlock(&nd.dentry->d_inode->i_mutex);
858         path_release(&nd);
859 out_mknod_parent:
860         if (err==-EEXIST)
861                 err=-EADDRINUSE;
862         unix_release_addr(addr);
863         goto out_up;
864 }
865
866 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
867                               int alen, int flags)
868 {
869         struct sock *sk = sock->sk;
870         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
871         struct sock *other;
872         unsigned hash;
873         int err;
874
875         if (addr->sa_family != AF_UNSPEC) {
876                 err = unix_mkname(sunaddr, alen, &hash);
877                 if (err < 0)
878                         goto out;
879                 alen = err;
880
881                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
882                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
883                         goto out;
884
885                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
886                 if (!other)
887                         goto out;
888
889                 unix_state_wlock(sk);
890
891                 err = -EPERM;
892                 if (!unix_may_send(sk, other))
893                         goto out_unlock;
894
895                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
896                 if (err)
897                         goto out_unlock;
898
899         } else {
900                 /*
901                  *      1003.1g breaking connected state with AF_UNSPEC
902                  */
903                 other = NULL;
904                 unix_state_wlock(sk);
905         }
906
907         /*
908          * If it was connected, reconnect.
909          */
910         if (unix_peer(sk)) {
911                 struct sock *old_peer = unix_peer(sk);
912                 unix_peer(sk)=other;
913                 unix_state_wunlock(sk);
914
915                 if (other != old_peer)
916                         unix_dgram_disconnected(sk, old_peer);
917                 sock_put(old_peer);
918         } else {
919                 unix_peer(sk)=other;
920                 unix_state_wunlock(sk);
921         }
922         return 0;
923
924 out_unlock:
925         unix_state_wunlock(sk);
926         sock_put(other);
927 out:
928         return err;
929 }
930
931 static long unix_wait_for_peer(struct sock *other, long timeo)
932 {
933         struct unix_sock *u = unix_sk(other);
934         int sched;
935         DEFINE_WAIT(wait);
936
937         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
938
939         sched = !sock_flag(other, SOCK_DEAD) &&
940                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
941                 (skb_queue_len(&other->sk_receive_queue) >
942                  other->sk_max_ack_backlog);
943
944         unix_state_runlock(other);
945
946         if (sched)
947                 timeo = schedule_timeout(timeo);
948
949         finish_wait(&u->peer_wait, &wait);
950         return timeo;
951 }
952
953 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
954                                int addr_len, int flags)
955 {
956         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
957         struct sock *sk = sock->sk;
958         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
959         struct sock *newsk = NULL;
960         struct sock *other = NULL;
961         struct sk_buff *skb = NULL;
962         unsigned hash;
963         int st;
964         int err;
965         long timeo;
966
967         err = unix_mkname(sunaddr, addr_len, &hash);
968         if (err < 0)
969                 goto out;
970         addr_len = err;
971
972         if (test_bit(SOCK_PASSCRED, &sock->flags)
973                 && !u->addr && (err = unix_autobind(sock)) != 0)
974                 goto out;
975
976         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
977
978         /* First of all, allocate resources.
979            If we do it after the state is locked,
980            we will have to recheck everything again in any case.
981          */
982
983         err = -ENOMEM;
984
985         /* create new sock for complete connection */
986         newsk = unix_create1(NULL);
987         if (newsk == NULL)
988                 goto out;
989
990         /* Allocate skb for sending to listening sock */
991         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
992         if (skb == NULL)
993                 goto out;
994
995 restart:
996         /*  Find listening sock. */
997         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
998         if (!other)
999                 goto out;
1000
1001         /* Latch state of peer */
1002         unix_state_rlock(other);
1003
1004         /* Apparently VFS overslept socket death. Retry. */
1005         if (sock_flag(other, SOCK_DEAD)) {
1006                 unix_state_runlock(other);
1007                 sock_put(other);
1008                 goto restart;
1009         }
1010
1011         err = -ECONNREFUSED;
1012         if (other->sk_state != TCP_LISTEN)
1013                 goto out_unlock;
1014
1015         if (skb_queue_len(&other->sk_receive_queue) >
1016             other->sk_max_ack_backlog) {
1017                 err = -EAGAIN;
1018                 if (!timeo)
1019                         goto out_unlock;
1020
1021                 timeo = unix_wait_for_peer(other, timeo);
1022
1023                 err = sock_intr_errno(timeo);
1024                 if (signal_pending(current))
1025                         goto out;
1026                 sock_put(other);
1027                 goto restart;
1028         }
1029
1030         /* Latch our state.
1031
1032            This is a tricky place. We need to grab the write lock and cannot
1033            drop the lock on the peer. It is dangerous because deadlock is
1034            possible. The connect-to-self case and simultaneous
1035            attempts to connect are eliminated by checking the socket
1036            state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
1037            catch that above before attempting to grab the lock.
1038
1039            Well, and we have to recheck the state after the socket is locked.
1040          */
1041         st = sk->sk_state;
1042
1043         switch (st) {
1044         case TCP_CLOSE:
1045                 /* This is ok... continue with connect */
1046                 break;
1047         case TCP_ESTABLISHED:
1048                 /* Socket is already connected */
1049                 err = -EISCONN;
1050                 goto out_unlock;
1051         default:
1052                 err = -EINVAL;
1053                 goto out_unlock;
1054         }
1055
1056         unix_state_wlock_nested(sk);
1057
1058         if (sk->sk_state != st) {
1059                 unix_state_wunlock(sk);
1060                 unix_state_runlock(other);
1061                 sock_put(other);
1062                 goto restart;
1063         }
1064
1065         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1066         if (err) {
1067                 unix_state_wunlock(sk);
1068                 goto out_unlock;
1069         }
1070
1071         /* The way is open! Quickly set all the necessary fields... */
1072
1073         sock_hold(sk);
1074         unix_peer(newsk)        = sk;
1075         newsk->sk_state         = TCP_ESTABLISHED;
1076         newsk->sk_type          = sk->sk_type;
1077         newsk->sk_peercred.pid  = current->tgid;
1078         newsk->sk_peercred.uid  = current->euid;
1079         newsk->sk_peercred.gid  = current->egid;
1080         newu = unix_sk(newsk);
1081         newsk->sk_sleep         = &newu->peer_wait;
1082         otheru = unix_sk(other);
1083
1084         /* copy address information from listening to new sock*/
1085         if (otheru->addr) {
1086                 atomic_inc(&otheru->addr->refcnt);
1087                 newu->addr = otheru->addr;
1088         }
1089         if (otheru->dentry) {
1090                 newu->dentry    = dget(otheru->dentry);
1091                 newu->mnt       = mntget(otheru->mnt);
1092         }
1093
1094         /* Set credentials */
1095         sk->sk_peercred = other->sk_peercred;
1096
1097         sock->state     = SS_CONNECTED;
1098         sk->sk_state    = TCP_ESTABLISHED;
1099         sock_hold(newsk);
1100
1101         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1102         unix_peer(sk)   = newsk;
1103
1104         unix_state_wunlock(sk);
1105
1106         /* take it and send info to the listening sock */
1107         spin_lock(&other->sk_receive_queue.lock);
1108         __skb_queue_tail(&other->sk_receive_queue, skb);
1109         /* Undo the artificially decreased inflight count after the embryo
1110          * is installed on the listening socket. */
1111         atomic_inc(&newu->inflight);
1112         spin_unlock(&other->sk_receive_queue.lock);
1113         unix_state_runlock(other);
1114         other->sk_data_ready(other, 0);
1115         sock_put(other);
1116         return 0;
1117
1118 out_unlock:
1119         if (other)
1120                 unix_state_runlock(other);
1121
1122 out:
1123         if (skb)
1124                 kfree_skb(skb);
1125         if (newsk)
1126                 unix_release_sock(newsk, 0);
1127         if (other)
1128                 sock_put(other);
1129         return err;
1130 }
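/*
 * Illustrative userspace sketch (not part of af_unix.c): the backlog
 * behaviour implemented above.  With O_NONBLOCK set, connect() to a
 * listener whose accept queue is already full fails with EAGAIN instead of
 * sleeping in unix_wait_for_peer().  The socket path is an arbitrary
 * example supplied by the caller.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static int connect_nonblock(const char *path)
{
        struct sockaddr_un addr;
        int fd = socket(AF_UNIX, SOCK_STREAM, 0);

        if (fd < 0)
                return -1;
        fcntl(fd, F_SETFL, O_NONBLOCK);

        memset(&addr, 0, sizeof(addr));
        addr.sun_family = AF_UNIX;
        strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

        if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                if (errno == EAGAIN)
                        fprintf(stderr, "listener backlog is full\n");
                close(fd);
                return -1;
        }
        return fd;
}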
1131
1132 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1133 {
1134         struct sock *ska=socka->sk, *skb = sockb->sk;
1135
1136         /* Join our sockets back to back */
1137         sock_hold(ska);
1138         sock_hold(skb);
1139         unix_peer(ska)=skb;
1140         unix_peer(skb)=ska;
1141         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1142         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1143         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1144
1145         if (ska->sk_type != SOCK_DGRAM) {
1146                 ska->sk_state = TCP_ESTABLISHED;
1147                 skb->sk_state = TCP_ESTABLISHED;
1148                 socka->state  = SS_CONNECTED;
1149                 sockb->state  = SS_CONNECTED;
1150         }
1151         return 0;
1152 }
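/*
 * Illustrative userspace sketch (not part of af_unix.c): unix_socketpair()
 * above stores the caller's pid/uid/gid in sk_peercred on both ends, so
 * either end can read the credentials back with SO_PEERCRED.
 */
#define _GNU_SOURCE             /* for struct ucred on glibc */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

static void socketpair_peercred_example(void)
{
        int sv[2];
        struct ucred cred;
        socklen_t len = sizeof(cred);

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
                return;

        if (getsockopt(sv[0], SOL_SOCKET, SO_PEERCRED, &cred, &len) == 0)
                printf("peer pid=%d uid=%u gid=%u\n",
                       (int)cred.pid, (unsigned)cred.uid, (unsigned)cred.gid);

        close(sv[0]);
        close(sv[1]);
}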
1153
1154 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1155 {
1156         struct sock *sk = sock->sk;
1157         struct sock *tsk;
1158         struct sk_buff *skb;
1159         int err;
1160
1161         err = -EOPNOTSUPP;
1162         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1163                 goto out;
1164
1165         err = -EINVAL;
1166         if (sk->sk_state != TCP_LISTEN)
1167                 goto out;
1168
1169         /* If socket state is TCP_LISTEN it cannot change (for now...),
1170          * so that no locks are necessary.
1171          */
1172
1173         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1174         if (!skb) {
1175                 /* This means receive shutdown. */
1176                 if (err == 0)
1177                         err = -EINVAL;
1178                 goto out;
1179         }
1180
1181         tsk = skb->sk;
1182         skb_free_datagram(sk, skb);
1183         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1184
1185         /* attach accepted sock to socket */
1186         unix_state_wlock(tsk);
1187         newsock->state = SS_CONNECTED;
1188         sock_graft(tsk, newsock);
1189         unix_state_wunlock(tsk);
1190         return 0;
1191
1192 out:
1193         return err;
1194 }
1195
1196
1197 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1198 {
1199         struct sock *sk = sock->sk;
1200         struct unix_sock *u;
1201         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1202         int err = 0;
1203
1204         if (peer) {
1205                 sk = unix_peer_get(sk);
1206
1207                 err = -ENOTCONN;
1208                 if (!sk)
1209                         goto out;
1210                 err = 0;
1211         } else {
1212                 sock_hold(sk);
1213         }
1214
1215         u = unix_sk(sk);
1216         unix_state_rlock(sk);
1217         if (!u->addr) {
1218                 sunaddr->sun_family = AF_UNIX;
1219                 sunaddr->sun_path[0] = 0;
1220                 *uaddr_len = sizeof(short);
1221         } else {
1222                 struct unix_address *addr = u->addr;
1223
1224                 *uaddr_len = addr->len;
1225                 memcpy(sunaddr, addr->name, *uaddr_len);
1226         }
1227         unix_state_runlock(sk);
1228         sock_put(sk);
1229 out:
1230         return err;
1231 }
1232
1233 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1234 {
1235         int i;
1236
1237         scm->fp = UNIXCB(skb).fp;
1238         skb->destructor = sock_wfree;
1239         UNIXCB(skb).fp = NULL;
1240
1241         for (i=scm->fp->count-1; i>=0; i--)
1242                 unix_notinflight(scm->fp->fp[i]);
1243 }
1244
1245 static void unix_destruct_fds(struct sk_buff *skb)
1246 {
1247         struct scm_cookie scm;
1248         memset(&scm, 0, sizeof(scm));
1249         unix_detach_fds(&scm, skb);
1250
1251         /* Alas, it calls VFS */
1252         /* So fscking what? fput() had been SMP-safe since the last Summer */
1253         scm_destroy(&scm);
1254         sock_wfree(skb);
1255 }
1256
1257 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1258 {
1259         int i;
1260         for (i=scm->fp->count-1; i>=0; i--)
1261                 unix_inflight(scm->fp->fp[i]);
1262         UNIXCB(skb).fp = scm->fp;
1263         skb->destructor = unix_destruct_fds;
1264         scm->fp = NULL;
1265 }
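/*
 * Illustrative userspace sketch (not part of af_unix.c): the sending side of
 * the descriptor passing that unix_attach_fds() handles above.  One file
 * descriptor is packed into an SCM_RIGHTS control message alongside a single
 * byte of ordinary data.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_fd(int sock, int fd_to_pass)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cmsg;
        char data = 'x';                        /* at least one data byte */
        char cbuf[CMSG_SPACE(sizeof(int))];

        memset(&msg, 0, sizeof(msg));
        memset(cbuf, 0, sizeof(cbuf));

        iov.iov_base = &data;
        iov.iov_len = 1;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

        return sendmsg(sock, &msg, 0);
}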
1266
1267 /*
1268  *      Send AF_UNIX data.
1269  */
1270
1271 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1272                               struct msghdr *msg, size_t len)
1273 {
1274         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1275         struct sock *sk = sock->sk;
1276         struct unix_sock *u = unix_sk(sk);
1277         struct sockaddr_un *sunaddr=msg->msg_name;
1278         struct sock *other = NULL;
1279         int namelen = 0; /* fake GCC */
1280         int err;
1281         unsigned hash;
1282         struct sk_buff *skb;
1283         long timeo;
1284         struct scm_cookie tmp_scm;
1285
1286         if (NULL == siocb->scm)
1287                 siocb->scm = &tmp_scm;
1288         err = scm_send(sock, msg, siocb->scm);
1289         if (err < 0)
1290                 return err;
1291
1292         err = -EOPNOTSUPP;
1293         if (msg->msg_flags&MSG_OOB)
1294                 goto out;
1295
1296         if (msg->msg_namelen) {
1297                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1298                 if (err < 0)
1299                         goto out;
1300                 namelen = err;
1301         } else {
1302                 sunaddr = NULL;
1303                 err = -ENOTCONN;
1304                 other = unix_peer_get(sk);
1305                 if (!other)
1306                         goto out;
1307         }
1308
1309         if (test_bit(SOCK_PASSCRED, &sock->flags)
1310                 && !u->addr && (err = unix_autobind(sock)) != 0)
1311                 goto out;
1312
1313         err = -EMSGSIZE;
1314         if (len > sk->sk_sndbuf - 32)
1315                 goto out;
1316
1317         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1318         if (skb==NULL)
1319                 goto out;
1320
1321         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1322         if (siocb->scm->fp)
1323                 unix_attach_fds(siocb->scm, skb);
1324         unix_get_secdata(siocb->scm, skb);
1325
1326         skb->h.raw = skb->data;
1327         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1328         if (err)
1329                 goto out_free;
1330
1331         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1332
1333 restart:
1334         if (!other) {
1335                 err = -ECONNRESET;
1336                 if (sunaddr == NULL)
1337                         goto out_free;
1338
1339                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1340                                         hash, &err);
1341                 if (other==NULL)
1342                         goto out_free;
1343         }
1344
1345         unix_state_rlock(other);
1346         err = -EPERM;
1347         if (!unix_may_send(sk, other))
1348                 goto out_unlock;
1349
1350         if (sock_flag(other, SOCK_DEAD)) {
1351                 /*
1352                  *      Check with 1003.1g - what should
1353                  *      a datagram error be?
1354                  */
1355                 unix_state_runlock(other);
1356                 sock_put(other);
1357
1358                 err = 0;
1359                 unix_state_wlock(sk);
1360                 if (unix_peer(sk) == other) {
1361                         unix_peer(sk)=NULL;
1362                         unix_state_wunlock(sk);
1363
1364                         unix_dgram_disconnected(sk, other);
1365                         sock_put(other);
1366                         err = -ECONNREFUSED;
1367                 } else {
1368                         unix_state_wunlock(sk);
1369                 }
1370
1371                 other = NULL;
1372                 if (err)
1373                         goto out_free;
1374                 goto restart;
1375         }
1376
1377         err = -EPIPE;
1378         if (other->sk_shutdown & RCV_SHUTDOWN)
1379                 goto out_unlock;
1380
1381         if (sk->sk_type != SOCK_SEQPACKET) {
1382                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1383                 if (err)
1384                         goto out_unlock;
1385         }
1386
1387         if (unix_peer(other) != sk &&
1388             (skb_queue_len(&other->sk_receive_queue) >
1389              other->sk_max_ack_backlog)) {
1390                 if (!timeo) {
1391                         err = -EAGAIN;
1392                         goto out_unlock;
1393                 }
1394
1395                 timeo = unix_wait_for_peer(other, timeo);
1396
1397                 err = sock_intr_errno(timeo);
1398                 if (signal_pending(current))
1399                         goto out_free;
1400
1401                 goto restart;
1402         }
1403
1404         skb_queue_tail(&other->sk_receive_queue, skb);
1405         unix_state_runlock(other);
1406         other->sk_data_ready(other, len);
1407         sock_put(other);
1408         scm_destroy(siocb->scm);
1409         return len;
1410
1411 out_unlock:
1412         unix_state_runlock(other);
1413 out_free:
1414         kfree_skb(skb);
1415 out:
1416         if (other)
1417                 sock_put(other);
1418         scm_destroy(siocb->scm);
1419         return err;
1420 }
1421
1422                 
1423 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1424                                struct msghdr *msg, size_t len)
1425 {
1426         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1427         struct sock *sk = sock->sk;
1428         struct sock *other = NULL;
1429         struct sockaddr_un *sunaddr=msg->msg_name;
1430         int err,size;
1431         struct sk_buff *skb;
1432         int sent=0;
1433         struct scm_cookie tmp_scm;
1434
1435         if (NULL == siocb->scm)
1436                 siocb->scm = &tmp_scm;
1437         err = scm_send(sock, msg, siocb->scm);
1438         if (err < 0)
1439                 return err;
1440
1441         err = -EOPNOTSUPP;
1442         if (msg->msg_flags&MSG_OOB)
1443                 goto out_err;
1444
1445         if (msg->msg_namelen) {
1446                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1447                 goto out_err;
1448         } else {
1449                 sunaddr = NULL;
1450                 err = -ENOTCONN;
1451                 other = unix_peer(sk);
1452                 if (!other)
1453                         goto out_err;
1454         }
1455
1456         if (sk->sk_shutdown & SEND_SHUTDOWN)
1457                 goto pipe_err;
1458
1459         while(sent < len)
1460         {
1461                 /*
1462                  *      Optimisation for the fact that under 0.01% of X
1463                  *      messages typically need breaking up.
1464                  */
1465
1466                 size = len-sent;
1467
1468                 /* Keep two messages in the pipe so it schedules better */
1469                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1470                         size = (sk->sk_sndbuf >> 1) - 64;
1471
1472                 if (size > SKB_MAX_ALLOC)
1473                         size = SKB_MAX_ALLOC;
1474                         
1475                 /*
1476                  *      Grab a buffer
1477                  */
1478                  
1479                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1480
1481                 if (skb==NULL)
1482                         goto out_err;
1483
1484                 /*
1485                  *      If you pass two values to sock_alloc_send_skb,
1486                  *      it tries to grab the large buffer with GFP_NOFS
1487                  *      (which can fail easily), and if that fails it grabs the
1488                  *      fallback size buffer, which is under a page and will
1489                  *      succeed. [Alan]
1490                  */
1491                 size = min_t(int, size, skb_tailroom(skb));
1492
1493                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1494                 if (siocb->scm->fp)
1495                         unix_attach_fds(siocb->scm, skb);
1496
1497                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1498                         kfree_skb(skb);
1499                         goto out_err;
1500                 }
1501
1502                 unix_state_rlock(other);
1503
1504                 if (sock_flag(other, SOCK_DEAD) ||
1505                     (other->sk_shutdown & RCV_SHUTDOWN))
1506                         goto pipe_err_free;
1507
1508                 skb_queue_tail(&other->sk_receive_queue, skb);
1509                 unix_state_runlock(other);
1510                 other->sk_data_ready(other, size);
1511                 sent+=size;
1512         }
1513
1514         scm_destroy(siocb->scm);
1515         siocb->scm = NULL;
1516
1517         return sent;
1518
1519 pipe_err_free:
1520         unix_state_runlock(other);
1521         kfree_skb(skb);
1522 pipe_err:
1523         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1524                 send_sig(SIGPIPE,current,0);
1525         err = -EPIPE;
1526 out_err:
1527         scm_destroy(siocb->scm);
1528         siocb->scm = NULL;
1529         return sent ? : err;
1530 }
1531
1532 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1533                                   struct msghdr *msg, size_t len)
1534 {
1535         int err;
1536         struct sock *sk = sock->sk;
1537         
1538         err = sock_error(sk);
1539         if (err)
1540                 return err;
1541
1542         if (sk->sk_state != TCP_ESTABLISHED)
1543                 return -ENOTCONN;
1544
1545         if (msg->msg_namelen)
1546                 msg->msg_namelen = 0;
1547
1548         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1549 }
1550                                                                                             
1551 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1552 {
1553         struct unix_sock *u = unix_sk(sk);
1554
1555         msg->msg_namelen = 0;
1556         if (u->addr) {
1557                 msg->msg_namelen = u->addr->len;
1558                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1559         }
1560 }
1561
1562 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1563                               struct msghdr *msg, size_t size,
1564                               int flags)
1565 {
1566         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1567         struct scm_cookie tmp_scm;
1568         struct sock *sk = sock->sk;
1569         struct unix_sock *u = unix_sk(sk);
1570         int noblock = flags & MSG_DONTWAIT;
1571         struct sk_buff *skb;
1572         int err;
1573
1574         err = -EOPNOTSUPP;
1575         if (flags&MSG_OOB)
1576                 goto out;
1577
1578         msg->msg_namelen = 0;
1579
1580         mutex_lock(&u->readlock);
1581
1582         skb = skb_recv_datagram(sk, flags, noblock, &err);
1583         if (!skb)
1584                 goto out_unlock;
1585
1586         wake_up_interruptible(&u->peer_wait);
1587
1588         if (msg->msg_name)
1589                 unix_copy_addr(msg, skb->sk);
1590
1591         if (size > skb->len)
1592                 size = skb->len;
1593         else if (size < skb->len)
1594                 msg->msg_flags |= MSG_TRUNC;
1595
1596         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1597         if (err)
1598                 goto out_free;
1599
1600         if (!siocb->scm) {
1601                 siocb->scm = &tmp_scm;
1602                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1603         }
1604         siocb->scm->creds = *UNIXCREDS(skb);
1605         unix_set_secdata(siocb->scm, skb);
1606
1607         if (!(flags & MSG_PEEK))
1608         {
1609                 if (UNIXCB(skb).fp)
1610                         unix_detach_fds(siocb->scm, skb);
1611         }
1612         else 
1613         {
1614                 /* It is questionable: on PEEK we could:
1615                    - not return fds - good, but too simple 8)
1616                    - return fds, and not return them on read (old strategy,
1617                      apparently wrong)
1618                    - clone fds (I chose it for now, it is the most universal
1619                      solution)
1620
1621                    POSIX 1003.1g does not actually define this clearly
1622                    at all. POSIX 1003.1g doesn't define a lot of things
1623                    clearly, however!
1624
1625                 */
1626                 if (UNIXCB(skb).fp)
1627                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1628         }
1629         err = size;
1630
1631         scm_recv(sock, msg, siocb->scm, flags);
1632
1633 out_free:
1634         skb_free_datagram(sk,skb);
1635 out_unlock:
1636         mutex_unlock(&u->readlock);
1637 out:
1638         return err;
1639 }
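
/*
 *	Illustration of the MSG_PEEK choice documented above (a sketch, with
 *	no guarantee about the exact descriptor numbers handed out): peeking
 *	at a datagram that carries SCM_RIGHTS yields freshly duplicated
 *	descriptors, and a later plain recvmsg() of the same datagram yields
 *	another set.
 *
 *		struct msghdr msg;	msg_control/msg_controllen set up
 *					by the caller as usual
 *		recvmsg(fd, &msg, MSG_PEEK);	SCM_RIGHTS cmsg present,
 *						descriptors dup'ed for us
 *		recvmsg(fd, &msg, 0);		same payload, descriptors
 *						dup'ed again
 */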
1640
1641 /*
1642  *      Sleep until data has arrived. But check for races.
1643  */
1644  
1645 static long unix_stream_data_wait(struct sock * sk, long timeo)
1646 {
1647         DEFINE_WAIT(wait);
1648
1649         unix_state_rlock(sk);
1650
1651         for (;;) {
1652                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1653
1654                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1655                     sk->sk_err ||
1656                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1657                     signal_pending(current) ||
1658                     !timeo)
1659                         break;
1660
1661                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1662                 unix_state_runlock(sk);
1663                 timeo = schedule_timeout(timeo);
1664                 unix_state_rlock(sk);
1665                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1666         }
1667
1668         finish_wait(sk->sk_sleep, &wait);
1669         unix_state_runlock(sk);
1670         return timeo;
1671 }
1672
1673
1674
1675 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1676                                struct msghdr *msg, size_t size,
1677                                int flags)
1678 {
1679         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1680         struct scm_cookie tmp_scm;
1681         struct sock *sk = sock->sk;
1682         struct unix_sock *u = unix_sk(sk);
1683         struct sockaddr_un *sunaddr=msg->msg_name;
1684         int copied = 0;
1685         int check_creds = 0;
1686         int target;
1687         int err = 0;
1688         long timeo;
1689
1690         err = -EINVAL;
1691         if (sk->sk_state != TCP_ESTABLISHED)
1692                 goto out;
1693
1694         err = -EOPNOTSUPP;
1695         if (flags&MSG_OOB)
1696                 goto out;
1697
1698         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1699         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1700
1701         msg->msg_namelen = 0;
1702
1703         /* Lock the socket to prevent queue disordering
1704          * while we sleep in memcpy_toiovec
1705          */
1706
1707         if (!siocb->scm) {
1708                 siocb->scm = &tmp_scm;
1709                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1710         }
1711
1712         mutex_lock(&u->readlock);
1713
1714         do
1715         {
1716                 int chunk;
1717                 struct sk_buff *skb;
1718
1719                 skb = skb_dequeue(&sk->sk_receive_queue);
1720                 if (skb==NULL)
1721                 {
1722                         if (copied >= target)
1723                                 break;
1724
1725                         /*
1726                          *      POSIX 1003.1g mandates this order.
1727                          */
1728                          
1729                         if ((err = sock_error(sk)) != 0)
1730                                 break;
1731                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1732                                 break;
1733                         err = -EAGAIN;
1734                         if (!timeo)
1735                                 break;
1736                         mutex_unlock(&u->readlock);
1737
1738                         timeo = unix_stream_data_wait(sk, timeo);
1739
1740                         if (signal_pending(current)) {
1741                                 err = sock_intr_errno(timeo);
1742                                 goto out;
1743                         }
1744                         mutex_lock(&u->readlock);
1745                         continue;
1746                 }
1747
1748                 if (check_creds) {
1749                         /* Never glue messages from different writers */
1750                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1751                                 skb_queue_head(&sk->sk_receive_queue, skb);
1752                                 break;
1753                         }
1754                 } else {
1755                         /* Copy credentials */
1756                         siocb->scm->creds = *UNIXCREDS(skb);
1757                         check_creds = 1;
1758                 }
1759
1760                 /* Copy address just once */
1761                 if (sunaddr)
1762                 {
1763                         unix_copy_addr(msg, skb->sk);
1764                         sunaddr = NULL;
1765                 }
1766
1767                 chunk = min_t(unsigned int, skb->len, size);
1768                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1769                         skb_queue_head(&sk->sk_receive_queue, skb);
1770                         if (copied == 0)
1771                                 copied = -EFAULT;
1772                         break;
1773                 }
1774                 copied += chunk;
1775                 size -= chunk;
1776
1777                 /* Mark read part of skb as used */
1778                 if (!(flags & MSG_PEEK))
1779                 {
1780                         skb_pull(skb, chunk);
1781
1782                         if (UNIXCB(skb).fp)
1783                                 unix_detach_fds(siocb->scm, skb);
1784
1785                         /* put the skb back if we didn't use it up.. */
1786                         if (skb->len)
1787                         {
1788                                 skb_queue_head(&sk->sk_receive_queue, skb);
1789                                 break;
1790                         }
1791
1792                         kfree_skb(skb);
1793
1794                         if (siocb->scm->fp)
1795                                 break;
1796                 }
1797                 else
1798                 {
1799                         /* It is questionable, see note in unix_dgram_recvmsg.
1800                          */
1801                         if (UNIXCB(skb).fp)
1802                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1803
1804                         /* put message back and return */
1805                         skb_queue_head(&sk->sk_receive_queue, skb);
1806                         break;
1807                 }
1808         } while (size);
1809
1810         mutex_unlock(&u->readlock);
1811         scm_recv(sock, msg, siocb->scm, flags);
1812 out:
1813         return copied ? : err;
1814 }
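
/*
 *	Reader's-eye sketch of the target logic above (illustrative only):
 *	with MSG_WAITALL the low-water mark becomes the full request, so the
 *	loop keeps copying until that many bytes have arrived, the peer
 *	shuts down, an error or signal occurs, or it reaches a boundary it
 *	will not merge across (a change of sending credentials, or an skb
 *	carrying passed descriptors).
 *
 *		char buf[256];
 *		ssize_t n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
 */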
1815
1816 static int unix_shutdown(struct socket *sock, int mode)
1817 {
1818         struct sock *sk = sock->sk;
1819         struct sock *other;
1820
1821         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1822
1823         if (mode) {
1824                 unix_state_wlock(sk);
1825                 sk->sk_shutdown |= mode;
1826                 other=unix_peer(sk);
1827                 if (other)
1828                         sock_hold(other);
1829                 unix_state_wunlock(sk);
1830                 sk->sk_state_change(sk);
1831
1832                 if (other &&
1833                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1834
1835                         int peer_mode = 0;
1836
1837                         if (mode&RCV_SHUTDOWN)
1838                                 peer_mode |= SEND_SHUTDOWN;
1839                         if (mode&SEND_SHUTDOWN)
1840                                 peer_mode |= RCV_SHUTDOWN;
1841                         unix_state_wlock(other);
1842                         other->sk_shutdown |= peer_mode;
1843                         unix_state_wunlock(other);
1844                         other->sk_state_change(other);
1845                         read_lock(&other->sk_callback_lock);
1846                         if (peer_mode == SHUTDOWN_MASK)
1847                                 sk_wake_async(other,1,POLL_HUP);
1848                         else if (peer_mode & RCV_SHUTDOWN)
1849                                 sk_wake_async(other,1,POLL_IN);
1850                         read_unlock(&other->sk_callback_lock);
1851                 }
1852                 if (other)
1853                         sock_put(other);
1854         }
1855         return 0;
1856 }
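
/*
 *	Effect of the peer_mode mapping above as seen from userspace
 *	(illustrative sketch): shutting down the sending side of one end
 *	marks the receiving side of its peer shut down, so the peer reads
 *	EOF once its queue drains and its pollers are woken.
 *
 *		shutdown(sv[0], SHUT_WR);
 *
 *	On sv[1], recv() then returns 0 after any pending data has been
 *	consumed, and poll() reports POLLIN (plus POLLRDHUP, see unix_poll).
 */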
1857
1858 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1859 {
1860         struct sock *sk = sock->sk;
1861         long amount=0;
1862         int err;
1863
1864         switch(cmd)
1865         {
1866                 case SIOCOUTQ:
1867                         amount = atomic_read(&sk->sk_wmem_alloc);
1868                         err = put_user(amount, (int __user *)arg);
1869                         break;
1870                 case SIOCINQ:
1871                 {
1872                         struct sk_buff *skb;
1873
1874                         if (sk->sk_state == TCP_LISTEN) {
1875                                 err = -EINVAL;
1876                                 break;
1877                         }
1878
1879                         spin_lock(&sk->sk_receive_queue.lock);
1880                         if (sk->sk_type == SOCK_STREAM ||
1881                             sk->sk_type == SOCK_SEQPACKET) {
1882                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1883                                         amount += skb->len;
1884                         } else {
1885                                 skb = skb_peek(&sk->sk_receive_queue);
1886                                 if (skb)
1887                                         amount=skb->len;
1888                         }
1889                         spin_unlock(&sk->sk_receive_queue.lock);
1890                         err = put_user(amount, (int __user *)arg);
1891                         break;
1892                 }
1893
1894                 default:
1895                         err = -ENOIOCTLCMD;
1896                         break;
1897         }
1898         return err;
1899 }
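
/*
 *	Userspace view of the two ioctls above (illustrative sketch):
 *	SIOCOUTQ reports the bytes still accounted to the send side
 *	(sk_wmem_alloc), SIOCINQ - a.k.a. FIONREAD - reports what is
 *	readable: the whole receive queue for stream/seqpacket sockets,
 *	or the size of the next pending datagram for datagram sockets.
 *
 *		int pending = 0;
 *		ioctl(fd, SIOCINQ, &pending);
 *		ioctl(fd, SIOCOUTQ, &pending);
 */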
1900
1901 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1902 {
1903         struct sock *sk = sock->sk;
1904         unsigned int mask;
1905
1906         poll_wait(file, sk->sk_sleep, wait);
1907         mask = 0;
1908
1909         /* exceptional events? */
1910         if (sk->sk_err)
1911                 mask |= POLLERR;
1912         if (sk->sk_shutdown == SHUTDOWN_MASK)
1913                 mask |= POLLHUP;
1914         if (sk->sk_shutdown & RCV_SHUTDOWN)
1915                 mask |= POLLRDHUP;
1916
1917         /* readable? */
1918         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1919             (sk->sk_shutdown & RCV_SHUTDOWN))
1920                 mask |= POLLIN | POLLRDNORM;
1921
1922         /* Connection-based sockets need to check for termination and startup */
1923         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1924                 mask |= POLLHUP;
1925
1926         /*
1927          * We report the socket as writable even when the other side has
1928          * shut down the connection. This prevents stuck sockets.
1929          */
1930         if (unix_writable(sk))
1931                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1932
1933         return mask;
1934 }
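
/*
 *	Consequence of the "writable even after shutdown" rule above
 *	(illustrative sketch): once the peer has gone away, a poller
 *	typically sees POLLOUT together with POLLIN/POLLHUP, and the next
 *	write then fails with EPIPE - so POLLOUT alone is not proof that
 *	the connection is still up.
 *
 *		struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *		poll(&pfd, 1, -1);
 */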
1935
1936
1937 #ifdef CONFIG_PROC_FS
1938 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1939 {
1940         loff_t off = 0;
1941         struct sock *s;
1942
1943         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1944                 if (off == pos) 
1945                         return s;
1946                 ++off;
1947         }
1948         return NULL;
1949 }
1950
1951
1952 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1953 {
1954         spin_lock(&unix_table_lock);
1955         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1956 }
1957
1958 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1959 {
1960         ++*pos;
1961
1962         if (v == (void *)1) 
1963                 return first_unix_socket(seq->private);
1964         return next_unix_socket(seq->private, v);
1965 }
1966
1967 static void unix_seq_stop(struct seq_file *seq, void *v)
1968 {
1969         spin_unlock(&unix_table_lock);
1970 }
1971
1972 static int unix_seq_show(struct seq_file *seq, void *v)
1973 {
1974         
1975         if (v == (void *)1)
1976                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1977                          "Inode Path\n");
1978         else {
1979                 struct sock *s = v;
1980                 struct unix_sock *u = unix_sk(s);
1981                 unix_state_rlock(s);
1982
1983                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1984                         s,
1985                         atomic_read(&s->sk_refcnt),
1986                         0,
1987                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1988                         s->sk_type,
1989                         s->sk_socket ?
1990                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1991                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1992                         sock_i_ino(s));
1993
1994                 if (u->addr) {
1995                         int i, len;
1996                         seq_putc(seq, ' ');
1997
1998                         i = 0;
1999                         len = u->addr->len - sizeof(short);
2000                         if (!UNIX_ABSTRACT(s))
2001                                 len--;
2002                         else {
2003                                 seq_putc(seq, '@');
2004                                 i++;
2005                         }
2006                         for ( ; i < len; i++)
2007                                 seq_putc(seq, u->addr->name->sun_path[i]);
2008                 }
2009                 unix_state_runlock(s);
2010                 seq_putc(seq, '\n');
2011         }
2012
2013         return 0;
2014 }
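
/*
 *	The format produced above, with purely illustrative values (the
 *	pointer, refcount and inode below are made up):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff81003a1b2c00: 00000002 00000000 00010000 0001 01 12345 /tmp/demo.sock
 *
 *	Flags shows __SO_ACCEPTCON (0x00010000) for listening sockets, Type
 *	is the socket type (0001 = SOCK_STREAM) and St the SS_* state;
 *	abstract-namespace names are printed with a leading '@'.
 */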
2015
2016 static struct seq_operations unix_seq_ops = {
2017         .start  = unix_seq_start,
2018         .next   = unix_seq_next,
2019         .stop   = unix_seq_stop,
2020         .show   = unix_seq_show,
2021 };
2022
2023
2024 static int unix_seq_open(struct inode *inode, struct file *file)
2025 {
2026         struct seq_file *seq;
2027         int rc = -ENOMEM;
2028         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2029
2030         if (!iter)
2031                 goto out;
2032
2033         rc = seq_open(file, &unix_seq_ops);
2034         if (rc)
2035                 goto out_kfree;
2036
2037         seq          = file->private_data;
2038         seq->private = iter;
2039         *iter = 0;
2040 out:
2041         return rc;
2042 out_kfree:
2043         kfree(iter);
2044         goto out;
2045 }
2046
2047 static struct file_operations unix_seq_fops = {
2048         .owner          = THIS_MODULE,
2049         .open           = unix_seq_open,
2050         .read           = seq_read,
2051         .llseek         = seq_lseek,
2052         .release        = seq_release_private,
2053 };
2054
2055 #endif
2056
2057 static struct net_proto_family unix_family_ops = {
2058         .family = PF_UNIX,
2059         .create = unix_create,
2060         .owner  = THIS_MODULE,
2061 };
2062
2063 static int __init af_unix_init(void)
2064 {
2065         int rc = -1;
2066         struct sk_buff *dummy_skb;
2067
2068         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2069                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2070                 goto out;
2071         }
2072
2073         rc = proto_register(&unix_proto, 1);
2074         if (rc != 0) {
2075                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2076                        __FUNCTION__);
2077                 goto out;
2078         }
2079
2080         sock_register(&unix_family_ops);
2081 #ifdef CONFIG_PROC_FS
2082         proc_net_fops_create("unix", 0, &unix_seq_fops);
2083 #endif
2084         unix_sysctl_register();
2085 out:
2086         return rc;
2087 }
2088
2089 static void __exit af_unix_exit(void)
2090 {
2091         sock_unregister(PF_UNIX);
2092         unix_sysctl_unregister();
2093         proc_net_remove("unix");
2094         proto_unregister(&unix_proto);
2095 }
2096
2097 module_init(af_unix_init);
2098 module_exit(af_unix_exit);
2099
2100 MODULE_LICENSE("GPL");
2101 MODULE_ALIAS_NETPROTO(PF_UNIX);