upgrade to linux 2.6.10-1.12_FC2
[linux-2.6.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge number
40  *                                      of socks hashed (this for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/config.h>
87 #include <linux/kernel.h>
88 #include <linux/major.h>
89 #include <linux/signal.h>
90 #include <linux/sched.h>
91 #include <linux/errno.h>
92 #include <linux/string.h>
93 #include <linux/stat.h>
94 #include <linux/dcache.h>
95 #include <linux/namei.h>
96 #include <linux/socket.h>
97 #include <linux/un.h>
98 #include <linux/fcntl.h>
99 #include <linux/termios.h>
100 #include <linux/sockios.h>
101 #include <linux/net.h>
102 #include <linux/in.h>
103 #include <linux/fs.h>
104 #include <linux/slab.h>
105 #include <asm/uaccess.h>
106 #include <linux/skbuff.h>
107 #include <linux/netdevice.h>
108 #include <net/sock.h>
109 #include <linux/tcp.h>
110 #include <net/af_unix.h>
111 #include <linux/proc_fs.h>
112 #include <linux/seq_file.h>
113 #include <net/scm.h>
114 #include <linux/init.h>
115 #include <linux/poll.h>
116 #include <linux/smp_lock.h>
117 #include <linux/rtnetlink.h>
118 #include <linux/mount.h>
119 #include <net/checksum.h>
120 #include <linux/security.h>
121 #include <linux/vs_context.h>
122 #include <linux/vs_network.h>
123 #include <linux/vs_limit.h>
124
125 int sysctl_unix_max_dgram_qlen = 10;
126
127 kmem_cache_t *unix_sk_cachep;
128
129 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
130 rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
131 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
132
133 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
134
135 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
136
137 /*
138  *  SMP locking strategy:
139  *    hash table is protected with rwlock unix_table_lock
140  *    each socket state is protected by separate rwlock.
141  */
142
143 static inline unsigned unix_hash_fold(unsigned hash)
144 {
145         hash ^= hash>>16;
146         hash ^= hash>>8;
147         return hash&(UNIX_HASH_SIZE-1);
148 }
149
150 #define unix_peer(sk) (unix_sk(sk)->peer)
151
152 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
153 {
154         return unix_peer(osk) == sk;
155 }
156
157 static inline int unix_may_send(struct sock *sk, struct sock *osk)
158 {
159         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
160 }
161
162 static struct sock *unix_peer_get(struct sock *s)
163 {
164         struct sock *peer;
165
166         unix_state_rlock(s);
167         peer = unix_peer(s);
168         if (peer)
169                 sock_hold(peer);
170         unix_state_runlock(s);
171         return peer;
172 }
173
174 static inline void unix_release_addr(struct unix_address *addr)
175 {
176         if (atomic_dec_and_test(&addr->refcnt))
177                 kfree(addr);
178 }
179
180 /*
181  *      Check unix socket name:
182  *              - should be not zero length.
183  *              - if started by not zero, should be NULL terminated (FS object)
184  *              - if started by zero, it is abstract name.
185  */
186  
187 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
188 {
189         if (len <= sizeof(short) || len > sizeof(*sunaddr))
190                 return -EINVAL;
191         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
192                 return -EINVAL;
193         if (sunaddr->sun_path[0]) {
194                 ((char *)sunaddr)[len]=0;
195                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
196                 return len;
197         }
198
199         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
200         return len;
201 }
202
/*
 * Unlink sk from whatever hash chain it is on.  Takes no lock itself;
 * the callers in this file hold unix_table_lock for writing.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
207
/*
 * Link sk onto the given hash chain.  sk must not be hashed already
 * (checked with BUG_TRAP).  Takes no lock itself.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        BUG_TRAP(sk_unhashed(sk));

        sk_add_node(sk, list);
}
213
214 static inline void unix_remove_socket(struct sock *sk)
215 {
216         write_lock(&unix_table_lock);
217         __unix_remove_socket(sk);
218         write_unlock(&unix_table_lock);
219 }
220
221 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
222 {
223         write_lock(&unix_table_lock);
224         __unix_insert_socket(list, sk);
225         write_unlock(&unix_table_lock);
226 }
227
228 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
229                                               int len, int type, unsigned hash)
230 {
231         struct sock *s;
232         struct hlist_node *node;
233
234         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
235                 struct unix_sock *u = unix_sk(s);
236
237                 if (u->addr->len == len &&
238                     !memcmp(u->addr->name, sunname, len))
239                         goto found;
240         }
241         s = NULL;
242 found:
243         return s;
244 }
245
246 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
247                                                    int len, int type,
248                                                    unsigned hash)
249 {
250         struct sock *s;
251
252         read_lock(&unix_table_lock);
253         s = __unix_find_socket_byname(sunname, len, type, hash);
254         if (s)
255                 sock_hold(s);
256         read_unlock(&unix_table_lock);
257         return s;
258 }
259
260 static struct sock *unix_find_socket_byinode(struct inode *i)
261 {
262         struct sock *s;
263         struct hlist_node *node;
264
265         read_lock(&unix_table_lock);
266         sk_for_each(s, node,
267                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
268                 struct dentry *dentry = unix_sk(s)->dentry;
269
270                 if(dentry && dentry->d_inode == i)
271                 {
272                         sock_hold(s);
273                         goto found;
274                 }
275         }
276         s = NULL;
277 found:
278         read_unlock(&unix_table_lock);
279         return s;
280 }
281
282 static inline int unix_writable(struct sock *sk)
283 {
284         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
285 }
286
287 static void unix_write_space(struct sock *sk)
288 {
289         read_lock(&sk->sk_callback_lock);
290         if (unix_writable(sk)) {
291                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
292                         wake_up_interruptible(sk->sk_sleep);
293                 sk_wake_async(sk, 2, POLL_OUT);
294         }
295         read_unlock(&sk->sk_callback_lock);
296 }
297
298 /* When dgram socket disconnects (or changes its peer), we clear its receive
299  * queue of packets arrived from previous peer. First, it allows to do
300  * flow control based only on wmem_alloc; second, sk connected to peer
301  * may receive messages only from that peer. */
302 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
303 {
304         if (skb_queue_len(&sk->sk_receive_queue)) {
305                 skb_queue_purge(&sk->sk_receive_queue);
306                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
307
308                 /* If one link of bidirectional dgram pipe is disconnected,
309                  * we signal error. Messages are lost. Do not make this,
310                  * when peer was not connected to us.
311                  */
312                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
313                         other->sk_err = ECONNRESET;
314                         other->sk_error_report(other);
315                 }
316         }
317 }
318
319 static void unix_sock_destructor(struct sock *sk)
320 {
321         struct unix_sock *u = unix_sk(sk);
322
323         skb_queue_purge(&sk->sk_receive_queue);
324
325         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
326         BUG_TRAP(sk_unhashed(sk));
327         BUG_TRAP(!sk->sk_socket);
328         if (!sock_flag(sk, SOCK_DEAD)) {
329                 printk("Attempt to release alive unix socket: %p\n", sk);
330                 return;
331         }
332
333         if (u->addr)
334                 unix_release_addr(u->addr);
335
336         atomic_dec(&unix_nr_socks);
337 #ifdef UNIX_REFCNT_DEBUG
338         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
339 #endif
340 }
341
342 static int unix_release_sock (struct sock *sk, int embrion)
343 {
344         struct unix_sock *u = unix_sk(sk);
345         struct dentry *dentry;
346         struct vfsmount *mnt;
347         struct sock *skpair;
348         struct sk_buff *skb;
349         int state;
350
351         unix_remove_socket(sk);
352
353         /* Clear state */
354         unix_state_wlock(sk);
355         sock_orphan(sk);
356         sk->sk_shutdown = SHUTDOWN_MASK;
357         dentry       = u->dentry;
358         u->dentry    = NULL;
359         mnt          = u->mnt;
360         u->mnt       = NULL;
361         state = sk->sk_state;
362         sk->sk_state = TCP_CLOSE;
363         unix_state_wunlock(sk);
364
365         wake_up_interruptible_all(&u->peer_wait);
366
367         skpair=unix_peer(sk);
368
369         if (skpair!=NULL) {
370                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
371                         unix_state_wlock(skpair);
372                         /* No more writes */
373                         skpair->sk_shutdown = SHUTDOWN_MASK;
374                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
375                                 skpair->sk_err = ECONNRESET;
376                         unix_state_wunlock(skpair);
377                         skpair->sk_state_change(skpair);
378                         read_lock(&skpair->sk_callback_lock);
379                         sk_wake_async(skpair,1,POLL_HUP);
380                         read_unlock(&skpair->sk_callback_lock);
381                 }
382                 sock_put(skpair); /* It may now die */
383                 unix_peer(sk) = NULL;
384         }
385
386         /* Try to flush out this socket. Throw out buffers at least */
387
388         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
389                 if (state==TCP_LISTEN)
390                         unix_release_sock(skb->sk, 1);
391                 /* passed fds are erased in the kfree_skb hook        */
392                 kfree_skb(skb);
393         }
394
395         if (dentry) {
396                 dput(dentry);
397                 mntput(mnt);
398         }
399
400         vx_sock_dec(sk);
401         clr_vx_info(&sk->sk_vx_info);
402         clr_nx_info(&sk->sk_nx_info);
403         sock_put(sk);
404
405         /* ---- Socket is dead now and most probably destroyed ---- */
406
407         /*
408          * Fixme: BSD difference: In BSD all sockets connected to use get
409          *        ECONNRESET and we die on the spot. In Linux we behave
410          *        like files and pipes do and wait for the last
411          *        dereference.
412          *
413          * Can't we simply set sock->err?
414          *
415          *        What the above comment does talk about? --ANK(980817)
416          */
417
418         if (atomic_read(&unix_tot_inflight))
419                 unix_gc();              /* Garbage collect fds */       
420
421         return 0;
422 }
423
424 static int unix_listen(struct socket *sock, int backlog)
425 {
426         int err;
427         struct sock *sk = sock->sk;
428         struct unix_sock *u = unix_sk(sk);
429
430         err = -EOPNOTSUPP;
431         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
432                 goto out;                       /* Only stream/seqpacket sockets accept */
433         err = -EINVAL;
434         if (!u->addr)
435                 goto out;                       /* No listens on an unbound socket */
436         unix_state_wlock(sk);
437         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
438                 goto out_unlock;
439         if (backlog > sk->sk_max_ack_backlog)
440                 wake_up_interruptible_all(&u->peer_wait);
441         sk->sk_max_ack_backlog  = backlog;
442         sk->sk_state            = TCP_LISTEN;
443         /* set credentials so connect can copy them */
444         sk->sk_peercred.pid     = current->tgid;
445         sk->sk_peercred.uid     = current->euid;
446         sk->sk_peercred.gid     = current->egid;
447         err = 0;
448
449 out_unlock:
450         unix_state_wunlock(sk);
451 out:
452         return err;
453 }
454
455 static int unix_release(struct socket *);
456 static int unix_bind(struct socket *, struct sockaddr *, int);
457 static int unix_stream_connect(struct socket *, struct sockaddr *,
458                                int addr_len, int flags);
459 static int unix_socketpair(struct socket *, struct socket *);
460 static int unix_accept(struct socket *, struct socket *, int);
461 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
462 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
463 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
464 static int unix_shutdown(struct socket *, int);
465 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
466                                struct msghdr *, size_t);
467 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
468                                struct msghdr *, size_t, int);
469 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
470                               struct msghdr *, size_t);
471 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
472                               struct msghdr *, size_t, int);
473 static int unix_dgram_connect(struct socket *, struct sockaddr *,
474                               int, int);
475 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
476                                   struct msghdr *, size_t);
477
478 static struct proto_ops unix_stream_ops = {
479         .family =       PF_UNIX,
480         .owner =        THIS_MODULE,
481         .release =      unix_release,
482         .bind =         unix_bind,
483         .connect =      unix_stream_connect,
484         .socketpair =   unix_socketpair,
485         .accept =       unix_accept,
486         .getname =      unix_getname,
487         .poll =         unix_poll,
488         .ioctl =        unix_ioctl,
489         .listen =       unix_listen,
490         .shutdown =     unix_shutdown,
491         .setsockopt =   sock_no_setsockopt,
492         .getsockopt =   sock_no_getsockopt,
493         .sendmsg =      unix_stream_sendmsg,
494         .recvmsg =      unix_stream_recvmsg,
495         .mmap =         sock_no_mmap,
496         .sendpage =     sock_no_sendpage,
497 };
498
499 static struct proto_ops unix_dgram_ops = {
500         .family =       PF_UNIX,
501         .owner =        THIS_MODULE,
502         .release =      unix_release,
503         .bind =         unix_bind,
504         .connect =      unix_dgram_connect,
505         .socketpair =   unix_socketpair,
506         .accept =       sock_no_accept,
507         .getname =      unix_getname,
508         .poll =         datagram_poll,
509         .ioctl =        unix_ioctl,
510         .listen =       sock_no_listen,
511         .shutdown =     unix_shutdown,
512         .setsockopt =   sock_no_setsockopt,
513         .getsockopt =   sock_no_getsockopt,
514         .sendmsg =      unix_dgram_sendmsg,
515         .recvmsg =      unix_dgram_recvmsg,
516         .mmap =         sock_no_mmap,
517         .sendpage =     sock_no_sendpage,
518 };
519
520 static struct proto_ops unix_seqpacket_ops = {
521         .family =       PF_UNIX,
522         .owner =        THIS_MODULE,
523         .release =      unix_release,
524         .bind =         unix_bind,
525         .connect =      unix_stream_connect,
526         .socketpair =   unix_socketpair,
527         .accept =       unix_accept,
528         .getname =      unix_getname,
529         .poll =         datagram_poll,
530         .ioctl =        unix_ioctl,
531         .listen =       unix_listen,
532         .shutdown =     unix_shutdown,
533         .setsockopt =   sock_no_setsockopt,
534         .getsockopt =   sock_no_getsockopt,
535         .sendmsg =      unix_seqpacket_sendmsg,
536         .recvmsg =      unix_dgram_recvmsg,
537         .mmap =         sock_no_mmap,
538         .sendpage =     sock_no_sendpage,
539 };
540
541 static struct sock * unix_create1(struct socket *sock)
542 {
543         struct sock *sk = NULL;
544         struct unix_sock *u;
545
546         if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
547                 goto out;
548
549         sk = sk_alloc(PF_UNIX, GFP_KERNEL, sizeof(struct unix_sock),
550                       unix_sk_cachep);
551         if (!sk)
552                 goto out;
553
554         atomic_inc(&unix_nr_socks);
555
556         sock_init_data(sock,sk);
557         sk_set_owner(sk, THIS_MODULE);
558
559         set_vx_info(&sk->sk_vx_info, current->vx_info);
560         sk->sk_xid = vx_current_xid();
561         vx_sock_inc(sk);
562         set_nx_info(&sk->sk_nx_info, current->nx_info);
563
564         sk->sk_write_space      = unix_write_space;
565         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
566         sk->sk_destruct         = unix_sock_destructor;
567         u         = unix_sk(sk);
568         u->dentry = NULL;
569         u->mnt    = NULL;
570         rwlock_init(&u->lock);
571         atomic_set(&u->inflight, sock ? 0 : -1);
572         init_MUTEX(&u->readsem); /* single task reading lock */
573         init_waitqueue_head(&u->peer_wait);
574         unix_insert_socket(unix_sockets_unbound, sk);
575 out:
576         return sk;
577 }
578
579 static int unix_create(struct socket *sock, int protocol)
580 {
581         if (protocol && protocol != PF_UNIX)
582                 return -EPROTONOSUPPORT;
583
584         sock->state = SS_UNCONNECTED;
585
586         switch (sock->type) {
587         case SOCK_STREAM:
588                 sock->ops = &unix_stream_ops;
589                 break;
590                 /*
591                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
592                  *      nothing uses it.
593                  */
594         case SOCK_RAW:
595                 sock->type=SOCK_DGRAM;
596         case SOCK_DGRAM:
597                 sock->ops = &unix_dgram_ops;
598                 break;
599         case SOCK_SEQPACKET:
600                 sock->ops = &unix_seqpacket_ops;
601                 break;
602         default:
603                 return -ESOCKTNOSUPPORT;
604         }
605
606         return unix_create1(sock) ? 0 : -ENOMEM;
607 }
608
609 static int unix_release(struct socket *sock)
610 {
611         struct sock *sk = sock->sk;
612
613         if (!sk)
614                 return 0;
615
616         sock->sk = NULL;
617
618         return unix_release_sock (sk, 0);
619 }
620
621 static int unix_autobind(struct socket *sock)
622 {
623         struct sock *sk = sock->sk;
624         struct unix_sock *u = unix_sk(sk);
625         static u32 ordernum = 1;
626         struct unix_address * addr;
627         int err;
628
629         down(&u->readsem);
630
631         err = 0;
632         if (u->addr)
633                 goto out;
634
635         err = -ENOMEM;
636         addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
637         if (!addr)
638                 goto out;
639
640         memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
641         addr->name->sun_family = AF_UNIX;
642         atomic_set(&addr->refcnt, 1);
643
644 retry:
645         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
646         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
647
648         write_lock(&unix_table_lock);
649         ordernum = (ordernum+1)&0xFFFFF;
650
651         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
652                                       addr->hash)) {
653                 write_unlock(&unix_table_lock);
654                 /* Sanity yield. It is unusual case, but yet... */
655                 if (!(ordernum&0xFF))
656                         yield();
657                 goto retry;
658         }
659         addr->hash ^= sk->sk_type;
660
661         __unix_remove_socket(sk);
662         u->addr = addr;
663         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
664         write_unlock(&unix_table_lock);
665         err = 0;
666
667 out:    up(&u->readsem);
668         return err;
669 }
670
/*
 * Resolve a unix address to the socket bound to it.
 *
 * Abstract names (first path byte zero) are looked up in the hash table
 * by name.  Filesystem names are resolved through the VFS: the path
 * must be writable by the caller and refer to a socket inode, and the
 * socket is then found by inode.
 *
 * Returns the socket with a reference held.  On failure returns NULL
 * and stores the error in *error: the path_lookup()/permission() error,
 * -ECONNREFUSED when nothing is bound there, or -EPROTOTYPE when the
 * socket found by path has a different type.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
                                    int type, unsigned hash, int *error)
{
        struct nameidata nd;
        struct sock *u;
        int err = 0;

        if (!sunname->sun_path[0]) {
                struct dentry *dentry;

                u = unix_find_socket_byname(sunname, len, type, hash);
                if (!u) {
                        err = -ECONNREFUSED;
                        goto fail;
                }

                dentry = unix_sk(u)->dentry;
                if (dentry)
                        touch_atime(unix_sk(u)->mnt, dentry);
                return u;
        }

        err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
        if (err)
                goto fail;

        err = permission(nd.dentry->d_inode, MAY_WRITE, &nd);
        if (err)
                goto put_fail;

        err = -ECONNREFUSED;
        if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
                goto put_fail;

        u = unix_find_socket_byinode(nd.dentry->d_inode);
        if (!u)
                goto put_fail;

        /* Touch atime only when the type matches, i.e. on success. */
        if (u->sk_type == type)
                touch_atime(nd.mnt, nd.dentry);

        path_release(&nd);

        if (u->sk_type != type) {
                sock_put(u);
                err = -EPROTOTYPE;
                goto fail;
        }
        return u;

put_fail:
        path_release(&nd);
fail:
        *error = err;
        return NULL;
}
722
723
724 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
725 {
726         struct sock *sk = sock->sk;
727         struct unix_sock *u = unix_sk(sk);
728         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
729         struct dentry * dentry = NULL;
730         struct nameidata nd;
731         int err;
732         unsigned hash;
733         struct unix_address *addr;
734         struct hlist_head *list;
735
736         err = -EINVAL;
737         if (sunaddr->sun_family != AF_UNIX)
738                 goto out;
739
740         if (addr_len==sizeof(short)) {
741                 err = unix_autobind(sock);
742                 goto out;
743         }
744
745         err = unix_mkname(sunaddr, addr_len, &hash);
746         if (err < 0)
747                 goto out;
748         addr_len = err;
749
750         down(&u->readsem);
751
752         err = -EINVAL;
753         if (u->addr)
754                 goto out_up;
755
756         err = -ENOMEM;
757         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
758         if (!addr)
759                 goto out_up;
760
761         memcpy(addr->name, sunaddr, addr_len);
762         addr->len = addr_len;
763         addr->hash = hash ^ sk->sk_type;
764         atomic_set(&addr->refcnt, 1);
765
766         if (sunaddr->sun_path[0]) {
767                 unsigned int mode;
768                 err = 0;
769                 /*
770                  * Get the parent directory, calculate the hash for last
771                  * component.
772                  */
773                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
774                 if (err)
775                         goto out_mknod_parent;
776                 /*
777                  * Yucky last component or no last component at all?
778                  * (foo/., foo/.., /////)
779                  */
780                 err = -EEXIST;
781                 if (nd.last_type != LAST_NORM)
782                         goto out_mknod;
783                 /*
784                  * Lock the directory.
785                  */
786                 down(&nd.dentry->d_inode->i_sem);
787                 /*
788                  * Do the final lookup.
789                  */
790                 dentry = lookup_hash(&nd.last, nd.dentry);
791                 err = PTR_ERR(dentry);
792                 if (IS_ERR(dentry))
793                         goto out_mknod_unlock;
794                 err = -ENOENT;
795                 /*
796                  * Special case - lookup gave negative, but... we had foo/bar/
797                  * From the vfs_mknod() POV we just have a negative dentry -
798                  * all is fine. Let's be bastards - you had / on the end, you've
799                  * been asking for (non-existent) directory. -ENOENT for you.
800                  */
801                 if (nd.last.name[nd.last.len] && !dentry->d_inode)
802                         goto out_mknod_dput;
803                 /*
804                  * All right, let's create it.
805                  */
806                 mode = S_IFSOCK |
807                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
808                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
809                 if (err)
810                         goto out_mknod_dput;
811                 up(&nd.dentry->d_inode->i_sem);
812                 dput(nd.dentry);
813                 nd.dentry = dentry;
814
815                 addr->hash = UNIX_HASH_SIZE;
816         }
817
818         write_lock(&unix_table_lock);
819
820         if (!sunaddr->sun_path[0]) {
821                 err = -EADDRINUSE;
822                 if (__unix_find_socket_byname(sunaddr, addr_len,
823                                               sk->sk_type, hash)) {
824                         unix_release_addr(addr);
825                         goto out_unlock;
826                 }
827
828                 list = &unix_socket_table[addr->hash];
829         } else {
830                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
831                 u->dentry = nd.dentry;
832                 u->mnt    = nd.mnt;
833         }
834
835         err = 0;
836         __unix_remove_socket(sk);
837         u->addr = addr;
838         __unix_insert_socket(list, sk);
839
840 out_unlock:
841         write_unlock(&unix_table_lock);
842 out_up:
843         up(&u->readsem);
844 out:
845         return err;
846
847 out_mknod_dput:
848         dput(dentry);
849 out_mknod_unlock:
850         up(&nd.dentry->d_inode->i_sem);
851 out_mknod:
852         path_release(&nd);
853 out_mknod_parent:
854         if (err==-EEXIST)
855                 err=-EADDRINUSE;
856         unix_release_addr(addr);
857         goto out_up;
858 }
859
860 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
861                               int alen, int flags)
862 {
863         struct sock *sk = sock->sk;
864         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
865         struct sock *other;
866         unsigned hash;
867         int err;
868
869         if (addr->sa_family != AF_UNSPEC) {
870                 err = unix_mkname(sunaddr, alen, &hash);
871                 if (err < 0)
872                         goto out;
873                 alen = err;
874
875                 if (test_bit(SOCK_PASS_CRED, &sock->flags) && !unix_sk(sk)->addr &&
876                     (err = unix_autobind(sock)) != 0)
877                         goto out;
878
879                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
880                 if (!other)
881                         goto out;
882
883                 unix_state_wlock(sk);
884
885                 err = -EPERM;
886                 if (!unix_may_send(sk, other))
887                         goto out_unlock;
888
889                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
890                 if (err)
891                         goto out_unlock;
892
893         } else {
894                 /*
895                  *      1003.1g breaking connected state with AF_UNSPEC
896                  */
897                 other = NULL;
898                 unix_state_wlock(sk);
899         }
900
901         /*
902          * If it was connected, reconnect.
903          */
904         if (unix_peer(sk)) {
905                 struct sock *old_peer = unix_peer(sk);
906                 unix_peer(sk)=other;
907                 unix_state_wunlock(sk);
908
909                 if (other != old_peer)
910                         unix_dgram_disconnected(sk, old_peer);
911                 sock_put(old_peer);
912         } else {
913                 unix_peer(sk)=other;
914                 unix_state_wunlock(sk);
915         }
916         return 0;
917
918 out_unlock:
919         unix_state_wunlock(sk);
920         sock_put(other);
921 out:
922         return err;
923 }
924
925 static long unix_wait_for_peer(struct sock *other, long timeo)
926 {
927         struct unix_sock *u = unix_sk(other);
928         int sched;
929         DEFINE_WAIT(wait);
930
931         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
932
933         sched = !sock_flag(other, SOCK_DEAD) &&
934                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
935                 (skb_queue_len(&other->sk_receive_queue) >
936                  other->sk_max_ack_backlog);
937
938         unix_state_runlock(other);
939
940         if (sched)
941                 timeo = schedule_timeout(timeo);
942
943         finish_wait(&u->peer_wait, &wait);
944         return timeo;
945 }
946
/*
 *	Connect a socket to a listening AF_UNIX socket.
 *
 *	A new sock (the "embryo") and an skb of size 1 are allocated up
 *	front.  Once the listener has been found and both states are locked,
 *	sk and the embryo are made peers of each other and the skb is queued
 *	on the listener's receive queue.
 *
 *	Returns 0 on success or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct unix_sock *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	/* Nothing is allocated yet, so these failures can return directly. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		return err;
	addr_len = err;

	if (test_bit(SOCK_PASS_CRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			return err;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate resources first.  If we did it after the state is
	 * locked we would have to recheck everything again anyway.
	 */
	err = -ENOMEM;

	/* New sock that will become the accepted end of the connection. */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		return err;

	/* skb that is queued on the listening sock. */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find the listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch the state of the peer. */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/* unix_wait_for_peer() releases the read lock on other. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/*
	 * Latch our own state.
	 *
	 * This is a tricky place: we need our write lock and cannot drop
	 * the lock on the peer, so a deadlock is possible.  Connect-to-self
	 * and simultaneous connect attempts are eliminated by checking the
	 * socket state: other is TCP_LISTEN, and if sk were TCP_LISTEN we
	 * bail out below before trying to grab the lock.
	 *
	 * The state has to be rechecked once the socket is locked.
	 */
	st = sk->sk_state;

	if (st == TCP_ESTABLISHED) {
		/* Socket is already connected. */
		err = -EISCONN;
		goto out_unlock;
	}
	if (st != TCP_CLOSE) {
		err = -EINVAL;
		goto out_unlock;
	}
	/* TCP_CLOSE: fine, continue with the connect. */

	unix_state_wlock(sk);

	if (sk->sk_state != st) {
		/* State changed while we were taking the lock; start over. */
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open: quickly fill in all the necessary fields. */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* Copy address information from the listening sock to the new one. */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Our view of the peer's credentials is that of the listener. */
	sk->sk_peercred = other->sk_peercred;

	sock_hold(newsk);
	unix_peer(sk)	= newsk;
	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;

	unix_state_wunlock(sk);

	/* Hand the skb to the listening sock. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/*
	 * Undo the artificially decreased inflight count now that the
	 * embryo is installed on the listening socket.
	 */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1123
1124 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1125 {
1126         struct sock *ska=socka->sk, *skb = sockb->sk;
1127
1128         /* Join our sockets back to back */
1129         sock_hold(ska);
1130         sock_hold(skb);
1131         unix_peer(ska)=skb;
1132         unix_peer(skb)=ska;
1133         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1134         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1135         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1136
1137         if (ska->sk_type != SOCK_DGRAM) {
1138                 ska->sk_state = TCP_ESTABLISHED;
1139                 skb->sk_state = TCP_ESTABLISHED;
1140                 socka->state  = SS_CONNECTED;
1141                 sockb->state  = SS_CONNECTED;
1142         }
1143         return 0;
1144 }
1145
1146 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1147 {
1148         struct sock *sk = sock->sk;
1149         struct sock *tsk;
1150         struct sk_buff *skb;
1151         int err;
1152
1153         err = -EOPNOTSUPP;
1154         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1155                 goto out;
1156
1157         err = -EINVAL;
1158         if (sk->sk_state != TCP_LISTEN)
1159                 goto out;
1160
1161         /* If socket state is TCP_LISTEN it cannot change (for now...),
1162          * so that no locks are necessary.
1163          */
1164
1165         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1166         if (!skb) {
1167                 /* This means receive shutdown. */
1168                 if (err == 0)
1169                         err = -EINVAL;
1170                 goto out;
1171         }
1172
1173         tsk = skb->sk;
1174         skb_free_datagram(sk, skb);
1175         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1176
1177         /* attach accepted sock to socket */
1178         unix_state_wlock(tsk);
1179         newsock->state = SS_CONNECTED;
1180         sock_graft(tsk, newsock);
1181         unix_state_wunlock(tsk);
1182         return 0;
1183
1184 out:
1185         return err;
1186 }
1187
1188
1189 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1190 {
1191         struct sock *sk = sock->sk;
1192         struct unix_sock *u;
1193         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1194         int err = 0;
1195
1196         if (peer) {
1197                 sk = unix_peer_get(sk);
1198
1199                 err = -ENOTCONN;
1200                 if (!sk)
1201                         goto out;
1202                 err = 0;
1203         } else {
1204                 sock_hold(sk);
1205         }
1206
1207         u = unix_sk(sk);
1208         unix_state_rlock(sk);
1209         if (!u->addr) {
1210                 sunaddr->sun_family = AF_UNIX;
1211                 sunaddr->sun_path[0] = 0;
1212                 *uaddr_len = sizeof(short);
1213         } else {
1214                 struct unix_address *addr = u->addr;
1215
1216                 *uaddr_len = addr->len;
1217                 memcpy(sunaddr, addr->name, *uaddr_len);
1218         }
1219         unix_state_runlock(sk);
1220         sock_put(sk);
1221 out:
1222         return err;
1223 }
1224
1225 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1226 {
1227         int i;
1228
1229         scm->fp = UNIXCB(skb).fp;
1230         skb->destructor = sock_wfree;
1231         UNIXCB(skb).fp = NULL;
1232
1233         for (i=scm->fp->count-1; i>=0; i--)
1234                 unix_notinflight(scm->fp->fp[i]);
1235 }
1236
1237 static void unix_destruct_fds(struct sk_buff *skb)
1238 {
1239         struct scm_cookie scm;
1240         memset(&scm, 0, sizeof(scm));
1241         unix_detach_fds(&scm, skb);
1242
1243         /* Alas, it calls VFS */
1244         /* So fscking what? fput() had been SMP-safe since the last Summer */
1245         scm_destroy(&scm);
1246         sock_wfree(skb);
1247 }
1248
1249 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1250 {
1251         int i;
1252         for (i=scm->fp->count-1; i>=0; i--)
1253                 unix_inflight(scm->fp->fp[i]);
1254         UNIXCB(skb).fp = scm->fp;
1255         skb->destructor = unix_destruct_fds;
1256         scm->fp = NULL;
1257 }
1258
1259 /*
1260  *      Send AF_UNIX data.
1261  */
1262
1263 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1264                               struct msghdr *msg, size_t len)
1265 {
1266         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1267         struct sock *sk = sock->sk;
1268         struct unix_sock *u = unix_sk(sk);
1269         struct sockaddr_un *sunaddr=msg->msg_name;
1270         struct sock *other = NULL;
1271         int namelen = 0; /* fake GCC */
1272         int err;
1273         unsigned hash;
1274         struct sk_buff *skb;
1275         long timeo;
1276         struct scm_cookie tmp_scm;
1277
1278         if (NULL == siocb->scm)
1279                 siocb->scm = &tmp_scm;
1280         err = scm_send(sock, msg, siocb->scm);
1281         if (err < 0)
1282                 return err;
1283
1284         err = -EOPNOTSUPP;
1285         if (msg->msg_flags&MSG_OOB)
1286                 goto out;
1287
1288         if (msg->msg_namelen) {
1289                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1290                 if (err < 0)
1291                         goto out;
1292                 namelen = err;
1293         } else {
1294                 sunaddr = NULL;
1295                 err = -ENOTCONN;
1296                 other = unix_peer_get(sk);
1297                 if (!other)
1298                         goto out;
1299         }
1300
1301         if (test_bit(SOCK_PASS_CRED, &sock->flags)
1302                 && !u->addr && (err = unix_autobind(sock)) != 0)
1303                 goto out;
1304
1305         err = -EMSGSIZE;
1306         if (len > sk->sk_sndbuf - 32)
1307                 goto out;
1308
1309         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1310         if (skb==NULL)
1311                 goto out;
1312
1313         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1314         if (siocb->scm->fp)
1315                 unix_attach_fds(siocb->scm, skb);
1316
1317         skb->h.raw = skb->data;
1318         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1319         if (err)
1320                 goto out_free;
1321
1322         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1323
1324 restart:
1325         if (!other) {
1326                 err = -ECONNRESET;
1327                 if (sunaddr == NULL)
1328                         goto out_free;
1329
1330                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1331                                         hash, &err);
1332                 if (other==NULL)
1333                         goto out_free;
1334         }
1335
1336         unix_state_rlock(other);
1337         err = -EPERM;
1338         if (!unix_may_send(sk, other))
1339                 goto out_unlock;
1340
1341         if (sock_flag(other, SOCK_DEAD)) {
1342                 /*
1343                  *      Check with 1003.1g - what should
1344                  *      datagram error
1345                  */
1346                 unix_state_runlock(other);
1347                 sock_put(other);
1348
1349                 err = 0;
1350                 unix_state_wlock(sk);
1351                 if (unix_peer(sk) == other) {
1352                         unix_peer(sk)=NULL;
1353                         unix_state_wunlock(sk);
1354
1355                         unix_dgram_disconnected(sk, other);
1356                         sock_put(other);
1357                         err = -ECONNREFUSED;
1358                 } else {
1359                         unix_state_wunlock(sk);
1360                 }
1361
1362                 other = NULL;
1363                 if (err)
1364                         goto out_free;
1365                 goto restart;
1366         }
1367
1368         err = -EPIPE;
1369         if (other->sk_shutdown & RCV_SHUTDOWN)
1370                 goto out_unlock;
1371
1372         if (sk->sk_type != SOCK_SEQPACKET) {
1373                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1374                 if (err)
1375                         goto out_unlock;
1376         }
1377
1378         if (unix_peer(other) != sk &&
1379             (skb_queue_len(&other->sk_receive_queue) >
1380              other->sk_max_ack_backlog)) {
1381                 if (!timeo) {
1382                         err = -EAGAIN;
1383                         goto out_unlock;
1384                 }
1385
1386                 timeo = unix_wait_for_peer(other, timeo);
1387
1388                 err = sock_intr_errno(timeo);
1389                 if (signal_pending(current))
1390                         goto out_free;
1391
1392                 goto restart;
1393         }
1394
1395         skb_queue_tail(&other->sk_receive_queue, skb);
1396         unix_state_runlock(other);
1397         other->sk_data_ready(other, len);
1398         sock_put(other);
1399         scm_destroy(siocb->scm);
1400         return len;
1401
1402 out_unlock:
1403         unix_state_runlock(other);
1404 out_free:
1405         kfree_skb(skb);
1406 out:
1407         if (other)
1408                 sock_put(other);
1409         scm_destroy(siocb->scm);
1410         return err;
1411 }
1412
1413                 
1414 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1415                                struct msghdr *msg, size_t len)
1416 {
1417         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1418         struct sock *sk = sock->sk;
1419         struct sock *other = NULL;
1420         struct sockaddr_un *sunaddr=msg->msg_name;
1421         int err,size;
1422         struct sk_buff *skb;
1423         int sent=0;
1424         struct scm_cookie tmp_scm;
1425
1426         if (NULL == siocb->scm)
1427                 siocb->scm = &tmp_scm;
1428         err = scm_send(sock, msg, siocb->scm);
1429         if (err < 0)
1430                 return err;
1431
1432         err = -EOPNOTSUPP;
1433         if (msg->msg_flags&MSG_OOB)
1434                 goto out_err;
1435
1436         if (msg->msg_namelen) {
1437                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1438                 goto out_err;
1439         } else {
1440                 sunaddr = NULL;
1441                 err = -ENOTCONN;
1442                 other = unix_peer_get(sk);
1443                 if (!other)
1444                         goto out_err;
1445         }
1446
1447         if (sk->sk_shutdown & SEND_SHUTDOWN)
1448                 goto pipe_err;
1449
1450         while(sent < len)
1451         {
1452                 /*
1453                  *      Optimisation for the fact that under 0.01% of X messages typically
1454                  *      need breaking up.
1455                  */
1456
1457                 size=len-sent;
1458
1459                 /* Keep two messages in the pipe so it schedules better */
1460                 if (size > sk->sk_sndbuf / 2 - 64)
1461                         size = sk->sk_sndbuf / 2 - 64;
1462
1463                 if (size > SKB_MAX_ALLOC)
1464                         size = SKB_MAX_ALLOC;
1465                         
1466                 /*
1467                  *      Grab a buffer
1468                  */
1469                  
1470                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1471
1472                 if (skb==NULL)
1473                         goto out_err;
1474
1475                 /*
1476                  *      If you pass two values to the sock_alloc_send_skb
1477                  *      it tries to grab the large buffer with GFP_NOFS
1478                  *      (which can fail easily), and if it fails grab the
1479                  *      fallback size buffer which is under a page and will
1480                  *      succeed. [Alan]
1481                  */
1482                 size = min_t(int, size, skb_tailroom(skb));
1483
1484                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1485                 if (siocb->scm->fp)
1486                         unix_attach_fds(siocb->scm, skb);
1487
1488                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1489                         kfree_skb(skb);
1490                         goto out_err;
1491                 }
1492
1493                 unix_state_rlock(other);
1494
1495                 if (sock_flag(other, SOCK_DEAD) ||
1496                     (other->sk_shutdown & RCV_SHUTDOWN))
1497                         goto pipe_err_free;
1498
1499                 skb_queue_tail(&other->sk_receive_queue, skb);
1500                 unix_state_runlock(other);
1501                 other->sk_data_ready(other, size);
1502                 sent+=size;
1503         }
1504         sock_put(other);
1505
1506         scm_destroy(siocb->scm);
1507         siocb->scm = NULL;
1508
1509         return sent;
1510
1511 pipe_err_free:
1512         unix_state_runlock(other);
1513         kfree_skb(skb);
1514 pipe_err:
1515         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1516                 send_sig(SIGPIPE,current,0);
1517         err = -EPIPE;
1518 out_err:
1519         if (other)
1520                 sock_put(other);
1521         scm_destroy(siocb->scm);
1522         siocb->scm = NULL;
1523         return sent ? : err;
1524 }
1525
1526 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1527                                   struct msghdr *msg, size_t len)
1528 {
1529         int err;
1530         struct sock *sk = sock->sk;
1531         
1532         err = sock_error(sk);
1533         if (err)
1534                 return err;
1535
1536         if (sk->sk_state != TCP_ESTABLISHED)
1537                 return -ENOTCONN;
1538
1539         if (msg->msg_namelen)
1540                 msg->msg_namelen = 0;
1541
1542         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1543 }
1544                                                                                             
1545 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1546 {
1547         struct unix_sock *u = unix_sk(sk);
1548
1549         msg->msg_namelen = 0;
1550         if (u->addr) {
1551                 msg->msg_namelen = u->addr->len;
1552                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1553         }
1554 }
1555
1556 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1557                               struct msghdr *msg, size_t size,
1558                               int flags)
1559 {
1560         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1561         struct scm_cookie tmp_scm;
1562         struct sock *sk = sock->sk;
1563         struct unix_sock *u = unix_sk(sk);
1564         int noblock = flags & MSG_DONTWAIT;
1565         struct sk_buff *skb;
1566         int err;
1567
1568         err = -EOPNOTSUPP;
1569         if (flags&MSG_OOB)
1570                 goto out;
1571
1572         msg->msg_namelen = 0;
1573
1574         down(&u->readsem);
1575
1576         skb = skb_recv_datagram(sk, flags, noblock, &err);
1577         if (!skb)
1578                 goto out_unlock;
1579
1580         wake_up_interruptible(&u->peer_wait);
1581
1582         if (msg->msg_name)
1583                 unix_copy_addr(msg, skb->sk);
1584
1585         if (size > skb->len)
1586                 size = skb->len;
1587         else if (size < skb->len)
1588                 msg->msg_flags |= MSG_TRUNC;
1589
1590         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1591         if (err)
1592                 goto out_free;
1593
1594         if (!siocb->scm) {
1595                 siocb->scm = &tmp_scm;
1596                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1597         }
1598         siocb->scm->creds = *UNIXCREDS(skb);
1599
1600         if (!(flags & MSG_PEEK))
1601         {
1602                 if (UNIXCB(skb).fp)
1603                         unix_detach_fds(siocb->scm, skb);
1604         }
1605         else 
1606         {
1607                 /* It is questionable: on PEEK we could:
1608                    - do not return fds - good, but too simple 8)
1609                    - return fds, and do not return them on read (old strategy,
1610                      apparently wrong)
1611                    - clone fds (I chose it for now, it is the most universal
1612                      solution)
1613                 
1614                    POSIX 1003.1g does not actually define this clearly
1615                    at all. POSIX 1003.1g doesn't define a lot of things
1616                    clearly however!                  
1617                    
1618                 */
1619                 if (UNIXCB(skb).fp)
1620                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1621         }
1622         err = size;
1623
1624         scm_recv(sock, msg, siocb->scm, flags);
1625
1626 out_free:
1627         skb_free_datagram(sk,skb);
1628 out_unlock:
1629         up(&u->readsem);
1630 out:
1631         return err;
1632 }
1633
1634 /*
1635  *      Sleep until data has arrive. But check for races..
1636  */
1637  
1638 static long unix_stream_data_wait(struct sock * sk, long timeo)
1639 {
1640         DEFINE_WAIT(wait);
1641
1642         unix_state_rlock(sk);
1643
1644         for (;;) {
1645                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1646
1647                 if (skb_queue_len(&sk->sk_receive_queue) ||
1648                     sk->sk_err ||
1649                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1650                     signal_pending(current) ||
1651                     !timeo)
1652                         break;
1653
1654                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1655                 unix_state_runlock(sk);
1656                 timeo = schedule_timeout(timeo);
1657                 unix_state_rlock(sk);
1658                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1659         }
1660
1661         finish_wait(sk->sk_sleep, &wait);
1662         unix_state_runlock(sk);
1663         return timeo;
1664 }
1665
1666
1667
1668 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1669                                struct msghdr *msg, size_t size,
1670                                int flags)
1671 {
1672         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1673         struct scm_cookie tmp_scm;
1674         struct sock *sk = sock->sk;
1675         struct unix_sock *u = unix_sk(sk);
1676         struct sockaddr_un *sunaddr=msg->msg_name;
1677         int copied = 0;
1678         int check_creds = 0;
1679         int target;
1680         int err = 0;
1681         long timeo;
1682
1683         err = -EINVAL;
1684         if (sk->sk_state != TCP_ESTABLISHED)
1685                 goto out;
1686
1687         err = -EOPNOTSUPP;
1688         if (flags&MSG_OOB)
1689                 goto out;
1690
1691         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1692         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1693
1694         msg->msg_namelen = 0;
1695
1696         /* Lock the socket to prevent queue disordering
1697          * while sleeps in memcpy_tomsg
1698          */
1699
1700         if (!siocb->scm) {
1701                 siocb->scm = &tmp_scm;
1702                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1703         }
1704
1705         down(&u->readsem);
1706
1707         do
1708         {
1709                 int chunk;
1710                 struct sk_buff *skb;
1711
1712                 skb = skb_dequeue(&sk->sk_receive_queue);
1713                 if (skb==NULL)
1714                 {
1715                         if (copied >= target)
1716                                 break;
1717
1718                         /*
1719                          *      POSIX 1003.1g mandates this order.
1720                          */
1721                          
1722                         if ((err = sock_error(sk)) != 0)
1723                                 break;
1724                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1725                                 break;
1726                         err = -EAGAIN;
1727                         if (!timeo)
1728                                 break;
1729                         up(&u->readsem);
1730
1731                         timeo = unix_stream_data_wait(sk, timeo);
1732
1733                         if (signal_pending(current)) {
1734                                 err = sock_intr_errno(timeo);
1735                                 goto out;
1736                         }
1737                         down(&u->readsem);
1738                         continue;
1739                 }
1740
1741                 if (check_creds) {
1742                         /* Never glue messages from different writers */
1743                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1744                                 skb_queue_head(&sk->sk_receive_queue, skb);
1745                                 break;
1746                         }
1747                 } else {
1748                         /* Copy credentials */
1749                         siocb->scm->creds = *UNIXCREDS(skb);
1750                         check_creds = 1;
1751                 }
1752
1753                 /* Copy address just once */
1754                 if (sunaddr)
1755                 {
1756                         unix_copy_addr(msg, skb->sk);
1757                         sunaddr = NULL;
1758                 }
1759
1760                 chunk = min_t(unsigned int, skb->len, size);
1761                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1762                         skb_queue_head(&sk->sk_receive_queue, skb);
1763                         if (copied == 0)
1764                                 copied = -EFAULT;
1765                         break;
1766                 }
1767                 copied += chunk;
1768                 size -= chunk;
1769
1770                 /* Mark read part of skb as used */
1771                 if (!(flags & MSG_PEEK))
1772                 {
1773                         skb_pull(skb, chunk);
1774
1775                         if (UNIXCB(skb).fp)
1776                                 unix_detach_fds(siocb->scm, skb);
1777
1778                         /* put the skb back if we didn't use it up.. */
1779                         if (skb->len)
1780                         {
1781                                 skb_queue_head(&sk->sk_receive_queue, skb);
1782                                 break;
1783                         }
1784
1785                         kfree_skb(skb);
1786
1787                         if (siocb->scm->fp)
1788                                 break;
1789                 }
1790                 else
1791                 {
1792                         /* It is questionable, see note in unix_dgram_recvmsg.
1793                          */
1794                         if (UNIXCB(skb).fp)
1795                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1796
1797                         /* put message back and return */
1798                         skb_queue_head(&sk->sk_receive_queue, skb);
1799                         break;
1800                 }
1801         } while (size);
1802
1803         up(&u->readsem);
1804         scm_recv(sock, msg, siocb->scm, flags);
1805 out:
1806         return copied ? : err;
1807 }
1808
1809 static int unix_shutdown(struct socket *sock, int mode)
1810 {
1811         struct sock *sk = sock->sk;
1812         struct sock *other;
1813
1814         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1815
1816         if (mode) {
1817                 unix_state_wlock(sk);
1818                 sk->sk_shutdown |= mode;
1819                 other=unix_peer(sk);
1820                 if (other)
1821                         sock_hold(other);
1822                 unix_state_wunlock(sk);
1823                 sk->sk_state_change(sk);
1824
1825                 if (other &&
1826                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1827
1828                         int peer_mode = 0;
1829
1830                         if (mode&RCV_SHUTDOWN)
1831                                 peer_mode |= SEND_SHUTDOWN;
1832                         if (mode&SEND_SHUTDOWN)
1833                                 peer_mode |= RCV_SHUTDOWN;
1834                         unix_state_wlock(other);
1835                         other->sk_shutdown |= peer_mode;
1836                         unix_state_wunlock(other);
1837                         other->sk_state_change(other);
1838                         read_lock(&other->sk_callback_lock);
1839                         if (peer_mode == SHUTDOWN_MASK)
1840                                 sk_wake_async(other,1,POLL_HUP);
1841                         else if (peer_mode & RCV_SHUTDOWN)
1842                                 sk_wake_async(other,1,POLL_IN);
1843                         read_unlock(&other->sk_callback_lock);
1844                 }
1845                 if (other)
1846                         sock_put(other);
1847         }
1848         return 0;
1849 }
1850
1851 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1852 {
1853         struct sock *sk = sock->sk;
1854         long amount=0;
1855         int err;
1856
1857         switch(cmd)
1858         {
1859                 case SIOCOUTQ:
1860                         amount = atomic_read(&sk->sk_wmem_alloc);
1861                         err = put_user(amount, (int __user *)arg);
1862                         break;
1863                 case SIOCINQ:
1864                 {
1865                         struct sk_buff *skb;
1866                         if (sk->sk_state == TCP_LISTEN) {
1867                                 err = -EINVAL;
1868                                 break;
1869                         }
1870
1871                         spin_lock(&sk->sk_receive_queue.lock);
1872                         skb = skb_peek(&sk->sk_receive_queue);
1873                         if (skb)
1874                                 amount=skb->len;
1875                         spin_unlock(&sk->sk_receive_queue.lock);
1876                         err = put_user(amount, (int __user *)arg);
1877                         break;
1878                 }
1879
1880                 default:
1881                         err = dev_ioctl(cmd, (void __user *)arg);
1882                         break;
1883         }
1884         return err;
1885 }
1886
1887 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1888 {
1889         struct sock *sk = sock->sk;
1890         unsigned int mask;
1891
1892         poll_wait(file, sk->sk_sleep, wait);
1893         mask = 0;
1894
1895         /* exceptional events? */
1896         if (sk->sk_err)
1897                 mask |= POLLERR;
1898         if (sk->sk_shutdown == SHUTDOWN_MASK)
1899                 mask |= POLLHUP;
1900
1901         /* readable? */
1902         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1903             (sk->sk_shutdown & RCV_SHUTDOWN))
1904                 mask |= POLLIN | POLLRDNORM;
1905
1906         /* Connection-based need to check for termination and startup */
1907         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1908                 mask |= POLLHUP;
1909
1910         /*
1911          * we set writable also when the other side has shut down the
1912          * connection. This prevents stuck sockets.
1913          */
1914         if (unix_writable(sk))
1915                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1916
1917         return mask;
1918 }
1919
1920
1921 #ifdef CONFIG_PROC_FS
1922 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1923 {
1924         loff_t off = 0;
1925         struct sock *s;
1926
1927         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1928                 if (off == pos) 
1929                         return s;
1930                 ++off;
1931         }
1932         return NULL;
1933 }
1934
1935
1936 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1937 {
1938         read_lock(&unix_table_lock);
1939         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1940 }
1941
1942 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1943 {
1944         ++*pos;
1945
1946         if (v == (void *)1) 
1947                 return first_unix_socket(seq->private);
1948         return next_unix_socket(seq->private, v);
1949 }
1950
1951 static void unix_seq_stop(struct seq_file *seq, void *v)
1952 {
1953         read_unlock(&unix_table_lock);
1954 }
1955
1956 static int unix_seq_show(struct seq_file *seq, void *v)
1957 {
1958         
1959         if (v == (void *)1)
1960                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1961                          "Inode Path\n");
1962         else {
1963                 struct sock *s = v;
1964                 struct unix_sock *u = unix_sk(s);
1965                 unix_state_rlock(s);
1966
1967                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1968                         s,
1969                         atomic_read(&s->sk_refcnt),
1970                         0,
1971                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1972                         s->sk_type,
1973                         s->sk_socket ?
1974                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1975                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1976                         sock_i_ino(s));
1977
1978                 if (u->addr) {
1979                         int i, len;
1980                         seq_putc(seq, ' ');
1981
1982                         i = 0;
1983                         len = u->addr->len - sizeof(short);
1984                         if (!UNIX_ABSTRACT(s))
1985                                 len--;
1986                         else {
1987                                 seq_putc(seq, '@');
1988                                 i++;
1989                         }
1990                         for ( ; i < len; i++)
1991                                 seq_putc(seq, u->addr->name->sun_path[i]);
1992                 }
1993                 unix_state_runlock(s);
1994                 seq_putc(seq, '\n');
1995         }
1996
1997         return 0;
1998 }
1999
2000 static struct seq_operations unix_seq_ops = {
2001         .start  = unix_seq_start,
2002         .next   = unix_seq_next,
2003         .stop   = unix_seq_stop,
2004         .show   = unix_seq_show,
2005 };
2006
2007
2008 static int unix_seq_open(struct inode *inode, struct file *file)
2009 {
2010         struct seq_file *seq;
2011         int rc = -ENOMEM;
2012         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2013
2014         if (!iter)
2015                 goto out;
2016
2017         rc = seq_open(file, &unix_seq_ops);
2018         if (rc)
2019                 goto out_kfree;
2020
2021         seq          = file->private_data;
2022         seq->private = iter;
2023         *iter = 0;
2024 out:
2025         return rc;
2026 out_kfree:
2027         kfree(iter);
2028         goto out;
2029 }
2030
2031 static struct file_operations unix_seq_fops = {
2032         .owner          = THIS_MODULE,
2033         .open           = unix_seq_open,
2034         .read           = seq_read,
2035         .llseek         = seq_lseek,
2036         .release        = seq_release_private,
2037 };
2038
2039 #endif
2040
2041 static struct net_proto_family unix_family_ops = {
2042         .family = PF_UNIX,
2043         .create = unix_create,
2044         .owner  = THIS_MODULE,
2045 };
2046
/*
 *	sysctl hooks.  Without CONFIG_SYSCTL they are empty inline stubs so
 *	callers need no #ifdef; with it, the real functions are defined in
 *	another translation unit.
 */
#ifndef CONFIG_SYSCTL
static inline void unix_sysctl_register(void) {}
static inline void unix_sysctl_unregister(void) {}
#else
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#endif
2054
2055 static int __init af_unix_init(void)
2056 {
2057         struct sk_buff *dummy_skb;
2058
2059         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2060                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2061                 return -1;
2062         }
2063         /* allocate our sock slab cache */
2064         unix_sk_cachep = kmem_cache_create("unix_sock",
2065                                            sizeof(struct unix_sock), 0,
2066                                            SLAB_HWCACHE_ALIGN, NULL, NULL);
2067         if (!unix_sk_cachep)
2068                 printk(KERN_CRIT
2069                         "af_unix_init: Cannot create unix_sock SLAB cache!\n");
2070
2071         sock_register(&unix_family_ops);
2072 #ifdef CONFIG_PROC_FS
2073         proc_net_fops_create("unix", 0, &unix_seq_fops);
2074 #endif
2075         unix_sysctl_register();
2076         return 0;
2077 }
2078
2079 static void __exit af_unix_exit(void)
2080 {
2081         sock_unregister(PF_UNIX);
2082         unix_sysctl_unregister();
2083         proc_net_remove("unix");
2084         kmem_cache_destroy(unix_sk_cachep);
2085 }
2086
2087 module_init(af_unix_init);
2088 module_exit(af_unix_exit);
2089
2090 MODULE_LICENSE("GPL");
2091 MODULE_ALIAS_NETPROTO(PF_UNIX);