X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Funix%2Faf_unix.c;h=d2bdd01167623ab0d890cb033449d9e2fe022721;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=916b6bc4622708337c90f5a3186b2177de7fa76d;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 916b6bc46..d2bdd0116 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,6 @@ #include #include #include -#include #include #include #include @@ -106,7 +105,7 @@ #include #include #include -#include +#include #include #include #include @@ -124,10 +123,8 @@ int sysctl_unix_max_dgram_qlen = 10; -static kmem_cache_t *unix_sk_cachep; - struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -DEFINE_RWLOCK(unix_table_lock); +DEFINE_SPINLOCK(unix_table_lock); static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -136,7 +133,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); /* * SMP locking strategy: - * hash table is protected with rwlock unix_table_lock + * hash table is protected with spinlock unix_table_lock * each socket state is protected by separate rwlock. */ @@ -191,6 +188,13 @@ static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp) if (!sunaddr || sunaddr->sun_family != AF_UNIX) return -EINVAL; if (sunaddr->sun_path[0]) { + /* + * This may look like an off by one error but it is a bit more + * subtle. 108 is the longest valid AF_UNIX path for a binding. + * sun_path[108] doesnt as such exist. However in kernel space + * we are guaranteed that it is a valid memory location in our + * kernel address buffer. + */ ((char *)sunaddr)[len]=0; len = strlen(sunaddr->sun_path)+1+sizeof(short); return len; @@ -213,16 +217,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) static inline void unix_remove_socket(struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_remove_socket(sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_insert_socket(list, sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, @@ -234,6 +238,8 @@ static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); + if (!vx_check(s->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) goto found; @@ -249,11 +255,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, { struct sock *s; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); if (s) sock_hold(s); - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -262,7 +268,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) struct sock *s; struct hlist_node *node; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); sk_for_each(s, node, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; @@ -275,7 +281,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) } s = NULL; found: - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -301,7 +307,7 @@ static void unix_write_space(struct sock *sk) * may receive messages only from that peer. */ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) { - if (skb_queue_len(&sk->sk_receive_queue)) { + if (!skb_queue_empty(&sk->sk_receive_queue)) { skb_queue_purge(&sk->sk_receive_queue); wake_up_interruptible_all(&unix_sk(sk)->peer_wait); @@ -472,7 +478,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -static struct proto_ops unix_stream_ops = { +static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -493,7 +499,7 @@ static struct proto_ops unix_stream_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_dgram_ops = { +static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -514,7 +520,7 @@ static struct proto_ops unix_dgram_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_seqpacket_ops = { +static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -535,23 +541,27 @@ static struct proto_ops unix_seqpacket_ops = { .sendpage = sock_no_sendpage, }; +static struct proto unix_proto = { + .name = "UNIX", + .owner = THIS_MODULE, + .obj_size = sizeof(struct unix_sock), +}; + static struct sock * unix_create1(struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; - if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files) + if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(PF_UNIX, GFP_KERNEL, sizeof(struct unix_sock), - unix_sk_cachep); + sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); if (!sk) goto out; atomic_inc(&unix_nr_socks); sock_init_data(sock,sk); - sk_set_owner(sk, THIS_MODULE); sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; @@ -559,9 +569,9 @@ static struct sock * unix_create1(struct socket *sock) u = unix_sk(sk); u->dentry = NULL; u->mnt = NULL; - rwlock_init(&u->lock); + spin_lock_init(&u->lock); atomic_set(&u->inflight, sock ? 0 : -1); - init_MUTEX(&u->readsem); /* single task reading lock */ + mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); unix_insert_socket(unix_sockets_unbound, sk); out: @@ -618,7 +628,7 @@ static int unix_autobind(struct socket *sock) struct unix_address * addr; int err; - down(&u->readsem); + mutex_lock(&u->readlock); err = 0; if (u->addr) @@ -637,12 +647,12 @@ retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; if (__unix_find_socket_byname(addr->name, addr->len, sock->type, addr->hash)) { - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ if (!(ordernum&0xFF)) yield(); @@ -653,10 +663,10 @@ retry: __unix_remove_socket(sk); u->addr = addr; __unix_insert_socket(&unix_socket_table[addr->hash], sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); err = 0; -out: up(&u->readsem); +out: mutex_unlock(&u->readlock); return err; } @@ -671,7 +681,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); if (err) goto fail; - err = permission(nd.dentry->d_inode,MAY_WRITE, &nd); + err = vfs_permission(&nd, MAY_WRITE); if (err) goto put_fail; @@ -739,7 +749,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - down(&u->readsem); + mutex_lock(&u->readlock); err = -EINVAL; if (u->addr) @@ -765,49 +775,28 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); if (err) goto out_mknod_parent; - /* - * Yucky last component or no last component at all? - * (foo/., foo/.., /////) - */ - err = -EEXIST; - if (nd.last_type != LAST_NORM) - goto out_mknod; - /* - * Lock the directory. - */ - down(&nd.dentry->d_inode->i_sem); - /* - * Do the final lookup. - */ - dentry = lookup_hash(&nd.last, nd.dentry); + + dentry = lookup_create(&nd, 0); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_mknod_unlock; - err = -ENOENT; - /* - * Special case - lookup gave negative, but... we had foo/bar/ - * From the vfs_mknod() POV we just have a negative dentry - - * all is fine. Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ - if (nd.last.name[nd.last.len] && !dentry->d_inode) - goto out_mknod_dput; + /* * All right, let's create it. */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current->fs->umask); - err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); + err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL); if (err) goto out_mknod_dput; - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); dput(nd.dentry); nd.dentry = dentry; addr->hash = UNIX_HASH_SIZE; } - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; @@ -830,17 +819,16 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) __unix_insert_socket(list, sk); out_unlock: - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); out_up: - up(&u->readsem); + mutex_unlock(&u->readlock); out: return err; out_mknod_dput: dput(dentry); out_mknod_unlock: - up(&nd.dentry->d_inode->i_sem); -out_mknod: + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out_mknod_parent: if (err==-EEXIST) @@ -864,8 +852,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, goto out; alen = err; - if (test_bit(SOCK_PASS_CRED, &sock->flags) && !unix_sk(sk)->addr && - (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASSCRED, &sock->flags) && + !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; other=unix_find_other(sunaddr, alen, sock->type, hash, &err); @@ -955,7 +943,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; addr_len = err; - if (test_bit(SOCK_PASS_CRED, &sock->flags) + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && (err = unix_autobind(sock)) != 0) goto out; @@ -1080,10 +1068,12 @@ restart: /* Set credentials */ sk->sk_peercred = other->sk_peercred; - sock_hold(newsk); - unix_peer(sk) = newsk; sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; + sock_hold(newsk); + + smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ + unix_peer(sk) = newsk; unix_state_wunlock(sk); @@ -1290,7 +1280,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (test_bit(SOCK_PASS_CRED, &sock->flags) + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && (err = unix_autobind(sock)) != 0) goto out; @@ -1431,7 +1421,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, } else { sunaddr = NULL; err = -ENOTCONN; - other = unix_peer_get(sk); + other = unix_peer(sk); if (!other) goto out_err; } @@ -1442,15 +1432,15 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, while(sent < len) { /* - * Optimisation for the fact that under 0.01% of X messages typically - * need breaking up. + * Optimisation for the fact that under 0.01% of X + * messages typically need breaking up. */ - size=len-sent; + size = len-sent; /* Keep two messages in the pipe so it schedules better */ - if (size > sk->sk_sndbuf / 2 - 64) - size = sk->sk_sndbuf / 2 - 64; + if (size > ((sk->sk_sndbuf >> 1) - 64)) + size = (sk->sk_sndbuf >> 1) - 64; if (size > SKB_MAX_ALLOC) size = SKB_MAX_ALLOC; @@ -1493,7 +1483,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, other->sk_data_ready(other, size); sent+=size; } - sock_put(other); scm_destroy(siocb->scm); siocb->scm = NULL; @@ -1508,8 +1497,6 @@ pipe_err: send_sig(SIGPIPE,current,0); err = -EPIPE; out_err: - if (other) - sock_put(other); scm_destroy(siocb->scm); siocb->scm = NULL; return sent ? : err; @@ -1563,7 +1550,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - down(&u->readsem); + mutex_lock(&u->readlock); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -1618,7 +1605,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk,skb); out_unlock: - up(&u->readsem); + mutex_unlock(&u->readlock); out: return err; } @@ -1636,7 +1623,7 @@ static long unix_stream_data_wait(struct sock * sk, long timeo) for (;;) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (skb_queue_len(&sk->sk_receive_queue) || + if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -1694,7 +1681,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - down(&u->readsem); + mutex_lock(&u->readlock); do { @@ -1718,7 +1705,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, err = -EAGAIN; if (!timeo) break; - up(&u->readsem); + mutex_unlock(&u->readlock); timeo = unix_stream_data_wait(sk, timeo); @@ -1726,7 +1713,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, err = sock_intr_errno(timeo); goto out; } - down(&u->readsem); + mutex_lock(&u->readlock); continue; } @@ -1792,7 +1779,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } } while (size); - up(&u->readsem); + mutex_unlock(&u->readlock); scm_recv(sock, msg, siocb->scm, flags); out: return copied ? : err; @@ -1877,7 +1864,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } default: - err = dev_ioctl(cmd, (void __user *)arg); + err = -ENOIOCTLCMD; break; } return err; @@ -1896,6 +1883,8 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl mask |= POLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; /* readable? */ if (!skb_queue_empty(&sk->sk_receive_queue) || @@ -1934,7 +1923,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos) static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); } @@ -1949,7 +1938,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void unix_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static int unix_seq_show(struct seq_file *seq, void *v) @@ -2043,36 +2032,30 @@ static struct net_proto_family unix_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); -#else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} -#endif - static int __init af_unix_init(void) { + int rc = -1; struct sk_buff *dummy_skb; if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { printk(KERN_CRIT "%s: panic\n", __FUNCTION__); - return -1; + goto out; + } + + rc = proto_register(&unix_proto, 1); + if (rc != 0) { + printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n", + __FUNCTION__); + goto out; } - /* allocate our sock slab cache */ - unix_sk_cachep = kmem_cache_create("unix_sock", - sizeof(struct unix_sock), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!unix_sk_cachep) - printk(KERN_CRIT - "af_unix_init: Cannot create unix_sock SLAB cache!\n"); sock_register(&unix_family_ops); #ifdef CONFIG_PROC_FS proc_net_fops_create("unix", 0, &unix_seq_fops); #endif unix_sysctl_register(); - return 0; +out: + return rc; } static void __exit af_unix_exit(void) @@ -2080,7 +2063,7 @@ static void __exit af_unix_exit(void) sock_unregister(PF_UNIX); unix_sysctl_unregister(); proc_net_remove("unix"); - kmem_cache_destroy(unix_sk_cachep); + proto_unregister(&unix_proto); } module_init(af_unix_init);