vserver 1.9.5.x5
[linux-2.6.git] / net / ipv4 / af_inet.c
index c4a967b..e51804d 100644 (file)
@@ -122,22 +122,17 @@ atomic_t inet_sock_nr;
 
 extern void ip_mc_drop_socket(struct sock *sk);
 
-/* Per protocol sock slabcache */
-kmem_cache_t *tcp_sk_cachep;
-static kmem_cache_t *udp_sk_cachep;
-static kmem_cache_t *raw4_sk_cachep;
-
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
  */
 static struct list_head inetsw[SOCK_MAX];
-static spinlock_t inetsw_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(inetsw_lock);
 
 /* New destruction routine */
 
 void inet_sock_destruct(struct sock *sk)
 {
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
 
        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_error_queue);
@@ -159,13 +154,6 @@ void inet_sock_destruct(struct sock *sk)
 
        if (inet->opt)
                kfree(inet->opt);
-       
-       vx_sock_dec(sk);
-       clr_vx_info(&sk->sk_vx_info);
-       sk->sk_xid = -1;
-       clr_nx_info(&sk->sk_nx_info);
-       sk->sk_nid = -1;
-
        dst_release(sk->sk_dst_cache);
 #ifdef INET_REFCNT_DEBUG
        atomic_dec(&inet_sock_nr);
@@ -186,7 +174,7 @@ void inet_sock_destruct(struct sock *sk)
 
 static int inet_autobind(struct sock *sk)
 {
-       struct inet_opt *inet;
+       struct inet_sock *inet;
        /* We may need to bind the socket. */
        lock_sock(sk);
        inet = inet_sk(sk);
@@ -236,28 +224,6 @@ out:
        return err;
 }
 
-static __inline__ kmem_cache_t *inet_sk_slab(int protocol)
-{
-       kmem_cache_t* rc = tcp_sk_cachep;
-
-       if (protocol == IPPROTO_UDP)
-               rc = udp_sk_cachep;
-       else if (protocol == IPPROTO_RAW)
-               rc = raw4_sk_cachep;
-       return rc;
-}
-
-static __inline__ int inet_sk_size(int protocol)
-{
-       int rc = sizeof(struct tcp_sock);
-
-       if (protocol == IPPROTO_UDP)
-               rc = sizeof(struct udp_sock);
-       else if (protocol == IPPROTO_RAW)
-               rc = sizeof(struct raw_sock);
-       return rc;
-}
-
 /*
  *     Create an inet socket.
  */
@@ -267,14 +233,13 @@ static int inet_create(struct socket *sock, int protocol)
        struct sock *sk;
        struct list_head *p;
        struct inet_protosw *answer;
-       struct inet_opt *inet;
-       int err = -ENOBUFS;
+       struct inet_sock *inet;
+       struct proto *answer_prot;
+       unsigned char answer_flags;
+       char answer_no_check;
+       int err;
 
        sock->state = SS_UNCONNECTED;
-       sk = sk_alloc(PF_INET, GFP_KERNEL, inet_sk_size(protocol),
-                     inet_sk_slab(protocol));
-       if (!sk)
-               goto out;
 
        /* Look for the requested type/protocol pair. */
        answer = NULL;
@@ -300,24 +265,38 @@ static int inet_create(struct socket *sock, int protocol)
 
        err = -ESOCKTNOSUPPORT;
        if (!answer)
-               goto out_sk_free;
+               goto out_rcu_unlock;
        err = -EPERM;
        if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP))
                goto override;
        if (answer->capability > 0 && !capable(answer->capability))
-               goto out_sk_free;
+               goto out_rcu_unlock;
 override:
        err = -EPROTONOSUPPORT;
        if (!protocol)
-               goto out_sk_free;
-       err = 0;
+               goto out_rcu_unlock;
+
        sock->ops = answer->ops;
-       sk->sk_prot = answer->prot;
-       sk->sk_no_check = answer->no_check;
-       if (INET_PROTOSW_REUSE & answer->flags)
-               sk->sk_reuse = 1;
+       answer_prot = answer->prot;
+       answer_no_check = answer->no_check;
+       answer_flags = answer->flags;
        rcu_read_unlock();
 
+       BUG_TRAP(answer_prot->slab != NULL);
+
+       err = -ENOBUFS;
+       sk = sk_alloc(PF_INET, GFP_KERNEL,
+                     answer_prot->slab_obj_size,
+                     answer_prot->slab);
+       if (sk == NULL)
+               goto out;
+
+       err = 0;
+       sk->sk_prot = answer_prot;
+       sk->sk_no_check = answer_no_check;
+       if (INET_PROTOSW_REUSE & answer_flags)
+               sk->sk_reuse = 1;
+
        inet = inet_sk(sk);
 
        if (SOCK_RAW == sock->type) {
@@ -334,19 +313,12 @@ override:
        inet->id = 0;
 
        sock_init_data(sock, sk);
-       sk_set_owner(sk, THIS_MODULE);
+       sk_set_owner(sk, sk->sk_prot->owner);
 
        sk->sk_destruct    = inet_sock_destruct;
-       sk->sk_zapped      = 0;
        sk->sk_family      = PF_INET;
        sk->sk_protocol    = protocol;
        sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
-       
-       set_vx_info(&sk->sk_vx_info, current->vx_info);
-       sk->sk_xid = vx_current_xid();
-       vx_sock_inc(sk);
-       set_nx_info(&sk->sk_nx_info, current->nx_info);
-       sk->sk_nid = nx_current_nid();
 
        inet->uc_ttl    = -1;
        inet->mc_loop   = 1;
@@ -376,9 +348,8 @@ override:
        }
 out:
        return err;
-out_sk_free:
+out_rcu_unlock:
        rcu_read_unlock();
-       sk_free(sk);
        goto out;
 }
 
@@ -410,11 +381,6 @@ int inet_release(struct socket *sock)
                    !(current->flags & PF_EXITING))
                        timeout = sk->sk_lingertime;
                sock->sk = NULL;
-               vx_sock_dec(sk);
-               clr_vx_info(&sk->sk_vx_info);
-       sk->sk_xid = -1;
-               clr_nx_info(&sk->sk_nx_info);
-       sk->sk_nid = -1;
                sk->sk_prot->close(sk, timeout);
        }
        return 0;
@@ -427,13 +393,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
        struct sock *sk = sock->sk;
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
        unsigned short snum;
        int chk_addr_ret;
        int err;
        __u32 s_addr;   /* Address used for validation */
-       __u32 s_addr1;
-       __u32 s_addr2 = 0xffffffffl;    /* Optional address of the socket */
+       __u32 s_addr1;  /* Address used for socket */
+       __u32 s_addr2;  /* Broadcast address for the socket */
        struct nx_info *nxi = sk->sk_nx_info;
 
        /* If the socket has its own bind function then use it. (RAW) */
@@ -445,38 +411,41 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        if (addr_len < sizeof(struct sockaddr_in))
                goto out;
 
-       s_addr = s_addr1 = addr->sin_addr.s_addr;
+       s_addr = addr->sin_addr.s_addr;
+       s_addr1 = s_addr;
+       s_addr2 = 0xffffffffl;
+
        vxdprintk(VXD_CBIT(net, 3),
-               "inet_bind(%p) %p,%p;%lx",
+               "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d",
                sk, sk->sk_nx_info, sk->sk_socket,
-               (sk->sk_socket?sk->sk_socket->flags:0));
+               (sk->sk_socket?sk->sk_socket->flags:0),
+               VXD_QUAD(s_addr));
        if (nxi) {
                __u32 v4_bcast = nxi->v4_bcast;
                __u32 ipv4root = nxi->ipv4[0];
                int nbipv4 = nxi->nbipv4;
+
                if (s_addr == 0) {
+                       /* INADDR_ANY: keep wildcard only if >1 address */
                        s_addr = ipv4root;
-                       if (nbipv4 > 1)
-                               s_addr1 = 0;
-                       else {
-                               s_addr1 = ipv4root;
-                       }
+                       s_addr1 = (nbipv4 > 1) ? 0 : s_addr;
                        s_addr2 = v4_bcast;
                } else if (s_addr == 0x0100007f) {
-                       s_addr = s_addr1 = ipv4root;
+                       /* rewrite localhost to ipv4root */
+                       s_addr = ipv4root;
+                       s_addr1 = ipv4root;
                } else if (s_addr != v4_bcast) {
-                       int i;
-                       for (i=0; i<nbipv4; i++) {
-                               if (s_addr == nxi->ipv4[i])
-                                       break;
-                       }
-                       if (i == nbipv4) {
+                       /* normal address bind */
+                       if (!addr_in_nx_info(nxi, s_addr))
                                return -EADDRNOTAVAIL;
-                       }
                }
        }
        chk_addr_ret = inet_addr_type(s_addr);
 
+       vxdprintk(VXD_CBIT(net, 3),
+               "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d",
+               sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2));
+
        /* Not specified by any standard per-se, however it breaks too
         * many applications when removed.  It is unfortunate since
         * allowing applications to make a non-local bind solves
@@ -696,7 +665,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
                        int *uaddr_len, int peer)
 {
        struct sock *sk         = sock->sk;
-       struct inet_opt *inet   = inet_sk(sk);
+       struct inet_sock *inet  = inet_sk(sk);
        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 
        sin->sin_family = AF_INET;
@@ -732,7 +701,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 }
 
 
-ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
 {
        struct sock *sk = sock->sk;
 
@@ -874,6 +843,31 @@ struct proto_ops inet_stream_ops = {
 };
 
 struct proto_ops inet_dgram_ops = {
+       .family =       PF_INET,
+       .owner =        THIS_MODULE,
+       .release =      inet_release,
+       .bind =         inet_bind,
+       .connect =      inet_dgram_connect,
+       .socketpair =   sock_no_socketpair,
+       .accept =       sock_no_accept,
+       .getname =      inet_getname,
+       .poll =         udp_poll,
+       .ioctl =        inet_ioctl,
+       .listen =       sock_no_listen,
+       .shutdown =     inet_shutdown,
+       .setsockopt =   sock_common_setsockopt,
+       .getsockopt =   sock_common_getsockopt,
+       .sendmsg =      inet_sendmsg,
+       .recvmsg =      sock_common_recvmsg,
+       .mmap =         sock_no_mmap,
+       .sendpage =     inet_sendpage,
+};
+
+/*
+ * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
+ * udp_poll
+ */
+static struct proto_ops inet_sockraw_ops = {
        .family =       PF_INET,
        .owner =        THIS_MODULE,
        .release =      inet_release,
@@ -894,7 +888,7 @@ struct proto_ops inet_dgram_ops = {
        .sendpage =     inet_sendpage,
 };
 
-struct net_proto_family inet_family_ops = {
+static struct net_proto_family inet_family_ops = {
        .family = PF_INET,
        .create = inet_create,
        .owner  = THIS_MODULE,
@@ -934,7 +928,7 @@ static struct inet_protosw inetsw_array[] =
                .type =       SOCK_RAW,
                .protocol =   IPPROTO_IP,       /* wild card */
                .prot =       &raw_prot,
-               .ops =        &inet_dgram_ops,
+               .ops =        &inet_sockraw_ops,
                .capability = CAP_NET_RAW,
                .no_check =   UDP_CSUM_DEFAULT,
                .flags =      INET_PROTOSW_REUSE,
@@ -1059,7 +1053,7 @@ static int __init init_ipv4_mibs(void)
        return 0;
 }
 
-int ipv4_proc_init(void);
+static int ipv4_proc_init(void);
 extern void ipfrag_init(void);
 
 static int __init inet_init(void)
@@ -1067,24 +1061,29 @@ static int __init inet_init(void)
        struct sk_buff *dummy_skb;
        struct inet_protosw *q;
        struct list_head *r;
+       int rc = -EINVAL;
 
        if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
                printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
-               return -EINVAL;
+               goto out;
+       }
+
+       rc = sk_alloc_slab(&tcp_prot, "tcp_sock");
+       if (rc) {
+               sk_alloc_slab_error(&tcp_prot);
+               goto out;
+       }
+       rc = sk_alloc_slab(&udp_prot, "udp_sock");
+       if (rc) {
+               sk_alloc_slab_error(&udp_prot);
+               goto out_tcp_free_slab;
+       }
+       rc = sk_alloc_slab(&raw_prot, "raw_sock");
+       if (rc) {
+               sk_alloc_slab_error(&raw_prot);
+               goto out_udp_free_slab;
        }
 
-       tcp_sk_cachep = kmem_cache_create("tcp_sock",
-                                         sizeof(struct tcp_sock), 0,
-                                         SLAB_HWCACHE_ALIGN, NULL, NULL);
-       udp_sk_cachep = kmem_cache_create("udp_sock",
-                                         sizeof(struct udp_sock), 0,
-                                         SLAB_HWCACHE_ALIGN, NULL, NULL);
-       raw4_sk_cachep = kmem_cache_create("raw4_sock",
-                                          sizeof(struct raw_sock), 0,
-                                          SLAB_HWCACHE_ALIGN, NULL, NULL);
-       if (!tcp_sk_cachep || !udp_sk_cachep || !raw4_sk_cachep)
-               printk(KERN_CRIT
-                      "inet_init: Can't create protocol sock SLAB caches!\n");
        /*
         *      Tell SOCKET that we are alive... 
         */
@@ -1154,7 +1153,14 @@ static int __init inet_init(void)
 
        ipfrag_init();
 
-       return 0;
+       rc = 0;
+out:
+       return rc;
+out_udp_free_slab:     /* raw slab failed: release udp, then tcp */
+       sk_free_slab(&udp_prot);
+out_tcp_free_slab:     /* entered directly when the udp slab failed */
+       sk_free_slab(&tcp_prot);
+       goto out;
 }
 
 module_init(inet_init);
@@ -1172,7 +1178,7 @@ extern void tcp4_proc_exit(void);
 extern int  udp4_proc_init(void);
 extern void udp4_proc_exit(void);
 
-int __init ipv4_proc_init(void)
+static int __init ipv4_proc_init(void)
 {
        int rc = 0;
 
@@ -1202,7 +1208,7 @@ out_raw:
 }
 
 #else /* CONFIG_PROC_FS */
-int __init ipv4_proc_init(void)
+static int __init ipv4_proc_init(void)
 {
        return 0;
 }
@@ -1214,7 +1220,6 @@ EXPORT_SYMBOL(inet_accept);
 EXPORT_SYMBOL(inet_bind);
 EXPORT_SYMBOL(inet_dgram_connect);
 EXPORT_SYMBOL(inet_dgram_ops);
-EXPORT_SYMBOL(inet_family_ops);
 EXPORT_SYMBOL(inet_getname);
 EXPORT_SYMBOL(inet_ioctl);
 EXPORT_SYMBOL(inet_listen);
@@ -1227,8 +1232,6 @@ EXPORT_SYMBOL(inet_stream_connect);
 EXPORT_SYMBOL(inet_stream_ops);
 EXPORT_SYMBOL(inet_unregister_protosw);
 EXPORT_SYMBOL(net_statistics);
-EXPORT_SYMBOL(tcp_protocol);
-EXPORT_SYMBOL(udp_protocol);
 
 #ifdef INET_REFCNT_DEBUG
 EXPORT_SYMBOL(inet_sock_nr);