fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv4 / af_inet.c
index fef25db..35be3ed 100644 (file)
@@ -67,7 +67,7 @@
  *             2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <net/inet_connection_sock.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <net/udplite.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
 #include <net/raw.h>
@@ -205,7 +206,7 @@ int inet_listen(struct socket *sock, int backlog)
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != TCP_LISTEN) {
-               err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+               err = inet_csk_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
@@ -307,7 +308,7 @@ override:
                sk->sk_reuse = 1;
 
        inet = inet_sk(sk);
-       inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
+       inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
        if (SOCK_RAW == sock->type) {
                inet->num = protocol;
@@ -394,7 +395,7 @@ int inet_release(struct socket *sock)
 }
 
 /* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind;
+int sysctl_ip_nonlocal_bind __read_mostly;
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -684,7 +685,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
                sin->sin_port = inet->dport;
                sin->sin_addr.s_addr = inet->daddr;
        } else {
-               __u32 addr = inet->rcv_saddr;
+               __be32 addr = inet->rcv_saddr;
                if (!addr)
                        addr = inet->saddr;
                sin->sin_port = inet->sport;
@@ -829,45 +830,53 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 
 const struct proto_ops inet_stream_ops = {
-       .family =       PF_INET,
-       .owner =        THIS_MODULE,
-       .release =      inet_release,
-       .bind =         inet_bind,
-       .connect =      inet_stream_connect,
-       .socketpair =   sock_no_socketpair,
-       .accept =       inet_accept,
-       .getname =      inet_getname,
-       .poll =         tcp_poll,
-       .ioctl =        inet_ioctl,
-       .listen =       inet_listen,
-       .shutdown =     inet_shutdown,
-       .setsockopt =   sock_common_setsockopt,
-       .getsockopt =   sock_common_getsockopt,
-       .sendmsg =      inet_sendmsg,
-       .recvmsg =      sock_common_recvmsg,
-       .mmap =         sock_no_mmap,
-       .sendpage =     tcp_sendpage
+       .family            = PF_INET,
+       .owner             = THIS_MODULE,
+       .release           = inet_release,
+       .bind              = inet_bind,
+       .connect           = inet_stream_connect,
+       .socketpair        = sock_no_socketpair,
+       .accept            = inet_accept,
+       .getname           = inet_getname,
+       .poll              = tcp_poll,
+       .ioctl             = inet_ioctl,
+       .listen            = inet_listen,
+       .shutdown          = inet_shutdown,
+       .setsockopt        = sock_common_setsockopt,
+       .getsockopt        = sock_common_getsockopt,
+       .sendmsg           = inet_sendmsg,
+       .recvmsg           = sock_common_recvmsg,
+       .mmap              = sock_no_mmap,
+       .sendpage          = tcp_sendpage,
+#ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_sock_common_setsockopt,
+       .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
 };
 
 const struct proto_ops inet_dgram_ops = {
-       .family =       PF_INET,
-       .owner =        THIS_MODULE,
-       .release =      inet_release,
-       .bind =         inet_bind,
-       .connect =      inet_dgram_connect,
-       .socketpair =   sock_no_socketpair,
-       .accept =       sock_no_accept,
-       .getname =      inet_getname,
-       .poll =         udp_poll,
-       .ioctl =        inet_ioctl,
-       .listen =       sock_no_listen,
-       .shutdown =     inet_shutdown,
-       .setsockopt =   sock_common_setsockopt,
-       .getsockopt =   sock_common_getsockopt,
-       .sendmsg =      inet_sendmsg,
-       .recvmsg =      sock_common_recvmsg,
-       .mmap =         sock_no_mmap,
-       .sendpage =     inet_sendpage,
+       .family            = PF_INET,
+       .owner             = THIS_MODULE,
+       .release           = inet_release,
+       .bind              = inet_bind,
+       .connect           = inet_dgram_connect,
+       .socketpair        = sock_no_socketpair,
+       .accept            = sock_no_accept,
+       .getname           = inet_getname,
+       .poll              = udp_poll,
+       .ioctl             = inet_ioctl,
+       .listen            = sock_no_listen,
+       .shutdown          = inet_shutdown,
+       .setsockopt        = sock_common_setsockopt,
+       .getsockopt        = sock_common_getsockopt,
+       .sendmsg           = inet_sendmsg,
+       .recvmsg           = sock_common_recvmsg,
+       .mmap              = sock_no_mmap,
+       .sendpage          = inet_sendpage,
+#ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_sock_common_setsockopt,
+       .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
 };
 
 /*
@@ -875,24 +884,28 @@ const struct proto_ops inet_dgram_ops = {
  * udp_poll
  */
 static const struct proto_ops inet_sockraw_ops = {
-       .family =       PF_INET,
-       .owner =        THIS_MODULE,
-       .release =      inet_release,
-       .bind =         inet_bind,
-       .connect =      inet_dgram_connect,
-       .socketpair =   sock_no_socketpair,
-       .accept =       sock_no_accept,
-       .getname =      inet_getname,
-       .poll =         datagram_poll,
-       .ioctl =        inet_ioctl,
-       .listen =       sock_no_listen,
-       .shutdown =     inet_shutdown,
-       .setsockopt =   sock_common_setsockopt,
-       .getsockopt =   sock_common_getsockopt,
-       .sendmsg =      inet_sendmsg,
-       .recvmsg =      sock_common_recvmsg,
-       .mmap =         sock_no_mmap,
-       .sendpage =     inet_sendpage,
+       .family            = PF_INET,
+       .owner             = THIS_MODULE,
+       .release           = inet_release,
+       .bind              = inet_bind,
+       .connect           = inet_dgram_connect,
+       .socketpair        = sock_no_socketpair,
+       .accept            = sock_no_accept,
+       .getname           = inet_getname,
+       .poll              = datagram_poll,
+       .ioctl             = inet_ioctl,
+       .listen            = sock_no_listen,
+       .shutdown          = inet_shutdown,
+       .setsockopt        = sock_common_setsockopt,
+       .getsockopt        = sock_common_getsockopt,
+       .sendmsg           = inet_sendmsg,
+       .recvmsg           = sock_common_recvmsg,
+       .mmap              = sock_no_mmap,
+       .sendpage          = inet_sendpage,
+#ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_sock_common_setsockopt,
+       .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
 };
 
 static struct net_proto_family inet_family_ops = {
@@ -1016,16 +1029,16 @@ void inet_unregister_protosw(struct inet_protosw *p)
  *      Shall we try to damage output packets if routing dev changes?
  */
 
-int sysctl_ip_dynaddr;
+int sysctl_ip_dynaddr __read_mostly;
 
 static int inet_sk_reselect_saddr(struct sock *sk)
 {
        struct inet_sock *inet = inet_sk(sk);
        int err;
        struct rtable *rt;
-       __u32 old_saddr = inet->saddr;
-       __u32 new_saddr;
-       __u32 daddr = inet->daddr;
+       __be32 old_saddr = inet->saddr;
+       __be32 new_saddr;
+       __be32 daddr = inet->daddr;
 
        if (inet->opt && inet->opt->srr)
                daddr = inet->opt->faddr;
@@ -1072,7 +1085,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
-       u32 daddr;
+       __be32 daddr;
        int err;
 
        /* Route is OK, nothing to do. */
@@ -1102,6 +1115,7 @@ int inet_sk_rebuild_header(struct sock *sk)
                },
        };
                                                
+       security_sk_classify_flow(sk, &fl);
        err = ip_route_output_flow(&rt, &fl, sk, 0);
 }
        if (!err)
@@ -1125,6 +1139,96 @@ int inet_sk_rebuild_header(struct sock *sk)
 
 EXPORT_SYMBOL(inet_sk_rebuild_header);
 
+static int inet_gso_send_check(struct sk_buff *skb)
+{
+       struct iphdr *iph;
+       struct net_protocol *ops;
+       int proto;
+       int ihl;
+       int err = -EINVAL;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+               goto out;
+
+       iph = skb->nh.iph;
+       ihl = iph->ihl * 4;
+       if (ihl < sizeof(*iph))
+               goto out;
+
+       if (unlikely(!pskb_may_pull(skb, ihl)))
+               goto out;
+
+       skb->h.raw = __skb_pull(skb, ihl);
+       iph = skb->nh.iph;
+       proto = iph->protocol & (MAX_INET_PROTOS - 1);
+       err = -EPROTONOSUPPORT;
+
+       rcu_read_lock();
+       ops = rcu_dereference(inet_protos[proto]);
+       if (likely(ops && ops->gso_send_check))
+               err = ops->gso_send_check(skb);
+       rcu_read_unlock();
+
+out:
+       return err;
+}
+
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       struct iphdr *iph;
+       struct net_protocol *ops;
+       int proto;
+       int ihl;
+       int id;
+
+       if (unlikely(skb_shinfo(skb)->gso_type &
+                    ~(SKB_GSO_TCPV4 |
+                      SKB_GSO_UDP |
+                      SKB_GSO_DODGY |
+                      SKB_GSO_TCP_ECN |
+                      0)))
+               goto out;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+               goto out;
+
+       iph = skb->nh.iph;
+       ihl = iph->ihl * 4;
+       if (ihl < sizeof(*iph))
+               goto out;
+
+       if (unlikely(!pskb_may_pull(skb, ihl)))
+               goto out;
+
+       skb->h.raw = __skb_pull(skb, ihl);
+       iph = skb->nh.iph;
+       id = ntohs(iph->id);
+       proto = iph->protocol & (MAX_INET_PROTOS - 1);
+       segs = ERR_PTR(-EPROTONOSUPPORT);
+
+       rcu_read_lock();
+       ops = rcu_dereference(inet_protos[proto]);
+       if (likely(ops && ops->gso_segment))
+               segs = ops->gso_segment(skb, features);
+       rcu_read_unlock();
+
+       if (!segs || unlikely(IS_ERR(segs)))
+               goto out;
+
+       skb = segs;
+       do {
+               iph = skb->nh.iph;
+               iph->id = htons(id++);
+               iph->tot_len = htons(skb->len - skb->mac_len);
+               iph->check = 0;
+               iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+       } while ((skb = skb->next));
+
+out:
+       return segs;
+}
+
 #ifdef CONFIG_IP_MULTICAST
 static struct net_protocol igmp_protocol = {
        .handler =      igmp_rcv,
@@ -1134,6 +1238,8 @@ static struct net_protocol igmp_protocol = {
 static struct net_protocol tcp_protocol = {
        .handler =      tcp_v4_rcv,
        .err_handler =  tcp_v4_err,
+       .gso_send_check = tcp_v4_gso_send_check,
+       .gso_segment =  tcp_tso_segment,
        .no_policy =    1,
 };
 
@@ -1159,10 +1265,13 @@ static int __init init_ipv4_mibs(void)
        tcp_statistics[1] = alloc_percpu(struct tcp_mib);
        udp_statistics[0] = alloc_percpu(struct udp_mib);
        udp_statistics[1] = alloc_percpu(struct udp_mib);
+       udplite_statistics[0] = alloc_percpu(struct udp_mib);
+       udplite_statistics[1] = alloc_percpu(struct udp_mib);
        if (!
            (net_statistics[0] && net_statistics[1] && ip_statistics[0]
             && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-            && udp_statistics[0] && udp_statistics[1]))
+            && udp_statistics[0] && udp_statistics[1]
+            && udplite_statistics[0] && udplite_statistics[1]             ) )
                return -ENOMEM;
 
        (void) tcp_mib_init();
@@ -1179,6 +1288,8 @@ static int ipv4_proc_init(void);
 static struct packet_type ip_packet_type = {
        .type = __constant_htons(ETH_P_IP),
        .func = ip_rcv,
+       .gso_send_check = inet_gso_send_check,
+       .gso_segment = inet_gso_segment,
 };
 
 static int __init inet_init(void)
@@ -1188,10 +1299,7 @@ static int __init inet_init(void)
        struct list_head *r;
        int rc = -EINVAL;
 
-       if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
-               printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
-               goto out;
-       }
+       BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
 
        rc = proto_register(&tcp_prot, 1);
        if (rc)
@@ -1250,6 +1358,8 @@ static int __init inet_init(void)
        /* Setup TCP slab cache for open requests. */
        tcp_init();
 
+       /* Add UDP-Lite (RFC 3828) */
+       udplite4_register();
 
        /*
         *      Set the ICMP layer up
@@ -1279,14 +1389,14 @@ static int __init inet_init(void)
        rc = 0;
 out:
        return rc;
-out_unregister_tcp_proto:
-       proto_unregister(&tcp_prot);
 out_unregister_udp_proto:
        proto_unregister(&udp_prot);
+out_unregister_tcp_proto:
+       proto_unregister(&tcp_prot);
        goto out;
 }
 
-module_init(inet_init);
+fs_initcall(inet_init);
 
 /* ------------------------------------------------------------------------ */