vserver 1.9.5.x5
[linux-2.6.git] / net / ipv6 / ip6_output.c
index 81eed71..fce2a87 100644 (file)
 #include <net/rawv6.h>
 #include <net/icmp.h>
 #include <net/xfrm.h>
+#include <net/checksum.h>
 
-static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**));
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
 {
        static u32 ipv6_fragmentation_id = 1;
-       static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(ip6_id_lock);
 
        spin_lock_bh(&ip6_id_lock);
        fhdr->identification = htonl(ipv6_fragmentation_id);
@@ -107,9 +108,8 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 }
 
 
-static int ip6_output2(struct sk_buff **pskb)
+static int ip6_output2(struct sk_buff *skb)
 {
-       struct sk_buff *skb = *pskb;
        struct dst_entry *dst = skb->dst;
        struct net_device *dev = dst->dev;
 
@@ -145,14 +145,12 @@ static int ip6_output2(struct sk_buff **pskb)
        return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 }
 
-int ip6_output(struct sk_buff **pskb)
+int ip6_output(struct sk_buff *skb)
 {
-       struct sk_buff *skb = *pskb;
-
-       if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list))
-               return ip6_fragment(pskb, ip6_output2);
+       if (skb->len > dst_pmtu(skb->dst))
+               return ip6_fragment(skb, ip6_output2);
        else
-               return ip6_output2(pskb);
+               return ip6_output2(skb);
 }
 
 #ifdef CONFIG_NETFILTER
@@ -313,7 +311,7 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
        return 0;
 }
 
-int ip6_call_ra_chain(struct sk_buff *skb, int sel)
+static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 {
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;
@@ -465,6 +463,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
        to->priority = from->priority;
        to->protocol = from->protocol;
        to->security = from->security;
+       dst_release(to->dst);
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;
 
@@ -476,6 +475,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
        /* Connection association is same as pre-frag packet */
        to->nfct = from->nfct;
        nf_conntrack_get(to->nfct);
+       to->nfctinfo = from->nfctinfo;
 #ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(to->nf_bridge);
        to->nf_bridge = from->nf_bridge;
@@ -516,10 +516,10 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
        return offset;
 }
 
-static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**))
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
        struct net_device *dev;
-       struct sk_buff *frag, *skb = *pskb;
+       struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info*)skb->dst;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
@@ -593,6 +593,7 @@ static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**))
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
+                               frag->ip_summed = CHECKSUM_NONE;
                                frag->h.raw = frag->data;
                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
                                frag->nh.raw = __skb_push(frag, hlen);
@@ -608,7 +609,7 @@ static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**))
                                ip6_copy_metadata(frag, skb);
                        }
                        
-                       err = output(&skb);
+                       err = output(skb);
                        if (err || !frag)
                                break;
 
@@ -724,7 +725,7 @@ slow_path:
 
                IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
 
-               err = output(&frag);
+               err = output(frag);
                if (err)
                        goto fail;
        }
@@ -746,7 +747,7 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
        if (sk) {
                struct ipv6_pinfo *np = inet6_sk(sk);
        
-               *dst = __sk_dst_check(sk, np->dst_cookie);
+               *dst = sk_dst_check(sk, np->dst_cookie);
                if (*dst) {
                        struct rt6_info *rt = (struct rt6_info*)*dst;
        
@@ -769,13 +770,13 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
                                 */
        
                        if (((rt->rt6i_dst.plen != 128 ||
-                             ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
+                             !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
                             && (np->daddr_cache == NULL ||
-                                ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
+                                !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
                            || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
+                               dst_release(*dst);
                                *dst = NULL;
-                       } else
-                               dst_hold(*dst);
+                       }
                }
        }
 
@@ -796,10 +797,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
                        goto out_err_release;
                }
        }
-       if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
-               err = -ENETUNREACH;
-               goto out_err_release;
-        }
 
        return 0;
 
@@ -814,14 +811,14 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
                    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
                    unsigned int flags)
 {
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
-       int copy = 0;
+       int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;
@@ -879,29 +876,79 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse
                }
        }
 
+       /*
+        * Let's try using as much space as possible.
+        * Use MTU if total length of the message fits into the MTU.
+        * Otherwise, we need to reserve fragment header and
+        * fragment alignment (= 8-15 octets, in total).
+        *
+        * Note that we may need to "move" the data from the tail of
+        * the buffer to the new fragment when we split
+        * the message.
+        *
+        * FIXME: It may be fragmented into multiple chunks 
+        *        at once if non-fragmentable extension headers
+        *        are too large.
+        * --yoshfuji 
+        */
+
        inet->cork.length += length;
 
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;
 
        while (length > 0) {
-               if ((copy = maxfraglen - skb->len) <= 0) {
+               /* Check if the remaining data fits into current packet. */
+               copy = mtu - skb->len;
+               if (copy < length)
+                       copy = maxfraglen - skb->len;
+
+               if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
+                       unsigned int fraggap;
                        unsigned int alloclen;
-                       BUG_TRAP(copy == 0);
+                       struct sk_buff *skb_prev;
 alloc_new_skb:
-                       datalen = maxfraglen - fragheaderlen;
-                       if (datalen > length)
-                               datalen = length;
+                       skb_prev = skb;
+
+                       /* There's no room in the current skb */
+                       if (skb_prev)
+                               fraggap = skb_prev->len - maxfraglen;
+                       else
+                               fraggap = 0;
+
+                       /*
+                        * If remaining data exceeds the mtu,
+                        * we know we need more fragment(s).
+                        */
+                       datalen = length + fraggap;
+                       if (datalen > mtu - fragheaderlen)
+                               datalen = maxfraglen - fragheaderlen;
+
                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
-                               alloclen = maxfraglen;
+                               alloclen = mtu;
                        else
-                               alloclen = fraglen;
+                               alloclen = datalen + fragheaderlen;
+
+                       /*
+                        * The last fragment gets additional space at tail.
+                        * Note: we overallocate on fragments with MSG_MORE
+                        * because we have no idea if we're the last one.
+                        */
+                       if (datalen == length + fraggap)
+                               alloclen += rt->u.dst.trailer_len;
+
+                       /*
+                        * We just reserve space for fragment header.
+                        * Note: this may be overallocation if the message 
+                        * (without MSG_MORE) fits into the MTU.
+                        */
                        alloclen += sizeof(struct frag_hdr);
+
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
@@ -923,7 +970,7 @@ alloc_new_skb:
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
-                       /* reserve 8 byte for fragmentation */
+                       /* reserve for fragmentation */
                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
 
                        /*
@@ -933,15 +980,29 @@ alloc_new_skb:
                        skb->nh.raw = data + exthdrlen;
                        data += fragheaderlen;
                        skb->h.raw = data + exthdrlen;
-                       copy = datalen - transhdrlen;
-                       if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
+
+                       if (fraggap) {
+                               skb->csum = skb_copy_and_csum_bits(
+                                       skb_prev, maxfraglen,
+                                       data + transhdrlen, fraggap, 0);
+                               skb_prev->csum = csum_sub(skb_prev->csum,
+                                                         skb->csum);
+                               data += fraggap;
+                               skb_trim(skb_prev, maxfraglen);
+                       }
+                       copy = datalen - transhdrlen - fraggap;
+                       if (copy < 0) {
+                               err = -EINVAL;
+                               kfree_skb(skb);
+                               goto error;
+                       } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }
 
                        offset += copy;
-                       length -= datalen;
+                       length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;
@@ -1028,7 +1089,7 @@ int ip6_push_pending_frames(struct sock *sk)
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
@@ -1106,7 +1167,7 @@ error:
 
 void ip6_flush_pending_frames(struct sock *sk)
 {
-       struct inet_opt *inet = inet_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;