X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fip_output.c;h=8ef2b82630a28a6d8029b3c08c8785e677794bd5;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=5a853aac2e95955a47dc975125f34397ccea25f1;hpb=a2c21200f1c81b08cb55e417b68150bba439b646;p=linux-2.6.git diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5a853aac2..8ef2b8263 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -304,7 +305,6 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) struct ip_options *opt = inet->opt; struct rtable *rt; struct iphdr *iph; - u32 mtu; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. @@ -365,23 +365,11 @@ packet_routed: skb->nh.iph = iph; /* Transport layer set skb->h.foo itself. */ - if(opt && opt->optlen) { + if (opt && opt->optlen) { iph->ihl += opt->optlen >> 2; ip_options_build(skb, opt, inet->daddr, rt, 0); } - mtu = dst_pmtu(&rt->u.dst); - if (skb->len > mtu && (sk->sk_route_caps & NETIF_F_TSO)) { - unsigned int hlen; - - /* Hack zone: all this must be done by TCP. */ - hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - skb_shinfo(skb)->tso_size = mtu - hlen; - skb_shinfo(skb)->tso_segs = - (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/ - skb_shinfo(skb)->tso_size - 1; - } - ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs); /* Add an IP checksum. */ @@ -421,6 +409,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) nf_conntrack_put(to->nfct); to->nfct = from->nfct; nf_conntrack_get(to->nfct); + to->nfctinfo = from->nfctinfo; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; @@ -712,7 +701,7 @@ csum_page(struct page *page, int offset, int copy) /* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Eache pieces can be a page + * until ip_push_pending_frames() is called. Each piece can be a page * or non-page data. * * Not only UDP, other transport protocols - e.g. raw sockets - can use @@ -780,7 +769,7 @@ int ip_append_data(struct sock *sk, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); @@ -792,7 +781,7 @@ int ip_append_data(struct sock *sk, * it won't be fragmented in the future. */ if (transhdrlen && - length + fragheaderlen <= maxfraglen && + length + fragheaderlen <= mtu && rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && !exthdrlen) csummode = CHECKSUM_HW; @@ -804,34 +793,42 @@ int ip_append_data(struct sock *sk, * We use calculated fragment length to generate chained skb, * each of segments is IP fragment ready for sending to network after * adding appropriate IP header. - * - * Mistake is: - * - * If mtu-fragheaderlen is not 0 modulo 8, we generate additional - * small fragment of length (mtu-fragheaderlen)%8, even though - * it is not necessary. Not a big bug, but needs a fix. */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { - if ((copy = maxfraglen - skb->len) <= 0) { + /* Check if the remaining data fits into current packet. */ + copy = mtu - skb->len; + if (copy < length) + copy = maxfraglen - skb->len; + if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; + unsigned int fraggap; unsigned int alloclen; - BUG_TRAP(copy == 0); - + struct sk_buff *skb_prev; alloc_new_skb: - datalen = maxfraglen - fragheaderlen; - if (datalen > length) - datalen = length; + skb_prev = skb; + if (skb_prev) + fraggap = skb_prev->len - maxfraglen; + else + fraggap = 0; + /* + * If remaining data exceeds the mtu, + * we know we need more fragment(s). + */ + datalen = length + fraggap; + if (datalen > mtu - fragheaderlen) + datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; + if ((flags & MSG_MORE) && !(rt->u.dst.dev->features&NETIF_F_SG)) - alloclen = maxfraglen; + alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -875,15 +872,25 @@ alloc_new_skb: data += fragheaderlen; skb->h.raw = data + exthdrlen; - copy = datalen - transhdrlen; - if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) { + if (fraggap) { + skb->csum = skb_copy_and_csum_bits( + skb_prev, maxfraglen, + data + transhdrlen, fraggap, 0); + skb_prev->csum = csum_sub(skb_prev->csum, + skb->csum); + data += fraggap; + skb_trim(skb_prev, maxfraglen); + } + + copy = datalen - transhdrlen - fraggap; + if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; - length -= datalen; + length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; @@ -978,7 +985,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, int mtu; int len; int err; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, fraggap; if (inet->hdrincl) return -EPERM; @@ -1000,7 +1007,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + size > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); @@ -1014,13 +1021,25 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, while (size > 0) { int i; - if ((len = maxfraglen - skb->len) <= 0) { + + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; + if (len <= 0) { + struct sk_buff *skb_prev; char *data; struct iphdr *iph; - BUG_TRAP(len == 0); + int alloclen; + + skb_prev = skb; + if (skb_prev) + fraggap = skb_prev->len - maxfraglen; + else + fraggap = 0; - skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1, - sk->sk_allocation); + alloclen = fragheaderlen + hh_len + fraggap + 15; + skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); if (unlikely(!skb)) { err = -ENOBUFS; goto error; @@ -1036,11 +1055,20 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, /* * Find where to start putting bytes. */ - data = skb_put(skb, fragheaderlen); + data = skb_put(skb, fragheaderlen + fraggap); skb->nh.iph = iph = (struct iphdr *)data; data += fragheaderlen; skb->h.raw = data; + if (fraggap) { + skb->csum = skb_copy_and_csum_bits( + skb_prev, maxfraglen, + data, fraggap, 0); + skb_prev->csum = csum_sub(skb_prev->csum, + skb->csum); + skb_trim(skb_prev, maxfraglen); + } + /* * Put the packet on the pending queue. */