X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fip_output.c;h=8ef2b82630a28a6d8029b3c08c8785e677794bd5;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=60fc5091c0ef690eba8540ab7548bc04922337e4;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 60fc5091c..8ef2b8263 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -233,7 +234,7 @@ int ip_mc_output(struct sk_buff **pskb) /* * If the indicated interface is up and running, send the packet. */ - IP_INC_STATS(OutRequests); + IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -288,7 +289,7 @@ int ip_output(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; - IP_INC_STATS(OutRequests); + IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) && !skb_shinfo(skb)->tso_size) @@ -304,7 +305,6 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) struct ip_options *opt = inet->opt; struct rtable *rt; struct iphdr *iph; - u32 mtu; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. @@ -365,23 +365,11 @@ packet_routed: skb->nh.iph = iph; /* Transport layer set skb->h.foo itself. */ - if(opt && opt->optlen) { + if (opt && opt->optlen) { iph->ihl += opt->optlen >> 2; ip_options_build(skb, opt, inet->daddr, rt, 0); } - mtu = dst_pmtu(&rt->u.dst); - if (skb->len > mtu && (sk->sk_route_caps & NETIF_F_TSO)) { - unsigned int hlen; - - /* Hack zone: all this must be done by TCP. */ - hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - skb_shinfo(skb)->tso_size = mtu - hlen; - skb_shinfo(skb)->tso_segs = - (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/ - skb_shinfo(skb)->tso_size - 1; - } - ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs); /* Add an IP checksum. */ @@ -393,7 +381,7 @@ packet_routed: dst_output); no_route: - IP_INC_STATS(OutNoRoutes); + IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EHOSTUNREACH; } @@ -421,6 +409,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) nf_conntrack_put(to->nfct); to->nfct = from->nfct; nf_conntrack_get(to->nfct); + to->nfctinfo = from->nfctinfo; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; @@ -498,10 +487,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) skb_headroom(frag) < hlen) goto slow_path; - /* Correct socket ownership. */ - if (frag->sk == NULL && skb->sk) - goto slow_path; - /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path; @@ -512,7 +497,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = 0; offset = 0; frag = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = 0; + skb_shinfo(skb)->frag_list = NULL; skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; iph->tot_len = htons(first_len); @@ -550,7 +535,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) } if (err == 0) { - IP_INC_STATS(FragOKs); + IP_INC_STATS(IPSTATS_MIB_FRAGOKS); return 0; } @@ -559,7 +544,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) kfree_skb(frag); frag = skb; } - IP_INC_STATS(FragFails); + IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); return err; } @@ -665,7 +650,7 @@ slow_path: * Put this fragment into the sending queue. */ - IP_INC_STATS(FragCreates); + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); iph->tot_len = htons(len + hlen); @@ -676,12 +661,12 @@ slow_path: goto fail; } kfree_skb(skb); - IP_INC_STATS(FragOKs); + IP_INC_STATS(IPSTATS_MIB_FRAGOKS); return err; fail: kfree_skb(skb); - IP_INC_STATS(FragFails); + IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); return err; } @@ -702,17 +687,6 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk return 0; } -static inline int -skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) -{ - if (i) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - return page == frag->page && - off == frag->page_offset+frag->size; - } - return 0; -} - static inline unsigned int csum_page(struct page *page, int offset, int copy) { @@ -727,7 +701,7 @@ csum_page(struct page *page, int offset, int copy) /* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Eache pieces can be a page + * until ip_push_pending_frames() is called. Each piece can be a page * or non-page data. * * Not only UDP, other transport protocols - e.g. raw sockets - can use @@ -777,8 +751,8 @@ int ip_append_data(struct sock *sk, inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst); inet->cork.rt = rt; inet->cork.length = 0; - inet->sndmsg_page = NULL; - inet->sndmsg_off = 0; + sk->sk_sndmsg_page = NULL; + sk->sk_sndmsg_off = 0; if ((exthdrlen = rt->u.dst.header_len) != 0) { length += exthdrlen; transhdrlen += exthdrlen; @@ -795,7 +769,7 @@ int ip_append_data(struct sock *sk, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); @@ -807,7 +781,7 @@ int ip_append_data(struct sock *sk, * it won't be fragmented in the future. */ if (transhdrlen && - length + fragheaderlen <= maxfraglen && + length + fragheaderlen <= mtu && rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && !exthdrlen) csummode = CHECKSUM_HW; @@ -819,34 +793,42 @@ int ip_append_data(struct sock *sk, * We use calculated fragment length to generate chained skb, * each of segments is IP fragment ready for sending to network after * adding appropriate IP header. - * - * Mistake is: - * - * If mtu-fragheaderlen is not 0 modulo 8, we generate additional - * small fragment of length (mtu-fragheaderlen)%8, even though - * it is not necessary. Not a big bug, but needs a fix. */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { - if ((copy = maxfraglen - skb->len) <= 0) { + /* Check if the remaining data fits into current packet. */ + copy = mtu - skb->len; + if (copy < length) + copy = maxfraglen - skb->len; + if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; + unsigned int fraggap; unsigned int alloclen; - BUG_TRAP(copy == 0); - + struct sk_buff *skb_prev; alloc_new_skb: - datalen = maxfraglen - fragheaderlen; - if (datalen > length) - datalen = length; + skb_prev = skb; + if (skb_prev) + fraggap = skb_prev->len - maxfraglen; + else + fraggap = 0; + /* + * If remaining data exceeds the mtu, + * we know we need more fragment(s). + */ + datalen = length + fraggap; + if (datalen > mtu - fragheaderlen) + datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; + if ((flags & MSG_MORE) && !(rt->u.dst.dev->features&NETIF_F_SG)) - alloclen = maxfraglen; + alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -890,15 +872,25 @@ alloc_new_skb: data += fragheaderlen; skb->h.raw = data + exthdrlen; - copy = datalen - transhdrlen; - if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) { + if (fraggap) { + skb->csum = skb_copy_and_csum_bits( + skb_prev, maxfraglen, + data + transhdrlen, fraggap, 0); + skb_prev->csum = csum_sub(skb_prev->csum, + skb->csum); + data += fraggap; + skb_trim(skb_prev, maxfraglen); + } + + copy = datalen - transhdrlen - fraggap; + if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; - length -= datalen; + length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; @@ -926,8 +918,8 @@ alloc_new_skb: } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = inet->sndmsg_page; - int off = inet->sndmsg_off; + struct page *page = sk->sk_sndmsg_page; + int off = sk->sk_sndmsg_off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { @@ -939,7 +931,7 @@ alloc_new_skb: goto error; } get_page(page); - skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0); + skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -950,8 +942,8 @@ alloc_new_skb: err = -ENOMEM; goto error; } - inet->sndmsg_page = page; - inet->sndmsg_off = 0; + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; @@ -965,7 +957,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - inet->sndmsg_off += copy; + sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; @@ -978,7 +970,7 @@ alloc_new_skb: error: inet->cork.length -= length; - IP_INC_STATS(OutDiscards); + IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); return err; } @@ -993,7 +985,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, int mtu; int len; int err; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, fraggap; if (inet->hdrincl) return -EPERM; @@ -1015,7 +1007,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + size > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); @@ -1029,13 +1021,25 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, while (size > 0) { int i; - if ((len = maxfraglen - skb->len) <= 0) { + + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; + if (len <= 0) { + struct sk_buff *skb_prev; char *data; struct iphdr *iph; - BUG_TRAP(len == 0); + int alloclen; + + skb_prev = skb; + if (skb_prev) + fraggap = skb_prev->len - maxfraglen; + else + fraggap = 0; - skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1, - sk->sk_allocation); + alloclen = fragheaderlen + hh_len + fraggap + 15; + skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); if (unlikely(!skb)) { err = -ENOBUFS; goto error; @@ -1051,11 +1055,20 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, /* * Find where to start putting bytes. */ - data = skb_put(skb, fragheaderlen); + data = skb_put(skb, fragheaderlen + fraggap); skb->nh.iph = iph = (struct iphdr *)data; data += fragheaderlen; skb->h.raw = data; + if (fraggap) { + skb->csum = skb_copy_and_csum_bits( + skb_prev, maxfraglen, + data, fraggap, 0); + skb_prev->csum = csum_sub(skb_prev->csum, + skb->csum); + skb_trim(skb_prev, maxfraglen); + } + /* * Put the packet on the pending queue. */ @@ -1091,7 +1104,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, error: inet->cork.length -= size; - IP_INC_STATS(OutDiscards); + IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1124,12 +1137,10 @@ int ip_push_pending_frames(struct sock *sk) tail_skb = &(tmp_skb->next); skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; -#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */ skb->truesize += tmp_skb->truesize; __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; -#endif } /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow @@ -1201,7 +1212,7 @@ out: return err; error: - IP_INC_STATS(OutDiscards); + IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); goto out; }