X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fipv4%2Fip_output.c;h=47054f8a3a33f449e93f6f8bd96603641d07ade0;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=5a853aac2e95955a47dc975125f34397ccea25f1;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5a853aac2..47054f8a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -114,7 +115,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) return 0; } -static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst) +static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { int ttl = inet->uc_ttl; @@ -130,7 +131,7 @@ static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst) int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)skb->dst; struct iphdr *iph; @@ -223,9 +224,8 @@ int ip_finish_output(struct sk_buff *skb) ip_finish_output2); } -int ip_mc_output(struct sk_buff **pskb) +int ip_mc_output(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; struct sock *sk = skb->sk; struct rtable *rt = (struct rtable*)skb->dst; struct net_device *dev = rt->u.dst.dev; @@ -278,20 +278,17 @@ int ip_mc_output(struct sk_buff **pskb) newskb->dev, ip_dev_loopback_xmit); } - if (skb->len > dst_pmtu(&rt->u.dst) || skb_shinfo(skb)->frag_list) + if (skb->len > dst_pmtu(&rt->u.dst)) return ip_fragment(skb, ip_finish_output); else return ip_finish_output(skb); } -int ip_output(struct sk_buff **pskb) +int ip_output(struct sk_buff *skb) { - struct sk_buff *skb = *pskb; - IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) && - !skb_shinfo(skb)->tso_size) + if (skb->len > dst_pmtu(skb->dst) && !skb_shinfo(skb)->tso_size) return ip_fragment(skb, ip_finish_output); else return ip_finish_output(skb); @@ -300,11 +297,10 @@ int ip_output(struct sk_buff **pskb) int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = inet->opt; struct rtable *rt; struct iphdr *iph; - u32 mtu; /* Skip all of this if the packet is already routed, * f.e. by something like SCTP. @@ -365,23 +361,11 @@ packet_routed: skb->nh.iph = iph; /* Transport layer set skb->h.foo itself. */ - if(opt && opt->optlen) { + if (opt && opt->optlen) { iph->ihl += opt->optlen >> 2; ip_options_build(skb, opt, inet->daddr, rt, 0); } - mtu = dst_pmtu(&rt->u.dst); - if (skb->len > mtu && (sk->sk_route_caps & NETIF_F_TSO)) { - unsigned int hlen; - - /* Hack zone: all this must be done by TCP. */ - hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - skb_shinfo(skb)->tso_size = mtu - hlen; - skb_shinfo(skb)->tso_segs = - (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/ - skb_shinfo(skb)->tso_size - 1; - } - ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs); /* Add an IP checksum. */ @@ -405,6 +389,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->priority = from->priority; to->protocol = from->protocol; to->security = from->security; + dst_release(to->dst); to->dst = dst_clone(from->dst); to->dev = from->dev; @@ -421,6 +406,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) nf_conntrack_put(to->nfct); to->nfct = from->nfct; nf_conntrack_get(to->nfct); + to->nfctinfo = from->nfctinfo; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; @@ -519,6 +505,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) /* Prepare header of the next frame, * before previous one went down. */ if (frag) { + frag->ip_summed = CHECKSUM_NONE; frag->h.raw = frag->data; frag->nh.raw = __skb_push(frag, hlen); memcpy(frag->nh.raw, iph, hlen); @@ -712,7 +699,7 @@ csum_page(struct page *page, int offset, int copy) /* * ip_append_data() and ip_append_page() can make one large IP datagram * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Eache pieces can be a page + * until ip_push_pending_frames() is called. Each piece can be a page * or non-page data. * * Not only UDP, other transport protocols - e.g. raw sockets - can use @@ -727,7 +714,7 @@ int ip_append_data(struct sock *sk, struct ipcm_cookie *ipc, struct rtable *rt, unsigned int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; struct ip_options *opt = NULL; @@ -780,7 +767,7 @@ int ip_append_data(struct sock *sk, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); @@ -792,7 +779,7 @@ int ip_append_data(struct sock *sk, * it won't be fragmented in the future. */ if (transhdrlen && - length + fragheaderlen <= maxfraglen && + length + fragheaderlen <= mtu && rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && !exthdrlen) csummode = CHECKSUM_HW; @@ -804,34 +791,42 @@ int ip_append_data(struct sock *sk, * We use calculated fragment length to generate chained skb, * each of segments is IP fragment ready for sending to network after * adding appropriate IP header. - * - * Mistake is: - * - * If mtu-fragheaderlen is not 0 modulo 8, we generate additional - * small fragment of length (mtu-fragheaderlen)%8, even though - * it is not necessary. Not a big bug, but needs a fix. */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) goto alloc_new_skb; while (length > 0) { - if ((copy = maxfraglen - skb->len) <= 0) { + /* Check if the remaining data fits into current packet. */ + copy = mtu - skb->len; + if (copy < length) + copy = maxfraglen - skb->len; + if (copy <= 0) { char *data; unsigned int datalen; unsigned int fraglen; + unsigned int fraggap; unsigned int alloclen; - BUG_TRAP(copy == 0); - + struct sk_buff *skb_prev; alloc_new_skb: - datalen = maxfraglen - fragheaderlen; - if (datalen > length) - datalen = length; + skb_prev = skb; + if (skb_prev) + fraggap = skb_prev->len - maxfraglen; + else + fraggap = 0; + /* + * If remaining data exceeds the mtu, + * we know we need more fragment(s). + */ + datalen = length + fraggap; + if (datalen > mtu - fragheaderlen) + datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; + if ((flags & MSG_MORE) && !(rt->u.dst.dev->features&NETIF_F_SG)) - alloclen = maxfraglen; + alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -875,15 +870,25 @@ alloc_new_skb: data += fragheaderlen; skb->h.raw = data + exthdrlen; - copy = datalen - transhdrlen; - if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) { + if (fraggap) { + skb->csum = skb_copy_and_csum_bits( + skb_prev, maxfraglen, + data + transhdrlen, fraggap, 0); + skb_prev->csum = csum_sub(skb_prev->csum, + skb->csum); + data += fraggap; + skb_trim(skb_prev, maxfraglen); + } + + copy = datalen - transhdrlen - fraggap; + if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; } offset += copy; - length -= datalen; + length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; @@ -970,7 +975,7 @@ error: ssize_t ip_append_page(struct sock *sk, struct page *page, int offset, size_t size, int flags) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; struct rtable *rt; struct ip_options *opt = NULL; @@ -978,7 +983,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, int mtu; int len; int err; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, fraggap; if (inet->hdrincl) return -EPERM; @@ -1000,7 +1005,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); - maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen; + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + size > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); @@ -1014,13 +1019,25 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, while (size > 0) { int i; - if ((len = maxfraglen - skb->len) <= 0) { + + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; + if (len <= 0) { + struct sk_buff *skb_prev; char *data; struct iphdr *iph; - BUG_TRAP(len == 0); + int alloclen; + + skb_prev = skb; + if (skb_prev) + fraggap = skb_prev->len - maxfraglen; + else + fraggap = 0; - skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1, - sk->sk_allocation); + alloclen = fragheaderlen + hh_len + fraggap + 15; + skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); if (unlikely(!skb)) { err = -ENOBUFS; goto error; @@ -1036,11 +1053,20 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, /* * Find where to start putting bytes. */ - data = skb_put(skb, fragheaderlen); + data = skb_put(skb, fragheaderlen + fraggap); skb->nh.iph = iph = (struct iphdr *)data; data += fragheaderlen; skb->h.raw = data; + if (fraggap) { + skb->csum = skb_copy_and_csum_bits( + skb_prev, maxfraglen, + data, fraggap, 0); + skb_prev->csum = csum_sub(skb_prev->csum, + skb->csum); + skb_trim(skb_prev, maxfraglen); + } + /* * Put the packet on the pending queue. */ @@ -1088,7 +1114,7 @@ int ip_push_pending_frames(struct sock *sk) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = NULL; struct rtable *rt = inet->cork.rt; struct iphdr *iph; @@ -1193,7 +1219,7 @@ error: */ void ip_flush_pending_frames(struct sock *sk) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) @@ -1236,7 +1262,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len) { - struct inet_opt *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct { struct ip_options opt; char data[40];