2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
97 /* dev_loopback_xmit for use with netfilter. */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
111 static int ip6_output2(struct sk_buff *skb)
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
148 int ip6_output(struct sk_buff *skb)
150 if (skb->len > dst_pmtu(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
153 return ip6_output2(skb);
156 #ifdef CONFIG_NETFILTER
157 int ip6_route_me_harder(struct sk_buff *skb)
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
170 dst = ip6_route_output(skb->sk, &fl);
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
180 /* Drop old route. */
181 dst_release(skb->dst);
188 static inline int ip6_maybe_reroute(struct sk_buff *skb)
190 #ifdef CONFIG_NETFILTER
191 if (skb->nfcache & NFC_ALTERED){
192 if (ip6_route_me_harder(skb) != 0){
197 #endif /* CONFIG_NETFILTER */
198 return dst_output(skb);
202 * xmit an sk_buff (used by TCP)
205 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
236 skb_set_owner_w(skb, sk);
239 ipv6_push_frag_opts(skb, opt, &proto);
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
247 * Fill in the IPv6 header
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
253 hlimit = np->hop_limit;
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
257 hdr->payload_len = htons(seg_len);
258 hdr->nexthdr = proto;
259 hdr->hop_limit = hlimit;
261 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
262 ipv6_addr_copy(&hdr->daddr, first_hop);
265 if ((skb->len <= mtu) || ipfragok) {
266 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
267 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
274 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 struct in6_addr *saddr, struct in6_addr *daddr,
290 struct ipv6_pinfo *np = inet6_sk(sk);
294 skb->protocol = htons(ETH_P_IPV6);
297 totlen = len + sizeof(struct ipv6hdr);
299 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
302 *(u32*)hdr = htonl(0x60000000);
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
314 int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
322 if (sk && ra->sel == sel) {
324 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
326 rawv6_rcv(last, skb2);
333 rawv6_rcv(last, skb);
334 read_unlock(&ip6_ra_lock);
337 read_unlock(&ip6_ra_lock);
341 static inline int ip6_forward_finish(struct sk_buff *skb)
343 return dst_output(skb);
346 int ip6_forward(struct sk_buff *skb)
348 struct dst_entry *dst = skb->dst;
349 struct ipv6hdr *hdr = skb->nh.ipv6h;
350 struct inet6_skb_parm *opt = IP6CB(skb);
352 if (ipv6_devconf.forwarding == 0)
355 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
356 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
360 skb->ip_summed = CHECKSUM_NONE;
363 * We DO NOT make any processing on
364 * RA packets, pushing them to user level AS IS
365 * without ane WARRANTY that application will be able
366 * to interpret them. The reason is that we
367 * cannot make anything clever here.
369 * We are not end-node, so that if packet contains
370 * AH/ESP, we cannot make anything.
371 * Defragmentation also would be mistake, RA packets
372 * cannot be fragmented, because there is no warranty
373 * that different fragments will go along one path. --ANK
376 u8 *ptr = skb->nh.raw + opt->ra;
377 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
382 * check and decrement ttl
384 if (hdr->hop_limit <= 1) {
385 /* Force OUTPUT device used as source address */
387 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
394 if (!xfrm6_route_forward(skb)) {
395 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
399 /* IPv6 specs say nothing about it, but it is clear that we cannot
400 send redirects to source routed frames.
402 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
403 struct in6_addr *target = NULL;
405 struct neighbour *n = dst->neighbour;
408 * incoming and outgoing devices are the same
412 rt = (struct rt6_info *) dst;
413 if ((rt->rt6i_flags & RTF_GATEWAY))
414 target = (struct in6_addr*)&n->primary_key;
416 target = &hdr->daddr;
418 /* Limit redirects both by destination (here)
419 and by source (inside ndisc_send_redirect)
421 if (xrlim_allow(dst, 1*HZ))
422 ndisc_send_redirect(skb, n, target);
423 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
424 |IPV6_ADDR_LINKLOCAL)) {
425 /* This check is security critical. */
429 if (skb->len > dst_pmtu(dst)) {
430 /* Again, force OUTPUT device used as source address */
432 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
433 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
434 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
439 if (skb_cow(skb, dst->dev->hard_header_len)) {
440 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
446 /* Mangling hops number delayed to point after skb COW */
450 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
451 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
454 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
460 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
462 to->pkt_type = from->pkt_type;
463 to->priority = from->priority;
464 to->protocol = from->protocol;
465 to->security = from->security;
466 to->dst = dst_clone(from->dst);
469 #ifdef CONFIG_NET_SCHED
470 to->tc_index = from->tc_index;
472 #ifdef CONFIG_NETFILTER
473 to->nfmark = from->nfmark;
474 /* Connection association is same as pre-frag packet */
475 to->nfct = from->nfct;
476 nf_conntrack_get(to->nfct);
477 to->nfctinfo = from->nfctinfo;
478 #ifdef CONFIG_BRIDGE_NETFILTER
479 nf_bridge_put(to->nf_bridge);
480 to->nf_bridge = from->nf_bridge;
481 nf_bridge_get(to->nf_bridge);
483 #ifdef CONFIG_NETFILTER_DEBUG
484 to->nf_debug = from->nf_debug;
489 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
491 u16 offset = sizeof(struct ipv6hdr);
492 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
493 unsigned int packet_len = skb->tail - skb->nh.raw;
495 *nexthdr = &skb->nh.ipv6h->nexthdr;
497 while (offset + 1 <= packet_len) {
502 case NEXTHDR_ROUTING:
504 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
505 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
506 offset += ipv6_optlen(exthdr);
507 *nexthdr = &exthdr->nexthdr;
508 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
518 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
520 struct net_device *dev;
521 struct sk_buff *frag;
522 struct rt6_info *rt = (struct rt6_info*)skb->dst;
523 struct ipv6hdr *tmp_hdr;
525 unsigned int mtu, hlen, left, len;
527 int ptr, offset = 0, err=0;
528 u8 *prevhdr, nexthdr = 0;
531 hlen = ip6_find_1stfragopt(skb, &prevhdr);
534 mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
536 if (skb_shinfo(skb)->frag_list) {
537 int first_len = skb_pagelen(skb);
539 if (first_len - hlen > mtu ||
540 ((first_len - hlen) & 7) ||
544 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
545 /* Correct geometry. */
546 if (frag->len > mtu ||
547 ((frag->len & 7) && frag->next) ||
548 skb_headroom(frag) < hlen)
551 /* Correct socket ownership. */
552 if (frag->sk == NULL)
555 /* Partially cloned skb? */
556 if (skb_shared(frag))
562 frag = skb_shinfo(skb)->frag_list;
563 skb_shinfo(skb)->frag_list = NULL;
566 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
568 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
572 *prevhdr = NEXTHDR_FRAGMENT;
573 memcpy(tmp_hdr, skb->nh.raw, hlen);
574 __skb_pull(skb, hlen);
575 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
576 skb->nh.raw = __skb_push(skb, hlen);
577 memcpy(skb->nh.raw, tmp_hdr, hlen);
579 ipv6_select_ident(skb, fh);
580 fh->nexthdr = nexthdr;
582 fh->frag_off = htons(IP6_MF);
583 frag_id = fh->identification;
585 first_len = skb_pagelen(skb);
586 skb->data_len = first_len - skb_headlen(skb);
587 skb->len = first_len;
588 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
592 /* Prepare header of the next frame,
593 * before previous one went down. */
595 frag->h.raw = frag->data;
596 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
597 frag->nh.raw = __skb_push(frag, hlen);
598 memcpy(frag->nh.raw, tmp_hdr, hlen);
599 offset += skb->len - hlen - sizeof(struct frag_hdr);
600 fh->nexthdr = nexthdr;
602 fh->frag_off = htons(offset);
603 if (frag->next != NULL)
604 fh->frag_off |= htons(IP6_MF);
605 fh->identification = frag_id;
606 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
607 ip6_copy_metadata(frag, skb);
623 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
633 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
638 left = skb->len - hlen; /* Space per frame */
639 ptr = hlen; /* Where to start from */
642 * Fragment the datagram.
645 *prevhdr = NEXTHDR_FRAGMENT;
648 * Keep copying data until we run out.
652 /* IF: it doesn't fit, use 'mtu' - the data space left */
655 /* IF: we are not sending upto and including the packet end
656 then align the next start on an eight byte boundary */
664 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
665 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
666 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
672 * Set up data on packet
675 ip6_copy_metadata(frag, skb);
676 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
677 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
678 frag->nh.raw = frag->data;
679 fh = (struct frag_hdr*)(frag->data + hlen);
680 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
683 * Charge the memory for the fragment to any owner
687 skb_set_owner_w(frag, skb->sk);
690 * Copy the packet header into the new buffer.
692 memcpy(frag->nh.raw, skb->data, hlen);
695 * Build fragment header.
697 fh->nexthdr = nexthdr;
700 ipv6_select_ident(skb, fh);
701 frag_id = fh->identification;
703 fh->identification = frag_id;
706 * Copy a block of the IP datagram.
708 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
712 fh->frag_off = htons(offset);
714 fh->frag_off |= htons(IP6_MF);
715 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
721 * Put this fragment into the sending queue.
724 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
731 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
736 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
740 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
746 struct ipv6_pinfo *np = inet6_sk(sk);
748 *dst = sk_dst_check(sk, np->dst_cookie);
750 struct rt6_info *rt = (struct rt6_info*)*dst;
752 /* Yes, checking route validity in not connected
753 case is not very simple. Take into account,
754 that we do not support routing by source, TOS,
755 and MSG_DONTROUTE --ANK (980726)
757 1. If route was host route, check that
758 cached destination is current.
759 If it is network route, we still may
760 check its validity using saved pointer
761 to the last used address: daddr_cache.
762 We do not want to save whole address now,
763 (because main consumer of this service
764 is tcp, which has not this problem),
765 so that the last trick works only on connected
767 2. oif also should be the same.
770 if (((rt->rt6i_dst.plen != 128 ||
771 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
772 && (np->daddr_cache == NULL ||
773 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
774 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
782 *dst = ip6_route_output(sk, fl);
784 if ((err = (*dst)->error))
785 goto out_err_release;
787 if (ipv6_addr_any(&fl->fl6_src)) {
788 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
792 printk(KERN_DEBUG "ip6_dst_lookup: "
793 "no available source address\n");
795 goto out_err_release;
807 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
808 void *from, int length, int transhdrlen,
809 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
812 struct inet_opt *inet = inet_sk(sk);
813 struct ipv6_pinfo *np = inet6_sk(sk);
815 unsigned int maxfraglen, fragheaderlen;
822 int csummode = CHECKSUM_NONE;
826 if (skb_queue_empty(&sk->sk_write_queue)) {
831 if (np->cork.opt == NULL) {
832 np->cork.opt = kmalloc(opt->tot_len,
834 if (unlikely(np->cork.opt == NULL))
836 } else if (np->cork.opt->tot_len < opt->tot_len) {
837 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
840 memcpy(np->cork.opt, opt, opt->tot_len);
841 inet->cork.flags |= IPCORK_OPT;
842 /* need source address above miyazawa*/
844 dst_hold(&rt->u.dst);
847 np->cork.hop_limit = hlimit;
848 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
849 inet->cork.length = 0;
850 sk->sk_sndmsg_page = NULL;
851 sk->sk_sndmsg_off = 0;
852 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
854 transhdrlen += exthdrlen;
858 if (inet->cork.flags & IPCORK_OPT)
862 mtu = inet->cork.fragsize;
865 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
867 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
868 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
870 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
871 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
872 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
878 * Let's try using as much space as possible.
879 * Use MTU if total length of the message fits into the MTU.
880 * Otherwise, we need to reserve fragment header and
881 * fragment alignment (= 8-15 octects, in total).
883 * Note that we may need to "move" the data from the tail of
884 * of the buffer to the new fragment when we split
887 * FIXME: It may be fragmented into multiple chunks
888 * at once if non-fragmentable extension headers
893 inet->cork.length += length;
895 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
899 /* Check if the remaining data fits into current packet. */
900 copy = mtu - skb->len;
902 copy = maxfraglen - skb->len;
906 unsigned int datalen;
907 unsigned int fraglen;
908 unsigned int fraggap;
909 unsigned int alloclen;
910 struct sk_buff *skb_prev;
914 /* There's no room in the current skb */
916 fraggap = skb_prev->len - maxfraglen;
921 * If remaining data exceeds the mtu,
922 * we know we need more fragment(s).
924 datalen = length + fraggap;
925 if (datalen > mtu - fragheaderlen)
926 datalen = maxfraglen - fragheaderlen;
928 fraglen = datalen + fragheaderlen;
929 if ((flags & MSG_MORE) &&
930 !(rt->u.dst.dev->features&NETIF_F_SG))
933 alloclen = datalen + fragheaderlen;
936 * The last fragment gets additional space at tail.
937 * Note: we overallocate on fragments with MSG_MODE
938 * because we have no idea if we're the last one.
940 if (datalen == length + fraggap)
941 alloclen += rt->u.dst.trailer_len;
944 * We just reserve space for fragment header.
945 * Note: this may be overallocation if the message
946 * (without MSG_MORE) fits into the MTU.
948 alloclen += sizeof(struct frag_hdr);
951 skb = sock_alloc_send_skb(sk,
953 (flags & MSG_DONTWAIT), &err);
956 if (atomic_read(&sk->sk_wmem_alloc) <=
958 skb = sock_wmalloc(sk,
959 alloclen + hh_len, 1,
961 if (unlikely(skb == NULL))
967 * Fill in the control structures
969 skb->ip_summed = csummode;
971 /* reserve for fragmentation */
972 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
975 * Find where to start putting bytes
977 data = skb_put(skb, fraglen);
978 skb->nh.raw = data + exthdrlen;
979 data += fragheaderlen;
980 skb->h.raw = data + exthdrlen;
983 skb->csum = skb_copy_and_csum_bits(
984 skb_prev, maxfraglen,
985 data + transhdrlen, fraggap, 0);
986 skb_prev->csum = csum_sub(skb_prev->csum,
989 skb_trim(skb_prev, maxfraglen);
991 copy = datalen - transhdrlen - fraggap;
996 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1003 length -= datalen - fraggap;
1006 csummode = CHECKSUM_NONE;
1009 * Put the packet on the pending queue
1011 __skb_queue_tail(&sk->sk_write_queue, skb);
1018 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1022 if (getfrag(from, skb_put(skb, copy),
1023 offset, copy, off, skb) < 0) {
1024 __skb_trim(skb, off);
1029 int i = skb_shinfo(skb)->nr_frags;
1030 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1031 struct page *page = sk->sk_sndmsg_page;
1032 int off = sk->sk_sndmsg_off;
1035 if (page && (left = PAGE_SIZE - off) > 0) {
1038 if (page != frag->page) {
1039 if (i == MAX_SKB_FRAGS) {
1044 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1045 frag = &skb_shinfo(skb)->frags[i];
1047 } else if(i < MAX_SKB_FRAGS) {
1048 if (copy > PAGE_SIZE)
1050 page = alloc_pages(sk->sk_allocation, 0);
1055 sk->sk_sndmsg_page = page;
1056 sk->sk_sndmsg_off = 0;
1058 skb_fill_page_desc(skb, i, page, 0, 0);
1059 frag = &skb_shinfo(skb)->frags[i];
1060 skb->truesize += PAGE_SIZE;
1061 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1066 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1070 sk->sk_sndmsg_off += copy;
1073 skb->data_len += copy;
1080 inet->cork.length -= length;
1081 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1085 int ip6_push_pending_frames(struct sock *sk)
1087 struct sk_buff *skb, *tmp_skb;
1088 struct sk_buff **tail_skb;
1089 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1090 struct inet_opt *inet = inet_sk(sk);
1091 struct ipv6_pinfo *np = inet6_sk(sk);
1092 struct ipv6hdr *hdr;
1093 struct ipv6_txoptions *opt = np->cork.opt;
1094 struct rt6_info *rt = np->cork.rt;
1095 struct flowi *fl = &inet->cork.fl;
1096 unsigned char proto = fl->proto;
1099 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1101 tail_skb = &(skb_shinfo(skb)->frag_list);
1103 /* move skb->data to ip header from ext header */
1104 if (skb->data < skb->nh.raw)
1105 __skb_pull(skb, skb->nh.raw - skb->data);
1106 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1107 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1108 *tail_skb = tmp_skb;
1109 tail_skb = &(tmp_skb->next);
1110 skb->len += tmp_skb->len;
1111 skb->data_len += tmp_skb->len;
1112 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1113 skb->truesize += tmp_skb->truesize;
1114 __sock_put(tmp_skb->sk);
1115 tmp_skb->destructor = NULL;
1120 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1121 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1122 if (opt && opt->opt_flen)
1123 ipv6_push_frag_opts(skb, opt, &proto);
1124 if (opt && opt->opt_nflen)
1125 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1127 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1129 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1131 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1132 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1134 hdr->payload_len = 0;
1135 hdr->hop_limit = np->cork.hop_limit;
1136 hdr->nexthdr = proto;
1137 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1138 ipv6_addr_copy(&hdr->daddr, final_dst);
1140 skb->dst = dst_clone(&rt->u.dst);
1141 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1142 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1145 err = inet->recverr ? net_xmit_errno(err) : 0;
1151 inet->cork.flags &= ~IPCORK_OPT;
1153 kfree(np->cork.opt);
1154 np->cork.opt = NULL;
1157 dst_release(&np->cork.rt->u.dst);
1160 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1166 void ip6_flush_pending_frames(struct sock *sk)
1168 struct inet_opt *inet = inet_sk(sk);
1169 struct ipv6_pinfo *np = inet6_sk(sk);
1170 struct sk_buff *skb;
1172 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1173 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1177 inet->cork.flags &= ~IPCORK_OPT;
1180 kfree(np->cork.opt);
1181 np->cork.opt = NULL;
1184 dst_release(&np->cork.rt->u.dst);
1187 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));