2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
/*
 * ipv6_select_ident - write the next fragment identification into fhdr.
 *
 * The value comes from the static counter ipv6_fragmentation_id, which is
 * read and advanced while holding ip6_id_lock.  It is stored in network
 * byte order (htonl), and the counter is reset to 1 instead of 0 when it
 * wraps.  skb is not referenced by any of the visible lines.
 */
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
/* Skip 0 on wrap-around so that the ID written is never zero. */
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
/*
 * ip6_output_finish - hand the packet to the link layer.
 *
 * Two transmit paths are visible:
 *  - cached hardware header (dst->hh): hh_data is copied in front of
 *    skb->data under hh_lock, using the aligned length HH_DATA_ALIGN(hh_len)
 *    for the copy but the real hh_len for skb_push(), and the packet goes to
 *    hh->hh_output();
 *  - otherwise, if the route has a neighbour entry, neighbour->output().
 * When neither applies IPSTATS_MIB_OUTNOROUTES is counted.  The condition
 * opening the first branch (presumably a NULL test on hh -- confirm) and the
 * final return are not visible in this listing.
 */
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
97 /* dev_loopback_xmit for use with netfilter. */
/*
 * ip6_dev_loopback_xmit - prepare newskb for delivery back to the local host.
 *
 * Records the current data pointer as the MAC header, pulls the data pointer
 * up to the network header, marks the packet PACKET_LOOPBACK with
 * CHECKSUM_UNNECESSARY, and asserts (BUG_TRAP) that a route is attached.
 * The call that actually re-injects the packet and the return statement are
 * not visible in this listing.
 */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
/*
 * ip6_output2 - post-routing output step for one (already sized) packet.
 *
 * Sets skb->protocol to ETH_P_IPV6.  For a multicast destination:
 *  - a clone is looped back through the NF_IP6_POST_ROUTING hook into
 *    ip6_dev_loopback_xmit() when the output device is not a loopback
 *    device, the sending socket (if any) has mc_loop enabled, and
 *    ipv6_chk_mcast_addr() accepts the destination/source pair;
 *  - a hop limit of 0 is counted as IPSTATS_MIB_OUTDISCARDS (what happens to
 *    the packet afterwards is not visible in this listing);
 *  - IPSTATS_MIB_OUTMCASTPKTS is counted.
 * Finally the packet is passed through the NF_IP6_POST_ROUTING hook with
 * ip6_output_finish() as continuation.
 */
111 static int ip6_output2(struct sk_buff *skb)
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
/*
 * ip6_output - output entry point: fragment if needed, then transmit.
 *
 * A packet longer than the path MTU of its route (dst_pmtu(skb->dst)) is
 * given to ip6_fragment() with ip6_output2() as the per-fragment output
 * routine; anything else goes to ip6_output2() directly.  Returns whatever
 * the chosen routine returns.
 */
148 int ip6_output(struct sk_buff *skb)
150 if (skb->len > dst_pmtu(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
153 return ip6_output2(skb);
156 #ifdef CONFIG_NETFILTER
/*
 * ip6_route_me_harder - redo the route lookup for a packet from its header.
 *
 * Builds a flow from the packet's own IPv6 header (destination, source and
 * next-header value) plus the bound device of the owning socket, or 0 when
 * there is no socket, and looks it up with ip6_route_output().  The failure
 * path counts IPSTATS_MIB_OUTNOROUTES and logs a debug message.  The old
 * route is released with dst_release(); the line that installs the new
 * route, the error test and the return values are not visible in this
 * listing.
 */
157 int ip6_route_me_harder(struct sk_buff *skb)
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
170 dst = ip6_route_output(skb->sk, &fl);
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
180 /* Drop old route. */
181 dst_release(skb->dst);
/*
 * ip6_maybe_reroute - continuation used after the LOCAL_OUT netfilter hook.
 *
 * With CONFIG_NETFILTER, a packet flagged NFC_ALTERED in skb->nfcache is
 * re-routed through ip6_route_me_harder() first; what is done when that
 * call returns non-zero is not visible in this listing.  The packet is then
 * sent on with dst_output().
 */
188 static inline int ip6_maybe_reroute(struct sk_buff *skb)
190 #ifdef CONFIG_NETFILTER
191 if (skb->nfcache & NFC_ALTERED){
192 if (ip6_route_me_harder(skb) != 0){
197 #endif /* CONFIG_NETFILTER */
198 return dst_output(skb);
202 * xmit an sk_buff (used by TCP)
/*
 * ip6_xmit - prepend extension headers and the IPv6 header to skb and send it.
 *
 * sk:       sending socket; np is NULL when sk is NULL
 * skb:      packet whose route is already attached in skb->dst
 * fl:       flow supplying destination, source, flow label and protocol
 * opt:      options pushed with ipv6_push_frag_opts() and
 *           ipv6_push_nfrag_opts()
 * ipfragok: non-zero lets a packet larger than mtu be sent anyway
 *
 * In the options branch the headroom needed is the option lengths plus the
 * IPv6 header plus the link-layer reserve; an skb with less headroom is
 * reallocated with skb_realloc_headroom(), and IPSTATS_MIB_OUTDISCARDS is
 * counted on the failure side.  The hop limit comes from np->hop_limit or
 * from the route metric RTAX_HOPLIMIT.  The destination written into the
 * header is first_hop, whose address is passed to ipv6_push_nfrag_opts() and
 * so may be changed by it (presumably for routing headers -- confirm).
 *
 * A packet that fits mtu, or has ipfragok set, is counted in
 * IPSTATS_MIB_OUTREQUESTS and passed through the NF_IP6_LOCAL_OUT hook with
 * ip6_maybe_reroute() as continuation.  Otherwise ICMPV6_PKT_TOOBIG is sent
 * and IPSTATS_MIB_FRAGFAILS is counted.  Several branch conditions, the
 * assignment of mtu and the error-path return values are not visible in
 * this listing.
 */
205 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
236 skb_set_owner_w(skb, sk);
239 ipv6_push_frag_opts(skb, opt, &proto);
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
247 * Fill in the IPv6 header
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
253 hlimit = np->hop_limit;
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
257 hdr->payload_len = htons(seg_len);
258 hdr->nexthdr = proto;
259 hdr->hop_limit = hlimit;
261 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
262 ipv6_addr_copy(&hdr->daddr, first_hop);
265 if ((skb->len <= mtu) || ipfragok) {
266 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
267 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
274 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
280 * To avoid extra problems ND packets are sent through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
/*
 * ip6_nd_hdr - append a bare IPv6 header to skb.
 *
 * Sets skb->protocol to ETH_P_IPV6, reserves sizeof(struct ipv6hdr) at the
 * tail with skb_put() and fills it in: first word 0x60000000 (version 6,
 * no flow label OR-ed in, unlike ip6_xmit()), payload length len, next
 * header proto, hop limit taken from the socket's np->hop_limit, and the
 * given saddr / daddr.
 *
 * totlen is computed but not used by any visible line.  The end of the
 * parameter list (where proto and len are declared), the use of dev and
 * the return statement are not visible in this listing.
 */
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 struct in6_addr *saddr, struct in6_addr *daddr,
290 struct ipv6_pinfo *np = inet6_sk(sk);
294 skb->protocol = htons(ETH_P_IPV6);
297 totlen = len + sizeof(struct ipv6hdr);
299 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
302 *(u32*)hdr = htonl(0x60000000);
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_call_ra_chain - offer skb to the sockets registered on ip6_ra_chain.
 *
 * Walks ip6_ra_chain under read_lock(&ip6_ra_lock) and considers entries
 * that have a socket and whose sel equals the sel argument.  Inside the loop
 * a clone of skb is delivered with rawv6_rcv(last, skb2); after the loop the
 * original skb is delivered with rawv6_rcv(last, skb).  The lock is dropped
 * on both visible exit paths.  How `last' is advanced, the NULL checks
 * around the deliveries and the return values are not visible in this
 * listing.
 */
314 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
322 if (sk && ra->sel == sel) {
324 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
326 rawv6_rcv(last, skb2);
333 rawv6_rcv(last, skb);
334 read_unlock(&ip6_ra_lock);
337 read_unlock(&ip6_ra_lock);
/*
 * ip6_forward_finish - continuation run after the NF_IP6_FORWARD hook.
 *
 * Simply sends the packet on through dst_output() and returns its result.
 */
341 static inline int ip6_forward_finish(struct sk_buff *skb)
343 return dst_output(skb);
/*
 * ip6_forward - forward a received packet along its attached route.
 *
 * Steps visible in this listing, in order:
 *  - tests ipv6_devconf.forwarding and the XFRM forward policy (a policy
 *    failure is counted as IPSTATS_MIB_INDISCARDS);
 *  - resets skb->ip_summed to CHECKSUM_NONE;
 *  - for RA packets (per the in-line comment), offers the skb to
 *    ip6_call_ra_chain() keyed by the 16-bit value read from bytes 2 and 3
 *    at offset opt->ra from the network header;
 *  - when hop_limit <= 1, sends ICMPV6_TIME_EXCEED / ICMPV6_EXC_HOPLIMIT;
 *  - runs xfrm6_route_forward() (failure counted as INDISCARDS);
 *  - if the packet leaves through the device it arrived on, the route has a
 *    neighbour and the packet is not source routed, sends a redirect, rate
 *    limited by xrlim_allow(dst, 1*HZ), whose target is the neighbour key
 *    for RTF_GATEWAY routes and the destination address otherwise; in the
 *    else branch the source address is tested for multicast, loopback or
 *    link-local type (marked "security critical");
 *  - if skb->len exceeds the path MTU, sends ICMPV6_PKT_TOOBIG and counts
 *    INTOOBIGERRORS and FRAGFAILS;
 *  - calls skb_cow() with the output device's hard_header_len (failure is
 *    counted as OUTDISCARDS), counts OUTFORWDATAGRAMS and passes the skb
 *    through the NF_IP6_FORWARD hook to ip6_forward_finish().
 * IPSTATS_MIB_INADDRERRORS is counted on a further exit path.  Branch
 * bodies, goto targets, the hop-limit decrement and the return values are
 * not visible in this listing.
 */
346 int ip6_forward(struct sk_buff *skb)
348 struct dst_entry *dst = skb->dst;
349 struct ipv6hdr *hdr = skb->nh.ipv6h;
350 struct inet6_skb_parm *opt = IP6CB(skb);
352 if (ipv6_devconf.forwarding == 0)
355 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
356 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
360 skb->ip_summed = CHECKSUM_NONE;
363 * We DO NOT make any processing on
364 * RA packets, pushing them to user level AS IS
365 * without ane WARRANTY that application will be able
366 * to interpret them. The reason is that we
367 * cannot make anything clever here.
369 * We are not end-node, so that if packet contains
370 * AH/ESP, we cannot make anything.
371 * Defragmentation also would be mistake, RA packets
372 * cannot be fragmented, because there is no warranty
373 * that different fragments will go along one path. --ANK
376 u8 *ptr = skb->nh.raw + opt->ra;
377 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
382 * check and decrement ttl
384 if (hdr->hop_limit <= 1) {
385 /* Force OUTPUT device used as source address */
387 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
394 if (!xfrm6_route_forward(skb)) {
395 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
399 /* IPv6 specs say nothing about it, but it is clear that we cannot
400 send redirects to source routed frames.
402 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
403 struct in6_addr *target = NULL;
405 struct neighbour *n = dst->neighbour;
408 * incoming and outgoing devices are the same
412 rt = (struct rt6_info *) dst;
413 if ((rt->rt6i_flags & RTF_GATEWAY))
414 target = (struct in6_addr*)&n->primary_key;
416 target = &hdr->daddr;
418 /* Limit redirects both by destination (here)
419 and by source (inside ndisc_send_redirect)
421 if (xrlim_allow(dst, 1*HZ))
422 ndisc_send_redirect(skb, n, target);
423 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
424 |IPV6_ADDR_LINKLOCAL)) {
425 /* This check is security critical. */
429 if (skb->len > dst_pmtu(dst)) {
430 /* Again, force OUTPUT device used as source address */
432 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
433 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
434 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
439 if (skb_cow(skb, dst->dev->hard_header_len)) {
440 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
446 /* Mangling hops number delayed to point after skb COW */
450 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
451 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
454 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata - make fragment `to' carry the same metadata as `from'.
 *
 * Copies pkt_type, priority, protocol and security; replaces the route of
 * `to' with a new reference to the route of `from' (the old one is
 * released first); copies tc_index under CONFIG_NET_SCHED; and, under
 * CONFIG_NETFILTER, copies nfmark, the conntrack pointer and its info (taking
 * a reference), the bridge-netfilter pointer (old one put, new one
 * referenced) and, with CONFIG_NETFILTER_DEBUG, nf_debug.
 *
 * NOTE(review): unlike dst and nf_bridge, no release of a previous to->nfct
 * is visible before it is overwritten -- confirm that callers only pass a
 * `to' that holds no conntrack reference.
 */
460 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
462 to->pkt_type = from->pkt_type;
463 to->priority = from->priority;
464 to->protocol = from->protocol;
465 to->security = from->security;
/* Swap the route: drop what `to' had, then share the route of `from'. */
466 dst_release(to->dst);
467 to->dst = dst_clone(from->dst);
470 #ifdef CONFIG_NET_SCHED
471 to->tc_index = from->tc_index;
473 #ifdef CONFIG_NETFILTER
474 to->nfmark = from->nfmark;
475 /* Connection association is same as pre-frag packet */
476 to->nfct = from->nfct;
477 nf_conntrack_get(to->nfct);
478 to->nfctinfo = from->nfctinfo;
479 #ifdef CONFIG_BRIDGE_NETFILTER
480 nf_bridge_put(to->nf_bridge);
481 to->nf_bridge = from->nf_bridge;
482 nf_bridge_get(to->nf_bridge);
484 #ifdef CONFIG_NETFILTER_DEBUG
485 to->nf_debug = from->nf_debug;
/*
 * ip6_find_1stfragopt - scan extension headers for the fragmentation point.
 *
 * skb:     packet to scan
 * nexthdr: out; starts at the nexthdr field of the IPv6 header and is moved
 *          to the nexthdr field of each extension header that is walked past
 *
 * The walk starts at offset sizeof(struct ipv6hdr) from skb->nh.raw and
 * continues while the offset stays inside the packet (skb->tail -
 * skb->nh.raw), advancing by ipv6_optlen() per header.  A destination
 * options header met after a routing header has been seen ends the walk and
 * the current offset is returned.  The remaining case labels, the other
 * exits of the switch and the final return are not visible in this listing.
 */
490 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
492 u16 offset = sizeof(struct ipv6hdr);
493 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
494 unsigned int packet_len = skb->tail - skb->nh.raw;
496 *nexthdr = &skb->nh.ipv6h->nexthdr;
498 while (offset + 1 <= packet_len) {
503 case NEXTHDR_ROUTING:
505 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
506 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
507 offset += ipv6_optlen(exthdr);
508 *nexthdr = &exthdr->nexthdr;
509 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
/*
 * ip6_fragment - split skb into IPv6 fragments and send them via output.
 *
 * hlen is the value returned by ip6_find_1stfragopt(), i.e. the length of
 * the headers that are repeated in front of every fragment; mtu is the
 * payload budget per fragment: path MTU minus hlen minus the fragment
 * header.
 *
 * Two strategies are visible:
 *  - frag_list path, used when the skb already carries a frag_list: the
 *    head and every list member are checked (length within mtu, multiple
 *    of 8 unless last, headroom of at least hlen, socket ownership, not
 *    shared).  A copy of the first hlen bytes (tmp_hdr) and a fragment
 *    header are then pushed in front of each member in place; all
 *    fragments reuse the identification chosen for the first one and every
 *    one but the last has IP6_MF set.
 *  - copy path: for each fragment a new skb is allocated with room for the
 *    link-layer reserve, hlen, the fragment header and len bytes of data;
 *    the headers are copied from skb->data and the data with
 *    skb_copy_bits() starting at offset ptr.
 *
 * IPSTATS_MIB_FRAGFAILS is counted on the visible failure exits,
 * FRAGCREATES per fragment in the copy path and FRAGOKS on the success
 * exits.  Loop headers, most branch conditions, the calls that pass
 * fragments to output and the return statements are not visible in this
 * listing.
 */
519 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
521 struct net_device *dev;
522 struct sk_buff *frag;
523 struct rt6_info *rt = (struct rt6_info*)skb->dst;
524 struct ipv6hdr *tmp_hdr;
526 unsigned int mtu, hlen, left, len;
528 int ptr, offset = 0, err=0;
529 u8 *prevhdr, nexthdr = 0;
532 hlen = ip6_find_1stfragopt(skb, &prevhdr);
535 mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
537 if (skb_shinfo(skb)->frag_list) {
538 int first_len = skb_pagelen(skb);
540 if (first_len - hlen > mtu ||
541 ((first_len - hlen) & 7) ||
545 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
546 /* Correct geometry. */
547 if (frag->len > mtu ||
548 ((frag->len & 7) && frag->next) ||
549 skb_headroom(frag) < hlen)
552 /* Correct socket ownership. */
553 if (frag->sk == NULL)
556 /* Partially cloned skb? */
557 if (skb_shared(frag))
563 frag = skb_shinfo(skb)->frag_list;
564 skb_shinfo(skb)->frag_list = NULL;
567 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
569 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
573 *prevhdr = NEXTHDR_FRAGMENT;
574 memcpy(tmp_hdr, skb->nh.raw, hlen);
575 __skb_pull(skb, hlen);
576 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
577 skb->nh.raw = __skb_push(skb, hlen);
578 memcpy(skb->nh.raw, tmp_hdr, hlen);
580 ipv6_select_ident(skb, fh);
581 fh->nexthdr = nexthdr;
583 fh->frag_off = htons(IP6_MF);
584 frag_id = fh->identification;
586 first_len = skb_pagelen(skb);
587 skb->data_len = first_len - skb_headlen(skb);
588 skb->len = first_len;
589 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
593 /* Prepare header of the next frame,
594 * before previous one went down. */
596 frag->ip_summed = CHECKSUM_NONE;
597 frag->h.raw = frag->data;
598 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
599 frag->nh.raw = __skb_push(frag, hlen);
600 memcpy(frag->nh.raw, tmp_hdr, hlen);
601 offset += skb->len - hlen - sizeof(struct frag_hdr);
602 fh->nexthdr = nexthdr;
604 fh->frag_off = htons(offset);
605 if (frag->next != NULL)
606 fh->frag_off |= htons(IP6_MF);
607 fh->identification = frag_id;
608 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
609 ip6_copy_metadata(frag, skb);
625 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
635 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
640 left = skb->len - hlen; /* Space per frame */
641 ptr = hlen; /* Where to start from */
644 * Fragment the datagram.
647 *prevhdr = NEXTHDR_FRAGMENT;
650 * Keep copying data until we run out.
654 /* IF: it doesn't fit, use 'mtu' - the data space left */
657 /* IF: we are not sending upto and including the packet end
658 then align the next start on an eight byte boundary */
666 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
667 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
668 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
674 * Set up data on packet
677 ip6_copy_metadata(frag, skb);
678 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
679 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
680 frag->nh.raw = frag->data;
681 fh = (struct frag_hdr*)(frag->data + hlen);
682 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
685 * Charge the memory for the fragment to any owner
689 skb_set_owner_w(frag, skb->sk);
692 * Copy the packet header into the new buffer.
694 memcpy(frag->nh.raw, skb->data, hlen);
697 * Build fragment header.
699 fh->nexthdr = nexthdr;
702 ipv6_select_ident(skb, fh);
703 frag_id = fh->identification;
705 fh->identification = frag_id;
708 * Copy a block of the IP datagram.
710 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
714 fh->frag_off = htons(offset);
716 fh->frag_off |= htons(IP6_MF);
717 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
723 * Put this fragment into the sending queue.
726 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
733 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
738 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/*
 * ip6_dst_lookup - find the route for flow fl and return it through dst.
 *
 * The socket's cached route is fetched first with sk_dst_check() and
 * np->dst_cookie.  A condition then tests whether that route still fits the
 * flow: it fails the test when neither a host route (prefix length 128)
 * equal to fl->fl6_dst nor np->daddr_cache equal to fl->fl6_dst applies, or
 * when fl->oif is set and differs from the ifindex of the route's device.
 * The action taken in that case is not visible in this listing.
 *
 * A route is then obtained with ip6_route_output(); if it carries an error
 * code, control goes to out_err_release with that code in err.  When
 * fl->fl6_src is the unspecified address, a source is chosen with
 * ipv6_get_saddr(); its failure path logs a debug message and also goes to
 * out_err_release.  The guards around these steps, the out_err_release
 * label and the return statements are not visible in this listing.
 */
742 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
748 struct ipv6_pinfo *np = inet6_sk(sk);
750 *dst = sk_dst_check(sk, np->dst_cookie);
752 struct rt6_info *rt = (struct rt6_info*)*dst;
754 /* Yes, checking route validity in not connected
755 case is not very simple. Take into account,
756 that we do not support routing by source, TOS,
757 and MSG_DONTROUTE --ANK (980726)
759 1. If route was host route, check that
760 cached destination is current.
761 If it is network route, we still may
762 check its validity using saved pointer
763 to the last used address: daddr_cache.
764 We do not want to save whole address now,
765 (because main consumer of this service
766 is tcp, which has not this problem),
767 so that the last trick works only on connected
769 2. oif also should be the same.
772 if (((rt->rt6i_dst.plen != 128 ||
773 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
774 && (np->daddr_cache == NULL ||
775 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
776 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
784 *dst = ip6_route_output(sk, fl);
786 if ((err = (*dst)->error))
787 goto out_err_release;
789 if (ipv6_addr_any(&fl->fl6_src)) {
790 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
794 printk(KERN_DEBUG "ip6_dst_lookup: "
795 "no available source address\n");
797 goto out_err_release;
/*
 * ip6_append_data - queue user data on sk->sk_write_queue for a later
 * ip6_push_pending_frames().
 *
 * getfrag is the callback used to copy `copy' bytes at `offset' from the
 * opaque `from' cookie into packet memory.
 *
 * Cork setup (write queue empty): the options are copied into np->cork.opt
 * (allocated if absent; a debug message is logged when an existing buffer is
 * smaller than opt->tot_len), IPCORK_OPT is set, a reference is taken on rt,
 * and hop limit, fragment size (path MTU), length 0 and a cleared
 * sk_sndmsg_page/off are recorded.  exthdrlen is the route's header_len
 * plus opt_flen and is added to transhdrlen.  On later calls mtu is
 * taken from inet->cork.fragsize instead.
 *
 * Sizing: fragheaderlen is the IPv6 header plus opt_nflen; maxfraglen is the
 * largest length whose payload part is a multiple of 8, less the room kept
 * for a fragment header.  A message that would exceed the maximum IPv6
 * payload is reported through ipv6_local_error() with EMSGSIZE.
 *
 * Copy loop, as far as visible: when the tail skb has no room a new skb is
 * allocated (sock_alloc_send_skb() or sock_wmalloc()), headroom is reserved
 * for the link layer plus a fragment header, the bytes of the previous skb
 * beyond maxfraglen (fraggap) are moved over with checksum adjustment, user
 * data is copied with getfrag, and the skb is queued.  Otherwise data is
 * appended to the tail skb: linearly with skb_put() when the device lacks
 * NETIF_F_SG, else into page fragments tracked by sk_sndmsg_page and
 * sk_sndmsg_off.
 *
 * The visible error exit subtracts the unsent length from
 * inet->cork.length and counts IPSTATS_MIB_OUTDISCARDS.  The end of the
 * parameter list, loop headers, many branch conditions and the return
 * statements are not visible in this listing.
 */
809 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
810 void *from, int length, int transhdrlen,
811 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
814 struct inet_sock *inet = inet_sk(sk);
815 struct ipv6_pinfo *np = inet6_sk(sk);
817 unsigned int maxfraglen, fragheaderlen;
824 int csummode = CHECKSUM_NONE;
828 if (skb_queue_empty(&sk->sk_write_queue)) {
833 if (np->cork.opt == NULL) {
834 np->cork.opt = kmalloc(opt->tot_len,
836 if (unlikely(np->cork.opt == NULL))
838 } else if (np->cork.opt->tot_len < opt->tot_len) {
839 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
842 memcpy(np->cork.opt, opt, opt->tot_len);
843 inet->cork.flags |= IPCORK_OPT;
844 /* need source address above miyazawa*/
846 dst_hold(&rt->u.dst);
849 np->cork.hop_limit = hlimit;
850 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
851 inet->cork.length = 0;
852 sk->sk_sndmsg_page = NULL;
853 sk->sk_sndmsg_off = 0;
854 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
856 transhdrlen += exthdrlen;
860 if (inet->cork.flags & IPCORK_OPT)
864 mtu = inet->cork.fragsize;
867 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
869 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
870 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
872 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
873 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
874 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
880 * Let's try using as much space as possible.
881 * Use MTU if total length of the message fits into the MTU.
882 * Otherwise, we need to reserve fragment header and
883 * fragment alignment (= 8-15 octects, in total).
885 * Note that we may need to "move" the data from the tail of
886 * of the buffer to the new fragment when we split
889 * FIXME: It may be fragmented into multiple chunks
890 * at once if non-fragmentable extension headers
895 inet->cork.length += length;
897 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
901 /* Check if the remaining data fits into current packet. */
902 copy = mtu - skb->len;
904 copy = maxfraglen - skb->len;
908 unsigned int datalen;
909 unsigned int fraglen;
910 unsigned int fraggap;
911 unsigned int alloclen;
912 struct sk_buff *skb_prev;
916 /* There's no room in the current skb */
918 fraggap = skb_prev->len - maxfraglen;
923 * If remaining data exceeds the mtu,
924 * we know we need more fragment(s).
926 datalen = length + fraggap;
927 if (datalen > mtu - fragheaderlen)
928 datalen = maxfraglen - fragheaderlen;
930 fraglen = datalen + fragheaderlen;
931 if ((flags & MSG_MORE) &&
932 !(rt->u.dst.dev->features&NETIF_F_SG))
935 alloclen = datalen + fragheaderlen;
938 * The last fragment gets additional space at tail.
939 * Note: we overallocate on fragments with MSG_MODE
940 * because we have no idea if we're the last one.
942 if (datalen == length + fraggap)
943 alloclen += rt->u.dst.trailer_len;
946 * We just reserve space for fragment header.
947 * Note: this may be overallocation if the message
948 * (without MSG_MORE) fits into the MTU.
950 alloclen += sizeof(struct frag_hdr);
953 skb = sock_alloc_send_skb(sk,
955 (flags & MSG_DONTWAIT), &err);
958 if (atomic_read(&sk->sk_wmem_alloc) <=
960 skb = sock_wmalloc(sk,
961 alloclen + hh_len, 1,
963 if (unlikely(skb == NULL))
969 * Fill in the control structures
971 skb->ip_summed = csummode;
973 /* reserve for fragmentation */
974 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
977 * Find where to start putting bytes
979 data = skb_put(skb, fraglen);
980 skb->nh.raw = data + exthdrlen;
981 data += fragheaderlen;
982 skb->h.raw = data + exthdrlen;
985 skb->csum = skb_copy_and_csum_bits(
986 skb_prev, maxfraglen,
987 data + transhdrlen, fraggap, 0);
988 skb_prev->csum = csum_sub(skb_prev->csum,
991 skb_trim(skb_prev, maxfraglen);
993 copy = datalen - transhdrlen - fraggap;
998 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1005 length -= datalen - fraggap;
1008 csummode = CHECKSUM_NONE;
1011 * Put the packet on the pending queue
1013 __skb_queue_tail(&sk->sk_write_queue, skb);
1020 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1024 if (getfrag(from, skb_put(skb, copy),
1025 offset, copy, off, skb) < 0) {
1026 __skb_trim(skb, off);
1031 int i = skb_shinfo(skb)->nr_frags;
1032 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1033 struct page *page = sk->sk_sndmsg_page;
1034 int off = sk->sk_sndmsg_off;
1037 if (page && (left = PAGE_SIZE - off) > 0) {
1040 if (page != frag->page) {
1041 if (i == MAX_SKB_FRAGS) {
1046 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1047 frag = &skb_shinfo(skb)->frags[i];
1049 } else if(i < MAX_SKB_FRAGS) {
1050 if (copy > PAGE_SIZE)
1052 page = alloc_pages(sk->sk_allocation, 0);
1057 sk->sk_sndmsg_page = page;
1058 sk->sk_sndmsg_off = 0;
1060 skb_fill_page_desc(skb, i, page, 0, 0);
1061 frag = &skb_shinfo(skb)->frags[i];
1062 skb->truesize += PAGE_SIZE;
1063 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1068 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1072 sk->sk_sndmsg_off += copy;
1075 skb->data_len += copy;
1082 inet->cork.length -= length;
1083 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - turn the queued skbs into one packet and send it.
 *
 * The first skb taken from sk->sk_write_queue becomes the head.  Every
 * further queued skb has the head's network-header length
 * (skb->h.raw - skb->nh.raw) pulled off and is chained onto the head's
 * frag_list, with the head's len and data_len increased accordingly.
 *
 * The head is then pulled to its transport header, the corked options are
 * pushed (fragmentable first, then non-fragmentable, which receives the
 * address of final_dst), and the IPv6 header is pushed and filled in from the
 * corked flow: flow label, hop limit, protocol, source, and final_dst as
 * destination.  payload_len is written as 0 when the total length exceeds
 * sizeof(struct ipv6hdr) + IPV6_MAXPLEN.
 *
 * The packet gets its own reference on the corked route, is counted in
 * IPSTATS_MIB_OUTREQUESTS and is sent through the NF_IP6_LOCAL_OUT hook to
 * dst_output().  A send error is passed through net_xmit_errno() only when
 * inet->recverr is set; otherwise it is replaced by 0.
 *
 * Afterwards the cork state is torn down: IPCORK_OPT cleared, the option
 * copy freed, the route reference dropped and the corked flow zeroed.  The
 * guards around those steps, the labels and the return statements are not
 * visible in this listing.
 */
1087 int ip6_push_pending_frames(struct sock *sk)
1089 struct sk_buff *skb, *tmp_skb;
1090 struct sk_buff **tail_skb;
1091 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1092 struct inet_sock *inet = inet_sk(sk);
1093 struct ipv6_pinfo *np = inet6_sk(sk);
1094 struct ipv6hdr *hdr;
1095 struct ipv6_txoptions *opt = np->cork.opt;
1096 struct rt6_info *rt = np->cork.rt;
1097 struct flowi *fl = &inet->cork.fl;
1098 unsigned char proto = fl->proto;
1101 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1103 tail_skb = &(skb_shinfo(skb)->frag_list);
1105 /* move skb->data to ip header from ext header */
1106 if (skb->data < skb->nh.raw)
1107 __skb_pull(skb, skb->nh.raw - skb->data);
1108 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1109 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1110 *tail_skb = tmp_skb;
1111 tail_skb = &(tmp_skb->next);
1112 skb->len += tmp_skb->len;
1113 skb->data_len += tmp_skb->len;
1114 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1115 skb->truesize += tmp_skb->truesize;
1116 __sock_put(tmp_skb->sk);
1117 tmp_skb->destructor = NULL;
1122 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1123 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1124 if (opt && opt->opt_flen)
1125 ipv6_push_frag_opts(skb, opt, &proto);
1126 if (opt && opt->opt_nflen)
1127 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1129 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1131 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1133 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1134 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1136 hdr->payload_len = 0;
1137 hdr->hop_limit = np->cork.hop_limit;
1138 hdr->nexthdr = proto;
1139 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1140 ipv6_addr_copy(&hdr->daddr, final_dst);
1142 skb->dst = dst_clone(&rt->u.dst);
1143 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1144 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1147 err = inet->recverr ? net_xmit_errno(err) : 0;
1153 inet->cork.flags &= ~IPCORK_OPT;
1155 kfree(np->cork.opt);
1156 np->cork.opt = NULL;
1159 dst_release(&np->cork.rt->u.dst);
1162 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * ip6_flush_pending_frames - discard everything queued by ip6_append_data().
 *
 * Removes skbs from the tail of sk->sk_write_queue until it is empty,
 * counting IPSTATS_MIB_OUTDISCARDS for each one (the call that frees the skb
 * is not visible in this listing).  The cork state is then torn down:
 * IPCORK_OPT is cleared, the option copy is freed and its pointer reset, the
 * reference on the corked route is dropped and the corked flow is zeroed.
 * The guards around those steps are not visible either.
 */
1168 void ip6_flush_pending_frames(struct sock *sk)
1170 struct inet_sock *inet = inet_sk(sk);
1171 struct ipv6_pinfo *np = inet6_sk(sk);
1172 struct sk_buff *skb;
1174 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1175 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1179 inet->cork.flags &= ~IPCORK_OPT;
1182 kfree(np->cork.opt);
1183 np->cork.opt = NULL;
1186 dst_release(&np->cork.rt->u.dst);
1189 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));