2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**));
/*
 * ipv6_select_ident - stamp a fragment header with a fresh fragmentation ID.
 *
 * A single global counter guarded by a spinlock; the value 0 is skipped on
 * wrap-around so IDs restart at 1.  The ID is stored in network byte order
 * (htonl) per the "frag id should be in NBO" fix noted in the file header.
 * NOTE(review): this listing has lines elided (braces etc.); comments
 * describe only the statements visible here.
 */
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
/*
 * ip6_output_finish - hand a fully built packet to the link layer.
 *
 * If the dst carries a cached hardware header (hh_cache), copy it in front
 * of the data under hh_lock and invoke the cached output method; otherwise
 * fall back to the neighbour's output routine.  With neither available the
 * packet cannot be transmitted and OUTNOROUTES is bumped.
 * NOTE(review): some lines (opening braces, the hh NULL test, the final
 * error return) are elided from this listing.
 */
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
97 /* dev_loopback_xmit for use with netfilter. */
/*
 * Re-inject a cloned multicast packet into the local stack: record the MAC
 * header position, pull to the network header, mark the skb as a loopback
 * packet and skip checksum verification (the data never left the host).
 * NOTE(review): the tail of the function (the actual local delivery call
 * and return) is elided from this listing.
 */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
/*
 * ip6_output2 - final output stage (after any fragmentation).
 *
 * For multicast destinations, loop a clone of the packet back to local
 * listeners when the outgoing device is not loopback, the socket has not
 * disabled mc_loop, and the host is a member of the group
 * (ipv6_chk_mcast_addr); a hop_limit of 0 discards the packet.  All
 * packets finally pass through the NF_IP6_POST_ROUTING hook on their way
 * to ip6_output_finish().
 * NOTE(review): lines are elided in this listing (e.g. the NULL check on
 * the clone, closing braces, the multicast discard path).
 */
111 static int ip6_output2(struct sk_buff **pskb)
113 struct sk_buff *skb = *pskb;
114 struct dst_entry *dst = skb->dst;
115 struct net_device *dev = dst->dev;
117 skb->protocol = htons(ETH_P_IPV6);
120 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
121 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
123 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
124 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
125 &skb->nh.ipv6h->saddr)) {
126 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
128 /* Do not check for IFF_ALLMULTI; multicast routing
129 is not supported in any case.
132 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
134 ip6_dev_loopback_xmit);
136 if (skb->nh.ipv6h->hop_limit == 0) {
137 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
143 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
146 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
/*
 * ip6_output - entry point from dst_output(): fragment when the packet
 * exceeds the path MTU or already carries a frag_list, otherwise hand it
 * straight to ip6_output2().
 */
149 int ip6_output(struct sk_buff **pskb)
151 struct sk_buff *skb = *pskb;
153 if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list))
154 return ip6_fragment(pskb, ip6_output2);
156 return ip6_output2(pskb);
159 #ifdef CONFIG_NETFILTER
/*
 * ip6_route_me_harder - redo the routing decision after netfilter may have
 * rewritten the packet's addresses.
 *
 * Builds a flow from the current IPv6 header (daddr/saddr/nexthdr) and the
 * socket's bound device (if any), performs a fresh route lookup, and
 * replaces skb->dst with the new route.
 * NOTE(review): elided lines in this listing include the struct flowi
 * declaration/initializer braces, the error-path return after the
 * OUTNOROUTES bump, and the final dst assignment/return.
 */
160 int ip6_route_me_harder(struct sk_buff *skb)
162 struct ipv6hdr *iph = skb->nh.ipv6h;
163 struct dst_entry *dst;
165 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
168 { .daddr = iph->daddr,
169 .saddr = iph->saddr, } },
170 .proto = iph->nexthdr,
173 dst = ip6_route_output(skb->sk, &fl);
176 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
178 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
183 /* Drop old route. */
184 dst_release(skb->dst);
/*
 * ip6_maybe_reroute - dst_output() wrapper used as the LOCAL_OUT okfn.
 * If netfilter altered the packet (NFC_ALTERED set in nfcache) the route
 * is recomputed via ip6_route_me_harder() before output.
 * NOTE(review): the error-handling body of the inner if is elided here.
 */
191 static inline int ip6_maybe_reroute(struct sk_buff *skb)
193 #ifdef CONFIG_NETFILTER
194 if (skb->nfcache & NFC_ALTERED){
195 if (ip6_route_me_harder(skb) != 0){
200 #endif /* CONFIG_NETFILTER */
201 return dst_output(skb);
205 * xmit an sk_buff (used by TCP)
/*
 * ip6_xmit - main transmit path for stream sockets.
 *
 * Ensures enough headroom for extension headers + the IPv6 header +
 * link-layer reserve (reallocating the skb head if necessary), pushes the
 * fragmentable and non-fragmentable extension headers, fills in the IPv6
 * header (flow label, hop limit from the socket or the route metric,
 * payload length, addresses) and hands the packet to the NF_IP6_LOCAL_OUT
 * hook with ip6_maybe_reroute as the continuation.  A packet larger than
 * the path MTU (when ipfragok is not set) is bounced back to the sender
 * with ICMPV6_PKT_TOOBIG.
 * NOTE(review): this listing elides several lines, including the mtu
 * assignment, skb2/skb swap after skb_realloc_headroom, braces, and the
 * final error return.
 */
208 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
209 struct ipv6_txoptions *opt, int ipfragok)
211 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
212 struct in6_addr *first_hop = &fl->fl6_dst;
213 struct dst_entry *dst = skb->dst;
215 u8 proto = fl->proto;
216 int seg_len = skb->len;
223 /* First: exthdrs may take lots of space (~8K for now)
224 MAX_HEADER is not enough.
226 head_room = opt->opt_nflen + opt->opt_flen;
227 seg_len += head_room;
228 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
230 if (skb_headroom(skb) < head_room) {
231 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
235 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
239 skb_set_owner_w(skb, sk);
242 ipv6_push_frag_opts(skb, opt, &proto);
244 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
247 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
250 * Fill in the IPv6 header
253 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
256 hlimit = np->hop_limit;
258 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
260 hdr->payload_len = htons(seg_len);
261 hdr->nexthdr = proto;
262 hdr->hop_limit = hlimit;
264 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
265 ipv6_addr_copy(&hdr->daddr, first_hop);
268 if ((skb->len <= mtu) || ipfragok) {
269 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
270 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
274 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
276 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
277 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
283 * To avoid extra problems ND packets are sent through this
284 * routine. It's code duplication but I really want to avoid
285 * extra checks since ipv6_build_header is used by TCP (which
286 * is for us performance critical)
/*
 * ip6_nd_hdr - build a bare IPv6 header for a Neighbour Discovery packet.
 * Fills version, payload length, next header, the socket's hop limit and
 * the given source/destination addresses; no extension headers, no
 * netfilter traversal.
 * NOTE(review): elided lines include the proto/len parameters in the
 * signature tail and the skb->nh assignment/return.
 */
289 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
290 struct in6_addr *saddr, struct in6_addr *daddr,
293 struct ipv6_pinfo *np = inet6_sk(sk);
297 skb->protocol = htons(ETH_P_IPV6);
300 totlen = len + sizeof(struct ipv6hdr);
302 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
305 *(u32*)hdr = htonl(0x60000000);
307 hdr->payload_len = htons(len);
308 hdr->nexthdr = proto;
309 hdr->hop_limit = np->hop_limit;
311 ipv6_addr_copy(&hdr->saddr, saddr);
312 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_call_ra_chain - deliver a Router Alert packet to interested sockets.
 *
 * Walks the global ip6_ra_chain under ip6_ra_lock; every socket whose
 * selector matches gets a clone of the skb via rawv6_rcv(), with the last
 * matching socket receiving the original.
 * NOTE(review): elided lines include the "last != NULL" tests, the return
 * statements, and closing braces; return semantics (whether the packet
 * was consumed) cannot be confirmed from this listing.
 */
317 int ip6_call_ra_chain(struct sk_buff *skb, int sel)
319 struct ip6_ra_chain *ra;
320 struct sock *last = NULL;
322 read_lock(&ip6_ra_lock);
323 for (ra = ip6_ra_chain; ra; ra = ra->next) {
324 struct sock *sk = ra->sk;
325 if (sk && ra->sel == sel) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
329 rawv6_rcv(last, skb2);
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
340 read_unlock(&ip6_ra_lock);
/* NF_IP6_FORWARD okfn: continue down the normal dst output path. */
344 static inline int ip6_forward_finish(struct sk_buff *skb)
346 return dst_output(skb);
/*
 * ip6_forward - forwarding path for packets not addressed to this host.
 *
 * In order: drop if forwarding is disabled or XFRM FWD policy rejects;
 * hand Router Alert packets to ip6_call_ra_chain() untouched; send
 * ICMPV6_TIME_EXCEED when hop_limit would expire; run XFRM route checks;
 * possibly emit a (rate-limited) redirect when the packet leaves on the
 * interface it arrived on; drop packets with multicast/loopback/
 * link-local source addresses (security critical); send PKT_TOOBIG when
 * the packet exceeds the path MTU; skb_cow() for header writability;
 * then pass the packet to the NF_IP6_FORWARD hook.
 * NOTE(review): many lines are elided here (error-label bodies, the
 * hop_limit decrement itself, several braces) — comments describe only
 * the visible statements.
 */
349 int ip6_forward(struct sk_buff *skb)
351 struct dst_entry *dst = skb->dst;
352 struct ipv6hdr *hdr = skb->nh.ipv6h;
353 struct inet6_skb_parm *opt = IP6CB(skb);
355 if (ipv6_devconf.forwarding == 0)
358 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
359 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
363 skb->ip_summed = CHECKSUM_NONE;
366 * We DO NOT make any processing on
367 * RA packets, pushing them to user level AS IS
368 * without any WARRANTY that application will be able
369 * to interpret them. The reason is that we
370 * cannot make anything clever here.
372 * We are not end-node, so that if packet contains
373 * AH/ESP, we cannot make anything.
374 * Defragmentation also would be mistake, RA packets
375 * cannot be fragmented, because there is no warranty
376 * that different fragments will go along one path. --ANK
379 u8 *ptr = skb->nh.raw + opt->ra;
380 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
385 * check and decrement ttl
387 if (hdr->hop_limit <= 1) {
388 /* Force OUTPUT device used as source address */
390 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
397 if (!xfrm6_route_forward(skb)) {
398 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
402 /* IPv6 specs say nothing about it, but it is clear that we cannot
403 send redirects to source routed frames.
405 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
406 struct in6_addr *target = NULL;
408 struct neighbour *n = dst->neighbour;
411 * incoming and outgoing devices are the same
415 rt = (struct rt6_info *) dst;
416 if ((rt->rt6i_flags & RTF_GATEWAY))
417 target = (struct in6_addr*)&n->primary_key;
419 target = &hdr->daddr;
421 /* Limit redirects both by destination (here)
422 and by source (inside ndisc_send_redirect)
424 if (xrlim_allow(dst, 1*HZ))
425 ndisc_send_redirect(skb, n, target);
426 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
427 |IPV6_ADDR_LINKLOCAL)) {
428 /* This check is security critical. */
432 if (skb->len > dst_pmtu(dst)) {
433 /* Again, force OUTPUT device used as source address */
435 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
436 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
437 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
442 if (skb_cow(skb, dst->dev->hard_header_len)) {
443 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
449 /* Mangling hops number delayed to point after skb COW */
453 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
454 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
457 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata - copy per-packet metadata from the original skb to a
 * freshly built fragment: packet type, priority, protocol, security mark,
 * a reference to the route, traffic-control index and the netfilter state
 * (mark, conntrack reference, bridge-netfilter info, debug bits).
 */
463 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
465 to->pkt_type = from->pkt_type;
466 to->priority = from->priority;
467 to->protocol = from->protocol;
468 to->security = from->security;
469 to->dst = dst_clone(from->dst);
472 #ifdef CONFIG_NET_SCHED
473 to->tc_index = from->tc_index;
475 #ifdef CONFIG_NETFILTER
476 to->nfmark = from->nfmark;
477 /* Connection association is same as pre-frag packet */
478 to->nfct = from->nfct;
479 nf_conntrack_get(to->nfct);
480 to->nfctinfo = from->nfctinfo;
481 #ifdef CONFIG_BRIDGE_NETFILTER
482 nf_bridge_put(to->nf_bridge);
483 to->nf_bridge = from->nf_bridge;
484 nf_bridge_get(to->nf_bridge);
486 #ifdef CONFIG_NETFILTER_DEBUG
487 to->nf_debug = from->nf_debug;
/*
 * ip6_find_1stfragopt - locate where the Fragment header must be inserted.
 *
 * Walks the chain of extension headers from the fixed IPv6 header; only
 * per-fragment headers (hop-by-hop, routing, and a destination-options
 * header preceding the routing header) stay in the unfragmentable part.
 * On return, *nexthdr points at the nexthdr byte to be overwritten with
 * NEXTHDR_FRAGMENT, and the returned offset is the unfragmentable length.
 * NOTE(review): elided lines include the switch statement, other case
 * labels, and the final return — only part of the walk is visible.
 */
492 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
494 u16 offset = sizeof(struct ipv6hdr);
495 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
496 unsigned int packet_len = skb->tail - skb->nh.raw;
498 *nexthdr = &skb->nh.ipv6h->nexthdr;
500 while (offset + 1 <= packet_len) {
505 case NEXTHDR_ROUTING:
507 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
508 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
509 offset += ipv6_optlen(exthdr);
510 *nexthdr = &exthdr->nexthdr;
511 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
/*
 * ip6_fragment - split an oversized packet into MTU-sized fragments.
 *
 * Two strategies:
 *  1. Fast path: if the skb already carries a frag_list with correct
 *     geometry (each piece fits the MTU, is 8-byte aligned except the
 *     last, has headroom for the headers, is unshared and owned), detach
 *     the list and convert each piece in place: copy the unfragmentable
 *     header (saved in tmp_hdr), insert a frag_hdr with a shared
 *     identification and running offset, and send each via output().
 *  2. Slow path: allocate a new skb per fragment, copy metadata and the
 *     unfragmentable header, build the fragment header, and copy 'len'
 *     bytes of payload from offset 'ptr' with skb_copy_bits().
 * The per-fragment payload MTU is path MTU minus the unfragmentable
 * length (hlen) minus sizeof(struct frag_hdr).
 * NOTE(review): a large number of lines are elided here (goto labels,
 * loop headers, error cleanup, the output() calls, several braces);
 * comments describe only the statements that are visible.
 */
521 static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**))
523 struct net_device *dev;
524 struct sk_buff *frag, *skb = *pskb;
525 struct rt6_info *rt = (struct rt6_info*)skb->dst;
526 struct ipv6hdr *tmp_hdr;
528 unsigned int mtu, hlen, left, len;
530 int ptr, offset = 0, err=0;
531 u8 *prevhdr, nexthdr = 0;
534 hlen = ip6_find_1stfragopt(skb, &prevhdr);
537 mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
539 if (skb_shinfo(skb)->frag_list) {
540 int first_len = skb_pagelen(skb);
542 if (first_len - hlen > mtu ||
543 ((first_len - hlen) & 7) ||
547 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
548 /* Correct geometry. */
549 if (frag->len > mtu ||
550 ((frag->len & 7) && frag->next) ||
551 skb_headroom(frag) < hlen)
554 /* Correct socket ownership. */
555 if (frag->sk == NULL)
558 /* Partially cloned skb? */
559 if (skb_shared(frag))
565 frag = skb_shinfo(skb)->frag_list;
566 skb_shinfo(skb)->frag_list = NULL;
569 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
571 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
575 *prevhdr = NEXTHDR_FRAGMENT;
576 memcpy(tmp_hdr, skb->nh.raw, hlen);
577 __skb_pull(skb, hlen);
578 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
579 skb->nh.raw = __skb_push(skb, hlen);
580 memcpy(skb->nh.raw, tmp_hdr, hlen);
582 ipv6_select_ident(skb, fh);
583 fh->nexthdr = nexthdr;
585 fh->frag_off = htons(IP6_MF);
586 frag_id = fh->identification;
588 first_len = skb_pagelen(skb);
589 skb->data_len = first_len - skb_headlen(skb);
590 skb->len = first_len;
591 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
595 /* Prepare header of the next frame,
596 * before previous one went down. */
598 frag->h.raw = frag->data;
599 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
600 frag->nh.raw = __skb_push(frag, hlen);
601 memcpy(frag->nh.raw, tmp_hdr, hlen);
602 offset += skb->len - hlen - sizeof(struct frag_hdr);
603 fh->nexthdr = nexthdr;
605 fh->frag_off = htons(offset);
606 if (frag->next != NULL)
607 fh->frag_off |= htons(IP6_MF);
608 fh->identification = frag_id;
609 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
610 ip6_copy_metadata(frag, skb);
626 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
636 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
641 left = skb->len - hlen; /* Space per frame */
642 ptr = hlen; /* Where to start from */
645 * Fragment the datagram.
648 *prevhdr = NEXTHDR_FRAGMENT;
651 * Keep copying data until we run out.
655 /* IF: it doesn't fit, use 'mtu' - the data space left */
658 /* IF: we are not sending upto and including the packet end
659 then align the next start on an eight byte boundary */
667 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
668 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
669 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
675 * Set up data on packet
678 ip6_copy_metadata(frag, skb);
679 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
680 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
681 frag->nh.raw = frag->data;
682 fh = (struct frag_hdr*)(frag->data + hlen);
683 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
686 * Charge the memory for the fragment to any owner
690 skb_set_owner_w(frag, skb->sk);
693 * Copy the packet header into the new buffer.
695 memcpy(frag->nh.raw, skb->data, hlen);
698 * Build fragment header.
700 fh->nexthdr = nexthdr;
703 ipv6_select_ident(skb, fh);
704 frag_id = fh->identification;
706 fh->identification = frag_id;
709 * Copy a block of the IP datagram.
711 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
715 fh->frag_off = htons(offset);
717 fh->frag_off |= htons(IP6_MF);
718 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
724 * Put this fragment into the sending queue.
727 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
734 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
739 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/*
 * ip6_dst_lookup - obtain a route (*dst) for the flow fl of socket sk.
 *
 * First tries the socket's cached route (__sk_dst_check), invalidating it
 * when the cached destination or outgoing interface no longer matches the
 * flow (see the long comment below).  Falls back to ip6_route_output(),
 * and, when the flow has no source address yet, picks one with
 * ipv6_get_saddr().
 * NOTE(review): elided lines include NULL checks, the release on the
 * error path and the return statements.
 */
743 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
749 struct ipv6_pinfo *np = inet6_sk(sk);
751 *dst = __sk_dst_check(sk, np->dst_cookie);
753 struct rt6_info *rt = (struct rt6_info*)*dst;
755 /* Yes, checking route validity in not connected
756 case is not very simple. Take into account,
757 that we do not support routing by source, TOS,
758 and MSG_DONTROUTE --ANK (980726)
760 1. If route was host route, check that
761 cached destination is current.
762 If it is network route, we still may
763 check its validity using saved pointer
764 to the last used address: daddr_cache.
765 We do not want to save whole address now,
766 (because main consumer of this service
767 is tcp, which has not this problem),
768 so that the last trick works only on connected
770 2. oif also should be the same.
773 if (((rt->rt6i_dst.plen != 128 ||
774 ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
775 && (np->daddr_cache == NULL ||
776 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
777 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
785 *dst = ip6_route_output(sk, fl);
787 if ((err = (*dst)->error))
788 goto out_err_release;
790 if (ipv6_addr_any(&fl->fl6_src)) {
791 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
795 printk(KERN_DEBUG "ip6_dst_lookup: "
796 "no available source address\n");
798 goto out_err_release;
/*
 * ip6_append_data - append user data to the socket's pending send queue
 * (corked output path; datagram sockets).
 *
 * On the first call (write queue empty) the cork state is set up: a copy
 * of the tx options, a hold on the route, hop limit, the frozen MTU and
 * the extension-header length.  Data is then packed into the tail skb
 * while it fits; when it does not, a new skb is allocated (with room for
 * the fragment header) and any sub-8-byte tail of the previous skb is
 * migrated into it ("fraggap") so every non-final fragment stays 8-byte
 * aligned.  On devices with NETIF_F_SG remaining data lands in page
 * frags (sk_sndmsg_page / sk_sndmsg_off track the current page).
 * getfrag() is the caller's copy routine (e.g. copying from user space,
 * possibly checksumming).
 * NOTE(review): a very large number of lines are elided from this
 * listing (labels, braces, the main while condition, error paths);
 * comments describe only the visible statements.
 */
810 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
811 void *from, int length, int transhdrlen,
812 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
815 struct inet_opt *inet = inet_sk(sk);
816 struct ipv6_pinfo *np = inet6_sk(sk);
818 unsigned int maxfraglen, fragheaderlen;
825 int csummode = CHECKSUM_NONE;
829 if (skb_queue_empty(&sk->sk_write_queue)) {
834 if (np->cork.opt == NULL) {
835 np->cork.opt = kmalloc(opt->tot_len,
837 if (unlikely(np->cork.opt == NULL))
839 } else if (np->cork.opt->tot_len < opt->tot_len) {
840 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
843 memcpy(np->cork.opt, opt, opt->tot_len);
844 inet->cork.flags |= IPCORK_OPT;
845 /* need source address above miyazawa*/
847 dst_hold(&rt->u.dst);
850 np->cork.hop_limit = hlimit;
851 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
852 inet->cork.length = 0;
853 sk->sk_sndmsg_page = NULL;
854 sk->sk_sndmsg_off = 0;
855 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
857 transhdrlen += exthdrlen;
861 if (inet->cork.flags & IPCORK_OPT)
865 mtu = inet->cork.fragsize;
868 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
870 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
871 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
873 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
874 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
875 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
881 * Let's try using as much space as possible.
882 * Use MTU if total length of the message fits into the MTU.
883 * Otherwise, we need to reserve fragment header and
884 * fragment alignment (= 8-15 octets, in total).
886 * Note that we may need to "move" the data from the tail of
887 * of the buffer to the new fragment when we split
890 * FIXME: It may be fragmented into multiple chunks
891 * at once if non-fragmentable extension headers
896 inet->cork.length += length;
898 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
902 /* Check if the remaining data fits into current packet. */
903 copy = mtu - skb->len;
905 copy = maxfraglen - skb->len;
909 unsigned int datalen;
910 unsigned int fraglen;
911 unsigned int fraggap;
912 unsigned int alloclen;
913 struct sk_buff *skb_prev;
917 /* There's no room in the current skb */
919 fraggap = skb_prev->len - maxfraglen;
924 * If remaining data exceeds the mtu,
925 * we know we need more fragment(s).
927 datalen = length + fraggap;
928 if (datalen > mtu - fragheaderlen)
929 datalen = maxfraglen - fragheaderlen;
931 fraglen = datalen + fragheaderlen;
932 if ((flags & MSG_MORE) &&
933 !(rt->u.dst.dev->features&NETIF_F_SG))
936 alloclen = datalen + fragheaderlen;
939 * The last fragment gets additional space at tail.
940 * Note: we overallocate on fragments with MSG_MORE
941 * because we have no idea if we're the last one.
943 if (datalen == length + fraggap)
944 alloclen += rt->u.dst.trailer_len;
947 * We just reserve space for fragment header.
948 * Note: this may be overallocation if the message
949 * (without MSG_MORE) fits into the MTU.
951 alloclen += sizeof(struct frag_hdr);
954 skb = sock_alloc_send_skb(sk,
956 (flags & MSG_DONTWAIT), &err);
959 if (atomic_read(&sk->sk_wmem_alloc) <=
961 skb = sock_wmalloc(sk,
962 alloclen + hh_len, 1,
964 if (unlikely(skb == NULL))
970 * Fill in the control structures
972 skb->ip_summed = csummode;
974 /* reserve for fragmentation */
975 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
978 * Find where to start putting bytes
980 data = skb_put(skb, fraglen);
981 skb->nh.raw = data + exthdrlen;
982 data += fragheaderlen;
983 skb->h.raw = data + exthdrlen;
986 skb->csum = skb_copy_and_csum_bits(
987 skb_prev, maxfraglen,
988 data + transhdrlen, fraggap, 0);
989 skb_prev->csum = csum_sub(skb_prev->csum,
992 skb_trim(skb_prev, maxfraglen);
994 copy = datalen - transhdrlen - fraggap;
999 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1006 length -= datalen - fraggap;
1009 csummode = CHECKSUM_NONE;
1012 * Put the packet on the pending queue
1014 __skb_queue_tail(&sk->sk_write_queue, skb);
1021 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1025 if (getfrag(from, skb_put(skb, copy),
1026 offset, copy, off, skb) < 0) {
1027 __skb_trim(skb, off);
1032 int i = skb_shinfo(skb)->nr_frags;
1033 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1034 struct page *page = sk->sk_sndmsg_page;
1035 int off = sk->sk_sndmsg_off;
1038 if (page && (left = PAGE_SIZE - off) > 0) {
1041 if (page != frag->page) {
1042 if (i == MAX_SKB_FRAGS) {
1047 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1048 frag = &skb_shinfo(skb)->frags[i];
1050 } else if(i < MAX_SKB_FRAGS) {
1051 if (copy > PAGE_SIZE)
1053 page = alloc_pages(sk->sk_allocation, 0);
1058 sk->sk_sndmsg_page = page;
1059 sk->sk_sndmsg_off = 0;
1061 skb_fill_page_desc(skb, i, page, 0, 0);
1062 frag = &skb_shinfo(skb)->frags[i];
1063 skb->truesize += PAGE_SIZE;
1064 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1069 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1073 sk->sk_sndmsg_off += copy;
1076 skb->data_len += copy;
1083 inet->cork.length -= length;
1084 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - flush the corked write queue as one packet.
 *
 * Dequeues the first pending skb, chains all remaining queued skbs onto
 * its frag_list (adjusting len/data_len), pushes any corked extension
 * headers, prepends and fills the IPv6 header (jumbo payloads get
 * payload_len 0), attaches the cached route and sends the result through
 * NF_IP6_LOCAL_OUT to dst_output.  Afterwards the cork state (options,
 * route, flow) is torn down.
 * NOTE(review): some lines are elided (labels, error handling, braces,
 * the final return).
 */
1088 int ip6_push_pending_frames(struct sock *sk)
1090 struct sk_buff *skb, *tmp_skb;
1091 struct sk_buff **tail_skb;
1092 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1093 struct inet_opt *inet = inet_sk(sk);
1094 struct ipv6_pinfo *np = inet6_sk(sk);
1095 struct ipv6hdr *hdr;
1096 struct ipv6_txoptions *opt = np->cork.opt;
1097 struct rt6_info *rt = np->cork.rt;
1098 struct flowi *fl = &inet->cork.fl;
1099 unsigned char proto = fl->proto;
1102 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1104 tail_skb = &(skb_shinfo(skb)->frag_list);
1106 /* move skb->data to ip header from ext header */
1107 if (skb->data < skb->nh.raw)
1108 __skb_pull(skb, skb->nh.raw - skb->data);
1109 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1110 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1111 *tail_skb = tmp_skb;
1112 tail_skb = &(tmp_skb->next);
1113 skb->len += tmp_skb->len;
1114 skb->data_len += tmp_skb->len;
1115 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1116 skb->truesize += tmp_skb->truesize;
1117 __sock_put(tmp_skb->sk);
1118 tmp_skb->destructor = NULL;
1123 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1124 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1125 if (opt && opt->opt_flen)
1126 ipv6_push_frag_opts(skb, opt, &proto);
1127 if (opt && opt->opt_nflen)
1128 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1130 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1132 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1134 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1135 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1137 hdr->payload_len = 0;
1138 hdr->hop_limit = np->cork.hop_limit;
1139 hdr->nexthdr = proto;
1140 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1141 ipv6_addr_copy(&hdr->daddr, final_dst);
1143 skb->dst = dst_clone(&rt->u.dst);
1144 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1145 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1148 err = inet->recverr ? net_xmit_errno(err) : 0;
1154 inet->cork.flags &= ~IPCORK_OPT;
1156 kfree(np->cork.opt);
1157 np->cork.opt = NULL;
1160 dst_release(&np->cork.rt->u.dst);
1163 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * ip6_flush_pending_frames - drop everything queued by ip6_append_data().
 * Frees each pending skb (counting OUTDISCARDS) and tears down the cork
 * state: options, the held route, and the cached flow.
 * NOTE(review): the kfree_skb call inside the loop and some NULL checks
 * are elided from this listing.
 */
1169 void ip6_flush_pending_frames(struct sock *sk)
1171 struct inet_opt *inet = inet_sk(sk);
1172 struct ipv6_pinfo *np = inet6_sk(sk);
1173 struct sk_buff *skb;
1175 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1176 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1180 inet->cork.flags &= ~IPCORK_OPT;
1183 kfree(np->cork.opt);
1184 np->cork.opt = NULL;
1187 dst_release(&np->cork.rt->u.dst);
1190 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));