2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
58 static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**));
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
62 static u32 ipv6_fragmentation_id = 1;
63 static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
72 static inline int ip6_output_finish(struct sk_buff *skb)
75 struct dst_entry *dst = skb->dst;
76 struct hh_cache *hh = dst->hh;
81 read_lock_bh(&hh->hh_lock);
82 hh_alen = HH_DATA_ALIGN(hh->hh_len);
83 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
84 read_unlock_bh(&hh->hh_lock);
85 skb_push(skb, hh->hh_len);
86 return hh->hh_output(skb);
87 } else if (dst->neighbour)
88 return dst->neighbour->output(skb);
90 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
96 /* dev_loopback_xmit for use with netfilter. */
97 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
99 newskb->mac.raw = newskb->data;
100 __skb_pull(newskb, newskb->nh.raw - newskb->data);
101 newskb->pkt_type = PACKET_LOOPBACK;
102 newskb->ip_summed = CHECKSUM_UNNECESSARY;
103 BUG_TRAP(newskb->dst);
110 static int ip6_output2(struct sk_buff **pskb)
112 struct sk_buff *skb = *pskb;
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
148 int ip6_output(struct sk_buff **pskb)
150 struct sk_buff *skb = *pskb;
152 if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list))
153 return ip6_fragment(pskb, ip6_output2);
155 return ip6_output2(pskb);
158 #ifdef CONFIG_NETFILTER
159 int ip6_route_me_harder(struct sk_buff *skb)
161 struct ipv6hdr *iph = skb->nh.ipv6h;
162 struct dst_entry *dst;
164 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
167 { .daddr = iph->daddr,
168 .saddr = iph->saddr, } },
169 .proto = iph->nexthdr,
172 dst = ip6_route_output(skb->sk, &fl);
175 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
177 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
182 /* Drop old route. */
183 dst_release(skb->dst);
190 static inline int ip6_maybe_reroute(struct sk_buff *skb)
192 #ifdef CONFIG_NETFILTER
193 if (skb->nfcache & NFC_ALTERED){
194 if (ip6_route_me_harder(skb) != 0){
199 #endif /* CONFIG_NETFILTER */
200 return dst_output(skb);
204 * xmit an sk_buff (used by TCP)
207 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
208 struct ipv6_txoptions *opt, int ipfragok)
210 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
211 struct in6_addr *first_hop = &fl->fl6_dst;
212 struct dst_entry *dst = skb->dst;
214 u8 proto = fl->proto;
215 int seg_len = skb->len;
222 /* First: exthdrs may take lots of space (~8K for now)
223 MAX_HEADER is not enough.
225 head_room = opt->opt_nflen + opt->opt_flen;
226 seg_len += head_room;
227 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
229 if (skb_headroom(skb) < head_room) {
230 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
234 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
238 skb_set_owner_w(skb, sk);
241 ipv6_push_frag_opts(skb, opt, &proto);
243 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
246 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
249 * Fill in the IPv6 header
252 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
255 hlimit = np->hop_limit;
257 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
259 hdr->payload_len = htons(seg_len);
260 hdr->nexthdr = proto;
261 hdr->hop_limit = hlimit;
263 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
264 ipv6_addr_copy(&hdr->daddr, first_hop);
267 if ((skb->len <= mtu) || ipfragok) {
268 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
269 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
273 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
275 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
276 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
282 * To avoid extra problems ND packets are send through this
283 * routine. It's code duplication but I really want to avoid
284 * extra checks since ipv6_build_header is used by TCP (which
285 * is for us performance critical)
288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289 struct in6_addr *saddr, struct in6_addr *daddr,
292 struct ipv6_pinfo *np = inet6_sk(sk);
296 skb->protocol = htons(ETH_P_IPV6);
299 totlen = len + sizeof(struct ipv6hdr);
301 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
304 *(u32*)hdr = htonl(0x60000000);
306 hdr->payload_len = htons(len);
307 hdr->nexthdr = proto;
308 hdr->hop_limit = np->hop_limit;
310 ipv6_addr_copy(&hdr->saddr, saddr);
311 ipv6_addr_copy(&hdr->daddr, daddr);
316 int ip6_call_ra_chain(struct sk_buff *skb, int sel)
318 struct ip6_ra_chain *ra;
319 struct sock *last = NULL;
321 read_lock(&ip6_ra_lock);
322 for (ra = ip6_ra_chain; ra; ra = ra->next) {
323 struct sock *sk = ra->sk;
324 if (sk && ra->sel == sel) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 rawv6_rcv(last, skb2);
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
339 read_unlock(&ip6_ra_lock);
343 static inline int ip6_forward_finish(struct sk_buff *skb)
345 return dst_output(skb);
348 int ip6_forward(struct sk_buff *skb)
350 struct dst_entry *dst = skb->dst;
351 struct ipv6hdr *hdr = skb->nh.ipv6h;
352 struct inet6_skb_parm *opt = IP6CB(skb);
354 if (ipv6_devconf.forwarding == 0)
357 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
358 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
362 skb->ip_summed = CHECKSUM_NONE;
365 * We DO NOT make any processing on
366 * RA packets, pushing them to user level AS IS
367 * without ane WARRANTY that application will be able
368 * to interpret them. The reason is that we
369 * cannot make anything clever here.
371 * We are not end-node, so that if packet contains
372 * AH/ESP, we cannot make anything.
373 * Defragmentation also would be mistake, RA packets
374 * cannot be fragmented, because there is no warranty
375 * that different fragments will go along one path. --ANK
378 u8 *ptr = skb->nh.raw + opt->ra;
379 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
384 * check and decrement ttl
386 if (hdr->hop_limit <= 1) {
387 /* Force OUTPUT device used as source address */
389 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
396 if (!xfrm6_route_forward(skb)) {
397 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
401 /* IPv6 specs say nothing about it, but it is clear that we cannot
402 send redirects to source routed frames.
404 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
405 struct in6_addr *target = NULL;
407 struct neighbour *n = dst->neighbour;
410 * incoming and outgoing devices are the same
414 rt = (struct rt6_info *) dst;
415 if ((rt->rt6i_flags & RTF_GATEWAY))
416 target = (struct in6_addr*)&n->primary_key;
418 target = &hdr->daddr;
420 /* Limit redirects both by destination (here)
421 and by source (inside ndisc_send_redirect)
423 if (xrlim_allow(dst, 1*HZ))
424 ndisc_send_redirect(skb, n, target);
425 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
426 |IPV6_ADDR_LINKLOCAL)) {
427 /* This check is security critical. */
431 if (skb->len > dst_pmtu(dst)) {
432 /* Again, force OUTPUT device used as source address */
434 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
435 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
436 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
441 if (skb_cow(skb, dst->dev->hard_header_len)) {
442 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
448 /* Mangling hops number delayed to point after skb COW */
452 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
453 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
456 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
462 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
464 to->pkt_type = from->pkt_type;
465 to->priority = from->priority;
466 to->protocol = from->protocol;
467 to->security = from->security;
468 to->dst = dst_clone(from->dst);
471 #ifdef CONFIG_NET_SCHED
472 to->tc_index = from->tc_index;
474 #ifdef CONFIG_NETFILTER
475 to->nfmark = from->nfmark;
476 /* Connection association is same as pre-frag packet */
477 to->nfct = from->nfct;
478 nf_conntrack_get(to->nfct);
479 #ifdef CONFIG_BRIDGE_NETFILTER
480 nf_bridge_put(to->nf_bridge);
481 to->nf_bridge = from->nf_bridge;
482 nf_bridge_get(to->nf_bridge);
484 #ifdef CONFIG_NETFILTER_DEBUG
485 to->nf_debug = from->nf_debug;
490 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
492 u16 offset = sizeof(struct ipv6hdr);
493 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
494 unsigned int packet_len = skb->tail - skb->nh.raw;
496 *nexthdr = &skb->nh.ipv6h->nexthdr;
498 while (offset + 1 <= packet_len) {
503 case NEXTHDR_ROUTING:
505 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
506 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
507 offset += ipv6_optlen(exthdr);
508 *nexthdr = &exthdr->nexthdr;
509 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
519 static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**))
521 struct net_device *dev;
522 struct sk_buff *frag, *skb = *pskb;
523 struct rt6_info *rt = (struct rt6_info*)skb->dst;
524 struct ipv6hdr *tmp_hdr;
526 unsigned int mtu, hlen, left, len;
528 int ptr, offset = 0, err=0;
529 u8 *prevhdr, nexthdr = 0;
532 hlen = ip6_find_1stfragopt(skb, &prevhdr);
535 mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
537 if (skb_shinfo(skb)->frag_list) {
538 int first_len = skb_pagelen(skb);
540 if (first_len - hlen > mtu ||
541 ((first_len - hlen) & 7) ||
545 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
546 /* Correct geometry. */
547 if (frag->len > mtu ||
548 ((frag->len & 7) && frag->next) ||
549 skb_headroom(frag) < hlen)
552 /* Correct socket ownership. */
553 if (frag->sk == NULL)
556 /* Partially cloned skb? */
557 if (skb_shared(frag))
563 frag = skb_shinfo(skb)->frag_list;
564 skb_shinfo(skb)->frag_list = NULL;
567 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
569 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
573 *prevhdr = NEXTHDR_FRAGMENT;
574 memcpy(tmp_hdr, skb->nh.raw, hlen);
575 __skb_pull(skb, hlen);
576 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
577 skb->nh.raw = __skb_push(skb, hlen);
578 memcpy(skb->nh.raw, tmp_hdr, hlen);
580 ipv6_select_ident(skb, fh);
581 fh->nexthdr = nexthdr;
583 fh->frag_off = htons(IP6_MF);
584 frag_id = fh->identification;
586 first_len = skb_pagelen(skb);
587 skb->data_len = first_len - skb_headlen(skb);
588 skb->len = first_len;
589 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
593 /* Prepare header of the next frame,
594 * before previous one went down. */
596 frag->h.raw = frag->data;
597 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
598 frag->nh.raw = __skb_push(frag, hlen);
599 memcpy(frag->nh.raw, tmp_hdr, hlen);
600 offset += skb->len - hlen - sizeof(struct frag_hdr);
601 fh->nexthdr = nexthdr;
603 fh->frag_off = htons(offset);
604 if (frag->next != NULL)
605 fh->frag_off |= htons(IP6_MF);
606 fh->identification = frag_id;
607 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
608 ip6_copy_metadata(frag, skb);
624 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
634 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
639 left = skb->len - hlen; /* Space per frame */
640 ptr = hlen; /* Where to start from */
643 * Fragment the datagram.
646 *prevhdr = NEXTHDR_FRAGMENT;
649 * Keep copying data until we run out.
653 /* IF: it doesn't fit, use 'mtu' - the data space left */
656 /* IF: we are not sending upto and including the packet end
657 then align the next start on an eight byte boundary */
665 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
666 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
667 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
673 * Set up data on packet
676 ip6_copy_metadata(frag, skb);
677 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
678 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
679 frag->nh.raw = frag->data;
680 fh = (struct frag_hdr*)(frag->data + hlen);
681 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
684 * Charge the memory for the fragment to any owner
688 skb_set_owner_w(frag, skb->sk);
691 * Copy the packet header into the new buffer.
693 memcpy(frag->nh.raw, skb->data, hlen);
696 * Build fragment header.
698 fh->nexthdr = nexthdr;
701 ipv6_select_ident(skb, fh);
702 frag_id = fh->identification;
704 fh->identification = frag_id;
707 * Copy a block of the IP datagram.
709 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
713 fh->frag_off = htons(offset);
715 fh->frag_off |= htons(IP6_MF);
716 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
722 * Put this fragment into the sending queue.
725 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
732 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
737 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
741 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
747 struct ipv6_pinfo *np = inet6_sk(sk);
749 *dst = __sk_dst_check(sk, np->dst_cookie);
751 struct rt6_info *rt = (struct rt6_info*)*dst;
753 /* Yes, checking route validity in not connected
754 case is not very simple. Take into account,
755 that we do not support routing by source, TOS,
756 and MSG_DONTROUTE --ANK (980726)
758 1. If route was host route, check that
759 cached destination is current.
760 If it is network route, we still may
761 check its validity using saved pointer
762 to the last used address: daddr_cache.
763 We do not want to save whole address now,
764 (because main consumer of this service
765 is tcp, which has not this problem),
766 so that the last trick works only on connected
768 2. oif also should be the same.
771 if (((rt->rt6i_dst.plen != 128 ||
772 ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
773 && (np->daddr_cache == NULL ||
774 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
775 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
783 *dst = ip6_route_output(sk, fl);
785 if ((err = (*dst)->error))
786 goto out_err_release;
788 if (ipv6_addr_any(&fl->fl6_src)) {
789 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
793 printk(KERN_DEBUG "ip6_dst_lookup: "
794 "no available source address\n");
796 goto out_err_release;
799 if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
801 goto out_err_release;
812 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
813 void *from, int length, int transhdrlen,
814 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
817 struct inet_opt *inet = inet_sk(sk);
818 struct ipv6_pinfo *np = inet6_sk(sk);
820 unsigned int maxfraglen, fragheaderlen;
827 int csummode = CHECKSUM_NONE;
831 if (skb_queue_empty(&sk->sk_write_queue)) {
836 if (np->cork.opt == NULL) {
837 np->cork.opt = kmalloc(opt->tot_len,
839 if (unlikely(np->cork.opt == NULL))
841 } else if (np->cork.opt->tot_len < opt->tot_len) {
842 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
845 memcpy(np->cork.opt, opt, opt->tot_len);
846 inet->cork.flags |= IPCORK_OPT;
847 /* need source address above miyazawa*/
849 dst_hold(&rt->u.dst);
852 np->cork.hop_limit = hlimit;
853 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
854 inet->cork.length = 0;
855 sk->sk_sndmsg_page = NULL;
856 sk->sk_sndmsg_off = 0;
857 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
859 transhdrlen += exthdrlen;
863 if (inet->cork.flags & IPCORK_OPT)
867 mtu = inet->cork.fragsize;
870 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
872 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
873 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
875 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
876 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
877 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
882 inet->cork.length += length;
884 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
888 if ((copy = maxfraglen - skb->len) <= 0) {
890 unsigned int datalen;
891 unsigned int fraglen;
892 unsigned int alloclen;
895 datalen = maxfraglen - fragheaderlen;
896 if (datalen > length)
898 fraglen = datalen + fragheaderlen;
899 if ((flags & MSG_MORE) &&
900 !(rt->u.dst.dev->features&NETIF_F_SG))
901 alloclen = maxfraglen;
904 alloclen += sizeof(struct frag_hdr);
906 skb = sock_alloc_send_skb(sk,
908 (flags & MSG_DONTWAIT), &err);
911 if (atomic_read(&sk->sk_wmem_alloc) <=
913 skb = sock_wmalloc(sk,
914 alloclen + hh_len, 1,
916 if (unlikely(skb == NULL))
922 * Fill in the control structures
924 skb->ip_summed = csummode;
926 /* reserve 8 byte for fragmentation */
927 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
930 * Find where to start putting bytes
932 data = skb_put(skb, fraglen);
933 skb->nh.raw = data + exthdrlen;
934 data += fragheaderlen;
935 skb->h.raw = data + exthdrlen;
936 copy = datalen - transhdrlen;
937 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
947 csummode = CHECKSUM_NONE;
950 * Put the packet on the pending queue
952 __skb_queue_tail(&sk->sk_write_queue, skb);
959 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
963 if (getfrag(from, skb_put(skb, copy),
964 offset, copy, off, skb) < 0) {
965 __skb_trim(skb, off);
970 int i = skb_shinfo(skb)->nr_frags;
971 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
972 struct page *page = sk->sk_sndmsg_page;
973 int off = sk->sk_sndmsg_off;
976 if (page && (left = PAGE_SIZE - off) > 0) {
979 if (page != frag->page) {
980 if (i == MAX_SKB_FRAGS) {
985 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
986 frag = &skb_shinfo(skb)->frags[i];
988 } else if(i < MAX_SKB_FRAGS) {
989 if (copy > PAGE_SIZE)
991 page = alloc_pages(sk->sk_allocation, 0);
996 sk->sk_sndmsg_page = page;
997 sk->sk_sndmsg_off = 0;
999 skb_fill_page_desc(skb, i, page, 0, 0);
1000 frag = &skb_shinfo(skb)->frags[i];
1001 skb->truesize += PAGE_SIZE;
1002 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1007 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1011 sk->sk_sndmsg_off += copy;
1014 skb->data_len += copy;
1021 inet->cork.length -= length;
1022 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1026 int ip6_push_pending_frames(struct sock *sk)
1028 struct sk_buff *skb, *tmp_skb;
1029 struct sk_buff **tail_skb;
1030 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1031 struct inet_opt *inet = inet_sk(sk);
1032 struct ipv6_pinfo *np = inet6_sk(sk);
1033 struct ipv6hdr *hdr;
1034 struct ipv6_txoptions *opt = np->cork.opt;
1035 struct rt6_info *rt = np->cork.rt;
1036 struct flowi *fl = &inet->cork.fl;
1037 unsigned char proto = fl->proto;
1040 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1042 tail_skb = &(skb_shinfo(skb)->frag_list);
1044 /* move skb->data to ip header from ext header */
1045 if (skb->data < skb->nh.raw)
1046 __skb_pull(skb, skb->nh.raw - skb->data);
1047 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1048 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1049 *tail_skb = tmp_skb;
1050 tail_skb = &(tmp_skb->next);
1051 skb->len += tmp_skb->len;
1052 skb->data_len += tmp_skb->len;
1053 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1054 skb->truesize += tmp_skb->truesize;
1055 __sock_put(tmp_skb->sk);
1056 tmp_skb->destructor = NULL;
1061 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1062 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1063 if (opt && opt->opt_flen)
1064 ipv6_push_frag_opts(skb, opt, &proto);
1065 if (opt && opt->opt_nflen)
1066 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1068 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1070 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1072 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1073 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1075 hdr->payload_len = 0;
1076 hdr->hop_limit = np->cork.hop_limit;
1077 hdr->nexthdr = proto;
1078 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1079 ipv6_addr_copy(&hdr->daddr, final_dst);
1081 skb->dst = dst_clone(&rt->u.dst);
1082 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1083 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1086 err = inet->recverr ? net_xmit_errno(err) : 0;
1092 inet->cork.flags &= ~IPCORK_OPT;
1094 kfree(np->cork.opt);
1095 np->cork.opt = NULL;
1098 dst_release(&np->cork.rt->u.dst);
1101 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1107 void ip6_flush_pending_frames(struct sock *sk)
1109 struct inet_opt *inet = inet_sk(sk);
1110 struct ipv6_pinfo *np = inet6_sk(sk);
1111 struct sk_buff *skb;
1113 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1114 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1118 inet->cork.flags &= ~IPCORK_OPT;
1121 kfree(np->cork.opt);
1122 np->cork.opt = NULL;
1125 dst_release(&np->cork.rt->u.dst);
1128 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));