2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetics in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
/*
 * ipv6_select_ident - pick the Identification value for a fragment header.
 *
 * A single global counter is shared by every flow; ip6_id_lock serializes
 * the read-and-increment.  The counter skips 0 when it wraps, so an id of
 * 0 is never emitted.  The value is stored in network byte order (htonl),
 * per the "frag id should be in NBO" fix noted in the file header.
 */
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
/* 0 is skipped: restart at 1 when the counter wraps. */
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
/*
 * ip6_output_finish - hand a routed packet to the link layer.
 *
 * If the destination carries a cached hard header (dst->hh), copy it in
 * front of the data under hh_lock and transmit via hh_output();
 * otherwise fall back to the neighbour's output routine.  With neither
 * available the packet cannot go out and OUTNOROUTES is counted.
 */
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
/* Cached link-layer header: copy it ahead of skb->data under the lock. */
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
/* No way to resolve the next hop: account it as "no route". */
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
97 /* dev_loopback_xmit for use with netfilter. */
/*
 * Reinject a cloned multicast packet back into the local stack: point
 * mac.raw at the current data, pull up to the network header, and mark
 * the skb as loopback with no checksum verification needed.  A dst must
 * already be attached (BUG_TRAP below).
 */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
/*
 * ip6_output2 - post-routing output for a single (non-fragmented) packet.
 *
 * For multicast destinations that the device has joined (and unless the
 * sending socket disabled mc_loop), a clone is looped back to the local
 * stack through the POST_ROUTING hook before the original is sent.
 * Multicast packets with hop_limit 0 are discarded.  Everything exits
 * through the NF_IP6_POST_ROUTING hook into ip6_output_finish().
 */
111 static int ip6_output2(struct sk_buff *skb)
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
/* Loop a copy back locally when we are a member of the group. */
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
/* hop_limit 0 on a multicast packet: drop rather than transmit. */
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
/*
 * ip6_output - dst_output handler: fragment when the packet exceeds the
 * path MTU, otherwise send it straight through ip6_output2().
 */
148 int ip6_output(struct sk_buff *skb)
150 if (skb->len > dst_pmtu(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
153 return ip6_output2(skb);
156 #ifdef CONFIG_NETFILTER
/*
 * ip6_route_me_harder - redo the routing decision for a packet whose
 * addressing was changed by netfilter.  A fresh flow is built from the
 * current IPv6 header and the socket's bound interface; on lookup
 * failure OUTNOROUTES is counted.  The old dst reference is dropped
 * before the new route is installed.
 */
157 int ip6_route_me_harder(struct sk_buff *skb)
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
170 dst = ip6_route_output(skb->sk, &fl);
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
180 /* Drop old route. */
181 dst_release(skb->dst);
/*
 * ip6_maybe_reroute - LOCAL_OUT continuation: if netfilter marked the
 * skb as altered (NFC_ALTERED), redo routing via ip6_route_me_harder()
 * before handing the packet to dst_output().
 */
188 static inline int ip6_maybe_reroute(struct sk_buff *skb)
190 #ifdef CONFIG_NETFILTER
191 if (skb->nfcache & NFC_ALTERED){
192 if (ip6_route_me_harder(skb) != 0){
197 #endif /* CONFIG_NETFILTER */
198 return dst_output(skb);
202 * xmit an sk_buff (used by TCP)
/*
 * Build the IPv6 header (and any extension headers from @opt) in front
 * of the payload already in @skb, then transmit through the
 * NF_IP6_LOCAL_OUT netfilter hook.  Headroom is grown with
 * skb_realloc_headroom() when the options plus link-layer header do not
 * fit.  If the final packet exceeds the path MTU and @ipfragok is not
 * set, an ICMPV6_PKT_TOOBIG is sent back to ourselves and the packet is
 * dropped.
 */
205 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
/* Not enough headroom: reallocate and keep write ownership with sk. */
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
236 skb_set_owner_w(skb, sk);
/* Push extension headers; proto becomes the outermost nexthdr, and a
   routing header may redirect first_hop. */
239 ipv6_push_frag_opts(skb, opt, &proto);
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
247 * Fill in the IPv6 header
/* Version 6 plus the caller's flow label. */
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
253 hlimit = np->hop_limit;
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
257 hdr->payload_len = htons(seg_len);
258 hdr->nexthdr = proto;
259 hdr->hop_limit = hlimit;
261 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
262 ipv6_addr_copy(&hdr->daddr, first_hop);
/* Fits the MTU (or the caller permits fragmentation): send it. */
265 if ((skb->len <= mtu) || ipfragok) {
266 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
267 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
/* Too big: notify ourselves so the socket learns the path MTU. */
271 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
273 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
274 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
280 * To avoid extra problems ND packets are sent through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
/*
 * ip6_nd_hdr - build a bare IPv6 header for neighbour-discovery
 * packets: version field, payload length, nexthdr, the socket's
 * hop_limit, and the given source/destination addresses.  No extension
 * headers and a zero flow label.
 */
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 struct in6_addr *saddr, struct in6_addr *daddr,
290 struct ipv6_pinfo *np = inet6_sk(sk);
294 skb->protocol = htons(ETH_P_IPV6);
297 totlen = len + sizeof(struct ipv6hdr);
299 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
/* Version 6, zero traffic class and flow label. */
302 *(u32*)hdr = htonl(0x60000000);
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * ip6_call_ra_chain - deliver a Router Alert packet to every raw socket
 * registered for selector @sel on ip6_ra_chain.  Each earlier match
 * receives a clone; the final match consumes @skb itself.  Returns
 * nonzero when at least one socket took the packet.
 */
314 int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
322 if (sk && ra->sel == sel) {
/* A previous match exists: feed it a clone, keep skb for the last one. */
324 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
326 rawv6_rcv(last, skb2);
/* The last matching socket gets the original skb. */
333 rawv6_rcv(last, skb);
334 read_unlock(&ip6_ra_lock);
337 read_unlock(&ip6_ra_lock);
/* Final step of forwarding: hand the packet to dst_output(). */
341 static inline int ip6_forward_finish(struct sk_buff *skb)
343 return dst_output(skb);
/*
 * ip6_forward - forward a received packet towards dst->dev.
 *
 * Checks, in order: forwarding enabled, XFRM forward policy, Router
 * Alert delivery via ip6_call_ra_chain(), hop limit > 1 (else
 * ICMPV6_TIME_EXCEED), XFRM routing, a possible redirect when the
 * packet would leave through the interface it arrived on, sanity of
 * multicast/loopback/link-local source addresses, and the path MTU
 * (else ICMPV6_PKT_TOOBIG).  The hop limit is decremented only after
 * skb_cow(), and the packet leaves through the NF_IP6_FORWARD hook.
 */
346 int ip6_forward(struct sk_buff *skb)
348 struct dst_entry *dst = skb->dst;
349 struct ipv6hdr *hdr = skb->nh.ipv6h;
350 struct inet6_skb_parm *opt = IP6CB(skb);
352 if (ipv6_devconf.forwarding == 0)
355 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
356 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
/* Any received checksum covers the old hop count; invalidate it. */
360 skb->ip_summed = CHECKSUM_NONE;
363 * We DO NOT make any processing on
364 * RA packets, pushing them to user level AS IS
365 * without any warranty that application will be able
366 * to interpret them. The reason is that we
367 * cannot make anything clever here.
369 * We are not end-node, so that if packet contains
370 * AH/ESP, we cannot make anything.
371 * Defragmentation also would be mistake, RA packets
372 * cannot be fragmented, because there is no warranty
373 * that different fragments will go along one path. --ANK
376 u8 *ptr = skb->nh.raw + opt->ra;
377 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
382 * check and decrement ttl
384 if (hdr->hop_limit <= 1) {
385 /* Force OUTPUT device used as source address */
387 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
394 if (!xfrm6_route_forward(skb)) {
395 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
399 /* IPv6 specs say nothing about it, but it is clear that we cannot
400 send redirects to source routed frames.
402 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
403 struct in6_addr *target = NULL;
405 struct neighbour *n = dst->neighbour;
408 * incoming and outgoing devices are the same
412 rt = (struct rt6_info *) dst;
/* Redirect target: the gateway for indirect routes, else the final
   destination itself. */
413 if ((rt->rt6i_flags & RTF_GATEWAY))
414 target = (struct in6_addr*)&n->primary_key;
416 target = &hdr->daddr;
418 /* Limit redirects both by destination (here)
419 and by source (inside ndisc_send_redirect)
421 if (xrlim_allow(dst, 1*HZ))
422 ndisc_send_redirect(skb, n, target);
423 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
424 |IPV6_ADDR_LINKLOCAL)) {
425 /* This check is security critical. */
/* Packet larger than the path MTU: bounce a Packet Too Big. */
429 if (skb->len > dst_pmtu(dst)) {
430 /* Again, force OUTPUT device used as source address */
432 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
433 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
434 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
439 if (skb_cow(skb, dst->dev->hard_header_len)) {
440 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
446 /* Mangling hops number delayed to point after skb COW */
450 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
451 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
454 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata - copy per-packet metadata from @from to @to when
 * building fragments: packet type, priority, protocol, security label,
 * dst reference, traffic-control index, and the netfilter state (mark,
 * conntrack, bridge info, debug flags).
 */
460 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
462 to->pkt_type = from->pkt_type;
463 to->priority = from->priority;
464 to->protocol = from->protocol;
465 to->security = from->security;
/* Replace any dst on the target with a fresh ref on the source's. */
466 dst_release(to->dst);
467 to->dst = dst_clone(from->dst);
470 #ifdef CONFIG_NET_SCHED
471 to->tc_index = from->tc_index;
473 #ifdef CONFIG_NETFILTER
474 to->nfmark = from->nfmark;
475 /* Connection association is same as pre-frag packet */
476 to->nfct = from->nfct;
477 nf_conntrack_get(to->nfct);
478 to->nfctinfo = from->nfctinfo;
479 #ifdef CONFIG_BRIDGE_NETFILTER
480 nf_bridge_put(to->nf_bridge);
481 to->nf_bridge = from->nf_bridge;
482 nf_bridge_get(to->nf_bridge);
484 #ifdef CONFIG_NETFILTER_DEBUG
485 to->nf_debug = from->nf_debug;
/*
 * ip6_find_1stfragopt - locate where a Fragment header must be inserted.
 *
 * Walks the extension-header chain starting after the fixed IPv6
 * header.  On return, *@nexthdr points at the nexthdr byte to patch and
 * the returned offset marks the start of the fragmentable part (a
 * Destination Options header only terminates the walk once a Routing
 * header has been seen).
 */
490 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
492 u16 offset = sizeof(struct ipv6hdr);
493 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
494 unsigned int packet_len = skb->tail - skb->nh.raw;
496 *nexthdr = &skb->nh.ipv6h->nexthdr;
498 while (offset + 1 <= packet_len) {
503 case NEXTHDR_ROUTING:
505 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
506 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
/* Advance to the next extension header in the chain. */
507 offset += ipv6_optlen(exthdr);
508 *nexthdr = &exthdr->nexthdr;
509 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
/*
 * ip6_fragment - split a too-large packet into fragments and push each
 * one through @output.
 *
 * Fast path: when the skb already carries a frag_list of properly
 * sized, unshared, correctly-owned sub-buffers, a Fragment header is
 * inserted in front of each piece in place.  Slow path: fresh skbs are
 * allocated and the payload is copied out in mtu-sized chunks with
 * skb_copy_bits().  FRAGOKS / FRAGFAILS / FRAGCREATES counters track
 * the outcome.
 */
519 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
521 struct net_device *dev;
522 struct sk_buff *frag;
523 struct rt6_info *rt = (struct rt6_info*)skb->dst;
524 struct ipv6hdr *tmp_hdr;
526 unsigned int mtu, hlen, left, len;
528 int ptr, offset = 0, err=0;
529 u8 *prevhdr, nexthdr = 0;
532 hlen = ip6_find_1stfragopt(skb, &prevhdr);
/* Payload space left per fragment once the frag header is added. */
535 mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
537 if (skb_shinfo(skb)->frag_list) {
538 int first_len = skb_pagelen(skb);
/* Fast path only if every piece has correct geometry and ownership. */
540 if (first_len - hlen > mtu ||
541 ((first_len - hlen) & 7) ||
545 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
546 /* Correct geometry. */
547 if (frag->len > mtu ||
548 ((frag->len & 7) && frag->next) ||
549 skb_headroom(frag) < hlen)
552 /* Correct socket ownership. */
553 if (frag->sk == NULL)
556 /* Partially cloned skb? */
557 if (skb_shared(frag))
563 frag = skb_shinfo(skb)->frag_list;
564 skb_shinfo(skb)->frag_list = NULL;
/* Save the unfragmentable part so it can be replicated per fragment. */
567 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
569 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
573 *prevhdr = NEXTHDR_FRAGMENT;
574 memcpy(tmp_hdr, skb->nh.raw, hlen);
575 __skb_pull(skb, hlen);
576 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
577 skb->nh.raw = __skb_push(skb, hlen);
578 memcpy(skb->nh.raw, tmp_hdr, hlen);
580 ipv6_select_ident(skb, fh);
581 fh->nexthdr = nexthdr;
/* First fragment: offset 0, More Fragments set. */
583 fh->frag_off = htons(IP6_MF);
584 frag_id = fh->identification;
586 first_len = skb_pagelen(skb);
587 skb->data_len = first_len - skb_headlen(skb);
588 skb->len = first_len;
589 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
593 /* Prepare header of the next frame,
594 * before previous one went down. */
596 frag->h.raw = frag->data;
597 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
598 frag->nh.raw = __skb_push(frag, hlen);
599 memcpy(frag->nh.raw, tmp_hdr, hlen);
600 offset += skb->len - hlen - sizeof(struct frag_hdr);
601 fh->nexthdr = nexthdr;
603 fh->frag_off = htons(offset);
/* Every fragment except the last carries the More Fragments bit. */
604 if (frag->next != NULL)
605 fh->frag_off |= htons(IP6_MF);
606 fh->identification = frag_id;
607 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
608 ip6_copy_metadata(frag, skb);
624 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
634 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/* Slow path: copy the payload into freshly allocated fragments. */
639 left = skb->len - hlen; /* Space per frame */
640 ptr = hlen; /* Where to start from */
643 * Fragment the datagram.
646 *prevhdr = NEXTHDR_FRAGMENT;
649 * Keep copying data until we run out.
653 /* IF: it doesn't fit, use 'mtu' - the data space left */
656 /* IF: we are not sending upto and including the packet end
657 then align the next start on an eight byte boundary */
665 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
666 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
667 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
673 * Set up data on packet
676 ip6_copy_metadata(frag, skb);
677 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
678 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
679 frag->nh.raw = frag->data;
680 fh = (struct frag_hdr*)(frag->data + hlen);
681 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
684 * Charge the memory for the fragment to any owner
688 skb_set_owner_w(frag, skb->sk);
691 * Copy the packet header into the new buffer.
693 memcpy(frag->nh.raw, skb->data, hlen);
696 * Build fragment header.
698 fh->nexthdr = nexthdr;
/* The first fragment picks the identification; the rest reuse it. */
701 ipv6_select_ident(skb, fh);
702 frag_id = fh->identification;
704 fh->identification = frag_id;
707 * Copy a block of the IP datagram.
709 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
713 fh->frag_off = htons(offset);
715 fh->frag_off |= htons(IP6_MF);
716 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
722 * Put this fragment into the sending queue.
725 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
732 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
737 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/*
 * ip6_dst_lookup - obtain a dst entry for flow @fl on socket @sk.
 *
 * A cached socket route is validated first (the host-route destination
 * or daddr_cache must still match, and oif must agree); otherwise a
 * fresh ip6_route_output() lookup is made.  If the flow has no source
 * address yet, one is selected with ipv6_get_saddr().  On error the dst
 * is released before returning.
 */
741 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
747 struct ipv6_pinfo *np = inet6_sk(sk);
749 *dst = sk_dst_check(sk, np->dst_cookie);
751 struct rt6_info *rt = (struct rt6_info*)*dst;
753 /* Yes, checking route validity in not connected
754 case is not very simple. Take into account,
755 that we do not support routing by source, TOS,
756 and MSG_DONTROUTE --ANK (980726)
758 1. If route was host route, check that
759 cached destination is current.
760 If it is network route, we still may
761 check its validity using saved pointer
762 to the last used address: daddr_cache.
763 We do not want to save whole address now,
764 (because main consumer of this service
765 is tcp, which has not this problem),
766 so that the last trick works only on connected
768 2. oif also should be the same.
771 if (((rt->rt6i_dst.plen != 128 ||
772 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
773 && (np->daddr_cache == NULL ||
774 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
775 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
/* No (valid) cached route for this flow: do a full lookup. */
783 *dst = ip6_route_output(sk, fl);
785 if ((err = (*dst)->error))
786 goto out_err_release;
/* No source address yet: pick one suitable for the destination. */
788 if (ipv6_addr_any(&fl->fl6_src)) {
789 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
793 printk(KERN_DEBUG "ip6_dst_lookup: "
794 "no available source address\n");
796 goto out_err_release;
/*
 * ip6_append_data - append user data to the socket's pending write
 * queue (corked output path).
 *
 * On the first call (empty write queue) the cork state — options copy,
 * route reference, hop limit, MTU — is recorded from the arguments;
 * later calls reuse it.  Data is packed into the tail skb while it
 * fits; otherwise new fragments up to maxfraglen are allocated with
 * sock_alloc_send_skb(), and page fragments are used when the device
 * advertises NETIF_F_SG.  @getfrag copies user data (and can compute a
 * checksum via the odd argument).  On error the pending length is
 * rolled back and OUTDISCARDS is counted.
 */
808 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
809 void *from, int length, int transhdrlen,
810 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
813 struct inet_opt *inet = inet_sk(sk);
814 struct ipv6_pinfo *np = inet6_sk(sk);
816 unsigned int maxfraglen, fragheaderlen;
823 int csummode = CHECKSUM_NONE;
/* First chunk for this cork: record options and routing state. */
827 if (skb_queue_empty(&sk->sk_write_queue)) {
832 if (np->cork.opt == NULL) {
833 np->cork.opt = kmalloc(opt->tot_len,
835 if (unlikely(np->cork.opt == NULL))
837 } else if (np->cork.opt->tot_len < opt->tot_len) {
838 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
841 memcpy(np->cork.opt, opt, opt->tot_len);
842 inet->cork.flags |= IPCORK_OPT;
843 /* need source address above miyazawa*/
845 dst_hold(&rt->u.dst);
848 np->cork.hop_limit = hlimit;
849 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
850 inet->cork.length = 0;
851 sk->sk_sndmsg_page = NULL;
852 sk->sk_sndmsg_off = 0;
853 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
855 transhdrlen += exthdrlen;
/* Subsequent chunks: reuse the state recorded at cork time. */
859 if (inet->cork.flags & IPCORK_OPT)
863 mtu = inet->cork.fragsize;
866 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
868 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
869 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
/* Refuse to grow the datagram past the IPv6 payload limit. */
871 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
872 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
873 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
879 * Let's try using as much space as possible.
880 * Use MTU if total length of the message fits into the MTU.
881 * Otherwise, we need to reserve fragment header and
882 * fragment alignment (= 8-15 octets, in total).
884 * Note that we may need to "move" the data from the tail
885 * of the buffer to the new fragment when we split
888 * FIXME: It may be fragmented into multiple chunks
889 * at once if non-fragmentable extension headers
894 inet->cork.length += length;
896 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
900 /* Check if the remaining data fits into current packet. */
901 copy = mtu - skb->len;
903 copy = maxfraglen - skb->len;
/* Current skb is full: allocate a new fragment skb. */
907 unsigned int datalen;
908 unsigned int fraglen;
909 unsigned int fraggap;
910 unsigned int alloclen;
911 struct sk_buff *skb_prev;
915 /* There's no room in the current skb */
917 fraggap = skb_prev->len - maxfraglen;
922 * If remaining data exceeds the mtu,
923 * we know we need more fragment(s).
925 datalen = length + fraggap;
926 if (datalen > mtu - fragheaderlen)
927 datalen = maxfraglen - fragheaderlen;
929 fraglen = datalen + fragheaderlen;
930 if ((flags & MSG_MORE) &&
931 !(rt->u.dst.dev->features&NETIF_F_SG))
934 alloclen = datalen + fragheaderlen;
937 * The last fragment gets additional space at tail.
938 * Note: we overallocate on fragments with MSG_MORE
939 * because we have no idea if we're the last one.
941 if (datalen == length + fraggap)
942 alloclen += rt->u.dst.trailer_len;
945 * We just reserve space for fragment header.
946 * Note: this may be overallocation if the message
947 * (without MSG_MORE) fits into the MTU.
949 alloclen += sizeof(struct frag_hdr);
952 skb = sock_alloc_send_skb(sk,
954 (flags & MSG_DONTWAIT), &err);
957 if (atomic_read(&sk->sk_wmem_alloc) <=
959 skb = sock_wmalloc(sk,
960 alloclen + hh_len, 1,
962 if (unlikely(skb == NULL))
968 * Fill in the control structures
970 skb->ip_summed = csummode;
972 /* reserve for fragmentation */
973 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
976 * Find where to start putting bytes
978 data = skb_put(skb, fraglen);
979 skb->nh.raw = data + exthdrlen;
980 data += fragheaderlen;
981 skb->h.raw = data + exthdrlen;
/* Move the 8-byte-alignment overhang from the previous skb into this
   one, keeping both skbs' checksums consistent. */
984 skb->csum = skb_copy_and_csum_bits(
985 skb_prev, maxfraglen,
986 data + transhdrlen, fraggap, 0);
987 skb_prev->csum = csum_sub(skb_prev->csum,
990 skb_trim(skb_prev, maxfraglen);
992 copy = datalen - transhdrlen - fraggap;
997 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1004 length -= datalen - fraggap;
/* Only the first skb carries the transport/extension headers. */
1007 csummode = CHECKSUM_NONE;
1010 * Put the packet on the pending queue
1012 __skb_queue_tail(&sk->sk_write_queue, skb);
/* No scatter/gather: copy straight into the skb's linear area. */
1019 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1023 if (getfrag(from, skb_put(skb, copy),
1024 offset, copy, off, skb) < 0) {
1025 __skb_trim(skb, off);
/* Scatter/gather: append the data into page fragments. */
1030 int i = skb_shinfo(skb)->nr_frags;
1031 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1032 struct page *page = sk->sk_sndmsg_page;
1033 int off = sk->sk_sndmsg_off;
/* Reuse the cached partially-filled page when space remains. */
1036 if (page && (left = PAGE_SIZE - off) > 0) {
1039 if (page != frag->page) {
1040 if (i == MAX_SKB_FRAGS) {
1045 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1046 frag = &skb_shinfo(skb)->frags[i];
1048 } else if(i < MAX_SKB_FRAGS) {
1049 if (copy > PAGE_SIZE)
1051 page = alloc_pages(sk->sk_allocation, 0);
1056 sk->sk_sndmsg_page = page;
1057 sk->sk_sndmsg_off = 0;
1059 skb_fill_page_desc(skb, i, page, 0, 0);
1060 frag = &skb_shinfo(skb)->frags[i];
1061 skb->truesize += PAGE_SIZE;
1062 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1067 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1071 sk->sk_sndmsg_off += copy;
1074 skb->data_len += copy;
/* Error path: undo the length accounting and count the discard. */
1081 inet->cork.length -= length;
1082 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - flush the corked write queue as one IPv6
 * datagram.
 *
 * All queued skbs are chained onto the first skb's frag_list, the IPv6
 * header (plus any corked extension headers) is pushed in front, and
 * the result is sent through NF_IP6_LOCAL_OUT / dst_output.  Datagrams
 * larger than IPV6_MAXPLEN get payload_len 0.  Cork state (options,
 * route reference, flow key) is cleared on the way out.
 */
1086 int ip6_push_pending_frames(struct sock *sk)
1088 struct sk_buff *skb, *tmp_skb;
1089 struct sk_buff **tail_skb;
1090 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1091 struct inet_opt *inet = inet_sk(sk);
1092 struct ipv6_pinfo *np = inet6_sk(sk);
1093 struct ipv6hdr *hdr;
1094 struct ipv6_txoptions *opt = np->cork.opt;
1095 struct rt6_info *rt = np->cork.rt;
1096 struct flowi *fl = &inet->cork.fl;
1097 unsigned char proto = fl->proto;
1100 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1102 tail_skb = &(skb_shinfo(skb)->frag_list);
1104 /* move skb->data to ip header from ext header */
1105 if (skb->data < skb->nh.raw)
1106 __skb_pull(skb, skb->nh.raw - skb->data);
/* Link every remaining queued skb onto the head skb's frag_list. */
1107 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1108 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1109 *tail_skb = tmp_skb;
1110 tail_skb = &(tmp_skb->next);
1111 skb->len += tmp_skb->len;
1112 skb->data_len += tmp_skb->len;
1113 #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1114 skb->truesize += tmp_skb->truesize;
1115 __sock_put(tmp_skb->sk);
1116 tmp_skb->destructor = NULL;
1121 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1122 __skb_pull(skb, skb->h.raw - skb->nh.raw);
/* Push corked extension headers; proto and final_dst may be rewritten. */
1123 if (opt && opt->opt_flen)
1124 ipv6_push_frag_opts(skb, opt, &proto);
1125 if (opt && opt->opt_nflen)
1126 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1128 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1130 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
/* payload_len is 0 when the datagram exceeds IPV6_MAXPLEN. */
1132 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1133 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1135 hdr->payload_len = 0;
1136 hdr->hop_limit = np->cork.hop_limit;
1137 hdr->nexthdr = proto;
1138 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1139 ipv6_addr_copy(&hdr->daddr, final_dst);
1141 skb->dst = dst_clone(&rt->u.dst);
1142 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1143 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1146 err = inet->recverr ? net_xmit_errno(err) : 0;
/* Tear down the cork: options, route reference, flow key. */
1152 inet->cork.flags &= ~IPCORK_OPT;
1154 kfree(np->cork.opt);
1155 np->cork.opt = NULL;
1158 dst_release(&np->cork.rt->u.dst);
1161 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * ip6_flush_pending_frames - drop everything queued on the cork,
 * counting each skb as an output discard, then release the cork's
 * options, route reference and flow key.
 */
1167 void ip6_flush_pending_frames(struct sock *sk)
1169 struct inet_opt *inet = inet_sk(sk);
1170 struct ipv6_pinfo *np = inet6_sk(sk);
1171 struct sk_buff *skb;
1173 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1174 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1178 inet->cork.flags &= ~IPCORK_OPT;
1181 kfree(np->cork.opt);
1182 np->cork.opt = NULL;
1185 dst_release(&np->cork.rt->u.dst);
1188 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));