/* linux-2.6 (patch-2_6_7-vs1_9_1_12): net/ipv6/ip6_output.c */
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */
30
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
42
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
45
46 #include <net/sock.h>
47 #include <net/snmp.h>
48
49 #include <net/ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
55 #include <net/icmp.h>
56 #include <net/xfrm.h>
57
58 static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**));
59
60 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61 {
62         static u32 ipv6_fragmentation_id = 1;
63         static spinlock_t ip6_id_lock = SPIN_LOCK_UNLOCKED;
64
65         spin_lock_bh(&ip6_id_lock);
66         fhdr->identification = htonl(ipv6_fragmentation_id);
67         if (++ipv6_fragmentation_id == 0)
68                 ipv6_fragmentation_id = 1;
69         spin_unlock_bh(&ip6_id_lock);
70 }
71
/*
 * Final transmit step: prepend the link-layer header (from the cached
 * hh entry when available, otherwise via the neighbour subsystem) and
 * hand the packet to the device layer.
 */
static inline int ip6_output_finish(struct sk_buff *skb)
{

        struct dst_entry *dst = skb->dst;
        struct hh_cache *hh = dst->hh;

        if (hh) {
                int hh_alen;

                /* Copy the cached hardware header in front of the data
                 * under hh_lock: neighbour updates may rewrite hh_data
                 * concurrently. */
                read_lock_bh(&hh->hh_lock);
                hh_alen = HH_DATA_ALIGN(hh->hh_len);
                memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
                read_unlock_bh(&hh->hh_lock);
                skb_push(skb, hh->hh_len);
                return hh->hh_output(skb);
        } else if (dst->neighbour)
                /* No cached header yet: let neighbour resolution build
                 * the link-layer header and transmit. */
                return dst->neighbour->output(skb);

        /* No way to reach the next hop: count as no-route and drop. */
        IP6_INC_STATS_BH(OutNoRoutes);
        kfree_skb(skb);
        return -EINVAL;

}
95
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        /* Rewind the skb so it looks like a freshly received packet:
         * mac header at the current data pointer, data advanced to the
         * network header. */
        newskb->mac.raw = newskb->data;
        __skb_pull(newskb, newskb->nh.raw - newskb->data);
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;       /* locally generated */
        BUG_TRAP(newskb->dst);

        /* Feed the copy back into the receive path. */
        netif_rx(newskb);
        return 0;
}
108
109
/*
 * Second-stage output: loop multicast packets back to local listeners
 * when required, then run the POST_ROUTING netfilter hook on the way
 * to ip6_output_finish().
 */
static int ip6_output2(struct sk_buff **pskb)
{
        struct sk_buff *skb = *pskb;
        struct dst_entry *dst = skb->dst;
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
                struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;

                /* Loop a copy back to ourselves when the socket has
                 * multicast loopback enabled (the default when there is
                 * no socket) and this host is a member of the group. */
                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
                    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
                                &skb->nh.ipv6h->saddr)) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
                                        newskb->dev,
                                        ip6_dev_loopback_xmit);

                        /* Hop limit 0: the packet must not leave the
                         * host; the loopback copy above was enough. */
                        if (skb->nh.ipv6h->hop_limit == 0) {
                                IP6_INC_STATS(OutDiscards);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_INC_STATS(OutMcastPkts);
        }

        return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
}
147
148 int ip6_output(struct sk_buff **pskb)
149 {
150         struct sk_buff *skb = *pskb;
151
152         if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list))
153                 return ip6_fragment(pskb, ip6_output2);
154         else
155                 return ip6_output2(pskb);
156 }
157
#ifdef CONFIG_NETFILTER
/*
 * Redo the routing decision for a packet whose headers netfilter may
 * have rewritten.  On success skb->dst is replaced with the fresh
 * route and 0 is returned; on routing failure the new dst reference is
 * dropped and -EINVAL is returned (skb is left untouched).
 */
int ip6_route_me_harder(struct sk_buff *skb)
{
        struct ipv6hdr *iph = skb->nh.ipv6h;
        struct dst_entry *dst;
        struct flowi fl = {
                .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
                .nl_u =
                { .ip6_u =
                  { .daddr = iph->daddr,
                    .saddr = iph->saddr, } },
                .proto = iph->nexthdr,
        };

        dst = ip6_route_output(skb->sk, &fl);

        if (dst->error) {
                IP6_INC_STATS(OutNoRoutes);
                LIMIT_NETDEBUG(
                        printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
                dst_release(dst);
                return -EINVAL;
        }

        /* Drop old route. */
        dst_release(skb->dst);

        skb->dst = dst;
        return 0;
}
#endif
189
/*
 * LOCAL_OUT okfn: if netfilter marked the packet as altered, re-route
 * it before passing it on to dst_output().  Frees the skb and returns
 * -EINVAL when re-routing fails.
 */
static inline int ip6_maybe_reroute(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER
        if (skb->nfcache & NFC_ALTERED){
                if (ip6_route_me_harder(skb) != 0){
                        kfree_skb(skb);
                        return -EINVAL;
                }
        }
#endif /* CONFIG_NETFILTER */
        return dst_output(skb);
}
202
/*
 *      xmit an sk_buff (used by TCP)
 *
 *      Pushes extension headers (if any) and the IPv6 header onto skb,
 *      then sends it through the LOCAL_OUT netfilter hook.  The skb is
 *      always consumed.  When the packet is larger than the path MTU
 *      and ipfragok is not set, an ICMPV6_PKT_TOOBIG is delivered to
 *      ourselves and -EMSGSIZE is returned.
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)
{
        struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr;
        u8  proto = fl->proto;
        int seg_len = skb->len;
        int hlimit;
        u32 mtu;

        if (opt) {
                int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                /* Re-allocate with enough headroom for the extension
                 * headers; the original skb is consumed either way. */
                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        kfree_skb(skb);
                        skb = skb2;
                        if (skb == NULL) {
                                IP6_INC_STATS(OutDiscards);
                                return -ENOBUFS;
                        }
                        if (sk)
                                skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        /* May rewrite first_hop (e.g. routing header). */
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));

        /*
         *      Fill in the IPv6 header
         */

        /* Version 6 plus the caller's flow label. */
        *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
        hlimit = -1;
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);        /* route default */

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        mtu = dst_pmtu(dst);
        if ((skb->len <= mtu) || ipfragok) {
                IP6_INC_STATS(OutRequests);
                return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
        }

        /* Too big and fragmentation not allowed (TCP relies on PMTU):
         * report the MTU back to ourselves and drop. */
        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
        IP6_INC_STATS(FragFails);
        kfree_skb(skb);
        return -EMSGSIZE;
}
280
281 /*
282  *      To avoid extra problems ND packets are send through this
283  *      routine. It's code duplication but I really want to avoid
284  *      extra checks since ipv6_build_header is used by TCP (which
285  *      is for us performance critical)
286  */
287
288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289                struct in6_addr *saddr, struct in6_addr *daddr,
290                int proto, int len)
291 {
292         struct ipv6_pinfo *np = inet6_sk(sk);
293         struct ipv6hdr *hdr;
294         int totlen;
295
296         skb->protocol = htons(ETH_P_IPV6);
297         skb->dev = dev;
298
299         totlen = len + sizeof(struct ipv6hdr);
300
301         hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
302         skb->nh.ipv6h = hdr;
303
304         *(u32*)hdr = htonl(0x60000000);
305
306         hdr->payload_len = htons(len);
307         hdr->nexthdr = proto;
308         hdr->hop_limit = np->hop_limit;
309
310         ipv6_addr_copy(&hdr->saddr, saddr);
311         ipv6_addr_copy(&hdr->daddr, daddr);
312
313         return 0;
314 }
315
316 int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317 {
318         struct ip6_ra_chain *ra;
319         struct sock *last = NULL;
320
321         read_lock(&ip6_ra_lock);
322         for (ra = ip6_ra_chain; ra; ra = ra->next) {
323                 struct sock *sk = ra->sk;
324                 if (sk && ra->sel == sel) {
325                         if (last) {
326                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327                                 if (skb2)
328                                         rawv6_rcv(last, skb2);
329                         }
330                         last = sk;
331                 }
332         }
333
334         if (last) {
335                 rawv6_rcv(last, skb);
336                 read_unlock(&ip6_ra_lock);
337                 return 1;
338         }
339         read_unlock(&ip6_ra_lock);
340         return 0;
341 }
342
/* Post-NF_IP6_FORWARD hook: push the forwarded packet to its dst. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}
347
/*
 * Forward an IPv6 packet: policy/hop-limit/MTU checks, Router Alert
 * hand-off, redirect generation, then the FORWARD netfilter hook.
 * Consumes the skb on every path; returns 0 on success or a negative
 * errno on drop.
 */
int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr = skb->nh.ipv6h;
        struct inet6_skb_parm *opt = IP6CB(skb);

        /* Forwarding disabled on this host. */
        if (ipv6_devconf.forwarding == 0)
                goto error;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(InDiscards);
                goto drop;
        }

        /* Checksum must be re-verified/recomputed after forwarding. */
        skb->ip_summed = CHECKSUM_NONE;

        /*
         *      We DO NOT make any processing on
         *      RA packets, pushing them to user level AS IS
         *      without ane WARRANTY that application will be able
         *      to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not end-node, so that if packet contains
         *      AH/ESP, we cannot make anything.
         *      Defragmentation also would be mistake, RA packets
         *      cannot be fragmented, because there is no warranty
         *      that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb->nh.raw + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                            0, skb->dev);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(InDiscards);
                goto drop;
        }

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr*)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
                                                |IPV6_ADDR_LINKLOCAL)) {
                /* This check is security critical. */
                goto error;
        }

        /* Too big for the outgoing link: tell the sender the PMTU. */
        if (skb->len > dst_pmtu(dst)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
                IP6_INC_STATS_BH(InTooBigErrors);
                IP6_INC_STATS_BH(FragFails);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(OutDiscards);
                goto drop;
        }

        /* skb_cow may have reallocated: refetch the header pointer. */
        hdr = skb->nh.ipv6h;

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(OutForwDatagrams);
        return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);

error:
        IP6_INC_STATS_BH(InAddrErrors);
drop:
        kfree_skb(skb);
        return -EINVAL;
}
461
/*
 * Copy per-packet metadata from the original skb onto a freshly
 * allocated fragment: takes a new reference on the dst and raises the
 * conntrack/bridge refcounts as needed.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        to->security = from->security;
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
#ifdef CONFIG_NETFILTER
        to->nfmark = from->nfmark;
        /* Connection association is same as pre-frag packet */
        to->nfct = from->nfct;
        nf_conntrack_get(to->nfct);
#ifdef CONFIG_BRIDGE_NETFILTER
        nf_bridge_put(to->nf_bridge);
        to->nf_bridge = from->nf_bridge;
        nf_bridge_get(to->nf_bridge);
#endif
#ifdef CONFIG_NETFILTER_DEBUG
        to->nf_debug = from->nf_debug;
#endif
#endif
}
489
490 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
491 {
492         u16 offset = sizeof(struct ipv6hdr);
493         struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
494         unsigned int packet_len = skb->tail - skb->nh.raw;
495         int found_rhdr = 0;
496         *nexthdr = &skb->nh.ipv6h->nexthdr;
497
498         while (offset + 1 <= packet_len) {
499
500                 switch (**nexthdr) {
501
502                 case NEXTHDR_HOP:
503                 case NEXTHDR_ROUTING:
504                 case NEXTHDR_DEST:
505                         if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
506                         if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
507                         offset += ipv6_optlen(exthdr);
508                         *nexthdr = &exthdr->nexthdr;
509                         exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
510                         break;
511                 default :
512                         return offset;
513                 }
514         }
515
516         return offset;
517 }
518
519 static int ip6_fragment(struct sk_buff **pskb, int (*output)(struct sk_buff**))
520 {
521         struct net_device *dev;
522         struct sk_buff *frag, *skb = *pskb;
523         struct rt6_info *rt = (struct rt6_info*)skb->dst;
524         struct ipv6hdr *tmp_hdr;
525         struct frag_hdr *fh;
526         unsigned int mtu, hlen, left, len;
527         u32 frag_id = 0;
528         int ptr, offset = 0, err=0;
529         u8 *prevhdr, nexthdr = 0;
530
531         dev = rt->u.dst.dev;
532         hlen = ip6_find_1stfragopt(skb, &prevhdr);
533         nexthdr = *prevhdr;
534
535         mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
536
537         if (skb_shinfo(skb)->frag_list) {
538                 int first_len = skb_pagelen(skb);
539
540                 if (first_len - hlen > mtu ||
541                     ((first_len - hlen) & 7) ||
542                     skb_cloned(skb))
543                         goto slow_path;
544
545                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
546                         /* Correct geometry. */
547                         if (frag->len > mtu ||
548                             ((frag->len & 7) && frag->next) ||
549                             skb_headroom(frag) < hlen)
550                             goto slow_path;
551
552                         /* Correct socket ownership. */
553                         if (frag->sk == NULL)
554                                 goto slow_path;
555
556                         /* Partially cloned skb? */
557                         if (skb_shared(frag))
558                                 goto slow_path;
559                 }
560
561                 err = 0;
562                 offset = 0;
563                 frag = skb_shinfo(skb)->frag_list;
564                 skb_shinfo(skb)->frag_list = 0;
565                 /* BUILD HEADER */
566
567                 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
568                 if (!tmp_hdr) {
569                         IP6_INC_STATS(FragFails);
570                         return -ENOMEM;
571                 }
572
573                 *prevhdr = NEXTHDR_FRAGMENT;
574                 memcpy(tmp_hdr, skb->nh.raw, hlen);
575                 __skb_pull(skb, hlen);
576                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
577                 skb->nh.raw = __skb_push(skb, hlen);
578                 memcpy(skb->nh.raw, tmp_hdr, hlen);
579
580                 ipv6_select_ident(skb, fh);
581                 fh->nexthdr = nexthdr;
582                 fh->reserved = 0;
583                 fh->frag_off = htons(IP6_MF);
584                 frag_id = fh->identification;
585
586                 first_len = skb_pagelen(skb);
587                 skb->data_len = first_len - skb_headlen(skb);
588                 skb->len = first_len;
589                 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
590  
591
592                 for (;;) {
593                         /* Prepare header of the next frame,
594                          * before previous one went down. */
595                         if (frag) {
596                                 frag->h.raw = frag->data;
597                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
598                                 frag->nh.raw = __skb_push(frag, hlen);
599                                 memcpy(frag->nh.raw, tmp_hdr, hlen);
600                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
601                                 fh->nexthdr = nexthdr;
602                                 fh->reserved = 0;
603                                 fh->frag_off = htons(offset);
604                                 if (frag->next != NULL)
605                                         fh->frag_off |= htons(IP6_MF);
606                                 fh->identification = frag_id;
607                                 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
608                                 ip6_copy_metadata(frag, skb);
609                         }
610                         err = output(pskb);
611                         if (err || !frag) {
612                                 if (unlikely(skb != *pskb))
613                                         skb = *pskb;
614                                 break;
615                         }
616                         
617                         skb = frag;
618                         frag = skb->next;
619                         skb->next = NULL;
620                 }
621
622                 if (tmp_hdr)
623                         kfree(tmp_hdr);
624
625                 if (err == 0) {
626                         IP6_INC_STATS(FragOKs);
627                         return 0;
628                 }
629
630                 while (frag) {
631                         skb = frag->next;
632                         kfree_skb(frag);
633                         frag = skb;
634                 }
635
636                 IP6_INC_STATS(FragFails);
637                 return err;
638         }
639
640 slow_path:
641         left = skb->len - hlen;         /* Space per frame */
642         ptr = hlen;                     /* Where to start from */
643
644         /*
645          *      Fragment the datagram.
646          */
647
648         *prevhdr = NEXTHDR_FRAGMENT;
649
650         /*
651          *      Keep copying data until we run out.
652          */
653         while(left > 0) {
654                 len = left;
655                 /* IF: it doesn't fit, use 'mtu' - the data space left */
656                 if (len > mtu)
657                         len = mtu;
658                 /* IF: we are not sending upto and including the packet end
659                    then align the next start on an eight byte boundary */
660                 if (len < left) {
661                         len &= ~7;
662                 }
663                 /*
664                  *      Allocate buffer.
665                  */
666
667                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
668                         NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
669                         IP6_INC_STATS(FragFails);
670                         err = -ENOMEM;
671                         goto fail;
672                 }
673
674                 /*
675                  *      Set up data on packet
676                  */
677
678                 ip6_copy_metadata(frag, skb);
679                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
680                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
681                 frag->nh.raw = frag->data;
682                 fh = (struct frag_hdr*)(frag->data + hlen);
683                 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
684
685                 /*
686                  *      Charge the memory for the fragment to any owner
687                  *      it might possess
688                  */
689                 if (skb->sk)
690                         skb_set_owner_w(frag, skb->sk);
691
692                 /*
693                  *      Copy the packet header into the new buffer.
694                  */
695                 memcpy(frag->nh.raw, skb->data, hlen);
696
697                 /*
698                  *      Build fragment header.
699                  */
700                 fh->nexthdr = nexthdr;
701                 fh->reserved = 0;
702                 if (frag_id) {
703                         ipv6_select_ident(skb, fh);
704                         frag_id = fh->identification;
705                 } else
706                         fh->identification = frag_id;
707
708                 /*
709                  *      Copy a block of the IP datagram.
710                  */
711                 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
712                         BUG();
713                 left -= len;
714
715                 fh->frag_off = htons(offset);
716                 if (left > 0)
717                         fh->frag_off |= htons(IP6_MF);
718                 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
719
720                 ptr += len;
721                 offset += len;
722
723                 /*
724                  *      Put this fragment into the sending queue.
725                  */
726
727                 IP6_INC_STATS(FragCreates);
728
729                 err = output(&frag);
730                 if (err)
731                         goto fail;
732         }
733         kfree_skb(skb);
734         IP6_INC_STATS(FragOKs);
735         return err;
736
737 fail:
738         kfree_skb(skb); 
739         IP6_INC_STATS(FragFails);
740         return err;
741 }
742
/*
 * Resolve a dst entry for the flow fl.  A socket's cached route is
 * reused when it is still valid for this destination/oif; otherwise a
 * fresh routing lookup is performed.  Also selects a source address
 * into fl->fl6_src when none is set, and runs the xfrm lookup.  On
 * success *dst holds a reference the caller must release; on failure
 * *dst is NULL and a negative errno is returned.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        int err = 0;

        *dst = NULL;
        if (sk) {
                struct ipv6_pinfo *np = inet6_sk(sk);

                *dst = __sk_dst_check(sk, np->dst_cookie);
                if (*dst) {
                        struct rt6_info *rt = (struct rt6_info*)*dst;

                                /* Yes, checking route validity in not connected
                                   case is not very simple. Take into account,
                                   that we do not support routing by source, TOS,
                                   and MSG_DONTROUTE            --ANK (980726)

                                   1. If route was host route, check that
                                      cached destination is current.
                                      If it is network route, we still may
                                      check its validity using saved pointer
                                      to the last used address: daddr_cache.
                                      We do not want to save whole address now,
                                      (because main consumer of this service
                                       is tcp, which has not this problem),
                                      so that the last trick works only on connected
                                      sockets.
                                   2. oif also should be the same.
                                 */

                        if (((rt->rt6i_dst.plen != 128 ||
                              ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
                             && (np->daddr_cache == NULL ||
                                 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
                            || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
                                /* Cached route not usable for this flow. */
                                *dst = NULL;
                        } else
                                dst_hold(*dst);
                }
        }

        if (*dst == NULL)
                *dst = ip6_route_output(sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        /* No source address chosen yet: derive one from the route. */
        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);

                if (err) {
#if IP6_DEBUG >= 2
                        printk(KERN_DEBUG "ip6_dst_lookup: "
                               "no available source address\n");
#endif
                        goto out_err_release;
                }
        }
        if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
                err = -ENETUNREACH;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;
        return err;
}
813
/*
 *	ip6_append_data - queue user data on a socket's cork queue for
 *	later transmission by ip6_push_pending_frames().
 *
 *	Builds one or more sk_buffs on sk->sk_write_queue, each sized so
 *	that it can later be emitted as an IPv6 fragment of at most the
 *	path MTU.  The caller-supplied @getfrag copies @length bytes from
 *	@from into the packets (and may compute a checksum as it copies).
 *
 *	@transhdrlen is non-zero only on the first call for a datagram; it
 *	reserves room for the transport header in the first skb.  @opt,
 *	@fl, @rt and @hlimit are captured into the cork state on the first
 *	call and re-read from it on subsequent (corked) calls.
 *
 *	Returns 0 on success or a negative errno; on error the bytes not
 *	yet queued are subtracted back out of the cork byte count.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
		    unsigned int flags)
{
	struct inet_opt *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy = 0;
	int err;
	int offset = 0;
	/* First skb may inherit a checksum mode; reset to NONE afterwards. */
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking: first call for this datagram, so
		 * snapshot options, route, flow and MTU into the per-socket
		 * cork state used by the subsequent calls and by
		 * ip6_push_pending_frames().
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				/* A previously cached copy is too small to
				 * hold these options; refuse rather than
				 * overrun it. */
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa*/
		}
		/* Hold the route for as long as data is pending. */
		dst_hold(&rt->u.dst);
		np->cork.rt = rt;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
		inet->cork.length = 0;
		inet->sndmsg_page = NULL;
		inet->sndmsg_off = 0;
		/* Extension headers (and any dst-supplied header space)
		 * count toward the first fragment's payload budget. */
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Corked continuation: ignore the caller's route/flow/opts
		 * and use the state captured on the first call. */
		rt = np->cork.rt;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* Per-fragment overhead: IPv6 header plus non-fragmentable options.
	 * maxfraglen = largest skb->len such that the fragmentable part
	 * stays a multiple of 8 bytes and leaves room for a frag_hdr. */
	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		/* Total datagram must fit the 16-bit payload_len field. */
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	inet->cork.length += length;

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Room left in the current tail skb before it reaches the
		 * per-fragment limit; <= 0 means start a new skb. */
		if ((copy = maxfraglen - skb->len) <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int alloclen;
			BUG_TRAP(copy == 0);
alloc_new_skb:
			datalen = maxfraglen - fragheaderlen;
			if (datalen > length)
				datalen = length;
			fraglen = datalen + fragheaderlen;
			/* With MSG_MORE on a non-SG device, allocate a full
			 * fragment up front so later appends fit linearly. */
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = maxfraglen;
			else
				alloclen = fraglen;
			alloclen += sizeof(struct frag_hdr);
			if (transhdrlen) {
				/* First skb of the datagram: may block
				 * (honoring MSG_DONTWAIT) for sndbuf space. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-on skbs: non-blocking, bounded to
				 * twice the send buffer. */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve 8 byte for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb->nh.raw = data + exthdrlen;
			data += fragheaderlen;
			skb->h.raw = data + exthdrlen;
			/* Copy the payload portion (everything past the
			 * reserved transport header) into the new skb. */
			copy = datalen - transhdrlen;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen;
			/* Header reservations apply to the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* Non-SG device: append linearly into the tail skb,
			 * trimming back on a failed copy. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* SG device: append into the per-socket spill page
			 * (inet->sndmsg_page), attaching it as a new page
			 * fragment when needed. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = inet->sndmsg_page;
			int off = inet->sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					/* New frag slot references the cached
					 * page; take an extra page ref. */
					get_page(page);
					skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				/* Cached page exhausted (or none): allocate a
				 * fresh page and account it against the
				 * socket's write memory. */
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				inet->sndmsg_page = page;
				inet->sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
				skb->truesize += PAGE_SIZE;
				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			/* Manual bookkeeping: page data is not in the linear
			 * area, so update lengths by hand. */
			inet->sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Back out the bytes that never made it onto the queue. */
	inet->cork.length -= length;
	IP6_INC_STATS(OutDiscards);
	return err;
}
1027
/*
 *	ip6_push_pending_frames - emit everything queued by
 *	ip6_append_data() as a single IPv6 datagram.
 *
 *	Coalesces the skbs on sk->sk_write_queue into one head skb (the
 *	rest are chained on its frag_list), pushes the extension headers
 *	and the IPv6 header in front of the payload, and hands the result
 *	to netfilter/dst_output.  Fragmentation, if the coalesced packet
 *	exceeds the MTU, is performed further down the output path.
 *
 *	Always tears down the cork state (options copy, held route, flow)
 *	before returning, success or failure.  Returns 0 or a negative
 *	errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_opt *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = np->cork.rt;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb->nh.raw)
		__skb_pull(skb, skb->nh.raw - skb->data);
	/* Chain the remaining queued skbs onto the head skb's frag_list,
	 * stripping their (duplicate) header space and folding their
	 * lengths into the head skb. */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
#endif
	}

	/* final_dst may be rewritten by a routing header in
	 * ipv6_push_nfrag_opts(); start from the flow's destination. */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb->h.raw - skb->nh.raw);
	/* Push extension headers in front of the payload: fragmentable
	 * options first (innermost), then non-fragmentable ones. */
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
	
	/* Version 6 in the top nibble, plus the flow label. */
	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);

	/* payload_len is 16-bit; an oversize (jumbo) packet leaves it 0. */
	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	else
		hdr->payload_len = 0;
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(OutRequests);	
	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
	if (err) {
		/* Positive value = congestion notification from the qdisc;
		 * only report it if the socket asked for errors. */
		if (err > 0)
			err = inet->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	/* Tear down the cork state in every case. */
	inet->cork.flags &= ~IPCORK_OPT;
	if (np->cork.opt) {
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}
	if (np->cork.rt) {
		dst_release(&np->cork.rt->u.dst);
		np->cork.rt = NULL;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
	return err;
error:
	goto out;
}
1108
1109 void ip6_flush_pending_frames(struct sock *sk)
1110 {
1111         struct inet_opt *inet = inet_sk(sk);
1112         struct ipv6_pinfo *np = inet6_sk(sk);
1113         struct sk_buff *skb;
1114
1115         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1116                 IP6_INC_STATS(OutDiscards);
1117                 kfree_skb(skb);
1118         }
1119
1120         inet->cork.flags &= ~IPCORK_OPT;
1121
1122         if (np->cork.opt) {
1123                 kfree(np->cork.opt);
1124                 np->cork.opt = NULL;
1125         }
1126         if (np->cork.rt) {
1127                 dst_release(&np->cork.rt->u.dst);
1128                 np->cork.rt = NULL;
1129         }
1130         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1131 }