/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void     tcp_v6_send_reset(struct sk_buff *skb);
static void     tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                                  struct sk_buff *skb);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
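/* XOR-fold the low 32 bits of both addresses into the port pair, then
 * fold the result down twice more; tcp_ehash_size is a power of two,
 * so the final mask yields a valid established-hash bucket index.
 */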
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
                                    struct in6_addr *faddr, u16 fport)
{
        int hashent = (lport ^ fport);

        hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
        hashent ^= hashent>>16;
        hashent ^= hashent>>8;
        return (hashent & (tcp_ehash_size - 1));
}

static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
        struct inet_opt *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *laddr = &np->rcv_saddr;
        struct in6_addr *faddr = &np->daddr;
        __u16 lport = inet->num;
        __u16 fport = inet->dport;
        return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

static inline int tcp_v6_bind_conflict(struct sock *sk,
                                       struct tcp_bind_bucket *tb)
{
        struct sock *sk2;
        struct hlist_node *node;

        /* We must walk the whole port owner list in this case. -DaveM */
        sk_for_each_bound(sk2, node, &tb->owners) {
                if (sk != sk2 &&
                    (!sk->sk_bound_dev_if ||
                     !sk2->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
                    (!sk->sk_reuse || !sk2->sk_reuse ||
                     sk2->sk_state == TCP_LISTEN) &&
                     ipv6_rcv_saddr_equal(sk, sk2))
                        break;
        }

        return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
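/* Bind to port snum, or, when snum == 0, pick an ephemeral port by
 * advancing the global rover through the sysctl'd local port range
 * until a bind bucket with no owner is found.  An occupied bucket is
 * still acceptable if every owner allows reuse (fastreuse) or if
 * tcp_v6_bind_conflict() finds no real clash.
 */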
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;
        struct hlist_node *node;
        int ret;

        local_bh_disable();
        if (snum == 0) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int remaining = (high - low) + 1;
                int rover;

                spin_lock(&tcp_portalloc_lock);
                rover = tcp_port_rover;
                do {    rover++;
                        if ((rover < low) || (rover > high))
                                rover = low;
                        head = &tcp_bhash[tcp_bhashfn(rover)];
                        spin_lock(&head->lock);
                        tb_for_each(tb, node, &head->chain)
                                if (tb->port == rover)
                                        goto next;
                        break;
                next:
                        spin_unlock(&head->lock);
                } while (--remaining > 0);
                tcp_port_rover = rover;
                spin_unlock(&tcp_portalloc_lock);

                /* Exhausted local port range during search? */
                ret = 1;
                if (remaining <= 0)
                        goto fail;

                /* OK, here is the one we will use. */
                snum = rover;
        } else {
                head = &tcp_bhash[tcp_bhashfn(snum)];
                spin_lock(&head->lock);
                tb_for_each(tb, node, &head->chain)
                        if (tb->port == snum)
                                goto tb_found;
        }
        tb = NULL;
        goto tb_not_found;
tb_found:
        if (tb && !hlist_empty(&tb->owners)) {
                if (tb->fastreuse > 0 && sk->sk_reuse &&
                    sk->sk_state != TCP_LISTEN) {
                        goto success;
                } else {
                        ret = 1;
                        if (tcp_v6_bind_conflict(sk, tb))
                                goto fail_unlock;
                }
        }
tb_not_found:
        ret = 1;
        if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
                goto fail_unlock;
        if (hlist_empty(&tb->owners)) {
                if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
                        tb->fastreuse = 1;
                else
                        tb->fastreuse = 0;
        } else if (tb->fastreuse &&
                   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
                tb->fastreuse = 0;

success:
        if (!tcp_sk(sk)->bind_hash)
                tcp_bind_hash(sk, tb, snum);
        BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
        ret = 0;

fail_unlock:
        spin_unlock(&head->lock);
fail:
        local_bh_enable();
        return ret;
}

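/* Insert the socket into the listening hash (under the global listen
 * lock) or, once connected, into the established hash bucket computed
 * by tcp_v6_sk_hashfn().
 */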
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
        struct hlist_head *list;
        rwlock_t *lock;

        BUG_TRAP(sk_unhashed(sk));

        if (sk->sk_state == TCP_LISTEN) {
                list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
                lock = &tcp_lhash_lock;
                tcp_listen_wlock();
        } else {
                sk->sk_hashent = tcp_v6_sk_hashfn(sk);
                list = &tcp_ehash[sk->sk_hashent].chain;
                lock = &tcp_ehash[sk->sk_hashent].lock;
                write_lock(lock);
        }

        __sk_add_node(sk, list);
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
        if (sk->sk_state != TCP_CLOSE) {
                struct tcp_opt *tp = tcp_sk(sk);

                if (tp->af_specific == &ipv6_mapped) {
                        tcp_prot.hash(sk);
                        return;
                }
                local_bh_disable();
                __tcp_v6_hash(sk);
                local_bh_enable();
        }
}

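/* Walk the listening-hash chain for this port and pick the best match:
 * one point for owning the port, one more for a matching bound address,
 * one more for a matching bound device.  A score of 3 is an exact hit
 * and ends the search early.
 */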
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
        struct sock *sk;
        struct hlist_node *node;
        struct sock *result = NULL;
        int score, hiscore;

        hiscore = 0;
        read_lock(&tcp_lhash_lock);
        sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
                if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        score = 1;
                        if (!ipv6_addr_any(&np->rcv_saddr)) {
                                if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
                                        continue;
                                score++;
                        }
                        if (sk->sk_bound_dev_if) {
                                if (sk->sk_bound_dev_if != dif)
                                        continue;
                                score++;
                        }
                        if (score == 3) {
                                result = sk;
                                break;
                        }
                        if (score > hiscore) {
                                hiscore = score;
                                result = sk;
                        }
                }
        }
        if (result)
                sock_hold(result);
        read_unlock(&tcp_lhash_lock);
        return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

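/* TIME_WAIT buckets live in the second half of the established table,
 * at head + tcp_ehash_size, so a miss on the live chain falls through
 * to a scan of the time-wait chain before we give up.
 */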
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
                                                       struct in6_addr *daddr, u16 hnum,
                                                       int dif)
{
        struct tcp_ehash_bucket *head;
        struct sock *sk;
        struct hlist_node *node;
        __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
        int hash;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.
         */
        hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
        head = &tcp_ehash[hash];
        read_lock(&head->lock);
        sk_for_each(sk, node, &head->chain) {
                /* For IPV6 do the cheaper port and family tests first. */
                if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
                        goto hit; /* You sunk my battleship! */
        }
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
                /* FIXME: acme: check this... */
                struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk->sk_family                == PF_INET6) {
                        if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr)      &&
                           !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr)  &&
                           (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
                                goto hit;
                }
        }
        read_unlock(&head->lock);
        return NULL;

hit:
        sock_hold(sk);
        read_unlock(&head->lock);
        return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                           struct in6_addr *daddr, u16 hnum,
                                           int dif)
{
        struct sock *sk;

        sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

        if (sk)
                return sk;

        return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                  struct in6_addr *daddr, u16 dport,
                                  int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
        local_bh_enable();

        return sk;
}


/*
 * Open request hash tables.
 */

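/* Hand-rolled Jenkins mix over the peer address and port, salted with
 * the per-listener hash_rnd so remote hosts cannot precompute SYN
 * queue collisions; TCP_SYNQ_HSIZE is a power of two.
 */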
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
        u32 a, b, c;

        a = raddr->s6_addr32[0];
        b = raddr->s6_addr32[1];
        c = raddr->s6_addr32[2];

        a += JHASH_GOLDEN_RATIO;
        b += JHASH_GOLDEN_RATIO;
        c += rnd;
        __jhash_mix(a, b, c);

        a += raddr->s6_addr32[3];
        b += (u32) rport;
        __jhash_mix(a, b, c);

        return c & (TCP_SYNQ_HSIZE - 1);
}

static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
                                              struct open_request ***prevp,
                                              __u16 rport,
                                              struct in6_addr *raddr,
                                              struct in6_addr *laddr,
                                              int iif)
{
        struct tcp_listen_opt *lopt = tp->listen_opt;
        struct open_request *req, **prev;

        for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
             (req = *prev) != NULL;
             prev = &req->dl_next) {
                if (req->rmt_port == rport &&
                    req->class->family == AF_INET6 &&
                    !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
                    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
                    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
                        BUG_TRAP(req->sk == NULL);
                        *prevp = prev;
                        return req;
                }
        }

        return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
                                   struct in6_addr *saddr,
                                   struct in6_addr *daddr,
                                   unsigned long base)
{
        return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IPV6)) {
                return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
                                                    skb->nh.ipv6h->saddr.s6_addr32,
                                                    skb->h.th->dest,
                                                    skb->h.th->source);
        } else {
                return secure_tcp_sequence_number(skb->nh.iph->daddr,
                                                  skb->nh.iph->saddr,
                                                  skb->h.th->dest,
                                                  skb->h.th->source);
        }
}

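/* Verify that the chosen 4-tuple is unique before hashing a connecting
 * socket.  A dying TIME_WAIT twin with a recent timestamp may be
 * recycled: steal its timestamp state, pick write_seq above its
 * snd_nxt, and deschedule the old bucket.
 */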
static int tcp_v6_check_established(struct sock *sk)
{
        struct inet_opt *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *daddr = &np->rcv_saddr;
        struct in6_addr *saddr = &np->daddr;
        int dif = sk->sk_bound_dev_if;
        u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
        int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
        struct tcp_ehash_bucket *head = &tcp_ehash[hash];
        struct sock *sk2;
        struct hlist_node *node;
        struct tcp_tw_bucket *tw;

        write_lock_bh(&head->lock);

        /* Check TIME-WAIT sockets first. */
        sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
                tw = (struct tcp_tw_bucket*)sk2;

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk2->sk_family               == PF_INET6     &&
                   !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr)      &&
                   !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr)  &&
                   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
                        struct tcp_opt *tp = tcp_sk(sk);

                        if (tw->tw_ts_recent_stamp) {
                                /* See comment in tcp_ipv4.c */
                                tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
                                if (!tp->write_seq)
                                        tp->write_seq = 1;
                                tp->ts_recent = tw->tw_ts_recent;
                                tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
                                sock_hold(sk2);
                                goto unique;
                        } else
                                goto not_unique;
                }
        }
        tw = NULL;

        /* And established part... */
        sk_for_each(sk2, node, &head->chain) {
                if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
                        goto not_unique;
        }

unique:
        BUG_TRAP(sk_unhashed(sk));
        __sk_add_node(sk, &head->chain);
        sk->sk_hashent = hash;
        sock_prot_inc_use(sk->sk_prot);
        write_unlock_bh(&head->lock);

        if (tw) {
                /* Silly. Should hash-dance instead... */
                local_bh_disable();
                tcp_tw_deschedule(tw);
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
                local_bh_enable();

                tcp_tw_put(tw);
        }
        return 0;

not_unique:
        write_unlock_bh(&head->lock);
        return -EADDRNOTAVAIL;
}

static int tcp_v6_hash_connect(struct sock *sk)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;

        /* XXX */
        if (inet_sk(sk)->num == 0) {
                int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
                if (err)
                        return err;
                inet_sk(sk)->sport = htons(inet_sk(sk)->num);
        }

        head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
        tb = tb_head(head);

        spin_lock_bh(&head->lock);

        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
                __tcp_v6_hash(sk);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
                spin_unlock_bh(&head->lock);
                return tcp_v6_check_established(sk);
        }
}

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
        return IP6CB(skb)->iif;
}

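/* Active open: resolve any attached flow label, reject multicast and
 * interface-less link-local destinations, then route via flowi
 * (honouring a type-0 routing header's final hop).  v4-mapped
 * destinations are handed to tcp_v4_connect() with this socket
 * switched to the ipv6_mapped operations.
 */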
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_opt *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_opt *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p = NULL, final;
        struct flowi fl;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl, 0, sizeof(fl));

        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                        ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if(ipv6_addr_any(&usin->sin6_addr))
                usin->sin6_addr.s6_addr[15] = 0x1;

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if(addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (sk->sk_bound_dev_if &&
                            sk->sk_bound_dev_if != usin->sin6_scope_id)
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->ts_recent_stamp &&
            ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
                tp->ts_recent = 0;
                tp->ts_recent_stamp = 0;
                tp->write_seq = 0;
        }

        ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
        np->flow_label = fl.fl6_flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type == IPV6_ADDR_MAPPED) {
                u32 exthdrlen = tp->ext_header_len;
                struct sockaddr_in sin;

                SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                tp->af_specific = &ipv6_mapped;
                sk->sk_backlog_rcv = tcp_v4_do_rcv;

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        tp->ext_header_len = exthdrlen;
                        tp->af_specific = &ipv6_specific;
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
                        goto failure;
                } else {
                        ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->saddr);
                        ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->rcv_saddr);
                }

                return err;
        }

        if (!ipv6_addr_any(&np->rcv_saddr))
                saddr = &np->rcv_saddr;

        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
        ipv6_addr_copy(&fl.fl6_src,
                       (saddr ? saddr : &np->saddr));
        fl.oif = sk->sk_bound_dev_if;
        fl.fl_ip_dport = usin->sin6_port;
        fl.fl_ip_sport = inet->sport;

        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
                ipv6_addr_copy(&final, &fl.fl6_dst);
                ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                final_p = &final;
        }

        err = ip6_dst_lookup(sk, &dst, &fl);
        if (err)
                goto failure;
        if (final_p)
                ipv6_addr_copy(&fl.fl6_dst, final_p);

        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                dst_release(dst);
                goto failure;
        }

        if (saddr == NULL) {
                saddr = &fl.fl6_src;
                ipv6_addr_copy(&np->rcv_saddr, saddr);
        }

        /* set the source address */
        ipv6_addr_copy(&np->saddr, saddr);
        inet->rcv_saddr = LOOPBACK4_IPV6;

        ip6_dst_store(sk, dst, NULL);
        sk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
        tp->ext2_header_len = dst->header_len;

        tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = tcp_v6_hash_connect(sk);
        if (err)
                goto late_failure;

        if (!tp->write_seq)
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             np->daddr.s6_addr32,
                                                             inet->sport,
                                                             inet->dport);

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        __sk_dst_reset(sk);
failure:
        inet->dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

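/* ICMPv6 error handler.  Locate the offending connection, let
 * PKT_TOOBIG shrink the cached PMTU and trigger a simple retransmit,
 * and map other errors onto the socket (or onto the matching
 * open_request while the connection is still embryonic).
 */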
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                int type, int code, int offset, __u32 info)
{
        struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
        struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct ipv6_pinfo *np;
        struct sock *sk;
        int err;
        struct tcp_opt *tp;
        __u32 seq;

        sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

        if (sk == NULL) {
                ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
                return;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                tcp_tw_put((struct tcp_tw_bucket*)sk);
                return;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        tp = tcp_sk(sk);
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = inet6_sk(sk);

        if (type == ICMPV6_PKT_TOOBIG) {
                struct dst_entry *dst = NULL;

                if (sock_owned_by_user(sk))
                        goto out;
                if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                        goto out;

                /* icmp should have updated the destination cache entry */
                dst = __sk_dst_check(sk, np->dst_cookie);

                if (dst == NULL) {
                        struct inet_opt *inet = inet_sk(sk);
                        struct flowi fl;

                        /* BUGGG_FUTURE: Again, it is not clear how
                           to handle rthdr case. Ignore this complexity
                           for now.
                         */
                        memset(&fl, 0, sizeof(fl));
                        fl.proto = IPPROTO_TCP;
                        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
                        ipv6_addr_copy(&fl.fl6_src, &np->saddr);
                        fl.oif = sk->sk_bound_dev_if;
                        fl.fl_ip_dport = inet->dport;
                        fl.fl_ip_sport = inet->sport;

                        if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                } else
                        dst_hold(dst);

                if (tp->pmtu_cookie > dst_pmtu(dst)) {
                        tcp_sync_mss(sk, dst_pmtu(dst));
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
                goto out;
        }

        icmpv6_err_convert(type, code, &err);

        /* Might be for an open_request */
        switch (sk->sk_state) {
                struct open_request *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
                                        &hdr->saddr, tcp_v6_iif(skb));
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                BUG_TRAP(req->sk == NULL);

                if (seq != req->snt_isn) {
                        NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                tcp_synq_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, if SYNs are crossed. --ANK */
                if (!sock_owned_by_user(sk)) {
                        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
                        sk->sk_err = err;
                        sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}


static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
                              struct dst_entry *dst)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff * skb;
        struct ipv6_txoptions *opt = NULL;
        struct in6_addr * final_p = NULL, final;
        struct flowi fl;
        int err = -1;

        memset(&fl, 0, sizeof(fl));
        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
        ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
        fl.fl6_flowlabel = 0;
        fl.oif = req->af.v6_req.iif;
        fl.fl_ip_dport = req->rmt_port;
        fl.fl_ip_sport = inet_sk(sk)->sport;

        if (dst == NULL) {
                opt = np->opt;
                if (opt == NULL &&
                    np->rxopt.bits.srcrt == 2 &&
                    req->af.v6_req.pktopts) {
                        struct sk_buff *pktopts = req->af.v6_req.pktopts;
                        struct inet6_skb_parm *rxopt = IP6CB(pktopts);
                        if (rxopt->srcrt)
                                opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
                }

                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }

                err = ip6_dst_lookup(sk, &dst, &fl);
                if (err)
                        goto done;
                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);
                if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto done;
        }

        skb = tcp_make_synack(sk, dst, req);
        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v6_check(th, skb->len,
                                         &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
                                         csum_partial((char *)th, skb->len, skb->csum));

                ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
                err = ip6_xmit(sk, skb, &fl, opt, 0);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

done:
        dst_release(dst);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        return err;
}

static void tcp_v6_or_free(struct open_request *req)
{
        if (req->af.v6_req.pktopts)
                kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
        .family         =       AF_INET6,
        .rtx_syn_ack    =       tcp_v6_send_synack,
        .send_ack       =       tcp_v6_or_send_ack,
        .destructor     =       tcp_v6_or_free,
        .send_reset     =       tcp_v6_send_reset
};

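/* Does the caller care about any of the ancillary data carried by this
 * skb (hop-by-hop options, flow label, routing header, destination
 * options)?  If so, the listener keeps the SYN around as pktoptions.
 */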
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (np->rxopt.all) {
                if ((opt->hop && np->rxopt.bits.hopopts) ||
                    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
                     np->rxopt.bits.rxflow) ||
                    (opt->srcrt && np->rxopt.bits.srcrt) ||
                    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
                        return 1;
        }
        return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                              struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);

        if (skb->ip_summed == CHECKSUM_HW) {
                th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
                skb->csum = offsetof(struct tcphdr, check);
        } else {
                th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
                                            csum_partial((char *)th, th->doff<<2,
                                                         skb->csum));
        }
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;

        if (th->rst)
                return;

        if (!ipv6_unicast_destination(skb))
                return;

        /*
         * We need to grab some memory, and put together an RST,
         * and then put it into the queue to be sent.
         */

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

        t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = sizeof(*t1)/4;
        t1->rst = 1;

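        /* RFC 793 reset rules: if the offending segment carried an ACK,
         * reply with SEQ = its ACK value; otherwise leave SEQ = 0 and
         * ACK everything the segment occupied in sequence space.
         */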
        if(th->ack) {
                t1->seq = th->ack_seq;
        } else {
                t1->ack = 1;
                t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
                                    + skb->len - (th->doff<<2));
        }

        buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    sizeof(*t1), IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        /* sk = NULL, but it is safe for now. RST socket required. */
        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

                if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
                        dst_release(buff->dst);
                        return;
                }

                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;
        int tot_len = sizeof(struct tcphdr);

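        /* A timestamp echo needs 12 option bytes:
         * NOP, NOP, TIMESTAMP kind/length, then two 32-bit values.
         */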
        if (ts)
                tot_len += 3*4;

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = (struct tcphdr *) skb_push(buff, tot_len);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len/4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = 1;
        t1->window = htons(win);

        if (ts) {
                u32 *ptr = (u32*)(t1 + 1);
                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                               (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tcp_time_stamp);
                *ptr = htonl(ts);
        }

        buff->csum = csum_partial((char *)t1, tot_len, 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    tot_len, IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
                if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
                        dst_release(buff->dst);
                        return;
                }
                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

        tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
                        tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

        tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
        tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
        struct open_request *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = tcp_sk(sk);
        struct sock *nsk;

        /* Find possible connection requests. */
        req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
                                &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
        if (req)
                return tcp_check_req(sk, skb, req, prev);

        nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
                                          th->source,
                                          &skb->nh.ipv6h->daddr,
                                          ntohs(th->dest),
                                          tcp_v6_iif(skb));

        if (nsk) {
                if (nsk->sk_state != TCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
                tcp_tw_put((struct tcp_tw_bucket*)nsk);
                return NULL;
        }

#if 0 /*def CONFIG_SYN_COOKIES*/
        if (!th->rst && !th->syn && th->ack)
                sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
        return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
        struct tcp_opt *tp = tcp_sk(sk);
        struct tcp_listen_opt *lopt = tp->listen_opt;
        u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);

        req->sk = NULL;
        req->expires = jiffies + TCP_TIMEOUT_INIT;
        req->retrans = 0;
        req->dl_next = lopt->syn_table[h];

        write_lock(&tp->syn_wait_lock);
        lopt->syn_table[h] = req;
        write_unlock(&tp->syn_wait_lock);

#ifdef CONFIG_ACCEPT_QUEUES
        tcp_synq_added(sk, req);
#else
        tcp_synq_added(sk);
#endif
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_opt tmptp, *tp = tcp_sk(sk);
        struct open_request *req = NULL;
        __u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_ACCEPT_QUEUES
        int class = 0;
#endif

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        /*
         *      There are no SYN attacks on IPv6, yet...
         */
        if (tcp_synq_is_full(sk) && !isn) {
                if (net_ratelimit())
                        printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                goto drop;
        }

#ifdef CONFIG_ACCEPT_QUEUES
        class = (skb->nfmark <= 0) ? 0 :
                        ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0 : skb->nfmark);
        /*
         * Accept only if the class has shares set or if the default class
         * i.e. class 0 has shares
         */
        if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
                if (tcp_sk(sk)->acceptq[0].aq_ratio)
                        class = 0;
                else
                        goto drop;
        }

        if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
#else
        if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
#endif
                goto drop;

        req = tcp_openreq_alloc();
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tmptp);
        tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tmptp.user_mss = tp->user_mss;

        tcp_parse_options(skb, &tmptp, 0);

        tmptp.tstamp_ok = tmptp.saw_tstamp;
        tcp_openreq_init(req, &tmptp, skb);
#ifdef CONFIG_ACCEPT_QUEUES
        req->acceptq_class = class;
        req->acceptq_time_stamp = jiffies;
#endif
        req->class = &or_ipv6;
        ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
        TCP_ECN_create_request(req, skb->h.th);
        req->af.v6_req.pktopts = NULL;
        if (ipv6_opt_accepted(sk, skb) ||
            np->rxopt.bits.rxinfo ||
            np->rxopt.bits.rxhlim) {
                atomic_inc(&skb->users);
                req->af.v6_req.pktopts = skb;
        }
        req->af.v6_req.iif = sk->sk_bound_dev_if;

        /* So that link locals have meaning */
        if (!sk->sk_bound_dev_if &&
            ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
                req->af.v6_req.iif = tcp_v6_iif(skb);

        if (isn == 0)
                isn = tcp_v6_init_sequence(sk, skb);

        req->snt_isn = isn;

        if (tcp_v6_send_synack(sk, req, NULL))
                goto drop;

        tcp_v6_synq_add(sk, req);

        return 0;

drop:
        if (req)
                tcp_openreq_free(req);

        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        return 0; /* don't send reset */
}

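/* Create the child socket for a completed handshake.  A v4-mapped SYN
 * is delegated to tcp_v4_syn_recv_sock() and the child is then dressed
 * up as an IPv6 socket; the native path routes, clones pktoptions and
 * txoptions, and hashes the newborn into the established table.
 */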
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                          struct open_request *req,
                                          struct dst_entry *dst)
{
        struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
        struct tcp6_sock *newtcp6sk;
        struct inet_opt *newinet;
        struct tcp_opt *newtp;
        struct sock *newsk;
        struct ipv6_txoptions *opt;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

                if (newsk == NULL)
                        return NULL;

                newtcp6sk = (struct tcp6_sock *)newsk;
                newtcp6sk->pinet6 = &newtcp6sk->inet6;

                newinet = inet_sk(newsk);
                newnp = inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
                              newinet->daddr);

                ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
                              newinet->saddr);

                ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

                newtp->af_specific = &ipv6_mapped;
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = tcp_v6_iif(skb);
                newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

                /* Charge newly allocated IPv6 socket. Though it is mapped,
                 * it is IPv6 yet.
                 */
#ifdef INET_REFCNT_DEBUG
                atomic_inc(&inet6_sock_nr);
#endif

                /* It is tricky place. Until this moment IPv4 tcp
                   worked with IPv6 af_tcp.af_specific.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, newtp->pmtu_cookie);

                return newsk;
        }

        opt = np->opt;

#ifdef CONFIG_ACCEPT_QUEUES
        if (sk_acceptq_is_full(sk, req->acceptq_class))
#else
        if (sk_acceptq_is_full(sk))
#endif
                goto out_overflow;

        if (np->rxopt.bits.srcrt == 2 &&
            opt == NULL && req->af.v6_req.pktopts) {
                struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
                if (rxopt->srcrt)
                        opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
        }

        if (dst == NULL) {
                struct in6_addr *final_p = NULL, final;
                struct flowi fl;

                memset(&fl, 0, sizeof(fl));
                fl.proto = IPPROTO_TCP;
                ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }
                ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = req->rmt_port;
                fl.fl_ip_sport = inet_sk(sk)->sport;

                if (ip6_dst_lookup(sk, &dst, &fl))
                        goto out;

                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);

                if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
                goto out;

        /* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
        atomic_inc(&inet6_sock_nr);
#endif

        ip6_dst_store(newsk, dst, NULL);
        newsk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        newtcp6sk = (struct tcp6_sock *)newsk;
        newtcp6sk->pinet6 = &newtcp6sk->inet6;

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
        ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
        ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
        newsk->sk_bound_dev_if = req->af.v6_req.iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->opt = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        /* Clone pktoptions received with SYN */
        newnp->pktoptions = NULL;
        if (req->af.v6_req.pktopts) {
                newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
                                              GFP_ATOMIC);
                kfree_skb(req->af.v6_req.pktopts);
                req->af.v6_req.pktopts = NULL;
                if (newnp->pktoptions)
                        skb_set_owner_r(newnp->pktoptions, newsk);
        }
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

        /* Clone native IPv6 options from listening socket (if any)

           Yes, keeping a reference count would be much more clever, but
           we do one more thing here: reattach optmem to newsk.
         */
        if (opt) {
                newnp->opt = ipv6_dup_options(newsk, opt);
                if (opt != np->opt)
                        sock_kfree_s(sk, opt, opt->tot_len);
        }

        newtp->ext_header_len = 0;
        if (newnp->opt)
                newtp->ext_header_len = newnp->opt->opt_nflen +
                                        newnp->opt->opt_flen;
        newtp->ext2_header_len = dst->header_len;

        tcp_sync_mss(newsk, dst_pmtu(dst));
        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
        tcp_initialize_rcv_mss(newsk);

        newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

        __tcp_v6_hash(newsk);
        tcp_inherit_port(sk, newsk);

        return newsk;

out_overflow:
        NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
        NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        dst_release(dst);
        return NULL;
}

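/* Validate the TCP checksum on receive.  Hardware-computed sums are
 * verified against the IPv6 pseudo-header; short packets (<= 76 bytes)
 * are checked in full right away, while longer ones only get the
 * pseudo-header folded in here and are completed during copy.
 */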
1499 static int tcp_v6_checksum_init(struct sk_buff *skb)
1500 {
1501         if (skb->ip_summed == CHECKSUM_HW) {
1502                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1503                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1504                                   &skb->nh.ipv6h->daddr,skb->csum))
1505                         return 0;
1506                 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1507         }
1508         if (skb->len <= 76) {
1509                 if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
1510                                  &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
1511                         return -1;
1512                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1513         } else {
1514                 skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
1515                                           &skb->nh.ipv6h->daddr, 0);
1516         }
1517         return 0;
1518 }
1519
1520 /* The socket must have its spinlock held when we get
1521  * here.
1522  *
1523  * We have a potential double-lock case here, so even when
1524  * doing backlog processing we use the BH locking scheme.
1525  * This is because we cannot sleep with the original spinlock
1526  * held.
1527  */
1528 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1529 {
1530         struct ipv6_pinfo *np = inet6_sk(sk);
1531         struct tcp_opt *tp;
1532         struct sk_buff *opt_skb = NULL;
1533
1534         /* Imagine: socket is IPv6. IPv4 packet arrives,
1535            goes to the IPv4 receive handler and is backlogged.
1536            From backlog it always goes here. Kerboom...
1537            Fortunately, tcp_rcv_established and rcv_established
1538            handle them correctly, but that is not the case with
1539            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1540          */
1541
1542         if (skb->protocol == htons(ETH_P_IP))
1543                 return tcp_v4_do_rcv(sk, skb);
1544
1545         if (sk_filter(sk, skb, 0))
1546                 goto discard;
1547
1548         /*
1549          *      socket locking is here for SMP purposes as backlog rcv
1550          *      is currently called with bh processing disabled.
1551          */
1552
1553         /* Do Stevens' IPV6_PKTOPTIONS.
1554
1555            Yes, guys, it is the only place in our code where we
1556            may handle it without affecting IPv4.
1557            The rest of the code is protocol independent,
1558            and I do not like the idea of uglifying IPv4.
1559
1560            Actually, the whole idea behind IPV6_PKTOPTIONS
1561            does not look very well thought out. For now we latch
1562            the options received in the last packet enqueued
1563            by tcp. Feel free to propose a better solution.
1564                                               --ANK (980728)
1565          */
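        /* Clone up front: the skb may be consumed by the receive path
         * below, but the latched options must survive until the user
         * fetches them via IPV6_PKTOPTIONS.
         */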
1566         if (np->rxopt.all)
1567                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1568
1569         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1570                 TCP_CHECK_TIMER(sk);
1571                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1572                         goto reset;
1573                 TCP_CHECK_TIMER(sk);
1574                 if (opt_skb)
1575                         goto ipv6_pktoptions;
1576                 return 0;
1577         }
1578
1579         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1580                 goto csum_err;
1581
1582         if (sk->sk_state == TCP_LISTEN) { 
1583                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1584                 if (!nsk)
1585                         goto discard;
1586
1587                 /*
1588                  * Queue it on the new socket if the new socket is active,
1589                  * otherwise we just short-circuit this and continue with
1590                  * the new socket.
1591                  */
1592                 if (nsk != sk) {
1593                         if (tcp_child_process(sk, nsk, skb))
1594                                 goto reset;
1595                         if (opt_skb)
1596                                 __kfree_skb(opt_skb);
1597                         return 0;
1598                 }
1599         }
1600
1601         TCP_CHECK_TIMER(sk);
1602         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1603                 goto reset;
1604         TCP_CHECK_TIMER(sk);
1605         if (opt_skb)
1606                 goto ipv6_pktoptions;
1607         return 0;
1608
1609 reset:
1610         tcp_v6_send_reset(skb);
1611 discard:
1612         if (opt_skb)
1613                 __kfree_skb(opt_skb);
1614         kfree_skb(skb);
1615         return 0;
1616 csum_err:
1617         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1618         goto discard;
1619
1620
1621 ipv6_pktoptions:
1622         /* You may ask, what is this?
1623
1624            1. The skb was enqueued by tcp.
1625            2. The skb is added to the tail of the read queue, not out of order.
1626            3. The socket is not in a passive state.
1627            4. Finally, it really contains options which the user wants to receive.
1628          */
1629         tp = tcp_sk(sk);
1630         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1631             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1632                 if (np->rxopt.bits.rxinfo)
1633                         np->mcast_oif = tcp_v6_iif(opt_skb);
1634                 if (np->rxopt.bits.rxhlim)
1635                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1636                 if (ipv6_opt_accepted(sk, opt_skb)) {
1637                         skb_set_owner_r(opt_skb, sk);
1638                         opt_skb = xchg(&np->pktoptions, opt_skb);
1639                 } else {
1640                         __kfree_skb(opt_skb);
1641                         opt_skb = xchg(&np->pktoptions, NULL);
1642                 }
1643         }
1644
1645         if (opt_skb)
1646                 kfree_skb(opt_skb);
1647         return 0;
1648 }
1649
1650 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1651 {
1652         struct sk_buff *skb = *pskb;
1653         struct tcphdr *th;      
1654         struct sock *sk;
1655         int ret;
1656
1657         if (skb->pkt_type != PACKET_HOST)
1658                 goto discard_it;
1659
1660         /*
1661          *      Count it even if it's bad.
1662          */
1663         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1664
1665         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1666                 goto discard_it;
1667
1668         th = skb->h.th;
1669
1670         if (th->doff < sizeof(struct tcphdr)/4)
1671                 goto bad_packet;
1672         if (!pskb_may_pull(skb, th->doff*4))
1673                 goto discard_it;
1674
1675         if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
1676             tcp_v6_checksum_init(skb) < 0)
1677                 goto bad_packet;
1678
1679         th = skb->h.th;
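        /* SYN and FIN each occupy one unit of sequence space, so they
         * are counted into end_seq together with the payload length.
         */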
1680         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1681         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1682                                     skb->len - th->doff*4);
1683         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1684         TCP_SKB_CB(skb)->when = 0;
1685         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1686         TCP_SKB_CB(skb)->sacked = 0;
1687
1688         sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1689                              &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1690
1691         if (!sk)
1692                 goto no_tcp_socket;
1693
1694 process:
1695         if (sk->sk_state == TCP_TIME_WAIT)
1696                 goto do_time_wait;
1697
1698         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1699                 goto discard_and_relse;
1700
1701         if (sk_filter(sk, skb, 0))
1702                 goto discard_and_relse;
1703
1704         skb->dev = NULL;
1705
1706         bh_lock_sock(sk);
1707         ret = 0;
1708         if (!sock_owned_by_user(sk)) {
1709                 if (!tcp_prequeue(sk, skb))
1710                         ret = tcp_v6_do_rcv(sk, skb);
1711         } else
1712                 sk_add_backlog(sk, skb);
1713         bh_unlock_sock(sk);
1714
1715         sock_put(sk);
1716         return ret ? -1 : 0;
1717
1718 no_tcp_socket:
1719         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1720                 goto discard_it;
1721
1722         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1723 bad_packet:
1724                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1725         } else {
1726                 tcp_v6_send_reset(skb);
1727         }
1728
1729 discard_it:
1730
1731         /*
1732          *      Discard frame
1733          */
1734
1735         kfree_skb(skb);
1736         return 0;
1737
1738 discard_and_relse:
1739         sock_put(sk);
1740         goto discard_it;
1741
1742 do_time_wait:
1743         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1744                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1745                 goto discard_it;
1746         }
1747
1748         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1749                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1750                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1751                 goto discard_it;
1752         }
1753
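        /* Four possible verdicts: TCP_TW_SYN lets a new connection reuse
         * the old pair via a listener, TCP_TW_ACK re-acknowledges,
         * TCP_TW_RST asks for a reset, and TCP_TW_SUCCESS means there
         * is nothing further to do.
         */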
1754         switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1755                                           skb, th, skb->len)) {
1756         case TCP_TW_SYN:
1757         {
1758                 struct sock *sk2;
1759
1760                 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1761                 if (sk2 != NULL) {
1762                         tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1763                         tcp_tw_put((struct tcp_tw_bucket *)sk);
1764                         sk = sk2;
1765                         goto process;
1766                 }
1767                 /* Fall through to ACK */
1768         }
1769         case TCP_TW_ACK:
1770                 tcp_v6_timewait_ack(sk, skb);
1771                 break;
1772         case TCP_TW_RST:
1773                 goto no_tcp_socket;
1774         case TCP_TW_SUCCESS:;
1775         }
1776         goto discard_it;
1777 }
1778
1779 static int tcp_v6_rebuild_header(struct sock *sk)
1780 {
1781         int err;
1782         struct dst_entry *dst;
1783         struct ipv6_pinfo *np = inet6_sk(sk);
1784
1785         dst = __sk_dst_check(sk, np->dst_cookie);
1786
1787         if (dst == NULL) {
1788                 struct inet_opt *inet = inet_sk(sk);
1789                 struct in6_addr *final_p = NULL, final;
1790                 struct flowi fl;
1791
1792                 memset(&fl, 0, sizeof(fl));
1793                 fl.proto = IPPROTO_TCP;
1794                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1795                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1796                 fl.fl6_flowlabel = np->flow_label;
1797                 fl.oif = sk->sk_bound_dev_if;
1798                 fl.fl_ip_dport = inet->dport;
1799                 fl.fl_ip_sport = inet->sport;
1800
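                /* With a type 0 routing header the packet is first routed
                 * to the nearest listed hop, so remember the real
                 * destination and restore it once the lookup is done.
                 */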
1801                 if (np->opt && np->opt->srcrt) {
1802                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1803                         ipv6_addr_copy(&final, &fl.fl6_dst);
1804                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1805                         final_p = &final;
1806                 }
1807
1808                 err = ip6_dst_lookup(sk, &dst, &fl);
1809                 if (err) {
1810                         sk->sk_route_caps = 0;
1811                         return err;
1812                 }
1813                 if (final_p)
1814                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1815
1816                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1817                         sk->sk_err_soft = -err;
1818                         dst_release(dst);
1819                         return err;
1820                 }
1821
1822                 ip6_dst_store(sk, dst, NULL);
1823                 sk->sk_route_caps = dst->dev->features &
1824                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1825                 tcp_sk(sk)->ext2_header_len = dst->header_len;
1826         }
1827
1828         return 0;
1829 }
1830
1831 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1832 {
1833         struct sock *sk = skb->sk;
1834         struct inet_opt *inet = inet_sk(sk);
1835         struct ipv6_pinfo *np = inet6_sk(sk);
1836         struct flowi fl;
1837         struct dst_entry *dst;
1838
1839         memset(&fl, 0, sizeof(fl));
1840         fl.proto = IPPROTO_TCP;
1841         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1842         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1843         fl.fl6_flowlabel = np->flow_label;
1844         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1845         fl.oif = sk->sk_bound_dev_if;
1846         fl.fl_ip_sport = inet->sport;
1847         fl.fl_ip_dport = inet->dport;
1848
1849         if (np->opt && np->opt->srcrt) {
1850                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1851                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1852         }
1853
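        /* np->dst_cookie guards the cached route: if routing state has
         * changed since the dst was stored, __sk_dst_check() drops it
         * and returns NULL, forcing the re-lookup below.
         */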
1854         dst = __sk_dst_check(sk, np->dst_cookie);
1855
1856         if (dst == NULL) {
1857                 int err = ip6_dst_lookup(sk, &dst, &fl);
1858
1859                 if (err) {
1860                         sk->sk_err_soft = -err;
1861                         return err;
1862                 }
1863
1864                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1865                         sk->sk_route_caps = 0;
1866                         dst_release(dst);
1867                         return err;
1868                 }
1869
1870                 ip6_dst_store(sk, dst, NULL);
1871                 sk->sk_route_caps = dst->dev->features &
1872                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1873                 tcp_sk(sk)->ext2_header_len = dst->header_len;
1874         }
1875
1876         skb->dst = dst_clone(dst);
1877
1878         /* Restore the final destination after routing is done */
1879         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1880
1881         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1882 }
1883
1884 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1885 {
1886         struct ipv6_pinfo *np = inet6_sk(sk);
1887         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1888
1889         sin6->sin6_family = AF_INET6;
1890         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1891         sin6->sin6_port = inet_sk(sk)->dport;
1892         /* We do not store received flowlabel for TCP */
1893         sin6->sin6_flowinfo = 0;
1894         sin6->sin6_scope_id = 0;
1895         if (sk->sk_bound_dev_if &&
1896             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1897                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1898 }
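/*
 * Illustrative sketch only (not part of the kernel build): this is the
 * address shape a userspace caller would see, e.g. from getpeername()
 * on a connected TCPv6 socket:
 *
 *      struct sockaddr_in6 sin6;
 *      socklen_t len = sizeof(sin6);
 *      if (getpeername(fd, (struct sockaddr *)&sin6, &len) == 0) {
 *              // sin6_scope_id is non-zero only for a link-local peer
 *              // on a socket bound to a device, as filled in above.
 *      }
 */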
1899
1900 static int tcp_v6_remember_stamp(struct sock *sk)
1901 {
1902         /* Alas, not yet... */
1903         return 0;
1904 }
1905
1906 static struct tcp_func ipv6_specific = {
1907         .queue_xmit     =       tcp_v6_xmit,
1908         .send_check     =       tcp_v6_send_check,
1909         .rebuild_header =       tcp_v6_rebuild_header,
1910         .conn_request   =       tcp_v6_conn_request,
1911         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1912         .remember_stamp =       tcp_v6_remember_stamp,
1913         .net_header_len =       sizeof(struct ipv6hdr),
1914
1915         .setsockopt     =       ipv6_setsockopt,
1916         .getsockopt     =       ipv6_getsockopt,
1917         .addr2sockaddr  =       v6_addr2sockaddr,
1918         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1919 };
1920
1921 /*
1922  *      TCP over IPv4 via INET6 API
1923  */
1924
1925 static struct tcp_func ipv6_mapped = {
1926         .queue_xmit     =       ip_queue_xmit,
1927         .send_check     =       tcp_v4_send_check,
1928         .rebuild_header =       tcp_v4_rebuild_header,
1929         .conn_request   =       tcp_v6_conn_request,
1930         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1931         .remember_stamp =       tcp_v4_remember_stamp,
1932         .net_header_len =       sizeof(struct iphdr),
1933
1934         .setsockopt     =       ipv6_setsockopt,
1935         .getsockopt     =       ipv6_getsockopt,
1936         .addr2sockaddr  =       v6_addr2sockaddr,
1937         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1938 };
1939
1940
1941
1942 /* NOTE: A lot of things are set to zero explicitly by the call to
1943  *       sk_alloc(), so they need not be done here.
1944  */
1945 static int tcp_v6_init_sock(struct sock *sk)
1946 {
1947         struct tcp_opt *tp = tcp_sk(sk);
1948
1949         skb_queue_head_init(&tp->out_of_order_queue);
1950         tcp_init_xmit_timers(sk);
1951         tcp_prequeue_init(tp);
1952
1953         tp->rto  = TCP_TIMEOUT_INIT;
1954         tp->mdev = TCP_TIMEOUT_INIT;
1955
1956         /* So many TCP implementations out there (incorrectly) count the
1957          * initial SYN frame in their delayed-ACK and congestion control
1958          * algorithms that we must have the following bandaid to talk
1959          * efficiently to them.  -DaveM
1960          */
1961         tp->snd_cwnd = 2;
1962
1963         /* See draft-stevens-tcpca-spec-01 for discussion of the
1964          * initialization of these values.
1965          */
1966         tp->snd_ssthresh = 0x7fffffff;
1967         tp->snd_cwnd_clamp = ~0;
1968         tp->mss_cache_std = tp->mss_cache = 536;
1969
1970         tp->reordering = sysctl_tcp_reordering;
1971
1972         sk->sk_state = TCP_CLOSE;
1973
1974         tp->af_specific = &ipv6_specific;
1975
1976         sk->sk_write_space = sk_stream_write_space;
1977         sk->sk_use_write_queue = 1;
1978
1979         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1980         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1981
1982         atomic_inc(&tcp_sockets_allocated);
1983
1984         return 0;
1985 }
1986
1987 static int tcp_v6_destroy_sock(struct sock *sk)
1988 {
1989         extern int tcp_v4_destroy_sock(struct sock *sk);
1990
1991         tcp_v4_destroy_sock(sk);
1992         return inet6_destroy_sock(sk);
1993 }
1994
1995 /* Proc filesystem TCPv6 sock list dumping. */
1996 static void get_openreq6(struct seq_file *seq, 
1997                          struct sock *sk, struct open_request *req, int i, int uid)
1998 {
1999         struct in6_addr *dest, *src;
2000         int ttd = req->expires - jiffies;
2001
2002         if (ttd < 0)
2003                 ttd = 0;
2004
2005         src = &req->af.v6_req.loc_addr;
2006         dest = &req->af.v6_req.rmt_addr;
2007         seq_printf(seq,
2008                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2009                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2010                    i,
2011                    src->s6_addr32[0], src->s6_addr32[1],
2012                    src->s6_addr32[2], src->s6_addr32[3],
2013                    ntohs(inet_sk(sk)->sport),
2014                    dest->s6_addr32[0], dest->s6_addr32[1],
2015                    dest->s6_addr32[2], dest->s6_addr32[3],
2016                    ntohs(req->rmt_port),
2017                    TCP_SYN_RECV,
2018                    0, 0, /* could print option size, but that is af dependent. */
2019                    1,   /* timers active (only the expire timer) */  
2020                    jiffies_to_clock_t(ttd), 
2021                    req->retrans,
2022                    uid,
2023                    0,  /* non standard timer */  
2024                    0, /* open_requests have no inode */
2025                    0, req);
2026 }
2027
2028 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2029 {
2030         struct in6_addr *dest, *src;
2031         __u16 destp, srcp;
2032         int timer_active;
2033         unsigned long timer_expires;
2034         struct inet_opt *inet = inet_sk(sp);
2035         struct tcp_opt *tp = tcp_sk(sp);
2036         struct ipv6_pinfo *np = inet6_sk(sp);
2037
2038         dest  = &np->daddr;
2039         src   = &np->rcv_saddr;
2040         destp = ntohs(inet->dport);
2041         srcp  = ntohs(inet->sport);
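        /* timer_active codes shown in /proc/net/tcp6: 1 retransmit,
         * 2 keepalive (sk_timer), 4 zero-window probe, 0 none.
         */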
2042         if (tp->pending == TCP_TIME_RETRANS) {
2043                 timer_active    = 1;
2044                 timer_expires   = tp->timeout;
2045         } else if (tp->pending == TCP_TIME_PROBE0) {
2046                 timer_active    = 4;
2047                 timer_expires   = tp->timeout;
2048         } else if (timer_pending(&sp->sk_timer)) {
2049                 timer_active    = 2;
2050                 timer_expires   = sp->sk_timer.expires;
2051         } else {
2052                 timer_active    = 0;
2053                 timer_expires = jiffies;
2054         }
2055
2056         seq_printf(seq,
2057                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2058                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2059                    i,
2060                    src->s6_addr32[0], src->s6_addr32[1],
2061                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2062                    dest->s6_addr32[0], dest->s6_addr32[1],
2063                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2064                    sp->sk_state, 
2065                    tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2066                    timer_active,
2067                    jiffies_to_clock_t(timer_expires - jiffies),
2068                    tp->retransmits,
2069                    sock_i_uid(sp),
2070                    tp->probes_out,
2071                    sock_i_ino(sp),
2072                    atomic_read(&sp->sk_refcnt), sp,
2073                    tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
2074                    tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
2075                    );
2076 }
2077
2078 static void get_timewait6_sock(struct seq_file *seq, 
2079                                struct tcp_tw_bucket *tw, int i)
2080 {
2081         struct in6_addr *dest, *src;
2082         __u16 destp, srcp;
2083         int ttd = tw->tw_ttd - jiffies;
2084
2085         if (ttd < 0)
2086                 ttd = 0;
2087
2088         dest  = &tw->tw_v6_daddr;
2089         src   = &tw->tw_v6_rcv_saddr;
2090         destp = ntohs(tw->tw_dport);
2091         srcp  = ntohs(tw->tw_sport);
2092
2093         seq_printf(seq,
2094                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2095                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2096                    i,
2097                    src->s6_addr32[0], src->s6_addr32[1],
2098                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2099                    dest->s6_addr32[0], dest->s6_addr32[1],
2100                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2101                    tw->tw_substate, 0, 0,
2102                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2103                    atomic_read(&tw->tw_refcnt), tw);
2104 }
2105
2106 #ifdef CONFIG_PROC_FS
2107 static int tcp6_seq_show(struct seq_file *seq, void *v)
2108 {
2109         struct tcp_iter_state *st;
2110
2111         if (v == SEQ_START_TOKEN) {
2112                 seq_puts(seq,
2113                          "  sl  "
2114                          "local_address                         "
2115                          "remote_address                        "
2116                          "st tx_queue rx_queue tr tm->when retrnsmt"
2117                          "   uid  timeout inode\n");
2118                 goto out;
2119         }
2120         st = seq->private;
2121
2122         switch (st->state) {
2123         case TCP_SEQ_STATE_LISTENING:
2124         case TCP_SEQ_STATE_ESTABLISHED:
2125                 get_tcp6_sock(seq, v, st->num);
2126                 break;
2127         case TCP_SEQ_STATE_OPENREQ:
2128                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2129                 break;
2130         case TCP_SEQ_STATE_TIME_WAIT:
2131                 get_timewait6_sock(seq, v, st->num);
2132                 break;
2133         }
2134 out:
2135         return 0;
2136 }
2137
2138 static struct file_operations tcp6_seq_fops;
2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2140         .owner          = THIS_MODULE,
2141         .name           = "tcp6",
2142         .family         = AF_INET6,
2143         .seq_show       = tcp6_seq_show,
2144         .seq_fops       = &tcp6_seq_fops,
2145 };
2146
2147 int __init tcp6_proc_init(void)
2148 {
2149         return tcp_proc_register(&tcp6_seq_afinfo);
2150 }
2151
2152 void tcp6_proc_exit(void)
2153 {
2154         tcp_proc_unregister(&tcp6_seq_afinfo);
2155 }
2156 #endif
2157
2158 struct proto tcpv6_prot = {
2159         .name                   = "TCPv6",
2160         .close                  = tcp_close,
2161         .connect                = tcp_v6_connect,
2162         .disconnect             = tcp_disconnect,
2163         .accept                 = tcp_accept,
2164         .ioctl                  = tcp_ioctl,
2165         .init                   = tcp_v6_init_sock,
2166         .destroy                = tcp_v6_destroy_sock,
2167         .shutdown               = tcp_shutdown,
2168         .setsockopt             = tcp_setsockopt,
2169         .getsockopt             = tcp_getsockopt,
2170         .sendmsg                = tcp_sendmsg,
2171         .recvmsg                = tcp_recvmsg,
2172         .backlog_rcv            = tcp_v6_do_rcv,
2173         .hash                   = tcp_v6_hash,
2174         .unhash                 = tcp_unhash,
2175         .get_port               = tcp_v6_get_port,
2176         .enter_memory_pressure  = tcp_enter_memory_pressure,
2177         .sockets_allocated      = &tcp_sockets_allocated,
2178         .memory_allocated       = &tcp_memory_allocated,
2179         .memory_pressure        = &tcp_memory_pressure,
2180         .sysctl_mem             = sysctl_tcp_mem,
2181         .sysctl_wmem            = sysctl_tcp_wmem,
2182         .sysctl_rmem            = sysctl_tcp_rmem,
2183         .max_header             = MAX_TCP_HEADER,
2184         .slab_obj_size          = sizeof(struct tcp6_sock),
2185 };
2186
2187 static struct inet6_protocol tcpv6_protocol = {
2188         .handler        =       tcp_v6_rcv,
2189         .err_handler    =       tcp_v6_err,
2190         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2191 };
2192
2193 extern struct proto_ops inet6_stream_ops;
2194
2195 static struct inet_protosw tcpv6_protosw = {
2196         .type           =       SOCK_STREAM,
2197         .protocol       =       IPPROTO_TCP,
2198         .prot           =       &tcpv6_prot,
2199         .ops            =       &inet6_stream_ops,
2200         .capability     =       -1,
2201         .no_check       =       0,
2202         .flags          =       INET_PROTOSW_PERMANENT,
2203 };
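/* .capability = -1 means no special capability is needed to create a
 * TCPv6 socket, and .no_check = 0 keeps checksumming enabled.
 */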
2204
2205 void __init tcpv6_init(void)
2206 {
2207         /* register inet6 protocol */
2208         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2209                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2210         inet6_register_protosw(&tcpv6_protosw);
2211 }