net/ipv6/tcp_ipv6.c (linux-2.6.git), as shipped in VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void     tcp_v6_send_reset(struct sk_buff *skb);
static void     tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                                  struct sk_buff *skb);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
                                    struct in6_addr *faddr, u16 fport)
{
        int hashent = (lport ^ fport);

        hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
        hashent ^= hashent>>16;
        hashent ^= hashent>>8;
        return (hashent & (tcp_ehash_size - 1));
}

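/* Hash an established socket by its {local addr, local port,
 * remote addr, remote port} 4-tuple, via the ehash function above.
 */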
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
        struct inet_opt *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *laddr = &np->rcv_saddr;
        struct in6_addr *faddr = &np->daddr;
        __u16 lport = inet->num;
        __u16 fport = inet->dport;
        return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

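/* Walk the owners of a bind bucket and report whether @sk may not
 * share the port: a conflict exists when another owner is bound to
 * the same (or a wildcard) device, SO_REUSEADDR does not permit
 * sharing (or the other socket is listening), and the receive
 * source addresses overlap.
 */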
static inline int tcp_v6_bind_conflict(struct sock *sk,
                                       struct tcp_bind_bucket *tb)
{
        struct sock *sk2;
        struct hlist_node *node;

        /* We must walk the whole port owner list in this case. -DaveM */
        sk_for_each_bound(sk2, node, &tb->owners) {
                if (sk != sk2 &&
                    (!sk->sk_bound_dev_if ||
                     !sk2->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
                    (!sk->sk_reuse || !sk2->sk_reuse ||
                     sk2->sk_state == TCP_LISTEN) &&
                     ipv6_rcv_saddr_equal(sk, sk2))
                        break;
        }

        return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;
        struct hlist_node *node;
        int ret;

        local_bh_disable();
        if (snum == 0) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int remaining = (high - low) + 1;
                int rover;

                spin_lock(&tcp_portalloc_lock);
                rover = tcp_port_rover;
                do {    rover++;
                        if ((rover < low) || (rover > high))
                                rover = low;
                        head = &tcp_bhash[tcp_bhashfn(rover)];
                        spin_lock(&head->lock);
                        tb_for_each(tb, node, &head->chain)
                                if (tb->port == rover)
                                        goto next;
                        break;
                next:
                        spin_unlock(&head->lock);
                } while (--remaining > 0);
                tcp_port_rover = rover;
                spin_unlock(&tcp_portalloc_lock);

                /* Exhausted local port range during search? */
                ret = 1;
                if (remaining <= 0)
                        goto fail;

                /* OK, here is the one we will use. */
                snum = rover;
        } else {
                head = &tcp_bhash[tcp_bhashfn(snum)];
                spin_lock(&head->lock);
                tb_for_each(tb, node, &head->chain)
                        if (tb->port == snum)
                                goto tb_found;
        }
        tb = NULL;
        goto tb_not_found;
tb_found:
        if (tb && !hlist_empty(&tb->owners)) {
                if (tb->fastreuse > 0 && sk->sk_reuse &&
                    sk->sk_state != TCP_LISTEN) {
                        goto success;
                } else {
                        ret = 1;
                        if (tcp_v6_bind_conflict(sk, tb))
                                goto fail_unlock;
                }
        }
tb_not_found:
        ret = 1;
        if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
                goto fail_unlock;
        if (hlist_empty(&tb->owners)) {
                if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
                        tb->fastreuse = 1;
                else
                        tb->fastreuse = 0;
        } else if (tb->fastreuse &&
                   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
                tb->fastreuse = 0;

success:
        if (!tcp_sk(sk)->bind_hash)
                tcp_bind_hash(sk, tb, snum);
        BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
        ret = 0;

fail_unlock:
        spin_unlock(&head->lock);
fail:
        local_bh_enable();
        return ret;
}

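/* Insert an unhashed socket into the listening hash or the
 * established hash, taking the corresponding lock.  Callers run
 * with BHs disabled.
 */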
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
        struct hlist_head *list;
        rwlock_t *lock;

        BUG_TRAP(sk_unhashed(sk));

        if (sk->sk_state == TCP_LISTEN) {
                list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
                lock = &tcp_lhash_lock;
                tcp_listen_wlock();
        } else {
                sk->sk_hashent = tcp_v6_sk_hashfn(sk);
                list = &tcp_ehash[sk->sk_hashent].chain;
                lock = &tcp_ehash[sk->sk_hashent].lock;
                write_lock(lock);
        }

        __sk_add_node(sk, list);
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
        if (sk->sk_state != TCP_CLOSE) {
                struct tcp_opt *tp = tcp_sk(sk);

                if (tp->af_specific == &ipv6_mapped) {
                        tcp_prot.hash(sk);
                        return;
                }
                local_bh_disable();
                __tcp_v6_hash(sk);
                local_bh_enable();
        }
}

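/* Find the best listening socket for @daddr/@hnum.  Candidates score
 * one point for the port match, plus one each for a matching bound
 * address and bound device; a perfect score of 3 ends the search
 * early, otherwise the highest-scoring candidate wins.
 */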
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
        struct sock *sk;
        struct hlist_node *node;
        struct sock *result = NULL;
        int score, hiscore;

        hiscore = 0;
        read_lock(&tcp_lhash_lock);
        sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
                if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        score = 1;
                        if (!ipv6_addr_any(&np->rcv_saddr)) {
                                if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
                                        continue;
                                score++;
                        }
                        if (sk->sk_bound_dev_if) {
                                if (sk->sk_bound_dev_if != dif)
                                        continue;
                                score++;
                        }
                        if (score == 3) {
                                result = sk;
                                break;
                        }
                        if (score > hiscore) {
                                hiscore = score;
                                result = sk;
                        }
                }
        }
        if (result)
                sock_hold(result);
        read_unlock(&tcp_lhash_lock);
        return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
                                                       struct in6_addr *daddr, u16 hnum,
                                                       int dif)
{
        struct tcp_ehash_bucket *head;
        struct sock *sk;
        struct hlist_node *node;
        __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
        int hash;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.
         */
        hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
        head = &tcp_ehash[hash];
        read_lock(&head->lock);
        sk_for_each(sk, node, &head->chain) {
                /* For IPV6 do the cheaper port and family tests first. */
                if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
                        goto hit; /* You sunk my battleship! */
        }
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
                /* FIXME: acme: check this... */
                struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk->sk_family                == PF_INET6) {
                        if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr)      &&
                           !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr)  &&
                           (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
                                goto hit;
                }
        }
        read_unlock(&head->lock);
        return NULL;

hit:
        sock_hold(sk);
        read_unlock(&head->lock);
        return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                           struct in6_addr *daddr, u16 hnum,
                                           int dif)
{
        struct sock *sk;

        sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

        if (sk)
                return sk;

        return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                  struct in6_addr *daddr, u16 dport,
                                  int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
        local_bh_enable();

        return sk;
}


/*
 * Open request hash tables.
 */

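/* Hash a remote address/port pair into the listener's SYN queue,
 * mixing in a per-listener random seed (an open-coded jhash over the
 * four address words plus the port).
 */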
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
        u32 a, b, c;

        a = raddr->s6_addr32[0];
        b = raddr->s6_addr32[1];
        c = raddr->s6_addr32[2];

        a += JHASH_GOLDEN_RATIO;
        b += JHASH_GOLDEN_RATIO;
        c += rnd;
        __jhash_mix(a, b, c);

        a += raddr->s6_addr32[3];
        b += (u32) rport;
        __jhash_mix(a, b, c);

        return c & (TCP_SYNQ_HSIZE - 1);
}

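/* Look up a pending open_request on listener @tp matching the remote
 * port/address, local address and (if set) inbound interface,
 * returning it and, via @prevp, the link that points to it so the
 * caller can unlink the request.
 */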
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
                                              struct open_request ***prevp,
                                              __u16 rport,
                                              struct in6_addr *raddr,
                                              struct in6_addr *laddr,
                                              int iif)
{
        struct tcp_listen_opt *lopt = tp->listen_opt;
        struct open_request *req, **prev;

        for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
             (req = *prev) != NULL;
             prev = &req->dl_next) {
                if (req->rmt_port == rport &&
                    req->class->family == AF_INET6 &&
                    !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
                    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
                    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
                        BUG_TRAP(req->sk == NULL);
                        *prevp = prev;
                        return req;
                }
        }

        return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
                                   struct in6_addr *saddr,
                                   struct in6_addr *daddr,
                                   unsigned long base)
{
        return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IPV6)) {
                return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
                                                    skb->nh.ipv6h->saddr.s6_addr32,
                                                    skb->h.th->dest,
                                                    skb->h.th->source);
        } else {
                return secure_tcp_sequence_number(skb->nh.iph->daddr,
                                                  skb->nh.iph->saddr,
                                                  skb->h.th->dest,
                                                  skb->h.th->source);
        }
}

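/* Called on connect(): verify that the chosen 4-tuple is unique in
 * the established hash.  A TIME-WAIT entry with usable timestamps
 * may be recycled; any other match means -EADDRNOTAVAIL.
 */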
static int tcp_v6_check_established(struct sock *sk)
{
        struct inet_opt *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *daddr = &np->rcv_saddr;
        struct in6_addr *saddr = &np->daddr;
        int dif = sk->sk_bound_dev_if;
        u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
        int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
        struct tcp_ehash_bucket *head = &tcp_ehash[hash];
        struct sock *sk2;
        struct hlist_node *node;
        struct tcp_tw_bucket *tw;

        write_lock_bh(&head->lock);

        /* Check TIME-WAIT sockets first. */
        sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
                tw = (struct tcp_tw_bucket*)sk2;

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk2->sk_family               == PF_INET6     &&
                   !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr)      &&
                   !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr)  &&
                   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
                        struct tcp_opt *tp = tcp_sk(sk);

                        if (tw->tw_ts_recent_stamp) {
                                /* See comment in tcp_ipv4.c */
                                tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
                                if (!tp->write_seq)
                                        tp->write_seq = 1;
                                tp->ts_recent = tw->tw_ts_recent;
                                tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
                                sock_hold(sk2);
                                goto unique;
                        } else
                                goto not_unique;
                }
        }
        tw = NULL;

        /* And established part... */
        sk_for_each(sk2, node, &head->chain) {
                if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
                        goto not_unique;
        }

unique:
        BUG_TRAP(sk_unhashed(sk));
        __sk_add_node(sk, &head->chain);
        sk->sk_hashent = hash;
        sock_prot_inc_use(sk->sk_prot);
        write_unlock_bh(&head->lock);

        if (tw) {
                /* Silly. Should hash-dance instead... */
                local_bh_disable();
                tcp_tw_deschedule(tw);
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
                local_bh_enable();

                tcp_tw_put(tw);
        }
        return 0;

not_unique:
        write_unlock_bh(&head->lock);
        return -EADDRNOTAVAIL;
}

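/* Hash a connecting socket: grab an ephemeral port if none is bound
 * yet, then either insert directly (sole owner of the bind bucket)
 * or go through the full uniqueness check above.
 */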
static int tcp_v6_hash_connect(struct sock *sk)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;

        /* XXX */
        if (inet_sk(sk)->num == 0) {
                int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
                if (err)
                        return err;
                inet_sk(sk)->sport = htons(inet_sk(sk)->num);
        }

        head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
        tb = tb_head(head);

        spin_lock_bh(&head->lock);

        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
                __tcp_v6_hash(sk);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
                spin_unlock_bh(&head->lock);
                return tcp_v6_check_established(sk);
        }
}

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
        return IP6CB(skb)->iif;
}

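/* Active open.  Handles flow labels, the v4-mapped case (handed off
 * to tcp_v4_connect() with af_specific switched to ipv6_mapped),
 * route lookup, source address selection and the initial sequence
 * number before tcp_connect() sends the SYN.
 */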
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_opt *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_opt *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL;
        struct flowi fl;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl, 0, sizeof(fl));

        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                        ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if(ipv6_addr_any(&usin->sin6_addr))
                usin->sin6_addr.s6_addr[15] = 0x1;

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if(addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (sk->sk_bound_dev_if &&
                            sk->sk_bound_dev_if != usin->sin6_scope_id)
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->ts_recent_stamp &&
            ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
                tp->ts_recent = 0;
                tp->ts_recent_stamp = 0;
                tp->write_seq = 0;
        }

        ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
        np->flow_label = fl.fl6_flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type == IPV6_ADDR_MAPPED) {
                u32 exthdrlen = tp->ext_header_len;
                struct sockaddr_in sin;

                SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                tp->af_specific = &ipv6_mapped;
                sk->sk_backlog_rcv = tcp_v4_do_rcv;

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        tp->ext_header_len = exthdrlen;
                        tp->af_specific = &ipv6_specific;
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
                        goto failure;
                } else {
                        ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->saddr);
                        ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->rcv_saddr);
                }

                return err;
        }

        if (!ipv6_addr_any(&np->rcv_saddr))
                saddr = &np->rcv_saddr;

        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
        ipv6_addr_copy(&fl.fl6_src,
                       (saddr ? saddr : &np->saddr));
        fl.oif = sk->sk_bound_dev_if;
        fl.fl_ip_dport = usin->sin6_port;
        fl.fl_ip_sport = inet->sport;

        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
                ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
        }

        err = ip6_dst_lookup(sk, &dst, &fl);

        if (err)
                goto failure;

        if (saddr == NULL) {
                saddr = &fl.fl6_src;
                ipv6_addr_copy(&np->rcv_saddr, saddr);
        }

        /* set the source address */
        ipv6_addr_copy(&np->saddr, saddr);
        inet->rcv_saddr = LOOPBACK4_IPV6;

        ip6_dst_store(sk, dst, NULL);
        sk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
        tp->ext2_header_len = dst->header_len;

        tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = tcp_v6_hash_connect(sk);
        if (err)
                goto late_failure;

        if (!tp->write_seq)
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             np->daddr.s6_addr32,
                                                             inet->sport,
                                                             inet->dport);

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        __sk_dst_reset(sk);
failure:
        inet->dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                int type, int code, int offset, __u32 info)
{
        struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
        struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct ipv6_pinfo *np;
        struct sock *sk;
        int err;
        struct tcp_opt *tp;
        __u32 seq;

        sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

        if (sk == NULL) {
                ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
                return;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                tcp_tw_put((struct tcp_tw_bucket*)sk);
                return;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        tp = tcp_sk(sk);
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = inet6_sk(sk);

        if (type == ICMPV6_PKT_TOOBIG) {
                struct dst_entry *dst = NULL;

                if (sock_owned_by_user(sk))
                        goto out;
                if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                        goto out;

                /* icmp should have updated the destination cache entry */
                dst = __sk_dst_check(sk, np->dst_cookie);

                if (dst == NULL) {
                        struct inet_opt *inet = inet_sk(sk);
                        struct flowi fl;

                        /* BUGGG_FUTURE: Again, it is not clear how
                           to handle rthdr case. Ignore this complexity
                           for now.
                         */
                        memset(&fl, 0, sizeof(fl));
                        fl.proto = IPPROTO_TCP;
                        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
                        ipv6_addr_copy(&fl.fl6_src, &np->saddr);
                        fl.oif = sk->sk_bound_dev_if;
                        fl.fl_ip_dport = inet->dport;
                        fl.fl_ip_sport = inet->sport;

                        if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }
                } else
                        dst_hold(dst);

                if (tp->pmtu_cookie > dst_pmtu(dst)) {
                        tcp_sync_mss(sk, dst_pmtu(dst));
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
                goto out;
        }

        icmpv6_err_convert(type, code, &err);

        /* Might be for an open_request */
        switch (sk->sk_state) {
                struct open_request *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
                                        &hdr->saddr, tcp_v6_iif(skb));
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                BUG_TRAP(req->sk == NULL);

                if (seq != req->snt_isn) {
                        NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                tcp_synq_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, if SYNs are crossed. --ANK */
                if (!sock_owned_by_user(sk)) {
                        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
                        sk->sk_err = err;
                        sk->sk_error_report(sk);        /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}

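/* Send a SYN-ACK for @req, doing a route lookup (honouring any
 * source route in the listener's options or the saved SYN) when no
 * destination is supplied by the caller.
 */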
static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
                              struct dst_entry *dst)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff * skb;
        struct ipv6_txoptions *opt = NULL;
        struct flowi fl;
        int err = -1;

        memset(&fl, 0, sizeof(fl));
        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
        ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
        fl.fl6_flowlabel = 0;
        fl.oif = req->af.v6_req.iif;
        fl.fl_ip_dport = req->rmt_port;
        fl.fl_ip_sport = inet_sk(sk)->sport;

        if (dst == NULL) {
                opt = np->opt;
                if (opt == NULL &&
                    np->rxopt.bits.srcrt == 2 &&
                    req->af.v6_req.pktopts) {
                        struct sk_buff *pktopts = req->af.v6_req.pktopts;
                        struct inet6_skb_parm *rxopt = IP6CB(pktopts);
                        if (rxopt->srcrt)
                                opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
                }

                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                }

                err = ip6_dst_lookup(sk, &dst, &fl);
                if (err)
                        goto done;
        }

        skb = tcp_make_synack(sk, dst, req);
        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v6_check(th, skb->len,
                                         &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
                                         csum_partial((char *)th, skb->len, skb->csum));

                ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
                err = ip6_xmit(sk, skb, &fl, opt, 0);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

done:
        dst_release(dst);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        return err;
}

static void tcp_v6_or_free(struct open_request *req)
{
        if (req->af.v6_req.pktopts)
                kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
        .family         =       AF_INET6,
        .rtx_syn_ack    =       tcp_v6_send_synack,
        .send_ack       =       tcp_v6_or_send_ack,
        .destructor     =       tcp_v6_or_free,
        .send_reset     =       tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (np->rxopt.all) {
                if ((opt->hop && np->rxopt.bits.hopopts) ||
                    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
                     np->rxopt.bits.rxflow) ||
                    (opt->srcrt && np->rxopt.bits.srcrt) ||
                    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
                        return 1;
        }
        return 0;
}


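/* Compute the TCP checksum of an outgoing segment, leaving only the
 * pseudo-header value (and the offset of the check field) for the
 * hardware when checksum offload is in use.
 */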
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                              struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);

        if (skb->ip_summed == CHECKSUM_HW) {
                th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
                skb->csum = offsetof(struct tcphdr, check);
        } else {
                th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
                                            csum_partial((char *)th, th->doff<<2,
                                                         skb->csum));
        }
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;

        if (th->rst)
                return;

        if (!ipv6_unicast_destination(skb))
                return;

        /*
         * We need to grab some memory, and put together an RST,
         * and then put it into the queue to be sent.
         */

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));

        t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = sizeof(*t1)/4;
        t1->rst = 1;

        if(th->ack) {
                t1->seq = th->ack_seq;
        } else {
                t1->ack = 1;
                t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
                                    + skb->len - (th->doff<<2));
        }

        buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    sizeof(*t1), IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        /* sk = NULL, but it is safe for now. RST socket required. */
        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

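/* Build and send a bare ACK (optionally carrying a timestamp option)
 * in reply to @skb, without an attached socket.  Used for TIME-WAIT
 * and open_request ACKs below.
 */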
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;
        int tot_len = sizeof(struct tcphdr);

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));

        if (ts)
                tot_len += 3*4;

        t1 = (struct tcphdr *) skb_push(buff, tot_len);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len/4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = 1;
        t1->window = htons(win);

        if (ts) {
                u32 *ptr = (u32*)(t1 + 1);
                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                               (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tcp_time_stamp);
                *ptr = htonl(ts);
        }

        buff->csum = csum_partial((char *)t1, tot_len, 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    tot_len, IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

        tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
                        tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

        tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
        tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
}


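/* Demultiplex a segment that arrived on a listening socket: a
 * pending open_request goes to tcp_check_req(), an established twin
 * is returned locked, and a TIME-WAIT hit is dropped.
 */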
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
        struct open_request *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = tcp_sk(sk);
        struct sock *nsk;

        /* Find possible connection requests. */
        req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
                                &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
        if (req)
                return tcp_check_req(sk, skb, req, prev);

        nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
                                          th->source,
                                          &skb->nh.ipv6h->daddr,
                                          ntohs(th->dest),
                                          tcp_v6_iif(skb));

        if (nsk) {
                if (nsk->sk_state != TCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
                tcp_tw_put((struct tcp_tw_bucket*)nsk);
                return NULL;
        }

#if 0 /*def CONFIG_SYN_COOKIES*/
        if (!th->rst && !th->syn && th->ack)
                sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
        return sk;
}

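/* Link a fresh open_request into the listener's SYN table under
 * syn_wait_lock and bump the queue accounting.
 */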
static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
        struct tcp_opt *tp = tcp_sk(sk);
        struct tcp_listen_opt *lopt = tp->listen_opt;
        u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);

        req->sk = NULL;
        req->expires = jiffies + TCP_TIMEOUT_INIT;
        req->retrans = 0;
        req->dl_next = lopt->syn_table[h];

        write_lock(&tp->syn_wait_lock);
        lopt->syn_table[h] = req;
        write_unlock(&tp->syn_wait_lock);

        tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_opt tmptp, *tp = tcp_sk(sk);
        struct open_request *req = NULL;
        __u32 isn = TCP_SKB_CB(skb)->when;

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        /*
         *      There are no SYN attacks on IPv6, yet...
         */
        if (tcp_synq_is_full(sk) && !isn) {
                if (net_ratelimit())
                        printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                goto drop;
        }

        if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
                goto drop;

        req = tcp_openreq_alloc();
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tmptp);
        tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tmptp.user_mss = tp->user_mss;

        tcp_parse_options(skb, &tmptp, 0);

        tmptp.tstamp_ok = tmptp.saw_tstamp;
        tcp_openreq_init(req, &tmptp, skb);

        req->class = &or_ipv6;
        ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
        TCP_ECN_create_request(req, skb->h.th);
        req->af.v6_req.pktopts = NULL;
        if (ipv6_opt_accepted(sk, skb) ||
            np->rxopt.bits.rxinfo ||
            np->rxopt.bits.rxhlim) {
                atomic_inc(&skb->users);
                req->af.v6_req.pktopts = skb;
        }
        req->af.v6_req.iif = sk->sk_bound_dev_if;

        /* So that link locals have meaning */
        if (!sk->sk_bound_dev_if &&
            ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
                req->af.v6_req.iif = tcp_v6_iif(skb);

        if (isn == 0)
                isn = tcp_v6_init_sequence(sk, skb);

        req->snt_isn = isn;

        if (tcp_v6_send_synack(sk, req, NULL))
                goto drop;

        tcp_v6_synq_add(sk, req);

        return 0;

drop:
        if (req)
                tcp_openreq_free(req);

        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        return 0; /* don't send reset */
}

static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                          struct open_request *req,
                                          struct dst_entry *dst)
{
        struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
        struct tcp6_sock *newtcp6sk;
        struct inet_opt *newinet;
        struct tcp_opt *newtp;
        struct sock *newsk;
        struct ipv6_txoptions *opt;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

                if (newsk == NULL)
                        return NULL;

                newtcp6sk = (struct tcp6_sock *)newsk;
                newtcp6sk->pinet6 = &newtcp6sk->inet6;

                newinet = inet_sk(newsk);
                newnp = inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
                              newinet->daddr);

                ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
                              newinet->saddr);

                ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

                newtp->af_specific = &ipv6_mapped;
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = tcp_v6_iif(skb);
                newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

                /* Charge newly allocated IPv6 socket. Though it is mapped,
                 * it is IPv6 yet.
                 */
#ifdef INET_REFCNT_DEBUG
                atomic_inc(&inet6_sock_nr);
#endif

                /* This is a tricky place. Until this moment IPv4 tcp
                   worked with IPv6 af_tcp.af_specific.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, newtp->pmtu_cookie);

                return newsk;
        }

        opt = np->opt;

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (np->rxopt.bits.srcrt == 2 &&
            opt == NULL && req->af.v6_req.pktopts) {
                struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
                if (rxopt->srcrt)
                        opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
        }

        if (dst == NULL) {
                struct flowi fl;

                memset(&fl, 0, sizeof(fl));
                fl.proto = IPPROTO_TCP;
                ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                }
                ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = req->rmt_port;
                fl.fl_ip_sport = inet_sk(sk)->sport;

                if (ip6_dst_lookup(sk, &dst, &fl))
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
                goto out;

        /* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
        atomic_inc(&inet6_sock_nr);
#endif

        ip6_dst_store(newsk, dst, NULL);
        newsk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        newtcp6sk = (struct tcp6_sock *)newsk;
        newtcp6sk->pinet6 = &newtcp6sk->inet6;

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
        ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
        ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
        newsk->sk_bound_dev_if = req->af.v6_req.iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->opt = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        /* Clone pktoptions received with SYN */
        newnp->pktoptions = NULL;
        if (req->af.v6_req.pktopts) {
                newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
                                              GFP_ATOMIC);
                kfree_skb(req->af.v6_req.pktopts);
                req->af.v6_req.pktopts = NULL;
                if (newnp->pktoptions)
                        skb_set_owner_r(newnp->pktoptions, newsk);
        }
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

        /* Clone native IPv6 options from listening socket (if any)

           Yes, keeping reference count would be much more clever,
           but we do one more thing here: reattach optmem
           to newsk.
         */
        if (opt) {
                newnp->opt = ipv6_dup_options(newsk, opt);
                if (opt != np->opt)
                        sock_kfree_s(sk, opt, opt->tot_len);
        }

        newtp->ext_header_len = 0;
        if (newnp->opt)
                newtp->ext_header_len = newnp->opt->opt_nflen +
                                        newnp->opt->opt_flen;
        newtp->ext2_header_len = dst->header_len;

        tcp_sync_mss(newsk, dst_pmtu(dst));
        newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
        tcp_initialize_rcv_mss(newsk);

        newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

        __tcp_v6_hash(newsk);
        tcp_inherit_port(sk, newsk);

        return newsk;

out_overflow:
        NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
        NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        dst_release(dst);
        return NULL;
}

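/* Validate or prepare the checksum of an incoming segment: trust
 * verified hardware checksums, fully verify short packets (<= 76
 * bytes) now, and leave the rest with a seeded pseudo-header sum
 * for deferred verification.
 */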
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
        if (skb->ip_summed == CHECKSUM_HW) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
                                  &skb->nh.ipv6h->daddr, skb->csum))
                        return 0;
                LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
        }
        if (skb->len <= 76) {
                if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
                                 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
                        return -1;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else {
                skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
                                          &skb->nh.ipv6h->daddr, 0);
        }
        return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1452 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1453 {
1454         struct ipv6_pinfo *np = inet6_sk(sk);
1455         struct tcp_opt *tp;
1456         struct sk_buff *opt_skb = NULL;
1457
1458         /* Imagine: socket is IPv6. IPv4 packet arrives,
1459            goes to IPv4 receive handler and backlogged.
1460            From backlog it always goes here. Kerboom...
1461            Fortunately, tcp_rcv_established and rcv_established
1462            handle them correctly, but it is not case with
1463            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1464          */
1465
1466         if (skb->protocol == htons(ETH_P_IP))
1467                 return tcp_v4_do_rcv(sk, skb);
1468
1469         if (sk_filter(sk, skb, 0))
1470                 goto discard;
1471
1472         /*
1473          *      socket locking is here for SMP purposes as backlog rcv
1474          *      is currently called with bh processing disabled.
1475          */
1476
1477         /* Do Stevens' IPV6_PKTOPTIONS.
1478
1479            Yes, guys, it is the only place in our code, where we
1480            may make it not affecting IPv4.
1481            The rest of code is protocol independent,
1482            and I do not like idea to uglify IPv4.
1483
1484            Actually, all the idea behind IPV6_PKTOPTIONS
1485            looks not very well thought. For now we latch
1486            options, received in the last packet, enqueued
1487            by tcp. Feel free to propose better solution.
1488                                                --ANK (980728)
1489          */
1490         if (np->rxopt.all)
1491                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1492
1493         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1494                 TCP_CHECK_TIMER(sk);
1495                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1496                         goto reset;
1497                 TCP_CHECK_TIMER(sk);
1498                 if (opt_skb)
1499                         goto ipv6_pktoptions;
1500                 return 0;
1501         }
1502
1503         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1504                 goto csum_err;
1505
1506         if (sk->sk_state == TCP_LISTEN) { 
1507                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1508                 if (!nsk)
1509                         goto discard;
1510
1511                 /*
1512                  * Queue it on the new socket if the new socket is active,
1513                  * otherwise we just short-circuit this and continue with
1514                  * the new socket.
1515                  */
1516                 if (nsk != sk) {
1517                         if (tcp_child_process(sk, nsk, skb))
1518                                 goto reset;
1519                         if (opt_skb)
1520                                 __kfree_skb(opt_skb);
1521                         return 0;
1522                 }
1523         }
1524
1525         TCP_CHECK_TIMER(sk);
1526         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1527                 goto reset;
1528         TCP_CHECK_TIMER(sk);
1529         if (opt_skb)
1530                 goto ipv6_pktoptions;
1531         return 0;
1532
1533 reset:
1534         tcp_v6_send_reset(skb);
1535 discard:
1536         if (opt_skb)
1537                 __kfree_skb(opt_skb);
1538         kfree_skb(skb);
1539         return 0;
1540 csum_err:
1541         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1542         goto discard;
1543
1544
1545 ipv6_pktoptions:
1546         /* You may ask, what is this?
1547
1548            1. skb was enqueued by tcp.
1549            2. skb was added to the tail of the read queue, not out of order.
1550            3. The socket is not in a passive state.
1551            4. Finally, it really contains options the user wants to receive.
1552          */
1553         tp = tcp_sk(sk);
1554         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1555             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1556                 if (np->rxopt.bits.rxinfo)
1557                         np->mcast_oif = tcp_v6_iif(opt_skb);
1558                 if (np->rxopt.bits.rxhlim)
1559                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1560                 if (ipv6_opt_accepted(sk, opt_skb)) {
1561                         skb_set_owner_r(opt_skb, sk);
1562                         opt_skb = xchg(&np->pktoptions, opt_skb);
1563                 } else {
1564                         __kfree_skb(opt_skb);
1565                         opt_skb = xchg(&np->pktoptions, NULL);
1566                 }
1567         }
1568
1569         if (opt_skb)
1570                 kfree_skb(opt_skb);
1571         return 0;
1572 }
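/*
 * The net effect of the ipv6_pktoptions path above is that
 * np->pktoptions always latches the ancillary data of the most recent
 * in-order segment.  A rough user-space sketch of consuming it,
 * assuming the RFC 2292 era boolean IPV6_PKTINFO switch and the
 * Linux-specific IPV6_PKTOPTIONS getsockopt that replays the latched
 * skb as control messages:
 */
#if 0	/* illustrative sketch only, never compiled */
#include <netinet/in.h>
#include <sys/socket.h>
#include <string.h>

static int example_read_pktoptions(int fd)
{
	int on = 1;
	unsigned char cbuf[256];
	socklen_t clen = sizeof(cbuf);
	struct msghdr msg;
	struct cmsghdr *cmsg;

	/* Sets np->rxopt.bits, so tcp_v6_do_rcv() starts cloning skbs. */
	setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &on, sizeof(on));

	/* Returns whatever options the last queued segment carried. */
	if (getsockopt(fd, IPPROTO_IPV6, IPV6_PKTOPTIONS, cbuf, &clen) < 0)
		return -1;

	memset(&msg, 0, sizeof(msg));
	msg.msg_control = cbuf;
	msg.msg_controllen = clen;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == IPPROTO_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO)
			return 0;	/* found the struct in6_pktinfo */
	return 1;	/* nothing latched yet */
}
#endif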
1573
1574 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1575 {
1576         struct sk_buff *skb = *pskb;
1577         struct tcphdr *th;      
1578         struct sock *sk;
1579         int ret;
1580
1581         if (skb->pkt_type != PACKET_HOST)
1582                 goto discard_it;
1583
1584         /*
1585          *      Count it even if it's bad.
1586          */
1587         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1588
1589         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1590                 goto discard_it;
1591
1592         th = skb->h.th;
1593
1594         if (th->doff < sizeof(struct tcphdr)/4)
1595                 goto bad_packet;
1596         if (!pskb_may_pull(skb, th->doff*4))
1597                 goto discard_it;
1598
1599         if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
1600             tcp_v6_checksum_init(skb) < 0)
1601                 goto bad_packet;
1602
1603         th = skb->h.th;
1604         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1605         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1606                                     skb->len - th->doff*4);
1607         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1608         TCP_SKB_CB(skb)->when = 0;
1609         TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1610         TCP_SKB_CB(skb)->sacked = 0;
1611
1612         sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1613                              &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1614
1615         if (!sk)
1616                 goto no_tcp_socket;
1617
1618 process:
1619         if (sk->sk_state == TCP_TIME_WAIT)
1620                 goto do_time_wait;
1621
1622         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1623                 goto discard_and_relse;
1624
1625         if (sk_filter(sk, skb, 0))
1626                 goto discard_and_relse;
1627
1628         skb->dev = NULL;
1629
1630         bh_lock_sock(sk);
1631         ret = 0;
1632         if (!sock_owned_by_user(sk)) {
1633                 if (!tcp_prequeue(sk, skb))
1634                         ret = tcp_v6_do_rcv(sk, skb);
1635         } else
1636                 sk_add_backlog(sk, skb);
1637         bh_unlock_sock(sk);
1638
1639         sock_put(sk);
1640         return ret ? -1 : 0;
1641
1642 no_tcp_socket:
1643         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1644                 goto discard_it;
1645
1646         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1647 bad_packet:
1648                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1649         } else {
1650                 tcp_v6_send_reset(skb);
1651         }
1652
1653 discard_it:
1654
1655         /*
1656          *      Discard frame
1657          */
1658
1659         kfree_skb(skb);
1660         return 0;
1661
1662 discard_and_relse:
1663         sock_put(sk);
1664         goto discard_it;
1665
1666 do_time_wait:
1667         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1668                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1669                 goto discard_it;
1670         }
1671
1672         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1673                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1674                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1675                 goto discard_it;
1676         }
1677
1678         switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1679                                           skb, th, skb->len)) {
1680         case TCP_TW_SYN:
1681         {
1682                 struct sock *sk2;
1683
1684                 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1685                 if (sk2 != NULL) {
1686                         tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1687                         tcp_tw_put((struct tcp_tw_bucket *)sk);
1688                         sk = sk2;
1689                         goto process;
1690                 }
1691                 /* Fall through to ACK */
1692         }
1693         case TCP_TW_ACK:
1694                 tcp_v6_timewait_ack(sk, skb);
1695                 break;
1696         case TCP_TW_RST:
1697                 goto no_tcp_socket;
1698         case TCP_TW_SUCCESS:;
1699         }
1700         goto discard_it;
1701 }
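/*
 * Reduced to its skeleton, the delivery decision in tcp_v6_rcv() above
 * is the three-way split that the double-lock comment before
 * tcp_v6_do_rcv() alludes to: whoever owns the socket lock processes
 * the segment, everyone else defers it.  Restated (sketch only):
 */
#if 0	/* illustrative sketch only, never compiled */
static int example_deliver(struct sock *sk, struct sk_buff *skb)
{
	int ret = 0;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/* No process-context owner: either park the skb on the
		 * prequeue for a sleeping reader, or run the full
		 * receive path right here in softirq context.
		 */
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else {
		/* A process-context caller holds the lock: queue the skb
		 * on the backlog.  release_sock() replays it through
		 * backlog_rcv == tcp_v6_do_rcv, which is why that
		 * function must never take the socket lock itself.
		 */
		sk_add_backlog(sk, skb);
	}
	bh_unlock_sock(sk);
	return ret;
}
#endif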
1702
1703 static int tcp_v6_rebuild_header(struct sock *sk)
1704 {
1705         int err;
1706         struct dst_entry *dst;
1707         struct ipv6_pinfo *np = inet6_sk(sk);
1708
1709         dst = __sk_dst_check(sk, np->dst_cookie);
1710
1711         if (dst == NULL) {
1712                 struct inet_opt *inet = inet_sk(sk);
1713                 struct flowi fl;
1714
1715                 memset(&fl, 0, sizeof(fl));
1716                 fl.proto = IPPROTO_TCP;
1717                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1718                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1719                 fl.fl6_flowlabel = np->flow_label;
1720                 fl.oif = sk->sk_bound_dev_if;
1721                 fl.fl_ip_dport = inet->dport;
1722                 fl.fl_ip_sport = inet->sport;
1723
1724                 if (np->opt && np->opt->srcrt) {
1725                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1726                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1727                 }
1728
1729                 err = ip6_dst_lookup(sk, &dst, &fl);
1730
1731                 if (err) {
1732                         sk->sk_route_caps = 0;
1733                         return err;
1734                 }
1735
1736                 ip6_dst_store(sk, dst, NULL);
1737                 sk->sk_route_caps = dst->dev->features &
1738                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1739                 tcp_sk(sk)->ext2_header_len = dst->header_len;
1740         }
1741
1742         return 0;
1743 }
1744
1745 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1746 {
1747         struct sock *sk = skb->sk;
1748         struct inet_opt *inet = inet_sk(sk);
1749         struct ipv6_pinfo *np = inet6_sk(sk);
1750         struct flowi fl;
1751         struct dst_entry *dst;
1752
1753         memset(&fl, 0, sizeof(fl));
1754         fl.proto = IPPROTO_TCP;
1755         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1756         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1757         fl.fl6_flowlabel = np->flow_label;
1758         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1759         fl.oif = sk->sk_bound_dev_if;
1760         fl.fl_ip_sport = inet->sport;
1761         fl.fl_ip_dport = inet->dport;
1762
1763         if (np->opt && np->opt->srcrt) {
1764                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1765                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1766         }
1767
1768         dst = __sk_dst_check(sk, np->dst_cookie);
1769
1770         if (dst == NULL) {
1771                 int err = ip6_dst_lookup(sk, &dst, &fl);
1772
1773                 if (err) {
1774                         sk->sk_err_soft = -err;
1775                         return err;
1776                 }
1777
1778                 ip6_dst_store(sk, dst, NULL);
1779                 sk->sk_route_caps = dst->dev->features &
1780                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1781                 tcp_sk(sk)->ext2_header_len = dst->header_len;
1782         }
1783
1784         skb->dst = dst_clone(dst);
1785
1786         /* Restore the final destination after the route lookup is done */
1787         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1788
1789         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1790 }
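/*
 * Note the source-routing dance above: with a type 0 routing header
 * (np->opt->srcrt) the route lookup must target the first intermediate
 * hop, while ip6_xmit() wants fl.fl6_dst to name the final endpoint
 * again, so that it can build the extension header and put the first
 * hop into the IPv6 destination field itself.  Condensed (sketch only):
 */
#if 0	/* illustrative sketch only, never compiled */
static void example_srcrt_lookup(struct flowi *fl,
				 struct ipv6_txoptions *opt,
				 struct in6_addr *final_dst)
{
	struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;

	/* 1. Route towards the first listed hop... */
	ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
	/* 2. ...do the ip6_dst_lookup() with that address... */
	/* 3. ...then restore the real endpoint before ip6_xmit(). */
	ipv6_addr_copy(&fl->fl6_dst, final_dst);
}
#endif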
1791
1792 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
1793 {
1794         struct ipv6_pinfo *np = inet6_sk(sk);
1795         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1796
1797         sin6->sin6_family = AF_INET6;
1798         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1799         sin6->sin6_port = inet_sk(sk)->dport;
1800         /* We do not store the received flowlabel for TCP */
1801         sin6->sin6_flowinfo = 0;
1802         sin6->sin6_scope_id = 0;
1803         if (sk->sk_bound_dev_if &&
1804             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1805                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1806 }
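/*
 * What the addr2sockaddr hook above fills in is exactly what
 * getpeername() hands back to user space; a non-zero sin6_scope_id
 * shows up only for link-local peers on a bound device.  A rough
 * user-space sketch:
 */
#if 0	/* illustrative sketch only, never compiled */
#include <netinet/in.h>
#include <sys/socket.h>

static unsigned int example_peer_scope(int fd)
{
	struct sockaddr_in6 sin6;
	socklen_t len = sizeof(sin6);

	if (getpeername(fd, (struct sockaddr *) &sin6, &len) < 0)
		return 0;
	return sin6.sin6_scope_id;	/* interface index, or 0 */
}
#endif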
1807
1808 static int tcp_v6_remember_stamp(struct sock *sk)
1809 {
1810         /* Alas, not yet... */
1811         return 0;
1812 }
1813
1814 static struct tcp_func ipv6_specific = {
1815         .queue_xmit     =       tcp_v6_xmit,
1816         .send_check     =       tcp_v6_send_check,
1817         .rebuild_header =       tcp_v6_rebuild_header,
1818         .conn_request   =       tcp_v6_conn_request,
1819         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1820         .remember_stamp =       tcp_v6_remember_stamp,
1821         .net_header_len =       sizeof(struct ipv6hdr),
1822
1823         .setsockopt     =       ipv6_setsockopt,
1824         .getsockopt     =       ipv6_getsockopt,
1825         .addr2sockaddr  =       v6_addr2sockaddr,
1826         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1827 };
1828
1829 /*
1830  *      TCP over IPv4 via INET6 API
1831  */
1832
1833 static struct tcp_func ipv6_mapped = {
1834         .queue_xmit     =       ip_queue_xmit,
1835         .send_check     =       tcp_v4_send_check,
1836         .rebuild_header =       tcp_v4_rebuild_header,
1837         .conn_request   =       tcp_v6_conn_request,
1838         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1839         .remember_stamp =       tcp_v4_remember_stamp,
1840         .net_header_len =       sizeof(struct iphdr),
1841
1842         .setsockopt     =       ipv6_setsockopt,
1843         .getsockopt     =       ipv6_getsockopt,
1844         .addr2sockaddr  =       v6_addr2sockaddr,
1845         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1846 };
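/*
 * Connecting an AF_INET6 socket to a v4-mapped address is what flips
 * tp->af_specific from ipv6_specific to ipv6_mapped in
 * tcp_v6_connect(); from then on the socket transmits plain IPv4.  A
 * rough user-space sketch (RFC 3330 test address, arbitrary port):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <netinet/in.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <string.h>

static int example_mapped_connect(void)
{
	struct sockaddr_in6 sin6;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(80);
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sin6.sin6_addr);
	return connect(fd, (struct sockaddr *) &sin6, sizeof(sin6));
}
#endif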
1847
1848
1849
1850 /* NOTE: A lot of things are set to zero explicitly by the call to
1851  *       sk_alloc(), so they need not be done here.
1852  */
1853 static int tcp_v6_init_sock(struct sock *sk)
1854 {
1855         struct tcp_opt *tp = tcp_sk(sk);
1856
1857         skb_queue_head_init(&tp->out_of_order_queue);
1858         tcp_init_xmit_timers(sk);
1859         tcp_prequeue_init(tp);
1860
1861         tp->rto  = TCP_TIMEOUT_INIT;
1862         tp->mdev = TCP_TIMEOUT_INIT;
1863
1864         /* So many TCP implementations out there (incorrectly) count the
1865          * initial SYN frame in their delayed-ACK and congestion control
1866          * algorithms that we must have the following bandaid to talk
1867          * efficiently to them.  -DaveM
1868          */
1869         tp->snd_cwnd = 2;
1870
1871         /* See draft-stevens-tcpca-spec-01 for discussion of the
1872          * initialization of these values.
1873          */
1874         tp->snd_ssthresh = 0x7fffffff;
1875         tp->snd_cwnd_clamp = ~0;
1876         tp->mss_cache = 536;
1877
1878         tp->reordering = sysctl_tcp_reordering;
1879
1880         sk->sk_state = TCP_CLOSE;
1881
1882         tp->af_specific = &ipv6_specific;
1883
1884         sk->sk_write_space = sk_stream_write_space;
1885         sk->sk_use_write_queue = 1;
1886
1887         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1888         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1889
1890         atomic_inc(&tcp_sockets_allocated);
1891
1892         return 0;
1893 }
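/*
 * Worked example of the defaults above: with snd_cwnd == 2 and
 * mss_cache == 536 the first flight after the handshake is capped at
 * 2 * 536 = 1072 bytes, and snd_ssthresh == 0x7fffffff means slow
 * start keeps doubling the window every RTT until loss (or the
 * receiver's advertised window) intervenes.
 */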
1894
1895 static int tcp_v6_destroy_sock(struct sock *sk)
1896 {
1897         extern int tcp_v4_destroy_sock(struct sock *sk);
1898
1899         tcp_v4_destroy_sock(sk);
1900         return inet6_destroy_sock(sk);
1901 }
1902
1903 /* Proc filesystem TCPv6 sock list dumping. */
1904 static void get_openreq6(struct seq_file *seq, 
1905                          struct sock *sk, struct open_request *req, int i, int uid)
1906 {
1907         struct in6_addr *dest, *src;
1908         int ttd = req->expires - jiffies;
1909
1910         if (ttd < 0)
1911                 ttd = 0;
1912
1913         src = &req->af.v6_req.loc_addr;
1914         dest = &req->af.v6_req.rmt_addr;
1915         seq_printf(seq,
1916                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1917                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1918                    i,
1919                    src->s6_addr32[0], src->s6_addr32[1],
1920                    src->s6_addr32[2], src->s6_addr32[3],
1921                    ntohs(inet_sk(sk)->sport),
1922                    dest->s6_addr32[0], dest->s6_addr32[1],
1923                    dest->s6_addr32[2], dest->s6_addr32[3],
1924                    ntohs(req->rmt_port),
1925                    TCP_SYN_RECV,
1926                    0, 0, /* could print option size, but that is af dependent. */
1927                    1,   /* timers active (only the expire timer) */  
1928                    jiffies_to_clock_t(ttd), 
1929                    req->retrans,
1930                    uid,
1931                    0,  /* non standard timer */  
1932                    0, /* open_requests have no inode */
1933                    0, req);
1934 }
1935
1936 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1937 {
1938         struct in6_addr *dest, *src;
1939         __u16 destp, srcp;
1940         int timer_active;
1941         unsigned long timer_expires;
1942         struct inet_opt *inet = inet_sk(sp);
1943         struct tcp_opt *tp = tcp_sk(sp);
1944         struct ipv6_pinfo *np = inet6_sk(sp);
1945
1946         dest  = &np->daddr;
1947         src   = &np->rcv_saddr;
1948         destp = ntohs(inet->dport);
1949         srcp  = ntohs(inet->sport);
1950         if (tp->pending == TCP_TIME_RETRANS) {
1951                 timer_active    = 1;
1952                 timer_expires   = tp->timeout;
1953         } else if (tp->pending == TCP_TIME_PROBE0) {
1954                 timer_active    = 4;
1955                 timer_expires   = tp->timeout;
1956         } else if (timer_pending(&sp->sk_timer)) {
1957                 timer_active    = 2;
1958                 timer_expires   = sp->sk_timer.expires;
1959         } else {
1960                 timer_active    = 0;
1961                 timer_expires = jiffies;
1962         }
1963
1964         seq_printf(seq,
1965                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1966                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1967                    i,
1968                    src->s6_addr32[0], src->s6_addr32[1],
1969                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1970                    dest->s6_addr32[0], dest->s6_addr32[1],
1971                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1972                    sp->sk_state, 
1973                    tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
1974                    timer_active,
1975                    jiffies_to_clock_t(timer_expires - jiffies),
1976                    tp->retransmits,
1977                    sock_i_uid(sp),
1978                    tp->probes_out,
1979                    sock_i_ino(sp),
1980                    atomic_read(&sp->sk_refcnt), sp,
1981                    tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
1982                    tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
1983                    );
1984 }
1985
1986 static void get_timewait6_sock(struct seq_file *seq, 
1987                                struct tcp_tw_bucket *tw, int i)
1988 {
1989         struct in6_addr *dest, *src;
1990         __u16 destp, srcp;
1991         int ttd = tw->tw_ttd - jiffies;
1992
1993         if (ttd < 0)
1994                 ttd = 0;
1995
1996         dest  = &tw->tw_v6_daddr;
1997         src   = &tw->tw_v6_rcv_saddr;
1998         destp = ntohs(tw->tw_dport);
1999         srcp  = ntohs(tw->tw_sport);
2000
2001         seq_printf(seq,
2002                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2003                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2004                    i,
2005                    src->s6_addr32[0], src->s6_addr32[1],
2006                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2007                    dest->s6_addr32[0], dest->s6_addr32[1],
2008                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2009                    tw->tw_substate, 0, 0,
2010                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2011                    atomic_read(&tw->tw_refcnt), tw);
2012 }
2013
2014 #ifdef CONFIG_PROC_FS
2015 static int tcp6_seq_show(struct seq_file *seq, void *v)
2016 {
2017         struct tcp_iter_state *st;
2018
2019         if (v == SEQ_START_TOKEN) {
2020                 seq_puts(seq,
2021                          "  sl  "
2022                          "local_address                         "
2023                          "remote_address                        "
2024                          "st tx_queue rx_queue tr tm->when retrnsmt"
2025                          "   uid  timeout inode\n");
2026                 goto out;
2027         }
2028         st = seq->private;
2029
2030         switch (st->state) {
2031         case TCP_SEQ_STATE_LISTENING:
2032         case TCP_SEQ_STATE_ESTABLISHED:
2033                 get_tcp6_sock(seq, v, st->num);
2034                 break;
2035         case TCP_SEQ_STATE_OPENREQ:
2036                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2037                 break;
2038         case TCP_SEQ_STATE_TIME_WAIT:
2039                 get_timewait6_sock(seq, v, st->num);
2040                 break;
2041         }
2042 out:
2043         return 0;
2044 }
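/*
 * The helpers above print fixed-width records, so a /proc/net/tcp6
 * reader can parse the address columns of every row with one format.
 * A rough user-space sketch (helper name invented; the address columns
 * are raw s6_addr32 words in host byte order, so ::1 reads back as
 * 00000000000000000000000001000000 on little-endian machines):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdio.h>

static int example_parse_tcp6_row(const char *line,
				  unsigned int laddr[4],
				  unsigned int *lport)
{
	/* e.g. "   0: 00000000000000000000000001000000:0016 ..." */
	return sscanf(line, " %*d: %8x%8x%8x%8x:%4x",
		      &laddr[0], &laddr[1], &laddr[2], &laddr[3],
		      lport) == 5 ? 0 : -1;
}
#endif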
2045
2046 static struct file_operations tcp6_seq_fops;
2047 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2048         .owner          = THIS_MODULE,
2049         .name           = "tcp6",
2050         .family         = AF_INET6,
2051         .seq_show       = tcp6_seq_show,
2052         .seq_fops       = &tcp6_seq_fops,
2053 };
2054
2055 int __init tcp6_proc_init(void)
2056 {
2057         return tcp_proc_register(&tcp6_seq_afinfo);
2058 }
2059
2060 void tcp6_proc_exit(void)
2061 {
2062         tcp_proc_unregister(&tcp6_seq_afinfo);
2063 }
2064 #endif
2065
2066 struct proto tcpv6_prot = {
2067         .name                   = "TCPv6",
2068         .close                  = tcp_close,
2069         .connect                = tcp_v6_connect,
2070         .disconnect             = tcp_disconnect,
2071         .accept                 = tcp_accept,
2072         .ioctl                  = tcp_ioctl,
2073         .init                   = tcp_v6_init_sock,
2074         .destroy                = tcp_v6_destroy_sock,
2075         .shutdown               = tcp_shutdown,
2076         .setsockopt             = tcp_setsockopt,
2077         .getsockopt             = tcp_getsockopt,
2078         .sendmsg                = tcp_sendmsg,
2079         .recvmsg                = tcp_recvmsg,
2080         .backlog_rcv            = tcp_v6_do_rcv,
2081         .hash                   = tcp_v6_hash,
2082         .unhash                 = tcp_unhash,
2083         .get_port               = tcp_v6_get_port,
2084         .enter_memory_pressure  = tcp_enter_memory_pressure,
2085         .sockets_allocated      = &tcp_sockets_allocated,
2086         .memory_allocated       = &tcp_memory_allocated,
2087         .memory_pressure        = &tcp_memory_pressure,
2088         .sysctl_mem             = sysctl_tcp_mem,
2089         .sysctl_wmem            = sysctl_tcp_wmem,
2090         .sysctl_rmem            = sysctl_tcp_rmem,
2091         .max_header             = MAX_TCP_HEADER,
2092 };
2093
2094 static struct inet6_protocol tcpv6_protocol = {
2095         .handler        =       tcp_v6_rcv,
2096         .err_handler    =       tcp_v6_err,
2097         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2098 };
2099
2100 extern struct proto_ops inet6_stream_ops;
2101
2102 static struct inet_protosw tcpv6_protosw = {
2103         .type           =       SOCK_STREAM,
2104         .protocol       =       IPPROTO_TCP,
2105         .prot           =       &tcpv6_prot,
2106         .ops            =       &inet6_stream_ops,
2107         .capability     =       -1,
2108         .no_check       =       0,
2109         .flags          =       INET_PROTOSW_PERMANENT,
2110 };
2111
2112 void __init tcpv6_init(void)
2113 {
2114         /* register inet6 protocol */
2115         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2116                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2117         inet6_register_protosw(&tcpv6_protosw);
2118 }