3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
/* Forward declarations — definitions appear later in this file. */
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
/* NOTE(review): the continuation line of this prototype is missing from this extract. */
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
/* Per-socket operation tables: ipv6_mapped is installed for v4-mapped
 * destinations in tcp_v6_connect(); ipv6_specific for native IPv6. */
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
/* Bind @sk to local port @snum; when snum == 0, pick an ephemeral port
 * from sysctl_local_port_range using the global rover.  Returns 0 on
 * success.  NOTE(review): several original source lines are missing from
 * this extract (opening braces, the snum==0/else split, goto labels and
 * the failure path), so the listing below is not contiguous.
 */
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
/* Ephemeral search: walk the rover over [low, high] under tcp_portalloc_lock. */
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 if (tcp_port_rover < low)
145 rover = tcp_port_rover;
149 head = &tcp_bhash[tcp_bhashfn(rover)];
150 spin_lock(&head->lock);
151 tb_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_port_rover = rover;
159 spin_unlock(&tcp_portalloc_lock);
161 /* Exhausted local port range during search? */
166 /* OK, here is the one we will use. */
/* Explicit-port path: look the port up in the bind hash. */
169 head = &tcp_bhash[tcp_bhashfn(snum)];
170 spin_lock(&head->lock);
171 tb_for_each(tb, node, &head->chain)
172 if (tb->port == snum)
/* Port already owned: fastreuse short-circuits the conflict walk. */
178 if (tb && !hlist_empty(&tb->owners)) {
179 if (tb->fastreuse > 0 && sk->sk_reuse &&
180 sk->sk_state != TCP_LISTEN) {
184 if (tcp_v6_bind_conflict(sk, tb))
190 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
192 if (hlist_empty(&tb->owners)) {
193 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
197 } else if (tb->fastreuse &&
198 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
202 if (!tcp_sk(sk)->bind_hash)
203 tcp_bind_hash(sk, tb, snum);
204 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
208 spin_unlock(&head->lock);
214 static __inline__ void __tcp_v6_hash(struct sock *sk)
216 struct hlist_head *list;
219 BUG_TRAP(sk_unhashed(sk));
221 if (sk->sk_state == TCP_LISTEN) {
222 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
223 lock = &tcp_lhash_lock;
226 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
227 list = &tcp_ehash[sk->sk_hashent].chain;
228 lock = &tcp_ehash[sk->sk_hashent].lock;
232 __sk_add_node(sk, list);
233 sock_prot_inc_use(sk->sk_prot);
238 static void tcp_v6_hash(struct sock *sk)
240 if (sk->sk_state != TCP_CLOSE) {
241 struct tcp_sock *tp = tcp_sk(sk);
243 if (tp->af_specific == &ipv6_mapped) {
253 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
256 struct hlist_node *node;
257 struct sock *result = NULL;
261 read_lock(&tcp_lhash_lock);
262 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
263 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
264 struct ipv6_pinfo *np = inet6_sk(sk);
267 if (!ipv6_addr_any(&np->rcv_saddr)) {
268 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
272 if (sk->sk_bound_dev_if) {
273 if (sk->sk_bound_dev_if != dif)
281 if (score > hiscore) {
289 read_unlock(&tcp_lhash_lock);
293 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
294 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
296 * The sockhash lock must be held as a reader here.
299 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
300 struct in6_addr *daddr, u16 hnum,
303 struct tcp_ehash_bucket *head;
305 struct hlist_node *node;
306 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
309 /* Optimize here for direct hit, only listening connections can
310 * have wildcards anyways.
312 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
313 head = &tcp_ehash[hash];
314 read_lock(&head->lock);
315 sk_for_each(sk, node, &head->chain) {
316 /* For IPV6 do the cheaper port and family tests first. */
317 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
318 goto hit; /* You sunk my battleship! */
320 /* Must check for a TIME_WAIT'er before going to listener hash. */
321 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
322 /* FIXME: acme: check this... */
323 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
325 if(*((__u32 *)&(tw->tw_dport)) == ports &&
326 sk->sk_family == PF_INET6) {
327 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
328 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
329 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
333 read_unlock(&head->lock);
338 read_unlock(&head->lock);
343 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
344 struct in6_addr *daddr, u16 hnum,
349 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
354 return tcp_v6_lookup_listener(daddr, hnum, dif);
357 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
358 struct in6_addr *daddr, u16 dport,
364 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
370 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
374 * Open request hash tables.
377 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
381 a = raddr->s6_addr32[0];
382 b = raddr->s6_addr32[1];
383 c = raddr->s6_addr32[2];
385 a += JHASH_GOLDEN_RATIO;
386 b += JHASH_GOLDEN_RATIO;
388 __jhash_mix(a, b, c);
390 a += raddr->s6_addr32[3];
392 __jhash_mix(a, b, c);
394 return c & (TCP_SYNQ_HSIZE - 1);
397 static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
398 struct open_request ***prevp,
400 struct in6_addr *raddr,
401 struct in6_addr *laddr,
404 struct tcp_listen_opt *lopt = tp->listen_opt;
405 struct open_request *req, **prev;
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
408 (req = *prev) != NULL;
409 prev = &req->dl_next) {
410 if (req->rmt_port == rport &&
411 req->class->family == AF_INET6 &&
412 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
413 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
414 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
415 BUG_TRAP(req->sk == NULL);
424 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
425 struct in6_addr *saddr,
426 struct in6_addr *daddr,
429 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
432 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
434 if (skb->protocol == htons(ETH_P_IPV6)) {
435 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
436 skb->nh.ipv6h->saddr.s6_addr32,
440 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Check whether the four-tuple the connecting socket @sk would use is
 * unique in the established/TIME-WAIT tables, optionally recycling a
 * TIME-WAIT bucket (returned via @twp).  On success the socket is hashed
 * in under the bucket's write lock.  NOTE(review): several original
 * lines (gotos, label bodies, the unique: tail) are missing from this
 * extract — the listing below is not contiguous.
 */
447 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
448 struct tcp_tw_bucket **twp)
450 struct inet_sock *inet = inet_sk(sk);
451 struct ipv6_pinfo *np = inet6_sk(sk);
452 struct in6_addr *daddr = &np->rcv_saddr;
453 struct in6_addr *saddr = &np->daddr;
454 int dif = sk->sk_bound_dev_if;
455 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
456 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
457 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
459 struct hlist_node *node;
460 struct tcp_tw_bucket *tw;
462 write_lock(&head->lock);
464 /* Check TIME-WAIT sockets first. */
465 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
466 tw = (struct tcp_tw_bucket*)sk2;
468 if(*((__u32 *)&(tw->tw_dport)) == ports &&
469 sk2->sk_family == PF_INET6 &&
470 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
471 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
472 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
473 struct tcp_sock *tp = tcp_sk(sk);
/* TIME-WAIT recycle: allowed when timestamps were seen and either the
 * caller accepts a tw bucket (twp) or sysctl_tcp_tw_reuse permits it. */
475 if (tw->tw_ts_recent_stamp &&
476 (!twp || (sysctl_tcp_tw_reuse &&
478 tw->tw_ts_recent_stamp > 1))) {
479 /* See comment in tcp_ipv4.c */
480 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
483 tp->rx_opt.ts_recent = tw->tw_ts_recent;
484 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
493 /* And established part... */
494 sk_for_each(sk2, node, &head->chain) {
495 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
/* Tuple is unique: hash the socket in while still holding the lock. */
500 BUG_TRAP(sk_unhashed(sk));
501 __sk_add_node(sk, &head->chain);
502 sk->sk_hashent = hash;
503 sock_prot_inc_use(sk->sk_prot);
504 write_unlock(&head->lock);
508 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
510 /* Silly. Should hash-dance instead... */
511 tcp_tw_deschedule(tw);
512 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
519 write_unlock(&head->lock);
520 return -EADDRNOTAVAIL;
523 static inline u32 tcpv6_port_offset(const struct sock *sk)
525 const struct inet_sock *inet = inet_sk(sk);
526 const struct ipv6_pinfo *np = inet6_sk(sk);
528 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
/* Bind-and-hash for an outgoing connection.  With no local port yet
 * (snum == 0) it probes the ephemeral range, using
 * __tcp_v6_check_established() to validate candidate four-tuples and
 * possibly recycling a TIME-WAIT bucket; otherwise it fast-paths a port
 * this socket already exclusively owns.  NOTE(review): multiple original
 * lines (braces, gotos, labels, the ok:/out: tails) are missing from
 * this extract — the listing below is not contiguous.
 */
533 static int tcp_v6_hash_connect(struct sock *sk)
535 unsigned short snum = inet_sk(sk)->num;
536 struct tcp_bind_hashbucket *head;
537 struct tcp_bind_bucket *tb;
/* Ephemeral-port search path (snum == 0). */
541 int low = sysctl_local_port_range[0];
542 int high = sysctl_local_port_range[1];
543 int range = high - low;
547 u32 offset = hint + tcpv6_port_offset(sk);
548 struct hlist_node *node;
549 struct tcp_tw_bucket *tw = NULL;
552 for (i = 1; i <= range; i++) {
553 port = low + (i + offset) % range;
554 head = &tcp_bhash[tcp_bhashfn(port)];
555 spin_lock(&head->lock);
557 /* Does not bother with rcv_saddr checks,
558 * because the established check is already
561 tb_for_each(tb, node, &head->chain) {
562 if (tb->port == port) {
563 BUG_TRAP(!hlist_empty(&tb->owners));
564 if (tb->fastreuse >= 0)
566 if (!__tcp_v6_check_established(sk,
574 tb = tcp_bucket_create(head, port);
576 spin_unlock(&head->lock);
583 spin_unlock(&head->lock);
587 return -EADDRNOTAVAIL;
592 /* Head lock still held and bh's disabled */
593 tcp_bind_hash(sk, tb, port);
594 if (sk_unhashed(sk)) {
595 inet_sk(sk)->sport = htons(port);
598 spin_unlock(&head->lock);
/* A displaced TIME-WAIT bucket is descheduled outside the bucket lock. */
601 tcp_tw_deschedule(tw);
/* Pre-bound path (snum != 0): sole owner can skip the established walk. */
609 head = &tcp_bhash[tcp_bhashfn(snum)];
610 tb = tcp_sk(sk)->bind_hash;
611 spin_lock_bh(&head->lock);
613 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
615 spin_unlock_bh(&head->lock);
618 spin_unlock(&head->lock);
619 /* No definite answer... Walk to established hash table */
620 ret = __tcp_v6_check_established(sk, snum, NULL);
627 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
629 return IP6CB(skb)->iif;
/* connect() handler for TCP over IPv6.  Validates the destination
 * (flow label, scope id, address type), detours v4-mapped destinations
 * through tcp_v4_connect() with the ipv6_mapped operations table, then
 * routes the flow, binds/hashes via tcp_v6_hash_connect() and sends the
 * SYN.  NOTE(review): many original lines (braces, error labels, some
 * assignments) are missing from this extract — the listing below is not
 * contiguous.
 */
632 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
635 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
636 struct inet_sock *inet = inet_sk(sk);
637 struct ipv6_pinfo *np = inet6_sk(sk);
638 struct tcp_sock *tp = tcp_sk(sk);
639 struct in6_addr *saddr = NULL, *final_p = NULL, final;
641 struct dst_entry *dst;
645 if (addr_len < SIN6_LEN_RFC2133)
648 if (usin->sin6_family != AF_INET6)
649 return(-EAFNOSUPPORT);
651 memset(&fl, 0, sizeof(fl));
/* Honour a requested flow label; it must name a known flow for this sk. */
654 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
655 IP6_ECN_flow_init(fl.fl6_flowlabel);
656 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
657 struct ip6_flowlabel *flowlabel;
658 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
659 if (flowlabel == NULL)
661 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
662 fl6_sock_release(flowlabel);
667 * connect() to INADDR_ANY means loopback (BSD'ism).
670 if(ipv6_addr_any(&usin->sin6_addr))
671 usin->sin6_addr.s6_addr[15] = 0x1;
673 addr_type = ipv6_addr_type(&usin->sin6_addr);
675 if(addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a consistent, known interface. */
678 if (addr_type&IPV6_ADDR_LINKLOCAL) {
679 if (addr_len >= sizeof(struct sockaddr_in6) &&
680 usin->sin6_scope_id) {
681 /* If interface is set while binding, indices
684 if (sk->sk_bound_dev_if &&
685 sk->sk_bound_dev_if != usin->sin6_scope_id)
688 sk->sk_bound_dev_if = usin->sin6_scope_id;
691 /* Connect to link-local address requires an interface */
692 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer invalidates cached timestamps. */
696 if (tp->rx_opt.ts_recent_stamp &&
697 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
698 tp->rx_opt.ts_recent = 0;
699 tp->rx_opt.ts_recent_stamp = 0;
703 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
704 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: hand off to the IPv4 connect path. */
710 if (addr_type == IPV6_ADDR_MAPPED) {
711 u32 exthdrlen = tp->ext_header_len;
712 struct sockaddr_in sin;
714 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
716 if (__ipv6_only_sock(sk))
719 sin.sin_family = AF_INET;
720 sin.sin_port = usin->sin6_port;
721 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
723 tp->af_specific = &ipv6_mapped;
724 sk->sk_backlog_rcv = tcp_v4_do_rcv;
726 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* On failure, restore the native IPv6 operations. */
729 tp->ext_header_len = exthdrlen;
730 tp->af_specific = &ipv6_specific;
731 sk->sk_backlog_rcv = tcp_v6_do_rcv;
734 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
736 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
743 if (!ipv6_addr_any(&np->rcv_saddr))
744 saddr = &np->rcv_saddr;
746 fl.proto = IPPROTO_TCP;
747 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
748 ipv6_addr_copy(&fl.fl6_src,
749 (saddr ? saddr : &np->saddr));
750 fl.oif = sk->sk_bound_dev_if;
751 fl.fl_ip_dport = usin->sin6_port;
752 fl.fl_ip_sport = inet->sport;
/* With a routing header, route via the first hop and restore the real
 * destination after the lookup. */
754 if (np->opt && np->opt->srcrt) {
755 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
756 ipv6_addr_copy(&final, &fl.fl6_dst);
757 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
761 err = ip6_dst_lookup(sk, &dst, &fl);
765 ipv6_addr_copy(&fl.fl6_dst, final_p);
767 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
774 ipv6_addr_copy(&np->rcv_saddr, saddr);
777 /* set the source address */
778 ipv6_addr_copy(&np->saddr, saddr);
779 inet->rcv_saddr = LOOPBACK4_IPV6;
781 ip6_dst_store(sk, dst, NULL);
782 sk->sk_route_caps = dst->dev->features &
783 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
785 tp->ext_header_len = 0;
787 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
789 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
791 inet->dport = usin->sin6_port;
793 tcp_set_state(sk, TCP_SYN_SENT);
794 err = tcp_v6_hash_connect(sk);
799 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
804 err = tcp_connect(sk);
/* Failure path: tear the half-set-up connection back down. */
811 tcp_set_state(sk, TCP_CLOSE);
815 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP.  Locates the socket for the offending
 * header, handles PMTU discovery (PKT_TOOBIG) by re-routing and calling
 * tcp_sync_mss(), and delivers other errors to the socket or to a
 * matching pending open_request.  NOTE(review): numerous original lines
 * (lock/unlock, braces, case labels, out: tail) are missing from this
 * extract — the listing below is not contiguous.
 */
819 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
820 int type, int code, int offset, __u32 info)
822 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
823 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
824 struct ipv6_pinfo *np;
830 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
833 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
837 if (sk->sk_state == TCP_TIME_WAIT) {
838 tcp_tw_put((struct tcp_tw_bucket*)sk);
843 if (sock_owned_by_user(sk))
844 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
846 if (sk->sk_state == TCP_CLOSE)
/* Drop errors whose echoed sequence falls outside the send window. */
850 seq = ntohl(th->seq);
851 if (sk->sk_state != TCP_LISTEN &&
852 !between(seq, tp->snd_una, tp->snd_nxt)) {
853 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
859 if (type == ICMPV6_PKT_TOOBIG) {
860 struct dst_entry *dst = NULL;
862 if (sock_owned_by_user(sk))
864 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
867 /* icmp should have updated the destination cache entry */
868 dst = __sk_dst_check(sk, np->dst_cookie);
871 struct inet_sock *inet = inet_sk(sk);
874 /* BUGGG_FUTURE: Again, it is not clear how
875 to handle rthdr case. Ignore this complexity
878 memset(&fl, 0, sizeof(fl));
879 fl.proto = IPPROTO_TCP;
880 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
881 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
882 fl.oif = sk->sk_bound_dev_if;
883 fl.fl_ip_dport = inet->dport;
884 fl.fl_ip_sport = inet->sport;
886 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
887 sk->sk_err_soft = -err;
891 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
892 sk->sk_err_soft = -err;
/* Shrink the MSS only when the cached PMTU really got smaller. */
899 if (tp->pmtu_cookie > dst_mtu(dst)) {
900 tcp_sync_mss(sk, dst_mtu(dst));
901 tcp_simple_retransmit(sk);
902 } /* else let the usual retransmit timer handle it */
907 icmpv6_err_convert(type, code, &err);
909 /* Might be for an open_request */
910 switch (sk->sk_state) {
911 struct open_request *req, **prev;
913 if (sock_owned_by_user(sk))
916 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
917 &hdr->saddr, tcp_v6_iif(skb));
921 /* ICMPs are not backlogged, hence we cannot get
922 * an established socket here.
924 BUG_TRAP(req->sk == NULL);
926 if (seq != req->snt_isn) {
927 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
931 tcp_synq_drop(sk, req, prev);
935 case TCP_SYN_RECV: /* Cannot happen.
936 It can, it SYNs are crossed. --ANK */
937 if (!sock_owned_by_user(sk)) {
938 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
940 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
944 sk->sk_err_soft = err;
948 if (!sock_owned_by_user(sk) && np->recverr) {
950 sk->sk_error_report(sk);
952 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for @req, routing the reply (honouring an
 * inverted routing header from the SYN's options when requested) and
 * checksumming the segment before ip6_xmit().  NOTE(review): several
 * original lines (braces, error labels, the done: tail) are missing
 * from this extract — the listing below is not contiguous.
 */
960 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
961 struct dst_entry *dst)
963 struct ipv6_pinfo *np = inet6_sk(sk);
964 struct sk_buff * skb;
965 struct ipv6_txoptions *opt = NULL;
966 struct in6_addr * final_p = NULL, final;
970 memset(&fl, 0, sizeof(fl));
971 fl.proto = IPPROTO_TCP;
972 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
973 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
974 fl.fl6_flowlabel = 0;
975 fl.oif = req->af.v6_req.iif;
976 fl.fl_ip_dport = req->rmt_port;
977 fl.fl_ip_sport = inet_sk(sk)->sport;
/* Reverse the source route carried by the SYN when the listener asked
 * for it (srcrt == 2). */
982 np->rxopt.bits.srcrt == 2 &&
983 req->af.v6_req.pktopts) {
984 struct sk_buff *pktopts = req->af.v6_req.pktopts;
985 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
987 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
990 if (opt && opt->srcrt) {
991 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
992 ipv6_addr_copy(&final, &fl.fl6_dst);
993 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
997 err = ip6_dst_lookup(sk, &dst, &fl);
1001 ipv6_addr_copy(&fl.fl6_dst, final_p);
1002 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1006 skb = tcp_make_synack(sk, dst, req);
1008 struct tcphdr *th = skb->h.th;
1010 th->check = tcp_v6_check(th, skb->len,
1011 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
1012 csum_partial((char *)th, skb->len, skb->csum));
1014 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1015 err = ip6_xmit(sk, skb, &fl, opt, 0);
1016 if (err == NET_XMIT_CN)
/* Only free an options blob we built here, never the socket's own. */
1022 if (opt && opt != np->opt)
1023 sock_kfree_s(sk, opt, opt->tot_len);
1027 static void tcp_v6_or_free(struct open_request *req)
1029 if (req->af.v6_req.pktopts)
1030 kfree_skb(req->af.v6_req.pktopts);
1033 static struct or_calltable or_ipv6 = {
1035 .rtx_syn_ack = tcp_v6_send_synack,
1036 .send_ack = tcp_v6_or_send_ack,
1037 .destructor = tcp_v6_or_free,
1038 .send_reset = tcp_v6_send_reset
1041 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1043 struct ipv6_pinfo *np = inet6_sk(sk);
1044 struct inet6_skb_parm *opt = IP6CB(skb);
1046 if (np->rxopt.all) {
1047 if ((opt->hop && np->rxopt.bits.hopopts) ||
1048 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1049 np->rxopt.bits.rxflow) ||
1050 (opt->srcrt && np->rxopt.bits.srcrt) ||
1051 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1058 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1059 struct sk_buff *skb)
1061 struct ipv6_pinfo *np = inet6_sk(sk);
1063 if (skb->ip_summed == CHECKSUM_HW) {
1064 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1065 skb->csum = offsetof(struct tcphdr, check);
1067 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1068 csum_partial((char *)th, th->doff<<2,
/* Send a RST in response to @skb (no local socket involved).  The reply
 * swaps ports/addresses, mirrors ACK/SEQ per RFC 793, and is routed and
 * transmitted with a NULL socket.  NOTE(review): several original lines
 * (braces, th->rst/ack assignments, the failure tail) are missing from
 * this extract — the listing below is not contiguous.
 */
1074 static void tcp_v6_send_reset(struct sk_buff *skb)
1076 struct tcphdr *th = skb->h.th, *t1;
1077 struct sk_buff *buff;
/* Never reset in reply to a RST, and only to unicast destinations. */
1083 if (!ipv6_unicast_destination(skb))
1087 * We need to grab some memory, and put together an RST,
1088 * and then put it into the queue to be sent.
1091 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1096 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1098 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1100 /* Swap the send and the receive. */
1101 memset(t1, 0, sizeof(*t1));
1102 t1->dest = th->source;
1103 t1->source = th->dest;
1104 t1->doff = sizeof(*t1)/4;
1108 t1->seq = th->ack_seq;
1111 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1112 + skb->len - (th->doff<<2));
1115 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1117 memset(&fl, 0, sizeof(fl));
1118 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1119 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1121 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1122 sizeof(*t1), IPPROTO_TCP,
1125 fl.proto = IPPROTO_TCP;
1126 fl.oif = tcp_v6_iif(skb);
1127 fl.fl_ip_dport = t1->dest;
1128 fl.fl_ip_sport = t1->source;
1130 /* sk = NULL, but it is safe for now. RST socket required. */
1131 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1133 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1134 dst_release(buff->dst);
1138 ip6_xmit(NULL, buff, &fl, NULL, 0);
1139 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1140 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/* Send a bare ACK (used for TIME-WAIT and open_request replies) with the
 * given SEQ/ACK/window and, when @ts is nonzero, a timestamp option.
 * Built and routed like tcp_v6_send_reset(), with a NULL socket.
 * NOTE(review): several original lines (braces, the ts-option sizing,
 * the tsecr word, the failure tail) are missing from this extract — the
 * listing below is not contiguous.
 */
1147 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1149 struct tcphdr *th = skb->h.th, *t1;
1150 struct sk_buff *buff;
1152 int tot_len = sizeof(struct tcphdr);
1157 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1162 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1164 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1166 /* Swap the send and the receive. */
1167 memset(t1, 0, sizeof(*t1));
1168 t1->dest = th->source;
1169 t1->source = th->dest;
1170 t1->doff = tot_len/4;
1171 t1->seq = htonl(seq);
1172 t1->ack_seq = htonl(ack);
1174 t1->window = htons(win);
/* Append a TCP timestamp option when @ts is set. */
1177 u32 *ptr = (u32*)(t1 + 1);
1178 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1179 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1180 *ptr++ = htonl(tcp_time_stamp);
1184 buff->csum = csum_partial((char *)t1, tot_len, 0);
1186 memset(&fl, 0, sizeof(fl));
1187 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1188 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1190 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1191 tot_len, IPPROTO_TCP,
1194 fl.proto = IPPROTO_TCP;
1195 fl.oif = tcp_v6_iif(skb);
1196 fl.fl_ip_dport = t1->dest;
1197 fl.fl_ip_sport = t1->source;
1199 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1200 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1201 dst_release(buff->dst);
1204 ip6_xmit(NULL, buff, &fl, NULL, 0);
1205 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1212 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1214 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1216 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1217 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1222 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1224 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
1228 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1230 struct open_request *req, **prev;
1231 struct tcphdr *th = skb->h.th;
1232 struct tcp_sock *tp = tcp_sk(sk);
1235 /* Find possible connection requests. */
1236 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1237 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1239 return tcp_check_req(sk, skb, req, prev);
1241 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1243 &skb->nh.ipv6h->daddr,
1248 if (nsk->sk_state != TCP_TIME_WAIT) {
1252 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1256 #if 0 /*def CONFIG_SYN_COOKIES*/
1257 if (!th->rst && !th->syn && th->ack)
1258 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1263 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1265 struct tcp_sock *tp = tcp_sk(sk);
1266 struct tcp_listen_opt *lopt = tp->listen_opt;
1267 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1270 req->expires = jiffies + TCP_TIMEOUT_INIT;
1272 req->dl_next = lopt->syn_table[h];
1274 write_lock(&tp->syn_wait_lock);
1275 lopt->syn_table[h] = req;
1276 write_unlock(&tp->syn_wait_lock);
/* Handle an incoming SYN on a listening socket: allocate and fill an
 * open_request, pick the ISN and send the SYN-ACK.  v4 packets are
 * punted to tcp_v4_conn_request().  NOTE(review): several original lines
 * (braces, drop labels, the success return) are missing from this
 * extract — the listing below is not contiguous.
 */
1282 /* FIXME: this is substantially similar to the ipv4 code.
1283 * Can some kind of merge be done? -- erics
1285 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1287 struct ipv6_pinfo *np = inet6_sk(sk);
1288 struct tcp_options_received tmp_opt;
1289 struct tcp_sock *tp = tcp_sk(sk);
1290 struct open_request *req = NULL;
1291 __u32 isn = TCP_SKB_CB(skb)->when;
1293 if (skb->protocol == htons(ETH_P_IP))
1294 return tcp_v4_conn_request(sk, skb);
1296 if (!ipv6_unicast_destination(skb))
1301 * There are no SYN attacks on IPv6, yet...
1303 if (tcp_synq_is_full(sk) && !isn) {
1304 if (net_ratelimit())
1305 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1309 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1313 req = tcp_openreq_alloc();
1317 tcp_clear_options(&tmp_opt);
1318 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1319 tmp_opt.user_mss = tp->rx_opt.user_mss;
1321 tcp_parse_options(skb, &tmp_opt, 0);
1323 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1324 tcp_openreq_init(req, &tmp_opt, skb);
1326 req->class = &or_ipv6;
1327 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1328 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1329 TCP_ECN_create_request(req, skb->h.th);
1330 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive when the listener wants its ancillary data. */
1331 if (ipv6_opt_accepted(sk, skb) ||
1332 np->rxopt.bits.rxinfo ||
1333 np->rxopt.bits.rxhlim) {
1334 atomic_inc(&skb->users);
1335 req->af.v6_req.pktopts = skb;
1337 req->af.v6_req.iif = sk->sk_bound_dev_if;
1339 /* So that link locals have meaning */
1340 if (!sk->sk_bound_dev_if &&
1341 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1342 req->af.v6_req.iif = tcp_v6_iif(skb);
1345 isn = tcp_v6_init_sequence(sk,skb);
1349 if (tcp_v6_send_synack(sk, req, NULL))
1352 tcp_v6_synq_add(sk, req);
/* Failure path: release the request and count the failed attempt. */
1358 tcp_openreq_free(req);
1360 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1361 return 0; /* don't send reset */
/* Create the child socket once the three-way handshake completes.  For
 * v4 packets, wrap tcp_v4_syn_recv_sock()'s child with the v4-mapped
 * IPv6 operations; for native IPv6, route the flow, clone the listener's
 * state, copy addresses/options from the open_request and hash the child
 * in.  NOTE(review): many original lines (braces, returns, out/overflow
 * labels) are missing from this extract — the listing below is not
 * contiguous.
 */
1364 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1365 struct open_request *req,
1366 struct dst_entry *dst)
1368 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1369 struct tcp6_sock *newtcp6sk;
1370 struct inet_sock *newinet;
1371 struct tcp_sock *newtp;
1373 struct ipv6_txoptions *opt;
/* --- v4-mapped branch: delegate child creation to the IPv4 code. --- */
1375 if (skb->protocol == htons(ETH_P_IP)) {
1380 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1385 newtcp6sk = (struct tcp6_sock *)newsk;
1386 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1388 newinet = inet_sk(newsk);
1389 newnp = inet6_sk(newsk);
1390 newtp = tcp_sk(newsk);
1392 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1394 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1397 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1400 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1402 newtp->af_specific = &ipv6_mapped;
1403 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1404 newnp->pktoptions = NULL;
1406 newnp->mcast_oif = tcp_v6_iif(skb);
1407 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1409 /* Charge newly allocated IPv6 socket. Though it is mapped,
1412 #ifdef INET_REFCNT_DEBUG
1413 atomic_inc(&inet6_sock_nr);
1416 /* It is tricky place. Until this moment IPv4 tcp
1417 worked with IPv6 af_tcp.af_specific.
1420 tcp_sync_mss(newsk, newtp->pmtu_cookie);
/* --- native IPv6 branch. --- */
1427 if (sk_acceptq_is_full(sk))
1430 if (np->rxopt.bits.srcrt == 2 &&
1431 opt == NULL && req->af.v6_req.pktopts) {
1432 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1434 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1438 struct in6_addr *final_p = NULL, final;
1441 memset(&fl, 0, sizeof(fl));
1442 fl.proto = IPPROTO_TCP;
1443 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1444 if (opt && opt->srcrt) {
1445 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1446 ipv6_addr_copy(&final, &fl.fl6_dst);
1447 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1450 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1451 fl.oif = sk->sk_bound_dev_if;
1452 fl.fl_ip_dport = req->rmt_port;
1453 fl.fl_ip_sport = inet_sk(sk)->sport;
1455 if (ip6_dst_lookup(sk, &dst, &fl))
1459 ipv6_addr_copy(&fl.fl6_dst, final_p);
1461 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1465 newsk = tcp_create_openreq_child(sk, req, skb);
1469 /* Charge newly allocated IPv6 socket */
1470 #ifdef INET_REFCNT_DEBUG
1471 atomic_inc(&inet6_sock_nr);
1474 ip6_dst_store(newsk, dst, NULL);
1475 newsk->sk_route_caps = dst->dev->features &
1476 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1478 newtcp6sk = (struct tcp6_sock *)newsk;
1479 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1481 newtp = tcp_sk(newsk);
1482 newinet = inet_sk(newsk);
1483 newnp = inet6_sk(newsk);
1485 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1487 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1488 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1489 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1490 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1492 /* Now IPv6 options...
1494 First: no IPv4 options.
1496 newinet->opt = NULL;
1499 newnp->rxopt.all = np->rxopt.all;
1501 /* Clone pktoptions received with SYN */
1502 newnp->pktoptions = NULL;
1503 if (req->af.v6_req.pktopts) {
1504 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1506 kfree_skb(req->af.v6_req.pktopts);
1507 req->af.v6_req.pktopts = NULL;
1508 if (newnp->pktoptions)
1509 skb_set_owner_r(newnp->pktoptions, newsk);
1512 newnp->mcast_oif = tcp_v6_iif(skb);
1513 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1515 /* Clone native IPv6 options from listening socket (if any)
1517 Yes, keeping reference count would be much more clever,
1518 but we make one more one thing there: reattach optmem
1522 newnp->opt = ipv6_dup_options(newsk, opt);
1524 sock_kfree_s(sk, opt, opt->tot_len);
1527 newtp->ext_header_len = 0;
1529 newtp->ext_header_len = newnp->opt->opt_nflen +
1530 newnp->opt->opt_flen;
1532 tcp_sync_mss(newsk, dst_mtu(dst));
1533 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1534 tcp_initialize_rcv_mss(newsk);
1536 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1538 __tcp_v6_hash(newsk);
1539 tcp_inherit_port(sk, newsk);
/* Failure paths: accept-queue overflow and allocation/routing drops. */
1544 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1546 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1547 if (opt && opt != np->opt)
1548 sock_kfree_s(sk, opt, opt->tot_len);
1553 static int tcp_v6_checksum_init(struct sk_buff *skb)
1555 if (skb->ip_summed == CHECKSUM_HW) {
1556 skb->ip_summed = CHECKSUM_UNNECESSARY;
1557 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1558 &skb->nh.ipv6h->daddr,skb->csum))
1560 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1562 if (skb->len <= 76) {
1563 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1564 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1566 skb->ip_summed = CHECKSUM_UNNECESSARY;
1568 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1569 &skb->nh.ipv6h->daddr,0);
1574 /* The socket must have it's spinlock held when we get
1577 * We have a potential double-lock case here, so even when
1578 * doing backlog processing we use the BH locking scheme.
1579 * This is because we cannot sleep with the original spinlock
1582 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1584 struct ipv6_pinfo *np = inet6_sk(sk);
1585 struct tcp_sock *tp;
1586 struct sk_buff *opt_skb = NULL;
1588 /* Imagine: socket is IPv6. IPv4 packet arrives,
1589 goes to IPv4 receive handler and backlogged.
1590 From backlog it always goes here. Kerboom...
1591 Fortunately, tcp_rcv_established and rcv_established
1592 handle them correctly, but it is not case with
1593 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1596 if (skb->protocol == htons(ETH_P_IP))
1597 return tcp_v4_do_rcv(sk, skb);
1599 if (sk_filter(sk, skb, 0))
1603 * socket locking is here for SMP purposes as backlog rcv
1604 * is currently called with bh processing disabled.
1607 /* Do Stevens' IPV6_PKTOPTIONS.
1609 Yes, guys, it is the only place in our code, where we
1610 may make it not affecting IPv4.
1611 The rest of code is protocol independent,
1612 and I do not like idea to uglify IPv4.
1614 Actually, all the idea behind IPV6_PKTOPTIONS
1615 looks not very well thought. For now we latch
1616 options, received in the last packet, enqueued
1617 by tcp. Feel free to propose better solution.
1621 opt_skb = skb_clone(skb, GFP_ATOMIC);
1623 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1624 TCP_CHECK_TIMER(sk);
1625 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1627 TCP_CHECK_TIMER(sk);
1629 goto ipv6_pktoptions;
1633 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1636 if (sk->sk_state == TCP_LISTEN) {
1637 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1642 * Queue it on the new socket if the new socket is active,
1643 * otherwise we just shortcircuit this and continue with
1647 if (tcp_child_process(sk, nsk, skb))
1650 __kfree_skb(opt_skb);
1655 TCP_CHECK_TIMER(sk);
1656 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1658 TCP_CHECK_TIMER(sk);
1660 goto ipv6_pktoptions;
1664 tcp_v6_send_reset(skb);
1667 __kfree_skb(opt_skb);
1671 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1676 /* Do you ask, what is it?
1678 1. skb was enqueued by tcp.
1679 2. skb is added to tail of read queue, rather than out of order.
1680 3. socket is not in passive state.
1681 4. Finally, it really contains options, which user wants to receive.
1684 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1685 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1686 if (np->rxopt.bits.rxinfo)
1687 np->mcast_oif = tcp_v6_iif(opt_skb);
1688 if (np->rxopt.bits.rxhlim)
1689 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1690 if (ipv6_opt_accepted(sk, opt_skb)) {
1691 skb_set_owner_r(opt_skb, sk);
1692 opt_skb = xchg(&np->pktoptions, opt_skb);
1694 __kfree_skb(opt_skb);
1695 opt_skb = xchg(&np->pktoptions, NULL);
1704 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1706 struct sk_buff *skb = *pskb;
1711 if (skb->pkt_type != PACKET_HOST)
1715 * Count it even if it's bad.
1717 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1719 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1724 if (th->doff < sizeof(struct tcphdr)/4)
1726 if (!pskb_may_pull(skb, th->doff*4))
1729 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1730 tcp_v6_checksum_init(skb) < 0))
1734 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1735 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1736 skb->len - th->doff*4);
1737 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1738 TCP_SKB_CB(skb)->when = 0;
1739 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1740 TCP_SKB_CB(skb)->sacked = 0;
1742 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1743 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1749 if (sk->sk_state == TCP_TIME_WAIT)
1752 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1753 goto discard_and_relse;
1755 if (sk_filter(sk, skb, 0))
1756 goto discard_and_relse;
1762 if (!sock_owned_by_user(sk)) {
1763 if (!tcp_prequeue(sk, skb))
1764 ret = tcp_v6_do_rcv(sk, skb);
1766 sk_add_backlog(sk, skb);
1770 return ret ? -1 : 0;
1773 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1776 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1778 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1780 tcp_v6_send_reset(skb);
1797 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1798 tcp_tw_put((struct tcp_tw_bucket *) sk);
1802 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1803 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1804 tcp_tw_put((struct tcp_tw_bucket *) sk);
1808 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1809 skb, th, skb->len)) {
1814 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1816 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1817 tcp_tw_put((struct tcp_tw_bucket *)sk);
1821 /* Fall through to ACK */
1824 tcp_v6_timewait_ack(sk, skb);
1828 case TCP_TW_SUCCESS:;
1833 static int tcp_v6_rebuild_header(struct sock *sk)
1836 struct dst_entry *dst;
1837 struct ipv6_pinfo *np = inet6_sk(sk);
1839 dst = __sk_dst_check(sk, np->dst_cookie);
1842 struct inet_sock *inet = inet_sk(sk);
1843 struct in6_addr *final_p = NULL, final;
1846 memset(&fl, 0, sizeof(fl));
1847 fl.proto = IPPROTO_TCP;
1848 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1849 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1850 fl.fl6_flowlabel = np->flow_label;
1851 fl.oif = sk->sk_bound_dev_if;
1852 fl.fl_ip_dport = inet->dport;
1853 fl.fl_ip_sport = inet->sport;
1855 if (np->opt && np->opt->srcrt) {
1856 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1857 ipv6_addr_copy(&final, &fl.fl6_dst);
1858 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1862 err = ip6_dst_lookup(sk, &dst, &fl);
1864 sk->sk_route_caps = 0;
1868 ipv6_addr_copy(&fl.fl6_dst, final_p);
1870 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1871 sk->sk_err_soft = -err;
1876 ip6_dst_store(sk, dst, NULL);
1877 sk->sk_route_caps = dst->dev->features &
1878 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1884 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1886 struct sock *sk = skb->sk;
1887 struct inet_sock *inet = inet_sk(sk);
1888 struct ipv6_pinfo *np = inet6_sk(sk);
1890 struct dst_entry *dst;
1891 struct in6_addr *final_p = NULL, final;
1893 memset(&fl, 0, sizeof(fl));
1894 fl.proto = IPPROTO_TCP;
1895 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1896 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1897 fl.fl6_flowlabel = np->flow_label;
1898 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1899 fl.oif = sk->sk_bound_dev_if;
1900 fl.fl_ip_sport = inet->sport;
1901 fl.fl_ip_dport = inet->dport;
1903 if (np->opt && np->opt->srcrt) {
1904 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1905 ipv6_addr_copy(&final, &fl.fl6_dst);
1906 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1910 dst = __sk_dst_check(sk, np->dst_cookie);
1913 int err = ip6_dst_lookup(sk, &dst, &fl);
1916 sk->sk_err_soft = -err;
1921 ipv6_addr_copy(&fl.fl6_dst, final_p);
1923 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1924 sk->sk_route_caps = 0;
1929 ip6_dst_store(sk, dst, NULL);
1930 sk->sk_route_caps = dst->dev->features &
1931 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1934 skb->dst = dst_clone(dst);
1936 /* Restore final destination back after routing done */
1937 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1939 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1942 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1944 struct ipv6_pinfo *np = inet6_sk(sk);
1945 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1947 sin6->sin6_family = AF_INET6;
1948 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1949 sin6->sin6_port = inet_sk(sk)->dport;
1950 /* We do not store received flowlabel for TCP */
1951 sin6->sin6_flowinfo = 0;
1952 sin6->sin6_scope_id = 0;
1953 if (sk->sk_bound_dev_if &&
1954 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1955 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1958 static int tcp_v6_remember_stamp(struct sock *sk)
1960 /* Alas, not yet... */
1964 static struct tcp_func ipv6_specific = {
1965 .queue_xmit = tcp_v6_xmit,
1966 .send_check = tcp_v6_send_check,
1967 .rebuild_header = tcp_v6_rebuild_header,
1968 .conn_request = tcp_v6_conn_request,
1969 .syn_recv_sock = tcp_v6_syn_recv_sock,
1970 .remember_stamp = tcp_v6_remember_stamp,
1971 .net_header_len = sizeof(struct ipv6hdr),
1973 .setsockopt = ipv6_setsockopt,
1974 .getsockopt = ipv6_getsockopt,
1975 .addr2sockaddr = v6_addr2sockaddr,
1976 .sockaddr_len = sizeof(struct sockaddr_in6)
1980 * TCP over IPv4 via INET6 API
1983 static struct tcp_func ipv6_mapped = {
1984 .queue_xmit = ip_queue_xmit,
1985 .send_check = tcp_v4_send_check,
1986 .rebuild_header = tcp_v4_rebuild_header,
1987 .conn_request = tcp_v6_conn_request,
1988 .syn_recv_sock = tcp_v6_syn_recv_sock,
1989 .remember_stamp = tcp_v4_remember_stamp,
1990 .net_header_len = sizeof(struct iphdr),
1992 .setsockopt = ipv6_setsockopt,
1993 .getsockopt = ipv6_getsockopt,
1994 .addr2sockaddr = v6_addr2sockaddr,
1995 .sockaddr_len = sizeof(struct sockaddr_in6)
2000 /* NOTE: A lot of things set to zero explicitly by call to
2001 * sk_alloc() so need not be done here.
2003 static int tcp_v6_init_sock(struct sock *sk)
2005 struct tcp_sock *tp = tcp_sk(sk);
2007 skb_queue_head_init(&tp->out_of_order_queue);
2008 tcp_init_xmit_timers(sk);
2009 tcp_prequeue_init(tp);
2011 tp->rto = TCP_TIMEOUT_INIT;
2012 tp->mdev = TCP_TIMEOUT_INIT;
2014 /* So many TCP implementations out there (incorrectly) count the
2015 * initial SYN frame in their delayed-ACK and congestion control
2016 * algorithms that we must have the following bandaid to talk
2017 * efficiently to them. -DaveM
2021 /* See draft-stevens-tcpca-spec-01 for discussion of the
2022 * initialization of these values.
2024 tp->snd_ssthresh = 0x7fffffff;
2025 tp->snd_cwnd_clamp = ~0;
2026 tp->mss_cache_std = tp->mss_cache = 536;
2028 tp->reordering = sysctl_tcp_reordering;
2030 sk->sk_state = TCP_CLOSE;
2032 tp->af_specific = &ipv6_specific;
2034 sk->sk_write_space = sk_stream_write_space;
2035 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2037 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2038 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2040 atomic_inc(&tcp_sockets_allocated);
2045 static int tcp_v6_destroy_sock(struct sock *sk)
2047 extern int tcp_v4_destroy_sock(struct sock *sk);
2049 tcp_v4_destroy_sock(sk);
2050 return inet6_destroy_sock(sk);
2053 /* Proc filesystem TCPv6 sock list dumping. */
2054 static void get_openreq6(struct seq_file *seq,
2055 struct sock *sk, struct open_request *req, int i, int uid)
2057 struct in6_addr *dest, *src;
2058 int ttd = req->expires - jiffies;
2063 src = &req->af.v6_req.loc_addr;
2064 dest = &req->af.v6_req.rmt_addr;
2066 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2067 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2069 src->s6_addr32[0], src->s6_addr32[1],
2070 src->s6_addr32[2], src->s6_addr32[3],
2071 ntohs(inet_sk(sk)->sport),
2072 dest->s6_addr32[0], dest->s6_addr32[1],
2073 dest->s6_addr32[2], dest->s6_addr32[3],
2074 ntohs(req->rmt_port),
2076 0,0, /* could print option size, but that is af dependent. */
2077 1, /* timers active (only the expire timer) */
2078 jiffies_to_clock_t(ttd),
2081 0, /* non standard timer */
2082 0, /* open_requests have no inode */
2086 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2088 struct in6_addr *dest, *src;
2091 unsigned long timer_expires;
2092 struct inet_sock *inet = inet_sk(sp);
2093 struct tcp_sock *tp = tcp_sk(sp);
2094 struct ipv6_pinfo *np = inet6_sk(sp);
2097 src = &np->rcv_saddr;
2098 destp = ntohs(inet->dport);
2099 srcp = ntohs(inet->sport);
2100 if (tp->pending == TCP_TIME_RETRANS) {
2102 timer_expires = tp->timeout;
2103 } else if (tp->pending == TCP_TIME_PROBE0) {
2105 timer_expires = tp->timeout;
2106 } else if (timer_pending(&sp->sk_timer)) {
2108 timer_expires = sp->sk_timer.expires;
2111 timer_expires = jiffies;
2115 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2116 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2118 src->s6_addr32[0], src->s6_addr32[1],
2119 src->s6_addr32[2], src->s6_addr32[3], srcp,
2120 dest->s6_addr32[0], dest->s6_addr32[1],
2121 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2123 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2125 jiffies_to_clock_t(timer_expires - jiffies),
2130 atomic_read(&sp->sk_refcnt), sp,
2131 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2132 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2136 static void get_timewait6_sock(struct seq_file *seq,
2137 struct tcp_tw_bucket *tw, int i)
2139 struct in6_addr *dest, *src;
2141 int ttd = tw->tw_ttd - jiffies;
2146 dest = &tw->tw_v6_daddr;
2147 src = &tw->tw_v6_rcv_saddr;
2148 destp = ntohs(tw->tw_dport);
2149 srcp = ntohs(tw->tw_sport);
2152 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2153 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2155 src->s6_addr32[0], src->s6_addr32[1],
2156 src->s6_addr32[2], src->s6_addr32[3], srcp,
2157 dest->s6_addr32[0], dest->s6_addr32[1],
2158 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2159 tw->tw_substate, 0, 0,
2160 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2161 atomic_read(&tw->tw_refcnt), tw);
2164 #ifdef CONFIG_PROC_FS
2165 static int tcp6_seq_show(struct seq_file *seq, void *v)
2167 struct tcp_iter_state *st;
2169 if (v == SEQ_START_TOKEN) {
2174 "st tx_queue rx_queue tr tm->when retrnsmt"
2175 " uid timeout inode\n");
2180 switch (st->state) {
2181 case TCP_SEQ_STATE_LISTENING:
2182 case TCP_SEQ_STATE_ESTABLISHED:
2183 get_tcp6_sock(seq, v, st->num);
2185 case TCP_SEQ_STATE_OPENREQ:
2186 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2188 case TCP_SEQ_STATE_TIME_WAIT:
2189 get_timewait6_sock(seq, v, st->num);
2196 static struct file_operations tcp6_seq_fops;
2197 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2198 .owner = THIS_MODULE,
2201 .seq_show = tcp6_seq_show,
2202 .seq_fops = &tcp6_seq_fops,
2205 int __init tcp6_proc_init(void)
2207 return tcp_proc_register(&tcp6_seq_afinfo);
2210 void tcp6_proc_exit(void)
2212 tcp_proc_unregister(&tcp6_seq_afinfo);
2216 struct proto tcpv6_prot = {
2218 .owner = THIS_MODULE,
2220 .connect = tcp_v6_connect,
2221 .disconnect = tcp_disconnect,
2222 .accept = tcp_accept,
2224 .init = tcp_v6_init_sock,
2225 .destroy = tcp_v6_destroy_sock,
2226 .shutdown = tcp_shutdown,
2227 .setsockopt = tcp_setsockopt,
2228 .getsockopt = tcp_getsockopt,
2229 .sendmsg = tcp_sendmsg,
2230 .recvmsg = tcp_recvmsg,
2231 .backlog_rcv = tcp_v6_do_rcv,
2232 .hash = tcp_v6_hash,
2233 .unhash = tcp_unhash,
2234 .get_port = tcp_v6_get_port,
2235 .enter_memory_pressure = tcp_enter_memory_pressure,
2236 .sockets_allocated = &tcp_sockets_allocated,
2237 .memory_allocated = &tcp_memory_allocated,
2238 .memory_pressure = &tcp_memory_pressure,
2239 .sysctl_mem = sysctl_tcp_mem,
2240 .sysctl_wmem = sysctl_tcp_wmem,
2241 .sysctl_rmem = sysctl_tcp_rmem,
2242 .max_header = MAX_TCP_HEADER,
2243 .obj_size = sizeof(struct tcp6_sock),
2246 static struct inet6_protocol tcpv6_protocol = {
2247 .handler = tcp_v6_rcv,
2248 .err_handler = tcp_v6_err,
2249 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2252 extern struct proto_ops inet6_stream_ops;
2254 static struct inet_protosw tcpv6_protosw = {
2255 .type = SOCK_STREAM,
2256 .protocol = IPPROTO_TCP,
2257 .prot = &tcpv6_prot,
2258 .ops = &inet6_stream_ops,
2261 .flags = INET_PROTOSW_PERMANENT,
2264 void __init tcpv6_init(void)
2266 /* register inet6 protocol */
2267 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2268 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2269 inet6_register_protosw(&tcpv6_protosw);