3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
/*
 * Hash a v6 4-tuple into an index for the established-connection hash
 * table.  Only the low 32 bits of each address are mixed in; the final
 * mask assumes tcp_ehash_size is a power of two.
 * NOTE(review): interior lines (braces) are elided in this dump.
 */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
/*
 * Convenience wrapper: hash a socket by its own 4-tuple
 * (rcv_saddr/num vs. daddr/dport) via tcp_v6_hashfn().
 */
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_opt *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/*
 * Walk all sockets bound to the bucket 'tb' and decide whether 'sk'
 * may share the port: no conflict when the sockets are bound to
 * different devices, or when both set SO_REUSEADDR and the existing
 * one is not listening.  The address comparison is delegated to
 * ipv6_rcv_saddr_equal().
 * NOTE(review): the return statements are elided in this dump.
 */
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
/*
 * Bind 'sk' to local port 'snum'.  With snum == 0 an ephemeral port
 * is picked by scanning [low, high] from the global rover under
 * tcp_portalloc_lock; otherwise the requested bucket is looked up and
 * checked with tcp_v6_bind_conflict().  On success the socket is put
 * on the bucket's owner list via tcp_bind_hash().
 * NOTE(review): many interior lines (do{ head, goto labels, returns)
 * are elided in this dump; control flow below is fragmentary.
 */
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 rover = tcp_port_rover;
144 if ((rover < low) || (rover > high))
146 head = &tcp_bhash[tcp_bhashfn(rover)];
147 spin_lock(&head->lock);
148 tb_for_each(tb, node, &head->chain)
149 if (tb->port == rover)
153 spin_unlock(&head->lock);
154 } while (--remaining > 0);
155 tcp_port_rover = rover;
156 spin_unlock(&tcp_portalloc_lock);
158 /* Exhausted local port range during search? */
163 /* OK, here is the one we will use. */
166 head = &tcp_bhash[tcp_bhashfn(snum)];
167 spin_lock(&head->lock);
168 tb_for_each(tb, node, &head->chain)
169 if (tb->port == snum)
175 if (tb && !hlist_empty(&tb->owners)) {
176 if (tb->fastreuse > 0 && sk->sk_reuse &&
177 sk->sk_state != TCP_LISTEN) {
181 if (tcp_v6_bind_conflict(sk, tb))
187 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
189 if (hlist_empty(&tb->owners)) {
/* First owner of a fresh bucket: record whether later binds may
 * fast-reuse it without a full conflict walk. */
190 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
194 } else if (tb->fastreuse &&
195 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
199 if (!tcp_sk(sk)->bind_hash)
200 tcp_bind_hash(sk, tb, snum);
201 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
205 spin_unlock(&head->lock);
/*
 * Insert an unhashed socket into the proper lookup table: the
 * listening hash for TCP_LISTEN sockets, otherwise the established
 * hash (caching the slot in sk->sk_hashent).  The matching lock
 * acquire/release lines are elided in this dump.
 */
211 static __inline__ void __tcp_v6_hash(struct sock *sk)
213 struct hlist_head *list;
216 BUG_TRAP(sk_unhashed(sk));
218 if (sk->sk_state == TCP_LISTEN) {
219 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
220 lock = &tcp_lhash_lock;
223 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
224 list = &tcp_ehash[sk->sk_hashent].chain;
225 lock = &tcp_ehash[sk->sk_hashent].lock;
229 __sk_add_node(sk, list);
230 sock_prot_inc_use(sk->sk_prot);
/*
 * Protocol-ops hash entry point.  CLOSE sockets are never hashed;
 * v4-mapped sockets (af_specific == &ipv6_mapped) are presumably
 * handed to the IPv4 hasher on the elided branch — TODO confirm,
 * body lines are missing from this dump.
 */
235 static void tcp_v6_hash(struct sock *sk)
237 if (sk->sk_state != TCP_CLOSE) {
238 struct tcp_opt *tp = tcp_sk(sk);
240 if (tp->af_specific == &ipv6_mapped) {
/*
 * Find the best listening socket for (daddr, hnum, dif).  Candidates
 * are scored: a bound rcv_saddr or bound device must match exactly
 * and raises the score; the highest-scoring socket wins.  The score
 * bookkeeping lines themselves are elided in this dump.
 */
250 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
253 struct hlist_node *node;
254 struct sock *result = NULL;
258 read_lock(&tcp_lhash_lock);
259 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
260 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
261 struct ipv6_pinfo *np = inet6_sk(sk);
264 if (!ipv6_addr_any(&np->rcv_saddr)) {
265 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
269 if (sk->sk_bound_dev_if) {
270 if (sk->sk_bound_dev_if != dif)
278 if (score > hiscore) {
286 read_unlock(&tcp_lhash_lock);
290 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
291 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
293 * The sockhash lock must be held as a reader here.
/*
 * Look up an established (or TIME-WAIT) socket by full 4-tuple.
 * The TIME-WAIT half of the table lives at head + tcp_ehash_size.
 * Reference-taking on the hit path is elided in this dump.
 */
296 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
297 struct in6_addr *daddr, u16 hnum,
300 struct tcp_ehash_bucket *head;
302 struct hlist_node *node;
303 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
306 /* Optimize here for direct hit, only listening connections can
307 * have wildcards anyways.
309 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
310 head = &tcp_ehash[hash];
311 read_lock(&head->lock);
312 sk_for_each(sk, node, &head->chain) {
313 /* For IPV6 do the cheaper port and family tests first. */
314 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
315 goto hit; /* You sunk my battleship! */
317 /* Must check for a TIME_WAIT'er before going to listener hash. */
318 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
319 /* FIXME: acme: check this... */
320 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
/* Compare both 16-bit ports with one 32-bit load of tw_dport. */
322 if(*((__u32 *)&(tw->tw_dport)) == ports &&
323 sk->sk_family == PF_INET6) {
324 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
325 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
326 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
330 read_unlock(&head->lock);
335 read_unlock(&head->lock);
/*
 * Combined lookup: established/TIME-WAIT table first, falling back
 * to the listener table when no exact 4-tuple match exists.
 */
340 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
341 struct in6_addr *daddr, u16 hnum,
346 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
351 return tcp_v6_lookup_listener(daddr, hnum, dif);
/*
 * Exported wrapper around __tcp_v6_lookup(); note dport arrives in
 * network byte order here and is converted with ntohs().  The
 * surrounding lock lines are elided in this dump.
 */
354 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
355 struct in6_addr *daddr, u16 dport,
361 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
367 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
371 * Open request hash tables.
/*
 * Jenkins-style hash of the remote address/port (plus the per-listener
 * random value 'rnd' on an elided line) into the SYN queue, which has
 * TCP_SYNQ_HSIZE power-of-two slots.
 */
374 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
378 a = raddr->s6_addr32[0];
379 b = raddr->s6_addr32[1];
380 c = raddr->s6_addr32[2];
382 a += JHASH_GOLDEN_RATIO;
383 b += JHASH_GOLDEN_RATIO;
385 __jhash_mix(a, b, c);
387 a += raddr->s6_addr32[3];
389 __jhash_mix(a, b, c);
391 return c & (TCP_SYNQ_HSIZE - 1);
/*
 * Find a pending open_request matching the 4-tuple (and interface,
 * when the request is bound to one) in the listener's SYN table.
 * *prevp is set so the caller can unlink the entry; iif==0 in the
 * request acts as a wildcard.
 */
394 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
395 struct open_request ***prevp,
397 struct in6_addr *raddr,
398 struct in6_addr *laddr,
401 struct tcp_listen_opt *lopt = tp->listen_opt;
402 struct open_request *req, **prev;
404 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
405 (req = *prev) != NULL;
406 prev = &req->dl_next) {
407 if (req->rmt_port == rport &&
408 req->class->family == AF_INET6 &&
409 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
410 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
411 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
412 BUG_TRAP(req->sk == NULL);
/*
 * Compute the TCP-over-IPv6 checksum: pseudo-header folded together
 * with the partial sum 'base' of the segment (csum_ipv6_magic).
 */
421 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
422 struct in6_addr *saddr,
423 struct in6_addr *daddr,
426 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Pick a secure initial sequence number for a new connection,
 * dispatching on whether the triggering packet was native IPv6 or
 * IPv4 (v4-mapped socket).  Port arguments are on elided lines.
 */
429 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
431 if (skb->protocol == htons(ETH_P_IPV6)) {
432 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
433 skb->nh.ipv6h->saddr.s6_addr32,
437 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/*
 * Verify the chosen 4-tuple is unique before completing connect().
 * A TIME-WAIT twin may be recycled (its timestamp state is inherited
 * to keep PAWS safe); a live established twin means -EADDRNOTAVAIL.
 * On success the socket is inserted into the established hash under
 * the bucket lock.  Several goto labels/returns are elided here.
 */
444 static int tcp_v6_check_established(struct sock *sk)
446 struct inet_opt *inet = inet_sk(sk);
447 struct ipv6_pinfo *np = inet6_sk(sk);
448 struct in6_addr *daddr = &np->rcv_saddr;
449 struct in6_addr *saddr = &np->daddr;
450 int dif = sk->sk_bound_dev_if;
451 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
452 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
453 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
455 struct hlist_node *node;
456 struct tcp_tw_bucket *tw;
458 write_lock_bh(&head->lock);
460 /* Check TIME-WAIT sockets first. */
461 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
462 tw = (struct tcp_tw_bucket*)sk2;
464 if(*((__u32 *)&(tw->tw_dport)) == ports &&
465 sk2->sk_family == PF_INET6 &&
466 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
467 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
468 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
469 struct tcp_opt *tp = tcp_sk(sk);
471 if (tw->tw_ts_recent_stamp) {
472 /* See comment in tcp_ipv4.c */
/* Jump write_seq well past the old connection's snd_nxt so the
 * recycled tuple cannot confuse the peer. */
473 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
476 tp->ts_recent = tw->tw_ts_recent;
477 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
486 /* And established part... */
487 sk_for_each(sk2, node, &head->chain) {
488 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
493 BUG_TRAP(sk_unhashed(sk));
494 __sk_add_node(sk, &head->chain);
495 sk->sk_hashent = hash;
496 sock_prot_inc_use(sk->sk_prot);
497 write_unlock_bh(&head->lock);
500 /* Silly. Should hash-dance instead... */
502 tcp_tw_deschedule(tw);
503 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
511 write_unlock_bh(&head->lock);
512 return -EADDRNOTAVAIL;
/*
 * Bind (if still unbound) and hash a connecting socket.  If this
 * socket is the sole owner of its bind bucket it can be hashed
 * directly (fast path); otherwise uniqueness must be proven via
 * tcp_v6_check_established().
 */
515 static int tcp_v6_hash_connect(struct sock *sk)
517 struct tcp_bind_hashbucket *head;
518 struct tcp_bind_bucket *tb;
521 if (inet_sk(sk)->num == 0) {
522 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
525 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
528 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
531 spin_lock_bh(&head->lock);
/* Sole owner of the bucket: no other socket can collide on this
 * local port, so skip the established-table walk. */
533 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
535 spin_unlock_bh(&head->lock);
538 spin_unlock_bh(&head->lock);
539 return tcp_v6_check_established(sk);
/* Incoming interface index of an IPv6 skb, from the IP6 control block. */
543 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
545 return IP6CB(skb)->iif;
/*
 * connect() for TCPv6.  Validates the sockaddr, resolves flow labels,
 * maps in6addr_any to loopback, handles link-local scope ids, and —
 * for v4-mapped destinations — re-dispatches to tcp_v4_connect() with
 * swapped af_specific ops.  Otherwise it performs the route/xfrm
 * lookup, binds and hashes the socket, picks a secure ISN and sends
 * the SYN via tcp_connect().
 * NOTE(review): many error-path lines (returns, failure labels) are
 * elided in this dump; the visible flow is fragmentary.
 */
548 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
551 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
552 struct inet_opt *inet = inet_sk(sk);
553 struct ipv6_pinfo *np = inet6_sk(sk);
554 struct tcp_opt *tp = tcp_sk(sk);
555 struct in6_addr *saddr = NULL, *final_p = NULL, final;
557 struct dst_entry *dst;
561 if (addr_len < SIN6_LEN_RFC2133)
564 if (usin->sin6_family != AF_INET6)
565 return(-EAFNOSUPPORT);
567 memset(&fl, 0, sizeof(fl));
570 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
571 IP6_ECN_flow_init(fl.fl6_flowlabel);
572 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
573 struct ip6_flowlabel *flowlabel;
574 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
575 if (flowlabel == NULL)
/* A flow label the socket owns pins the destination address. */
577 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
578 fl6_sock_release(flowlabel);
583 * connect() to INADDR_ANY means loopback (BSD'ism).
586 if(ipv6_addr_any(&usin->sin6_addr))
587 usin->sin6_addr.s6_addr[15] = 0x1;
589 addr_type = ipv6_addr_type(&usin->sin6_addr);
591 if(addr_type & IPV6_ADDR_MULTICAST)
594 if (addr_type&IPV6_ADDR_LINKLOCAL) {
595 if (addr_len >= sizeof(struct sockaddr_in6) &&
596 usin->sin6_scope_id) {
597 /* If interface is set while binding, indices
600 if (sk->sk_bound_dev_if &&
601 sk->sk_bound_dev_if != usin->sin6_scope_id)
604 sk->sk_bound_dev_if = usin->sin6_scope_id;
607 /* Connect to link-local address requires an interface */
608 if (!sk->sk_bound_dev_if)
/* Re-connecting to a different peer: stale timestamp state must
 * be cleared so PAWS does not reject the new connection. */
612 if (tp->ts_recent_stamp &&
613 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
615 tp->ts_recent_stamp = 0;
619 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
620 np->flow_label = fl.fl6_flowlabel;
626 if (addr_type == IPV6_ADDR_MAPPED) {
627 u32 exthdrlen = tp->ext_header_len;
628 struct sockaddr_in sin;
630 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
632 if (__ipv6_only_sock(sk))
635 sin.sin_family = AF_INET;
636 sin.sin_port = usin->sin6_port;
637 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
/* Temporarily switch to the v4-mapped operation vector before
 * delegating; restored on the (elided) failure path below. */
639 tp->af_specific = &ipv6_mapped;
640 sk->sk_backlog_rcv = tcp_v4_do_rcv;
642 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
645 tp->ext_header_len = exthdrlen;
646 tp->af_specific = &ipv6_specific;
647 sk->sk_backlog_rcv = tcp_v6_do_rcv;
650 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
652 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
659 if (!ipv6_addr_any(&np->rcv_saddr))
660 saddr = &np->rcv_saddr;
662 fl.proto = IPPROTO_TCP;
663 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
664 ipv6_addr_copy(&fl.fl6_src,
665 (saddr ? saddr : &np->saddr));
666 fl.oif = sk->sk_bound_dev_if;
667 fl.fl_ip_dport = usin->sin6_port;
668 fl.fl_ip_sport = inet->sport;
/* With a type-0 routing header, route towards the first hop but
 * remember the real destination in 'final'. */
670 if (np->opt && np->opt->srcrt) {
671 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
672 ipv6_addr_copy(&final, &fl.fl6_dst);
673 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
677 err = ip6_dst_lookup(sk, &dst, &fl);
681 ipv6_addr_copy(&fl.fl6_dst, final_p);
683 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
690 ipv6_addr_copy(&np->rcv_saddr, saddr);
693 /* set the source address */
694 ipv6_addr_copy(&np->saddr, saddr);
695 inet->rcv_saddr = LOOPBACK4_IPV6;
697 ip6_dst_store(sk, dst, NULL);
698 sk->sk_route_caps = dst->dev->features &
699 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
701 tp->ext_header_len = 0;
703 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
704 tp->ext2_header_len = dst->header_len;
706 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
708 inet->dport = usin->sin6_port;
710 tcp_set_state(sk, TCP_SYN_SENT);
711 err = tcp_v6_hash_connect(sk);
716 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
721 err = tcp_connect(sk);
728 tcp_set_state(sk, TCP_CLOSE);
732 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP.  Locates the affected socket,
 * validates the echoed sequence number against the send window,
 * handles PKT_TOOBIG by re-routing and syncing the MSS, and converts
 * other ICMP errors into socket errors (immediately, or softly when
 * the socket is owned by user context).  Also matches errors against
 * pending open_requests on listeners.
 * NOTE(review): lock/unlock and several goto/return lines are elided
 * in this dump.
 */
736 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
737 int type, int code, int offset, __u32 info)
739 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
740 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
741 struct ipv6_pinfo *np;
747 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
750 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
754 if (sk->sk_state == TCP_TIME_WAIT) {
755 tcp_tw_put((struct tcp_tw_bucket*)sk);
760 if (sock_owned_by_user(sk))
761 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
763 if (sk->sk_state == TCP_CLOSE)
767 seq = ntohl(th->seq);
768 if (sk->sk_state != TCP_LISTEN &&
769 !between(seq, tp->snd_una, tp->snd_nxt)) {
770 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
776 if (type == ICMPV6_PKT_TOOBIG) {
777 struct dst_entry *dst = NULL;
779 if (sock_owned_by_user(sk))
781 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
784 /* icmp should have updated the destination cache entry */
785 dst = __sk_dst_check(sk, np->dst_cookie);
788 struct inet_opt *inet = inet_sk(sk);
791 /* BUGGG_FUTURE: Again, it is not clear how
792 to handle rthdr case. Ignore this complexity
795 memset(&fl, 0, sizeof(fl));
796 fl.proto = IPPROTO_TCP;
797 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
798 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
799 fl.oif = sk->sk_bound_dev_if;
800 fl.fl_ip_dport = inet->dport;
801 fl.fl_ip_sport = inet->sport;
803 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
804 sk->sk_err_soft = -err;
808 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
809 sk->sk_err_soft = -err;
/* Shrink the MSS to the new path MTU and retransmit what no
 * longer fits; otherwise the retransmit timer copes. */
816 if (tp->pmtu_cookie > dst_pmtu(dst)) {
817 tcp_sync_mss(sk, dst_pmtu(dst));
818 tcp_simple_retransmit(sk);
819 } /* else let the usual retransmit timer handle it */
824 icmpv6_err_convert(type, code, &err);
826 /* Might be for an open_request */
827 switch (sk->sk_state) {
828 struct open_request *req, **prev;
830 if (sock_owned_by_user(sk))
833 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
834 &hdr->saddr, tcp_v6_iif(skb));
838 /* ICMPs are not backlogged, hence we cannot get
839 * an established socket here.
841 BUG_TRAP(req->sk == NULL);
843 if (seq != req->snt_isn) {
844 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
848 tcp_synq_drop(sk, req, prev);
852 case TCP_SYN_RECV: /* Cannot happen.
853 It can, it SYNs are crossed. --ANK */
854 if (!sock_owned_by_user(sk)) {
855 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
857 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
861 sk->sk_err_soft = err;
865 if (!sock_owned_by_user(sk) && np->recverr) {
867 sk->sk_error_report(sk);
869 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN-ACK for a pending open_request.  Honors an
 * inverted routing header from the SYN when rxopt asks for it, does a
 * route + xfrm lookup, checksums the segment against the request's
 * addresses and sends it with ip6_xmit().  Any txoptions allocated
 * here (and not shared with np->opt) are freed before returning.
 */
877 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
878 struct dst_entry *dst)
880 struct ipv6_pinfo *np = inet6_sk(sk);
881 struct sk_buff * skb;
882 struct ipv6_txoptions *opt = NULL;
883 struct in6_addr * final_p = NULL, final;
887 memset(&fl, 0, sizeof(fl));
888 fl.proto = IPPROTO_TCP;
889 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
890 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
891 fl.fl6_flowlabel = 0;
892 fl.oif = req->af.v6_req.iif;
893 fl.fl_ip_dport = req->rmt_port;
894 fl.fl_ip_sport = inet_sk(sk)->sport;
899 np->rxopt.bits.srcrt == 2 &&
900 req->af.v6_req.pktopts) {
901 struct sk_buff *pktopts = req->af.v6_req.pktopts;
902 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
/* Reverse the source route from the incoming SYN so the SYN-ACK
 * retraces the same path. */
904 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
907 if (opt && opt->srcrt) {
908 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
909 ipv6_addr_copy(&final, &fl.fl6_dst);
910 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
914 err = ip6_dst_lookup(sk, &dst, &fl);
918 ipv6_addr_copy(&fl.fl6_dst, final_p);
919 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
923 skb = tcp_make_synack(sk, dst, req);
925 struct tcphdr *th = skb->h.th;
927 th->check = tcp_v6_check(th, skb->len,
928 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
929 csum_partial((char *)th, skb->len, skb->csum));
931 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
932 err = ip6_xmit(sk, skb, &fl, opt, 0);
/* Congestion-notification drop from the stack is not an error. */
933 if (err == NET_XMIT_CN)
939 if (opt && opt != np->opt)
940 sock_kfree_s(sk, opt, opt->tot_len);
/* open_request destructor: drop the pktoptions skb held by the request. */
944 static void tcp_v6_or_free(struct open_request *req)
946 if (req->af.v6_req.pktopts)
947 kfree_skb(req->af.v6_req.pktopts);
/* open_request operations vector for IPv6 connection requests. */
950 static struct or_calltable or_ipv6 = {
952 .rtx_syn_ack = tcp_v6_send_synack,
953 .send_ack = tcp_v6_or_send_ack,
954 .destructor = tcp_v6_or_free,
955 .send_reset = tcp_v6_send_reset
/*
 * True if the incoming skb carries any IPv6 option the socket has
 * subscribed to via IPV6_PKTOPTIONS (hop-by-hop, flow label, routing
 * header, destination options) — i.e. the skb must be retained.
 */
958 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
960 struct ipv6_pinfo *np = inet6_sk(sk);
961 struct inet6_skb_parm *opt = IP6CB(skb);
964 if ((opt->hop && np->rxopt.bits.hopopts) ||
965 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
966 np->rxopt.bits.rxflow) ||
967 (opt->srcrt && np->rxopt.bits.srcrt) ||
968 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/*
 * Fill in the TCP checksum for an outgoing segment.  With hardware
 * checksum offload (CHECKSUM_HW) only the pseudo-header sum is
 * computed and the offset of the check field stashed in skb->csum;
 * otherwise the full checksum is computed in software.
 */
975 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
978 struct ipv6_pinfo *np = inet6_sk(sk);
980 if (skb->ip_summed == CHECKSUM_HW) {
981 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
982 skb->csum = offsetof(struct tcphdr, check);
984 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
985 csum_partial((char *)th, th->doff<<2,
/*
 * Send a RST in response to 'skb' without an owning socket.  Swaps
 * the addresses/ports of the offending segment, picks seq/ack from it
 * (ACK-bit handling is on elided lines), routes the reply and emits
 * it with ip6_xmit(NULL, ...).  Multicast destinations are ignored.
 */
991 static void tcp_v6_send_reset(struct sk_buff *skb)
993 struct tcphdr *th = skb->h.th, *t1;
994 struct sk_buff *buff;
1000 if (!ipv6_unicast_destination(skb))
1004 * We need to grab some memory, and put together an RST,
1005 * and then put it into the queue to be sent.
1008 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1013 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1015 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1017 /* Swap the send and the receive. */
1018 memset(t1, 0, sizeof(*t1));
1019 t1->dest = th->source;
1020 t1->source = th->dest;
1021 t1->doff = sizeof(*t1)/4;
1025 t1->seq = th->ack_seq;
/* No ACK in the offender: ACK everything it consumed instead. */
1028 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1029 + skb->len - (th->doff<<2));
1032 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1034 memset(&fl, 0, sizeof(fl));
1035 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1036 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1038 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1039 sizeof(*t1), IPPROTO_TCP,
1042 fl.proto = IPPROTO_TCP;
1043 fl.oif = tcp_v6_iif(skb);
1044 fl.fl_ip_dport = t1->dest;
1045 fl.fl_ip_sport = t1->source;
1047 /* sk = NULL, but it is safe for now. RST socket required. */
1048 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1050 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1051 dst_release(buff->dst);
1055 ip6_xmit(NULL, buff, &fl, NULL, 0);
1056 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1057 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/*
 * Send a bare (sock-less) ACK — used for TIME-WAIT and open_request
 * replies.  When 'ts' is non-zero a TCP timestamp option is appended
 * (the tot_len adjustment and ts echo line are elided in this dump).
 * Addressing, routing and transmission mirror tcp_v6_send_reset().
 */
1064 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1066 struct tcphdr *th = skb->h.th, *t1;
1067 struct sk_buff *buff;
1069 int tot_len = sizeof(struct tcphdr);
1074 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1079 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1081 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1083 /* Swap the send and the receive. */
1084 memset(t1, 0, sizeof(*t1));
1085 t1->dest = th->source;
1086 t1->source = th->dest;
1087 t1->doff = tot_len/4;
1088 t1->seq = htonl(seq);
1089 t1->ack_seq = htonl(ack);
1091 t1->window = htons(win);
1094 u32 *ptr = (u32*)(t1 + 1);
1095 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1096 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1097 *ptr++ = htonl(tcp_time_stamp);
1101 buff->csum = csum_partial((char *)t1, tot_len, 0);
1103 memset(&fl, 0, sizeof(fl));
1104 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1105 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1107 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1108 tot_len, IPPROTO_TCP,
1111 fl.proto = IPPROTO_TCP;
1112 fl.oif = tcp_v6_iif(skb);
1113 fl.fl_ip_dport = t1->dest;
1114 fl.fl_ip_sport = t1->source;
1116 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1117 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1118 dst_release(buff->dst);
1121 ip6_xmit(NULL, buff, &fl, NULL, 0);
1122 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK on behalf of a TIME-WAIT socket, echoing its stored state. */
1129 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1131 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1133 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1134 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* Re-ACK a retransmitted SYN for a pending open_request (SYN-ACK state). */
1139 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1141 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/*
 * For a segment hitting a listening socket: first try a matching
 * pending open_request (handled by tcp_check_req), then an already-
 * established child; a TIME-WAIT hit is released.  The SYN-cookie
 * path is compiled out (#if 0).
 */
1145 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1147 struct open_request *req, **prev;
1148 struct tcphdr *th = skb->h.th;
1149 struct tcp_opt *tp = tcp_sk(sk);
1152 /* Find possible connection requests. */
1153 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1154 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1156 return tcp_check_req(sk, skb, req, prev);
1158 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1160 &skb->nh.ipv6h->daddr,
1165 if (nsk->sk_state != TCP_TIME_WAIT) {
1169 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1173 #if 0 /*def CONFIG_SYN_COOKIES*/
1174 if (!th->rst && !th->syn && th->ack)
1175 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/*
 * Queue a new open_request on the listener's SYN table, arming its
 * initial SYN-ACK timeout; insertion is done under syn_wait_lock.
 */
1180 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1182 struct tcp_opt *tp = tcp_sk(sk);
1183 struct tcp_listen_opt *lopt = tp->listen_opt;
1184 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1187 req->expires = jiffies + TCP_TIMEOUT_INIT;
1189 req->dl_next = lopt->syn_table[h];
1191 write_lock(&tp->syn_wait_lock);
1192 lopt->syn_table[h] = req;
1193 write_unlock(&tp->syn_wait_lock);
1195 #ifdef CONFIG_ACCEPT_QUEUES
1196 tcp_synq_added(sk, req);
1203 /* FIXME: this is substantially similar to the ipv4 code.
1204 * Can some kind of merge be done? -- erics
/*
 * Handle an incoming SYN on a listening socket: drop under synflood
 * or when the accept queue is full, allocate and initialize an
 * open_request (options parsed from the SYN, pktoptions retained if
 * subscribed), pick an ISN, send the SYN-ACK and queue the request.
 * IPv4 packets are delegated to tcp_v4_conn_request().
 * NOTE(review): drop/goto labels and the return 0 success path are
 * elided in this dump.
 */
1206 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1208 struct ipv6_pinfo *np = inet6_sk(sk);
1209 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1210 struct open_request *req = NULL;
1211 __u32 isn = TCP_SKB_CB(skb)->when;
1212 #ifdef CONFIG_ACCEPT_QUEUES
1216 if (skb->protocol == htons(ETH_P_IP))
1217 return tcp_v4_conn_request(sk, skb);
1219 if (!ipv6_unicast_destination(skb))
1224 * There are no SYN attacks on IPv6, yet...
1226 if (tcp_synq_is_full(sk) && !isn) {
1227 if (net_ratelimit())
1228 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1232 #ifdef CONFIG_ACCEPT_QUEUES
1233 class = (skb->nfmark <= 0) ? 0 :
1234 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1236 * Accept only if the class has shares set or if the default class
1237 * i.e. class 0 has shares
1239 if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
1240 if (tcp_sk(sk)->acceptq[0].aq_ratio)
1246 if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1248 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1253 req = tcp_openreq_alloc();
1257 tcp_clear_options(&tmptp);
1258 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1259 tmptp.user_mss = tp->user_mss;
1261 tcp_parse_options(skb, &tmptp, 0);
1263 tmptp.tstamp_ok = tmptp.saw_tstamp;
1264 tcp_openreq_init(req, &tmptp, skb);
1265 #ifdef CONFIG_ACCEPT_QUEUES
1266 req->acceptq_class = class;
1267 req->acceptq_time_stamp = jiffies;
1269 req->class = &or_ipv6;
1270 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1271 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1272 TCP_ECN_create_request(req, skb->h.th);
1273 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive if the child will want its IPv6 options. */
1274 if (ipv6_opt_accepted(sk, skb) ||
1275 np->rxopt.bits.rxinfo ||
1276 np->rxopt.bits.rxhlim) {
1277 atomic_inc(&skb->users);
1278 req->af.v6_req.pktopts = skb;
1280 req->af.v6_req.iif = sk->sk_bound_dev_if;
1282 /* So that link locals have meaning */
1283 if (!sk->sk_bound_dev_if &&
1284 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1285 req->af.v6_req.iif = tcp_v6_iif(skb);
1288 isn = tcp_v6_init_sequence(sk,skb);
1292 if (tcp_v6_send_synack(sk, req, NULL))
1295 tcp_v6_synq_add(sk, req);
1301 tcp_openreq_free(req);
1303 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1304 return 0; /* don't send reset */
/*
 * Create the child socket when the 3-way handshake completes.  The
 * v4-mapped branch delegates to tcp_v4_syn_recv_sock() and then
 * converts the child's addressing/ops to the mapped form.  The native
 * branch routes (honoring an inverted rthdr), clones the child with
 * tcp_create_openreq_child(), copies addresses/options from the
 * request, transfers pktoptions, duplicates txoptions, sizes the MSS
 * from the route, and hashes the child + inherits the port.
 * NOTE(review): several goto labels and early returns are elided in
 * this dump; the two halves below are stitched from sampled lines.
 */
1307 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1308 struct open_request *req,
1309 struct dst_entry *dst)
1311 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1312 struct tcp6_sock *newtcp6sk;
1313 struct inet_opt *newinet;
1314 struct tcp_opt *newtp;
1316 struct ipv6_txoptions *opt;
1318 if (skb->protocol == htons(ETH_P_IP)) {
1323 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1328 newtcp6sk = (struct tcp6_sock *)newsk;
1329 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1331 newinet = inet_sk(newsk);
1332 newnp = inet6_sk(newsk);
1333 newtp = tcp_sk(newsk);
1335 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Express the child's v4 addresses as ::ffff:a.b.c.d mapped form. */
1337 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1340 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1343 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1345 newtp->af_specific = &ipv6_mapped;
1346 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1347 newnp->pktoptions = NULL;
1349 newnp->mcast_oif = tcp_v6_iif(skb);
1350 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1352 /* Charge newly allocated IPv6 socket. Though it is mapped,
1355 #ifdef INET_REFCNT_DEBUG
1356 atomic_inc(&inet6_sock_nr);
1359 /* It is tricky place. Until this moment IPv4 tcp
1360 worked with IPv6 af_tcp.af_specific.
1363 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1370 #ifdef CONFIG_ACCEPT_QUEUES
1371 if (sk_acceptq_is_full(sk, req->acceptq_class))
1373 if (sk_acceptq_is_full(sk))
1377 if (np->rxopt.bits.srcrt == 2 &&
1378 opt == NULL && req->af.v6_req.pktopts) {
1379 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1381 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1385 struct in6_addr *final_p = NULL, final;
1388 memset(&fl, 0, sizeof(fl));
1389 fl.proto = IPPROTO_TCP;
1390 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1391 if (opt && opt->srcrt) {
1392 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1393 ipv6_addr_copy(&final, &fl.fl6_dst);
1394 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1397 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1398 fl.oif = sk->sk_bound_dev_if;
1399 fl.fl_ip_dport = req->rmt_port;
1400 fl.fl_ip_sport = inet_sk(sk)->sport;
1402 if (ip6_dst_lookup(sk, &dst, &fl))
1406 ipv6_addr_copy(&fl.fl6_dst, final_p);
1408 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1412 newsk = tcp_create_openreq_child(sk, req, skb);
1416 /* Charge newly allocated IPv6 socket */
1417 #ifdef INET_REFCNT_DEBUG
1418 atomic_inc(&inet6_sock_nr);
1421 ip6_dst_store(newsk, dst, NULL);
1422 newsk->sk_route_caps = dst->dev->features &
1423 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1425 newtcp6sk = (struct tcp6_sock *)newsk;
1426 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1428 newtp = tcp_sk(newsk);
1429 newinet = inet_sk(newsk);
1430 newnp = inet6_sk(newsk);
1432 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1434 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1435 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1436 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1437 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1439 /* Now IPv6 options...
1441 First: no IPv4 options.
1443 newinet->opt = NULL;
1446 newnp->rxopt.all = np->rxopt.all;
1448 /* Clone pktoptions received with SYN */
1449 newnp->pktoptions = NULL;
1450 if (req->af.v6_req.pktopts) {
1451 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1453 kfree_skb(req->af.v6_req.pktopts);
1454 req->af.v6_req.pktopts = NULL;
1455 if (newnp->pktoptions)
1456 skb_set_owner_r(newnp->pktoptions, newsk);
1459 newnp->mcast_oif = tcp_v6_iif(skb);
1460 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1462 /* Clone native IPv6 options from listening socket (if any)
1464 Yes, keeping reference count would be much more clever,
1465 but we make one more one thing there: reattach optmem
1469 newnp->opt = ipv6_dup_options(newsk, opt);
1471 sock_kfree_s(sk, opt, opt->tot_len);
1474 newtp->ext_header_len = 0;
1476 newtp->ext_header_len = newnp->opt->opt_nflen +
1477 newnp->opt->opt_flen;
1478 newtp->ext2_header_len = dst->header_len;
1480 tcp_sync_mss(newsk, dst_pmtu(dst));
1481 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1482 tcp_initialize_rcv_mss(newsk);
1484 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1486 __tcp_v6_hash(newsk);
1487 tcp_inherit_port(sk, newsk);
1492 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1494 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1495 if (opt && opt != np->opt)
1496 sock_kfree_s(sk, opt, opt->tot_len);
1501 static int tcp_v6_checksum_init(struct sk_buff *skb)
1503 if (skb->ip_summed == CHECKSUM_HW) {
1504 skb->ip_summed = CHECKSUM_UNNECESSARY;
1505 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1506 &skb->nh.ipv6h->daddr,skb->csum))
1508 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1510 if (skb->len <= 76) {
1511 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1512 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1514 skb->ip_summed = CHECKSUM_UNNECESSARY;
1516 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1517 &skb->nh.ipv6h->daddr,0);
1522 /* The socket must have its spinlock held when we get
1525 * We have a potential double-lock case here, so even when
1526 * doing backlog processing we use the BH locking scheme.
1527 * This is because we cannot sleep with the original spinlock
1530 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1532 struct ipv6_pinfo *np = inet6_sk(sk);
1534 struct sk_buff *opt_skb = NULL;
1536 /* Imagine: socket is IPv6. IPv4 packet arrives,
1537 goes to IPv4 receive handler and backlogged.
1538 From backlog it always goes here. Kerboom...
1539 Fortunately, tcp_rcv_established and rcv_established
1540 handle them correctly, but it is not case with
1541 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1544 if (skb->protocol == htons(ETH_P_IP))
1545 return tcp_v4_do_rcv(sk, skb);
1547 if (sk_filter(sk, skb, 0))
1551 * socket locking is here for SMP purposes as backlog rcv
1552 * is currently called with bh processing disabled.
1555 /* Do Stevens' IPV6_PKTOPTIONS.
1557 Yes, guys, it is the only place in our code, where we
1558 may make it not affecting IPv4.
1559 The rest of code is protocol independent,
1560 and I do not like idea to uglify IPv4.
1562 Actually, all the idea behind IPV6_PKTOPTIONS
1563 looks not very well thought. For now we latch
1564 options, received in the last packet, enqueued
1565 by tcp. Feel free to propose better solution.
1569 opt_skb = skb_clone(skb, GFP_ATOMIC);
1571 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1572 TCP_CHECK_TIMER(sk);
1573 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1575 TCP_CHECK_TIMER(sk);
1577 goto ipv6_pktoptions;
1581 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1584 if (sk->sk_state == TCP_LISTEN) {
1585 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1590 * Queue it on the new socket if the new socket is active,
1591 * otherwise we just shortcircuit this and continue with
1595 if (tcp_child_process(sk, nsk, skb))
1598 __kfree_skb(opt_skb);
1603 TCP_CHECK_TIMER(sk);
1604 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1606 TCP_CHECK_TIMER(sk);
1608 goto ipv6_pktoptions;
1612 tcp_v6_send_reset(skb);
1615 __kfree_skb(opt_skb);
1619 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1624 /* Do you ask, what is it?
1626 1. skb was enqueued by tcp.
1627 2. skb is added to tail of read queue, rather than out of order.
1628 3. socket is not in passive state.
1629 4. Finally, it really contains options, which user wants to receive.
1632 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1633 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1634 if (np->rxopt.bits.rxinfo)
1635 np->mcast_oif = tcp_v6_iif(opt_skb);
1636 if (np->rxopt.bits.rxhlim)
1637 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1638 if (ipv6_opt_accepted(sk, opt_skb)) {
1639 skb_set_owner_r(opt_skb, sk);
1640 opt_skb = xchg(&np->pktoptions, opt_skb);
1642 __kfree_skb(opt_skb);
1643 opt_skb = xchg(&np->pktoptions, NULL);
1652 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1654 struct sk_buff *skb = *pskb;
1659 if (skb->pkt_type != PACKET_HOST)
1663 * Count it even if it's bad.
1665 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1667 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1672 if (th->doff < sizeof(struct tcphdr)/4)
1674 if (!pskb_may_pull(skb, th->doff*4))
1677 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1678 tcp_v6_checksum_init(skb) < 0))
1682 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1683 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1684 skb->len - th->doff*4);
1685 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1686 TCP_SKB_CB(skb)->when = 0;
1687 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1688 TCP_SKB_CB(skb)->sacked = 0;
1690 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1691 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1697 if (sk->sk_state == TCP_TIME_WAIT)
1700 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1701 goto discard_and_relse;
1703 if (sk_filter(sk, skb, 0))
1704 goto discard_and_relse;
1710 if (!sock_owned_by_user(sk)) {
1711 if (!tcp_prequeue(sk, skb))
1712 ret = tcp_v6_do_rcv(sk, skb);
1714 sk_add_backlog(sk, skb);
1718 return ret ? -1 : 0;
1721 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1724 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1726 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1728 tcp_v6_send_reset(skb);
1745 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1746 tcp_tw_put((struct tcp_tw_bucket *) sk);
1750 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1751 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1752 tcp_tw_put((struct tcp_tw_bucket *) sk);
1756 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1757 skb, th, skb->len)) {
1762 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1764 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1765 tcp_tw_put((struct tcp_tw_bucket *)sk);
1769 /* Fall through to ACK */
1772 tcp_v6_timewait_ack(sk, skb);
1776 case TCP_TW_SUCCESS:;
1781 static int tcp_v6_rebuild_header(struct sock *sk)
1784 struct dst_entry *dst;
1785 struct ipv6_pinfo *np = inet6_sk(sk);
1787 dst = __sk_dst_check(sk, np->dst_cookie);
1790 struct inet_opt *inet = inet_sk(sk);
1791 struct in6_addr *final_p = NULL, final;
1794 memset(&fl, 0, sizeof(fl));
1795 fl.proto = IPPROTO_TCP;
1796 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1797 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1798 fl.fl6_flowlabel = np->flow_label;
1799 fl.oif = sk->sk_bound_dev_if;
1800 fl.fl_ip_dport = inet->dport;
1801 fl.fl_ip_sport = inet->sport;
1803 if (np->opt && np->opt->srcrt) {
1804 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1805 ipv6_addr_copy(&final, &fl.fl6_dst);
1806 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1810 err = ip6_dst_lookup(sk, &dst, &fl);
1812 sk->sk_route_caps = 0;
1816 ipv6_addr_copy(&fl.fl6_dst, final_p);
1818 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1819 sk->sk_err_soft = -err;
1824 ip6_dst_store(sk, dst, NULL);
1825 sk->sk_route_caps = dst->dev->features &
1826 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1827 tcp_sk(sk)->ext2_header_len = dst->header_len;
1833 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1835 struct sock *sk = skb->sk;
1836 struct inet_opt *inet = inet_sk(sk);
1837 struct ipv6_pinfo *np = inet6_sk(sk);
1839 struct dst_entry *dst;
1840 struct in6_addr *final_p = NULL, final;
1842 memset(&fl, 0, sizeof(fl));
1843 fl.proto = IPPROTO_TCP;
1844 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1845 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1846 fl.fl6_flowlabel = np->flow_label;
1847 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1848 fl.oif = sk->sk_bound_dev_if;
1849 fl.fl_ip_sport = inet->sport;
1850 fl.fl_ip_dport = inet->dport;
1852 if (np->opt && np->opt->srcrt) {
1853 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1854 ipv6_addr_copy(&final, &fl.fl6_dst);
1855 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1859 dst = __sk_dst_check(sk, np->dst_cookie);
1862 int err = ip6_dst_lookup(sk, &dst, &fl);
1865 sk->sk_err_soft = -err;
1870 ipv6_addr_copy(&fl.fl6_dst, final_p);
1872 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1873 sk->sk_route_caps = 0;
1878 ip6_dst_store(sk, dst, NULL);
1879 sk->sk_route_caps = dst->dev->features &
1880 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1881 tcp_sk(sk)->ext2_header_len = dst->header_len;
1884 skb->dst = dst_clone(dst);
1886 /* Restore final destination back after routing done */
1887 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1889 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1892 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1894 struct ipv6_pinfo *np = inet6_sk(sk);
1895 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1897 sin6->sin6_family = AF_INET6;
1898 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1899 sin6->sin6_port = inet_sk(sk)->dport;
1900 /* We do not store received flowlabel for TCP */
1901 sin6->sin6_flowinfo = 0;
1902 sin6->sin6_scope_id = 0;
1903 if (sk->sk_bound_dev_if &&
1904 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1905 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1908 static int tcp_v6_remember_stamp(struct sock *sk)
1910 /* Alas, not yet... */
1914 static struct tcp_func ipv6_specific = {
1915 .queue_xmit = tcp_v6_xmit,
1916 .send_check = tcp_v6_send_check,
1917 .rebuild_header = tcp_v6_rebuild_header,
1918 .conn_request = tcp_v6_conn_request,
1919 .syn_recv_sock = tcp_v6_syn_recv_sock,
1920 .remember_stamp = tcp_v6_remember_stamp,
1921 .net_header_len = sizeof(struct ipv6hdr),
1923 .setsockopt = ipv6_setsockopt,
1924 .getsockopt = ipv6_getsockopt,
1925 .addr2sockaddr = v6_addr2sockaddr,
1926 .sockaddr_len = sizeof(struct sockaddr_in6)
1930 * TCP over IPv4 via INET6 API
1933 static struct tcp_func ipv6_mapped = {
1934 .queue_xmit = ip_queue_xmit,
1935 .send_check = tcp_v4_send_check,
1936 .rebuild_header = tcp_v4_rebuild_header,
1937 .conn_request = tcp_v6_conn_request,
1938 .syn_recv_sock = tcp_v6_syn_recv_sock,
1939 .remember_stamp = tcp_v4_remember_stamp,
1940 .net_header_len = sizeof(struct iphdr),
1942 .setsockopt = ipv6_setsockopt,
1943 .getsockopt = ipv6_getsockopt,
1944 .addr2sockaddr = v6_addr2sockaddr,
1945 .sockaddr_len = sizeof(struct sockaddr_in6)
1950 /* NOTE: A lot of things set to zero explicitly by call to
1951 * sk_alloc() so need not be done here.
1953 static int tcp_v6_init_sock(struct sock *sk)
1955 struct tcp_opt *tp = tcp_sk(sk);
1957 skb_queue_head_init(&tp->out_of_order_queue);
1958 tcp_init_xmit_timers(sk);
1959 tcp_prequeue_init(tp);
1961 tp->rto = TCP_TIMEOUT_INIT;
1962 tp->mdev = TCP_TIMEOUT_INIT;
1964 /* So many TCP implementations out there (incorrectly) count the
1965 * initial SYN frame in their delayed-ACK and congestion control
1966 * algorithms that we must have the following bandaid to talk
1967 * efficiently to them. -DaveM
1971 /* See draft-stevens-tcpca-spec-01 for discussion of the
1972 * initialization of these values.
1974 tp->snd_ssthresh = 0x7fffffff;
1975 tp->snd_cwnd_clamp = ~0;
1976 tp->mss_cache_std = tp->mss_cache = 536;
1978 tp->reordering = sysctl_tcp_reordering;
1980 sk->sk_state = TCP_CLOSE;
1982 tp->af_specific = &ipv6_specific;
1984 sk->sk_write_space = sk_stream_write_space;
1985 sk->sk_use_write_queue = 1;
1987 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1988 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1990 atomic_inc(&tcp_sockets_allocated);
1995 static int tcp_v6_destroy_sock(struct sock *sk)
1997 extern int tcp_v4_destroy_sock(struct sock *sk);
1999 tcp_v4_destroy_sock(sk);
2000 return inet6_destroy_sock(sk);
2003 /* Proc filesystem TCPv6 sock list dumping. */
2004 static void get_openreq6(struct seq_file *seq,
2005 struct sock *sk, struct open_request *req, int i, int uid)
2007 struct in6_addr *dest, *src;
2008 int ttd = req->expires - jiffies;
2013 src = &req->af.v6_req.loc_addr;
2014 dest = &req->af.v6_req.rmt_addr;
2016 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2017 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2019 src->s6_addr32[0], src->s6_addr32[1],
2020 src->s6_addr32[2], src->s6_addr32[3],
2021 ntohs(inet_sk(sk)->sport),
2022 dest->s6_addr32[0], dest->s6_addr32[1],
2023 dest->s6_addr32[2], dest->s6_addr32[3],
2024 ntohs(req->rmt_port),
2026 0,0, /* could print option size, but that is af dependent. */
2027 1, /* timers active (only the expire timer) */
2028 jiffies_to_clock_t(ttd),
2031 0, /* non standard timer */
2032 0, /* open_requests have no inode */
2036 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2038 struct in6_addr *dest, *src;
2041 unsigned long timer_expires;
2042 struct inet_opt *inet = inet_sk(sp);
2043 struct tcp_opt *tp = tcp_sk(sp);
2044 struct ipv6_pinfo *np = inet6_sk(sp);
2047 src = &np->rcv_saddr;
2048 destp = ntohs(inet->dport);
2049 srcp = ntohs(inet->sport);
2050 if (tp->pending == TCP_TIME_RETRANS) {
2052 timer_expires = tp->timeout;
2053 } else if (tp->pending == TCP_TIME_PROBE0) {
2055 timer_expires = tp->timeout;
2056 } else if (timer_pending(&sp->sk_timer)) {
2058 timer_expires = sp->sk_timer.expires;
2061 timer_expires = jiffies;
2065 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2066 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2068 src->s6_addr32[0], src->s6_addr32[1],
2069 src->s6_addr32[2], src->s6_addr32[3], srcp,
2070 dest->s6_addr32[0], dest->s6_addr32[1],
2071 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2073 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2075 jiffies_to_clock_t(timer_expires - jiffies),
2080 atomic_read(&sp->sk_refcnt), sp,
2081 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2082 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2086 static void get_timewait6_sock(struct seq_file *seq,
2087 struct tcp_tw_bucket *tw, int i)
2089 struct in6_addr *dest, *src;
2091 int ttd = tw->tw_ttd - jiffies;
2096 dest = &tw->tw_v6_daddr;
2097 src = &tw->tw_v6_rcv_saddr;
2098 destp = ntohs(tw->tw_dport);
2099 srcp = ntohs(tw->tw_sport);
2102 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2103 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2105 src->s6_addr32[0], src->s6_addr32[1],
2106 src->s6_addr32[2], src->s6_addr32[3], srcp,
2107 dest->s6_addr32[0], dest->s6_addr32[1],
2108 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2109 tw->tw_substate, 0, 0,
2110 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2111 atomic_read(&tw->tw_refcnt), tw);
2114 #ifdef CONFIG_PROC_FS
2115 static int tcp6_seq_show(struct seq_file *seq, void *v)
2117 struct tcp_iter_state *st;
2119 if (v == SEQ_START_TOKEN) {
2124 "st tx_queue rx_queue tr tm->when retrnsmt"
2125 " uid timeout inode\n");
2130 switch (st->state) {
2131 case TCP_SEQ_STATE_LISTENING:
2132 case TCP_SEQ_STATE_ESTABLISHED:
2133 get_tcp6_sock(seq, v, st->num);
2135 case TCP_SEQ_STATE_OPENREQ:
2136 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2138 case TCP_SEQ_STATE_TIME_WAIT:
2139 get_timewait6_sock(seq, v, st->num);
2146 static struct file_operations tcp6_seq_fops;
2147 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2148 .owner = THIS_MODULE,
2151 .seq_show = tcp6_seq_show,
2152 .seq_fops = &tcp6_seq_fops,
2155 int __init tcp6_proc_init(void)
2157 return tcp_proc_register(&tcp6_seq_afinfo);
2160 void tcp6_proc_exit(void)
2162 tcp_proc_unregister(&tcp6_seq_afinfo);
2166 struct proto tcpv6_prot = {
2168 .owner = THIS_MODULE,
2170 .connect = tcp_v6_connect,
2171 .disconnect = tcp_disconnect,
2172 .accept = tcp_accept,
2174 .init = tcp_v6_init_sock,
2175 .destroy = tcp_v6_destroy_sock,
2176 .shutdown = tcp_shutdown,
2177 .setsockopt = tcp_setsockopt,
2178 .getsockopt = tcp_getsockopt,
2179 .sendmsg = tcp_sendmsg,
2180 .recvmsg = tcp_recvmsg,
2181 .backlog_rcv = tcp_v6_do_rcv,
2182 .hash = tcp_v6_hash,
2183 .unhash = tcp_unhash,
2184 .get_port = tcp_v6_get_port,
2185 .enter_memory_pressure = tcp_enter_memory_pressure,
2186 .sockets_allocated = &tcp_sockets_allocated,
2187 .memory_allocated = &tcp_memory_allocated,
2188 .memory_pressure = &tcp_memory_pressure,
2189 .sysctl_mem = sysctl_tcp_mem,
2190 .sysctl_wmem = sysctl_tcp_wmem,
2191 .sysctl_rmem = sysctl_tcp_rmem,
2192 .max_header = MAX_TCP_HEADER,
2193 .slab_obj_size = sizeof(struct tcp6_sock),
2196 static struct inet6_protocol tcpv6_protocol = {
2197 .handler = tcp_v6_rcv,
2198 .err_handler = tcp_v6_err,
2199 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2202 extern struct proto_ops inet6_stream_ops;
2204 static struct inet_protosw tcpv6_protosw = {
2205 .type = SOCK_STREAM,
2206 .protocol = IPPROTO_TCP,
2207 .prot = &tcpv6_prot,
2208 .ops = &inet6_stream_ops,
2211 .flags = INET_PROTOSW_PERMANENT,
2214 void __init tcpv6_init(void)
2216 /* register inet6 protocol */
2217 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2218 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2219 inet6_register_protosw(&tcpv6_protosw);