3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
58 #include <asm/uaccess.h>
60 #include <linux/proc_fs.h>
61 #include <linux/seq_file.h>
63 static void tcp_v6_send_reset(struct sk_buff *skb);
64 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
65 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
68 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
69 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
71 static struct tcp_func ipv6_mapped;
72 static struct tcp_func ipv6_specific;
74 /* I have no idea if this is a good hash for v6 or not. -DaveM */
75 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
76 struct in6_addr *faddr, u16 fport)
78 int hashent = (lport ^ fport);
80 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
81 hashent ^= hashent>>16;
82 hashent ^= hashent>>8;
83 return (hashent & (tcp_ehash_size - 1));
86 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
88 struct inet_opt *inet = inet_sk(sk);
89 struct ipv6_pinfo *np = inet6_sk(sk);
90 struct in6_addr *laddr = &np->rcv_saddr;
91 struct in6_addr *faddr = &np->daddr;
92 __u16 lport = inet->num;
93 __u16 fport = inet->dport;
94 return tcp_v6_hashfn(laddr, lport, faddr, fport);
97 static inline int tcp_v6_bind_conflict(struct sock *sk,
98 struct tcp_bind_bucket *tb)
101 struct hlist_node *node;
103 /* We must walk the whole port owner list in this case. -DaveM */
104 sk_for_each_bound(sk2, node, &tb->owners) {
106 (!sk->sk_bound_dev_if ||
107 !sk2->sk_bound_dev_if ||
108 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
109 (!sk->sk_reuse || !sk2->sk_reuse ||
110 sk2->sk_state == TCP_LISTEN) &&
111 ipv6_rcv_saddr_equal(sk, sk2))
118 /* Grrr, addr_type already calculated by caller, but I don't want
119 * to add some silly "cookie" argument to this method just for that.
120 * But it doesn't matter, the recalculation is in the rarest path
121 * this function ever takes.
123 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
125 struct tcp_bind_hashbucket *head;
126 struct tcp_bind_bucket *tb;
127 struct hlist_node *node;
132 int low = sysctl_local_port_range[0];
133 int high = sysctl_local_port_range[1];
134 int remaining = (high - low) + 1;
137 spin_lock(&tcp_portalloc_lock);
138 rover = tcp_port_rover;
140 if ((rover < low) || (rover > high))
142 head = &tcp_bhash[tcp_bhashfn(rover)];
143 spin_lock(&head->lock);
144 tb_for_each(tb, node, &head->chain)
145 if (tb->port == rover)
149 spin_unlock(&head->lock);
150 } while (--remaining > 0);
151 tcp_port_rover = rover;
152 spin_unlock(&tcp_portalloc_lock);
154 /* Exhausted local port range during search? */
159 /* OK, here is the one we will use. */
162 head = &tcp_bhash[tcp_bhashfn(snum)];
163 spin_lock(&head->lock);
164 tb_for_each(tb, node, &head->chain)
165 if (tb->port == snum)
171 if (tb && !hlist_empty(&tb->owners)) {
172 if (tb->fastreuse > 0 && sk->sk_reuse &&
173 sk->sk_state != TCP_LISTEN) {
177 if (tcp_v6_bind_conflict(sk, tb))
183 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
185 if (hlist_empty(&tb->owners)) {
186 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
190 } else if (tb->fastreuse &&
191 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
195 if (!tcp_sk(sk)->bind_hash)
196 tcp_bind_hash(sk, tb, snum);
197 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
201 spin_unlock(&head->lock);
207 static __inline__ void __tcp_v6_hash(struct sock *sk)
209 struct hlist_head *list;
212 BUG_TRAP(sk_unhashed(sk));
214 if (sk->sk_state == TCP_LISTEN) {
215 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
216 lock = &tcp_lhash_lock;
219 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
220 list = &tcp_ehash[sk->sk_hashent].chain;
221 lock = &tcp_ehash[sk->sk_hashent].lock;
225 __sk_add_node(sk, list);
226 sock_prot_inc_use(sk->sk_prot);
231 static void tcp_v6_hash(struct sock *sk)
233 if (sk->sk_state != TCP_CLOSE) {
234 struct tcp_opt *tp = tcp_sk(sk);
236 if (tp->af_specific == &ipv6_mapped) {
246 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
249 struct hlist_node *node;
250 struct sock *result = NULL;
254 read_lock(&tcp_lhash_lock);
255 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
256 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
257 struct ipv6_pinfo *np = inet6_sk(sk);
260 if (!ipv6_addr_any(&np->rcv_saddr)) {
261 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
265 if (sk->sk_bound_dev_if) {
266 if (sk->sk_bound_dev_if != dif)
274 if (score > hiscore) {
282 read_unlock(&tcp_lhash_lock);
286 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
287 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
289 * The sockhash lock must be held as a reader here.
292 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
293 struct in6_addr *daddr, u16 hnum,
296 struct tcp_ehash_bucket *head;
298 struct hlist_node *node;
299 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
302 /* Optimize here for direct hit, only listening connections can
303 * have wildcards anyways.
305 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
306 head = &tcp_ehash[hash];
307 read_lock(&head->lock);
308 sk_for_each(sk, node, &head->chain) {
309 /* For IPV6 do the cheaper port and family tests first. */
310 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
311 goto hit; /* You sunk my battleship! */
313 /* Must check for a TIME_WAIT'er before going to listener hash. */
314 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
315 /* FIXME: acme: check this... */
316 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
318 if(*((__u32 *)&(tw->tw_dport)) == ports &&
319 sk->sk_family == PF_INET6) {
320 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
321 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
322 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
326 read_unlock(&head->lock);
331 read_unlock(&head->lock);
336 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
337 struct in6_addr *daddr, u16 hnum,
342 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
347 return tcp_v6_lookup_listener(daddr, hnum, dif);
350 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 dport,
357 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
365 * Open request hash tables.
368 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
372 a = raddr->s6_addr32[0];
373 b = raddr->s6_addr32[1];
374 c = raddr->s6_addr32[2];
376 a += JHASH_GOLDEN_RATIO;
377 b += JHASH_GOLDEN_RATIO;
379 __jhash_mix(a, b, c);
381 a += raddr->s6_addr32[3];
383 __jhash_mix(a, b, c);
385 return c & (TCP_SYNQ_HSIZE - 1);
388 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
389 struct open_request ***prevp,
391 struct in6_addr *raddr,
392 struct in6_addr *laddr,
395 struct tcp_listen_opt *lopt = tp->listen_opt;
396 struct open_request *req, **prev;
398 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
399 (req = *prev) != NULL;
400 prev = &req->dl_next) {
401 if (req->rmt_port == rport &&
402 req->class->family == AF_INET6 &&
403 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
404 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
405 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
406 BUG_TRAP(req->sk == NULL);
415 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
416 struct in6_addr *saddr,
417 struct in6_addr *daddr,
420 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
423 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
425 if (skb->protocol == htons(ETH_P_IPV6)) {
426 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
427 skb->nh.ipv6h->saddr.s6_addr32,
431 return secure_tcp_sequence_number(skb->nh.iph->daddr,
438 static int tcp_v6_check_established(struct sock *sk)
440 struct inet_opt *inet = inet_sk(sk);
441 struct ipv6_pinfo *np = inet6_sk(sk);
442 struct in6_addr *daddr = &np->rcv_saddr;
443 struct in6_addr *saddr = &np->daddr;
444 int dif = sk->sk_bound_dev_if;
445 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
446 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
447 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
449 struct hlist_node *node;
450 struct tcp_tw_bucket *tw;
452 write_lock_bh(&head->lock);
454 /* Check TIME-WAIT sockets first. */
455 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
456 tw = (struct tcp_tw_bucket*)sk2;
458 if(*((__u32 *)&(tw->tw_dport)) == ports &&
459 sk2->sk_family == PF_INET6 &&
460 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
461 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
462 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
463 struct tcp_opt *tp = tcp_sk(sk);
465 if (tw->tw_ts_recent_stamp) {
466 /* See comment in tcp_ipv4.c */
467 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
470 tp->ts_recent = tw->tw_ts_recent;
471 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
480 /* And established part... */
481 sk_for_each(sk2, node, &head->chain) {
482 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
487 BUG_TRAP(sk_unhashed(sk));
488 __sk_add_node(sk, &head->chain);
489 sk->sk_hashent = hash;
490 sock_prot_inc_use(sk->sk_prot);
491 write_unlock_bh(&head->lock);
494 /* Silly. Should hash-dance instead... */
496 tcp_tw_deschedule(tw);
497 NET_INC_STATS_BH(TimeWaitRecycled);
505 write_unlock_bh(&head->lock);
506 return -EADDRNOTAVAIL;
509 static int tcp_v6_hash_connect(struct sock *sk)
511 struct tcp_bind_hashbucket *head;
512 struct tcp_bind_bucket *tb;
515 if (inet_sk(sk)->num == 0) {
516 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
519 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
522 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
525 spin_lock_bh(&head->lock);
527 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
529 spin_unlock_bh(&head->lock);
532 spin_unlock_bh(&head->lock);
533 return tcp_v6_check_established(sk);
537 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
539 return IP6CB(skb)->iif;
542 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
545 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
546 struct inet_opt *inet = inet_sk(sk);
547 struct ipv6_pinfo *np = inet6_sk(sk);
548 struct tcp_opt *tp = tcp_sk(sk);
549 struct in6_addr *saddr = NULL;
551 struct dst_entry *dst;
555 if (addr_len < SIN6_LEN_RFC2133)
558 if (usin->sin6_family != AF_INET6)
559 return(-EAFNOSUPPORT);
561 memset(&fl, 0, sizeof(fl));
564 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
565 IP6_ECN_flow_init(fl.fl6_flowlabel);
566 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
567 struct ip6_flowlabel *flowlabel;
568 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
569 if (flowlabel == NULL)
571 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
572 fl6_sock_release(flowlabel);
577 * connect() to INADDR_ANY means loopback (BSD'ism).
580 if(ipv6_addr_any(&usin->sin6_addr))
581 usin->sin6_addr.s6_addr[15] = 0x1;
583 addr_type = ipv6_addr_type(&usin->sin6_addr);
585 if(addr_type & IPV6_ADDR_MULTICAST)
588 if (addr_type&IPV6_ADDR_LINKLOCAL) {
589 if (addr_len >= sizeof(struct sockaddr_in6) &&
590 usin->sin6_scope_id) {
591 /* If interface is set while binding, indices
594 if (sk->sk_bound_dev_if &&
595 sk->sk_bound_dev_if != usin->sin6_scope_id)
598 sk->sk_bound_dev_if = usin->sin6_scope_id;
601 /* Connect to link-local address requires an interface */
602 if (!sk->sk_bound_dev_if)
606 if (tp->ts_recent_stamp &&
607 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
609 tp->ts_recent_stamp = 0;
613 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
614 np->flow_label = fl.fl6_flowlabel;
620 if (addr_type == IPV6_ADDR_MAPPED) {
621 u32 exthdrlen = tp->ext_header_len;
622 struct sockaddr_in sin;
624 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
626 if (__ipv6_only_sock(sk))
629 sin.sin_family = AF_INET;
630 sin.sin_port = usin->sin6_port;
631 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
633 tp->af_specific = &ipv6_mapped;
634 sk->sk_backlog_rcv = tcp_v4_do_rcv;
636 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
639 tp->ext_header_len = exthdrlen;
640 tp->af_specific = &ipv6_specific;
641 sk->sk_backlog_rcv = tcp_v6_do_rcv;
644 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
646 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
653 if (!ipv6_addr_any(&np->rcv_saddr))
654 saddr = &np->rcv_saddr;
656 fl.proto = IPPROTO_TCP;
657 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
658 ipv6_addr_copy(&fl.fl6_src,
659 (saddr ? saddr : &np->saddr));
660 fl.oif = sk->sk_bound_dev_if;
661 fl.fl_ip_dport = usin->sin6_port;
662 fl.fl_ip_sport = inet->sport;
664 if (np->opt && np->opt->srcrt) {
665 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
666 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
669 err = ip6_dst_lookup(sk, &dst, &fl);
676 ipv6_addr_copy(&np->rcv_saddr, saddr);
679 /* set the source address */
680 ipv6_addr_copy(&np->saddr, saddr);
681 inet->rcv_saddr = LOOPBACK4_IPV6;
683 ip6_dst_store(sk, dst, NULL);
684 sk->sk_route_caps = dst->dev->features &
685 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
687 tp->ext_header_len = 0;
689 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
690 tp->ext2_header_len = dst->header_len;
692 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
694 inet->dport = usin->sin6_port;
696 tcp_set_state(sk, TCP_SYN_SENT);
697 err = tcp_v6_hash_connect(sk);
702 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
707 err = tcp_connect(sk);
714 tcp_set_state(sk, TCP_CLOSE);
718 sk->sk_route_caps = 0;
722 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
723 int type, int code, int offset, __u32 info)
725 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
726 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
727 struct ipv6_pinfo *np;
733 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
736 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), Icmp6InErrors);
740 if (sk->sk_state == TCP_TIME_WAIT) {
741 tcp_tw_put((struct tcp_tw_bucket*)sk);
746 if (sock_owned_by_user(sk))
747 NET_INC_STATS_BH(LockDroppedIcmps);
749 if (sk->sk_state == TCP_CLOSE)
753 seq = ntohl(th->seq);
754 if (sk->sk_state != TCP_LISTEN &&
755 !between(seq, tp->snd_una, tp->snd_nxt)) {
756 NET_INC_STATS_BH(OutOfWindowIcmps);
762 if (type == ICMPV6_PKT_TOOBIG) {
763 struct dst_entry *dst = NULL;
765 if (sock_owned_by_user(sk))
767 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
770 /* icmp should have updated the destination cache entry */
771 dst = __sk_dst_check(sk, np->dst_cookie);
774 struct inet_opt *inet = inet_sk(sk);
777 /* BUGGG_FUTURE: Again, it is not clear how
778 to handle rthdr case. Ignore this complexity
781 memset(&fl, 0, sizeof(fl));
782 fl.proto = IPPROTO_TCP;
783 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
784 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
785 fl.oif = sk->sk_bound_dev_if;
786 fl.fl_ip_dport = inet->dport;
787 fl.fl_ip_sport = inet->sport;
789 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
790 sk->sk_err_soft = -err;
796 if (tp->pmtu_cookie > dst_pmtu(dst)) {
797 tcp_sync_mss(sk, dst_pmtu(dst));
798 tcp_simple_retransmit(sk);
799 } /* else let the usual retransmit timer handle it */
804 icmpv6_err_convert(type, code, &err);
806 /* Might be for an open_request */
807 switch (sk->sk_state) {
808 struct open_request *req, **prev;
810 if (sock_owned_by_user(sk))
813 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
814 &hdr->saddr, tcp_v6_iif(skb));
818 /* ICMPs are not backlogged, hence we cannot get
819 * an established socket here.
821 BUG_TRAP(req->sk == NULL);
823 if (seq != req->snt_isn) {
824 NET_INC_STATS_BH(OutOfWindowIcmps);
828 tcp_synq_drop(sk, req, prev);
832 case TCP_SYN_RECV: /* Cannot happen.
833 It can, if SYNs are crossed. --ANK */
834 if (!sock_owned_by_user(sk)) {
835 TCP_INC_STATS_BH(TcpAttemptFails);
837 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
841 sk->sk_err_soft = err;
845 if (!sock_owned_by_user(sk) && np->recverr) {
847 sk->sk_error_report(sk);
849 sk->sk_err_soft = err;
857 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
858 struct dst_entry *dst)
860 struct ipv6_pinfo *np = inet6_sk(sk);
861 struct sk_buff * skb;
862 struct ipv6_txoptions *opt = NULL;
866 memset(&fl, 0, sizeof(fl));
867 fl.proto = IPPROTO_TCP;
868 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
869 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
870 fl.fl6_flowlabel = 0;
871 fl.oif = req->af.v6_req.iif;
872 fl.fl_ip_dport = req->rmt_port;
873 fl.fl_ip_sport = inet_sk(sk)->sport;
878 np->rxopt.bits.srcrt == 2 &&
879 req->af.v6_req.pktopts) {
880 struct sk_buff *pktopts = req->af.v6_req.pktopts;
881 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
883 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
886 if (opt && opt->srcrt) {
887 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
888 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
891 err = ip6_dst_lookup(sk, &dst, &fl);
896 skb = tcp_make_synack(sk, dst, req);
898 struct tcphdr *th = skb->h.th;
900 th->check = tcp_v6_check(th, skb->len,
901 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
902 csum_partial((char *)th, skb->len, skb->csum));
904 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
905 err = ip6_xmit(sk, skb, &fl, opt, 0);
906 if (err == NET_XMIT_CN)
912 if (opt && opt != np->opt)
913 sock_kfree_s(sk, opt, opt->tot_len);
917 static void tcp_v6_or_free(struct open_request *req)
919 if (req->af.v6_req.pktopts)
920 kfree_skb(req->af.v6_req.pktopts);
923 static struct or_calltable or_ipv6 = {
925 .rtx_syn_ack = tcp_v6_send_synack,
926 .send_ack = tcp_v6_or_send_ack,
927 .destructor = tcp_v6_or_free,
928 .send_reset = tcp_v6_send_reset
931 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
933 struct ipv6_pinfo *np = inet6_sk(sk);
934 struct inet6_skb_parm *opt = IP6CB(skb);
937 if ((opt->hop && np->rxopt.bits.hopopts) ||
938 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
939 np->rxopt.bits.rxflow) ||
940 (opt->srcrt && np->rxopt.bits.srcrt) ||
941 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
948 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
951 struct ipv6_pinfo *np = inet6_sk(sk);
953 if (skb->ip_summed == CHECKSUM_HW) {
954 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
955 skb->csum = offsetof(struct tcphdr, check);
957 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
958 csum_partial((char *)th, th->doff<<2,
964 static void tcp_v6_send_reset(struct sk_buff *skb)
966 struct tcphdr *th = skb->h.th, *t1;
967 struct sk_buff *buff;
973 if (!ipv6_unicast_destination(skb))
977 * We need to grab some memory, and put together an RST,
978 * and then put it into the queue to be sent.
981 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
985 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
987 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
989 /* Swap the send and the receive. */
990 memset(t1, 0, sizeof(*t1));
991 t1->dest = th->source;
992 t1->source = th->dest;
993 t1->doff = sizeof(*t1)/4;
997 t1->seq = th->ack_seq;
1000 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1001 + skb->len - (th->doff<<2));
1004 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1006 memset(&fl, 0, sizeof(fl));
1007 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1008 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1010 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1011 sizeof(*t1), IPPROTO_TCP,
1014 fl.proto = IPPROTO_TCP;
1015 fl.oif = tcp_v6_iif(skb);
1016 fl.fl_ip_dport = t1->dest;
1017 fl.fl_ip_sport = t1->source;
1019 /* sk = NULL, but it is safe for now. RST socket required. */
1020 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1021 ip6_xmit(NULL, buff, &fl, NULL, 0);
1022 TCP_INC_STATS_BH(TcpOutSegs);
1023 TCP_INC_STATS_BH(TcpOutRsts);
1030 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1032 struct tcphdr *th = skb->h.th, *t1;
1033 struct sk_buff *buff;
1035 int tot_len = sizeof(struct tcphdr);
1037 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1041 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1046 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1048 /* Swap the send and the receive. */
1049 memset(t1, 0, sizeof(*t1));
1050 t1->dest = th->source;
1051 t1->source = th->dest;
1052 t1->doff = tot_len/4;
1053 t1->seq = htonl(seq);
1054 t1->ack_seq = htonl(ack);
1056 t1->window = htons(win);
1059 u32 *ptr = (u32*)(t1 + 1);
1060 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1061 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1062 *ptr++ = htonl(tcp_time_stamp);
1066 buff->csum = csum_partial((char *)t1, tot_len, 0);
1068 memset(&fl, 0, sizeof(fl));
1069 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1070 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1072 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1073 tot_len, IPPROTO_TCP,
1076 fl.proto = IPPROTO_TCP;
1077 fl.oif = tcp_v6_iif(skb);
1078 fl.fl_ip_dport = t1->dest;
1079 fl.fl_ip_sport = t1->source;
1081 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1082 ip6_xmit(NULL, buff, &fl, NULL, 0);
1083 TCP_INC_STATS_BH(TcpOutSegs);
1090 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1092 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1094 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1095 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1100 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1102 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
1106 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1108 struct open_request *req, **prev;
1109 struct tcphdr *th = skb->h.th;
1110 struct tcp_opt *tp = tcp_sk(sk);
1113 /* Find possible connection requests. */
1114 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1115 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1117 return tcp_check_req(sk, skb, req, prev);
1119 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1121 &skb->nh.ipv6h->daddr,
1126 if (nsk->sk_state != TCP_TIME_WAIT) {
1130 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1134 #if 0 /*def CONFIG_SYN_COOKIES*/
1135 if (!th->rst && !th->syn && th->ack)
1136 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1141 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1143 struct tcp_opt *tp = tcp_sk(sk);
1144 struct tcp_listen_opt *lopt = tp->listen_opt;
1145 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1148 req->expires = jiffies + TCP_TIMEOUT_INIT;
1150 req->dl_next = lopt->syn_table[h];
1152 write_lock(&tp->syn_wait_lock);
1153 lopt->syn_table[h] = req;
1154 write_unlock(&tp->syn_wait_lock);
1156 #ifdef CONFIG_ACCEPT_QUEUES
1157 tcp_synq_added(sk, req);
1164 /* FIXME: this is substantially similar to the ipv4 code.
1165 * Can some kind of merge be done? -- erics
1167 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1169 struct ipv6_pinfo *np = inet6_sk(sk);
1170 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1171 struct open_request *req = NULL;
1172 __u32 isn = TCP_SKB_CB(skb)->when;
1173 #ifdef CONFIG_ACCEPT_QUEUES
1177 if (skb->protocol == htons(ETH_P_IP))
1178 return tcp_v4_conn_request(sk, skb);
1180 if (!ipv6_unicast_destination(skb))
1185 * There are no SYN attacks on IPv6, yet...
1187 if (tcp_synq_is_full(sk) && !isn) {
1188 if (net_ratelimit())
1189 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1193 #ifdef CONFIG_ACCEPT_QUEUES
1194 class = (skb->nfmark <= 0) ? 0 :
1195 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1197 * Accept only if the class has shares set or if the default class
1198 * i.e. class 0 has shares
1200 if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
1201 if (tcp_sk(sk)->acceptq[0].aq_ratio)
1207 if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1209 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1214 req = tcp_openreq_alloc();
1218 tcp_clear_options(&tmptp);
1219 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1220 tmptp.user_mss = tp->user_mss;
1222 tcp_parse_options(skb, &tmptp, 0);
1224 tmptp.tstamp_ok = tmptp.saw_tstamp;
1225 tcp_openreq_init(req, &tmptp, skb);
1226 #ifdef CONFIG_ACCEPT_QUEUES
1227 req->acceptq_class = class;
1228 req->acceptq_time_stamp = jiffies;
1230 req->class = &or_ipv6;
1231 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1232 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1233 TCP_ECN_create_request(req, skb->h.th);
1234 req->af.v6_req.pktopts = NULL;
1235 if (ipv6_opt_accepted(sk, skb) ||
1236 np->rxopt.bits.rxinfo ||
1237 np->rxopt.bits.rxhlim) {
1238 atomic_inc(&skb->users);
1239 req->af.v6_req.pktopts = skb;
1241 req->af.v6_req.iif = sk->sk_bound_dev_if;
1243 /* So that link locals have meaning */
1244 if (!sk->sk_bound_dev_if &&
1245 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1246 req->af.v6_req.iif = tcp_v6_iif(skb);
1249 isn = tcp_v6_init_sequence(sk,skb);
1253 if (tcp_v6_send_synack(sk, req, NULL))
1256 tcp_v6_synq_add(sk, req);
1262 tcp_openreq_free(req);
1264 TCP_INC_STATS_BH(TcpAttemptFails);
1265 return 0; /* don't send reset */
1268 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1269 struct open_request *req,
1270 struct dst_entry *dst)
1272 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1273 struct tcp6_sock *newtcp6sk;
1274 struct inet_opt *newinet;
1275 struct tcp_opt *newtp;
1277 struct ipv6_txoptions *opt;
1279 if (skb->protocol == htons(ETH_P_IP)) {
1284 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1289 newtcp6sk = (struct tcp6_sock *)newsk;
1290 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1292 newinet = inet_sk(newsk);
1293 newnp = inet6_sk(newsk);
1294 newtp = tcp_sk(newsk);
1296 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1298 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1301 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1304 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1306 newtp->af_specific = &ipv6_mapped;
1307 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1308 newnp->pktoptions = NULL;
1310 newnp->mcast_oif = tcp_v6_iif(skb);
1311 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1313 /* Charge newly allocated IPv6 socket. Though it is mapped,
1316 #ifdef INET_REFCNT_DEBUG
1317 atomic_inc(&inet6_sock_nr);
1320 /* This is a tricky place. Until this moment IPv4 tcp
1321 worked with IPv6 af_tcp.af_specific.
1324 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1331 #ifdef CONFIG_ACCEPT_QUEUES
1332 if (sk_acceptq_is_full(sk, req->acceptq_class))
1334 if (sk_acceptq_is_full(sk))
1338 if (np->rxopt.bits.srcrt == 2 &&
1339 opt == NULL && req->af.v6_req.pktopts) {
1340 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1342 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1348 memset(&fl, 0, sizeof(fl));
1349 fl.proto = IPPROTO_TCP;
1350 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1351 if (opt && opt->srcrt) {
1352 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1353 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1355 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1356 fl.oif = sk->sk_bound_dev_if;
1357 fl.fl_ip_dport = req->rmt_port;
1358 fl.fl_ip_sport = inet_sk(sk)->sport;
1360 if (ip6_dst_lookup(sk, &dst, &fl))
1364 newsk = tcp_create_openreq_child(sk, req, skb);
1368 /* Charge newly allocated IPv6 socket */
1369 #ifdef INET_REFCNT_DEBUG
1370 atomic_inc(&inet6_sock_nr);
1373 ip6_dst_store(newsk, dst, NULL);
1374 newsk->sk_route_caps = dst->dev->features &
1375 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1377 newtcp6sk = (struct tcp6_sock *)newsk;
1378 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1380 newtp = tcp_sk(newsk);
1381 newinet = inet_sk(newsk);
1382 newnp = inet6_sk(newsk);
1384 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1386 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1387 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1388 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1389 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1391 /* Now IPv6 options...
1393 First: no IPv4 options.
1395 newinet->opt = NULL;
1398 newnp->rxopt.all = np->rxopt.all;
1400 /* Clone pktoptions received with SYN */
1401 newnp->pktoptions = NULL;
1402 if (req->af.v6_req.pktopts) {
1403 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1405 kfree_skb(req->af.v6_req.pktopts);
1406 req->af.v6_req.pktopts = NULL;
1407 if (newnp->pktoptions)
1408 skb_set_owner_r(newnp->pktoptions, newsk);
1411 newnp->mcast_oif = tcp_v6_iif(skb);
1412 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1414 /* Clone native IPv6 options from listening socket (if any)
1416 Yes, keeping a reference count would be much more clever,
1417 but we do one more thing here: reattach optmem
1421 newnp->opt = ipv6_dup_options(newsk, opt);
1423 sock_kfree_s(sk, opt, opt->tot_len);
1426 newtp->ext_header_len = 0;
1428 newtp->ext_header_len = newnp->opt->opt_nflen +
1429 newnp->opt->opt_flen;
1430 newtp->ext2_header_len = dst->header_len;
1432 tcp_sync_mss(newsk, dst_pmtu(dst));
1433 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1434 tcp_initialize_rcv_mss(newsk);
1436 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1438 __tcp_v6_hash(newsk);
1439 tcp_inherit_port(sk, newsk);
1444 NET_INC_STATS_BH(ListenOverflows);
1446 NET_INC_STATS_BH(ListenDrops);
1447 if (opt && opt != np->opt)
1448 sock_kfree_s(sk, opt, opt->tot_len);
1453 static int tcp_v6_checksum_init(struct sk_buff *skb)
1455 if (skb->ip_summed == CHECKSUM_HW) {
1456 skb->ip_summed = CHECKSUM_UNNECESSARY;
1457 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1458 &skb->nh.ipv6h->daddr,skb->csum))
1460 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1462 if (skb->len <= 76) {
1463 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1464 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1466 skb->ip_summed = CHECKSUM_UNNECESSARY;
1468 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1469 &skb->nh.ipv6h->daddr,0);
1474 /* The socket must have its spinlock held when we get
1477 * We have a potential double-lock case here, so even when
1478 * doing backlog processing we use the BH locking scheme.
1479 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive routine for a TCP segment; also installed as
 * tcpv6_prot.backlog_rcv.
 *
 * Visible flow:
 *  - skbs whose protocol is ETH_P_IP are handed to tcp_v4_do_rcv();
 *  - the segment is passed through sk_filter();
 *  - a GFP_ATOMIC clone of the skb (opt_skb) can be taken so that the
 *    IPv6 options of the received packet may be latched afterwards;
 *  - TCP_ESTABLISHED sockets use tcp_rcv_established();
 *  - a length/checksum check (th->doff and tcp_checksum_complete())
 *    precedes the remaining states;
 *  - TCP_LISTEN sockets go through tcp_v6_hnd_req() and
 *    tcp_child_process();
 *  - any other state is handled by tcp_rcv_state_process();
 *  - tcp_v6_send_reset(), freeing of opt_skb and the TcpInErrs counter
 *    appear on the failure paths.
 *
 * Option latching: when opt_skb ends exactly at tp->rcv_nxt and the
 * socket is neither in TCP_CLOSE nor TCP_LISTEN, mcast_oif and
 * mcast_hops are refreshed from the packet if rxinfo / rxhlim are
 * requested, and, if ipv6_opt_accepted() agrees, opt_skb is charged to
 * the socket and exchanged into np->pktoptions.  Another branch frees
 * opt_skb and exchanges NULL into np->pktoptions.
 */
1482 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1484 struct ipv6_pinfo *np = inet6_sk(sk);
1486 struct sk_buff *opt_skb = NULL;
1488 /* Imagine: socket is IPv6. IPv4 packet arrives,
1489 goes to IPv4 receive handler and backlogged.
1490 From backlog it always goes here. Kerboom...
1491 Fortunately, tcp_rcv_established and rcv_established
1492 handle them correctly, but it is not case with
1493 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1496 if (skb->protocol == htons(ETH_P_IP))
1497 return tcp_v4_do_rcv(sk, skb);
1499 if (sk_filter(sk, skb, 0))
1503 * socket locking is here for SMP purposes as backlog rcv
1504 * is currently called with bh processing disabled.
1507 /* Do Stevens' IPV6_PKTOPTIONS.
1509 Yes, guys, it is the only place in our code, where we
1510 may make it not affecting IPv4.
1511 The rest of code is protocol independent,
1512 and I do not like idea to uglify IPv4.
1514 Actually, all the idea behind IPV6_PKTOPTIONS
1515 looks not very well thought. For now we latch
1516 options, received in the last packet, enqueued
1517 by tcp. Feel free to propose better solution.
1521 opt_skb = skb_clone(skb, GFP_ATOMIC);
1523 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1524 TCP_CHECK_TIMER(sk);
1525 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1527 TCP_CHECK_TIMER(sk);
1529 goto ipv6_pktoptions;
1533 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1536 if (sk->sk_state == TCP_LISTEN) {
1537 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1542 * Queue it on the new socket if the new socket is active,
1543 * otherwise we just shortcircuit this and continue with
1547 if (tcp_child_process(sk, nsk, skb))
1550 __kfree_skb(opt_skb);
1555 TCP_CHECK_TIMER(sk);
1556 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1558 TCP_CHECK_TIMER(sk);
1560 goto ipv6_pktoptions;
1564 tcp_v6_send_reset(skb);
1567 __kfree_skb(opt_skb);
1571 TCP_INC_STATS_BH(TcpInErrs);
1576 /* Do you ask, what is it?
1578 1. skb was enqueued by tcp.
1579 2. skb is added to tail of read queue, rather than out of order.
1580 3. socket is not in passive state.
1581 4. Finally, it really contains options, which user wants to receive.
1584 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1585 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1586 if (np->rxopt.bits.rxinfo)
1587 np->mcast_oif = tcp_v6_iif(opt_skb);
1588 if (np->rxopt.bits.rxhlim)
1589 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1590 if (ipv6_opt_accepted(sk, opt_skb)) {
1591 skb_set_owner_r(opt_skb, sk);
1592 opt_skb = xchg(&np->pktoptions, opt_skb);
1594 __kfree_skb(opt_skb);
1595 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Input handler for received TCP/IPv6 segments; registered as
 * tcpv6_protocol.handler.
 *
 * Visible steps:
 *  - skb->pkt_type is checked against PACKET_HOST;
 *  - the segment is counted in TcpInSegs;
 *  - the fixed TCP header is pulled, th->doff is checked against the
 *    minimum header size, then th->doff*4 bytes are pulled;
 *  - tcp_v6_checksum_init() runs unless ip_summed is already
 *    CHECKSUM_UNNECESSARY;
 *  - TCP_SKB_CB(skb) is filled: seq, end_seq (seq + SYN + FIN +
 *    payload length), ack_seq, when, flags (the IPv6 DS field) and
 *    sacked;
 *  - the socket is looked up with __tcp_v6_lookup();
 *  - for a regular socket: xfrm6 policy check and sk_filter(), then
 *    tcp_prequeue() or tcp_v6_do_rcv() when the socket is not owned by
 *    user context, and sk_add_backlog() as the alternative;
 *  - for TCP_TIME_WAIT: policy and length/checksum checks, then
 *    tcp_timewait_state_process(); a listener lookup,
 *    tcp_tw_deschedule() and tcp_v6_timewait_ack() appear among the
 *    handled outcomes;
 *  - a policy check with a NULL socket, a length/checksum check,
 *    TcpInErrs and tcp_v6_send_reset() appear on a further path
 *    (presumably the no-socket case -- TODO confirm).
 *
 * The visible return statement yields -1 when ret is non-zero and 0
 * otherwise.
 */
1604 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1606 struct sk_buff *skb = *pskb;
1611 if (skb->pkt_type != PACKET_HOST)
1615 * Count it even if it's bad.
1617 TCP_INC_STATS_BH(TcpInSegs);
1619 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1624 if (th->doff < sizeof(struct tcphdr)/4)
1626 if (!pskb_may_pull(skb, th->doff*4))
1629 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1630 tcp_v6_checksum_init(skb) < 0))
1634 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1635 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1636 skb->len - th->doff*4);
1637 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1638 TCP_SKB_CB(skb)->when = 0;
1639 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1640 TCP_SKB_CB(skb)->sacked = 0;
1642 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1643 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1649 if (sk->sk_state == TCP_TIME_WAIT)
1652 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1653 goto discard_and_relse;
1655 if (sk_filter(sk, skb, 0))
1656 goto discard_and_relse;
1662 if (!sock_owned_by_user(sk)) {
1663 if (!tcp_prequeue(sk, skb))
1664 ret = tcp_v6_do_rcv(sk, skb);
1666 sk_add_backlog(sk, skb);
1670 return ret ? -1 : 0;
1673 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1676 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1678 TCP_INC_STATS_BH(TcpInErrs);
1680 tcp_v6_send_reset(skb);
1697 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1698 tcp_tw_put((struct tcp_tw_bucket *) sk);
1702 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1703 TCP_INC_STATS_BH(TcpInErrs);
1704 tcp_tw_put((struct tcp_tw_bucket *) sk);
1708 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1709 skb, th, skb->len)) {
1714 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1716 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1717 tcp_tw_put((struct tcp_tw_bucket *)sk);
1721 /* Fall through to ACK */
1724 tcp_v6_timewait_ack(sk, skb);
1728 case TCP_TW_SUCCESS:;
/*
 * rebuild_header operation for native IPv6 sockets (see ipv6_specific).
 *
 * The cached route is checked with __sk_dst_check() against
 * np->dst_cookie.  The lookup path builds a TCP flow from the socket's
 * destination/source addresses, flow label, bound device and ports and
 * resolves it with ip6_dst_lookup().  When a source-route option is
 * set, the flow destination is taken from the routing header
 * (rt0->addr) instead of np->daddr.
 *
 * The resulting route is stored with ip6_dst_store(); sk_route_caps
 * becomes the device features with NETIF_F_IP_CSUM and NETIF_F_TSO
 * masked off, and ext2_header_len is refreshed from dst->header_len.
 * sk_route_caps is also written as 0 right after the lookup
 * (presumably on its failure path -- TODO confirm).
 */
1733 static int tcp_v6_rebuild_header(struct sock *sk)
1736 struct dst_entry *dst;
1737 struct ipv6_pinfo *np = inet6_sk(sk);
1739 dst = __sk_dst_check(sk, np->dst_cookie);
1742 struct inet_opt *inet = inet_sk(sk);
/* Describe the connection as a flow for the route lookup. */
1745 memset(&fl, 0, sizeof(fl));
1746 fl.proto = IPPROTO_TCP;
1747 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1748 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1749 fl.fl6_flowlabel = np->flow_label;
1750 fl.oif = sk->sk_bound_dev_if;
1751 fl.fl_ip_dport = inet->dport;
1752 fl.fl_ip_sport = inet->sport;
/* With source routing, route towards the address in the routing header. */
1754 if (np->opt && np->opt->srcrt) {
1755 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1756 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1759 err = ip6_dst_lookup(sk, &dst, &fl);
1762 sk->sk_route_caps = 0;
1766 ip6_dst_store(sk, dst, NULL);
1767 sk->sk_route_caps = dst->dev->features &
1768 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1769 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * queue_xmit operation for native IPv6 sockets (see ipv6_specific).
 *
 * Builds the flow for the segment from the owning socket (addresses,
 * flow label passed through IP6_ECN_flow_xmit(), bound device, ports),
 * obtains a route and transmits the skb with ip6_xmit() using the
 * socket's IPv6 options.
 *
 * For routing only, a source-route option replaces the flow
 * destination with rt0->addr; np->daddr is copied back into the flow
 * before transmission.  The cached route is checked with
 * __sk_dst_check(); the lookup path calls ip6_dst_lookup(), records
 * -err in sk->sk_err_soft (presumably on failure -- TODO confirm) and
 * otherwise stores the route and refreshes sk_route_caps and
 * ext2_header_len.  skb->dst is set from dst_clone(dst).
 *
 * ipfragok is not referenced in the visible body; ip6_xmit() is called
 * with a literal 0 as its last argument.
 */
1775 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1777 struct sock *sk = skb->sk;
1778 struct inet_opt *inet = inet_sk(sk);
1779 struct ipv6_pinfo *np = inet6_sk(sk);
1781 struct dst_entry *dst;
1783 memset(&fl, 0, sizeof(fl));
1784 fl.proto = IPPROTO_TCP;
1785 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1786 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1787 fl.fl6_flowlabel = np->flow_label;
1788 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1789 fl.oif = sk->sk_bound_dev_if;
1790 fl.fl_ip_sport = inet->sport;
1791 fl.fl_ip_dport = inet->dport;
1793 if (np->opt && np->opt->srcrt) {
1794 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1795 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1798 dst = __sk_dst_check(sk, np->dst_cookie);
1801 int err = ip6_dst_lookup(sk, &dst, &fl);
1804 sk->sk_err_soft = -err;
1808 ip6_dst_store(sk, dst, NULL);
1809 sk->sk_route_caps = dst->dev->features &
1810 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1811 tcp_sk(sk)->ext2_header_len = dst->header_len;
1814 skb->dst = dst_clone(dst);
1816 /* Restore final destination back after routing done */
1817 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1819 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * addr2sockaddr operation: describe the destination of sk in *uaddr,
 * which is written as a struct sockaddr_in6.
 *
 * Family is AF_INET6, address is np->daddr and port is inet->dport
 * (copied without byte-order conversion).  sin6_flowinfo is always 0.
 * sin6_scope_id is sk->sk_bound_dev_if when the socket is bound to a
 * device and the address is link-local, and 0 otherwise.
 */
1822 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1824 struct ipv6_pinfo *np = inet6_sk(sk);
1825 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1827 sin6->sin6_family = AF_INET6;
1828 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1829 sin6->sin6_port = inet_sk(sk)->dport;
1830 /* We do not store received flowlabel for TCP */
1831 sin6->sin6_flowinfo = 0;
1832 sin6->sin6_scope_id = 0;
/* Only a link-local address on a device-bound socket gets a scope id. */
1833 if (sk->sk_bound_dev_if &&
1834 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1835 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * remember_stamp operation of ipv6_specific.  According to the comment
 * in the body this is not implemented yet for IPv6.
 */
1838 static int tcp_v6_remember_stamp(struct sock *sk)
1840 /* Alas, not yet... */
/*
 * Address-family operations for native TCP over IPv6.  This table is
 * installed as tp->af_specific by tcp_v6_init_sock().  Transmit,
 * checksum, header rebuild and remember_stamp are the tcp_v6_*
 * routines of this file; the network header length is that of
 * struct ipv6hdr and socket addresses are struct sockaddr_in6.
 */
1844 static struct tcp_func ipv6_specific = {
1845 .queue_xmit = tcp_v6_xmit,
1846 .send_check = tcp_v6_send_check,
1847 .rebuild_header = tcp_v6_rebuild_header,
1848 .conn_request = tcp_v6_conn_request,
1849 .syn_recv_sock = tcp_v6_syn_recv_sock,
1850 .remember_stamp = tcp_v6_remember_stamp,
1851 .net_header_len = sizeof(struct ipv6hdr),
1853 .setsockopt = ipv6_setsockopt,
1854 .getsockopt = ipv6_getsockopt,
1855 .addr2sockaddr = v6_addr2sockaddr,
1856 .sockaddr_len = sizeof(struct sockaddr_in6)
1860 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for TCP over IPv4 used through the INET6
 * API.  Transmit, checksum, header rebuild and remember_stamp are the
 * IPv4 routines and the network header length is that of struct iphdr;
 * connection-request handling, socket options and sockaddr conversion
 * remain the IPv6 ones, so addresses are still struct sockaddr_in6.
 */
1863 static struct tcp_func ipv6_mapped = {
1864 .queue_xmit = ip_queue_xmit,
1865 .send_check = tcp_v4_send_check,
1866 .rebuild_header = tcp_v4_rebuild_header,
1867 .conn_request = tcp_v6_conn_request,
1868 .syn_recv_sock = tcp_v6_syn_recv_sock,
1869 .remember_stamp = tcp_v4_remember_stamp,
1870 .net_header_len = sizeof(struct iphdr),
1872 .setsockopt = ipv6_setsockopt,
1873 .getsockopt = ipv6_getsockopt,
1874 .addr2sockaddr = v6_addr2sockaddr,
1875 .sockaddr_len = sizeof(struct sockaddr_in6)
1880 /* NOTE: A lot of things set to zero explicitly by call to
1881 * sk_alloc() so need not be done here.
/*
 * .init operation of tcpv6_prot: set up the TCP state of a new socket.
 *
 * Initialises the out-of-order queue, the transmit timers and the
 * prequeue; sets rto and mdev to TCP_TIMEOUT_INIT, snd_ssthresh to
 * 0x7fffffff, snd_cwnd_clamp to ~0, mss_cache to 536 and reordering to
 * sysctl_tcp_reordering; puts the socket in TCP_CLOSE with
 * ipv6_specific as its af_specific table; installs
 * sk_stream_write_space and the buffer sizes sysctl_tcp_wmem[1] and
 * sysctl_tcp_rmem[1]; and increments tcp_sockets_allocated.
 */
1883 static int tcp_v6_init_sock(struct sock *sk)
1885 struct tcp_opt *tp = tcp_sk(sk);
1887 skb_queue_head_init(&tp->out_of_order_queue);
1888 tcp_init_xmit_timers(sk);
1889 tcp_prequeue_init(tp);
1891 tp->rto = TCP_TIMEOUT_INIT;
1892 tp->mdev = TCP_TIMEOUT_INIT;
1894 /* So many TCP implementations out there (incorrectly) count the
1895 * initial SYN frame in their delayed-ACK and congestion control
1896 * algorithms that we must have the following bandaid to talk
1897 * efficiently to them. -DaveM
1901 /* See draft-stevens-tcpca-spec-01 for discussion of the
1902 * initialization of these values.
1904 tp->snd_ssthresh = 0x7fffffff;
1905 tp->snd_cwnd_clamp = ~0;
1906 tp->mss_cache = 536;
1908 tp->reordering = sysctl_tcp_reordering;
1910 sk->sk_state = TCP_CLOSE;
1912 tp->af_specific = &ipv6_specific;
1914 sk->sk_write_space = sk_stream_write_space;
1915 sk->sk_use_write_queue = 1;
1917 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1918 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1920 atomic_inc(&tcp_sockets_allocated);
/*
 * .destroy operation of tcpv6_prot: release the TCP-level resources of
 * a socket, then hand over to inet6_destroy_sock(), whose result is
 * returned.
 *
 * Stops the transmit timers, purges the write queue, the out-of-order
 * queue and the prequeue, checks for a referenced bind bucket, frees a
 * cached sendmsg page if one exists and decrements
 * tcp_sockets_allocated (the counterpart of tcp_v6_init_sock()).
 */
1925 static int tcp_v6_destroy_sock(struct sock *sk)
1927 struct tcp_opt *tp = tcp_sk(sk);
1928 struct inet_opt *inet = inet_sk(sk);
1930 tcp_clear_xmit_timers(sk);
1932 /* Clean up the write buffer. */
1933 tcp_writequeue_purge(sk);
1935 /* Cleans up our, hopefully empty, out_of_order_queue. */
1936 __skb_queue_purge(&tp->out_of_order_queue);
1938 /* Clean prequeue, it must be empty really */
1939 __skb_queue_purge(&tp->ucopy.prequeue);
1941 /* Clean up a referenced TCP bind bucket. */
1942 if (tcp_sk(sk)->bind_hash)
1945 /* If sendmsg cached page exists, toss it. */
1946 if (inet->sndmsg_page != NULL)
1947 __free_page(inet->sndmsg_page);
1949 atomic_dec(&tcp_sockets_allocated);
1951 return inet6_destroy_sock(sk);
1954 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Print one line of the TCPv6 proc listing for a pending connection
 * request (called from tcp6_seq_show() for TCP_SEQ_STATE_OPENREQ).
 *
 * The local and remote addresses come from req->af.v6_req.loc_addr
 * and rmt_addr, the local port from the socket sk passed alongside
 * (inet_sk(sk)->sport) and the remote port from req->rmt_port.  The
 * remaining time is req->expires - jiffies, printed through
 * jiffies_to_clock_t().  Several columns are constants, as the
 * inline comments explain.
 */
1955 static void get_openreq6(struct seq_file *seq,
1956 struct sock *sk, struct open_request *req, int i, int uid)
1958 struct in6_addr *dest, *src;
1959 int ttd = req->expires - jiffies;
1964 src = &req->af.v6_req.loc_addr;
1965 dest = &req->af.v6_req.rmt_addr;
1967 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1968 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1970 src->s6_addr32[0], src->s6_addr32[1],
1971 src->s6_addr32[2], src->s6_addr32[3],
1972 ntohs(inet_sk(sk)->sport),
1973 dest->s6_addr32[0], dest->s6_addr32[1],
1974 dest->s6_addr32[2], dest->s6_addr32[3],
1975 ntohs(req->rmt_port),
1977 0,0, /* could print option size, but that is af dependent. */
1978 1, /* timers active (only the expire timer) */
1979 jiffies_to_clock_t(ttd),
1982 0, /* non standard timer */
1983 0, /* open_requests have no inode */
/*
 * Print one line of the TCPv6 proc listing for a socket (called from
 * tcp6_seq_show() for listening and established entries).
 *
 * The source address is np->rcv_saddr and both ports are converted
 * with ntohs().  The reported timer expiry is tp->timeout when
 * tp->pending is TCP_TIME_RETRANS or TCP_TIME_PROBE0, the expiry of
 * sk_timer when that timer is pending, and the current jiffies
 * otherwise; it is printed relative to jiffies through
 * jiffies_to_clock_t().
 *
 * Queue columns are write_seq - snd_una and rcv_nxt - copied_seq.
 * The trailing columns are the reference count, the socket pointer,
 * rto, ack.ato, (ack.quick << 1) | ack.pingpong, snd_cwnd and
 * snd_ssthresh, which is shown as -1 when it is >= 0xFFFF.
 */
1987 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1989 struct in6_addr *dest, *src;
1992 unsigned long timer_expires;
1993 struct inet_opt *inet = inet_sk(sp);
1994 struct tcp_opt *tp = tcp_sk(sp);
1995 struct ipv6_pinfo *np = inet6_sk(sp);
1998 src = &np->rcv_saddr;
1999 destp = ntohs(inet->dport);
2000 srcp = ntohs(inet->sport);
/* Pick which timer's expiry to report. */
2001 if (tp->pending == TCP_TIME_RETRANS) {
2003 timer_expires = tp->timeout;
2004 } else if (tp->pending == TCP_TIME_PROBE0) {
2006 timer_expires = tp->timeout;
2007 } else if (timer_pending(&sp->sk_timer)) {
2009 timer_expires = sp->sk_timer.expires;
2012 timer_expires = jiffies;
2016 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2017 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2019 src->s6_addr32[0], src->s6_addr32[1],
2020 src->s6_addr32[2], src->s6_addr32[3], srcp,
2021 dest->s6_addr32[0], dest->s6_addr32[1],
2022 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2024 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2026 jiffies_to_clock_t(timer_expires - jiffies),
2031 atomic_read(&sp->sk_refcnt), sp,
2032 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2033 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Print one line of the TCPv6 proc listing for a TIME_WAIT bucket
 * (called from tcp6_seq_show() for TCP_SEQ_STATE_TIME_WAIT).
 *
 * Addresses come from tw_v6_daddr / tw_v6_rcv_saddr and the ports from
 * tw_dport / tw_sport via ntohs().  The state column is tw_substate,
 * the queue columns are 0, the timer column is the constant 3 with the
 * remaining time tw_ttd - jiffies printed through jiffies_to_clock_t(),
 * and the line ends with the bucket's reference count and pointer.
 */
2037 static void get_timewait6_sock(struct seq_file *seq,
2038 struct tcp_tw_bucket *tw, int i)
2040 struct in6_addr *dest, *src;
2042 int ttd = tw->tw_ttd - jiffies;
2047 dest = &tw->tw_v6_daddr;
2048 src = &tw->tw_v6_rcv_saddr;
2049 destp = ntohs(tw->tw_dport);
2050 srcp = ntohs(tw->tw_sport);
2053 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2054 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2056 src->s6_addr32[0], src->s6_addr32[1],
2057 src->s6_addr32[2], src->s6_addr32[3], srcp,
2058 dest->s6_addr32[0], dest->s6_addr32[1],
2059 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2060 tw->tw_substate, 0, 0,
2061 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2062 atomic_read(&tw->tw_refcnt), tw);
2065 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for the TCPv6 proc listing (wired up through
 * tcp6_seq_afinfo.seq_show).
 *
 * For SEQ_START_TOKEN a column header is printed.  Otherwise the
 * iterator state st->state selects the printer: get_tcp6_sock() for
 * listening and established entries, get_openreq6() for open requests
 * (with st->syn_wait_sk and st->uid), and get_timewait6_sock() for
 * TIME_WAIT entries.  st->num is passed to each printer as the entry
 * index.
 */
2066 static int tcp6_seq_show(struct seq_file *seq, void *v)
2068 struct tcp_iter_state *st;
2070 if (v == SEQ_START_TOKEN) {
2075 "st tx_queue rx_queue tr tm->when retrnsmt"
2076 " uid timeout inode\n");
2081 switch (st->state) {
2082 case TCP_SEQ_STATE_LISTENING:
2083 case TCP_SEQ_STATE_ESTABLISHED:
2084 get_tcp6_sock(seq, v, st->num);
2086 case TCP_SEQ_STATE_OPENREQ:
2087 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2089 case TCP_SEQ_STATE_TIME_WAIT:
2090 get_timewait6_sock(seq, v, st->num);
/*
 * Registration record for the TCPv6 proc listing: it names the owning
 * module, tcp6_seq_show() as the show callback and tcp6_seq_fops as
 * the file operations.  It is passed to tcp_proc_register() and
 * tcp_proc_unregister() by tcp6_proc_init() and tcp6_proc_exit().
 */
2097 static struct file_operations tcp6_seq_fops;
2098 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2099 .owner = THIS_MODULE,
2102 .seq_show = tcp6_seq_show,
2103 .seq_fops = &tcp6_seq_fops,
/*
 * Register the TCPv6 proc listing described by tcp6_seq_afinfo.
 * Returns the result of tcp_proc_register().
 */
2106 int __init tcp6_proc_init(void)
2108 return tcp_proc_register(&tcp6_seq_afinfo);
/* Remove the TCPv6 proc listing registered by tcp6_proc_init(). */
2111 void tcp6_proc_exit(void)
2113 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * Socket-level protocol operations for TCP over IPv6 (referenced by
 * tcpv6_protosw.prot).  Connect, init, destroy, backlog receive, hash
 * and get_port are the tcp_v6_* routines; the other visible
 * operations (disconnect, accept, shutdown, socket options, sendmsg,
 * recvmsg, unhash) are the generic tcp_* ones.
 */
2117 struct proto tcpv6_prot = {
2120 .connect = tcp_v6_connect,
2121 .disconnect = tcp_disconnect,
2122 .accept = tcp_accept,
2124 .init = tcp_v6_init_sock,
2125 .destroy = tcp_v6_destroy_sock,
2126 .shutdown = tcp_shutdown,
2127 .setsockopt = tcp_setsockopt,
2128 .getsockopt = tcp_getsockopt,
2129 .sendmsg = tcp_sendmsg,
2130 .recvmsg = tcp_recvmsg,
2131 .backlog_rcv = tcp_v6_do_rcv,
2132 .hash = tcp_v6_hash,
2133 .unhash = tcp_unhash,
2134 .get_port = tcp_v6_get_port,
/*
 * IPv6 protocol hooks for TCP: tcp_v6_rcv() handles input and
 * tcp_v6_err() handles errors.  Registered for IPPROTO_TCP by
 * tcpv6_init().  INET6_PROTO_NOPOLICY is set here and tcp_v6_rcv()
 * calls xfrm6_policy_check() itself.
 */
2137 static struct inet6_protocol tcpv6_protocol = {
2138 .handler = tcp_v6_rcv,
2139 .err_handler = tcp_v6_err,
2140 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-switch entry tying SOCK_STREAM / IPPROTO_TCP sockets to
 * tcpv6_prot and to the stream socket operations inet6_stream_ops,
 * which are defined elsewhere.  Flagged INET_PROTOSW_PERMANENT and
 * registered by tcpv6_init().
 */
2143 extern struct proto_ops inet6_stream_ops;
2145 static struct inet_protosw tcpv6_protosw = {
2146 .type = SOCK_STREAM,
2147 .protocol = IPPROTO_TCP,
2148 .prot = &tcpv6_prot,
2149 .ops = &inet6_stream_ops,
2152 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time registration of TCP with the IPv6 stack.
 *
 * Adds tcpv6_protocol as the IPPROTO_TCP handler; a failure there is
 * only reported with a KERN_ERR message.  tcpv6_protosw is registered
 * afterwards in either case.
 */
2155 void __init tcpv6_init(void)
2157 /* register inet6 protocol */
2158 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2159 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2160 inet6_register_protosw(&tcpv6_protosw);