3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
58 #include <asm/uaccess.h>
60 #include <linux/proc_fs.h>
61 #include <linux/seq_file.h>
/*
 * Forward declarations: these helpers and the two tcp_func operation
 * tables are referenced in this file before the point where they are
 * defined.
 */
63 static void tcp_v6_send_reset(struct sk_buff *skb);
64 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
/* NOTE(review): the parameter list of the next prototype is not closed in
 * this view; its continuation line appears to be missing. */
65 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
68 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
69 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
/* Operation tables; tcp_v6_connect() switches a socket between the two. */
71 static struct tcp_func ipv6_mapped;
72 static struct tcp_func ipv6_specific;
74 /* I have no idea if this is a good hash for v6 or not. -DaveM */
75 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
76 struct in6_addr *faddr, u16 fport)
78 int hashent = (lport ^ fport);
80 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
81 hashent ^= hashent>>16;
82 hashent ^= hashent>>8;
83 return (hashent & (tcp_ehash_size - 1));
86 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
88 struct inet_opt *inet = inet_sk(sk);
89 struct ipv6_pinfo *np = inet6_sk(sk);
90 struct in6_addr *laddr = &np->rcv_saddr;
91 struct in6_addr *faddr = &np->daddr;
92 __u16 lport = inet->num;
93 __u16 fport = inet->dport;
94 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/*
 * Walk every socket that owns bind bucket @tb and test it against @sk.
 * The visible part of the test requires that the two sockets could share
 * a device (either one is unbound, or both are bound to the same
 * interface), that address reuse does not excuse the clash (one of them
 * lacks sk_reuse, or the other socket is listening), and that
 * ipv6_rcv_saddr_equal() reports equal receive addresses.
 *
 * NOTE(review): the first line of the condition, the declaration of sk2
 * and the return statement are not visible in this view; confirm the
 * return convention against the complete file.
 */
97 static inline int tcp_v6_bind_conflict(struct sock *sk,
98 struct tcp_bind_bucket *tb)
101 struct hlist_node *node;
103 /* We must walk the whole port owner list in this case. -DaveM */
104 sk_for_each_bound(sk2, node, &tb->owners) {
106 (!sk->sk_bound_dev_if ||
107 !sk2->sk_bound_dev_if ||
108 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
109 (!sk->sk_reuse || !sk2->sk_reuse ||
110 sk2->sk_state == TCP_LISTEN) &&
111 ipv6_rcv_saddr_equal(sk, sk2))
118 /* Grrr, addr_type already calculated by caller, but I don't want
119 * to add some silly "cookie" argument to this method just for that.
120 * But it doesn't matter, the recalculation is in the rarest path
121 * this function ever takes. */
/*
 * Bind @sk to local port @snum, or pick a port from the configured local
 * port range.
 *
 * The visible code has two phases:
 *  - a search loop that advances a shared rover (tcp_port_rover, guarded
 *    by tcp_portalloc_lock) through sysctl_local_port_range and inspects
 *    the matching tcp_bhash chain for each candidate;
 *  - a bind phase that locks the bucket chain for @snum, checks an
 *    existing bucket for conflicts, creates a bucket if none exists,
 *    updates the bucket's fastreuse hint and attaches the socket with
 *    tcp_bind_hash().
 *
 * NOTE(review): the condition selecting the search phase, the goto
 * targets, the error codes and the return statement are not visible in
 * this view; confirm them against the complete file.
 */
123 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
125 struct tcp_bind_hashbucket *head;
126 struct tcp_bind_bucket *tb;
127 struct hlist_node *node;
/* Bounds of the search range and the number of candidates left to try. */
132 int low = sysctl_local_port_range[0];
133 int high = sysctl_local_port_range[1];
134 int remaining = (high - low) + 1;
137 spin_lock(&tcp_portalloc_lock);
138 rover = tcp_port_rover;
140 if ((rover < low) || (rover > high))
142 head = &tcp_bhash[tcp_bhashfn(rover)];
143 spin_lock(&head->lock);
144 tb_for_each(tb, node, &head->chain)
145 if (tb->port == rover)
149 spin_unlock(&head->lock);
150 } while (--remaining > 0);
/* Remember where the search stopped for the next caller. */
151 tcp_port_rover = rover;
152 spin_unlock(&tcp_portalloc_lock);
154 /* Exhausted local port range during search? */
159 /* OK, here is the one we will use. */
162 head = &tcp_bhash[tcp_bhashfn(snum)];
163 spin_lock(&head->lock);
164 tb_for_each(tb, node, &head->chain)
165 if (tb->port == snum)
/* A bucket that already has owners needs a conflict check, unless the
 * fastreuse test below applies. */
171 if (tb && !hlist_empty(&tb->owners)) {
172 if (tb->fastreuse > 0 && sk->sk_reuse &&
173 sk->sk_state != TCP_LISTEN) {
177 if (tcp_v6_bind_conflict(sk, tb))
183 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
/* Maintain the bucket's fastreuse hint from this socket's sk_reuse flag
 * and state; the assignments themselves are not visible here. */
185 if (hlist_empty(&tb->owners)) {
186 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
190 } else if (tb->fastreuse &&
191 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
/* Attach the socket to the bucket only if it is not attached already. */
195 if (!tcp_sk(sk)->bind_hash)
196 tcp_bind_hash(sk, tb, snum);
197 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
201 spin_unlock(&head->lock);
/*
 * Insert @sk into a lookup table. A socket in TCP_LISTEN goes onto the
 * listening hash chain and selects tcp_lhash_lock; any other socket has
 * its bucket index computed by tcp_v6_sk_hashfn(), stored in
 * sk->sk_hashent, and goes onto that established-hash chain with the
 * bucket's own lock. The socket must not be hashed already (BUG_TRAP).
 *
 * NOTE(review): the calls that take and release the selected lock are not
 * visible in this view.
 */
207 static __inline__ void __tcp_v6_hash(struct sock *sk)
209 struct hlist_head *list;
212 BUG_TRAP(sk_unhashed(sk));
214 if (sk->sk_state == TCP_LISTEN) {
215 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
216 lock = &tcp_lhash_lock;
219 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
220 list = &tcp_ehash[sk->sk_hashent].chain;
221 lock = &tcp_ehash[sk->sk_hashent].lock;
225 __sk_add_node(sk, list);
/* Account the newly hashed socket against its protocol. */
226 sock_prot_inc_use(sk->sk_prot);
/*
 * Hash entry point for a socket. Nothing is done for a socket in
 * TCP_CLOSE. A socket whose af_specific table is &ipv6_mapped is treated
 * separately from a native IPv6 socket.
 *
 * NOTE(review): the bodies of both branches are not visible in this view.
 */
231 static void tcp_v6_hash(struct sock *sk)
233 if (sk->sk_state != TCP_CLOSE) {
234 struct tcp_opt *tp = tcp_sk(sk);
236 if (tp->af_specific == &ipv6_mapped) {
/*
 * Find a listening socket for local port @hnum (host byte order is
 * presumed from the comparison with inet_sk(sk)->num - TODO confirm).
 *
 * The listening hash chain for @hnum is scanned under the tcp_lhash_lock
 * read lock. Only PF_INET6 sockets on that port are considered. A socket
 * bound to a specific address is compared against @daddr, and a socket
 * bound to a device is compared against @dif. Candidates are ranked by a
 * score and the one with the highest score is kept.
 *
 * NOTE(review): the score arithmetic, the continue statements and the
 * reference handling on the result are not visible in this view.
 */
246 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
249 struct hlist_node *node;
250 struct sock *result = NULL;
254 read_lock(&tcp_lhash_lock);
255 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
256 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
257 struct ipv6_pinfo *np = inet6_sk(sk);
/* A specific bound address has to be compared with the destination. */
260 if (!ipv6_addr_any(&np->rcv_saddr)) {
261 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
/* A device binding has to be compared with the incoming interface. */
265 if (sk->sk_bound_dev_if) {
266 if (sk->sk_bound_dev_if != dif)
274 if (score > hiscore) {
282 read_unlock(&tcp_lhash_lock);
286 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
287 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
289 * The sockhash lock must be held as a reader here. */
/*
 * Look up a connection in the established hash.
 *
 * The bucket is chosen by tcp_v6_hashfn() with the local side (@daddr,
 * @hnum) first and the remote side (@saddr, @sport) second, and is
 * scanned under the bucket's read lock. Fully established sockets are
 * matched with TCP_IPV6_MATCH(). If none matches, the TIME-WAIT chain,
 * located tcp_ehash_size buckets after the established one, is scanned
 * and compared on combined ports, family, both addresses and bound
 * device.
 *
 * NOTE(review): the trailing parameter (used as 'dif'), the 'hit' label,
 * the reference taken on the result and the return statements are not
 * visible in this view.
 */
292 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
293 struct in6_addr *daddr, u16 hnum,
296 struct tcp_ehash_bucket *head;
298 struct hlist_node *node;
299 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
302 /* Optimize here for direct hit, only listening connections can
303 * have wildcards anyways.
305 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
306 head = &tcp_ehash[hash];
307 read_lock(&head->lock);
308 sk_for_each(sk, node, &head->chain) {
309 /* For IPV6 do the cheaper port and family tests first. */
310 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
311 goto hit; /* You sunk my battleship! */
313 /* Must check for a TIME_WAIT'er before going to listener hash. */
314 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
315 /* FIXME: acme: check this... */
316 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
318 if(*((__u32 *)&(tw->tw_dport)) == ports &&
319 sk->sk_family == PF_INET6) {
320 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
321 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
322 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
326 read_unlock(&head->lock);
331 read_unlock(&head->lock);
/*
 * Combined lookup: try the established hash first and fall back to the
 * listener hash for (@daddr, @hnum).
 *
 * NOTE(review): the test that returns the established socket when one is
 * found is not visible in this view.
 */
336 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
337 struct in6_addr *daddr, u16 hnum,
342 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
347 return tcp_v6_lookup_listener(daddr, hnum, dif);
/*
 * Non-static lookup entry point. @dport is converted with ntohs() before
 * being handed to __tcp_v6_lookup(); @sport is passed through unchanged.
 *
 * NOTE(review): any locking or bottom-half handling around the call is not
 * visible in this view.
 */
350 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 dport,
357 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
365 /* Open request hash tables. */
/*
 * Hash a remote address into a SYN table slot. The first three 32-bit
 * words of @raddr seed a, b and c, a and b are offset by
 * JHASH_GOLDEN_RATIO, the fourth word is added to a, and __jhash_mix()
 * is applied twice. The result is c masked with (TCP_SYNQ_HSIZE - 1).
 *
 * NOTE(review): @rport and @rnd are not referenced in the visible lines,
 * and the embedded numbering skips lines 378 and 382 just before each
 * mix; confirm against the complete file how they enter the hash.
 */
368 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
372 a = raddr->s6_addr32[0];
373 b = raddr->s6_addr32[1];
374 c = raddr->s6_addr32[2];
376 a += JHASH_GOLDEN_RATIO;
377 b += JHASH_GOLDEN_RATIO;
379 __jhash_mix(a, b, c);
381 a += raddr->s6_addr32[3];
383 __jhash_mix(a, b, c);
385 return c & (TCP_SYNQ_HSIZE - 1);
/*
 * Search the listen socket's SYN table for a pending open request.
 *
 * The chain is chosen by tcp_v6_synq_hash() on the remote address and
 * port with the table's hash_rnd. A request matches when its remote port
 * equals rport, its class family is AF_INET6, both addresses compare
 * equal, and its recorded interface is either unset or equal to iif.
 * A matching request must not have a child socket yet (BUG_TRAP).
 *
 * NOTE(review): the rport and iif parameter lines, the store through
 * @prevp and the return statements are not visible in this view.
 */
388 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
389 struct open_request ***prevp,
391 struct in6_addr *raddr,
392 struct in6_addr *laddr,
395 struct tcp_listen_opt *lopt = tp->listen_opt;
396 struct open_request *req, **prev;
/* 'prev' always points at the link that leads to 'req'. */
398 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
399 (req = *prev) != NULL;
400 prev = &req->dl_next) {
401 if (req->rmt_port == rport &&
402 req->class->family == AF_INET6 &&
403 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
404 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
405 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
406 BUG_TRAP(req->sk == NULL);
/*
 * TCP-over-IPv6 checksum helper: returns csum_ipv6_magic() over @saddr,
 * @daddr, @len and IPPROTO_TCP, folded with the partial sum 'base'.
 * @th is not used by the visible body.
 *
 * NOTE(review): the declaration of the 'base' parameter is not visible in
 * this view.
 */
415 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
416 struct in6_addr *saddr,
417 struct in6_addr *daddr,
420 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Pick an initial sequence number for a connection started by @skb.
 * An skb whose protocol is ETH_P_IPV6 uses
 * secure_tcpv6_sequence_number() on the IPv6 header addresses; any other
 * skb uses secure_tcp_sequence_number() on the IPv4 header addresses.
 *
 * NOTE(review): the port arguments of both calls are not visible in this
 * view.
 */
423 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
425 if (skb->protocol == htons(ETH_P_IPV6)) {
426 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
427 skb->nh.ipv6h->saddr.s6_addr32,
431 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/*
 * Check that the connection identity of @sk is not already in use and, if
 * it is free, insert @sk into the established hash.
 *
 * The bucket is write-locked with bottom halves disabled. The TIME-WAIT
 * chain (tcp_ehash_size buckets after the established one) is scanned
 * first: for a matching TIME-WAIT entry with a recorded timestamp, the
 * new socket takes over write_seq (tw_snd_nxt + 65535 + 2) and the
 * ts_recent values. The established chain is scanned next with
 * TCP_IPV6_MATCH(). When nothing blocks the identity, the socket is
 * added to the chain and its hash index recorded. The visible failure
 * path returns -EADDRNOTAVAIL.
 *
 * NOTE(review): the labels, the remaining conditions of the TIME-WAIT
 * takeover and the success return are not visible in this view.
 */
438 static int tcp_v6_check_established(struct sock *sk)
440 struct inet_opt *inet = inet_sk(sk);
441 struct ipv6_pinfo *np = inet6_sk(sk);
/* Named as an incoming packet would see them: 'daddr' is our own receive
 * address and 'saddr' is the peer's address. */
442 struct in6_addr *daddr = &np->rcv_saddr;
443 struct in6_addr *saddr = &np->daddr;
444 int dif = sk->sk_bound_dev_if;
445 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
446 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
447 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
449 struct hlist_node *node;
450 struct tcp_tw_bucket *tw;
452 write_lock_bh(&head->lock);
454 /* Check TIME-WAIT sockets first. */
455 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
456 tw = (struct tcp_tw_bucket*)sk2;
458 if(*((__u32 *)&(tw->tw_dport)) == ports &&
459 sk2->sk_family == PF_INET6 &&
460 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
461 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
462 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
463 struct tcp_opt *tp = tcp_sk(sk);
465 if (tw->tw_ts_recent_stamp) {
466 /* See comment in tcp_ipv4.c */
467 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
470 tp->ts_recent = tw->tw_ts_recent;
471 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
480 /* And established part... */
481 sk_for_each(sk2, node, &head->chain) {
482 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
/* Identity is free: hash the socket while the bucket is still locked. */
487 BUG_TRAP(sk_unhashed(sk));
488 __sk_add_node(sk, &head->chain);
489 sk->sk_hashent = hash;
490 sock_prot_inc_use(sk->sk_prot);
491 write_unlock_bh(&head->lock);
494 /* Silly. Should hash-dance instead... */
496 tcp_tw_deschedule(tw);
497 NET_INC_STATS_BH(TimeWaitRecycled);
505 write_unlock_bh(&head->lock);
506 return -EADDRNOTAVAIL;
/*
 * Hash a socket that is about to connect.
 *
 * If the socket has no local port yet (inet->num == 0), one is obtained
 * with tcp_v6_get_port() and stored in network byte order in
 * inet->sport. The bind bucket chain for the port is then locked. When
 * @sk is the first owner of its bucket and has no successor on the owner
 * list, a shortcut path is taken; otherwise the lock is dropped and
 * tcp_v6_check_established() decides.
 *
 * NOTE(review): the error check after tcp_v6_get_port(), the assignment
 * of tb and the body of the shortcut path are not visible in this view.
 */
509 static int tcp_v6_hash_connect(struct sock *sk)
511 struct tcp_bind_hashbucket *head;
512 struct tcp_bind_bucket *tb;
515 if (inet_sk(sk)->num == 0) {
516 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
519 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
522 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
525 spin_lock_bh(&head->lock);
/* True when sk is the only socket on the bucket's owner list. */
527 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
529 spin_unlock_bh(&head->lock);
532 spin_unlock_bh(&head->lock);
533 return tcp_v6_check_established(sk);
537 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
539 return IP6CB(skb)->iif;
/*
 * Start an active open to the address in @uaddr.
 *
 * Visible steps, in order:
 *  - reject addresses shorter than SIN6_LEN_RFC2133 or not AF_INET6;
 *  - take the flow label from sin6_flowinfo; a labelled flow is looked up
 *    with fl6_sock_lookup() and its destination replaces the user's;
 *  - treat the unspecified address as loopback by setting the last
 *    address byte to 1;
 *  - test for multicast; for link-local destinations, reconcile
 *    sin6_scope_id with any existing device binding and require a bound
 *    interface;
 *  - forget the cached timestamp state when the destination differs from
 *    the previous one;
 *  - hand IPv4-mapped destinations to tcp_v4_connect() with the socket
 *    switched to the ipv6_mapped operations;
 *  - otherwise route the flow with ip6_dst_lookup(), store the route,
 *    derive header lengths and the MSS clamp, enter TCP_SYN_SENT, hash
 *    the socket with tcp_v6_hash_connect(), choose write_seq and call
 *    tcp_connect();
 *  - on the visible failure path, return to TCP_CLOSE and clear
 *    sk_route_caps.
 *
 * NOTE(review): the addr_len parameter line, most error returns and the
 * goto labels are not visible in this view.
 */
542 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
545 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
546 struct inet_opt *inet = inet_sk(sk);
547 struct ipv6_pinfo *np = inet6_sk(sk);
548 struct tcp_opt *tp = tcp_sk(sk);
549 struct in6_addr *saddr = NULL;
551 struct dst_entry *dst;
555 if (addr_len < SIN6_LEN_RFC2133)
558 if (usin->sin6_family != AF_INET6)
559 return(-EAFNOSUPPORT);
561 memset(&fl, 0, sizeof(fl));
564 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
565 IP6_ECN_flow_init(fl.fl6_flowlabel);
566 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
567 struct ip6_flowlabel *flowlabel;
568 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
569 if (flowlabel == NULL)
/* The destination registered with the flow label overrides the caller's. */
571 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
572 fl6_sock_release(flowlabel);
577 * connect() to INADDR_ANY means loopback (BSD'ism).
580 if(ipv6_addr_any(&usin->sin6_addr))
581 usin->sin6_addr.s6_addr[15] = 0x1;
583 addr_type = ipv6_addr_type(&usin->sin6_addr);
585 if(addr_type & IPV6_ADDR_MULTICAST)
588 if (addr_type&IPV6_ADDR_LINKLOCAL) {
589 if (addr_len >= sizeof(struct sockaddr_in6) &&
590 usin->sin6_scope_id) {
591 /* If interface is set while binding, indices
594 if (sk->sk_bound_dev_if &&
595 sk->sk_bound_dev_if != usin->sin6_scope_id)
598 sk->sk_bound_dev_if = usin->sin6_scope_id;
601 /* Connect to link-local address requires an interface */
602 if (!sk->sk_bound_dev_if)
/* Connecting to a different peer than before: drop the cached timestamp. */
606 if (tp->ts_recent_stamp &&
607 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
609 tp->ts_recent_stamp = 0;
613 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
614 np->flow_label = fl.fl6_flowlabel;
/* IPv4-mapped destination: build a sockaddr_in and use the IPv4 path. */
620 if (addr_type == IPV6_ADDR_MAPPED) {
621 u32 exthdrlen = tp->ext_header_len;
622 struct sockaddr_in sin;
624 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
626 if (__ipv6_only_sock(sk))
629 sin.sin_family = AF_INET;
630 sin.sin_port = usin->sin6_port;
631 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
633 tp->af_specific = &ipv6_mapped;
634 sk->sk_backlog_rcv = tcp_v4_do_rcv;
636 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* NOTE(review): the next three lines restore the IPv6 operations,
 * presumably only when tcp_v4_connect() failed - the condition is not
 * visible here. */
639 tp->ext_header_len = exthdrlen;
640 tp->af_specific = &ipv6_specific;
641 sk->sk_backlog_rcv = tcp_v6_do_rcv;
644 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
646 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
/* Native IPv6 path: describe the flow and look up a route. */
653 if (!ipv6_addr_any(&np->rcv_saddr))
654 saddr = &np->rcv_saddr;
656 fl.proto = IPPROTO_TCP;
657 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
658 ipv6_addr_copy(&fl.fl6_src,
659 (saddr ? saddr : &np->saddr));
660 fl.oif = sk->sk_bound_dev_if;
661 fl.fl_ip_dport = usin->sin6_port;
662 fl.fl_ip_sport = inet->sport;
/* With a source route configured, route towards its first address. */
664 if (np->opt && np->opt->srcrt) {
665 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
666 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
669 err = ip6_dst_lookup(sk, &dst, &fl);
676 ipv6_addr_copy(&np->rcv_saddr, saddr);
679 /* set the source address */
680 ipv6_addr_copy(&np->saddr, saddr);
681 inet->rcv_saddr = LOOPBACK4_IPV6;
683 ip6_dst_store(sk, dst, NULL);
684 sk->sk_route_caps = dst->dev->features &
685 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
687 tp->ext_header_len = 0;
689 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
690 tp->ext2_header_len = dst->header_len;
692 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
694 inet->dport = usin->sin6_port;
696 tcp_set_state(sk, TCP_SYN_SENT);
697 err = tcp_v6_hash_connect(sk);
702 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
707 err = tcp_connect(sk);
714 tcp_set_state(sk, TCP_CLOSE);
718 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP.
 *
 * @skb->data points at the IPv6 header of the packet that triggered the
 * error and @offset locates its TCP header. The socket is found with
 * tcp_v6_lookup() using the addresses and ports of that packet. Visible
 * handling:
 *  - TIME-WAIT sockets only have their reference dropped;
 *  - errors whose sequence number lies outside snd_una..snd_nxt are
 *    counted as OutOfWindowIcmps (listening sockets are exempt);
 *  - ICMPV6_PKT_TOOBIG re-validates or re-looks-up the route and, when
 *    the path MTU has shrunk below pmtu_cookie, resyncs the MSS and
 *    retransmits;
 *  - other errors are converted with icmpv6_err_convert() and delivered
 *    according to the socket state: a listening socket may drop the
 *    matching open request, while other states record the error as hard
 *    or soft depending on socket ownership and np->recverr.
 *
 * NOTE(review): most goto/return statements, case labels and the socket
 * locking calls are not visible in this view.
 */
722 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
723 int type, int code, int offset, __u32 info)
725 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
726 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
727 struct ipv6_pinfo *np;
733 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
736 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), Icmp6InErrors);
740 if (sk->sk_state == TCP_TIME_WAIT) {
741 tcp_tw_put((struct tcp_tw_bucket*)sk);
746 if (sock_owned_by_user(sk))
747 NET_INC_STATS_BH(LockDroppedIcmps);
749 if (sk->sk_state == TCP_CLOSE)
/* Sequence number of the segment quoted in the ICMPv6 message. */
753 seq = ntohl(th->seq);
754 if (sk->sk_state != TCP_LISTEN &&
755 !between(seq, tp->snd_una, tp->snd_nxt)) {
756 NET_INC_STATS_BH(OutOfWindowIcmps);
762 if (type == ICMPV6_PKT_TOOBIG) {
763 struct dst_entry *dst = NULL;
765 if (sock_owned_by_user(sk))
767 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
770 /* icmp should have updated the destination cache entry */
771 dst = __sk_dst_check(sk, np->dst_cookie);
774 struct inet_opt *inet = inet_sk(sk);
777 /* BUGGG_FUTURE: Again, it is not clear how
778 to handle rthdr case. Ignore this complexity
781 memset(&fl, 0, sizeof(fl));
782 fl.proto = IPPROTO_TCP;
783 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
784 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
785 fl.oif = sk->sk_bound_dev_if;
786 fl.fl_ip_dport = inet->dport;
787 fl.fl_ip_sport = inet->sport;
789 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
790 sk->sk_err_soft = -err;
796 if (tp->pmtu_cookie > dst_pmtu(dst)) {
797 tcp_sync_mss(sk, dst_pmtu(dst));
798 tcp_simple_retransmit(sk);
799 } /* else let the usual retransmit timer handle it */
/* Translate the ICMPv6 type/code pair into an errno value in 'err'. */
804 icmpv6_err_convert(type, code, &err);
806 /* Might be for an open_request */
807 switch (sk->sk_state) {
808 struct open_request *req, **prev;
810 if (sock_owned_by_user(sk))
813 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
814 &hdr->saddr, tcp_v6_iif(skb));
818 /* ICMPs are not backlogged, hence we cannot get
819 * an established socket here.
821 BUG_TRAP(req->sk == NULL);
823 if (seq != req->snt_isn) {
824 NET_INC_STATS_BH(OutOfWindowIcmps);
828 tcp_synq_drop(sk, req, prev);
832 case TCP_SYN_RECV: /* Cannot happen.
833 It can, it SYNs are crossed. --ANK */
834 if (!sock_owned_by_user(sk)) {
835 TCP_INC_STATS_BH(TcpAttemptFails);
837 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
841 sk->sk_err_soft = err;
/* Other states: report a hard error only when the socket is not owned by
 * the user and has asked for errors via np->recverr. */
845 if (!sock_owned_by_user(sk) && np->recverr) {
847 sk->sk_error_report(sk);
849 sk->sk_err_soft = err;
/*
 * Build and send a SYN-ACK for open request @req on listening socket @sk.
 *
 * A flow is filled in from the request (remote address as destination,
 * local address as source, request interface, ports). Transmit options
 * may be derived by inverting the routing header saved with the SYN
 * (np->rxopt.bits.srcrt == 2 and saved packet options present). With a
 * source route, the route lookup targets its first address. After
 * tcp_make_synack() the checksum is computed over the whole segment and
 * the packet is sent with ip6_xmit() towards the real remote address.
 * Options that are not the socket's own are freed before leaving.
 *
 * NOTE(review): the condition guarding the route lookup, the handling of
 * the NET_XMIT_CN result and the return statement are not visible in this
 * view.
 */
857 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
858 struct dst_entry *dst)
860 struct ipv6_pinfo *np = inet6_sk(sk);
861 struct sk_buff * skb;
862 struct ipv6_txoptions *opt = NULL;
866 memset(&fl, 0, sizeof(fl));
867 fl.proto = IPPROTO_TCP;
868 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
869 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
870 fl.fl6_flowlabel = 0;
871 fl.oif = req->af.v6_req.iif;
872 fl.fl_ip_dport = req->rmt_port;
873 fl.fl_ip_sport = inet_sk(sk)->sport;
878 np->rxopt.bits.srcrt == 2 &&
879 req->af.v6_req.pktopts) {
880 struct sk_buff *pktopts = req->af.v6_req.pktopts;
881 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
/* Reverse the routing header that arrived with the SYN. */
883 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
886 if (opt && opt->srcrt) {
887 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
888 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
891 err = ip6_dst_lookup(sk, &dst, &fl);
896 skb = tcp_make_synack(sk, dst, req);
898 struct tcphdr *th = skb->h.th;
900 th->check = tcp_v6_check(th, skb->len,
901 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
902 csum_partial((char *)th, skb->len, skb->csum));
/* The flow destination may have been the first source-route hop; put the
 * real remote address back before transmitting. */
904 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
905 err = ip6_xmit(sk, skb, &fl, opt, 0);
906 if (err == NET_XMIT_CN)
/* Free options created here, but never the socket's own np->opt. */
912 if (opt && opt != np->opt)
913 sock_kfree_s(sk, opt, opt->tot_len);
917 static void tcp_v6_or_free(struct open_request *req)
919 if (req->af.v6_req.pktopts)
920 kfree_skb(req->af.v6_req.pktopts);
/*
 * Open-request callback table for IPv6 requests: SYN-ACK
 * (re)transmission, ACK, destructor and reset handlers defined in this
 * file.
 *
 * NOTE(review): the embedded numbering skips line 924 and the closing
 * brace is not visible; an initializer (the code elsewhere reads
 * req->class->family) may be missing from this view.
 */
923 static struct or_calltable or_ipv6 = {
925 .rtx_syn_ack = tcp_v6_send_synack,
926 .send_ack = tcp_v6_or_send_ack,
927 .destructor = tcp_v6_or_free,
928 .send_reset = tcp_v6_send_reset
/*
 * Test whether @skb carries IPv6 ancillary information that socket @sk
 * has asked to receive. The condition is true when any of the following
 * is both present in the packet and enabled in np->rxopt: hop-by-hop
 * options, non-zero flow information in the first 32-bit word of the IP
 * header, a routing header, or destination options.
 *
 * NOTE(review): the statements executed for each outcome and the return
 * values are not visible in this view.
 */
931 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
933 struct ipv6_pinfo *np = inet6_sk(sk);
934 struct inet6_skb_parm *opt = IP6CB(skb);
937 if ((opt->hop && np->rxopt.bits.hopopts) ||
938 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
939 np->rxopt.bits.rxflow) ||
940 (opt->srcrt && np->rxopt.bits.srcrt) ||
941 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/*
 * Fill in the TCP checksum of an outgoing segment.
 *
 * For CHECKSUM_HW, th->check receives the complemented pseudo-header sum
 * and skb->csum records the offset of the check field within the TCP
 * header. Otherwise the checksum is computed in software from the
 * pseudo-header and a partial sum that starts with the TCP header
 * (th->doff << 2 bytes).
 *
 * NOTE(review): the final parameter line and the last argument of
 * csum_partial() are not visible in this view.
 */
948 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
951 struct ipv6_pinfo *np = inet6_sk(sk);
953 if (skb->ip_summed == CHECKSUM_HW) {
954 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
955 skb->csum = offsetof(struct tcphdr, check);
957 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
958 csum_partial((char *)th, th->doff<<2,
/*
 * Send a TCP reset in reply to @skb without using a socket.
 *
 * The destination of @skb is tested with ipv6_unicast_destination(). A
 * buffer is allocated with GFP_ATOMIC and a bare TCP header is built with
 * the ports swapped. The visible code sets either seq from the incoming
 * ack_seq, or ack_seq from the incoming seq advanced by the SYN and FIN
 * flags and the payload length. The checksum is computed over the header
 * with the addresses swapped, and the packet is routed with
 * ip6_dst_lookup() and sent with ip6_xmit(), both with a NULL socket.
 * TcpOutSegs and TcpOutRsts are incremented on transmission.
 *
 * NOTE(review): the early returns, the flag assignments, the choice
 * between the seq and ack_seq forms and the failure cleanup are not
 * visible in this view.
 */
964 static void tcp_v6_send_reset(struct sk_buff *skb)
966 struct tcphdr *th = skb->h.th, *t1;
967 struct sk_buff *buff;
973 if (!ipv6_unicast_destination(skb))
977 * We need to grab some memory, and put together an RST,
978 * and then put it into the queue to be sent.
981 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
985 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
987 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
989 /* Swap the send and the receive. */
990 memset(t1, 0, sizeof(*t1));
991 t1->dest = th->source;
992 t1->source = th->dest;
/* Header length in 32-bit words; the reset carries no options. */
993 t1->doff = sizeof(*t1)/4;
997 t1->seq = th->ack_seq;
1000 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1001 + skb->len - (th->doff<<2));
1004 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1006 memset(&fl, 0, sizeof(fl));
1007 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1008 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1010 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1011 sizeof(*t1), IPPROTO_TCP,
1014 fl.proto = IPPROTO_TCP;
1015 fl.oif = tcp_v6_iif(skb);
1016 fl.fl_ip_dport = t1->dest;
1017 fl.fl_ip_sport = t1->source;
1019 /* sk = NULL, but it is safe for now. RST socket required. */
1020 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1021 ip6_xmit(NULL, buff, &fl, NULL, 0);
1022 TCP_INC_STATS_BH(TcpOutSegs);
1023 TCP_INC_STATS_BH(TcpOutRsts);
/*
 * Send a bare ACK in reply to @skb without using a socket.
 *
 * @seq, @ack and @win are written to the header in network byte order.
 * The header is built with the ports of @skb swapped and a length of
 * tot_len bytes. The visible option code writes a NOP, NOP, TIMESTAMP
 * option word followed by the current tcp_time_stamp. The segment is
 * checksummed with the addresses of @skb swapped, routed with
 * ip6_dst_lookup() and sent with ip6_xmit(), both with a NULL socket.
 *
 * NOTE(review): the condition on @ts, the adjustment of tot_len for the
 * option, the echoed timestamp value, the ACK flag assignment and the
 * failure cleanup are not visible in this view.
 */
1030 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1032 struct tcphdr *th = skb->h.th, *t1;
1033 struct sk_buff *buff;
1035 int tot_len = sizeof(struct tcphdr);
1037 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1041 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1046 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1048 /* Swap the send and the receive. */
1049 memset(t1, 0, sizeof(*t1));
1050 t1->dest = th->source;
1051 t1->source = th->dest;
1052 t1->doff = tot_len/4;
1053 t1->seq = htonl(seq);
1054 t1->ack_seq = htonl(ack);
1056 t1->window = htons(win);
/* Option area starts directly after the fixed TCP header. */
1059 u32 *ptr = (u32*)(t1 + 1);
1060 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1061 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1062 *ptr++ = htonl(tcp_time_stamp);
1066 buff->csum = csum_partial((char *)t1, tot_len, 0);
1068 memset(&fl, 0, sizeof(fl));
1069 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1070 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1072 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1073 tot_len, IPPROTO_TCP,
1076 fl.proto = IPPROTO_TCP;
1077 fl.oif = tcp_v6_iif(skb);
1078 fl.fl_ip_dport = t1->dest;
1079 fl.fl_ip_sport = t1->source;
1081 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1082 ip6_xmit(NULL, buff, &fl, NULL, 0);
1083 TCP_INC_STATS_BH(TcpOutSegs);
/*
 * Send an ACK on behalf of a TIME-WAIT bucket. @sk is reinterpreted as a
 * struct tcp_tw_bucket, and the ACK uses its tw_snd_nxt, tw_rcv_nxt, its
 * receive window shifted right by tw_rcv_wscale, and tw_ts_recent.
 *
 * NOTE(review): the embedded numbering jumps from 1095 to 1100, which is
 * more than a closing brace and a blank line; one or more statements
 * after the call may be missing from this view.
 */
1090 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1092 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1094 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1095 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/*
 * Send an ACK for an open request: sequence number snt_isn + 1,
 * acknowledgment rcv_isn + 1, and the window and recent timestamp stored
 * in the request. Installed as the send_ack callback of or_ipv6.
 */
1100 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1102 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/*
 * Classify a segment that arrived on listening socket @sk.
 *
 * A pending open request matching the segment's source port, addresses
 * and incoming interface is looked up first and, if found, handed to
 * tcp_check_req(). Otherwise the established hash is consulted. A hit
 * that is not in TIME_WAIT is treated differently from a TIME-WAIT
 * entry, whose reference is dropped with tcp_tw_put(). The SYN-cookie
 * check is compiled out with #if 0.
 *
 * NOTE(review): the conditions around each step, the remaining lookup
 * arguments, the socket locking and the return statements are not visible
 * in this view.
 */
1106 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1108 struct open_request *req, **prev;
1109 struct tcphdr *th = skb->h.th;
1110 struct tcp_opt *tp = tcp_sk(sk);
1113 /* Find possible connection requests. */
1114 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1115 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1117 return tcp_check_req(sk, skb, req, prev);
1119 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1121 &skb->nh.ipv6h->daddr,
1126 if (nsk->sk_state != TCP_TIME_WAIT) {
1130 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1134 #if 0 /*def CONFIG_SYN_COOKIES*/
1135 if (!th->rst && !th->syn && th->ack)
1136 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/*
 * Add open request @req to the SYN table of listening socket @sk.
 *
 * The slot is chosen by tcp_v6_synq_hash() on the request's remote
 * address and port. The request expires TCP_TIMEOUT_INIT jiffies from
 * now. Its dl_next link is filled in before the request is published at
 * the head of the chain, and the publication is done under the
 * syn_wait_lock write lock.
 *
 * NOTE(review): other request fields initialised here and any queue
 * accounting call are not visible in this view.
 */
1141 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1143 struct tcp_opt *tp = tcp_sk(sk);
1144 struct tcp_listen_opt *lopt = tp->listen_opt;
1145 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1148 req->expires = jiffies + TCP_TIMEOUT_INIT;
1150 req->dl_next = lopt->syn_table[h];
1152 write_lock(&tp->syn_wait_lock);
1153 lopt->syn_table[h] = req;
1154 write_unlock(&tp->syn_wait_lock);
1160 /* FIXME: this is substantially similar to the ipv4 code.
1161 * Can some kind of merge be done? -- erics */
/*
 * Handle an incoming SYN on listening socket @sk.
 *
 * IPv4 packets are passed to tcp_v4_conn_request(). For IPv6 the visible
 * steps are:
 *  - test that the destination is unicast;
 *  - when the SYN queue is full and no ISN was supplied in
 *    TCP_SKB_CB(skb)->when, log a rate-limited message;
 *  - test for a full accept queue with more than one young request;
 *  - allocate an open request and parse the TCP options into a temporary
 *    tcp_opt whose MSS clamp is derived from IPV6_MIN_MTU;
 *  - record the addresses in the request, and keep a reference to the skb
 *    as packet options when the socket wants any of its ancillary data;
 *  - record the interface: the socket's bound device, or the incoming
 *    interface for link-local peers of an unbound socket;
 *  - choose an ISN, send the SYN-ACK and queue the request.
 * The visible failure path frees the request, counts TcpAttemptFails and
 * returns 0, so the caller does not send a reset.
 *
 * NOTE(review): the goto statements, labels and several conditions are
 * not visible in this view.
 */
1163 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1165 struct ipv6_pinfo *np = inet6_sk(sk);
1166 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1167 struct open_request *req = NULL;
1168 __u32 isn = TCP_SKB_CB(skb)->when;
1170 if (skb->protocol == htons(ETH_P_IP))
1171 return tcp_v4_conn_request(sk, skb);
1173 if (!ipv6_unicast_destination(skb))
1177 * There are no SYN attacks on IPv6, yet...
1179 if (tcp_synq_is_full(sk) && !isn) {
1180 if (net_ratelimit())
1181 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1185 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1188 req = tcp_openreq_alloc();
/* Parse the SYN's options into a scratch tcp_opt, not the listener's. */
1192 tcp_clear_options(&tmptp);
1193 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1194 tmptp.user_mss = tp->user_mss;
1196 tcp_parse_options(skb, &tmptp, 0);
1198 tmptp.tstamp_ok = tmptp.saw_tstamp;
1199 tcp_openreq_init(req, &tmptp, skb);
1201 req->class = &or_ipv6;
1202 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1203 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1204 TCP_ECN_create_request(req, skb->h.th);
1205 req->af.v6_req.pktopts = NULL;
/* Keep the SYN itself (with an extra reference) as the request's packet
 * options when the socket asked for any of its ancillary data. */
1206 if (ipv6_opt_accepted(sk, skb) ||
1207 np->rxopt.bits.rxinfo ||
1208 np->rxopt.bits.rxhlim) {
1209 atomic_inc(&skb->users);
1210 req->af.v6_req.pktopts = skb;
1212 req->af.v6_req.iif = sk->sk_bound_dev_if;
1214 /* So that link locals have meaning */
1215 if (!sk->sk_bound_dev_if &&
1216 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1217 req->af.v6_req.iif = tcp_v6_iif(skb);
1220 isn = tcp_v6_init_sequence(sk,skb);
1224 if (tcp_v6_send_synack(sk, req, NULL))
1227 tcp_v6_synq_add(sk, req);
1233 tcp_openreq_free(req);
1235 TCP_INC_STATS_BH(TcpAttemptFails);
1236 return 0; /* don't send reset */
/*
 * Create the child socket for open request @req once the handshake has
 * completed on listening socket @sk.
 *
 * For an IPv4 packet (ETH_P_IP) the child is created by
 * tcp_v4_syn_recv_sock() and then dressed as an IPv4-mapped IPv6 socket:
 * the listener's ipv6_pinfo is copied, the addresses are written in
 * ::ffff:a.b.c.d form, and the ipv6_mapped operations and the IPv4
 * backlog handler are installed.
 *
 * For a native IPv6 packet the visible steps are: test the accept queue,
 * optionally invert the routing header saved with the SYN, look up a
 * route, create the child with tcp_create_openreq_child(), store the
 * route, copy the listener's ipv6_pinfo, set the addresses and bound
 * interface from the request, move the saved packet options to the
 * child, duplicate the transmit options, size the extension headers and
 * MSS, hash the child and inherit the listener's port. The failure path
 * counts ListenOverflows / ListenDrops and frees options that are not
 * the listener's own.
 *
 * NOTE(review): several conditions, goto labels, return statements and
 * comment terminators are not visible in this view.
 */
1239 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1240 struct open_request *req,
1241 struct dst_entry *dst)
1243 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1244 struct tcp6_sock *newtcp6sk;
1245 struct inet_opt *newinet;
1246 struct tcp_opt *newtp;
1248 struct ipv6_txoptions *opt;
1250 if (skb->protocol == htons(ETH_P_IP)) {
1255 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
/* Point the child's pinet6 at the ipv6_pinfo embedded in its tcp6_sock. */
1260 newtcp6sk = (struct tcp6_sock *)newsk;
1261 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1263 newinet = inet_sk(newsk);
1264 newnp = inet6_sk(newsk);
1265 newtp = tcp_sk(newsk);
1267 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1269 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1272 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1275 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1277 newtp->af_specific = &ipv6_mapped;
1278 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1279 newnp->pktoptions = NULL;
1281 newnp->mcast_oif = tcp_v6_iif(skb);
1282 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1284 /* Charge newly allocated IPv6 socket. Though it is mapped,
1287 #ifdef INET_REFCNT_DEBUG
1288 atomic_inc(&inet6_sock_nr);
1291 /* It is tricky place. Until this moment IPv4 tcp
1292 worked with IPv6 af_tcp.af_specific.
1295 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1302 if (sk_acceptq_is_full(sk))
1305 if (np->rxopt.bits.srcrt == 2 &&
1306 opt == NULL && req->af.v6_req.pktopts) {
1307 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1309 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1315 memset(&fl, 0, sizeof(fl));
1316 fl.proto = IPPROTO_TCP;
1317 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1318 if (opt && opt->srcrt) {
1319 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1320 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1322 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1323 fl.oif = sk->sk_bound_dev_if;
1324 fl.fl_ip_dport = req->rmt_port;
1325 fl.fl_ip_sport = inet_sk(sk)->sport;
1327 if (ip6_dst_lookup(sk, &dst, &fl))
1331 newsk = tcp_create_openreq_child(sk, req, skb);
1335 /* Charge newly allocated IPv6 socket */
1336 #ifdef INET_REFCNT_DEBUG
1337 atomic_inc(&inet6_sock_nr);
1340 ip6_dst_store(newsk, dst, NULL);
1341 newsk->sk_route_caps = dst->dev->features &
1342 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1344 newtcp6sk = (struct tcp6_sock *)newsk;
1345 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1347 newtp = tcp_sk(newsk);
1348 newinet = inet_sk(newsk);
1349 newnp = inet6_sk(newsk);
/* Start from the listener's IPv6 state, then override per connection. */
1351 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1353 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1354 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1355 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1356 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1358 /* Now IPv6 options...
1360 First: no IPv4 options.
1362 newinet->opt = NULL;
1365 newnp->rxopt.all = np->rxopt.all;
1367 /* Clone pktoptions received with SYN */
1368 newnp->pktoptions = NULL;
1369 if (req->af.v6_req.pktopts) {
1370 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
/* The request's reference is dropped whether or not the clone succeeded. */
1372 kfree_skb(req->af.v6_req.pktopts);
1373 req->af.v6_req.pktopts = NULL;
1374 if (newnp->pktoptions)
1375 skb_set_owner_r(newnp->pktoptions, newsk);
1378 newnp->mcast_oif = tcp_v6_iif(skb);
1379 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1381 /* Clone native IPv6 options from listening socket (if any)
1383 Yes, keeping reference count would be much more clever,
1384 but we make one more one thing there: reattach optmem
1388 newnp->opt = ipv6_dup_options(newsk, opt);
1390 sock_kfree_s(sk, opt, opt->tot_len);
1393 newtp->ext_header_len = 0;
1395 newtp->ext_header_len = newnp->opt->opt_nflen +
1396 newnp->opt->opt_flen;
1397 newtp->ext2_header_len = dst->header_len;
1399 tcp_sync_mss(newsk, dst_pmtu(dst));
1400 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1401 tcp_initialize_rcv_mss(newsk);
1403 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1405 __tcp_v6_hash(newsk);
1406 tcp_inherit_port(sk, newsk);
1411 NET_INC_STATS_BH(ListenOverflows);
1413 NET_INC_STATS_BH(ListenDrops);
1414 if (opt && opt != np->opt)
1415 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Prepare checksum state for a received TCP segment.
 *
 * With CHECKSUM_HW the device-supplied sum in skb->csum is verified
 * against the pseudo-header, and a failure is logged through
 * LIMIT_NETDEBUG. Segments of at most 76 bytes are verified completely
 * in software with skb_checksum(). For longer segments only the
 * complemented pseudo-header sum is stored in skb->csum, presumably so
 * that verification can be completed later (TODO confirm).
 *
 * NOTE(review): the return statements and the else keywords linking the
 * branches are not visible in this view.
 */
1420 static int tcp_v6_checksum_init(struct sk_buff *skb)
1422 if (skb->ip_summed == CHECKSUM_HW) {
1423 skb->ip_summed = CHECKSUM_UNNECESSARY;
1424 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1425 &skb->nh.ipv6h->daddr,skb->csum))
1427 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
/* Short segment: cheap enough to checksum in full right away. */
1429 if (skb->len <= 76) {
1430 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1431 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1433 skb->ip_summed = CHECKSUM_UNNECESSARY;
1435 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1436 &skb->nh.ipv6h->daddr,0);
1441 /* The socket must have its spinlock held when we get here.
1444 * We have a potential double-lock case here, so even when
1445 * doing backlog processing we use the BH locking scheme.
1446 * This is because we cannot sleep with the original spinlock held. */
/*
 * Per-socket receive routine; also installed as tcpv6_prot.backlog_rcv and
 * called directly from tcp_v6_rcv().
 *
 * Flow that is visible here:
 *  - IPv4 frames (skb->protocol == ETH_P_IP) are passed on to
 *    tcp_v4_do_rcv().
 *  - The segment is run through sk_filter().
 *  - opt_skb, a GFP_ATOMIC clone of skb, is kept so that the options of
 *    this segment can be latched into np->pktoptions at the end.
 *  - TCP_ESTABLISHED sockets use tcp_rcv_established(), TCP_LISTEN sockets
 *    go through tcp_v6_hnd_req() and tcp_child_process(), every other
 *    state uses tcp_rcv_state_process().
 *  - The final part updates np->mcast_oif and np->mcast_hops when the
 *    matching rxopt bits are set, and swaps opt_skb into np->pktoptions
 *    with xchg() when ipv6_opt_accepted() says the options are wanted.
 *
 * NOTE(review): labels, gotos, return statements and the condition that
 * guards the skb_clone() call are not visible in this listing; the calls
 * to tcp_v6_send_reset(), __kfree_skb(opt_skb) and the TcpInErrs counter
 * presumably sit on the failure paths - confirm against the complete
 * source.
 */
1449 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1451 struct ipv6_pinfo *np = inet6_sk(sk);
1453 struct sk_buff *opt_skb = NULL;
1455 /* Imagine: socket is IPv6. IPv4 packet arrives,
1456 goes to IPv4 receive handler and backlogged.
1457 From backlog it always goes here. Kerboom...
1458 Fortunately, tcp_rcv_established and rcv_established
1459 handle them correctly, but it is not case with
1460 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1463 if (skb->protocol == htons(ETH_P_IP))
1464 return tcp_v4_do_rcv(sk, skb);
1466 if (sk_filter(sk, skb, 0))
1470 * socket locking is here for SMP purposes as backlog rcv
1471 * is currently called with bh processing disabled.
1474 /* Do Stevens' IPV6_PKTOPTIONS.
1476 Yes, guys, it is the only place in our code, where we
1477 may make it not affecting IPv4.
1478 The rest of code is protocol independent,
1479 and I do not like idea to uglify IPv4.
1481 Actually, all the idea behind IPV6_PKTOPTIONS
1482 looks not very well thought. For now we latch
1483 options, received in the last packet, enqueued
1484 by tcp. Feel free to propose better solution.
1488 opt_skb = skb_clone(skb, GFP_ATOMIC);
1490 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1491 TCP_CHECK_TIMER(sk);
1492 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1494 TCP_CHECK_TIMER(sk);
1496 goto ipv6_pktoptions;
/* The data offset must fit inside the segment and the checksum must hold. */
1500 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1503 if (sk->sk_state == TCP_LISTEN) {
1504 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1509 * Queue it on the new socket if the new socket is active,
1510 * otherwise we just shortcircuit this and continue with
1514 if (tcp_child_process(sk, nsk, skb))
1517 __kfree_skb(opt_skb);
1522 TCP_CHECK_TIMER(sk);
1523 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1525 TCP_CHECK_TIMER(sk);
1527 goto ipv6_pktoptions;
1531 tcp_v6_send_reset(skb);
1534 __kfree_skb(opt_skb);
1538 TCP_INC_STATS_BH(TcpInErrs);
1543 /* Do you ask, what is it?
1545 1. skb was enqueued by tcp.
1546 2. skb is added to tail of read queue, rather than out of order.
1547 3. socket is not in passive state.
1548 4. Finally, it really contains options, which user wants to receive.
1551 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1552 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1553 if (np->rxopt.bits.rxinfo)
1554 np->mcast_oif = tcp_v6_iif(opt_skb);
1555 if (np->rxopt.bits.rxhlim)
1556 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1557 if (ipv6_opt_accepted(sk, opt_skb)) {
1558 skb_set_owner_r(opt_skb, sk);
1559 opt_skb = xchg(&np->pktoptions, opt_skb);
1561 __kfree_skb(opt_skb);
1562 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Entry point for received TCP segments; registered as
 * tcpv6_protocol.handler.
 *
 * Visible steps: restrict processing to PACKET_HOST frames, count the
 * segment in TcpInSegs, pull and validate the TCP header, set up the
 * checksum state, fill in the TCP control block, look the socket up with
 * __tcp_v6_lookup(), then either process the segment (prequeue or
 * tcp_v6_do_rcv()) or put it on the socket backlog when the socket is
 * owned by user context. Separate code handles the case without a usable
 * socket and sockets in TCP_TIME_WAIT.
 *
 * NOTE(review): labels, gotos, locking calls and most return statements
 * are not visible in this listing - confirm the exact control flow against
 * the complete source.
 */
1571 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1573 struct sk_buff *skb = *pskb;
1578 if (skb->pkt_type != PACKET_HOST)
1582 * Count it even if it's bad.
1584 TCP_INC_STATS_BH(TcpInSegs);
/* Make sure the fixed TCP header is in the linear part of the skb. */
1586 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* doff counts 32-bit words; it must cover at least the fixed header. */
1591 if (th->doff < sizeof(struct tcphdr)/4)
1593 if (!pskb_may_pull(skb, th->doff*4))
1596 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1597 tcp_v6_checksum_init(skb) < 0))
/* Fill in the per-skb TCP control block; SYN and FIN each add one to end_seq. */
1601 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1602 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1603 skb->len - th->doff*4);
1604 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1605 TCP_SKB_CB(skb)->when = 0;
1606 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1607 TCP_SKB_CB(skb)->sacked = 0;
/* Socket lookup keyed on both addresses, both ports and the incoming interface. */
1609 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1610 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1616 if (sk->sk_state == TCP_TIME_WAIT)
1619 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1620 goto discard_and_relse;
1622 if (sk_filter(sk, skb, 0))
1623 goto discard_and_relse;
/* Socket not owned by user context: prequeue the segment or process it now. */
1629 if (!sock_owned_by_user(sk)) {
1630 if (!tcp_prequeue(sk, skb))
1631 ret = tcp_v6_do_rcv(sk, skb);
1633 sk_add_backlog(sk, skb);
1637 return ret ? -1 : 0;
/* Policy check without a socket (NULL is passed instead of sk). */
1640 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1643 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1645 TCP_INC_STATS_BH(TcpInErrs);
1647 tcp_v6_send_reset(skb);
/* From here on sk is used as a struct tcp_tw_bucket (see the casts below). */
1664 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1665 tcp_tw_put((struct tcp_tw_bucket *) sk);
1669 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1670 TCP_INC_STATS_BH(TcpInErrs);
1671 tcp_tw_put((struct tcp_tw_bucket *) sk);
1675 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1676 skb, th, skb->len)) {
1681 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1683 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1684 tcp_tw_put((struct tcp_tw_bucket *)sk);
1688 /* Fall through to ACK */
1691 tcp_v6_timewait_ack(sk, skb);
1695 case TCP_TW_SUCCESS:;
/*
 * Route revalidation hook; installed as ipv6_specific.rebuild_header.
 *
 * The cached route is checked with __sk_dst_check() against
 * np->dst_cookie. The visible code then builds a flow description from the
 * socket (addresses, flow label, bound interface, ports), looks a route up
 * with ip6_dst_lookup(), stores it with ip6_dst_store() and refreshes
 * sk->sk_route_caps and the TCP ext2_header_len from it.
 *
 * NOTE(review): the conditions around the lookup, the error handling and
 * the return statements are not visible in this listing; presumably the
 * lookup only runs when no valid cached route exists, and
 * "sk->sk_route_caps = 0" belongs to the lookup error path - confirm.
 */
1700 static int tcp_v6_rebuild_header(struct sock *sk)
1703 struct dst_entry *dst;
1704 struct ipv6_pinfo *np = inet6_sk(sk);
1706 dst = __sk_dst_check(sk, np->dst_cookie);
1709 struct inet_opt *inet = inet_sk(sk);
1712 memset(&fl, 0, sizeof(fl));
1713 fl.proto = IPPROTO_TCP;
1714 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1715 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1716 fl.fl6_flowlabel = np->flow_label;
1717 fl.oif = sk->sk_bound_dev_if;
1718 fl.fl_ip_dport = inet->dport;
1719 fl.fl_ip_sport = inet->sport;
/* With a source route, route towards the first address of the rt0_hdr. */
1721 if (np->opt && np->opt->srcrt) {
1722 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1723 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1726 err = ip6_dst_lookup(sk, &dst, &fl);
1729 sk->sk_route_caps = 0;
1733 ip6_dst_store(sk, dst, NULL);
/* Take the device features, minus NETIF_F_IP_CSUM and NETIF_F_TSO. */
1734 sk->sk_route_caps = dst->dev->features &
1735 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1736 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * Transmit hook; installed as ipv6_specific.queue_xmit.
 *
 * Builds a flow description from the owning socket (skb->sk), applies
 * IP6_ECN_flow_xmit() to the flow label, makes sure a route is available
 * (cached via __sk_dst_check(), otherwise looked up with ip6_dst_lookup()
 * and stored with ip6_dst_store()), attaches a reference to that route to
 * the skb and hands the segment to ip6_xmit() together with np->opt.
 *
 * The ipfragok argument is not referenced in the visible lines; the last
 * argument given to ip6_xmit() is the literal 0.
 *
 * NOTE(review): the conditions around the route lookup and its error
 * return are not visible in this listing - confirm against the complete
 * source.
 */
1742 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1744 struct sock *sk = skb->sk;
1745 struct inet_opt *inet = inet_sk(sk);
1746 struct ipv6_pinfo *np = inet6_sk(sk);
1748 struct dst_entry *dst;
1750 memset(&fl, 0, sizeof(fl));
1751 fl.proto = IPPROTO_TCP;
1752 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1753 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1754 fl.fl6_flowlabel = np->flow_label;
1755 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1756 fl.oif = sk->sk_bound_dev_if;
1757 fl.fl_ip_sport = inet->sport;
1758 fl.fl_ip_dport = inet->dport;
/* With a source route, route towards the first address of the rt0_hdr. */
1760 if (np->opt && np->opt->srcrt) {
1761 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1762 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1765 dst = __sk_dst_check(sk, np->dst_cookie);
1768 int err = ip6_dst_lookup(sk, &dst, &fl);
/* The lookup error is remembered on the socket with its sign flipped. */
1771 sk->sk_err_soft = -err;
1775 ip6_dst_store(sk, dst, NULL);
/* Take the device features, minus NETIF_F_IP_CSUM and NETIF_F_TSO. */
1776 sk->sk_route_caps = dst->dev->features &
1777 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1778 tcp_sk(sk)->ext2_header_len = dst->header_len;
1781 skb->dst = dst_clone(dst);
1783 /* Restore final destination back after routing done */
1784 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1786 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * Fill a sockaddr_in6 with the destination address (np->daddr) and
 * destination port (inet->dport) of the socket; used as the addr2sockaddr
 * hook of both ipv6_specific and ipv6_mapped.
 *
 * @sk:    socket to describe
 * @uaddr: output buffer, written as a struct sockaddr_in6
 *
 * sin6_flowinfo is always 0. sin6_scope_id is 0 unless the socket is bound
 * to a device and the address is link-local, in which case it is the bound
 * device index.
 */
1789 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1791 struct ipv6_pinfo *np = inet6_sk(sk);
1792 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1794 sin6->sin6_family = AF_INET6;
1795 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1796 sin6->sin6_port = inet_sk(sk)->dport;
1797 /* We do not store received flowlabel for TCP */
1798 sin6->sin6_flowinfo = 0;
1799 sin6->sin6_scope_id = 0;
1800 if (sk->sk_bound_dev_if &&
1801 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1802 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * remember_stamp hook of ipv6_specific. According to the comment in the
 * body this is not implemented for IPv6 yet.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1805 static int tcp_v6_remember_stamp(struct sock *sk)
1807 /* Alas, not yet... */
/*
 * Address-family operations for TCP running over IPv6 proper.
 * tcp_v6_init_sock() installs this table as tp->af_specific.
 * The network header length is that of struct ipv6hdr and socket
 * addresses are reported as struct sockaddr_in6.
 */
1811 static struct tcp_func ipv6_specific = {
1812 .queue_xmit = tcp_v6_xmit,
1813 .send_check = tcp_v6_send_check,
1814 .rebuild_header = tcp_v6_rebuild_header,
1815 .conn_request = tcp_v6_conn_request,
1816 .syn_recv_sock = tcp_v6_syn_recv_sock,
1817 .remember_stamp = tcp_v6_remember_stamp,
1818 .net_header_len = sizeof(struct ipv6hdr),
1820 .setsockopt = ipv6_setsockopt,
1821 .getsockopt = ipv6_getsockopt,
1822 .addr2sockaddr = v6_addr2sockaddr,
1823 .sockaddr_len = sizeof(struct sockaddr_in6)
1827 * TCP over IPv4 via INET6 API
/*
 * Address-family operations for IPv4 traffic carried on an INET6 socket.
 * Transmit, checksum, header rebuild and timestamp caching use the IPv4
 * routines and the network header length is that of struct iphdr, while
 * connection-request handling, socket options and address reporting keep
 * the IPv6 entries (addresses are still reported as struct sockaddr_in6).
 */
1830 static struct tcp_func ipv6_mapped = {
1831 .queue_xmit = ip_queue_xmit,
1832 .send_check = tcp_v4_send_check,
1833 .rebuild_header = tcp_v4_rebuild_header,
1834 .conn_request = tcp_v6_conn_request,
1835 .syn_recv_sock = tcp_v6_syn_recv_sock,
1836 .remember_stamp = tcp_v4_remember_stamp,
1837 .net_header_len = sizeof(struct iphdr),
1839 .setsockopt = ipv6_setsockopt,
1840 .getsockopt = ipv6_getsockopt,
1841 .addr2sockaddr = v6_addr2sockaddr,
1842 .sockaddr_len = sizeof(struct sockaddr_in6)
1847 /* NOTE: A lot of things set to zero explicitly by call to
1848 * sk_alloc() so need not be done here.
/*
 * Socket initialisation hook; installed as tcpv6_prot.init.
 *
 * Sets up the out-of-order queue, the transmit timers and the prequeue,
 * then seeds the TCP state: rto and mdev start at TCP_TIMEOUT_INIT,
 * snd_ssthresh at 0x7fffffff, snd_cwnd_clamp at ~0, mss_cache at 536 and
 * reordering at sysctl_tcp_reordering. The socket starts in TCP_CLOSE with
 * ipv6_specific as its address-family operations, tcp_write_space as the
 * write-space callback and buffer sizes taken from sysctl_tcp_wmem[1] and
 * sysctl_tcp_rmem[1]. Finally tcp_sockets_allocated is incremented.
 *
 * NOTE(review): the statement that follows the "bandaid" comment and the
 * return statement are not visible in this listing.
 */
1850 static int tcp_v6_init_sock(struct sock *sk)
1852 struct tcp_opt *tp = tcp_sk(sk);
1854 skb_queue_head_init(&tp->out_of_order_queue);
1855 tcp_init_xmit_timers(sk);
1856 tcp_prequeue_init(tp);
1858 tp->rto = TCP_TIMEOUT_INIT;
1859 tp->mdev = TCP_TIMEOUT_INIT;
1861 /* So many TCP implementations out there (incorrectly) count the
1862 * initial SYN frame in their delayed-ACK and congestion control
1863 * algorithms that we must have the following bandaid to talk
1864 * efficiently to them. -DaveM
1868 /* See draft-stevens-tcpca-spec-01 for discussion of the
1869 * initialization of these values.
1871 tp->snd_ssthresh = 0x7fffffff;
1872 tp->snd_cwnd_clamp = ~0;
1873 tp->mss_cache = 536;
1875 tp->reordering = sysctl_tcp_reordering;
1877 sk->sk_state = TCP_CLOSE;
1879 tp->af_specific = &ipv6_specific;
1881 sk->sk_write_space = tcp_write_space;
1882 sk->sk_use_write_queue = 1;
1884 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1885 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1887 atomic_inc(&tcp_sockets_allocated);
/*
 * Socket teardown hook; installed as tcpv6_prot.destroy.
 *
 * Stops the transmit timers, purges the write queue, the out-of-order
 * queue and the prequeue, frees a cached sendmsg page if there is one,
 * decrements tcp_sockets_allocated and finishes with
 * inet6_destroy_sock(), whose result is returned.
 *
 * NOTE(review): the statement guarded by the bind_hash test is not visible
 * in this listing.
 */
1892 static int tcp_v6_destroy_sock(struct sock *sk)
1894 struct tcp_opt *tp = tcp_sk(sk);
1895 struct inet_opt *inet = inet_sk(sk);
1897 tcp_clear_xmit_timers(sk);
1899 /* Clean up the write buffer. */
1900 tcp_writequeue_purge(sk);
1902 /* Cleans up our, hopefully empty, out_of_order_queue. */
1903 __skb_queue_purge(&tp->out_of_order_queue);
1905 /* Clean prequeue, it must be empty really */
1906 __skb_queue_purge(&tp->ucopy.prequeue);
1908 /* Clean up a referenced TCP bind bucket. */
1909 if (tcp_sk(sk)->bind_hash)
1912 /* If sendmsg cached page exists, toss it. */
1913 if (inet->sndmsg_page != NULL)
1914 __free_page(inet->sndmsg_page);
1916 atomic_dec(&tcp_sockets_allocated);
1918 return inet6_destroy_sock(sk);
1921 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Format one line of the TCPv6 proc listing for a pending connection
 * request (called from tcp6_seq_show() for TCP_SEQ_STATE_OPENREQ).
 *
 * @seq: seq_file being filled
 * @sk:  socket the request belongs to; supplies the local port
 * @req: the open request; supplies both addresses, the remote port and
 *       the expiry time
 * @i:   entry number passed in by the caller
 * @uid: user id passed in by the caller
 *
 * NOTE(review): the opening of the print call and several of its
 * arguments are not visible in this listing.
 */
1922 static void get_openreq6(struct seq_file *seq,
1923 struct sock *sk, struct open_request *req, int i, int uid)
1925 struct in6_addr *dest, *src;
/* Time left until the request expires, in jiffies. */
1926 int ttd = req->expires - jiffies;
1931 src = &req->af.v6_req.loc_addr;
1932 dest = &req->af.v6_req.rmt_addr;
1934 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1935 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1937 src->s6_addr32[0], src->s6_addr32[1],
1938 src->s6_addr32[2], src->s6_addr32[3],
1939 ntohs(inet_sk(sk)->sport),
1940 dest->s6_addr32[0], dest->s6_addr32[1],
1941 dest->s6_addr32[2], dest->s6_addr32[3],
1942 ntohs(req->rmt_port),
1944 0,0, /* could print option size, but that is af dependent. */
1945 1, /* timers active (only the expire timer) */
1946 jiffies_to_clock_t(ttd),
1949 0, /* non standard timer */
1950 0, /* open_requests have no inode */
/*
 * Format one line of the TCPv6 proc listing for a socket (called from
 * tcp6_seq_show() for listening and established entries).
 *
 * @seq: seq_file being filled
 * @sp:  socket to describe
 * @i:   entry number passed in by the caller
 *
 * The reported expiry time depends on which timer is pending:
 * tp->timeout for TCP_TIME_RETRANS and TCP_TIME_PROBE0,
 * sp->sk_timer.expires when sk_timer is pending, otherwise the current
 * jiffies value. It is printed relative to jiffies, converted with
 * jiffies_to_clock_t().
 *
 * NOTE(review): the opening of the print call, the assignment of dest and
 * several arguments are not visible in this listing.
 */
1954 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1956 struct in6_addr *dest, *src;
1959 unsigned long timer_expires;
1960 struct inet_opt *inet = inet_sk(sp);
1961 struct tcp_opt *tp = tcp_sk(sp);
1962 struct ipv6_pinfo *np = inet6_sk(sp);
1965 src = &np->rcv_saddr;
1966 destp = ntohs(inet->dport);
1967 srcp = ntohs(inet->sport);
1968 if (tp->pending == TCP_TIME_RETRANS) {
1970 timer_expires = tp->timeout;
1971 } else if (tp->pending == TCP_TIME_PROBE0) {
1973 timer_expires = tp->timeout;
1974 } else if (timer_pending(&sp->sk_timer)) {
1976 timer_expires = sp->sk_timer.expires;
1979 timer_expires = jiffies;
1983 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1984 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1986 src->s6_addr32[0], src->s6_addr32[1],
1987 src->s6_addr32[2], src->s6_addr32[3], srcp,
1988 dest->s6_addr32[0], dest->s6_addr32[1],
1989 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* Bytes written but not yet acknowledged, and bytes received but not yet copied. */
1991 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1993 jiffies_to_clock_t(timer_expires - jiffies),
1998 atomic_read(&sp->sk_refcnt), sp,
1999 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
/* snd_ssthresh values of 0xFFFF and above are printed as -1. */
2000 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Format one line of the TCPv6 proc listing for a TIME_WAIT bucket (called
 * from tcp6_seq_show() for TCP_SEQ_STATE_TIME_WAIT).
 *
 * @seq: seq_file being filled
 * @tw:  time-wait bucket to describe
 * @i:   entry number passed in by the caller
 *
 * Addresses and ports come from the bucket itself; the state column shows
 * tw->tw_substate, the timer column is the constant 3 followed by the time
 * left until tw->tw_ttd, and the line ends with the bucket reference count
 * and address.
 *
 * NOTE(review): the opening of the print call is not visible in this
 * listing.
 */
2004 static void get_timewait6_sock(struct seq_file *seq,
2005 struct tcp_tw_bucket *tw, int i)
2007 struct in6_addr *dest, *src;
/* Time left until tw->tw_ttd, in jiffies. */
2009 int ttd = tw->tw_ttd - jiffies;
2014 dest = &tw->tw_v6_daddr;
2015 src = &tw->tw_v6_rcv_saddr;
2016 destp = ntohs(tw->tw_dport);
2017 srcp = ntohs(tw->tw_sport);
2020 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2021 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2023 src->s6_addr32[0], src->s6_addr32[1],
2024 src->s6_addr32[2], src->s6_addr32[3], srcp,
2025 dest->s6_addr32[0], dest->s6_addr32[1],
2026 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2027 tw->tw_substate, 0, 0,
2028 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2029 atomic_read(&tw->tw_refcnt), tw);
2032 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for the TCPv6 proc listing; installed as
 * tcp6_seq_afinfo.seq_show.
 *
 * For SEQ_START_TOKEN the column header is emitted. Any other entry is
 * dispatched on st->state: listening and established entries go to
 * get_tcp6_sock(), open requests to get_openreq6() (together with
 * st->syn_wait_sk and st->uid) and TIME_WAIT entries to
 * get_timewait6_sock().
 *
 * NOTE(review): the assignment of st, the break statements and the return
 * statement are not visible in this listing.
 */
2033 static int tcp6_seq_show(struct seq_file *seq, void *v)
2035 struct tcp_iter_state *st;
2037 if (v == SEQ_START_TOKEN) {
2042 "st tx_queue rx_queue tr tm->when retrnsmt"
2043 " uid timeout inode\n");
2048 switch (st->state) {
2049 case TCP_SEQ_STATE_LISTENING:
2050 case TCP_SEQ_STATE_ESTABLISHED:
2051 get_tcp6_sock(seq, v, st->num);
2053 case TCP_SEQ_STATE_OPENREQ:
2054 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2056 case TCP_SEQ_STATE_TIME_WAIT:
2057 get_timewait6_sock(seq, v, st->num);
/*
 * Descriptor handed to tcp_proc_register() and tcp_proc_unregister() by
 * tcp6_proc_init() and tcp6_proc_exit(). It names tcp6_seq_show() as the
 * show callback and tcp6_seq_fops as the file operations.
 *
 * NOTE(review): further members may exist that are not visible in this
 * listing.
 */
2064 static struct file_operations tcp6_seq_fops;
2065 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2066 .owner = THIS_MODULE,
2069 .seq_show = tcp6_seq_show,
2070 .seq_fops = &tcp6_seq_fops,
/*
 * Register the TCPv6 proc listing described by tcp6_seq_afinfo.
 * Returns whatever tcp_proc_register() returns.
 */
2073 int __init tcp6_proc_init(void)
2075 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister the TCPv6 proc listing described by tcp6_seq_afinfo. */
2078 void tcp6_proc_exit(void)
2080 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * Protocol operations for TCP sockets over IPv6; referenced by
 * tcpv6_protosw.prot. Among the visible entries, connect, init, destroy,
 * backlog_rcv, hash and get_port point at tcp_v6_* routines, while the
 * remaining ones use the common tcp_* implementations.
 *
 * NOTE(review): further members may exist that are not visible in this
 * listing.
 */
2084 struct proto tcpv6_prot = {
2087 .connect = tcp_v6_connect,
2088 .disconnect = tcp_disconnect,
2089 .accept = tcp_accept,
2091 .init = tcp_v6_init_sock,
2092 .destroy = tcp_v6_destroy_sock,
2093 .shutdown = tcp_shutdown,
2094 .setsockopt = tcp_setsockopt,
2095 .getsockopt = tcp_getsockopt,
2096 .sendmsg = tcp_sendmsg,
2097 .recvmsg = tcp_recvmsg,
2098 .backlog_rcv = tcp_v6_do_rcv,
2099 .hash = tcp_v6_hash,
2100 .unhash = tcp_unhash,
2101 .get_port = tcp_v6_get_port,
/*
 * inet6 protocol entry that tcpv6_init() registers for IPPROTO_TCP:
 * tcp_v6_rcv() is the receive handler, tcp_v6_err() the error handler, and
 * the entry carries the INET6_PROTO_NOPOLICY and INET6_PROTO_FINAL flags.
 */
2104 static struct inet6_protocol tcpv6_protocol = {
2105 .handler = tcp_v6_rcv,
2106 .err_handler = tcp_v6_err,
2107 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2110 extern struct proto_ops inet6_stream_ops;
/*
 * Socket-layer registration record that tcpv6_init() passes to
 * inet6_register_protosw(): it ties SOCK_STREAM / IPPROTO_TCP to
 * tcpv6_prot and inet6_stream_ops and is flagged INET_PROTOSW_PERMANENT.
 *
 * NOTE(review): further members may exist that are not visible in this
 * listing.
 */
2112 static struct inet_protosw tcpv6_protosw = {
2113 .type = SOCK_STREAM,
2114 .protocol = IPPROTO_TCP,
2115 .prot = &tcpv6_prot,
2116 .ops = &inet6_stream_ops,
2119 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time setup of TCP over IPv6: registers tcpv6_protocol as the inet6
 * handler for IPPROTO_TCP and then registers tcpv6_protosw with the socket
 * layer. A failed protocol registration is only reported with a KERN_ERR
 * message; the protosw registration that follows is not conditional on it
 * in the visible lines.
 */
2122 void __init tcpv6_init(void)
2124 /* register inet6 protocol */
2125 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2126 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2127 inet6_register_protosw(&tcpv6_protosw);