3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/inet_ecn.h>
55 #include <net/protocol.h>
58 #include <asm/uaccess.h>
60 #include <linux/proc_fs.h>
61 #include <linux/seq_file.h>
/* Forward declarations for functions defined below, plus the two
 * af-specific operation tables: ipv6_specific for native v6 traffic,
 * ipv6_mapped for v4-mapped addresses on a v6 socket. */
63 static void tcp_v6_send_reset(struct sk_buff *skb);
64 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
65 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
68 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
69 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
71 static struct tcp_func ipv6_mapped;
72 static struct tcp_func ipv6_specific;
74 /* I have no idea if this is a good hash for v6 or not. -DaveM */
75 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
76 struct in6_addr *faddr, u16 fport)
78 int hashent = (lport ^ fport);
80 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
81 hashent ^= hashent>>16;
82 hashent ^= hashent>>8;
83 return (hashent & (tcp_ehash_size - 1));
86 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
88 struct inet_opt *inet = inet_sk(sk);
89 struct ipv6_pinfo *np = inet6_sk(sk);
90 struct in6_addr *laddr = &np->rcv_saddr;
91 struct in6_addr *faddr = &np->daddr;
92 __u16 lport = inet->num;
93 __u16 fport = inet->dport;
94 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/* Bind-time conflict check: walk every socket owning this bind bucket
 * and report a clash unless the two sockets are bound to different
 * devices, or SO_REUSEADDR applies and neither is listening.
 * NOTE(review): the sk2 declaration, the loop-predicate head and the
 * function tail appear elided in this view; verify against full file. */
97 static inline int tcp_v6_bind_conflict(struct sock *sk,
98 struct tcp_bind_bucket *tb)
101 struct hlist_node *node;
103 /* We must walk the whole port owner list in this case. -DaveM */
104 sk_for_each_bound(sk2, node, &tb->owners) {
106 (!sk->sk_bound_dev_if ||
107 !sk2->sk_bound_dev_if ||
108 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
109 (!sk->sk_reuse || !sk2->sk_reuse ||
110 sk2->sk_state == TCP_LISTEN) &&
111 ipv6_rcv_saddr_equal(sk, sk2))
118 /* Grrr, addr_type already calculated by caller, but I don't want
119 * to add some silly "cookie" argument to this method just for that.
120 * But it doesn't matter, the recalculation is in the rarest path
121 * this function ever takes.
/* Acquire a local port for sk.  snum == 0 means pick an ephemeral port
 * by rotating tcp_port_rover through the sysctl local port range;
 * otherwise look up/create the bind bucket for snum and run the
 * conflict check.  Returns 0 on success.
 * NOTE(review): several interior lines (rover increment, error paths,
 * success/fail labels) are elided in this view. */
123 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
125 struct tcp_bind_hashbucket *head;
126 struct tcp_bind_bucket *tb;
127 struct hlist_node *node;
132 int low = sysctl_local_port_range[0];
133 int high = sysctl_local_port_range[1];
134 int remaining = (high - low) + 1;
/* Ephemeral search: rover walk over [low, high] under portalloc lock. */
137 spin_lock(&tcp_portalloc_lock);
138 rover = tcp_port_rover;
140 if ((rover < low) || (rover > high))
142 head = &tcp_bhash[tcp_bhashfn(rover)];
143 spin_lock(&head->lock);
144 tb_for_each(tb, node, &head->chain)
145 if (tb->port == rover)
149 spin_unlock(&head->lock);
150 } while (--remaining > 0);
151 tcp_port_rover = rover;
152 spin_unlock(&tcp_portalloc_lock);
154 /* Exhausted local port range during search? */
159 /* OK, here is the one we will use. */
/* Explicit port: find (or later create) its bind bucket. */
162 head = &tcp_bhash[tcp_bhashfn(snum)];
163 spin_lock(&head->lock);
164 tb_for_each(tb, node, &head->chain)
165 if (tb->port == snum)
/* Bucket already owned: allow only if fastreuse applies or no conflict. */
171 if (tb && !hlist_empty(&tb->owners)) {
172 if (tb->fastreuse > 0 && sk->sk_reuse &&
173 sk->sk_state != TCP_LISTEN) {
177 if (tcp_v6_bind_conflict(sk, tb))
183 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
/* Fresh bucket: record whether future binders may fast-reuse it. */
185 if (hlist_empty(&tb->owners)) {
186 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
190 } else if (tb->fastreuse &&
191 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
195 if (!tcp_sk(sk)->bind_hash)
196 tcp_bind_hash(sk, tb, snum);
197 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
201 spin_unlock(&head->lock);
/* Insert an unhashed socket into the listening hash (TCP_LISTEN) or the
 * established hash (everything else), bumping the protocol use count.
 * NOTE(review): the lock declaration, else-branch brace and the
 * write_lock/unlock pair appear elided in this view. */
207 static __inline__ void __tcp_v6_hash(struct sock *sk)
209 struct hlist_head *list;
212 BUG_TRAP(sk_unhashed(sk));
214 if (sk->sk_state == TCP_LISTEN) {
215 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
216 lock = &tcp_lhash_lock;
219 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
220 list = &tcp_ehash[sk->sk_hashent].chain;
221 lock = &tcp_ehash[sk->sk_hashent].lock;
225 __sk_add_node(sk, list);
226 sock_prot_inc_use(sk->sk_prot);
/* Public hash entry point: no-op for closed sockets; dispatches to the
 * v4 path when this v6 socket is actually running v4-mapped.
 * NOTE(review): function tail elided in this view. */
231 static void tcp_v6_hash(struct sock *sk)
233 if (sk->sk_state != TCP_CLOSE) {
234 struct tcp_opt *tp = tcp_sk(sk);
236 if (tp->af_specific == &ipv6_mapped) {
/* Find the best listening socket for (daddr, hnum, dif): exact address
 * and device bindings score higher than wildcards; first perfect match
 * wins.  Runs under the listening-hash read lock.
 * NOTE(review): score accounting and return path elided in this view. */
246 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
249 struct hlist_node *node;
250 struct sock *result = NULL;
254 read_lock(&tcp_lhash_lock);
255 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
256 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
257 struct ipv6_pinfo *np = inet6_sk(sk);
/* A bound local address must match exactly; wildcard always passes. */
260 if (!ipv6_addr_any(&np->rcv_saddr)) {
261 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
265 if (sk->sk_bound_dev_if) {
266 if (sk->sk_bound_dev_if != dif)
274 if (score > hiscore) {
282 read_unlock(&tcp_lhash_lock);
286 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
287 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
289 * The sockhash lock must be held as a reader here.
/* Exact-match lookup in the established hash; the TIME_WAIT half of the
 * table lives at head + tcp_ehash_size and is scanned second. */
292 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
293 struct in6_addr *daddr, u16 hnum,
296 struct tcp_ehash_bucket *head;
298 struct hlist_node *node;
299 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
302 /* Optimize here for direct hit, only listening connections can
303 * have wildcards anyways.
305 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
306 head = &tcp_ehash[hash];
307 read_lock(&head->lock);
308 sk_for_each(sk, node, &head->chain) {
309 /* For IPV6 do the cheaper port and family tests first. */
310 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
311 goto hit; /* You sunk my battleship! */
313 /* Must check for a TIME_WAIT'er before going to listener hash. */
314 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
315 /* FIXME: acme: check this... */
316 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
318 if(*((__u32 *)&(tw->tw_dport)) == ports &&
319 sk->sk_family == PF_INET6) {
320 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
321 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
322 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
326 read_unlock(&head->lock);
331 read_unlock(&head->lock);
/* Combined lookup: try the established/TIME_WAIT hash first, fall back
 * to the listener hash on a miss. */
336 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
337 struct in6_addr *daddr, u16 hnum,
342 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
347 return tcp_v6_lookup_listener(daddr, hnum, dif);
/* Exported wrapper: takes dport in network byte order, converts to host
 * order for the internal lookup. */
350 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 dport,
357 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
365 * Open request hash tables.
/* Jenkins-style hash of the remote v6 address (all four 32-bit words),
 * seeded with rnd, reduced to a SYN-queue bucket index.
 * NOTE(review): the a/b/c declarations and the lines mixing in rport
 * and rnd appear elided in this view. */
368 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
372 a = raddr->s6_addr32[0];
373 b = raddr->s6_addr32[1];
374 c = raddr->s6_addr32[2];
376 a += JHASH_GOLDEN_RATIO;
377 b += JHASH_GOLDEN_RATIO;
379 __jhash_mix(a, b, c);
381 a += raddr->s6_addr32[3];
383 __jhash_mix(a, b, c);
385 return c & (TCP_SYNQ_HSIZE - 1);
/* Locate a pending open_request matching (raddr, laddr, rport, iif) in
 * the listener's SYN queue; *prevp receives the predecessor link so the
 * caller can unlink the entry. */
388 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
389 struct open_request ***prevp,
391 struct in6_addr *raddr,
392 struct in6_addr *laddr,
395 struct tcp_listen_opt *lopt = tp->listen_opt;
396 struct open_request *req, **prev;
398 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
399 (req = *prev) != NULL;
400 prev = &req->dl_next) {
401 if (req->rmt_port == rport &&
402 req->class->family == AF_INET6 &&
403 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
404 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
/* iif == 0 means the request is not device-bound. */
405 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
406 BUG_TRAP(req->sk == NULL);
/* TCP-over-IPv6 pseudo-header checksum, folded over an existing partial
 * checksum base.  NOTE(review): the final parameter declaration line
 * (the checksum base) is elided in this view. */
415 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
416 struct in6_addr *saddr,
417 struct in6_addr *daddr,
420 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* Pick a secure initial sequence number from the packet's addresses;
 * v4-mapped traffic (non-ETH_P_IPV6 protocol) uses the v4 generator.
 * NOTE(review): the port arguments to both calls are elided in this view. */
423 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
425 if (skb->protocol == htons(ETH_P_IPV6)) {
426 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
427 skb->nh.ipv6h->saddr.s6_addr32,
431 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Final uniqueness check when connecting: under the ehash bucket write
 * lock, fail with -EADDRNOTAVAIL if the 4-tuple is already established;
 * a matching TIME_WAIT entry may be recycled (sequence/timestamp state
 * inherited) before inserting sk into the established hash. */
438 static int tcp_v6_check_established(struct sock *sk)
440 struct inet_opt *inet = inet_sk(sk);
441 struct ipv6_pinfo *np = inet6_sk(sk);
442 struct in6_addr *daddr = &np->rcv_saddr;
443 struct in6_addr *saddr = &np->daddr;
444 int dif = sk->sk_bound_dev_if;
445 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
446 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
447 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
449 struct hlist_node *node;
450 struct tcp_tw_bucket *tw;
452 write_lock_bh(&head->lock);
454 /* Check TIME-WAIT sockets first. */
455 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
456 tw = (struct tcp_tw_bucket*)sk2;
458 if(*((__u32 *)&(tw->tw_dport)) == ports &&
459 sk2->sk_family == PF_INET6 &&
460 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
461 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
462 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
463 struct tcp_opt *tp = tcp_sk(sk);
/* Timestamp-based recycle: pick a write_seq safely beyond the old
 * connection's, and inherit its recent-timestamp state. */
465 if (tw->tw_ts_recent_stamp) {
466 /* See comment in tcp_ipv4.c */
467 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
470 tp->ts_recent = tw->tw_ts_recent;
471 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
480 /* And established part... */
481 sk_for_each(sk2, node, &head->chain) {
482 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
487 BUG_TRAP(sk_unhashed(sk));
488 __sk_add_node(sk, &head->chain);
489 sk->sk_hashent = hash;
490 sock_prot_inc_use(sk->sk_prot);
491 write_unlock_bh(&head->lock);
494 /* Silly. Should hash-dance instead... */
496 tcp_tw_deschedule(tw);
497 NET_INC_STATS_BH(TimeWaitRecycled);
505 write_unlock_bh(&head->lock);
506 return -EADDRNOTAVAIL;
/* Bind (if still unbound) and hash a connecting socket.  When sk is the
 * sole owner of its bind bucket the tuple is unique by construction;
 * otherwise fall through to the full established-table check. */
509 static int tcp_v6_hash_connect(struct sock *sk)
511 struct tcp_bind_hashbucket *head;
512 struct tcp_bind_bucket *tb;
515 if (inet_sk(sk)->num == 0) {
/* num == 0: request an ephemeral port. */
516 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
519 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
522 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
525 spin_lock_bh(&head->lock);
527 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
529 spin_unlock_bh(&head->lock);
532 spin_unlock_bh(&head->lock);
533 return tcp_v6_check_established(sk);
537 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
539 return IP6CB(skb)->iif;
/* Active open: validate the sockaddr, resolve flow labels and scope ids,
 * route the flow, then enter SYN_SENT and transmit the SYN.  A v4-mapped
 * destination is redirected to tcp_v4_connect() with af_specific swapped
 * to the mapped operations table.  Returns 0 or a negative errno. */
542 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
545 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
546 struct inet_opt *inet = inet_sk(sk);
547 struct ipv6_pinfo *np = inet6_sk(sk);
548 struct tcp_opt *tp = tcp_sk(sk);
549 struct in6_addr *saddr = NULL;
551 struct dst_entry *dst;
555 if (addr_len < SIN6_LEN_RFC2133)
558 if (usin->sin6_family != AF_INET6)
559 return(-EAFNOSUPPORT);
561 memset(&fl, 0, sizeof(fl));
/* Carry the caller's flow label; a non-zero label must name an existing
 * flow whose destination then overrides sin6_addr. */
564 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
565 IP6_ECN_flow_init(fl.fl6_flowlabel);
566 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
567 struct ip6_flowlabel *flowlabel;
568 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
569 if (flowlabel == NULL)
571 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
572 fl6_sock_release(flowlabel);
577 * connect() to INADDR_ANY means loopback (BSD'ism).
580 if(ipv6_addr_any(&usin->sin6_addr))
581 usin->sin6_addr.s6_addr[15] = 0x1;
583 addr_type = ipv6_addr_type(&usin->sin6_addr);
585 if(addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a device: take it from sin6_scope_id,
 * which must agree with any existing device binding. */
588 if (addr_type&IPV6_ADDR_LINKLOCAL) {
589 if (addr_len >= sizeof(struct sockaddr_in6) &&
590 usin->sin6_scope_id) {
591 /* If interface is set while binding, indices
594 if (sk->sk_bound_dev_if &&
595 sk->sk_bound_dev_if != usin->sin6_scope_id)
598 sk->sk_bound_dev_if = usin->sin6_scope_id;
601 /* Connect to link-local address requires an interface */
602 if (!sk->sk_bound_dev_if)
/* New peer: stale timestamp state must not leak across destinations. */
606 if (tp->ts_recent_stamp &&
607 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
609 tp->ts_recent_stamp = 0;
613 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
614 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: hand off to the IPv4 connect path. */
620 if (addr_type == IPV6_ADDR_MAPPED) {
621 u32 exthdrlen = tp->ext_header_len;
622 struct sockaddr_in sin;
624 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
626 if (__ipv6_only_sock(sk))
629 sin.sin_family = AF_INET;
630 sin.sin_port = usin->sin6_port;
631 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
633 tp->af_specific = &ipv6_mapped;
634 sk->sk_backlog_rcv = tcp_v4_do_rcv;
636 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* On failure restore the native-v6 operations. */
639 tp->ext_header_len = exthdrlen;
640 tp->af_specific = &ipv6_specific;
641 sk->sk_backlog_rcv = tcp_v6_do_rcv;
644 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
646 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
653 if (!ipv6_addr_any(&np->rcv_saddr))
654 saddr = &np->rcv_saddr;
/* Build the flow and route it; a source route overrides fl6_dst. */
656 fl.proto = IPPROTO_TCP;
657 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
658 ipv6_addr_copy(&fl.fl6_src,
659 (saddr ? saddr : &np->saddr));
660 fl.oif = sk->sk_bound_dev_if;
661 fl.fl_ip_dport = usin->sin6_port;
662 fl.fl_ip_sport = inet->sport;
664 if (np->opt && np->opt->srcrt) {
665 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
666 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
669 err = ip6_dst_lookup(sk, &dst, &fl);
676 ipv6_addr_copy(&np->rcv_saddr, saddr);
679 /* set the source address */
680 ipv6_addr_copy(&np->saddr, saddr);
681 inet->rcv_saddr = LOOPBACK4_IPV6;
683 ip6_dst_store(sk, dst, NULL);
684 sk->sk_route_caps = dst->dev->features &
685 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
687 tp->ext_header_len = 0;
689 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
690 tp->ext2_header_len = dst->header_len;
692 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
694 inet->dport = usin->sin6_port;
696 tcp_set_state(sk, TCP_SYN_SENT);
697 err = tcp_v6_hash_connect(sk);
702 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
707 err = tcp_connect(sk);
/* Failure path: back to CLOSE and clear route capabilities. */
714 tcp_set_state(sk, TCP_CLOSE);
718 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP: locate the affected socket, validate the
 * quoted sequence number, handle PMTU discovery (PKT_TOOBIG), and report
 * other errors to the socket — dropping pending open_requests where the
 * error refers to an unaccepted connection. */
722 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
723 int type, int code, int offset, __u32 info)
725 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
726 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
727 struct ipv6_pinfo *np;
733 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
736 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), Icmp6InErrors);
740 if (sk->sk_state == TCP_TIME_WAIT) {
741 tcp_tw_put((struct tcp_tw_bucket*)sk);
746 if (sock_owned_by_user(sk))
747 NET_INC_STATS_BH(LockDroppedIcmps);
749 if (sk->sk_state == TCP_CLOSE)
/* The quoted segment must fall inside the current send window. */
753 seq = ntohl(th->seq);
754 if (sk->sk_state != TCP_LISTEN &&
755 !between(seq, tp->snd_una, tp->snd_nxt)) {
756 NET_INC_STATS_BH(OutOfWindowIcmps);
762 if (type == ICMPV6_PKT_TOOBIG) {
763 struct dst_entry *dst = NULL;
765 if (sock_owned_by_user(sk))
767 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
770 /* icmp should have updated the destination cache entry */
771 dst = __sk_dst_check(sk, np->dst_cookie);
/* Cached dst gone: rebuild the flow and re-route. */
774 struct inet_opt *inet = inet_sk(sk);
777 /* BUGGG_FUTURE: Again, it is not clear how
778 to handle rthdr case. Ignore this complexity
781 memset(&fl, 0, sizeof(fl));
782 fl.proto = IPPROTO_TCP;
783 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
784 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
785 fl.oif = sk->sk_bound_dev_if;
786 fl.fl_ip_dport = inet->dport;
787 fl.fl_ip_sport = inet->sport;
789 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
790 sk->sk_err_soft = -err;
/* Shrink the MSS to the new path MTU and retransmit what now fits. */
796 if (tp->pmtu_cookie > dst_pmtu(dst)) {
797 tcp_sync_mss(sk, dst_pmtu(dst));
798 tcp_simple_retransmit(sk);
799 } /* else let the usual retransmit timer handle it */
804 icmpv6_err_convert(type, code, &err);
806 /* Might be for an open_request */
807 switch (sk->sk_state) {
808 struct open_request *req, **prev;
810 if (sock_owned_by_user(sk))
813 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
814 &hdr->saddr, tcp_v6_iif(skb));
818 /* ICMPs are not backlogged, hence we cannot get
819 * an established socket here.
821 BUG_TRAP(req->sk == NULL);
823 if (seq != req->snt_isn) {
824 NET_INC_STATS_BH(OutOfWindowIcmps);
828 tcp_synq_drop(sk, req, prev);
832 case TCP_SYN_RECV: /* Cannot happen.
833 It can, if SYNs are crossed. --ANK */
834 if (!sock_owned_by_user(sk)) {
835 TCP_INC_STATS_BH(TcpAttemptFails);
837 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
841 sk->sk_err_soft = err;
845 if (!sock_owned_by_user(sk) && np->recverr) {
847 sk->sk_error_report(sk);
849 sk->sk_err_soft = err;
/* Build and transmit a SYN|ACK for a pending open_request: construct the
 * flow (honoring any inverted source route from the SYN's options),
 * route it if no dst was supplied, checksum and send via ip6_xmit. */
857 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
858 struct dst_entry *dst)
860 struct ipv6_pinfo *np = inet6_sk(sk);
861 struct sk_buff * skb;
862 struct ipv6_txoptions *opt = NULL;
866 memset(&fl, 0, sizeof(fl));
867 fl.proto = IPPROTO_TCP;
868 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
869 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
870 fl.fl6_flowlabel = 0;
871 fl.oif = req->af.v6_req.iif;
872 fl.fl_ip_dport = req->rmt_port;
873 fl.fl_ip_sport = inet_sk(sk)->sport;
/* Reply along the reversed source route taken from the saved SYN. */
878 np->rxopt.bits.srcrt == 2 &&
879 req->af.v6_req.pktopts) {
880 struct sk_buff *pktopts = req->af.v6_req.pktopts;
881 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
883 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
886 if (opt && opt->srcrt) {
887 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
888 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
891 err = ip6_dst_lookup(sk, &dst, &fl);
896 skb = tcp_make_synack(sk, dst, req);
898 struct tcphdr *th = skb->h.th;
900 th->check = tcp_v6_check(th, skb->len,
901 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
902 csum_partial((char *)th, skb->len, skb->csum));
904 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
905 err = ip6_xmit(sk, skb, &fl, opt, 0);
906 if (err == NET_XMIT_CN)
/* Free an options block we inverted locally (not the socket's own). */
912 if (opt && opt != np->opt)
913 sock_kfree_s(sk, opt, opt->tot_len);
917 static void tcp_v6_or_free(struct open_request *req)
919 if (req->af.v6_req.pktopts)
920 kfree_skb(req->af.v6_req.pktopts);
/* open_request operations table for IPv6 connection requests.
 * NOTE(review): the .family initializer line appears elided here. */
923 static struct or_calltable or_ipv6 = {
925 .rtx_syn_ack = tcp_v6_send_synack,
926 .send_ack = tcp_v6_or_send_ack,
927 .destructor = tcp_v6_or_free,
928 .send_reset = tcp_v6_send_reset
/* True if the packet carries any IPv6 option/ancillary data that this
 * socket asked to receive (hop-by-hop, flow info, routing header, or
 * destination options), i.e. the skb must be kept as pktoptions. */
931 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
933 struct ipv6_pinfo *np = inet6_sk(sk);
934 struct inet6_skb_parm *opt = IP6CB(skb);
937 if ((opt->hop && np->rxopt.bits.hopopts) ||
938 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
939 np->rxopt.bits.rxflow) ||
940 (opt->srcrt && np->rxopt.bits.srcrt) ||
941 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/* Fill in the TCP checksum for an outgoing segment: with hardware
 * checksum offload only the pseudo-header sum is computed (the device
 * finishes it at the offset stored in skb->csum); otherwise compute the
 * full checksum in software. */
948 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
951 struct ipv6_pinfo *np = inet6_sk(sk);
953 if (skb->ip_summed == CHECKSUM_HW) {
954 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
955 skb->csum = offsetof(struct tcphdr, check);
957 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
958 csum_partial((char *)th, th->doff<<2,
/* Send a RST in response to skb without any owning socket: build a bare
 * TCP header with addresses and ports swapped, checksum it, route by
 * the incoming interface, and transmit.  Non-unicast destinations are
 * never RST'd. */
964 static void tcp_v6_send_reset(struct sk_buff *skb)
966 struct tcphdr *th = skb->h.th, *t1;
967 struct sk_buff *buff;
973 if (!ipv6_unicast_destination(skb))
977 * We need to grab some memory, and put together an RST,
978 * and then put it into the queue to be sent.
981 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
985 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
987 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
989 /* Swap the send and the receive. */
990 memset(t1, 0, sizeof(*t1));
991 t1->dest = th->source;
992 t1->source = th->dest;
993 t1->doff = sizeof(*t1)/4;
997 t1->seq = th->ack_seq;
/* No ACK in the offender: ACK everything it sent (SYN/FIN count one). */
1000 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1001 + skb->len - (th->doff<<2));
1004 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1006 memset(&fl, 0, sizeof(fl));
1007 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1008 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1010 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1011 sizeof(*t1), IPPROTO_TCP,
1014 fl.proto = IPPROTO_TCP;
1015 fl.oif = tcp_v6_iif(skb);
1016 fl.fl_ip_dport = t1->dest;
1017 fl.fl_ip_sport = t1->source;
1019 /* sk = NULL, but it is safe for now. RST socket required. */
1020 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1021 ip6_xmit(NULL, buff, &fl, NULL, 0);
1022 TCP_INC_STATS_BH(TcpOutSegs);
1023 TCP_INC_STATS_BH(TcpOutRsts);
/* Send a bare ACK (used for TIME_WAIT and open_request replies) with the
 * given sequence numbers and window, optionally carrying a timestamp
 * option when ts is non-zero.  Built and routed sk-less like the RST. */
1030 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1032 struct tcphdr *th = skb->h.th, *t1;
1033 struct sk_buff *buff;
1035 int tot_len = sizeof(struct tcphdr);
1037 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
1041 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
1046 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1048 /* Swap the send and the receive. */
1049 memset(t1, 0, sizeof(*t1));
1050 t1->dest = th->source;
1051 t1->source = th->dest;
1052 t1->doff = tot_len/4;
1053 t1->seq = htonl(seq);
1054 t1->ack_seq = htonl(ack);
1056 t1->window = htons(win);
/* TCP timestamp option: two NOPs for alignment, then kind/len/vals. */
1059 u32 *ptr = (u32*)(t1 + 1);
1060 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1061 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1062 *ptr++ = htonl(tcp_time_stamp);
1066 buff->csum = csum_partial((char *)t1, tot_len, 0);
1068 memset(&fl, 0, sizeof(fl));
1069 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1070 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1072 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1073 tot_len, IPPROTO_TCP,
1076 fl.proto = IPPROTO_TCP;
1077 fl.oif = tcp_v6_iif(skb);
1078 fl.fl_ip_dport = t1->dest;
1079 fl.fl_ip_sport = t1->source;
1081 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1082 ip6_xmit(NULL, buff, &fl, NULL, 0);
1083 TCP_INC_STATS_BH(TcpOutSegs);
/* Re-ACK a segment arriving for a TIME_WAIT bucket with the final
 * sequence state preserved in the bucket. */
1090 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1092 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1094 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1095 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1100 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1102 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/* For a segment hitting a listener: match it against pending
 * open_requests first, then against established/TIME_WAIT sockets for
 * the same tuple.  Returns the socket to process the segment on.
 * NOTE(review): return paths between the branches are elided here. */
1106 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1108 struct open_request *req, **prev;
1109 struct tcphdr *th = skb->h.th;
1110 struct tcp_opt *tp = tcp_sk(sk);
1113 /* Find possible connection requests. */
1114 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1115 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1117 return tcp_check_req(sk, skb, req, prev);
1119 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1121 &skb->nh.ipv6h->daddr,
1126 if (nsk->sk_state != TCP_TIME_WAIT) {
1130 tcp_tw_put((struct tcp_tw_bucket*)nsk);
/* SYN-cookie hook, disabled for v6 in this tree. */
1134 #if 0 /*def CONFIG_SYN_COOKIES*/
1135 if (!th->rst && !th->syn && th->ack)
1136 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* Insert a new open_request at the head of its SYN-queue bucket under
 * the syn_wait write lock, arming the initial retransmit expiry. */
1141 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1143 struct tcp_opt *tp = tcp_sk(sk);
1144 struct tcp_listen_opt *lopt = tp->listen_opt;
1145 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1148 req->expires = jiffies + TCP_TIMEOUT_INIT;
1150 req->dl_next = lopt->syn_table[h];
1152 write_lock(&tp->syn_wait_lock);
1153 lopt->syn_table[h] = req;
1154 write_unlock(&tp->syn_wait_lock);
1156 #ifdef CONFIG_ACCEPT_QUEUES
1157 tcp_synq_added(sk, req);
1164 /* FIXME: this is substantially similar to the ipv4 code.
1165 * Can some kind of merge be done? -- erics
/* Handle an incoming SYN on a listening socket: apply SYN-queue and
 * accept-queue admission control, parse options, allocate and fill an
 * open_request (capturing pktoptions if the socket wants them), send
 * the SYN|ACK and queue the request.  Returns 0 (never sends a reset
 * from the drop path). */
1167 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1169 struct ipv6_pinfo *np = inet6_sk(sk);
1170 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1171 struct open_request *req = NULL;
1172 __u32 isn = TCP_SKB_CB(skb)->when;
1173 #ifdef CONFIG_ACCEPT_QUEUES
1177 if (skb->protocol == htons(ETH_P_IP))
1178 return tcp_v4_conn_request(sk, skb);
1180 if (!ipv6_unicast_destination(skb))
1185 * There are no SYN attacks on IPv6, yet...
1187 if (tcp_synq_is_full(sk) && !isn) {
1188 if (net_ratelimit())
1189 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
/* Map the packet's nfmark to an accept-queue class, 0 as fallback. */
1193 #ifdef CONFIG_ACCEPT_QUEUES
1194 class = (skb->nfmark <= 0) ? 0 :
1195 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1197 * Accept only if the class has shares set or if the default class
1198 * i.e. class 0 has shares
1200 if (!(tcp_sk(sk)->acceptq[class].aq_valid)) {
1201 if (tcp_sk(sk)->acceptq[0].aq_valid)
1208 /* Accept backlog is full. If we have already queued enough
1209 * of warm entries in syn queue, drop request. It is better than
1210 * clogging syn queue with openreqs with exponentially increasing
1213 #ifdef CONFIG_ACCEPT_QUEUES
1214 if (tcp_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1216 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1221 req = tcp_openreq_alloc();
/* Parse the SYN's TCP options against a scratch tcp_opt. */
1225 tcp_clear_options(&tmptp);
1226 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1227 tmptp.user_mss = tp->user_mss;
1229 tcp_parse_options(skb, &tmptp, 0);
1231 tmptp.tstamp_ok = tmptp.saw_tstamp;
1232 tcp_openreq_init(req, &tmptp, skb);
1233 #ifdef CONFIG_ACCEPT_QUEUES
1234 req->acceptq_class = class;
1235 req->acceptq_time_stamp = jiffies;
1237 req->class = &or_ipv6;
1238 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1239 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1240 TCP_ECN_create_request(req, skb->h.th);
1241 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive if its options/ancillary data are wanted. */
1242 if (ipv6_opt_accepted(sk, skb) ||
1243 np->rxopt.bits.rxinfo ||
1244 np->rxopt.bits.rxhlim) {
1245 atomic_inc(&skb->users);
1246 req->af.v6_req.pktopts = skb;
1248 req->af.v6_req.iif = sk->sk_bound_dev_if;
1250 /* So that link locals have meaning */
1251 if (!sk->sk_bound_dev_if &&
1252 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1253 req->af.v6_req.iif = tcp_v6_iif(skb);
1256 isn = tcp_v6_init_sequence(sk,skb);
1260 if (tcp_v6_send_synack(sk, req, NULL))
1263 tcp_v6_synq_add(sk, req);
1269 tcp_openreq_free(req);
1271 TCP_INC_STATS_BH(TcpAttemptFails);
1272 return 0; /* don't send reset */
/* Create the child socket when the 3-way handshake completes.  The
 * v4-mapped path delegates to tcp_v4_syn_recv_sock() and then retrofits
 * the v6 control block with mapped addresses and the mapped ops table.
 * The native path routes the reply flow, clones the child from the
 * open_request, copies/clones IPv6 options and pktoptions, sizes the
 * MSS from the route, and hashes the child into the established table. */
1275 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1276 struct open_request *req,
1277 struct dst_entry *dst)
1279 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1280 struct tcp6_sock *newtcp6sk;
1281 struct inet_opt *newinet;
1282 struct tcp_opt *newtp;
1284 struct ipv6_txoptions *opt;
1286 if (skb->protocol == htons(ETH_P_IP)) {
/* v4-mapped child: let the IPv4 code build it, then fix up v6 state. */
1291 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1296 newtcp6sk = (struct tcp6_sock *)newsk;
1297 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1299 newinet = inet_sk(newsk);
1300 newnp = inet6_sk(newsk);
1301 newtp = tcp_sk(newsk);
1303 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1305 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1308 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1311 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1313 newtp->af_specific = &ipv6_mapped;
1314 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1315 newnp->pktoptions = NULL;
1317 newnp->mcast_oif = tcp_v6_iif(skb);
1318 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1320 /* Charge newly allocated IPv6 socket. Though it is mapped,
1323 #ifdef INET_REFCNT_DEBUG
1324 atomic_inc(&inet6_sock_nr);
1327 /* It is tricky place. Until this moment IPv4 tcp
1328 worked with IPv6 af_tcp.af_specific.
1331 tcp_sync_mss(newsk, newtp->pmtu_cookie);
/* Native v6 path starts here. */
1338 #ifdef CONFIG_ACCEPT_QUEUES
1339 if (tcp_acceptq_is_full(sk, req->acceptq_class))
1341 if (sk_acceptq_is_full(sk))
/* Reply along the reversed source route saved with the SYN, if asked. */
1345 if (np->rxopt.bits.srcrt == 2 &&
1346 opt == NULL && req->af.v6_req.pktopts) {
1347 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1349 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1355 memset(&fl, 0, sizeof(fl));
1356 fl.proto = IPPROTO_TCP;
1357 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1358 if (opt && opt->srcrt) {
1359 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1360 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1362 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1363 fl.oif = sk->sk_bound_dev_if;
1364 fl.fl_ip_dport = req->rmt_port;
1365 fl.fl_ip_sport = inet_sk(sk)->sport;
1367 if (ip6_dst_lookup(sk, &dst, &fl))
1371 newsk = tcp_create_openreq_child(sk, req, skb);
1375 /* Charge newly allocated IPv6 socket */
1376 #ifdef INET_REFCNT_DEBUG
1377 atomic_inc(&inet6_sock_nr);
1380 ip6_dst_store(newsk, dst, NULL);
1381 newsk->sk_route_caps = dst->dev->features &
1382 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1384 newtcp6sk = (struct tcp6_sock *)newsk;
1385 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1387 newtp = tcp_sk(newsk);
1388 newinet = inet_sk(newsk);
1389 newnp = inet6_sk(newsk);
1391 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1393 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1394 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1395 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1396 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1398 /* Now IPv6 options...
1400 First: no IPv4 options.
1402 newinet->opt = NULL;
1405 newnp->rxopt.all = np->rxopt.all;
1407 /* Clone pktoptions received with SYN */
1408 newnp->pktoptions = NULL;
1409 if (req->af.v6_req.pktopts) {
1410 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1412 kfree_skb(req->af.v6_req.pktopts);
1413 req->af.v6_req.pktopts = NULL;
1414 if (newnp->pktoptions)
1415 skb_set_owner_r(newnp->pktoptions, newsk);
1418 newnp->mcast_oif = tcp_v6_iif(skb);
1419 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1421 /* Clone native IPv6 options from listening socket (if any)
1423 Yes, keeping reference count would be much more clever,
1424 but we make one more one thing there: reattach optmem
1428 newnp->opt = ipv6_dup_options(newsk, opt);
1430 sock_kfree_s(sk, opt, opt->tot_len);
1433 newtp->ext_header_len = 0;
1435 newtp->ext_header_len = newnp->opt->opt_nflen +
1436 newnp->opt->opt_flen;
1437 newtp->ext2_header_len = dst->header_len;
1439 tcp_sync_mss(newsk, dst_pmtu(dst));
1440 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1441 tcp_initialize_rcv_mss(newsk);
1443 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1445 __tcp_v6_hash(newsk);
1446 tcp_inherit_port(sk, newsk);
/* Error exits: account the overflow/drop and free a private opt copy. */
1451 NET_INC_STATS_BH(ListenOverflows);
1453 NET_INC_STATS_BH(ListenDrops);
1454 if (opt && opt != np->opt)
1455 sock_kfree_s(sk, opt, opt->tot_len);
/* Validate or defer the TCP checksum on receive: verify hardware-summed
 * packets immediately, fully verify short packets in software, and for
 * longer ones store the pseudo-header sum for later incremental check. */
1460 static int tcp_v6_checksum_init(struct sk_buff *skb)
1462 if (skb->ip_summed == CHECKSUM_HW) {
1463 skb->ip_summed = CHECKSUM_UNNECESSARY;
1464 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1465 &skb->nh.ipv6h->daddr,skb->csum))
1467 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1469 if (skb->len <= 76) {
1470 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1471 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1473 skb->ip_summed = CHECKSUM_UNNECESSARY;
1475 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1476 &skb->nh.ipv6h->daddr,0);
1481 /* The socket must have its spinlock held when we get
1484 * We have a potential double-lock case here, so even when
1485 * doing backlog processing we use the BH locking scheme.
1486 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive handler for TCP over IPv6; also used as the
 * backlog_rcv callback (see tcpv6_prot below).  Dispatches by socket
 * state and implements the IPV6_PKTOPTIONS latching described inline.
 * NOTE(review): several original lines are absent from this excerpt.
 */
1489 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1491 struct ipv6_pinfo *np = inet6_sk(sk);
1493 struct sk_buff *opt_skb = NULL;
1495 /* Imagine: socket is IPv6. IPv4 packet arrives,
1496 goes to IPv4 receive handler and backlogged.
1497 From backlog it always goes here. Kerboom...
1498 Fortunately, tcp_rcv_established and rcv_established
1499 handle them correctly, but it is not case with
1500 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* Mapped-address case: hand IPv4 frames back to the v4 receive path. */
1503 if (skb->protocol == htons(ETH_P_IP))
1504 return tcp_v4_do_rcv(sk, skb);
1506 if (sk_filter(sk, skb, 0))
1510 * socket locking is here for SMP purposes as backlog rcv
1511 * is currently called with bh processing disabled.
1514 /* Do Stevens' IPV6_PKTOPTIONS.
1516 Yes, guys, it is the only place in our code, where we
1517 may make it not affecting IPv4.
1518 The rest of code is protocol independent,
1519 and I do not like idea to uglify IPv4.
1521 Actually, all the idea behind IPV6_PKTOPTIONS
1522 looks not very well thought. For now we latch
1523 options, received in the last packet, enqueued
1524 by tcp. Feel free to propose better solution.
/* Clone the skb so its ancillary data can be latched after the
 * protocol machinery has consumed the original. */
1528 opt_skb = skb_clone(skb, GFP_ATOMIC);
1530 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1531 TCP_CHECK_TIMER(sk);
1532 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1534 TCP_CHECK_TIMER(sk);
1536 goto ipv6_pktoptions;
/* Slow path: re-verify header length and checksum before the state
 * machine sees the segment. */
1540 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1543 if (sk->sk_state == TCP_LISTEN) {
1544 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1549 * Queue it on the new socket if the new socket is active,
1550 * otherwise we just shortcircuit this and continue with
1554 if (tcp_child_process(sk, nsk, skb))
1557 __kfree_skb(opt_skb);
1562 TCP_CHECK_TIMER(sk);
1563 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1565 TCP_CHECK_TIMER(sk);
1567 goto ipv6_pktoptions;
1571 tcp_v6_send_reset(skb);
1574 __kfree_skb(opt_skb);
1578 TCP_INC_STATS_BH(TcpInErrs);
1583 /* Do you ask, what is it?
1585 1. skb was enqueued by tcp.
1586 2. skb is added to tail of read queue, rather than out of order.
1587 3. socket is not in passive state.
1588 4. Finally, it really contains options, which user wants to receive.
/* Latch options only for in-order data on an active socket
 * (conditions 1-4 above). */
1591 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1592 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1593 if (np->rxopt.bits.rxinfo)
1594 np->mcast_oif = tcp_v6_iif(opt_skb);
1595 if (np->rxopt.bits.rxhlim)
1596 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1597 if (ipv6_opt_accepted(sk, opt_skb)) {
1598 skb_set_owner_r(opt_skb, sk);
/* xchg() swaps in the new options skb; the displaced one (if any)
 * comes back in opt_skb for freeing below. */
1599 opt_skb = xchg(&np->pktoptions, opt_skb);
1601 __kfree_skb(opt_skb);
1602 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Protocol-level entry point for inbound TCP/IPv6 segments (registered
 * via tcpv6_protocol below).  Validates the header, fills in the TCP
 * control block, looks up the owning socket and either processes the
 * segment directly, prequeues it, or backlogs it; also handles
 * TIME_WAIT sockets.  NOTE(review): error/exit labels and several
 * original lines are absent from this excerpt.
 */
1611 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1613 struct sk_buff *skb = *pskb;
1618 if (skb->pkt_type != PACKET_HOST)
1622 * Count it even if it's bad.
1624 TCP_INC_STATS_BH(TcpInSegs);
/* Ensure the basic header, then the full header incl. options, is in
 * linear skb data before touching th->doff. */
1626 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1631 if (th->doff < sizeof(struct tcphdr)/4)
1633 if (!pskb_may_pull(skb, th->doff*4))
1636 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1637 tcp_v6_checksum_init(skb) < 0))
/* Cache sequence-space bookkeeping in the skb control block; SYN and
 * FIN each consume one sequence number. */
1641 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1642 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1643 skb->len - th->doff*4);
1644 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1645 TCP_SKB_CB(skb)->when = 0;
1646 TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
1647 TCP_SKB_CB(skb)->sacked = 0;
1649 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1650 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1656 if (sk->sk_state == TCP_TIME_WAIT)
1659 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1660 goto discard_and_relse;
1662 if (sk_filter(sk, skb, 0))
1663 goto discard_and_relse;
/* Deliver now if the socket is not owned by user context; otherwise
 * queue on the backlog for later processing under the socket lock. */
1669 if (!sock_owned_by_user(sk)) {
1670 if (!tcp_prequeue(sk, skb))
1671 ret = tcp_v6_do_rcv(sk, skb);
1673 sk_add_backlog(sk, skb);
1677 return ret ? -1 : 0;
/* No-socket path: re-check checksum before replying with a reset. */
1680 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1683 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1685 TCP_INC_STATS_BH(TcpInErrs);
1687 tcp_v6_send_reset(skb);
/* TIME_WAIT handling: policy + checksum checks, then let the
 * timewait state machine decide (SYN to a new listener, ACK, RST). */
1704 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1705 tcp_tw_put((struct tcp_tw_bucket *) sk);
1709 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1710 TCP_INC_STATS_BH(TcpInErrs);
1711 tcp_tw_put((struct tcp_tw_bucket *) sk);
1715 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1716 skb, th, skb->len)) {
1721 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1723 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1724 tcp_tw_put((struct tcp_tw_bucket *)sk);
1728 /* Fall through to ACK */
1731 tcp_v6_timewait_ack(sk, skb);
1735 case TCP_TW_SUCCESS:;
/*
 * Re-validate (and if necessary re-create) the cached routing entry for
 * a connected IPv6 TCP socket, refreshing the route-dependent header
 * length and device feature caps.  NOTE(review): some original lines
 * are absent from this excerpt.
 */
1740 static int tcp_v6_rebuild_header(struct sock *sk)
1743 struct dst_entry *dst;
1744 struct ipv6_pinfo *np = inet6_sk(sk);
/* Cached dst invalid/expired: rebuild the flow and look it up again. */
1746 dst = __sk_dst_check(sk, np->dst_cookie);
1749 struct inet_opt *inet = inet_sk(sk);
1752 memset(&fl, 0, sizeof(fl));
1753 fl.proto = IPPROTO_TCP;
1754 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1755 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1756 fl.fl6_flowlabel = np->flow_label;
1757 fl.oif = sk->sk_bound_dev_if;
1758 fl.fl_ip_dport = inet->dport;
1759 fl.fl_ip_sport = inet->sport;
/* With a type-0 routing header, route towards the first hop, not the
 * final destination. */
1761 if (np->opt && np->opt->srcrt) {
1762 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1763 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1766 err = ip6_dst_lookup(sk, &dst, &fl);
/* Lookup failure: drop route capabilities so the caller degrades. */
1769 sk->sk_route_caps = 0;
1773 ip6_dst_store(sk, dst, NULL);
/* IPv6 path: mask off IPv4-only checksum/TSO offload features. */
1774 sk->sk_route_caps = dst->dev->features &
1775 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1776 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * Transmit one TCP segment over IPv6 (the queue_xmit hook in
 * ipv6_specific).  Builds the flow from socket state, reuses or
 * refreshes the cached route, then hands the skb to ip6_xmit().
 * NOTE(review): some original lines are absent from this excerpt.
 */
1782 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1784 struct sock *sk = skb->sk;
1785 struct inet_opt *inet = inet_sk(sk);
1786 struct ipv6_pinfo *np = inet6_sk(sk);
1788 struct dst_entry *dst;
1790 memset(&fl, 0, sizeof(fl));
1791 fl.proto = IPPROTO_TCP;
1792 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1793 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1794 fl.fl6_flowlabel = np->flow_label;
1795 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1796 fl.oif = sk->sk_bound_dev_if;
1797 fl.fl_ip_sport = inet->sport;
1798 fl.fl_ip_dport = inet->dport;
/* Source routing: route towards the first hop in the routing header. */
1800 if (np->opt && np->opt->srcrt) {
1801 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1802 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1805 dst = __sk_dst_check(sk, np->dst_cookie);
/* Cache miss: do a fresh route lookup and store it on the socket. */
1808 int err = ip6_dst_lookup(sk, &dst, &fl);
/* Negative errno is recorded positive in sk_err_soft (soft error). */
1811 sk->sk_err_soft = -err;
1815 ip6_dst_store(sk, dst, NULL);
1816 sk->sk_route_caps = dst->dev->features &
1817 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1818 tcp_sk(sk)->ext2_header_len = dst->header_len;
1821 skb->dst = dst_clone(dst);
1823 /* Restore final destination back after routing done */
1824 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1826 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * Fill *uaddr (a sockaddr_in6) with the socket's peer address, port
 * and — for link-local peers on a bound device — the scope id.
 */
1829 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1831 struct ipv6_pinfo *np = inet6_sk(sk);
1832 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1834 sin6->sin6_family = AF_INET6;
1835 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1836 sin6->sin6_port = inet_sk(sk)->dport;
1837 /* We do not store received flowlabel for TCP */
1838 sin6->sin6_flowinfo = 0;
1839 sin6->sin6_scope_id = 0;
/* Link-local addresses are only meaningful with an interface scope. */
1840 if (sk->sk_bound_dev_if &&
1841 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1842 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * remember_stamp hook for IPv6: timestamp caching (the v4 peer-cache
 * equivalent) is not implemented here — this is a stub.
 */
1845 static int tcp_v6_remember_stamp(struct sock *sk)
1847 /* Alas, not yet... */
/* Address-family operations for native TCP-over-IPv6 sockets. */
1851 static struct tcp_func ipv6_specific = {
1852 .queue_xmit = tcp_v6_xmit,
1853 .send_check = tcp_v6_send_check,
1854 .rebuild_header = tcp_v6_rebuild_header,
1855 .conn_request = tcp_v6_conn_request,
1856 .syn_recv_sock = tcp_v6_syn_recv_sock,
1857 .remember_stamp = tcp_v6_remember_stamp,
1858 .net_header_len = sizeof(struct ipv6hdr),
1860 .setsockopt = ipv6_setsockopt,
1861 .getsockopt = ipv6_getsockopt,
1862 .addr2sockaddr = v6_addr2sockaddr,
1863 .sockaddr_len = sizeof(struct sockaddr_in6)
1867 * TCP over IPv4 via INET6 API
/* Address-family operations for v4-mapped sockets: transmission and
 * header handling go through the IPv4 routines, but socket options
 * and address reporting stay in IPv6 form. */
1870 static struct tcp_func ipv6_mapped = {
1871 .queue_xmit = ip_queue_xmit,
1872 .send_check = tcp_v4_send_check,
1873 .rebuild_header = tcp_v4_rebuild_header,
1874 .conn_request = tcp_v6_conn_request,
1875 .syn_recv_sock = tcp_v6_syn_recv_sock,
1876 .remember_stamp = tcp_v4_remember_stamp,
1877 .net_header_len = sizeof(struct iphdr),
1879 .setsockopt = ipv6_setsockopt,
1880 .getsockopt = ipv6_getsockopt,
1881 .addr2sockaddr = v6_addr2sockaddr,
1882 .sockaddr_len = sizeof(struct sockaddr_in6)
1887 /* NOTE: A lot of things set to zero explicitly by call to
1888 * sk_alloc() so need not be done here.
/*
 * Initialize a freshly allocated TCP/IPv6 socket: queues, timers,
 * default congestion/RTO parameters and the af_specific ops table.
 */
1890 static int tcp_v6_init_sock(struct sock *sk)
1892 struct tcp_opt *tp = tcp_sk(sk);
1894 skb_queue_head_init(&tp->out_of_order_queue);
1895 tcp_init_xmit_timers(sk);
1896 tcp_prequeue_init(tp);
1898 tp->rto = TCP_TIMEOUT_INIT;
1899 tp->mdev = TCP_TIMEOUT_INIT;
1901 /* So many TCP implementations out there (incorrectly) count the
1902 * initial SYN frame in their delayed-ACK and congestion control
1903 * algorithms that we must have the following bandaid to talk
1904 * efficiently to them. -DaveM
1908 /* See draft-stevens-tcpca-spec-01 for discussion of the
1909 * initialization of these values.
1911 tp->snd_ssthresh = 0x7fffffff;
1912 tp->snd_cwnd_clamp = ~0;
/* 536 = default IPv4 MSS; refined later once the path MTU is known. */
1913 tp->mss_cache = 536;
1915 tp->reordering = sysctl_tcp_reordering;
1917 sk->sk_state = TCP_CLOSE;
1919 tp->af_specific = &ipv6_specific;
1921 sk->sk_write_space = sk_stream_write_space;
1922 sk->sk_use_write_queue = 1;
1924 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1925 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1927 atomic_inc(&tcp_sockets_allocated);
/*
 * Tear down a TCP/IPv6 socket: stop timers, purge all queues, release
 * the bind bucket and cached sendmsg page, then finish with the
 * generic inet6 destructor.
 */
1932 static int tcp_v6_destroy_sock(struct sock *sk)
1934 struct tcp_opt *tp = tcp_sk(sk);
1935 struct inet_opt *inet = inet_sk(sk);
1937 tcp_clear_xmit_timers(sk);
1939 /* Cleanup up the write buffer. */
1940 tcp_writequeue_purge(sk);
1942 /* Cleans up our, hopefully empty, out_of_order_queue. */
1943 __skb_queue_purge(&tp->out_of_order_queue);
1945 /* Clean prequeue, it must be empty really */
1946 __skb_queue_purge(&tp->ucopy.prequeue);
1948 /* Clean up a referenced TCP bind bucket. */
1949 if (tcp_sk(sk)->bind_hash)
1952 /* If sendmsg cached page exists, toss it. */
1953 if (inet->sndmsg_page != NULL)
1954 __free_page(inet->sndmsg_page);
1956 atomic_dec(&tcp_sockets_allocated)
1958 return inet6_destroy_sock(sk);
1961 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Emit one /proc/net/tcp6 line for a pending connection request
 * (SYN_RECV open_request) in the listener's syn queue.
 */
1962 static void get_openreq6(struct seq_file *seq,
1963 struct sock *sk, struct open_request *req, int i, int uid)
1965 struct in6_addr *dest, *src;
/* Remaining time-to-die of the request's expire timer, in jiffies. */
1966 int ttd = req->expires - jiffies;
1971 src = &req->af.v6_req.loc_addr;
1972 dest = &req->af.v6_req.rmt_addr;
1974 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1975 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1977 src->s6_addr32[0], src->s6_addr32[1],
1978 src->s6_addr32[2], src->s6_addr32[3],
1979 ntohs(inet_sk(sk)->sport),
1980 dest->s6_addr32[0], dest->s6_addr32[1],
1981 dest->s6_addr32[2], dest->s6_addr32[3],
1982 ntohs(req->rmt_port),
1984 0,0, /* could print option size, but that is af dependent. */
1985 1, /* timers active (only the expire timer) */
1986 jiffies_to_clock_t(ttd),
1989 0, /* non standard timer */
1990 0, /* open_requests have no inode */
/*
 * Emit one /proc/net/tcp6 line for a full socket (listening or
 * established).  NOTE(review): some original lines are absent from
 * this excerpt.
 */
1994 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1996 struct in6_addr *dest, *src;
1999 unsigned long timer_expires;
2000 struct inet_opt *inet = inet_sk(sp);
2001 struct tcp_opt *tp = tcp_sk(sp);
2002 struct ipv6_pinfo *np = inet6_sk(sp);
2005 src = &np->rcv_saddr;
2006 destp = ntohs(inet->dport);
2007 srcp = ntohs(inet->sport);
/* Pick whichever timer is pending (retransmit, zero-window probe or
 * keepalive) to report its expiry in the "tm->when" column. */
2008 if (tp->pending == TCP_TIME_RETRANS) {
2010 timer_expires = tp->timeout;
2011 } else if (tp->pending == TCP_TIME_PROBE0) {
2013 timer_expires = tp->timeout;
2014 } else if (timer_pending(&sp->sk_timer)) {
2016 timer_expires = sp->sk_timer.expires;
2019 timer_expires = jiffies;
2023 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2024 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2026 src->s6_addr32[0], src->s6_addr32[1],
2027 src->s6_addr32[2], src->s6_addr32[3], srcp,
2028 dest->s6_addr32[0], dest->s6_addr32[1],
2029 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2031 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2033 jiffies_to_clock_t(timer_expires - jiffies),
2038 atomic_read(&sp->sk_refcnt), sp,
2039 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
/* ssthresh >= 0xFFFF is treated as "unset" and printed as -1. */
2040 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Emit one /proc/net/tcp6 line for a TIME_WAIT mini-socket
 * (tcp_tw_bucket), which carries only a subset of full-socket state.
 */
2044 static void get_timewait6_sock(struct seq_file *seq,
2045 struct tcp_tw_bucket *tw, int i)
2047 struct in6_addr *dest, *src;
/* Remaining time until the timewait bucket dies, in jiffies. */
2049 int ttd = tw->tw_ttd - jiffies;
2054 dest = &tw->tw_v6_daddr;
2055 src = &tw->tw_v6_rcv_saddr;
2056 destp = ntohs(tw->tw_dport);
2057 srcp = ntohs(tw->tw_sport);
2060 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2061 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2063 src->s6_addr32[0], src->s6_addr32[1],
2064 src->s6_addr32[2], src->s6_addr32[3], srcp,
2065 dest->s6_addr32[0], dest->s6_addr32[1],
2066 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2067 tw->tw_substate, 0, 0,
2068 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2069 atomic_read(&tw->tw_refcnt), tw);
2072 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for /proc/net/tcp6: print the header row for
 * the start token, otherwise dispatch on the iterator state to the
 * appropriate per-entry formatter above.
 */
2073 static int tcp6_seq_show(struct seq_file *seq, void *v)
2075 struct tcp_iter_state *st;
2077 if (v == SEQ_START_TOKEN) {
2082 "st tx_queue rx_queue tr tm->when retrnsmt"
2083 " uid timeout inode\n");
2088 switch (st->state) {
2089 case TCP_SEQ_STATE_LISTENING:
2090 case TCP_SEQ_STATE_ESTABLISHED:
2091 get_tcp6_sock(seq, v, st->num);
2093 case TCP_SEQ_STATE_OPENREQ:
2094 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2096 case TCP_SEQ_STATE_TIME_WAIT:
2097 get_timewait6_sock(seq, v, st->num);
/* Registration data for the /proc/net/tcp6 seq_file interface. */
2104 static struct file_operations tcp6_seq_fops;
2105 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2106 .owner = THIS_MODULE,
2109 .seq_show = tcp6_seq_show,
2110 .seq_fops = &tcp6_seq_fops,
/* Register the /proc/net/tcp6 entry; returns tcp_proc_register's result. */
2113 int __init tcp6_proc_init(void)
2115 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister the /proc/net/tcp6 entry on teardown. */
2118 void tcp6_proc_exit(void)
2120 tcp_proc_unregister(&tcp6_seq_afinfo);
/* Transport-protocol operations exported for AF_INET6 SOCK_STREAM
 * sockets; mostly shared generic TCP handlers plus the v6-specific
 * init/destroy/hash/backlog routines defined in this file. */
2124 struct proto tcpv6_prot = {
2127 .connect = tcp_v6_connect,
2128 .disconnect = tcp_disconnect,
2129 .accept = tcp_accept,
2131 .init = tcp_v6_init_sock,
2132 .destroy = tcp_v6_destroy_sock,
2133 .shutdown = tcp_shutdown,
2134 .setsockopt = tcp_setsockopt,
2135 .getsockopt = tcp_getsockopt,
2136 .sendmsg = tcp_sendmsg,
2137 .recvmsg = tcp_recvmsg,
2138 .backlog_rcv = tcp_v6_do_rcv,
2139 .hash = tcp_v6_hash,
2140 .unhash = tcp_unhash,
2141 .get_port = tcp_v6_get_port,
/* IPv6 protocol handler for IPPROTO_TCP: tcp_v6_rcv receives segments,
 * tcp_v6_err handles ICMPv6 errors.  NOPOLICY/FINAL: XFRM policy is
 * checked inside the handler, and no further protocols follow. */
2144 static struct inet6_protocol tcpv6_protocol = {
2145 .handler = tcp_v6_rcv,
2146 .err_handler = tcp_v6_err,
2147 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2150 extern struct proto_ops inet6_stream_ops;
/* inet6 socket-switch entry binding SOCK_STREAM/IPPROTO_TCP to
 * tcpv6_prot; PERMANENT so it cannot be unregistered. */
2152 static struct inet_protosw tcpv6_protosw = {
2153 .type = SOCK_STREAM,
2154 .protocol = IPPROTO_TCP,
2155 .prot = &tcpv6_prot,
2156 .ops = &inet6_stream_ops,
2159 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time initialization: hook TCP into the IPv6 protocol table and
 * register the SOCK_STREAM protosw entry.  A failed protocol
 * registration is only logged; registration of the protosw proceeds
 * regardless (matching the original behavior).
 */
2162 void __init tcpv6_init(void)
2164 /* register inet6 protocol */
2165 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2166 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2167 inet6_register_protosw(&tcpv6_protosw);