3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
/* Hash an IPv6 4-tuple (local/foreign address + port) into an index in the
 * established-connections hash table (tcp_ehash).  Only the low 32 bits of
 * each address visibly feed the hash here; tcp_ehash_size is assumed to be a
 * power of two so that (size - 1) works as a mask.
 * NOTE(review): lines appear elided from this chunk (opening/closing braces
 * and at least one statement are missing) — do not treat as complete.
 */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
/* Compute the ehash slot for a connected socket by feeding its own
 * bound/peer addresses and ports to tcp_v6_hashfn().  Uses inet->num
 * (host-order local port) and inet->dport (peer port).
 */
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_opt *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
/* Walk all sockets bound to the same local port (tb->owners) and decide
 * whether binding 'sk' there would conflict.  A conflict needs: same (or
 * wildcard) bound device, at least one side without SO_REUSEADDR (or the
 * other socket listening), and equal/overlapping rcv_saddr per
 * ipv6_rcv_saddr_equal().
 * NOTE(review): the loop body's conclusion (break/return) is elided from
 * this chunk — the visible condition is incomplete.
 */
101 static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
105 struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
/* Bind 'sk' to local port 'snum'; when snum == 0 pick an ephemeral port by
 * scanning the sysctl local port range under tcp_portalloc_lock.  Once a
 * port (bucket 'tb') is chosen, check fastreuse/bind-conflict rules and
 * link the socket into the bind hash via tcp_bind_hash().
 * NOTE(review): many interior lines (do/while head, goto labels, returns)
 * are elided from this chunk; control flow below is not complete.
 */
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct tcp_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb;
131 struct hlist_node *node;
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_portalloc_lock);
142 rover = tcp_port_rover;
/* Wrap the rover back into [low, high] before probing this candidate. */
144 if ((rover < low) || (rover > high))
146 head = &tcp_bhash[tcp_bhashfn(rover)];
147 spin_lock(&head->lock);
148 tb_for_each(tb, node, &head->chain)
149 if (tb->port == rover)
153 spin_unlock(&head->lock);
154 } while (--remaining > 0);
155 tcp_port_rover = rover;
156 spin_unlock(&tcp_portalloc_lock);
158 /* Exhausted local port range during search? */
163 /* OK, here is the one we will use. */
/* Explicit-port path: look up (or create) the bind bucket for snum. */
166 head = &tcp_bhash[tcp_bhashfn(snum)];
167 spin_lock(&head->lock);
168 tb_for_each(tb, node, &head->chain)
169 if (tb->port == snum)
175 if (tb && !hlist_empty(&tb->owners)) {
/* Fast path: everyone on this port set SO_REUSEADDR and nobody listens. */
176 if (tb->fastreuse > 0 && sk->sk_reuse &&
177 sk->sk_state != TCP_LISTEN) {
181 if (tcp_v6_bind_conflict(sk, tb))
187 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
189 if (hlist_empty(&tb->owners)) {
190 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
194 } else if (tb->fastreuse &&
195 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
199 if (!tcp_sk(sk)->bind_hash)
200 tcp_bind_hash(sk, tb, snum);
201 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
205 spin_unlock(&head->lock);
/* Insert an unhashed socket into the proper TCP hash table: the listening
 * hash (lhash) for TCP_LISTEN sockets, otherwise the established hash
 * (ehash) at the slot computed by tcp_v6_sk_hashfn().  Also bumps the
 * protocol's in-use counter.
 * NOTE(review): the lock acquisition/release around __sk_add_node() is
 * elided from this chunk; 'lock' is chosen but its use is not visible.
 */
211 static __inline__ void __tcp_v6_hash(struct sock *sk)
213 struct hlist_head *list;
216 BUG_TRAP(sk_unhashed(sk));
218 if (sk->sk_state == TCP_LISTEN) {
219 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
220 lock = &tcp_lhash_lock;
223 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
224 list = &tcp_ehash[sk->sk_hashent].chain;
225 lock = &tcp_ehash[sk->sk_hashent].lock;
229 __sk_add_node(sk, list);
230 sock_prot_inc_use(sk->sk_prot);
/* Public hash entry point: hash non-closed sockets; an IPv4-mapped socket
 * (af_specific == &ipv6_mapped) is presumably delegated to the v4 hasher —
 * that branch's body is elided from this chunk, TODO confirm against the
 * full file.
 */
235 static void tcp_v6_hash(struct sock *sk)
237 if (sk->sk_state != TCP_CLOSE) {
238 struct tcp_opt *tp = tcp_sk(sk);
240 if (tp->af_specific == &ipv6_mapped) {
/* Find the best listening socket for (daddr, hnum, dif): walk the listening
 * hash chain under tcp_lhash_lock, skip sockets whose bound address or
 * bound device does not match, and keep the highest-scoring candidate
 * (more specific binds score higher).
 * NOTE(review): the score computation and the 'result' bookkeeping lines
 * are elided from this chunk.
 */
250 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
253 struct hlist_node *node;
254 struct sock *result = NULL;
258 read_lock(&tcp_lhash_lock);
259 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
260 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
261 struct ipv6_pinfo *np = inet6_sk(sk);
/* A socket bound to a specific address must match daddr exactly. */
264 if (!ipv6_addr_any(&np->rcv_saddr)) {
265 if (ipv6_addr_cmp(&np->rcv_saddr, daddr))
269 if (sk->sk_bound_dev_if) {
270 if (sk->sk_bound_dev_if != dif)
278 if (score > hiscore) {
286 read_unlock(&tcp_lhash_lock);
290 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
291 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
293 * The sockhash lock must be held as a reader here.
/* Look up an established (or TIME-WAIT) connection by full 4-tuple.
 * The ehash bucket at index 'hash' holds established sockets; the bucket
 * at (head + tcp_ehash_size) holds the corresponding TIME-WAIT entries.
 * 'ports' packs dport/sport so one 32-bit compare covers both.
 */
296 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
297 struct in6_addr *daddr, u16 hnum,
300 struct tcp_ehash_bucket *head;
302 struct hlist_node *node;
303 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
306 /* Optimize here for direct hit, only listening connections can
307 * have wildcards anyways.
309 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
310 head = &tcp_ehash[hash];
311 read_lock(&head->lock);
312 sk_for_each(sk, node, &head->chain) {
313 /* For IPV6 do the cheaper port and family tests first. */
314 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
315 goto hit; /* You sunk my battleship! */
317 /* Must check for a TIME_WAIT'er before going to listener hash. */
318 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
319 /* FIXME: acme: check this... */
320 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
322 if(*((__u32 *)&(tw->tw_dport)) == ports &&
323 sk->sk_family == PF_INET6) {
324 if(!ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
325 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
326 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
330 read_unlock(&head->lock);
/* NOTE(review): the hit label / refcount handling between these unlocks
 * is elided from this chunk. */
335 read_unlock(&head->lock);
/* Combined lookup: try the established/TIME-WAIT tables first; fall back
 * to the listener hash when no exact 4-tuple match exists.
 */
340 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
341 struct in6_addr *daddr, u16 hnum,
346 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
351 return tcp_v6_lookup_listener(daddr, hnum, dif);
/* Exported lookup wrapper: takes dport in network byte order and converts
 * to host order for __tcp_v6_lookup().  Locking and the return of 'sk'
 * are elided from this chunk.
 */
354 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
355 struct in6_addr *daddr, u16 dport,
361 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
369 * Open request hash tables.
/* Hash a remote (addr, port) into the listen socket's SYN queue table using
 * a Jenkins-style mix (__jhash_mix) seeded by the per-listener random value
 * 'rnd'.  Returns an index in [0, TCP_SYNQ_HSIZE).
 * NOTE(review): the lines folding rport/rnd into the mix are elided here.
 */
372 static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
376 a = raddr->s6_addr32[0];
377 b = raddr->s6_addr32[1];
378 c = raddr->s6_addr32[2];
380 a += JHASH_GOLDEN_RATIO;
381 b += JHASH_GOLDEN_RATIO;
383 __jhash_mix(a, b, c);
385 a += raddr->s6_addr32[3];
387 __jhash_mix(a, b, c);
389 return c & (TCP_SYNQ_HSIZE - 1);
/* Search a listener's SYN queue for the open_request matching the given
 * remote port/address, local address, and (optionally) inbound interface.
 * On success *prevp is presumably set so the caller can unlink the entry —
 * TODO confirm, the tail of the loop body is elided from this chunk.
 */
392 static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
393 struct open_request ***prevp,
395 struct in6_addr *raddr,
396 struct in6_addr *laddr,
399 struct tcp_listen_opt *lopt = tp->listen_opt;
400 struct open_request *req, **prev;
402 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
403 (req = *prev) != NULL;
404 prev = &req->dl_next) {
405 if (req->rmt_port == rport &&
406 req->class->family == AF_INET6 &&
407 !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
408 !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
409 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
410 BUG_TRAP(req->sk == NULL);
/* TCP-over-IPv6 checksum helper: fold the pseudo-header (saddr, daddr,
 * length, IPPROTO_TCP) together with the partial sum 'base' via
 * csum_ipv6_magic().
 */
419 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
420 struct in6_addr *saddr,
421 struct in6_addr *daddr,
424 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/* Pick the initial sequence number for a new connection: use the secure
 * IPv6 ISN generator for native IPv6 packets, the IPv4 generator for
 * IPv4(-mapped) packets.  The trailing port arguments of both calls are
 * elided from this chunk.
 */
427 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
429 if (skb->protocol == htons(ETH_P_IPV6)) {
430 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
431 skb->nh.ipv6h->saddr.s6_addr32,
435 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/* Verify that the 4-tuple this socket wants to use is unique before
 * inserting it into the established hash.  A TIME-WAIT entry with the
 * same tuple may be recycled (timestamps permitting); a live established
 * match means the bind fails with -EADDRNOTAVAIL.
 * Runs under the ehash bucket's write lock (BH-safe).
 */
442 static int tcp_v6_check_established(struct sock *sk)
444 struct inet_opt *inet = inet_sk(sk);
445 struct ipv6_pinfo *np = inet6_sk(sk);
446 struct in6_addr *daddr = &np->rcv_saddr;
447 struct in6_addr *saddr = &np->daddr;
448 int dif = sk->sk_bound_dev_if;
449 u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num);
450 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
451 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
453 struct hlist_node *node;
454 struct tcp_tw_bucket *tw;
456 write_lock_bh(&head->lock);
458 /* Check TIME-WAIT sockets first. */
459 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
460 tw = (struct tcp_tw_bucket*)sk2;
462 if(*((__u32 *)&(tw->tw_dport)) == ports &&
463 sk2->sk_family == PF_INET6 &&
464 !ipv6_addr_cmp(&tw->tw_v6_daddr, saddr) &&
465 !ipv6_addr_cmp(&tw->tw_v6_rcv_saddr, daddr) &&
466 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
467 struct tcp_opt *tp = tcp_sk(sk);
/* Recycle path: reuse the old connection's timestamp state and start the
 * new write_seq safely beyond the dead connection's snd_nxt. */
469 if (tw->tw_ts_recent_stamp) {
470 /* See comment in tcp_ipv4.c */
471 tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
474 tp->ts_recent = tw->tw_ts_recent;
475 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
484 /* And established part... */
485 sk_for_each(sk2, node, &head->chain) {
486 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
/* Unique: link sk into the established chain before dropping the lock. */
491 BUG_TRAP(sk_unhashed(sk));
492 __sk_add_node(sk, &head->chain);
493 sk->sk_hashent = hash;
494 sock_prot_inc_use(sk->sk_prot);
495 write_unlock_bh(&head->lock);
498 /* Silly. Should hash-dance instead... */
500 tcp_tw_deschedule(tw);
501 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
509 write_unlock_bh(&head->lock);
510 return -EADDRNOTAVAIL;
/* Hash an actively-connecting socket: allocate a local port if none is
 * bound yet, then — if this socket is the sole owner of its bind bucket —
 * take a fast path (elided here), otherwise fall back to the full
 * uniqueness check in tcp_v6_check_established().
 */
513 static int tcp_v6_hash_connect(struct sock *sk)
515 struct tcp_bind_hashbucket *head;
516 struct tcp_bind_bucket *tb;
519 if (inet_sk(sk)->num == 0) {
520 int err = tcp_v6_get_port(sk, inet_sk(sk)->num);
523 inet_sk(sk)->sport = htons(inet_sk(sk)->num);
526 head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
529 spin_lock_bh(&head->lock);
/* Sole owner of the bucket: no other socket shares this port. */
531 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
533 spin_unlock_bh(&head->lock);
536 spin_unlock_bh(&head->lock);
537 return tcp_v6_check_established(sk);
/* Return the inbound interface index recorded in the skb's IPv6 control
 * block (set by the IPv6 receive path). */
541 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
543 return IP6CB(skb)->iif;
/* Active open (connect(2)) for an IPv6 TCP socket.  Validates the
 * sockaddr, resolves flow labels and scope ids, handles the v4-mapped
 * address case by delegating to tcp_v4_connect(), performs routing
 * (ip6_dst_lookup + xfrm_lookup), picks source address, hashes the socket
 * and finally sends the SYN via tcp_connect().
 * NOTE(review): numerous interior lines (error returns, labels, some
 * assignments) are elided from this chunk.
 */
546 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
549 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
550 struct inet_opt *inet = inet_sk(sk);
551 struct ipv6_pinfo *np = inet6_sk(sk);
552 struct tcp_opt *tp = tcp_sk(sk);
553 struct in6_addr *saddr = NULL, *final_p = NULL, final;
555 struct dst_entry *dst;
559 if (addr_len < SIN6_LEN_RFC2133)
562 if (usin->sin6_family != AF_INET6)
563 return(-EAFNOSUPPORT);
565 memset(&fl, 0, sizeof(fl));
568 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
569 IP6_ECN_flow_init(fl.fl6_flowlabel);
/* If a flow label is requested, it must already exist on this socket;
 * its destination overrides the one passed in. */
570 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
571 struct ip6_flowlabel *flowlabel;
572 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
573 if (flowlabel == NULL)
575 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
576 fl6_sock_release(flowlabel);
581 * connect() to INADDR_ANY means loopback (BSD'ism).
584 if(ipv6_addr_any(&usin->sin6_addr))
585 usin->sin6_addr.s6_addr[15] = 0x1;
587 addr_type = ipv6_addr_type(&usin->sin6_addr);
589 if(addr_type & IPV6_ADDR_MULTICAST)
592 if (addr_type&IPV6_ADDR_LINKLOCAL) {
593 if (addr_len >= sizeof(struct sockaddr_in6) &&
594 usin->sin6_scope_id) {
595 /* If interface is set while binding, indices
598 if (sk->sk_bound_dev_if &&
599 sk->sk_bound_dev_if != usin->sin6_scope_id)
602 sk->sk_bound_dev_if = usin->sin6_scope_id;
605 /* Connect to link-local address requires an interface */
606 if (!sk->sk_bound_dev_if)
/* Reset stale timestamp state when reconnecting to a different peer. */
610 if (tp->ts_recent_stamp &&
611 ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
613 tp->ts_recent_stamp = 0;
617 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
618 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: switch to the mapped operations vector and let
 * the IPv4 connect path do the work; restore the v6 vector on failure. */
624 if (addr_type == IPV6_ADDR_MAPPED) {
625 u32 exthdrlen = tp->ext_header_len;
626 struct sockaddr_in sin;
628 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
630 if (__ipv6_only_sock(sk))
633 sin.sin_family = AF_INET;
634 sin.sin_port = usin->sin6_port;
635 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
637 tp->af_specific = &ipv6_mapped;
638 sk->sk_backlog_rcv = tcp_v4_do_rcv;
640 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
643 tp->ext_header_len = exthdrlen;
644 tp->af_specific = &ipv6_specific;
645 sk->sk_backlog_rcv = tcp_v6_do_rcv;
648 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
650 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
657 if (!ipv6_addr_any(&np->rcv_saddr))
658 saddr = &np->rcv_saddr;
660 fl.proto = IPPROTO_TCP;
661 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
662 ipv6_addr_copy(&fl.fl6_src,
663 (saddr ? saddr : &np->saddr));
664 fl.oif = sk->sk_bound_dev_if;
665 fl.fl_ip_dport = usin->sin6_port;
666 fl.fl_ip_sport = inet->sport;
/* With a routing header, route to the first hop and remember the real
 * final destination to restore after the lookup. */
668 if (np->opt && np->opt->srcrt) {
669 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
670 ipv6_addr_copy(&final, &fl.fl6_dst);
671 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
675 err = ip6_dst_lookup(sk, &dst, &fl);
679 ipv6_addr_copy(&fl.fl6_dst, final_p);
681 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
688 ipv6_addr_copy(&np->rcv_saddr, saddr);
691 /* set the source address */
692 ipv6_addr_copy(&np->saddr, saddr);
693 inet->rcv_saddr = LOOPBACK4_IPV6;
695 ip6_dst_store(sk, dst, NULL);
696 sk->sk_route_caps = dst->dev->features &
697 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
699 tp->ext_header_len = 0;
701 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
702 tp->ext2_header_len = dst->header_len;
704 tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
706 inet->dport = usin->sin6_port;
708 tcp_set_state(sk, TCP_SYN_SENT);
709 err = tcp_v6_hash_connect(sk);
714 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
719 err = tcp_connect(sk);
/* Failure path: undo state so the socket can be connected again. */
726 tcp_set_state(sk, TCP_CLOSE);
730 sk->sk_route_caps = 0;
/* ICMPv6 error handler for TCP.  Locates the socket for the embedded TCP
 * header, validates the quoted sequence number, handles PKT_TOOBIG by
 * re-routing and syncing the MSS, and maps other ICMP errors onto the
 * socket (or the matching SYN-queue open_request for listeners).
 * NOTE(review): several interior lines (bh_lock/unlock, goto labels,
 * variable declarations) are elided from this chunk.
 */
734 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
735 int type, int code, int offset, __u32 info)
737 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
738 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
739 struct ipv6_pinfo *np;
745 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
748 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
752 if (sk->sk_state == TCP_TIME_WAIT) {
753 tcp_tw_put((struct tcp_tw_bucket*)sk);
/* Only count, don't process, when the socket is owned by user context. */
758 if (sock_owned_by_user(sk))
759 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
761 if (sk->sk_state == TCP_CLOSE)
/* ICMP payload must quote a seq inside the current send window. */
765 seq = ntohl(th->seq);
766 if (sk->sk_state != TCP_LISTEN &&
767 !between(seq, tp->snd_una, tp->snd_nxt)) {
768 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
774 if (type == ICMPV6_PKT_TOOBIG) {
775 struct dst_entry *dst = NULL;
777 if (sock_owned_by_user(sk))
779 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
782 /* icmp should have updated the destination cache entry */
783 dst = __sk_dst_check(sk, np->dst_cookie);
786 struct inet_opt *inet = inet_sk(sk);
789 /* BUGGG_FUTURE: Again, it is not clear how
790 to handle rthdr case. Ignore this complexity
793 memset(&fl, 0, sizeof(fl));
794 fl.proto = IPPROTO_TCP;
795 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
796 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
797 fl.oif = sk->sk_bound_dev_if;
798 fl.fl_ip_dport = inet->dport;
799 fl.fl_ip_sport = inet->sport;
801 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
802 sk->sk_err_soft = -err;
806 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
807 sk->sk_err_soft = -err;
/* Path MTU shrank: shrink MSS and retransmit what no longer fits. */
814 if (tp->pmtu_cookie > dst_pmtu(dst)) {
815 tcp_sync_mss(sk, dst_pmtu(dst));
816 tcp_simple_retransmit(sk);
817 } /* else let the usual retransmit timer handle it */
822 icmpv6_err_convert(type, code, &err);
824 /* Might be for an open_request */
825 switch (sk->sk_state) {
826 struct open_request *req, **prev;
828 if (sock_owned_by_user(sk))
831 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
832 &hdr->saddr, tcp_v6_iif(skb));
836 /* ICMPs are not backlogged, hence we cannot get
837 * an established socket here.
839 BUG_TRAP(req->sk == NULL);
841 if (seq != req->snt_isn) {
842 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
846 tcp_synq_drop(sk, req, prev);
850 case TCP_SYN_RECV: /* Cannot happen.
851 It can, it SYNs are crossed. --ANK */
852 if (!sock_owned_by_user(sk)) {
853 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
855 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
859 sk->sk_err_soft = err;
863 if (!sock_owned_by_user(sk) && np->recverr) {
865 sk->sk_error_report(sk);
867 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending open_request: set up the flow
 * (possibly inverting a received routing header for the reply), route it,
 * construct the segment via tcp_make_synack(), checksum and send with
 * ip6_xmit().  Frees any locally-built txoptions on exit.
 */
875 static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
876 struct dst_entry *dst)
878 struct ipv6_pinfo *np = inet6_sk(sk);
879 struct sk_buff * skb;
880 struct ipv6_txoptions *opt = NULL;
881 struct in6_addr * final_p = NULL, final;
885 memset(&fl, 0, sizeof(fl));
886 fl.proto = IPPROTO_TCP;
887 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
888 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
889 fl.fl6_flowlabel = 0;
890 fl.oif = req->af.v6_req.iif;
891 fl.fl_ip_dport = req->rmt_port;
892 fl.fl_ip_sport = inet_sk(sk)->sport;
/* srcrt == 2 means "echo a received source route back" — build the reply
 * route by inverting the routing header from the saved SYN. */
897 np->rxopt.bits.srcrt == 2 &&
898 req->af.v6_req.pktopts) {
899 struct sk_buff *pktopts = req->af.v6_req.pktopts;
900 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
902 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
905 if (opt && opt->srcrt) {
906 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
907 ipv6_addr_copy(&final, &fl.fl6_dst);
908 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
912 err = ip6_dst_lookup(sk, &dst, &fl);
916 ipv6_addr_copy(&fl.fl6_dst, final_p);
917 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
921 skb = tcp_make_synack(sk, dst, req);
923 struct tcphdr *th = skb->h.th;
925 th->check = tcp_v6_check(th, skb->len,
926 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
927 csum_partial((char *)th, skb->len, skb->csum));
929 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
930 err = ip6_xmit(sk, skb, &fl, opt, 0);
/* Congestion notification from the local stack is not a hard error. */
931 if (err == NET_XMIT_CN)
937 if (opt && opt != np->opt)
938 sock_kfree_s(sk, opt, opt->tot_len);
/* open_request destructor: drop the reference to the saved SYN skb (kept
 * for IPV6_PKTOPTIONS), if any. */
942 static void tcp_v6_or_free(struct open_request *req)
944 if (req->af.v6_req.pktopts)
945 kfree_skb(req->af.v6_req.pktopts);
/* open_request operations vector for IPv6 connection requests. */
948 static struct or_calltable or_ipv6 = {
950 .rtx_syn_ack = tcp_v6_send_synack,
951 .send_ack = tcp_v6_or_send_ack,
952 .destructor = tcp_v6_or_free,
953 .send_reset = tcp_v6_send_reset
/* Return nonzero when the socket asked (via IPV6_PKTOPTIONS rxopt bits)
 * for any ancillary data that this skb actually carries — hop-by-hop
 * options, flow label, routing header, or destination options — meaning
 * the skb must be retained for delivery to userspace.
 */
956 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
958 struct ipv6_pinfo *np = inet6_sk(sk);
959 struct inet6_skb_parm *opt = IP6CB(skb);
962 if ((opt->hop && np->rxopt.bits.hopopts) ||
963 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
964 np->rxopt.bits.rxflow) ||
965 (opt->srcrt && np->rxopt.bits.srcrt) ||
966 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/* Fill in the TCP checksum for an outgoing segment.  With hardware
 * checksum offload (CHECKSUM_HW) only the pseudo-header sum is stored and
 * skb->csum points at the check field for the NIC to finish; otherwise the
 * full checksum is computed in software.
 */
973 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
976 struct ipv6_pinfo *np = inet6_sk(sk);
978 if (skb->ip_summed == CHECKSUM_HW) {
979 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
980 skb->csum = offsetof(struct tcphdr, check);
982 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
983 csum_partial((char *)th, th->doff<<2,
/* Send a RST in response to an offending segment (no socket context —
 * sk is NULL throughout).  Builds a minimal TCP header in a fresh skb,
 * swapping source/destination from the input, routes it and transmits
 * with ip6_xmit().  Only unicast destinations are answered.
 */
989 static void tcp_v6_send_reset(struct sk_buff *skb)
991 struct tcphdr *th = skb->h.th, *t1;
992 struct sk_buff *buff;
998 if (!ipv6_unicast_destination(skb))
1002 * We need to grab some memory, and put together an RST,
1003 * and then put it into the queue to be sent.
1006 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1011 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1013 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1015 /* Swap the send and the receive. */
1016 memset(t1, 0, sizeof(*t1));
1017 t1->dest = th->source;
1018 t1->source = th->dest;
1019 t1->doff = sizeof(*t1)/4;
/* If the offending segment carried an ACK, the RST takes its ack_seq as
 * our seq; otherwise we ACK the segment's data instead (RFC 793 rules —
 * the non-ACK branch lines are partially elided here). */
1023 t1->seq = th->ack_seq;
1026 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1027 + skb->len - (th->doff<<2));
1030 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1032 memset(&fl, 0, sizeof(fl));
1033 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1034 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1036 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1037 sizeof(*t1), IPPROTO_TCP,
1040 fl.proto = IPPROTO_TCP;
1041 fl.oif = tcp_v6_iif(skb);
1042 fl.fl_ip_dport = t1->dest;
1043 fl.fl_ip_sport = t1->source;
1045 /* sk = NULL, but it is safe for now. RST socket required. */
1046 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1048 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1049 dst_release(buff->dst);
1053 ip6_xmit(NULL, buff, &fl, NULL, 0);
1054 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1055 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/* Send a bare ACK (used for TIME-WAIT and SYN-queue replies) with the given
 * seq/ack/window and, when ts != 0, a NOP-NOP-TIMESTAMP option block.
 * Like tcp_v6_send_reset() this runs without a socket (sk == NULL in the
 * transmit calls) and swaps addresses/ports from the incoming skb.
 */
1062 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1064 struct tcphdr *th = skb->h.th, *t1;
1065 struct sk_buff *buff;
1067 int tot_len = sizeof(struct tcphdr);
1072 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1077 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1079 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1081 /* Swap the send and the receive. */
1082 memset(t1, 0, sizeof(*t1));
1083 t1->dest = th->source;
1084 t1->source = th->dest;
1085 t1->doff = tot_len/4;
1086 t1->seq = htonl(seq);
1087 t1->ack_seq = htonl(ack);
1089 t1->window = htons(win);
/* Append the timestamp option (val = now, ecr = peer's ts), present only
 * when the caller supplied a nonzero ts. */
1092 u32 *ptr = (u32*)(t1 + 1);
1093 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1094 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1095 *ptr++ = htonl(tcp_time_stamp);
1099 buff->csum = csum_partial((char *)t1, tot_len, 0);
1101 memset(&fl, 0, sizeof(fl));
1102 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1103 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1105 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1106 tot_len, IPPROTO_TCP,
1109 fl.proto = IPPROTO_TCP;
1110 fl.oif = tcp_v6_iif(skb);
1111 fl.fl_ip_dport = t1->dest;
1112 fl.fl_ip_sport = t1->source;
1114 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1115 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1116 dst_release(buff->dst);
1119 ip6_xmit(NULL, buff, &fl, NULL, 0);
1120 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/* ACK on behalf of a TIME-WAIT socket, echoing the connection's last
 * sequence state and receive window (scaled down by rcv_wscale). */
1127 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1129 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1131 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1132 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
/* ACK a pending open_request (retransmitted final handshake ACK): seq/ack
 * derived from the stored ISNs. */
1137 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
1139 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
/* For a segment hitting a listening socket: check the SYN queue first
 * (tcp_check_req handles half-open completion), then look for an
 * established/TIME-WAIT socket with the same 4-tuple.  The SYN-cookie
 * path is compiled out (#if 0).
 */
1143 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1145 struct open_request *req, **prev;
1146 struct tcphdr *th = skb->h.th;
1147 struct tcp_opt *tp = tcp_sk(sk);
1150 /* Find possible connection requests. */
1151 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
1152 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1154 return tcp_check_req(sk, skb, req, prev);
1156 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1158 &skb->nh.ipv6h->daddr,
1163 if (nsk->sk_state != TCP_TIME_WAIT) {
1167 tcp_tw_put((struct tcp_tw_bucket*)nsk);
1171 #if 0 /*def CONFIG_SYN_COOKIES*/
1172 if (!th->rst && !th->syn && th->ack)
1173 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/* Add a new open_request to the listener's SYN queue bucket 'h' under
 * syn_wait_lock, arming its retransmit expiry at TCP_TIMEOUT_INIT.
 */
1178 static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1180 struct tcp_opt *tp = tcp_sk(sk);
1181 struct tcp_listen_opt *lopt = tp->listen_opt;
1182 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
1185 req->expires = jiffies + TCP_TIMEOUT_INIT;
1187 req->dl_next = lopt->syn_table[h];
1189 write_lock(&tp->syn_wait_lock);
1190 lopt->syn_table[h] = req;
1191 write_unlock(&tp->syn_wait_lock);
1193 #ifdef CONFIG_ACCEPT_QUEUES
1194 tcp_synq_added(sk, req);
1201 /* FIXME: this is substantially similar to the ipv4 code.
1202 * Can some kind of merge be done? -- erics
/* Handle an incoming SYN on a listening socket: allocate an open_request,
 * parse TCP options into a scratch tcp_opt, save the SYN skb when the
 * socket wants packet options, choose an ISN, send the SYN-ACK and queue
 * the request.  IPv4(-mapped) SYNs are delegated to tcp_v4_conn_request().
 * NOTE(review): drop/error labels and several intermediate lines are
 * elided from this chunk.
 */
1204 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1206 struct ipv6_pinfo *np = inet6_sk(sk);
1207 struct tcp_opt tmptp, *tp = tcp_sk(sk);
1208 struct open_request *req = NULL;
1209 __u32 isn = TCP_SKB_CB(skb)->when;
1210 #ifdef CONFIG_ACCEPT_QUEUES
1214 if (skb->protocol == htons(ETH_P_IP))
1215 return tcp_v4_conn_request(sk, skb);
1217 if (!ipv6_unicast_destination(skb))
1222 * There are no SYN attacks on IPv6, yet...
1224 if (tcp_synq_is_full(sk) && !isn) {
1225 if (net_ratelimit())
1226 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1230 #ifdef CONFIG_ACCEPT_QUEUES
/* Map the netfilter mark to an accept-queue class; out-of-range marks
 * fall back to class 0. */
1231 class = (skb->nfmark <= 0) ? 0 :
1232 ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark);
1234 * Accept only if the class has shares set or if the default class
1235 * i.e. class 0 has shares
1237 if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) {
1238 if (tcp_sk(sk)->acceptq[0].aq_ratio)
1244 if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
1246 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1251 req = tcp_openreq_alloc();
1255 tcp_clear_options(&tmptp);
1256 tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1257 tmptp.user_mss = tp->user_mss;
1259 tcp_parse_options(skb, &tmptp, 0);
1261 tmptp.tstamp_ok = tmptp.saw_tstamp;
1262 tcp_openreq_init(req, &tmptp, skb);
1263 #ifdef CONFIG_ACCEPT_QUEUES
1264 req->acceptq_class = class;
1265 req->acceptq_time_stamp = jiffies;
1267 req->class = &or_ipv6;
1268 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
1269 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
1270 TCP_ECN_create_request(req, skb->h.th);
1271 req->af.v6_req.pktopts = NULL;
/* Keep the SYN skb alive if its ancillary data must reach userspace. */
1272 if (ipv6_opt_accepted(sk, skb) ||
1273 np->rxopt.bits.rxinfo ||
1274 np->rxopt.bits.rxhlim) {
1275 atomic_inc(&skb->users);
1276 req->af.v6_req.pktopts = skb;
1278 req->af.v6_req.iif = sk->sk_bound_dev_if;
1280 /* So that link locals have meaning */
1281 if (!sk->sk_bound_dev_if &&
1282 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
1283 req->af.v6_req.iif = tcp_v6_iif(skb);
1286 isn = tcp_v6_init_sequence(sk,skb);
1290 if (tcp_v6_send_synack(sk, req, NULL))
1293 tcp_v6_synq_add(sk, req);
1299 tcp_openreq_free(req);
1301 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1302 return 0; /* don't send reset */
/* Create the child socket when the 3-way handshake completes.  Two paths:
 * (1) IPv4(-mapped) — delegate to tcp_v4_syn_recv_sock() then graft IPv6
 * bookkeeping (mapped addresses, v4 backlog handler) onto the child;
 * (2) native IPv6 — route (honoring a reflected routing header), clone the
 * listener via tcp_create_openreq_child(), copy addresses/options, sync
 * MSS and hash the child into the established table.
 * NOTE(review): several interior lines (error labels, some conditionals)
 * are elided from this chunk.
 */
1305 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1306 struct open_request *req,
1307 struct dst_entry *dst)
1309 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1310 struct tcp6_sock *newtcp6sk;
1311 struct inet_opt *newinet;
1312 struct tcp_opt *newtp;
1314 struct ipv6_txoptions *opt;
1316 if (skb->protocol == htons(ETH_P_IP)) {
1321 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1326 newtcp6sk = (struct tcp6_sock *)newsk;
1327 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1329 newinet = inet_sk(newsk);
1330 newnp = inet6_sk(newsk);
1331 newtp = tcp_sk(newsk);
1333 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Synthesize ::ffff:a.b.c.d mapped addresses from the v4 child's tuple. */
1335 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1338 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1341 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1343 newtp->af_specific = &ipv6_mapped;
1344 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1345 newnp->pktoptions = NULL;
1347 newnp->mcast_oif = tcp_v6_iif(skb);
1348 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1350 /* Charge newly allocated IPv6 socket. Though it is mapped,
1353 #ifdef INET_REFCNT_DEBUG
1354 atomic_inc(&inet6_sock_nr);
1357 /* It is tricky place. Until this moment IPv4 tcp
1358 worked with IPv6 af_tcp.af_specific.
1361 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1368 #ifdef CONFIG_ACCEPT_QUEUES
1369 if (sk_acceptq_is_full(sk, req->acceptq_class))
1371 if (sk_acceptq_is_full(sk))
1375 if (np->rxopt.bits.srcrt == 2 &&
1376 opt == NULL && req->af.v6_req.pktopts) {
1377 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
1379 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
1383 struct in6_addr *final_p = NULL, final;
1386 memset(&fl, 0, sizeof(fl));
1387 fl.proto = IPPROTO_TCP;
1388 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
1389 if (opt && opt->srcrt) {
1390 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1391 ipv6_addr_copy(&final, &fl.fl6_dst);
1392 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1395 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
1396 fl.oif = sk->sk_bound_dev_if;
1397 fl.fl_ip_dport = req->rmt_port;
1398 fl.fl_ip_sport = inet_sk(sk)->sport;
1400 if (ip6_dst_lookup(sk, &dst, &fl))
1404 ipv6_addr_copy(&fl.fl6_dst, final_p);
1406 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1410 newsk = tcp_create_openreq_child(sk, req, skb);
1414 /* Charge newly allocated IPv6 socket */
1415 #ifdef INET_REFCNT_DEBUG
1416 atomic_inc(&inet6_sock_nr);
1419 ip6_dst_store(newsk, dst, NULL);
1420 newsk->sk_route_caps = dst->dev->features &
1421 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1423 newtcp6sk = (struct tcp6_sock *)newsk;
1424 newtcp6sk->pinet6 = &newtcp6sk->inet6;
1426 newtp = tcp_sk(newsk);
1427 newinet = inet_sk(newsk);
1428 newnp = inet6_sk(newsk);
1430 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1432 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
1433 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
1434 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
1435 newsk->sk_bound_dev_if = req->af.v6_req.iif;
1437 /* Now IPv6 options...
1439 First: no IPv4 options.
1441 newinet->opt = NULL;
1444 newnp->rxopt.all = np->rxopt.all;
1446 /* Clone pktoptions received with SYN */
1447 newnp->pktoptions = NULL;
1448 if (req->af.v6_req.pktopts) {
1449 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
1451 kfree_skb(req->af.v6_req.pktopts);
1452 req->af.v6_req.pktopts = NULL;
1453 if (newnp->pktoptions)
1454 skb_set_owner_r(newnp->pktoptions, newsk);
1457 newnp->mcast_oif = tcp_v6_iif(skb);
1458 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1460 /* Clone native IPv6 options from listening socket (if any)
1462 Yes, keeping reference count would be much more clever,
1463 but we make one more one thing there: reattach optmem
1467 newnp->opt = ipv6_dup_options(newsk, opt);
1469 sock_kfree_s(sk, opt, opt->tot_len);
1472 newtp->ext_header_len = 0;
1474 newtp->ext_header_len = newnp->opt->opt_nflen +
1475 newnp->opt->opt_flen;
1476 newtp->ext2_header_len = dst->header_len;
1478 tcp_sync_mss(newsk, dst_pmtu(dst));
1479 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1480 tcp_initialize_rcv_mss(newsk);
1482 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1484 __tcp_v6_hash(newsk);
1485 tcp_inherit_port(sk, newsk);
1490 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1492 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1493 if (opt && opt != np->opt)
1494 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Validate/prepare the TCP checksum state of an incoming IPv6 skb.
 * - CHECKSUM_HW: verify the hardware-computed sum now; on success mark
 *   the skb CHECKSUM_UNNECESSARY so later stages skip it.
 * - Small packets (<= 76 bytes): cheap to sum immediately, so verify
 *   in software right away.
 * - Otherwise: store the partial (folded, inverted) pseudo-header sum in
 *   skb->csum for deferred verification by tcp_checksum_complete().
 * NOTE(review): several lines (braces/returns) are elided in this extract;
 * error-return paths are not visible here.
 */
1499 static int tcp_v6_checksum_init(struct sk_buff *skb)
1501 if (skb->ip_summed == CHECKSUM_HW) {
1502 skb->ip_summed = CHECKSUM_UNNECESSARY;
1503 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1504 &skb->nh.ipv6h->daddr,skb->csum))
/* Hardware checksum did not verify: log (rate-limited) and fall back. */
1506 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1508 if (skb->len <= 76) {
1509 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1510 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1512 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Large packet: seed skb->csum with the pseudo-header so the full
 * checksum can be completed lazily later. */
1514 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1515 &skb->nh.ipv6h->daddr,0);
1520 /* The socket must have it's spinlock held when we get
1523 * We have a potential double-lock case here, so even when
1524 * doing backlog processing we use the BH locking scheme.
1525 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive handler for TCP over IPv6. Dispatches the skb
 * according to socket state (ESTABLISHED fast path, LISTEN, other),
 * and implements the IPV6_PKTOPTIONS latching described below.
 * Returns 0 on success; non-zero signals the caller to drop/reset.
 * NOTE(review): this extract omits lines (labels such as reset/discard,
 * closing braces); control flow between the visible gotos is inferred.
 */
1528 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1530 struct ipv6_pinfo *np = inet6_sk(sk);
1532 struct sk_buff *opt_skb = NULL;
1534 /* Imagine: socket is IPv6. IPv4 packet arrives,
1535 goes to IPv4 receive handler and backlogged.
1536 From backlog it always goes here. Kerboom...
1537 Fortunately, tcp_rcv_established and rcv_established
1538 handle them correctly, but it is not case with
1539 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* IPv4-mapped traffic on an IPv6 socket: hand off to the IPv4 path. */
1542 if (skb->protocol == htons(ETH_P_IP))
1543 return tcp_v4_do_rcv(sk, skb);
/* Apply the socket's attached BPF filter, if any. */
1545 if (sk_filter(sk, skb, 0))
1549 * socket locking is here for SMP purposes as backlog rcv
1550 * is currently called with bh processing disabled.
1553 /* Do Stevens' IPV6_PKTOPTIONS.
1555 Yes, guys, it is the only place in our code, where we
1556 may make it not affecting IPv4.
1557 The rest of code is protocol independent,
1558 and I do not like idea to uglify IPv4.
1560 Actually, all the idea behind IPV6_PKTOPTIONS
1561 looks not very well thought. For now we latch
1562 options, received in the last packet, enqueued
1563 by tcp. Feel free to propose better solution.
/* Clone kept so ancillary data survives even if tcp consumes skb. */
1567 opt_skb = skb_clone(skb, GFP_ATOMIC);
1569 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1570 TCP_CHECK_TIMER(sk);
1571 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1573 TCP_CHECK_TIMER(sk);
1575 goto ipv6_pktoptions;
/* Slow path: re-verify length against the TCP header and checksum. */
1579 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1582 if (sk->sk_state == TCP_LISTEN) {
1583 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1588 * Queue it on the new socket if the new socket is active,
1589 * otherwise we just shortcircuit this and continue with
1593 if (tcp_child_process(sk, nsk, skb))
1596 __kfree_skb(opt_skb);
1601 TCP_CHECK_TIMER(sk);
1602 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1604 TCP_CHECK_TIMER(sk);
1606 goto ipv6_pktoptions;
/* Error path (label elided in extract): send RST, free the clone. */
1610 tcp_v6_send_reset(skb);
1613 __kfree_skb(opt_skb);
1617 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1622 /* Do you ask, what is it?
1624 1. skb was enqueued by tcp.
1625 2. skb is added to tail of read queue, rather than out of order.
1626 3. socket is not in passive state.
1627 4. Finally, it really contains options, which user wants to receive.
1630 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1631 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1632 if (np->rxopt.bits.rxinfo)
1633 np->mcast_oif = tcp_v6_iif(opt_skb);
1634 if (np->rxopt.bits.rxhlim)
1635 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1636 if (ipv6_opt_accepted(sk, opt_skb)) {
/* Latch the new options atomically; old pktoptions comes back in
 * opt_skb and is freed below. */
1637 skb_set_owner_r(opt_skb, sk);
1638 opt_skb = xchg(&np->pktoptions, opt_skb);
1640 __kfree_skb(opt_skb);
1641 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Protocol entry point for TCP/IPv6 packets (registered in tcpv6_protocol).
 * Validates the header, fills in TCP_SKB_CB, looks up the owning socket
 * and either processes the skb directly, prequeues it, or backlogs it.
 * Also handles TIME_WAIT sockets at the tail. Returns 0/-1 per the
 * inet6 protocol handler convention.
 * NOTE(review): labels (no_tcp_socket, do_time_wait, discard paths) and
 * some lines are elided in this extract.
 */
1650 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1652 struct sk_buff *skb = *pskb;
/* Only packets addressed to this host are processed. */
1657 if (skb->pkt_type != PACKET_HOST)
1661 * Count it even if it's bad.
1663 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
/* Make sure the basic TCP header is linear/accessible. */
1665 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
/* doff is in 32-bit words; reject headers shorter than the minimum. */
1670 if (th->doff < sizeof(struct tcphdr)/4)
1672 if (!pskb_may_pull(skb, th->doff*4))
1675 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1676 tcp_v6_checksum_init(skb) < 0))
/* Cache sequence-space and flag info in the skb control block. */
1680 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1681 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1682 skb->len - th->doff*4);
1683 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1684 TCP_SKB_CB(skb)->when = 0;
1685 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1686 TCP_SKB_CB(skb)->sacked = 0;
/* Find the established or listening socket for this 4-tuple + iif. */
1688 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1689 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1695 if (sk->sk_state == TCP_TIME_WAIT)
1698 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1699 goto discard_and_relse;
1701 if (sk_filter(sk, skb, 0))
1702 goto discard_and_relse;
/* If the socket is not locked by a user context, process now (or
 * prequeue); otherwise defer to the socket backlog. */
1708 if (!sock_owned_by_user(sk)) {
1709 if (!tcp_prequeue(sk, skb))
1710 ret = tcp_v6_do_rcv(sk, skb);
1712 sk_add_backlog(sk, skb);
1716 return ret ? -1 : 0;
/* No-socket path: policy check, sanity check, then send RST. */
1719 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1722 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1724 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1726 tcp_v6_send_reset(skb);
/* TIME_WAIT handling (do_time_wait label elided in this extract). */
1743 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1744 tcp_tw_put((struct tcp_tw_bucket *) sk);
1748 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1749 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1750 tcp_tw_put((struct tcp_tw_bucket *) sk);
1754 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1755 skb, th, skb->len)) {
/* TCP_TW_SYN: a new SYN may revive the connection on a listener. */
1760 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1762 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1763 tcp_tw_put((struct tcp_tw_bucket *)sk);
1767 /* Fall through to ACK */
1770 tcp_v6_timewait_ack(sk, skb);
1774 case TCP_TW_SUCCESS:;
/*
 * Revalidate (and if necessary re-create) the cached route for an
 * established IPv6 TCP socket, e.g. after the old dst expired.
 * Rebuilds the flow from socket state, honours a type-0 routing header
 * (srcrt) by routing to the first intermediate hop, then restores the
 * final destination before the xfrm lookup result is stored.
 * Returns 0 on success, negative error otherwise (error returns are
 * partially elided in this extract).
 */
1779 static int tcp_v6_rebuild_header(struct sock *sk)
1782 struct dst_entry *dst;
1783 struct ipv6_pinfo *np = inet6_sk(sk);
/* Check whether the cached dst is still valid for our cookie. */
1785 dst = __sk_dst_check(sk, np->dst_cookie);
1788 struct inet_opt *inet = inet_sk(sk);
1789 struct in6_addr *final_p = NULL, final;
/* Build the flow descriptor from current socket addressing state. */
1792 memset(&fl, 0, sizeof(fl));
1793 fl.proto = IPPROTO_TCP;
1794 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1795 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1796 fl.fl6_flowlabel = np->flow_label;
1797 fl.oif = sk->sk_bound_dev_if;
1798 fl.fl_ip_dport = inet->dport;
1799 fl.fl_ip_sport = inet->sport;
/* Source routing: route via rt0->addr, remember real destination. */
1801 if (np->opt && np->opt->srcrt) {
1802 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1803 ipv6_addr_copy(&final, &fl.fl6_dst);
1804 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1808 err = ip6_dst_lookup(sk, &dst, &fl);
1810 sk->sk_route_caps = 0;
/* Restore the true final destination after routing via srcrt hop. */
1814 ipv6_addr_copy(&fl.fl6_dst, final_p);
1816 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1817 sk->sk_err_soft = -err;
/* Cache the new route and derive offload capabilities from it. */
1822 ip6_dst_store(sk, dst, NULL);
1823 sk->sk_route_caps = dst->dev->features &
1824 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1825 tcp_sk(sk)->ext2_header_len = dst->header_len;
/*
 * queue_xmit hook for IPv6 TCP (ipv6_specific.queue_xmit).
 * Builds the flow from the owning socket, reuses the cached route when
 * still valid or performs a fresh lookup, then hands the skb to
 * ip6_xmit(). Honours a type-0 routing header the same way as
 * tcp_v6_rebuild_header().
 */
1831 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1833 struct sock *sk = skb->sk;
1834 struct inet_opt *inet = inet_sk(sk);
1835 struct ipv6_pinfo *np = inet6_sk(sk);
1837 struct dst_entry *dst;
/* Flow is rebuilt on every transmit from socket state. */
1839 memset(&fl, 0, sizeof(fl));
1840 fl.proto = IPPROTO_TCP;
1841 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1842 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1843 fl.fl6_flowlabel = np->flow_label;
/* May set ECN bits in the flowlabel depending on socket ECN state. */
1844 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1845 fl.oif = sk->sk_bound_dev_if;
1846 fl.fl_ip_sport = inet->sport;
1847 fl.fl_ip_dport = inet->dport;
/* Source routing: route to the first hop of the routing header. */
1849 if (np->opt && np->opt->srcrt) {
1850 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1851 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1854 dst = __sk_dst_check(sk, np->dst_cookie);
/* Cached route invalid: do a fresh routing + xfrm lookup. */
1857 int err = ip6_dst_lookup(sk, &dst, &fl);
1860 sk->sk_err_soft = -err;
1864 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1865 sk->sk_route_caps = 0;
1870 ip6_dst_store(sk, dst, NULL);
1871 sk->sk_route_caps = dst->dev->features &
1872 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1873 tcp_sk(sk)->ext2_header_len = dst->header_len;
1876 skb->dst = dst_clone(dst);
1878 /* Restore final destination back after routing done */
1879 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1881 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * addr2sockaddr hook: fill a sockaddr_in6 with the peer's address and
 * port from socket state (used e.g. by getpeername). Flow info is not
 * reported; scope_id is set only for link-local peers on a bound device,
 * per RFC 3493 sin6_scope_id semantics.
 */
1884 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1886 struct ipv6_pinfo *np = inet6_sk(sk);
1887 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1889 sin6->sin6_family = AF_INET6;
1890 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
/* Port is already in network byte order in inet_sk. */
1891 sin6->sin6_port = inet_sk(sk)->dport;
1892 /* We do not store received flowlabel for TCP */
1893 sin6->sin6_flowinfo = 0;
1894 sin6->sin6_scope_id = 0;
1895 if (sk->sk_bound_dev_if &&
1896 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1897 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * remember_stamp hook for IPv6: timestamp caching (the IPv4 peer-cache
 * equivalent) is not implemented for IPv6 in this version — stub only.
 * NOTE(review): the return statement is elided in this extract;
 * presumably it returns 0 — confirm against the full source.
 */
1900 static int tcp_v6_remember_stamp(struct sock *sk)
1902 /* Alas, not yet... */
/*
 * Address-family operations for native TCP-over-IPv6 sockets.
 * Installed as tp->af_specific in tcp_v6_init_sock().
 */
1906 static struct tcp_func ipv6_specific = {
1907 .queue_xmit = tcp_v6_xmit,
1908 .send_check = tcp_v6_send_check,
1909 .rebuild_header = tcp_v6_rebuild_header,
1910 .conn_request = tcp_v6_conn_request,
1911 .syn_recv_sock = tcp_v6_syn_recv_sock,
1912 .remember_stamp = tcp_v6_remember_stamp,
1913 .net_header_len = sizeof(struct ipv6hdr),
1915 .setsockopt = ipv6_setsockopt,
1916 .getsockopt = ipv6_getsockopt,
1917 .addr2sockaddr = v6_addr2sockaddr,
1918 .sockaddr_len = sizeof(struct sockaddr_in6)
1922 * TCP over IPv4 via INET6 API
/*
 * Ops for IPv4-mapped connections on an AF_INET6 socket: transmission
 * and header rebuilding go through the IPv4 code paths, while
 * connection setup and sockopts stay on the IPv6 side.
 */
1925 static struct tcp_func ipv6_mapped = {
1926 .queue_xmit = ip_queue_xmit,
1927 .send_check = tcp_v4_send_check,
1928 .rebuild_header = tcp_v4_rebuild_header,
1929 .conn_request = tcp_v6_conn_request,
1930 .syn_recv_sock = tcp_v6_syn_recv_sock,
1931 .remember_stamp = tcp_v4_remember_stamp,
1932 .net_header_len = sizeof(struct iphdr),
1934 .setsockopt = ipv6_setsockopt,
1935 .getsockopt = ipv6_getsockopt,
1936 .addr2sockaddr = v6_addr2sockaddr,
1937 .sockaddr_len = sizeof(struct sockaddr_in6)
1942 /* NOTE: A lot of things set to zero explicitly by call to
1943 * sk_alloc() so need not be done here.
/*
 * proto->init hook: initialize TCP state for a fresh AF_INET6 socket —
 * queues, timers, RTO/congestion defaults, af_specific ops and the
 * sysctl-derived buffer sizes.
 */
1945 static int tcp_v6_init_sock(struct sock *sk)
1947 struct tcp_opt *tp = tcp_sk(sk);
1949 skb_queue_head_init(&tp->out_of_order_queue);
1950 tcp_init_xmit_timers(sk);
1951 tcp_prequeue_init(tp);
/* Conservative initial RTO and RTT deviation before any samples. */
1953 tp->rto = TCP_TIMEOUT_INIT;
1954 tp->mdev = TCP_TIMEOUT_INIT;
1956 /* So many TCP implementations out there (incorrectly) count the
1957 * initial SYN frame in their delayed-ACK and congestion control
1958 * algorithms that we must have the following bandaid to talk
1959 * efficiently to them. -DaveM
1963 /* See draft-stevens-tcpca-spec-01 for discussion of the
1964 * initialization of these values.
1966 tp->snd_ssthresh = 0x7fffffff;
1967 tp->snd_cwnd_clamp = ~0;
/* 536 = classic default MSS (RFC 1122) until path MTU is learned. */
1968 tp->mss_cache_std = tp->mss_cache = 536;
1970 tp->reordering = sysctl_tcp_reordering;
1972 sk->sk_state = TCP_CLOSE;
/* Native IPv6 ops; may be swapped to ipv6_mapped for v4-mapped peers. */
1974 tp->af_specific = &ipv6_specific;
1976 sk->sk_write_space = sk_stream_write_space;
1977 sk->sk_use_write_queue = 1;
1979 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1980 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1982 atomic_inc(&tcp_sockets_allocated);
/*
 * proto->destroy hook: run the shared IPv4/TCP teardown first, then
 * release the IPv6-specific state (options, pktoptions, etc.).
 */
1987 static int tcp_v6_destroy_sock(struct sock *sk)
1989 extern int tcp_v4_destroy_sock(struct sock *sk);
1991 tcp_v4_destroy_sock(sk);
1992 return inet6_destroy_sock(sk);
1995 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Format one SYN_RECV open_request as a /proc/net/tcp6 row.
 * @ttd: ticks until the request expires (clamped elsewhere if negative —
 * elided lines in this extract).
 */
1996 static void get_openreq6(struct seq_file *seq,
1997 struct sock *sk, struct open_request *req, int i, int uid)
1999 struct in6_addr *dest, *src;
2000 int ttd = req->expires - jiffies;
2005 src = &req->af.v6_req.loc_addr;
2006 dest = &req->af.v6_req.rmt_addr;
/* Row layout must match the header printed by tcp6_seq_show(). */
2008 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2009 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2011 src->s6_addr32[0], src->s6_addr32[1],
2012 src->s6_addr32[2], src->s6_addr32[3],
2013 ntohs(inet_sk(sk)->sport),
2014 dest->s6_addr32[0], dest->s6_addr32[1],
2015 dest->s6_addr32[2], dest->s6_addr32[3],
2016 ntohs(req->rmt_port),
2018 0,0, /* could print option size, but that is af dependent. */
2019 1, /* timers active (only the expire timer) */
2020 jiffies_to_clock_t(ttd),
2023 0, /* non standard timer */
2024 0, /* open_requests have no inode */
/*
 * Format one established/listening TCPv6 socket as a /proc/net/tcp6 row,
 * including which timer (retransmit/probe/keepalive) is pending and when
 * it fires.
 */
2028 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2030 struct in6_addr *dest, *src;
2033 unsigned long timer_expires;
2034 struct inet_opt *inet = inet_sk(sp);
2035 struct tcp_opt *tp = tcp_sk(sp);
2036 struct ipv6_pinfo *np = inet6_sk(sp);
2039 src = &np->rcv_saddr;
2040 destp = ntohs(inet->dport);
2041 srcp = ntohs(inet->sport);
/* Pick the active timer to report (retransmit > probe0 > sk_timer). */
2042 if (tp->pending == TCP_TIME_RETRANS) {
2044 timer_expires = tp->timeout;
2045 } else if (tp->pending == TCP_TIME_PROBE0) {
2047 timer_expires = tp->timeout;
2048 } else if (timer_pending(&sp->sk_timer)) {
2050 timer_expires = sp->sk_timer.expires;
2053 timer_expires = jiffies;
2057 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2058 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2060 src->s6_addr32[0], src->s6_addr32[1],
2061 src->s6_addr32[2], src->s6_addr32[3], srcp,
2062 dest->s6_addr32[0], dest->s6_addr32[1],
2063 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx_queue = unacked bytes; rx_queue = bytes not yet read by user. */
2065 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2067 jiffies_to_clock_t(timer_expires - jiffies),
2072 atomic_read(&sp->sk_refcnt), sp,
2073 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
/* ssthresh >= 0xFFFF is treated as "not set" and shown as -1. */
2074 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Format one TIME_WAIT bucket as a /proc/net/tcp6 row. Most columns are
 * fixed zeros since a tw bucket carries no queues, uid or inode.
 */
2078 static void get_timewait6_sock(struct seq_file *seq,
2079 struct tcp_tw_bucket *tw, int i)
2081 struct in6_addr *dest, *src;
/* Remaining time-wait duration in jiffies. */
2083 int ttd = tw->tw_ttd - jiffies;
2088 dest = &tw->tw_v6_daddr;
2089 src = &tw->tw_v6_rcv_saddr;
2090 destp = ntohs(tw->tw_dport);
2091 srcp = ntohs(tw->tw_sport);
2094 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2095 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2097 src->s6_addr32[0], src->s6_addr32[1],
2098 src->s6_addr32[2], src->s6_addr32[3], srcp,
2099 dest->s6_addr32[0], dest->s6_addr32[1],
2100 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* "3" in the timer column historically means the time-wait timer. */
2101 tw->tw_substate, 0, 0,
2102 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2103 atomic_read(&tw->tw_refcnt), tw);
2106 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for /proc/net/tcp6: print the header row on
 * SEQ_START_TOKEN, otherwise dispatch on the iterator state to the
 * matching row formatter above.
 */
2107 static int tcp6_seq_show(struct seq_file *seq, void *v)
2109 struct tcp_iter_state *st;
2111 if (v == SEQ_START_TOKEN) {
2116 "st tx_queue rx_queue tr tm->when retrnsmt"
2117 " uid timeout inode\n");
2122 switch (st->state) {
2123 case TCP_SEQ_STATE_LISTENING:
2124 case TCP_SEQ_STATE_ESTABLISHED:
2125 get_tcp6_sock(seq, v, st->num);
2127 case TCP_SEQ_STATE_OPENREQ:
2128 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2130 case TCP_SEQ_STATE_TIME_WAIT:
2131 get_timewait6_sock(seq, v, st->num);
/* Registration data for the /proc/net/tcp6 seq_file (fops filled in by
 * tcp_proc_register). */
2138 static struct file_operations tcp6_seq_fops;
2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2140 .owner = THIS_MODULE,
2143 .seq_show = tcp6_seq_show,
2144 .seq_fops = &tcp6_seq_fops,
/* Register /proc/net/tcp6; returns the tcp_proc_register() status. */
2147 int __init tcp6_proc_init(void)
2149 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister /proc/net/tcp6 on module teardown. */
2152 void tcp6_proc_exit(void)
2154 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * proto descriptor for SOCK_STREAM/IPPROTO_TCP on AF_INET6. Most ops
 * are the shared TCP implementations; v6-specific entries handle
 * connect, rcv, hashing and port allocation.
 */
2158 struct proto tcpv6_prot = {
2161 .connect = tcp_v6_connect,
2162 .disconnect = tcp_disconnect,
2163 .accept = tcp_accept,
2165 .init = tcp_v6_init_sock,
2166 .destroy = tcp_v6_destroy_sock,
2167 .shutdown = tcp_shutdown,
2168 .setsockopt = tcp_setsockopt,
2169 .getsockopt = tcp_getsockopt,
2170 .sendmsg = tcp_sendmsg,
2171 .recvmsg = tcp_recvmsg,
2172 .backlog_rcv = tcp_v6_do_rcv,
2173 .hash = tcp_v6_hash,
2174 .unhash = tcp_unhash,
2175 .get_port = tcp_v6_get_port,
2176 .enter_memory_pressure = tcp_enter_memory_pressure,
2177 .sockets_allocated = &tcp_sockets_allocated,
2178 .memory_allocated = &tcp_memory_allocated,
2179 .memory_pressure = &tcp_memory_pressure,
2180 .sysctl_mem = sysctl_tcp_mem,
2181 .sysctl_wmem = sysctl_tcp_wmem,
2182 .sysctl_rmem = sysctl_tcp_rmem,
2183 .max_header = MAX_TCP_HEADER,
2184 .slab_obj_size = sizeof(struct tcp6_sock),
/* inet6 layer-4 protocol hooks for TCP; NOPOLICY/FINAL: xfrm policy is
 * checked inside tcp_v6_rcv itself and no further protocols follow. */
2187 static struct inet6_protocol tcpv6_protocol = {
2188 .handler = tcp_v6_rcv,
2189 .err_handler = tcp_v6_err,
2190 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2193 extern struct proto_ops inet6_stream_ops;
/* socket()-level registration: maps SOCK_STREAM/IPPROTO_TCP on AF_INET6
 * to tcpv6_prot; PERMANENT entries cannot be overridden or removed. */
2195 static struct inet_protosw tcpv6_protosw = {
2196 .type = SOCK_STREAM,
2197 .protocol = IPPROTO_TCP,
2198 .prot = &tcpv6_prot,
2199 .ops = &inet6_stream_ops,
2202 .flags = INET_PROTOSW_PERMANENT,
2205 void __init tcpv6_init(void)
2207 /* register inet6 protocol */
2208 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2209 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2210 inet6_register_protosw(&tcpv6_protosw);